diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,82888 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9999830999721149, + "eval_steps": 61000, + "global_step": 118342, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0001690002788504601, + "grad_norm": 8.151397705078125, + "learning_rate": 8.449514152936206e-09, + "loss": 0.5921, + "step": 10 + }, + { + "epoch": 0.0003380005577009202, + "grad_norm": 11.88992977142334, + "learning_rate": 1.6899028305872412e-08, + "loss": 0.6201, + "step": 20 + }, + { + "epoch": 0.0005070008365513803, + "grad_norm": 12.493903160095215, + "learning_rate": 2.5348542458808618e-08, + "loss": 0.627, + "step": 30 + }, + { + "epoch": 0.0006760011154018404, + "grad_norm": 9.884626388549805, + "learning_rate": 3.3798056611744824e-08, + "loss": 0.6306, + "step": 40 + }, + { + "epoch": 0.0008450013942523005, + "grad_norm": 7.7189764976501465, + "learning_rate": 4.224757076468103e-08, + "loss": 0.639, + "step": 50 + }, + { + "epoch": 0.0010140016731027606, + "grad_norm": 9.185622215270996, + "learning_rate": 5.0697084917617236e-08, + "loss": 0.6557, + "step": 60 + }, + { + "epoch": 0.0011830019519532208, + "grad_norm": 8.327007293701172, + "learning_rate": 5.9146599070553445e-08, + "loss": 0.617, + "step": 70 + }, + { + "epoch": 0.0013520022308036808, + "grad_norm": 7.37695837020874, + "learning_rate": 6.759611322348965e-08, + "loss": 0.626, + "step": 80 + }, + { + "epoch": 0.001521002509654141, + "grad_norm": 7.518730640411377, + "learning_rate": 7.604562737642586e-08, + "loss": 0.6532, + "step": 90 + }, + { + "epoch": 0.001690002788504601, + "grad_norm": 7.305288791656494, + "learning_rate": 8.449514152936207e-08, + "loss": 0.5994, + "step": 100 + }, + { + "epoch": 0.0018590030673550611, + "grad_norm": 8.757489204406738, + "learning_rate": 9.294465568229827e-08, + "loss": 0.6068, + "step": 110 + }, + { + "epoch": 0.0020280033462055213, + "grad_norm": 10.401308059692383, + "learning_rate": 1.0139416983523447e-07, + "loss": 0.5565, + "step": 120 + }, + { + "epoch": 0.0021970036250559813, + "grad_norm": 7.546243190765381, + "learning_rate": 1.0984368398817069e-07, + "loss": 0.5529, + "step": 130 + }, + { + "epoch": 0.0023660039039064417, + "grad_norm": 5.31623649597168, + "learning_rate": 1.1829319814110689e-07, + "loss": 0.5421, + "step": 140 + }, + { + "epoch": 0.0025350041827569016, + "grad_norm": 4.31443452835083, + "learning_rate": 1.267427122940431e-07, + "loss": 0.4825, + "step": 150 + }, + { + "epoch": 0.0027040044616073616, + "grad_norm": 4.859823703765869, + "learning_rate": 1.351922264469793e-07, + "loss": 0.478, + "step": 160 + }, + { + "epoch": 0.0028730047404578215, + "grad_norm": 4.5274834632873535, + "learning_rate": 1.4364174059991553e-07, + "loss": 0.446, + "step": 170 + }, + { + "epoch": 0.003042005019308282, + "grad_norm": 3.7420647144317627, + "learning_rate": 1.5209125475285173e-07, + "loss": 0.4171, + "step": 180 + }, + { + "epoch": 0.003211005298158742, + "grad_norm": 4.145787239074707, + "learning_rate": 1.6054076890578793e-07, + "loss": 0.3593, + "step": 190 + }, + { + "epoch": 0.003380005577009202, + "grad_norm": 3.9338839054107666, + "learning_rate": 1.6899028305872413e-07, + "loss": 0.3165, + "step": 200 + }, + { + "epoch": 0.0035490058558596623, + "grad_norm": 2.608560800552368, + "learning_rate": 1.7743979721166034e-07, + "loss": 0.2655, + "step": 
210 + }, + { + "epoch": 0.0037180061347101222, + "grad_norm": 2.9996230602264404, + "learning_rate": 1.8588931136459654e-07, + "loss": 0.2363, + "step": 220 + }, + { + "epoch": 0.003887006413560582, + "grad_norm": 3.015291452407837, + "learning_rate": 1.9433882551753277e-07, + "loss": 0.222, + "step": 230 + }, + { + "epoch": 0.004056006692411043, + "grad_norm": 2.3702433109283447, + "learning_rate": 2.0278833967046894e-07, + "loss": 0.205, + "step": 240 + }, + { + "epoch": 0.0042250069712615025, + "grad_norm": 2.180356740951538, + "learning_rate": 2.1123785382340517e-07, + "loss": 0.1839, + "step": 250 + }, + { + "epoch": 0.0043940072501119625, + "grad_norm": 2.714348793029785, + "learning_rate": 2.1968736797634138e-07, + "loss": 0.1725, + "step": 260 + }, + { + "epoch": 0.0045630075289624225, + "grad_norm": 3.1054859161376953, + "learning_rate": 2.281368821292776e-07, + "loss": 0.1611, + "step": 270 + }, + { + "epoch": 0.004732007807812883, + "grad_norm": 2.3040058612823486, + "learning_rate": 2.3658639628221378e-07, + "loss": 0.1358, + "step": 280 + }, + { + "epoch": 0.004901008086663343, + "grad_norm": 2.4823195934295654, + "learning_rate": 2.4503591043515e-07, + "loss": 0.1308, + "step": 290 + }, + { + "epoch": 0.005070008365513803, + "grad_norm": 2.2402455806732178, + "learning_rate": 2.534854245880862e-07, + "loss": 0.1192, + "step": 300 + }, + { + "epoch": 0.005239008644364263, + "grad_norm": 1.8717119693756104, + "learning_rate": 2.619349387410224e-07, + "loss": 0.1085, + "step": 310 + }, + { + "epoch": 0.005408008923214723, + "grad_norm": 1.9309443235397339, + "learning_rate": 2.703844528939586e-07, + "loss": 0.1231, + "step": 320 + }, + { + "epoch": 0.005577009202065183, + "grad_norm": 1.9466526508331299, + "learning_rate": 2.788339670468948e-07, + "loss": 0.1119, + "step": 330 + }, + { + "epoch": 0.005746009480915643, + "grad_norm": 2.3317859172821045, + "learning_rate": 2.8728348119983105e-07, + "loss": 0.1019, + "step": 340 + }, + { + "epoch": 0.005915009759766104, + "grad_norm": 1.9998661279678345, + "learning_rate": 2.957329953527673e-07, + "loss": 0.0835, + "step": 350 + }, + { + "epoch": 0.006084010038616564, + "grad_norm": 1.7425237894058228, + "learning_rate": 3.0418250950570346e-07, + "loss": 0.0885, + "step": 360 + }, + { + "epoch": 0.006253010317467024, + "grad_norm": 2.8006510734558105, + "learning_rate": 3.1263202365863963e-07, + "loss": 0.0907, + "step": 370 + }, + { + "epoch": 0.006422010596317484, + "grad_norm": 1.7340335845947266, + "learning_rate": 3.2108153781157586e-07, + "loss": 0.0846, + "step": 380 + }, + { + "epoch": 0.006591010875167944, + "grad_norm": 1.3293828964233398, + "learning_rate": 3.295310519645121e-07, + "loss": 0.0819, + "step": 390 + }, + { + "epoch": 0.006760011154018404, + "grad_norm": 1.7171714305877686, + "learning_rate": 3.3798056611744827e-07, + "loss": 0.0764, + "step": 400 + }, + { + "epoch": 0.0069290114328688646, + "grad_norm": 1.758617639541626, + "learning_rate": 3.4643008027038444e-07, + "loss": 0.0693, + "step": 410 + }, + { + "epoch": 0.0070980117117193245, + "grad_norm": 1.6397101879119873, + "learning_rate": 3.5487959442332067e-07, + "loss": 0.0707, + "step": 420 + }, + { + "epoch": 0.0072670119905697845, + "grad_norm": 1.5723251104354858, + "learning_rate": 3.633291085762569e-07, + "loss": 0.065, + "step": 430 + }, + { + "epoch": 0.0074360122694202445, + "grad_norm": 1.4874505996704102, + "learning_rate": 3.717786227291931e-07, + "loss": 0.0717, + "step": 440 + }, + { + "epoch": 0.007605012548270704, + "grad_norm": 
1.5094786882400513, + "learning_rate": 3.802281368821293e-07, + "loss": 0.0725, + "step": 450 + }, + { + "epoch": 0.007774012827121164, + "grad_norm": 1.599475622177124, + "learning_rate": 3.8867765103506554e-07, + "loss": 0.0604, + "step": 460 + }, + { + "epoch": 0.007943013105971624, + "grad_norm": 1.5444916486740112, + "learning_rate": 3.9712716518800176e-07, + "loss": 0.0615, + "step": 470 + }, + { + "epoch": 0.008112013384822085, + "grad_norm": 1.5351428985595703, + "learning_rate": 4.055766793409379e-07, + "loss": 0.0579, + "step": 480 + }, + { + "epoch": 0.008281013663672544, + "grad_norm": 2.069044351577759, + "learning_rate": 4.140261934938741e-07, + "loss": 0.0635, + "step": 490 + }, + { + "epoch": 0.008450013942523005, + "grad_norm": 1.7014342546463013, + "learning_rate": 4.2247570764681035e-07, + "loss": 0.0567, + "step": 500 + }, + { + "epoch": 0.008619014221373466, + "grad_norm": 1.249957799911499, + "learning_rate": 4.309252217997466e-07, + "loss": 0.0519, + "step": 510 + }, + { + "epoch": 0.008788014500223925, + "grad_norm": 1.6389652490615845, + "learning_rate": 4.3937473595268275e-07, + "loss": 0.0501, + "step": 520 + }, + { + "epoch": 0.008957014779074386, + "grad_norm": 1.3542331457138062, + "learning_rate": 4.47824250105619e-07, + "loss": 0.0532, + "step": 530 + }, + { + "epoch": 0.009126015057924845, + "grad_norm": 1.6643306016921997, + "learning_rate": 4.562737642585552e-07, + "loss": 0.0481, + "step": 540 + }, + { + "epoch": 0.009295015336775306, + "grad_norm": 2.3514788150787354, + "learning_rate": 4.647232784114914e-07, + "loss": 0.0563, + "step": 550 + }, + { + "epoch": 0.009464015615625767, + "grad_norm": 1.8567042350769043, + "learning_rate": 4.7317279256442756e-07, + "loss": 0.0638, + "step": 560 + }, + { + "epoch": 0.009633015894476226, + "grad_norm": 1.4401603937149048, + "learning_rate": 4.816223067173637e-07, + "loss": 0.051, + "step": 570 + }, + { + "epoch": 0.009802016173326687, + "grad_norm": 1.580834984779358, + "learning_rate": 4.900718208703e-07, + "loss": 0.0394, + "step": 580 + }, + { + "epoch": 0.009971016452177146, + "grad_norm": 1.2436542510986328, + "learning_rate": 4.985213350232362e-07, + "loss": 0.0534, + "step": 590 + }, + { + "epoch": 0.010140016731027606, + "grad_norm": 1.244551420211792, + "learning_rate": 5.069708491761724e-07, + "loss": 0.0309, + "step": 600 + }, + { + "epoch": 0.010309017009878066, + "grad_norm": 1.8506207466125488, + "learning_rate": 5.154203633291087e-07, + "loss": 0.0369, + "step": 610 + }, + { + "epoch": 0.010478017288728526, + "grad_norm": 1.8189890384674072, + "learning_rate": 5.238698774820448e-07, + "loss": 0.0495, + "step": 620 + }, + { + "epoch": 0.010647017567578987, + "grad_norm": 2.2815606594085693, + "learning_rate": 5.32319391634981e-07, + "loss": 0.051, + "step": 630 + }, + { + "epoch": 0.010816017846429446, + "grad_norm": 1.1429193019866943, + "learning_rate": 5.407689057879172e-07, + "loss": 0.0381, + "step": 640 + }, + { + "epoch": 0.010985018125279907, + "grad_norm": 1.6302450895309448, + "learning_rate": 5.492184199408535e-07, + "loss": 0.0389, + "step": 650 + }, + { + "epoch": 0.011154018404130366, + "grad_norm": 1.1284207105636597, + "learning_rate": 5.576679340937896e-07, + "loss": 0.0418, + "step": 660 + }, + { + "epoch": 0.011323018682980827, + "grad_norm": 1.0358737707138062, + "learning_rate": 5.661174482467258e-07, + "loss": 0.044, + "step": 670 + }, + { + "epoch": 0.011492018961831286, + "grad_norm": 1.4662261009216309, + "learning_rate": 5.745669623996621e-07, + "loss": 0.0362, + "step": 
680 + }, + { + "epoch": 0.011661019240681747, + "grad_norm": 1.567488193511963, + "learning_rate": 5.830164765525983e-07, + "loss": 0.0455, + "step": 690 + }, + { + "epoch": 0.011830019519532208, + "grad_norm": 1.529253363609314, + "learning_rate": 5.914659907055346e-07, + "loss": 0.0323, + "step": 700 + }, + { + "epoch": 0.011999019798382667, + "grad_norm": 1.3436548709869385, + "learning_rate": 5.999155048584706e-07, + "loss": 0.0418, + "step": 710 + }, + { + "epoch": 0.012168020077233128, + "grad_norm": 1.8246670961380005, + "learning_rate": 6.083650190114069e-07, + "loss": 0.0298, + "step": 720 + }, + { + "epoch": 0.012337020356083587, + "grad_norm": 1.7654868364334106, + "learning_rate": 6.168145331643431e-07, + "loss": 0.0358, + "step": 730 + }, + { + "epoch": 0.012506020634934048, + "grad_norm": 1.6966760158538818, + "learning_rate": 6.252640473172793e-07, + "loss": 0.0456, + "step": 740 + }, + { + "epoch": 0.012675020913784509, + "grad_norm": 1.1358875036239624, + "learning_rate": 6.337135614702155e-07, + "loss": 0.0345, + "step": 750 + }, + { + "epoch": 0.012844021192634968, + "grad_norm": 1.6618404388427734, + "learning_rate": 6.421630756231517e-07, + "loss": 0.0372, + "step": 760 + }, + { + "epoch": 0.013013021471485428, + "grad_norm": 0.9701797366142273, + "learning_rate": 6.506125897760879e-07, + "loss": 0.0325, + "step": 770 + }, + { + "epoch": 0.013182021750335888, + "grad_norm": 0.8507864475250244, + "learning_rate": 6.590621039290242e-07, + "loss": 0.0358, + "step": 780 + }, + { + "epoch": 0.013351022029186348, + "grad_norm": 0.8990994095802307, + "learning_rate": 6.675116180819604e-07, + "loss": 0.0385, + "step": 790 + }, + { + "epoch": 0.013520022308036807, + "grad_norm": 1.6713075637817383, + "learning_rate": 6.759611322348965e-07, + "loss": 0.0317, + "step": 800 + }, + { + "epoch": 0.013689022586887268, + "grad_norm": 1.2208130359649658, + "learning_rate": 6.844106463878328e-07, + "loss": 0.0358, + "step": 810 + }, + { + "epoch": 0.013858022865737729, + "grad_norm": 1.2064299583435059, + "learning_rate": 6.928601605407689e-07, + "loss": 0.0361, + "step": 820 + }, + { + "epoch": 0.014027023144588188, + "grad_norm": 1.4440975189208984, + "learning_rate": 7.013096746937053e-07, + "loss": 0.0254, + "step": 830 + }, + { + "epoch": 0.014196023423438649, + "grad_norm": 1.4820549488067627, + "learning_rate": 7.097591888466413e-07, + "loss": 0.0301, + "step": 840 + }, + { + "epoch": 0.014365023702289108, + "grad_norm": 1.100009799003601, + "learning_rate": 7.182087029995775e-07, + "loss": 0.0288, + "step": 850 + }, + { + "epoch": 0.014534023981139569, + "grad_norm": 1.205920696258545, + "learning_rate": 7.266582171525138e-07, + "loss": 0.0257, + "step": 860 + }, + { + "epoch": 0.01470302425999003, + "grad_norm": 0.9258947372436523, + "learning_rate": 7.3510773130545e-07, + "loss": 0.0316, + "step": 870 + }, + { + "epoch": 0.014872024538840489, + "grad_norm": 1.1105785369873047, + "learning_rate": 7.435572454583862e-07, + "loss": 0.0283, + "step": 880 + }, + { + "epoch": 0.01504102481769095, + "grad_norm": 2.2304327487945557, + "learning_rate": 7.520067596113224e-07, + "loss": 0.0343, + "step": 890 + }, + { + "epoch": 0.015210025096541409, + "grad_norm": 1.0406194925308228, + "learning_rate": 7.604562737642586e-07, + "loss": 0.0338, + "step": 900 + }, + { + "epoch": 0.01537902537539187, + "grad_norm": 1.505669355392456, + "learning_rate": 7.689057879171949e-07, + "loss": 0.0237, + "step": 910 + }, + { + "epoch": 0.015548025654242329, + "grad_norm": 1.3921010494232178, + 
"learning_rate": 7.773553020701311e-07, + "loss": 0.0349, + "step": 920 + }, + { + "epoch": 0.01571702593309279, + "grad_norm": 0.5777738690376282, + "learning_rate": 7.858048162230672e-07, + "loss": 0.0275, + "step": 930 + }, + { + "epoch": 0.01588602621194325, + "grad_norm": 0.9491533637046814, + "learning_rate": 7.942543303760035e-07, + "loss": 0.0269, + "step": 940 + }, + { + "epoch": 0.01605502649079371, + "grad_norm": 2.254439353942871, + "learning_rate": 8.027038445289396e-07, + "loss": 0.0278, + "step": 950 + }, + { + "epoch": 0.01622402676964417, + "grad_norm": 0.8885716795921326, + "learning_rate": 8.111533586818758e-07, + "loss": 0.024, + "step": 960 + }, + { + "epoch": 0.01639302704849463, + "grad_norm": 1.567550539970398, + "learning_rate": 8.196028728348121e-07, + "loss": 0.0286, + "step": 970 + }, + { + "epoch": 0.01656202732734509, + "grad_norm": 0.8156539797782898, + "learning_rate": 8.280523869877482e-07, + "loss": 0.0259, + "step": 980 + }, + { + "epoch": 0.01673102760619555, + "grad_norm": 0.7024825811386108, + "learning_rate": 8.365019011406844e-07, + "loss": 0.0215, + "step": 990 + }, + { + "epoch": 0.01690002788504601, + "grad_norm": 1.4939314126968384, + "learning_rate": 8.449514152936207e-07, + "loss": 0.0261, + "step": 1000 + }, + { + "epoch": 0.01706902816389647, + "grad_norm": 0.7816221714019775, + "learning_rate": 8.534009294465569e-07, + "loss": 0.0257, + "step": 1010 + }, + { + "epoch": 0.017238028442746932, + "grad_norm": 0.7380543947219849, + "learning_rate": 8.618504435994932e-07, + "loss": 0.0272, + "step": 1020 + }, + { + "epoch": 0.01740702872159739, + "grad_norm": 0.7406052350997925, + "learning_rate": 8.702999577524293e-07, + "loss": 0.0232, + "step": 1030 + }, + { + "epoch": 0.01757602900044785, + "grad_norm": 0.8127363920211792, + "learning_rate": 8.787494719053655e-07, + "loss": 0.0238, + "step": 1040 + }, + { + "epoch": 0.01774502927929831, + "grad_norm": 1.1809587478637695, + "learning_rate": 8.871989860583018e-07, + "loss": 0.0264, + "step": 1050 + }, + { + "epoch": 0.01791402955814877, + "grad_norm": 1.326064944267273, + "learning_rate": 8.95648500211238e-07, + "loss": 0.0277, + "step": 1060 + }, + { + "epoch": 0.01808302983699923, + "grad_norm": 1.0980157852172852, + "learning_rate": 9.04098014364174e-07, + "loss": 0.018, + "step": 1070 + }, + { + "epoch": 0.01825203011584969, + "grad_norm": 0.8225419521331787, + "learning_rate": 9.125475285171104e-07, + "loss": 0.0267, + "step": 1080 + }, + { + "epoch": 0.018421030394700152, + "grad_norm": 0.9772644639015198, + "learning_rate": 9.209970426700465e-07, + "loss": 0.0212, + "step": 1090 + }, + { + "epoch": 0.01859003067355061, + "grad_norm": 1.07682466506958, + "learning_rate": 9.294465568229828e-07, + "loss": 0.0244, + "step": 1100 + }, + { + "epoch": 0.01875903095240107, + "grad_norm": 0.7112758159637451, + "learning_rate": 9.37896070975919e-07, + "loss": 0.0181, + "step": 1110 + }, + { + "epoch": 0.018928031231251533, + "grad_norm": 0.8537008166313171, + "learning_rate": 9.463455851288551e-07, + "loss": 0.0185, + "step": 1120 + }, + { + "epoch": 0.019097031510101992, + "grad_norm": 1.3446390628814697, + "learning_rate": 9.547950992817914e-07, + "loss": 0.0194, + "step": 1130 + }, + { + "epoch": 0.01926603178895245, + "grad_norm": 1.150349736213684, + "learning_rate": 9.632446134347275e-07, + "loss": 0.0269, + "step": 1140 + }, + { + "epoch": 0.01943503206780291, + "grad_norm": 0.9560878276824951, + "learning_rate": 9.716941275876638e-07, + "loss": 0.0261, + "step": 1150 + }, + { + "epoch": 
0.019604032346653373, + "grad_norm": 0.605726957321167, + "learning_rate": 9.801436417406e-07, + "loss": 0.0264, + "step": 1160 + }, + { + "epoch": 0.019773032625503832, + "grad_norm": 1.082674264907837, + "learning_rate": 9.885931558935361e-07, + "loss": 0.0207, + "step": 1170 + }, + { + "epoch": 0.01994203290435429, + "grad_norm": 1.2841850519180298, + "learning_rate": 9.970426700464724e-07, + "loss": 0.0246, + "step": 1180 + }, + { + "epoch": 0.020111033183204754, + "grad_norm": 1.2737842798233032, + "learning_rate": 1.0054921841994087e-06, + "loss": 0.0183, + "step": 1190 + }, + { + "epoch": 0.020280033462055213, + "grad_norm": 1.370842456817627, + "learning_rate": 1.0139416983523447e-06, + "loss": 0.0327, + "step": 1200 + }, + { + "epoch": 0.020449033740905672, + "grad_norm": 1.343038558959961, + "learning_rate": 1.022391212505281e-06, + "loss": 0.0236, + "step": 1210 + }, + { + "epoch": 0.02061803401975613, + "grad_norm": 0.7596139311790466, + "learning_rate": 1.0308407266582173e-06, + "loss": 0.0199, + "step": 1220 + }, + { + "epoch": 0.020787034298606594, + "grad_norm": 0.6171843409538269, + "learning_rate": 1.0392902408111534e-06, + "loss": 0.0181, + "step": 1230 + }, + { + "epoch": 0.020956034577457053, + "grad_norm": 0.9393839836120605, + "learning_rate": 1.0477397549640897e-06, + "loss": 0.0174, + "step": 1240 + }, + { + "epoch": 0.021125034856307512, + "grad_norm": 0.6550285816192627, + "learning_rate": 1.056189269117026e-06, + "loss": 0.0191, + "step": 1250 + }, + { + "epoch": 0.021294035135157974, + "grad_norm": 2.181431770324707, + "learning_rate": 1.064638783269962e-06, + "loss": 0.027, + "step": 1260 + }, + { + "epoch": 0.021463035414008434, + "grad_norm": 1.4009298086166382, + "learning_rate": 1.0730882974228983e-06, + "loss": 0.018, + "step": 1270 + }, + { + "epoch": 0.021632035692858893, + "grad_norm": 0.7100903987884521, + "learning_rate": 1.0815378115758344e-06, + "loss": 0.0233, + "step": 1280 + }, + { + "epoch": 0.02180103597170935, + "grad_norm": 0.7620948553085327, + "learning_rate": 1.0899873257287706e-06, + "loss": 0.0169, + "step": 1290 + }, + { + "epoch": 0.021970036250559814, + "grad_norm": 1.241431713104248, + "learning_rate": 1.098436839881707e-06, + "loss": 0.0158, + "step": 1300 + }, + { + "epoch": 0.022139036529410273, + "grad_norm": 1.2758827209472656, + "learning_rate": 1.106886354034643e-06, + "loss": 0.0232, + "step": 1310 + }, + { + "epoch": 0.022308036808260732, + "grad_norm": 0.9061040878295898, + "learning_rate": 1.1153358681875793e-06, + "loss": 0.0147, + "step": 1320 + }, + { + "epoch": 0.022477037087111195, + "grad_norm": 0.692929744720459, + "learning_rate": 1.1237853823405156e-06, + "loss": 0.0265, + "step": 1330 + }, + { + "epoch": 0.022646037365961654, + "grad_norm": 1.0171177387237549, + "learning_rate": 1.1322348964934516e-06, + "loss": 0.0176, + "step": 1340 + }, + { + "epoch": 0.022815037644812113, + "grad_norm": 0.5499007105827332, + "learning_rate": 1.140684410646388e-06, + "loss": 0.0231, + "step": 1350 + }, + { + "epoch": 0.022984037923662572, + "grad_norm": 0.607111930847168, + "learning_rate": 1.1491339247993242e-06, + "loss": 0.0183, + "step": 1360 + }, + { + "epoch": 0.023153038202513035, + "grad_norm": 0.6044015288352966, + "learning_rate": 1.1575834389522603e-06, + "loss": 0.017, + "step": 1370 + }, + { + "epoch": 0.023322038481363494, + "grad_norm": 1.118083119392395, + "learning_rate": 1.1660329531051966e-06, + "loss": 0.0186, + "step": 1380 + }, + { + "epoch": 0.023491038760213953, + "grad_norm": 0.9161491394042969, + 
"learning_rate": 1.1744824672581326e-06, + "loss": 0.0225, + "step": 1390 + }, + { + "epoch": 0.023660039039064416, + "grad_norm": 0.6185082793235779, + "learning_rate": 1.1829319814110691e-06, + "loss": 0.0187, + "step": 1400 + }, + { + "epoch": 0.023829039317914875, + "grad_norm": 0.9716012477874756, + "learning_rate": 1.1913814955640052e-06, + "loss": 0.0167, + "step": 1410 + }, + { + "epoch": 0.023998039596765334, + "grad_norm": 1.1802388429641724, + "learning_rate": 1.1998310097169413e-06, + "loss": 0.0262, + "step": 1420 + }, + { + "epoch": 0.024167039875615796, + "grad_norm": 0.9881535172462463, + "learning_rate": 1.2082805238698775e-06, + "loss": 0.0187, + "step": 1430 + }, + { + "epoch": 0.024336040154466256, + "grad_norm": 1.5982509851455688, + "learning_rate": 1.2167300380228138e-06, + "loss": 0.0158, + "step": 1440 + }, + { + "epoch": 0.024505040433316715, + "grad_norm": 0.49811652302742004, + "learning_rate": 1.2251795521757499e-06, + "loss": 0.0197, + "step": 1450 + }, + { + "epoch": 0.024674040712167174, + "grad_norm": 0.9442542195320129, + "learning_rate": 1.2336290663286862e-06, + "loss": 0.0206, + "step": 1460 + }, + { + "epoch": 0.024843040991017636, + "grad_norm": 0.9874531030654907, + "learning_rate": 1.2420785804816225e-06, + "loss": 0.0197, + "step": 1470 + }, + { + "epoch": 0.025012041269868095, + "grad_norm": 1.2066454887390137, + "learning_rate": 1.2505280946345585e-06, + "loss": 0.0183, + "step": 1480 + }, + { + "epoch": 0.025181041548718554, + "grad_norm": 0.8461329936981201, + "learning_rate": 1.2589776087874946e-06, + "loss": 0.0188, + "step": 1490 + }, + { + "epoch": 0.025350041827569017, + "grad_norm": 0.6502810716629028, + "learning_rate": 1.267427122940431e-06, + "loss": 0.0151, + "step": 1500 + }, + { + "epoch": 0.025519042106419476, + "grad_norm": 1.0374168157577515, + "learning_rate": 1.2758766370933674e-06, + "loss": 0.0206, + "step": 1510 + }, + { + "epoch": 0.025688042385269935, + "grad_norm": 0.5221607685089111, + "learning_rate": 1.2843261512463034e-06, + "loss": 0.0127, + "step": 1520 + }, + { + "epoch": 0.025857042664120394, + "grad_norm": 0.7759193181991577, + "learning_rate": 1.2927756653992395e-06, + "loss": 0.02, + "step": 1530 + }, + { + "epoch": 0.026026042942970857, + "grad_norm": 1.2694779634475708, + "learning_rate": 1.3012251795521758e-06, + "loss": 0.0163, + "step": 1540 + }, + { + "epoch": 0.026195043221821316, + "grad_norm": 1.1054325103759766, + "learning_rate": 1.3096746937051123e-06, + "loss": 0.0199, + "step": 1550 + }, + { + "epoch": 0.026364043500671775, + "grad_norm": 0.9145587682723999, + "learning_rate": 1.3181242078580484e-06, + "loss": 0.0164, + "step": 1560 + }, + { + "epoch": 0.026533043779522238, + "grad_norm": 0.9101834893226624, + "learning_rate": 1.3265737220109844e-06, + "loss": 0.0157, + "step": 1570 + }, + { + "epoch": 0.026702044058372697, + "grad_norm": 0.7743887901306152, + "learning_rate": 1.3350232361639207e-06, + "loss": 0.0137, + "step": 1580 + }, + { + "epoch": 0.026871044337223156, + "grad_norm": 0.9588603973388672, + "learning_rate": 1.3434727503168568e-06, + "loss": 0.0188, + "step": 1590 + }, + { + "epoch": 0.027040044616073615, + "grad_norm": 0.9868338108062744, + "learning_rate": 1.351922264469793e-06, + "loss": 0.0172, + "step": 1600 + }, + { + "epoch": 0.027209044894924077, + "grad_norm": 0.7865809798240662, + "learning_rate": 1.3603717786227293e-06, + "loss": 0.0156, + "step": 1610 + }, + { + "epoch": 0.027378045173774537, + "grad_norm": 0.6851559281349182, + "learning_rate": 
1.3688212927756656e-06, + "loss": 0.0165, + "step": 1620 + }, + { + "epoch": 0.027547045452624996, + "grad_norm": 0.6153374314308167, + "learning_rate": 1.3772708069286017e-06, + "loss": 0.019, + "step": 1630 + }, + { + "epoch": 0.027716045731475458, + "grad_norm": 0.7208739519119263, + "learning_rate": 1.3857203210815378e-06, + "loss": 0.0173, + "step": 1640 + }, + { + "epoch": 0.027885046010325917, + "grad_norm": 0.5045366287231445, + "learning_rate": 1.394169835234474e-06, + "loss": 0.0122, + "step": 1650 + }, + { + "epoch": 0.028054046289176376, + "grad_norm": 0.7605059742927551, + "learning_rate": 1.4026193493874105e-06, + "loss": 0.0172, + "step": 1660 + }, + { + "epoch": 0.028223046568026836, + "grad_norm": 0.7432957887649536, + "learning_rate": 1.4110688635403466e-06, + "loss": 0.0144, + "step": 1670 + }, + { + "epoch": 0.028392046846877298, + "grad_norm": 0.7140432000160217, + "learning_rate": 1.4195183776932827e-06, + "loss": 0.0174, + "step": 1680 + }, + { + "epoch": 0.028561047125727757, + "grad_norm": 1.02549409866333, + "learning_rate": 1.427967891846219e-06, + "loss": 0.0132, + "step": 1690 + }, + { + "epoch": 0.028730047404578216, + "grad_norm": 0.8485150337219238, + "learning_rate": 1.436417405999155e-06, + "loss": 0.0145, + "step": 1700 + }, + { + "epoch": 0.02889904768342868, + "grad_norm": 0.8102651238441467, + "learning_rate": 1.4448669201520913e-06, + "loss": 0.0108, + "step": 1710 + }, + { + "epoch": 0.029068047962279138, + "grad_norm": 0.9968916773796082, + "learning_rate": 1.4533164343050276e-06, + "loss": 0.0152, + "step": 1720 + }, + { + "epoch": 0.029237048241129597, + "grad_norm": 0.5318018794059753, + "learning_rate": 1.4617659484579639e-06, + "loss": 0.0124, + "step": 1730 + }, + { + "epoch": 0.02940604851998006, + "grad_norm": 0.863592267036438, + "learning_rate": 1.4702154626109e-06, + "loss": 0.0155, + "step": 1740 + }, + { + "epoch": 0.02957504879883052, + "grad_norm": 1.0146982669830322, + "learning_rate": 1.4786649767638362e-06, + "loss": 0.0133, + "step": 1750 + }, + { + "epoch": 0.029744049077680978, + "grad_norm": 1.1718655824661255, + "learning_rate": 1.4871144909167723e-06, + "loss": 0.0141, + "step": 1760 + }, + { + "epoch": 0.029913049356531437, + "grad_norm": 0.6279026865959167, + "learning_rate": 1.4955640050697088e-06, + "loss": 0.0118, + "step": 1770 + }, + { + "epoch": 0.0300820496353819, + "grad_norm": 0.4484230875968933, + "learning_rate": 1.5040135192226449e-06, + "loss": 0.0167, + "step": 1780 + }, + { + "epoch": 0.03025104991423236, + "grad_norm": 1.6238749027252197, + "learning_rate": 1.512463033375581e-06, + "loss": 0.0163, + "step": 1790 + }, + { + "epoch": 0.030420050193082818, + "grad_norm": 0.5909531116485596, + "learning_rate": 1.5209125475285172e-06, + "loss": 0.015, + "step": 1800 + }, + { + "epoch": 0.03058905047193328, + "grad_norm": 0.6374512910842896, + "learning_rate": 1.5293620616814533e-06, + "loss": 0.0153, + "step": 1810 + }, + { + "epoch": 0.03075805075078374, + "grad_norm": 0.5122248530387878, + "learning_rate": 1.5378115758343898e-06, + "loss": 0.013, + "step": 1820 + }, + { + "epoch": 0.0309270510296342, + "grad_norm": 1.1308633089065552, + "learning_rate": 1.5462610899873259e-06, + "loss": 0.0157, + "step": 1830 + }, + { + "epoch": 0.031096051308484658, + "grad_norm": 0.7191557884216309, + "learning_rate": 1.5547106041402621e-06, + "loss": 0.0136, + "step": 1840 + }, + { + "epoch": 0.03126505158733512, + "grad_norm": 0.8519582152366638, + "learning_rate": 1.5631601182931982e-06, + "loss": 0.0127, + "step": 1850 + 
}, + { + "epoch": 0.03143405186618558, + "grad_norm": 0.6365352869033813, + "learning_rate": 1.5716096324461345e-06, + "loss": 0.0171, + "step": 1860 + }, + { + "epoch": 0.03160305214503604, + "grad_norm": 0.7419239282608032, + "learning_rate": 1.5800591465990706e-06, + "loss": 0.0158, + "step": 1870 + }, + { + "epoch": 0.0317720524238865, + "grad_norm": 0.8644031882286072, + "learning_rate": 1.588508660752007e-06, + "loss": 0.0174, + "step": 1880 + }, + { + "epoch": 0.031941052702736956, + "grad_norm": 0.6560561060905457, + "learning_rate": 1.5969581749049431e-06, + "loss": 0.0136, + "step": 1890 + }, + { + "epoch": 0.03211005298158742, + "grad_norm": 1.0905290842056274, + "learning_rate": 1.6054076890578792e-06, + "loss": 0.0113, + "step": 1900 + }, + { + "epoch": 0.03227905326043788, + "grad_norm": 0.9088236689567566, + "learning_rate": 1.6138572032108155e-06, + "loss": 0.0131, + "step": 1910 + }, + { + "epoch": 0.03244805353928834, + "grad_norm": 0.6915132403373718, + "learning_rate": 1.6223067173637516e-06, + "loss": 0.0225, + "step": 1920 + }, + { + "epoch": 0.0326170538181388, + "grad_norm": 1.2276079654693604, + "learning_rate": 1.630756231516688e-06, + "loss": 0.0176, + "step": 1930 + }, + { + "epoch": 0.03278605409698926, + "grad_norm": 0.8565443754196167, + "learning_rate": 1.6392057456696241e-06, + "loss": 0.0167, + "step": 1940 + }, + { + "epoch": 0.03295505437583972, + "grad_norm": 0.7216575145721436, + "learning_rate": 1.6476552598225604e-06, + "loss": 0.0131, + "step": 1950 + }, + { + "epoch": 0.03312405465469018, + "grad_norm": 0.8094061613082886, + "learning_rate": 1.6561047739754965e-06, + "loss": 0.0108, + "step": 1960 + }, + { + "epoch": 0.03329305493354064, + "grad_norm": 1.2675161361694336, + "learning_rate": 1.6645542881284328e-06, + "loss": 0.0168, + "step": 1970 + }, + { + "epoch": 0.0334620552123911, + "grad_norm": 0.8833696842193604, + "learning_rate": 1.6730038022813688e-06, + "loss": 0.0119, + "step": 1980 + }, + { + "epoch": 0.03363105549124156, + "grad_norm": 0.6309747099876404, + "learning_rate": 1.6814533164343053e-06, + "loss": 0.016, + "step": 1990 + }, + { + "epoch": 0.03380005577009202, + "grad_norm": 1.0539937019348145, + "learning_rate": 1.6899028305872414e-06, + "loss": 0.0148, + "step": 2000 + }, + { + "epoch": 0.03396905604894248, + "grad_norm": 0.6724973320960999, + "learning_rate": 1.6983523447401777e-06, + "loss": 0.0122, + "step": 2010 + }, + { + "epoch": 0.03413805632779294, + "grad_norm": 1.0127696990966797, + "learning_rate": 1.7068018588931137e-06, + "loss": 0.0137, + "step": 2020 + }, + { + "epoch": 0.0343070566066434, + "grad_norm": 0.5368540287017822, + "learning_rate": 1.7152513730460498e-06, + "loss": 0.0145, + "step": 2030 + }, + { + "epoch": 0.034476056885493864, + "grad_norm": 0.4811045825481415, + "learning_rate": 1.7237008871989863e-06, + "loss": 0.0112, + "step": 2040 + }, + { + "epoch": 0.03464505716434432, + "grad_norm": 1.032359004020691, + "learning_rate": 1.7321504013519224e-06, + "loss": 0.0133, + "step": 2050 + }, + { + "epoch": 0.03481405744319478, + "grad_norm": 0.5104614496231079, + "learning_rate": 1.7405999155048587e-06, + "loss": 0.0108, + "step": 2060 + }, + { + "epoch": 0.03498305772204524, + "grad_norm": 0.8072396516799927, + "learning_rate": 1.7490494296577947e-06, + "loss": 0.0136, + "step": 2070 + }, + { + "epoch": 0.0351520580008957, + "grad_norm": 0.6972222924232483, + "learning_rate": 1.757498943810731e-06, + "loss": 0.0158, + "step": 2080 + }, + { + "epoch": 0.03532105827974616, + "grad_norm": 
0.7694892883300781, + "learning_rate": 1.765948457963667e-06, + "loss": 0.0148, + "step": 2090 + }, + { + "epoch": 0.03549005855859662, + "grad_norm": 0.694287121295929, + "learning_rate": 1.7743979721166036e-06, + "loss": 0.0086, + "step": 2100 + }, + { + "epoch": 0.035659058837447084, + "grad_norm": 0.8641753196716309, + "learning_rate": 1.7828474862695396e-06, + "loss": 0.0162, + "step": 2110 + }, + { + "epoch": 0.03582805911629754, + "grad_norm": 0.373634934425354, + "learning_rate": 1.791297000422476e-06, + "loss": 0.0116, + "step": 2120 + }, + { + "epoch": 0.035997059395148, + "grad_norm": 0.5773941278457642, + "learning_rate": 1.799746514575412e-06, + "loss": 0.0126, + "step": 2130 + }, + { + "epoch": 0.03616605967399846, + "grad_norm": 0.9467854499816895, + "learning_rate": 1.808196028728348e-06, + "loss": 0.0192, + "step": 2140 + }, + { + "epoch": 0.03633505995284892, + "grad_norm": 0.6334127187728882, + "learning_rate": 1.8166455428812846e-06, + "loss": 0.0201, + "step": 2150 + }, + { + "epoch": 0.03650406023169938, + "grad_norm": 0.6911800503730774, + "learning_rate": 1.8250950570342208e-06, + "loss": 0.0149, + "step": 2160 + }, + { + "epoch": 0.036673060510549846, + "grad_norm": 0.7418361306190491, + "learning_rate": 1.833544571187157e-06, + "loss": 0.0102, + "step": 2170 + }, + { + "epoch": 0.036842060789400305, + "grad_norm": 0.4494631588459015, + "learning_rate": 1.841994085340093e-06, + "loss": 0.0115, + "step": 2180 + }, + { + "epoch": 0.037011061068250764, + "grad_norm": 0.5005876421928406, + "learning_rate": 1.8504435994930293e-06, + "loss": 0.0108, + "step": 2190 + }, + { + "epoch": 0.03718006134710122, + "grad_norm": 0.6692945957183838, + "learning_rate": 1.8588931136459655e-06, + "loss": 0.0142, + "step": 2200 + }, + { + "epoch": 0.03734906162595168, + "grad_norm": 0.7445424199104309, + "learning_rate": 1.8673426277989018e-06, + "loss": 0.0142, + "step": 2210 + }, + { + "epoch": 0.03751806190480214, + "grad_norm": 0.3330194354057312, + "learning_rate": 1.875792141951838e-06, + "loss": 0.0118, + "step": 2220 + }, + { + "epoch": 0.0376870621836526, + "grad_norm": 0.3205147385597229, + "learning_rate": 1.8842416561047742e-06, + "loss": 0.0137, + "step": 2230 + }, + { + "epoch": 0.037856062462503066, + "grad_norm": 0.7221159934997559, + "learning_rate": 1.8926911702577102e-06, + "loss": 0.0136, + "step": 2240 + }, + { + "epoch": 0.038025062741353526, + "grad_norm": 1.715714931488037, + "learning_rate": 1.9011406844106463e-06, + "loss": 0.0098, + "step": 2250 + }, + { + "epoch": 0.038194063020203985, + "grad_norm": 0.8114831447601318, + "learning_rate": 1.909590198563583e-06, + "loss": 0.0109, + "step": 2260 + }, + { + "epoch": 0.038363063299054444, + "grad_norm": 0.8724892735481262, + "learning_rate": 1.918039712716519e-06, + "loss": 0.0149, + "step": 2270 + }, + { + "epoch": 0.0385320635779049, + "grad_norm": 0.6100572943687439, + "learning_rate": 1.926489226869455e-06, + "loss": 0.0122, + "step": 2280 + }, + { + "epoch": 0.03870106385675536, + "grad_norm": 1.0169868469238281, + "learning_rate": 1.9349387410223914e-06, + "loss": 0.013, + "step": 2290 + }, + { + "epoch": 0.03887006413560582, + "grad_norm": 0.4001051187515259, + "learning_rate": 1.9433882551753275e-06, + "loss": 0.0079, + "step": 2300 + }, + { + "epoch": 0.03903906441445629, + "grad_norm": 0.804621160030365, + "learning_rate": 1.951837769328264e-06, + "loss": 0.0144, + "step": 2310 + }, + { + "epoch": 0.039208064693306746, + "grad_norm": 0.4144463539123535, + "learning_rate": 1.9602872834812e-06, + "loss": 
0.0107, + "step": 2320 + }, + { + "epoch": 0.039377064972157205, + "grad_norm": 0.7317914962768555, + "learning_rate": 1.968736797634136e-06, + "loss": 0.0136, + "step": 2330 + }, + { + "epoch": 0.039546065251007664, + "grad_norm": 0.6143588423728943, + "learning_rate": 1.9771863117870722e-06, + "loss": 0.013, + "step": 2340 + }, + { + "epoch": 0.03971506552985812, + "grad_norm": 0.7639254331588745, + "learning_rate": 1.9856358259400087e-06, + "loss": 0.0136, + "step": 2350 + }, + { + "epoch": 0.03988406580870858, + "grad_norm": 0.6767488121986389, + "learning_rate": 1.9940853400929448e-06, + "loss": 0.0155, + "step": 2360 + }, + { + "epoch": 0.04005306608755904, + "grad_norm": 0.702724039554596, + "learning_rate": 2.0025348542458813e-06, + "loss": 0.0108, + "step": 2370 + }, + { + "epoch": 0.04022206636640951, + "grad_norm": 0.7386316061019897, + "learning_rate": 2.0109843683988174e-06, + "loss": 0.0108, + "step": 2380 + }, + { + "epoch": 0.04039106664525997, + "grad_norm": 0.966352105140686, + "learning_rate": 2.0194338825517534e-06, + "loss": 0.0143, + "step": 2390 + }, + { + "epoch": 0.040560066924110426, + "grad_norm": 1.3236074447631836, + "learning_rate": 2.0278833967046895e-06, + "loss": 0.0156, + "step": 2400 + }, + { + "epoch": 0.040729067202960885, + "grad_norm": 1.0675822496414185, + "learning_rate": 2.0363329108576256e-06, + "loss": 0.0138, + "step": 2410 + }, + { + "epoch": 0.040898067481811344, + "grad_norm": 0.8293465375900269, + "learning_rate": 2.044782425010562e-06, + "loss": 0.0136, + "step": 2420 + }, + { + "epoch": 0.0410670677606618, + "grad_norm": 0.675937294960022, + "learning_rate": 2.053231939163498e-06, + "loss": 0.0104, + "step": 2430 + }, + { + "epoch": 0.04123606803951226, + "grad_norm": 0.8487566709518433, + "learning_rate": 2.0616814533164346e-06, + "loss": 0.0138, + "step": 2440 + }, + { + "epoch": 0.04140506831836273, + "grad_norm": 0.3574985861778259, + "learning_rate": 2.0701309674693707e-06, + "loss": 0.012, + "step": 2450 + }, + { + "epoch": 0.04157406859721319, + "grad_norm": 0.6251744031906128, + "learning_rate": 2.0785804816223068e-06, + "loss": 0.0148, + "step": 2460 + }, + { + "epoch": 0.041743068876063646, + "grad_norm": 0.3150479197502136, + "learning_rate": 2.087029995775243e-06, + "loss": 0.0107, + "step": 2470 + }, + { + "epoch": 0.041912069154914106, + "grad_norm": 0.6732006669044495, + "learning_rate": 2.0954795099281793e-06, + "loss": 0.0107, + "step": 2480 + }, + { + "epoch": 0.042081069433764565, + "grad_norm": 0.5718549489974976, + "learning_rate": 2.1039290240811154e-06, + "loss": 0.0117, + "step": 2490 + }, + { + "epoch": 0.042250069712615024, + "grad_norm": 0.6807505488395691, + "learning_rate": 2.112378538234052e-06, + "loss": 0.0101, + "step": 2500 + }, + { + "epoch": 0.04241906999146548, + "grad_norm": 0.7062185406684875, + "learning_rate": 2.120828052386988e-06, + "loss": 0.0117, + "step": 2510 + }, + { + "epoch": 0.04258807027031595, + "grad_norm": 0.4019978642463684, + "learning_rate": 2.129277566539924e-06, + "loss": 0.0128, + "step": 2520 + }, + { + "epoch": 0.04275707054916641, + "grad_norm": 0.26033371686935425, + "learning_rate": 2.1377270806928605e-06, + "loss": 0.0124, + "step": 2530 + }, + { + "epoch": 0.04292607082801687, + "grad_norm": 0.7266717553138733, + "learning_rate": 2.1461765948457966e-06, + "loss": 0.0151, + "step": 2540 + }, + { + "epoch": 0.043095071106867326, + "grad_norm": 0.6882942914962769, + "learning_rate": 2.1546261089987327e-06, + "loss": 0.0124, + "step": 2550 + }, + { + "epoch": 
0.043264071385717785, + "grad_norm": 0.494502454996109, + "learning_rate": 2.1630756231516687e-06, + "loss": 0.0129, + "step": 2560 + }, + { + "epoch": 0.043433071664568244, + "grad_norm": 0.35432446002960205, + "learning_rate": 2.1715251373046052e-06, + "loss": 0.0119, + "step": 2570 + }, + { + "epoch": 0.0436020719434187, + "grad_norm": 0.4541483521461487, + "learning_rate": 2.1799746514575413e-06, + "loss": 0.0113, + "step": 2580 + }, + { + "epoch": 0.04377107222226917, + "grad_norm": 0.5097981095314026, + "learning_rate": 2.188424165610478e-06, + "loss": 0.0104, + "step": 2590 + }, + { + "epoch": 0.04394007250111963, + "grad_norm": 0.7258899807929993, + "learning_rate": 2.196873679763414e-06, + "loss": 0.0141, + "step": 2600 + }, + { + "epoch": 0.04410907277997009, + "grad_norm": 0.5327528119087219, + "learning_rate": 2.20532319391635e-06, + "loss": 0.0113, + "step": 2610 + }, + { + "epoch": 0.04427807305882055, + "grad_norm": 0.5078420639038086, + "learning_rate": 2.213772708069286e-06, + "loss": 0.0091, + "step": 2620 + }, + { + "epoch": 0.044447073337671006, + "grad_norm": 0.7442695498466492, + "learning_rate": 2.222222222222222e-06, + "loss": 0.0084, + "step": 2630 + }, + { + "epoch": 0.044616073616521465, + "grad_norm": 0.7157345414161682, + "learning_rate": 2.2306717363751586e-06, + "loss": 0.01, + "step": 2640 + }, + { + "epoch": 0.044785073895371924, + "grad_norm": 0.7449740767478943, + "learning_rate": 2.239121250528095e-06, + "loss": 0.0136, + "step": 2650 + }, + { + "epoch": 0.04495407417422239, + "grad_norm": 3.954612970352173, + "learning_rate": 2.247570764681031e-06, + "loss": 0.0105, + "step": 2660 + }, + { + "epoch": 0.04512307445307285, + "grad_norm": 0.38182348012924194, + "learning_rate": 2.256020278833967e-06, + "loss": 0.0106, + "step": 2670 + }, + { + "epoch": 0.04529207473192331, + "grad_norm": 0.6025572419166565, + "learning_rate": 2.2644697929869033e-06, + "loss": 0.0087, + "step": 2680 + }, + { + "epoch": 0.04546107501077377, + "grad_norm": 0.47721076011657715, + "learning_rate": 2.2729193071398398e-06, + "loss": 0.0114, + "step": 2690 + }, + { + "epoch": 0.045630075289624226, + "grad_norm": 1.120234727859497, + "learning_rate": 2.281368821292776e-06, + "loss": 0.0125, + "step": 2700 + }, + { + "epoch": 0.045799075568474686, + "grad_norm": 0.42115306854248047, + "learning_rate": 2.289818335445712e-06, + "loss": 0.0099, + "step": 2710 + }, + { + "epoch": 0.045968075847325145, + "grad_norm": 0.8091180324554443, + "learning_rate": 2.2982678495986484e-06, + "loss": 0.0104, + "step": 2720 + }, + { + "epoch": 0.04613707612617561, + "grad_norm": 0.671550989151001, + "learning_rate": 2.3067173637515845e-06, + "loss": 0.0132, + "step": 2730 + }, + { + "epoch": 0.04630607640502607, + "grad_norm": 0.43608835339546204, + "learning_rate": 2.3151668779045205e-06, + "loss": 0.0104, + "step": 2740 + }, + { + "epoch": 0.04647507668387653, + "grad_norm": 0.9488043189048767, + "learning_rate": 2.323616392057457e-06, + "loss": 0.0086, + "step": 2750 + }, + { + "epoch": 0.04664407696272699, + "grad_norm": 0.6109009385108948, + "learning_rate": 2.332065906210393e-06, + "loss": 0.0111, + "step": 2760 + }, + { + "epoch": 0.04681307724157745, + "grad_norm": 0.6729733347892761, + "learning_rate": 2.340515420363329e-06, + "loss": 0.0123, + "step": 2770 + }, + { + "epoch": 0.046982077520427906, + "grad_norm": 0.4502004086971283, + "learning_rate": 2.3489649345162652e-06, + "loss": 0.0091, + "step": 2780 + }, + { + "epoch": 0.04715107779927837, + "grad_norm": 0.679175853729248, + 
"learning_rate": 2.3574144486692017e-06, + "loss": 0.0078, + "step": 2790 + }, + { + "epoch": 0.04732007807812883, + "grad_norm": 0.7278356552124023, + "learning_rate": 2.3658639628221382e-06, + "loss": 0.0131, + "step": 2800 + }, + { + "epoch": 0.04748907835697929, + "grad_norm": 0.36557623744010925, + "learning_rate": 2.3743134769750743e-06, + "loss": 0.0094, + "step": 2810 + }, + { + "epoch": 0.04765807863582975, + "grad_norm": 0.6772125959396362, + "learning_rate": 2.3827629911280104e-06, + "loss": 0.0116, + "step": 2820 + }, + { + "epoch": 0.04782707891468021, + "grad_norm": 0.6479217410087585, + "learning_rate": 2.3912125052809464e-06, + "loss": 0.0125, + "step": 2830 + }, + { + "epoch": 0.04799607919353067, + "grad_norm": 0.28205257654190063, + "learning_rate": 2.3996620194338825e-06, + "loss": 0.0076, + "step": 2840 + }, + { + "epoch": 0.04816507947238113, + "grad_norm": 0.5454665422439575, + "learning_rate": 2.408111533586819e-06, + "loss": 0.0136, + "step": 2850 + }, + { + "epoch": 0.04833407975123159, + "grad_norm": 0.6780948638916016, + "learning_rate": 2.416561047739755e-06, + "loss": 0.0123, + "step": 2860 + }, + { + "epoch": 0.04850308003008205, + "grad_norm": 0.6540535688400269, + "learning_rate": 2.4250105618926916e-06, + "loss": 0.0108, + "step": 2870 + }, + { + "epoch": 0.04867208030893251, + "grad_norm": 0.6456772089004517, + "learning_rate": 2.4334600760456276e-06, + "loss": 0.0088, + "step": 2880 + }, + { + "epoch": 0.04884108058778297, + "grad_norm": 0.6767243146896362, + "learning_rate": 2.4419095901985637e-06, + "loss": 0.0102, + "step": 2890 + }, + { + "epoch": 0.04901008086663343, + "grad_norm": 0.8583904504776001, + "learning_rate": 2.4503591043514998e-06, + "loss": 0.0091, + "step": 2900 + }, + { + "epoch": 0.04917908114548389, + "grad_norm": 0.24955904483795166, + "learning_rate": 2.4588086185044363e-06, + "loss": 0.0103, + "step": 2910 + }, + { + "epoch": 0.04934808142433435, + "grad_norm": 0.4824363887310028, + "learning_rate": 2.4672581326573723e-06, + "loss": 0.0095, + "step": 2920 + }, + { + "epoch": 0.04951708170318481, + "grad_norm": 0.6091395020484924, + "learning_rate": 2.4757076468103084e-06, + "loss": 0.0122, + "step": 2930 + }, + { + "epoch": 0.04968608198203527, + "grad_norm": 0.5298306941986084, + "learning_rate": 2.484157160963245e-06, + "loss": 0.0104, + "step": 2940 + }, + { + "epoch": 0.04985508226088573, + "grad_norm": 0.38689276576042175, + "learning_rate": 2.492606675116181e-06, + "loss": 0.0074, + "step": 2950 + }, + { + "epoch": 0.05002408253973619, + "grad_norm": 0.6825554966926575, + "learning_rate": 2.501056189269117e-06, + "loss": 0.0114, + "step": 2960 + }, + { + "epoch": 0.05019308281858665, + "grad_norm": 0.44893091917037964, + "learning_rate": 2.509505703422053e-06, + "loss": 0.0088, + "step": 2970 + }, + { + "epoch": 0.05036208309743711, + "grad_norm": 0.4857201874256134, + "learning_rate": 2.517955217574989e-06, + "loss": 0.0091, + "step": 2980 + }, + { + "epoch": 0.05053108337628757, + "grad_norm": 0.5426431894302368, + "learning_rate": 2.526404731727926e-06, + "loss": 0.0101, + "step": 2990 + }, + { + "epoch": 0.050700083655138034, + "grad_norm": 0.7086471915245056, + "learning_rate": 2.534854245880862e-06, + "loss": 0.0091, + "step": 3000 + }, + { + "epoch": 0.05086908393398849, + "grad_norm": 0.5153199434280396, + "learning_rate": 2.5433037600337983e-06, + "loss": 0.0126, + "step": 3010 + }, + { + "epoch": 0.05103808421283895, + "grad_norm": 0.5115523338317871, + "learning_rate": 2.5517532741867347e-06, + "loss": 0.0119, + 
"step": 3020 + }, + { + "epoch": 0.05120708449168941, + "grad_norm": 0.4097737669944763, + "learning_rate": 2.560202788339671e-06, + "loss": 0.0161, + "step": 3030 + }, + { + "epoch": 0.05137608477053987, + "grad_norm": 0.4461556077003479, + "learning_rate": 2.568652302492607e-06, + "loss": 0.011, + "step": 3040 + }, + { + "epoch": 0.05154508504939033, + "grad_norm": 0.6485928297042847, + "learning_rate": 2.577101816645543e-06, + "loss": 0.0099, + "step": 3050 + }, + { + "epoch": 0.05171408532824079, + "grad_norm": 0.49462923407554626, + "learning_rate": 2.585551330798479e-06, + "loss": 0.0092, + "step": 3060 + }, + { + "epoch": 0.051883085607091255, + "grad_norm": 0.496439129114151, + "learning_rate": 2.5940008449514155e-06, + "loss": 0.0102, + "step": 3070 + }, + { + "epoch": 0.052052085885941714, + "grad_norm": 0.724690854549408, + "learning_rate": 2.6024503591043516e-06, + "loss": 0.0124, + "step": 3080 + }, + { + "epoch": 0.05222108616479217, + "grad_norm": 0.4814421832561493, + "learning_rate": 2.6108998732572877e-06, + "loss": 0.0095, + "step": 3090 + }, + { + "epoch": 0.05239008644364263, + "grad_norm": 0.4340079724788666, + "learning_rate": 2.6193493874102246e-06, + "loss": 0.0094, + "step": 3100 + }, + { + "epoch": 0.05255908672249309, + "grad_norm": 0.33029723167419434, + "learning_rate": 2.6277989015631607e-06, + "loss": 0.0074, + "step": 3110 + }, + { + "epoch": 0.05272808700134355, + "grad_norm": 0.4209522306919098, + "learning_rate": 2.6362484157160967e-06, + "loss": 0.0095, + "step": 3120 + }, + { + "epoch": 0.05289708728019401, + "grad_norm": 0.7987039089202881, + "learning_rate": 2.644697929869033e-06, + "loss": 0.0121, + "step": 3130 + }, + { + "epoch": 0.053066087559044475, + "grad_norm": 0.371640145778656, + "learning_rate": 2.653147444021969e-06, + "loss": 0.0145, + "step": 3140 + }, + { + "epoch": 0.053235087837894934, + "grad_norm": 0.6809061169624329, + "learning_rate": 2.6615969581749054e-06, + "loss": 0.0104, + "step": 3150 + }, + { + "epoch": 0.05340408811674539, + "grad_norm": 0.4903334379196167, + "learning_rate": 2.6700464723278414e-06, + "loss": 0.0108, + "step": 3160 + }, + { + "epoch": 0.05357308839559585, + "grad_norm": 0.232858344912529, + "learning_rate": 2.6784959864807775e-06, + "loss": 0.008, + "step": 3170 + }, + { + "epoch": 0.05374208867444631, + "grad_norm": 0.7675929069519043, + "learning_rate": 2.6869455006337136e-06, + "loss": 0.0086, + "step": 3180 + }, + { + "epoch": 0.05391108895329677, + "grad_norm": 0.2953050136566162, + "learning_rate": 2.6953950147866496e-06, + "loss": 0.0084, + "step": 3190 + }, + { + "epoch": 0.05408008923214723, + "grad_norm": 0.5588734149932861, + "learning_rate": 2.703844528939586e-06, + "loss": 0.0079, + "step": 3200 + }, + { + "epoch": 0.054249089510997696, + "grad_norm": 0.428972989320755, + "learning_rate": 2.7122940430925226e-06, + "loss": 0.0118, + "step": 3210 + }, + { + "epoch": 0.054418089789848155, + "grad_norm": 0.6115742921829224, + "learning_rate": 2.7207435572454587e-06, + "loss": 0.0221, + "step": 3220 + }, + { + "epoch": 0.054587090068698614, + "grad_norm": 0.3895367980003357, + "learning_rate": 2.7291930713983948e-06, + "loss": 0.0099, + "step": 3230 + }, + { + "epoch": 0.05475609034754907, + "grad_norm": 0.3974573314189911, + "learning_rate": 2.7376425855513313e-06, + "loss": 0.0106, + "step": 3240 + }, + { + "epoch": 0.05492509062639953, + "grad_norm": 2.016308069229126, + "learning_rate": 2.7460920997042673e-06, + "loss": 0.011, + "step": 3250 + }, + { + "epoch": 0.05509409090524999, + 
"grad_norm": 0.5943537950515747, + "learning_rate": 2.7545416138572034e-06, + "loss": 0.0085, + "step": 3260 + }, + { + "epoch": 0.05526309118410045, + "grad_norm": 0.3754553496837616, + "learning_rate": 2.7629911280101395e-06, + "loss": 0.0078, + "step": 3270 + }, + { + "epoch": 0.055432091462950916, + "grad_norm": 0.42528584599494934, + "learning_rate": 2.7714406421630755e-06, + "loss": 0.0083, + "step": 3280 + }, + { + "epoch": 0.055601091741801376, + "grad_norm": 0.4286918342113495, + "learning_rate": 2.779890156316012e-06, + "loss": 0.0094, + "step": 3290 + }, + { + "epoch": 0.055770092020651835, + "grad_norm": 0.2522464692592621, + "learning_rate": 2.788339670468948e-06, + "loss": 0.007, + "step": 3300 + }, + { + "epoch": 0.055939092299502294, + "grad_norm": 0.4107299745082855, + "learning_rate": 2.796789184621884e-06, + "loss": 0.0072, + "step": 3310 + }, + { + "epoch": 0.05610809257835275, + "grad_norm": 0.5001223683357239, + "learning_rate": 2.805238698774821e-06, + "loss": 0.0098, + "step": 3320 + }, + { + "epoch": 0.05627709285720321, + "grad_norm": 0.3938818573951721, + "learning_rate": 2.813688212927757e-06, + "loss": 0.0114, + "step": 3330 + }, + { + "epoch": 0.05644609313605367, + "grad_norm": 0.36034202575683594, + "learning_rate": 2.8221377270806932e-06, + "loss": 0.0122, + "step": 3340 + }, + { + "epoch": 0.05661509341490414, + "grad_norm": 0.5340605974197388, + "learning_rate": 2.8305872412336293e-06, + "loss": 0.0089, + "step": 3350 + }, + { + "epoch": 0.056784093693754596, + "grad_norm": 0.633908748626709, + "learning_rate": 2.8390367553865654e-06, + "loss": 0.0095, + "step": 3360 + }, + { + "epoch": 0.056953093972605055, + "grad_norm": 0.9071553349494934, + "learning_rate": 2.847486269539502e-06, + "loss": 0.0119, + "step": 3370 + }, + { + "epoch": 0.057122094251455514, + "grad_norm": 0.5576254725456238, + "learning_rate": 2.855935783692438e-06, + "loss": 0.0112, + "step": 3380 + }, + { + "epoch": 0.057291094530305973, + "grad_norm": 0.2599470019340515, + "learning_rate": 2.864385297845374e-06, + "loss": 0.0101, + "step": 3390 + }, + { + "epoch": 0.05746009480915643, + "grad_norm": 0.19270236790180206, + "learning_rate": 2.87283481199831e-06, + "loss": 0.0093, + "step": 3400 + }, + { + "epoch": 0.0576290950880069, + "grad_norm": 0.36447200179100037, + "learning_rate": 2.881284326151246e-06, + "loss": 0.0098, + "step": 3410 + }, + { + "epoch": 0.05779809536685736, + "grad_norm": 0.14679476618766785, + "learning_rate": 2.8897338403041826e-06, + "loss": 0.0102, + "step": 3420 + }, + { + "epoch": 0.05796709564570782, + "grad_norm": 0.5648771524429321, + "learning_rate": 2.898183354457119e-06, + "loss": 0.0113, + "step": 3430 + }, + { + "epoch": 0.058136095924558276, + "grad_norm": 0.6138327121734619, + "learning_rate": 2.906632868610055e-06, + "loss": 0.0097, + "step": 3440 + }, + { + "epoch": 0.058305096203408735, + "grad_norm": 0.33649885654449463, + "learning_rate": 2.9150823827629917e-06, + "loss": 0.0079, + "step": 3450 + }, + { + "epoch": 0.058474096482259194, + "grad_norm": 0.5774721503257751, + "learning_rate": 2.9235318969159278e-06, + "loss": 0.0087, + "step": 3460 + }, + { + "epoch": 0.05864309676110965, + "grad_norm": 0.2652968168258667, + "learning_rate": 2.931981411068864e-06, + "loss": 0.0097, + "step": 3470 + }, + { + "epoch": 0.05881209703996012, + "grad_norm": 0.546208381652832, + "learning_rate": 2.9404309252218e-06, + "loss": 0.0078, + "step": 3480 + }, + { + "epoch": 0.05898109731881058, + "grad_norm": 0.4050743281841278, + "learning_rate": 
2.948880439374736e-06, + "loss": 0.0092, + "step": 3490 + }, + { + "epoch": 0.05915009759766104, + "grad_norm": 0.5059739947319031, + "learning_rate": 2.9573299535276725e-06, + "loss": 0.0061, + "step": 3500 + }, + { + "epoch": 0.059319097876511497, + "grad_norm": 0.29913344979286194, + "learning_rate": 2.9657794676806085e-06, + "loss": 0.0112, + "step": 3510 + }, + { + "epoch": 0.059488098155361956, + "grad_norm": 0.4305241107940674, + "learning_rate": 2.9742289818335446e-06, + "loss": 0.0103, + "step": 3520 + }, + { + "epoch": 0.059657098434212415, + "grad_norm": 0.5640769004821777, + "learning_rate": 2.982678495986481e-06, + "loss": 0.0092, + "step": 3530 + }, + { + "epoch": 0.059826098713062874, + "grad_norm": 0.39654386043548584, + "learning_rate": 2.9911280101394176e-06, + "loss": 0.0084, + "step": 3540 + }, + { + "epoch": 0.05999509899191334, + "grad_norm": 0.2653600871562958, + "learning_rate": 2.9995775242923537e-06, + "loss": 0.0072, + "step": 3550 + }, + { + "epoch": 0.0601640992707638, + "grad_norm": 0.6193217635154724, + "learning_rate": 3.0080270384452897e-06, + "loss": 0.0091, + "step": 3560 + }, + { + "epoch": 0.06033309954961426, + "grad_norm": 0.48455023765563965, + "learning_rate": 3.016476552598226e-06, + "loss": 0.0078, + "step": 3570 + }, + { + "epoch": 0.06050209982846472, + "grad_norm": 0.8907110691070557, + "learning_rate": 3.024926066751162e-06, + "loss": 0.0085, + "step": 3580 + }, + { + "epoch": 0.060671100107315176, + "grad_norm": 0.5781303644180298, + "learning_rate": 3.0333755809040984e-06, + "loss": 0.0085, + "step": 3590 + }, + { + "epoch": 0.060840100386165635, + "grad_norm": 0.2834400236606598, + "learning_rate": 3.0418250950570345e-06, + "loss": 0.0099, + "step": 3600 + }, + { + "epoch": 0.061009100665016094, + "grad_norm": 0.5349828600883484, + "learning_rate": 3.0502746092099705e-06, + "loss": 0.0102, + "step": 3610 + }, + { + "epoch": 0.06117810094386656, + "grad_norm": 1.0900537967681885, + "learning_rate": 3.0587241233629066e-06, + "loss": 0.0096, + "step": 3620 + }, + { + "epoch": 0.06134710122271702, + "grad_norm": 0.8852835893630981, + "learning_rate": 3.0671736375158427e-06, + "loss": 0.0078, + "step": 3630 + }, + { + "epoch": 0.06151610150156748, + "grad_norm": 0.6187847852706909, + "learning_rate": 3.0756231516687796e-06, + "loss": 0.0108, + "step": 3640 + }, + { + "epoch": 0.06168510178041794, + "grad_norm": 0.730607807636261, + "learning_rate": 3.0840726658217156e-06, + "loss": 0.0098, + "step": 3650 + }, + { + "epoch": 0.0618541020592684, + "grad_norm": 0.47413742542266846, + "learning_rate": 3.0925221799746517e-06, + "loss": 0.0116, + "step": 3660 + }, + { + "epoch": 0.062023102338118856, + "grad_norm": 0.4965061843395233, + "learning_rate": 3.1009716941275882e-06, + "loss": 0.0079, + "step": 3670 + }, + { + "epoch": 0.062192102616969315, + "grad_norm": 0.3510805666446686, + "learning_rate": 3.1094212082805243e-06, + "loss": 0.0104, + "step": 3680 + }, + { + "epoch": 0.06236110289581978, + "grad_norm": 0.3473954498767853, + "learning_rate": 3.1178707224334604e-06, + "loss": 0.01, + "step": 3690 + }, + { + "epoch": 0.06253010317467024, + "grad_norm": 0.5971751809120178, + "learning_rate": 3.1263202365863964e-06, + "loss": 0.0086, + "step": 3700 + }, + { + "epoch": 0.0626991034535207, + "grad_norm": 0.6997776031494141, + "learning_rate": 3.1347697507393325e-06, + "loss": 0.0136, + "step": 3710 + }, + { + "epoch": 0.06286810373237116, + "grad_norm": 0.5057122111320496, + "learning_rate": 3.143219264892269e-06, + "loss": 0.0094, + "step": 3720 
+ }, + { + "epoch": 0.06303710401122162, + "grad_norm": 0.5285948514938354, + "learning_rate": 3.151668779045205e-06, + "loss": 0.0095, + "step": 3730 + }, + { + "epoch": 0.06320610429007208, + "grad_norm": 0.7009038329124451, + "learning_rate": 3.160118293198141e-06, + "loss": 0.0121, + "step": 3740 + }, + { + "epoch": 0.06337510456892254, + "grad_norm": 0.24084387719631195, + "learning_rate": 3.1685678073510776e-06, + "loss": 0.0061, + "step": 3750 + }, + { + "epoch": 0.063544104847773, + "grad_norm": 0.40020808577537537, + "learning_rate": 3.177017321504014e-06, + "loss": 0.011, + "step": 3760 + }, + { + "epoch": 0.06371310512662345, + "grad_norm": 0.4643620550632477, + "learning_rate": 3.18546683565695e-06, + "loss": 0.008, + "step": 3770 + }, + { + "epoch": 0.06388210540547391, + "grad_norm": 0.19212964177131653, + "learning_rate": 3.1939163498098863e-06, + "loss": 0.0071, + "step": 3780 + }, + { + "epoch": 0.06405110568432439, + "grad_norm": 0.4514760375022888, + "learning_rate": 3.2023658639628223e-06, + "loss": 0.0097, + "step": 3790 + }, + { + "epoch": 0.06422010596317484, + "grad_norm": 0.4289383590221405, + "learning_rate": 3.2108153781157584e-06, + "loss": 0.006, + "step": 3800 + }, + { + "epoch": 0.0643891062420253, + "grad_norm": 0.4901948869228363, + "learning_rate": 3.219264892268695e-06, + "loss": 0.0091, + "step": 3810 + }, + { + "epoch": 0.06455810652087576, + "grad_norm": 0.36952874064445496, + "learning_rate": 3.227714406421631e-06, + "loss": 0.01, + "step": 3820 + }, + { + "epoch": 0.06472710679972622, + "grad_norm": 0.3414541184902191, + "learning_rate": 3.236163920574567e-06, + "loss": 0.0112, + "step": 3830 + }, + { + "epoch": 0.06489610707857668, + "grad_norm": 0.5098986625671387, + "learning_rate": 3.244613434727503e-06, + "loss": 0.0092, + "step": 3840 + }, + { + "epoch": 0.06506510735742714, + "grad_norm": 0.33219394087791443, + "learning_rate": 3.2530629488804396e-06, + "loss": 0.0083, + "step": 3850 + }, + { + "epoch": 0.0652341076362776, + "grad_norm": 0.27958717942237854, + "learning_rate": 3.261512463033376e-06, + "loss": 0.0055, + "step": 3860 + }, + { + "epoch": 0.06540310791512806, + "grad_norm": 0.3715076446533203, + "learning_rate": 3.269961977186312e-06, + "loss": 0.0077, + "step": 3870 + }, + { + "epoch": 0.06557210819397852, + "grad_norm": 0.4441938102245331, + "learning_rate": 3.2784114913392482e-06, + "loss": 0.0086, + "step": 3880 + }, + { + "epoch": 0.06574110847282898, + "grad_norm": 0.4816913902759552, + "learning_rate": 3.2868610054921847e-06, + "loss": 0.0091, + "step": 3890 + }, + { + "epoch": 0.06591010875167944, + "grad_norm": 0.40409502387046814, + "learning_rate": 3.295310519645121e-06, + "loss": 0.0088, + "step": 3900 + }, + { + "epoch": 0.0660791090305299, + "grad_norm": 0.5365285873413086, + "learning_rate": 3.303760033798057e-06, + "loss": 0.0113, + "step": 3910 + }, + { + "epoch": 0.06624810930938035, + "grad_norm": 0.6550111770629883, + "learning_rate": 3.312209547950993e-06, + "loss": 0.0079, + "step": 3920 + }, + { + "epoch": 0.06641710958823083, + "grad_norm": 0.6132133603096008, + "learning_rate": 3.320659062103929e-06, + "loss": 0.0089, + "step": 3930 + }, + { + "epoch": 0.06658610986708129, + "grad_norm": 0.7494462132453918, + "learning_rate": 3.3291085762568655e-06, + "loss": 0.0092, + "step": 3940 + }, + { + "epoch": 0.06675511014593175, + "grad_norm": 0.5431383848190308, + "learning_rate": 3.3375580904098016e-06, + "loss": 0.0113, + "step": 3950 + }, + { + "epoch": 0.0669241104247822, + "grad_norm": 0.2721993327140808, + 
"learning_rate": 3.3460076045627376e-06, + "loss": 0.0107, + "step": 3960 + }, + { + "epoch": 0.06709311070363266, + "grad_norm": 0.1583024263381958, + "learning_rate": 3.3544571187156746e-06, + "loss": 0.0072, + "step": 3970 + }, + { + "epoch": 0.06726211098248312, + "grad_norm": 0.5978319644927979, + "learning_rate": 3.3629066328686106e-06, + "loss": 0.0068, + "step": 3980 + }, + { + "epoch": 0.06743111126133358, + "grad_norm": 0.5664768218994141, + "learning_rate": 3.3713561470215467e-06, + "loss": 0.0128, + "step": 3990 + }, + { + "epoch": 0.06760011154018404, + "grad_norm": 0.4068852663040161, + "learning_rate": 3.3798056611744828e-06, + "loss": 0.006, + "step": 4000 + }, + { + "epoch": 0.0677691118190345, + "grad_norm": 0.19173608720302582, + "learning_rate": 3.388255175327419e-06, + "loss": 0.0053, + "step": 4010 + }, + { + "epoch": 0.06793811209788496, + "grad_norm": 0.5509228706359863, + "learning_rate": 3.3967046894803553e-06, + "loss": 0.0109, + "step": 4020 + }, + { + "epoch": 0.06810711237673542, + "grad_norm": 0.23547115921974182, + "learning_rate": 3.4051542036332914e-06, + "loss": 0.007, + "step": 4030 + }, + { + "epoch": 0.06827611265558588, + "grad_norm": 0.46924418210983276, + "learning_rate": 3.4136037177862275e-06, + "loss": 0.0072, + "step": 4040 + }, + { + "epoch": 0.06844511293443634, + "grad_norm": 0.2977207601070404, + "learning_rate": 3.4220532319391635e-06, + "loss": 0.0054, + "step": 4050 + }, + { + "epoch": 0.0686141132132868, + "grad_norm": 0.3505478799343109, + "learning_rate": 3.4305027460920996e-06, + "loss": 0.0067, + "step": 4060 + }, + { + "epoch": 0.06878311349213727, + "grad_norm": 0.3547617495059967, + "learning_rate": 3.438952260245036e-06, + "loss": 0.0095, + "step": 4070 + }, + { + "epoch": 0.06895211377098773, + "grad_norm": 0.49734488129615784, + "learning_rate": 3.4474017743979726e-06, + "loss": 0.0075, + "step": 4080 + }, + { + "epoch": 0.06912111404983819, + "grad_norm": 0.5076583027839661, + "learning_rate": 3.4558512885509087e-06, + "loss": 0.0093, + "step": 4090 + }, + { + "epoch": 0.06929011432868865, + "grad_norm": 0.4036179482936859, + "learning_rate": 3.4643008027038447e-06, + "loss": 0.0092, + "step": 4100 + }, + { + "epoch": 0.0694591146075391, + "grad_norm": 0.8179068565368652, + "learning_rate": 3.4727503168567812e-06, + "loss": 0.0079, + "step": 4110 + }, + { + "epoch": 0.06962811488638956, + "grad_norm": 0.6014817953109741, + "learning_rate": 3.4811998310097173e-06, + "loss": 0.008, + "step": 4120 + }, + { + "epoch": 0.06979711516524002, + "grad_norm": 0.6647421717643738, + "learning_rate": 3.4896493451626534e-06, + "loss": 0.0086, + "step": 4130 + }, + { + "epoch": 0.06996611544409048, + "grad_norm": 0.4266943335533142, + "learning_rate": 3.4980988593155894e-06, + "loss": 0.0077, + "step": 4140 + }, + { + "epoch": 0.07013511572294094, + "grad_norm": 0.7118129134178162, + "learning_rate": 3.5065483734685255e-06, + "loss": 0.0075, + "step": 4150 + }, + { + "epoch": 0.0703041160017914, + "grad_norm": 0.4971505105495453, + "learning_rate": 3.514997887621462e-06, + "loss": 0.0064, + "step": 4160 + }, + { + "epoch": 0.07047311628064186, + "grad_norm": 0.19220784306526184, + "learning_rate": 3.523447401774398e-06, + "loss": 0.0079, + "step": 4170 + }, + { + "epoch": 0.07064211655949232, + "grad_norm": 0.2972339987754822, + "learning_rate": 3.531896915927334e-06, + "loss": 0.0061, + "step": 4180 + }, + { + "epoch": 0.07081111683834278, + "grad_norm": 0.5900372266769409, + "learning_rate": 3.540346430080271e-06, + "loss": 0.0068, + 
"step": 4190 + }, + { + "epoch": 0.07098011711719324, + "grad_norm": 0.5239884853363037, + "learning_rate": 3.548795944233207e-06, + "loss": 0.0085, + "step": 4200 + }, + { + "epoch": 0.07114911739604371, + "grad_norm": 0.35723498463630676, + "learning_rate": 3.557245458386143e-06, + "loss": 0.0076, + "step": 4210 + }, + { + "epoch": 0.07131811767489417, + "grad_norm": 0.48376893997192383, + "learning_rate": 3.5656949725390793e-06, + "loss": 0.0075, + "step": 4220 + }, + { + "epoch": 0.07148711795374463, + "grad_norm": 0.22889120876789093, + "learning_rate": 3.5741444866920154e-06, + "loss": 0.0083, + "step": 4230 + }, + { + "epoch": 0.07165611823259509, + "grad_norm": 0.14473536610603333, + "learning_rate": 3.582594000844952e-06, + "loss": 0.008, + "step": 4240 + }, + { + "epoch": 0.07182511851144555, + "grad_norm": 0.4018864631652832, + "learning_rate": 3.591043514997888e-06, + "loss": 0.0072, + "step": 4250 + }, + { + "epoch": 0.071994118790296, + "grad_norm": 0.25340160727500916, + "learning_rate": 3.599493029150824e-06, + "loss": 0.007, + "step": 4260 + }, + { + "epoch": 0.07216311906914646, + "grad_norm": 0.4823681116104126, + "learning_rate": 3.60794254330376e-06, + "loss": 0.01, + "step": 4270 + }, + { + "epoch": 0.07233211934799692, + "grad_norm": 0.45038795471191406, + "learning_rate": 3.616392057456696e-06, + "loss": 0.0064, + "step": 4280 + }, + { + "epoch": 0.07250111962684738, + "grad_norm": 0.511728048324585, + "learning_rate": 3.624841571609633e-06, + "loss": 0.0053, + "step": 4290 + }, + { + "epoch": 0.07267011990569784, + "grad_norm": 0.4706685245037079, + "learning_rate": 3.633291085762569e-06, + "loss": 0.0063, + "step": 4300 + }, + { + "epoch": 0.0728391201845483, + "grad_norm": 0.4813501834869385, + "learning_rate": 3.641740599915505e-06, + "loss": 0.0074, + "step": 4310 + }, + { + "epoch": 0.07300812046339876, + "grad_norm": 0.28050094842910767, + "learning_rate": 3.6501901140684417e-06, + "loss": 0.0121, + "step": 4320 + }, + { + "epoch": 0.07317712074224922, + "grad_norm": 0.44418439269065857, + "learning_rate": 3.6586396282213777e-06, + "loss": 0.0067, + "step": 4330 + }, + { + "epoch": 0.07334612102109969, + "grad_norm": 0.2895668148994446, + "learning_rate": 3.667089142374314e-06, + "loss": 0.0065, + "step": 4340 + }, + { + "epoch": 0.07351512129995015, + "grad_norm": 0.35102537274360657, + "learning_rate": 3.67553865652725e-06, + "loss": 0.0108, + "step": 4350 + }, + { + "epoch": 0.07368412157880061, + "grad_norm": 0.5368096232414246, + "learning_rate": 3.683988170680186e-06, + "loss": 0.0076, + "step": 4360 + }, + { + "epoch": 0.07385312185765107, + "grad_norm": 0.23388966917991638, + "learning_rate": 3.6924376848331225e-06, + "loss": 0.0081, + "step": 4370 + }, + { + "epoch": 0.07402212213650153, + "grad_norm": 0.35802993178367615, + "learning_rate": 3.7008871989860585e-06, + "loss": 0.006, + "step": 4380 + }, + { + "epoch": 0.07419112241535199, + "grad_norm": 0.24196232855319977, + "learning_rate": 3.7093367131389946e-06, + "loss": 0.0062, + "step": 4390 + }, + { + "epoch": 0.07436012269420245, + "grad_norm": 0.6262447237968445, + "learning_rate": 3.717786227291931e-06, + "loss": 0.0089, + "step": 4400 + }, + { + "epoch": 0.0745291229730529, + "grad_norm": 0.25484466552734375, + "learning_rate": 3.7262357414448676e-06, + "loss": 0.0086, + "step": 4410 + }, + { + "epoch": 0.07469812325190336, + "grad_norm": 0.5549577474594116, + "learning_rate": 3.7346852555978037e-06, + "loss": 0.0074, + "step": 4420 + }, + { + "epoch": 0.07486712353075382, + "grad_norm": 
0.2302803099155426, + "learning_rate": 3.7431347697507397e-06, + "loss": 0.0055, + "step": 4430 + }, + { + "epoch": 0.07503612380960428, + "grad_norm": 0.6471118927001953, + "learning_rate": 3.751584283903676e-06, + "loss": 0.0086, + "step": 4440 + }, + { + "epoch": 0.07520512408845474, + "grad_norm": 0.4209013283252716, + "learning_rate": 3.760033798056612e-06, + "loss": 0.0078, + "step": 4450 + }, + { + "epoch": 0.0753741243673052, + "grad_norm": 0.43538352847099304, + "learning_rate": 3.7684833122095484e-06, + "loss": 0.0072, + "step": 4460 + }, + { + "epoch": 0.07554312464615566, + "grad_norm": 0.31720903515815735, + "learning_rate": 3.7769328263624844e-06, + "loss": 0.0103, + "step": 4470 + }, + { + "epoch": 0.07571212492500613, + "grad_norm": 0.15655528008937836, + "learning_rate": 3.7853823405154205e-06, + "loss": 0.0064, + "step": 4480 + }, + { + "epoch": 0.07588112520385659, + "grad_norm": 0.30433279275894165, + "learning_rate": 3.7938318546683566e-06, + "loss": 0.0065, + "step": 4490 + }, + { + "epoch": 0.07605012548270705, + "grad_norm": 0.4596308767795563, + "learning_rate": 3.8022813688212926e-06, + "loss": 0.0087, + "step": 4500 + }, + { + "epoch": 0.07621912576155751, + "grad_norm": 0.11451300978660583, + "learning_rate": 3.8107308829742296e-06, + "loss": 0.0091, + "step": 4510 + }, + { + "epoch": 0.07638812604040797, + "grad_norm": 0.5014181137084961, + "learning_rate": 3.819180397127166e-06, + "loss": 0.008, + "step": 4520 + }, + { + "epoch": 0.07655712631925843, + "grad_norm": 0.3703087270259857, + "learning_rate": 3.827629911280102e-06, + "loss": 0.0056, + "step": 4530 + }, + { + "epoch": 0.07672612659810889, + "grad_norm": 1.3860195875167847, + "learning_rate": 3.836079425433038e-06, + "loss": 0.0099, + "step": 4540 + }, + { + "epoch": 0.07689512687695935, + "grad_norm": 0.33820924162864685, + "learning_rate": 3.844528939585974e-06, + "loss": 0.0094, + "step": 4550 + }, + { + "epoch": 0.0770641271558098, + "grad_norm": 0.4271750748157501, + "learning_rate": 3.85297845373891e-06, + "loss": 0.0076, + "step": 4560 + }, + { + "epoch": 0.07723312743466026, + "grad_norm": 0.40488216280937195, + "learning_rate": 3.861427967891846e-06, + "loss": 0.0059, + "step": 4570 + }, + { + "epoch": 0.07740212771351072, + "grad_norm": 0.3280101418495178, + "learning_rate": 3.869877482044783e-06, + "loss": 0.0069, + "step": 4580 + }, + { + "epoch": 0.07757112799236118, + "grad_norm": 0.6588373184204102, + "learning_rate": 3.8783269961977185e-06, + "loss": 0.0092, + "step": 4590 + }, + { + "epoch": 0.07774012827121164, + "grad_norm": 0.4867706894874573, + "learning_rate": 3.886776510350655e-06, + "loss": 0.009, + "step": 4600 + }, + { + "epoch": 0.0779091285500621, + "grad_norm": 0.5322169661521912, + "learning_rate": 3.8952260245035915e-06, + "loss": 0.0114, + "step": 4610 + }, + { + "epoch": 0.07807812882891257, + "grad_norm": 0.616223156452179, + "learning_rate": 3.903675538656528e-06, + "loss": 0.0098, + "step": 4620 + }, + { + "epoch": 0.07824712910776303, + "grad_norm": 0.810210108757019, + "learning_rate": 3.912125052809464e-06, + "loss": 0.0098, + "step": 4630 + }, + { + "epoch": 0.07841612938661349, + "grad_norm": 0.2605763375759125, + "learning_rate": 3.9205745669624e-06, + "loss": 0.0067, + "step": 4640 + }, + { + "epoch": 0.07858512966546395, + "grad_norm": 0.16967809200286865, + "learning_rate": 3.929024081115337e-06, + "loss": 0.008, + "step": 4650 + }, + { + "epoch": 0.07875412994431441, + "grad_norm": 0.15028618276119232, + "learning_rate": 3.937473595268272e-06, + "loss": 
0.0063, + "step": 4660 + }, + { + "epoch": 0.07892313022316487, + "grad_norm": 0.3279257118701935, + "learning_rate": 3.945923109421209e-06, + "loss": 0.0066, + "step": 4670 + }, + { + "epoch": 0.07909213050201533, + "grad_norm": 0.7601935267448425, + "learning_rate": 3.9543726235741444e-06, + "loss": 0.01, + "step": 4680 + }, + { + "epoch": 0.07926113078086579, + "grad_norm": 0.15642717480659485, + "learning_rate": 3.962822137727081e-06, + "loss": 0.0065, + "step": 4690 + }, + { + "epoch": 0.07943013105971625, + "grad_norm": 0.36829888820648193, + "learning_rate": 3.9712716518800174e-06, + "loss": 0.0071, + "step": 4700 + }, + { + "epoch": 0.0795991313385667, + "grad_norm": 0.42035579681396484, + "learning_rate": 3.979721166032953e-06, + "loss": 0.0056, + "step": 4710 + }, + { + "epoch": 0.07976813161741717, + "grad_norm": 0.20596951246261597, + "learning_rate": 3.9881706801858896e-06, + "loss": 0.0065, + "step": 4720 + }, + { + "epoch": 0.07993713189626762, + "grad_norm": 0.3290463089942932, + "learning_rate": 3.996620194338826e-06, + "loss": 0.0086, + "step": 4730 + }, + { + "epoch": 0.08010613217511808, + "grad_norm": 0.2539362609386444, + "learning_rate": 4.0050697084917626e-06, + "loss": 0.006, + "step": 4740 + }, + { + "epoch": 0.08027513245396854, + "grad_norm": 0.7381569147109985, + "learning_rate": 4.013519222644698e-06, + "loss": 0.0062, + "step": 4750 + }, + { + "epoch": 0.08044413273281902, + "grad_norm": 0.17694604396820068, + "learning_rate": 4.021968736797635e-06, + "loss": 0.0079, + "step": 4760 + }, + { + "epoch": 0.08061313301166947, + "grad_norm": 0.44804155826568604, + "learning_rate": 4.03041825095057e-06, + "loss": 0.008, + "step": 4770 + }, + { + "epoch": 0.08078213329051993, + "grad_norm": 0.3938175439834595, + "learning_rate": 4.038867765103507e-06, + "loss": 0.0089, + "step": 4780 + }, + { + "epoch": 0.08095113356937039, + "grad_norm": 0.6432244181632996, + "learning_rate": 4.047317279256443e-06, + "loss": 0.0055, + "step": 4790 + }, + { + "epoch": 0.08112013384822085, + "grad_norm": 0.3865385949611664, + "learning_rate": 4.055766793409379e-06, + "loss": 0.0059, + "step": 4800 + }, + { + "epoch": 0.08128913412707131, + "grad_norm": 0.3288101255893707, + "learning_rate": 4.0642163075623155e-06, + "loss": 0.008, + "step": 4810 + }, + { + "epoch": 0.08145813440592177, + "grad_norm": 0.386060893535614, + "learning_rate": 4.072665821715251e-06, + "loss": 0.0073, + "step": 4820 + }, + { + "epoch": 0.08162713468477223, + "grad_norm": 0.3735050559043884, + "learning_rate": 4.081115335868188e-06, + "loss": 0.0075, + "step": 4830 + }, + { + "epoch": 0.08179613496362269, + "grad_norm": 0.28494900465011597, + "learning_rate": 4.089564850021124e-06, + "loss": 0.0086, + "step": 4840 + }, + { + "epoch": 0.08196513524247315, + "grad_norm": 0.8604230284690857, + "learning_rate": 4.098014364174061e-06, + "loss": 0.0093, + "step": 4850 + }, + { + "epoch": 0.0821341355213236, + "grad_norm": 0.32281509041786194, + "learning_rate": 4.106463878326996e-06, + "loss": 0.0125, + "step": 4860 + }, + { + "epoch": 0.08230313580017407, + "grad_norm": 0.6922529935836792, + "learning_rate": 4.114913392479933e-06, + "loss": 0.0088, + "step": 4870 + }, + { + "epoch": 0.08247213607902452, + "grad_norm": 0.15109947323799133, + "learning_rate": 4.123362906632869e-06, + "loss": 0.0082, + "step": 4880 + }, + { + "epoch": 0.08264113635787498, + "grad_norm": 0.5638471841812134, + "learning_rate": 4.131812420785805e-06, + "loss": 0.0073, + "step": 4890 + }, + { + "epoch": 0.08281013663672546, + 
"grad_norm": 0.5557750463485718, + "learning_rate": 4.140261934938741e-06, + "loss": 0.0065, + "step": 4900 + }, + { + "epoch": 0.08297913691557592, + "grad_norm": 0.3340568244457245, + "learning_rate": 4.148711449091677e-06, + "loss": 0.008, + "step": 4910 + }, + { + "epoch": 0.08314813719442637, + "grad_norm": 0.4634811282157898, + "learning_rate": 4.1571609632446135e-06, + "loss": 0.0082, + "step": 4920 + }, + { + "epoch": 0.08331713747327683, + "grad_norm": 0.45580101013183594, + "learning_rate": 4.16561047739755e-06, + "loss": 0.0062, + "step": 4930 + }, + { + "epoch": 0.08348613775212729, + "grad_norm": 0.3351278007030487, + "learning_rate": 4.174059991550486e-06, + "loss": 0.0135, + "step": 4940 + }, + { + "epoch": 0.08365513803097775, + "grad_norm": 0.16381919384002686, + "learning_rate": 4.182509505703423e-06, + "loss": 0.0078, + "step": 4950 + }, + { + "epoch": 0.08382413830982821, + "grad_norm": 0.7266935706138611, + "learning_rate": 4.190959019856359e-06, + "loss": 0.0056, + "step": 4960 + }, + { + "epoch": 0.08399313858867867, + "grad_norm": 0.44963306188583374, + "learning_rate": 4.199408534009295e-06, + "loss": 0.0097, + "step": 4970 + }, + { + "epoch": 0.08416213886752913, + "grad_norm": 0.32577261328697205, + "learning_rate": 4.207858048162231e-06, + "loss": 0.0092, + "step": 4980 + }, + { + "epoch": 0.08433113914637959, + "grad_norm": 0.42351993918418884, + "learning_rate": 4.216307562315167e-06, + "loss": 0.0065, + "step": 4990 + }, + { + "epoch": 0.08450013942523005, + "grad_norm": 0.38387471437454224, + "learning_rate": 4.224757076468104e-06, + "loss": 0.0073, + "step": 5000 + }, + { + "epoch": 0.0846691397040805, + "grad_norm": 0.5673115849494934, + "learning_rate": 4.2332065906210394e-06, + "loss": 0.0077, + "step": 5010 + }, + { + "epoch": 0.08483813998293097, + "grad_norm": 0.46692800521850586, + "learning_rate": 4.241656104773976e-06, + "loss": 0.0081, + "step": 5020 + }, + { + "epoch": 0.08500714026178144, + "grad_norm": 0.27370235323905945, + "learning_rate": 4.2501056189269116e-06, + "loss": 0.0092, + "step": 5030 + }, + { + "epoch": 0.0851761405406319, + "grad_norm": 0.2749960720539093, + "learning_rate": 4.258555133079848e-06, + "loss": 0.008, + "step": 5040 + }, + { + "epoch": 0.08534514081948236, + "grad_norm": 0.34917569160461426, + "learning_rate": 4.2670046472327846e-06, + "loss": 0.0112, + "step": 5050 + }, + { + "epoch": 0.08551414109833282, + "grad_norm": 0.26685452461242676, + "learning_rate": 4.275454161385721e-06, + "loss": 0.0065, + "step": 5060 + }, + { + "epoch": 0.08568314137718328, + "grad_norm": 0.18275409936904907, + "learning_rate": 4.283903675538657e-06, + "loss": 0.0066, + "step": 5070 + }, + { + "epoch": 0.08585214165603373, + "grad_norm": 0.30578580498695374, + "learning_rate": 4.292353189691593e-06, + "loss": 0.0057, + "step": 5080 + }, + { + "epoch": 0.0860211419348842, + "grad_norm": 0.21141697466373444, + "learning_rate": 4.30080270384453e-06, + "loss": 0.0055, + "step": 5090 + }, + { + "epoch": 0.08619014221373465, + "grad_norm": 0.5646255612373352, + "learning_rate": 4.309252217997465e-06, + "loss": 0.0083, + "step": 5100 + }, + { + "epoch": 0.08635914249258511, + "grad_norm": 0.2993975579738617, + "learning_rate": 4.317701732150402e-06, + "loss": 0.0071, + "step": 5110 + }, + { + "epoch": 0.08652814277143557, + "grad_norm": 0.33375898003578186, + "learning_rate": 4.3261512463033375e-06, + "loss": 0.008, + "step": 5120 + }, + { + "epoch": 0.08669714305028603, + "grad_norm": 0.5091102719306946, + "learning_rate": 
4.334600760456274e-06, + "loss": 0.0059, + "step": 5130 + }, + { + "epoch": 0.08686614332913649, + "grad_norm": 0.36565321683883667, + "learning_rate": 4.3430502746092105e-06, + "loss": 0.0076, + "step": 5140 + }, + { + "epoch": 0.08703514360798695, + "grad_norm": 0.683262288570404, + "learning_rate": 4.351499788762146e-06, + "loss": 0.0061, + "step": 5150 + }, + { + "epoch": 0.0872041438868374, + "grad_norm": 0.650642991065979, + "learning_rate": 4.359949302915083e-06, + "loss": 0.0078, + "step": 5160 + }, + { + "epoch": 0.08737314416568788, + "grad_norm": 0.5229181051254272, + "learning_rate": 4.368398817068019e-06, + "loss": 0.0096, + "step": 5170 + }, + { + "epoch": 0.08754214444453834, + "grad_norm": 0.4177519679069519, + "learning_rate": 4.376848331220956e-06, + "loss": 0.0093, + "step": 5180 + }, + { + "epoch": 0.0877111447233888, + "grad_norm": 0.5535804629325867, + "learning_rate": 4.385297845373891e-06, + "loss": 0.0081, + "step": 5190 + }, + { + "epoch": 0.08788014500223926, + "grad_norm": 0.294405460357666, + "learning_rate": 4.393747359526828e-06, + "loss": 0.0053, + "step": 5200 + }, + { + "epoch": 0.08804914528108972, + "grad_norm": 0.38873642683029175, + "learning_rate": 4.402196873679763e-06, + "loss": 0.006, + "step": 5210 + }, + { + "epoch": 0.08821814555994018, + "grad_norm": 0.45184701681137085, + "learning_rate": 4.4106463878327e-06, + "loss": 0.0073, + "step": 5220 + }, + { + "epoch": 0.08838714583879063, + "grad_norm": 0.5146165490150452, + "learning_rate": 4.419095901985636e-06, + "loss": 0.0082, + "step": 5230 + }, + { + "epoch": 0.0885561461176411, + "grad_norm": 0.4142181873321533, + "learning_rate": 4.427545416138572e-06, + "loss": 0.0051, + "step": 5240 + }, + { + "epoch": 0.08872514639649155, + "grad_norm": 0.13999409973621368, + "learning_rate": 4.4359949302915085e-06, + "loss": 0.0064, + "step": 5250 + }, + { + "epoch": 0.08889414667534201, + "grad_norm": 0.5468639731407166, + "learning_rate": 4.444444444444444e-06, + "loss": 0.0077, + "step": 5260 + }, + { + "epoch": 0.08906314695419247, + "grad_norm": 0.45908740162849426, + "learning_rate": 4.4528939585973815e-06, + "loss": 0.0085, + "step": 5270 + }, + { + "epoch": 0.08923214723304293, + "grad_norm": 0.6285553574562073, + "learning_rate": 4.461343472750317e-06, + "loss": 0.0086, + "step": 5280 + }, + { + "epoch": 0.08940114751189339, + "grad_norm": 0.3353530764579773, + "learning_rate": 4.469792986903254e-06, + "loss": 0.0079, + "step": 5290 + }, + { + "epoch": 0.08957014779074385, + "grad_norm": 0.4628467261791229, + "learning_rate": 4.47824250105619e-06, + "loss": 0.0085, + "step": 5300 + }, + { + "epoch": 0.08973914806959432, + "grad_norm": 0.35261330008506775, + "learning_rate": 4.486692015209126e-06, + "loss": 0.0092, + "step": 5310 + }, + { + "epoch": 0.08990814834844478, + "grad_norm": 0.33638912439346313, + "learning_rate": 4.495141529362062e-06, + "loss": 0.0082, + "step": 5320 + }, + { + "epoch": 0.09007714862729524, + "grad_norm": 0.3847145736217499, + "learning_rate": 4.503591043514998e-06, + "loss": 0.0078, + "step": 5330 + }, + { + "epoch": 0.0902461489061457, + "grad_norm": 0.41521865129470825, + "learning_rate": 4.512040557667934e-06, + "loss": 0.0083, + "step": 5340 + }, + { + "epoch": 0.09041514918499616, + "grad_norm": 0.5259934663772583, + "learning_rate": 4.520490071820871e-06, + "loss": 0.0094, + "step": 5350 + }, + { + "epoch": 0.09058414946384662, + "grad_norm": 0.4790876805782318, + "learning_rate": 4.5289395859738065e-06, + "loss": 0.0064, + "step": 5360 + }, + { + "epoch": 
0.09075314974269708, + "grad_norm": 0.3402535319328308, + "learning_rate": 4.537389100126743e-06, + "loss": 0.0054, + "step": 5370 + }, + { + "epoch": 0.09092215002154753, + "grad_norm": 0.46615660190582275, + "learning_rate": 4.5458386142796795e-06, + "loss": 0.0073, + "step": 5380 + }, + { + "epoch": 0.091091150300398, + "grad_norm": 0.39094266295433044, + "learning_rate": 4.554288128432616e-06, + "loss": 0.0077, + "step": 5390 + }, + { + "epoch": 0.09126015057924845, + "grad_norm": 0.2667132616043091, + "learning_rate": 4.562737642585552e-06, + "loss": 0.0077, + "step": 5400 + }, + { + "epoch": 0.09142915085809891, + "grad_norm": 0.483772337436676, + "learning_rate": 4.571187156738488e-06, + "loss": 0.0076, + "step": 5410 + }, + { + "epoch": 0.09159815113694937, + "grad_norm": 0.4861210882663727, + "learning_rate": 4.579636670891424e-06, + "loss": 0.0107, + "step": 5420 + }, + { + "epoch": 0.09176715141579983, + "grad_norm": 0.5079867839813232, + "learning_rate": 4.58808618504436e-06, + "loss": 0.0089, + "step": 5430 + }, + { + "epoch": 0.09193615169465029, + "grad_norm": 0.33771970868110657, + "learning_rate": 4.596535699197297e-06, + "loss": 0.0081, + "step": 5440 + }, + { + "epoch": 0.09210515197350076, + "grad_norm": 0.10134998708963394, + "learning_rate": 4.6049852133502325e-06, + "loss": 0.0043, + "step": 5450 + }, + { + "epoch": 0.09227415225235122, + "grad_norm": 0.15871858596801758, + "learning_rate": 4.613434727503169e-06, + "loss": 0.0104, + "step": 5460 + }, + { + "epoch": 0.09244315253120168, + "grad_norm": 0.4372641444206238, + "learning_rate": 4.621884241656105e-06, + "loss": 0.0167, + "step": 5470 + }, + { + "epoch": 0.09261215281005214, + "grad_norm": 0.4832150340080261, + "learning_rate": 4.630333755809041e-06, + "loss": 0.0088, + "step": 5480 + }, + { + "epoch": 0.0927811530889026, + "grad_norm": 0.34747639298439026, + "learning_rate": 4.638783269961978e-06, + "loss": 0.0052, + "step": 5490 + }, + { + "epoch": 0.09295015336775306, + "grad_norm": 0.15239928662776947, + "learning_rate": 4.647232784114914e-06, + "loss": 0.0064, + "step": 5500 + }, + { + "epoch": 0.09311915364660352, + "grad_norm": 0.48043081164360046, + "learning_rate": 4.65568229826785e-06, + "loss": 0.006, + "step": 5510 + }, + { + "epoch": 0.09328815392545398, + "grad_norm": 0.30117249488830566, + "learning_rate": 4.664131812420786e-06, + "loss": 0.0062, + "step": 5520 + }, + { + "epoch": 0.09345715420430444, + "grad_norm": 0.5305731892585754, + "learning_rate": 4.672581326573723e-06, + "loss": 0.0069, + "step": 5530 + }, + { + "epoch": 0.0936261544831549, + "grad_norm": 0.31124499440193176, + "learning_rate": 4.681030840726658e-06, + "loss": 0.0065, + "step": 5540 + }, + { + "epoch": 0.09379515476200535, + "grad_norm": 0.15525099635124207, + "learning_rate": 4.689480354879595e-06, + "loss": 0.006, + "step": 5550 + }, + { + "epoch": 0.09396415504085581, + "grad_norm": 0.5805965065956116, + "learning_rate": 4.6979298690325305e-06, + "loss": 0.0067, + "step": 5560 + }, + { + "epoch": 0.09413315531970627, + "grad_norm": 0.21559707820415497, + "learning_rate": 4.706379383185467e-06, + "loss": 0.0055, + "step": 5570 + }, + { + "epoch": 0.09430215559855674, + "grad_norm": 0.4127371311187744, + "learning_rate": 4.7148288973384035e-06, + "loss": 0.0064, + "step": 5580 + }, + { + "epoch": 0.0944711558774072, + "grad_norm": 0.28241196274757385, + "learning_rate": 4.723278411491339e-06, + "loss": 0.0067, + "step": 5590 + }, + { + "epoch": 0.09464015615625766, + "grad_norm": 0.3856637477874756, + "learning_rate": 
4.7317279256442765e-06, + "loss": 0.0086, + "step": 5600 + }, + { + "epoch": 0.09480915643510812, + "grad_norm": 0.4850485622882843, + "learning_rate": 4.740177439797212e-06, + "loss": 0.0076, + "step": 5610 + }, + { + "epoch": 0.09497815671395858, + "grad_norm": 0.3252734839916229, + "learning_rate": 4.748626953950149e-06, + "loss": 0.0068, + "step": 5620 + }, + { + "epoch": 0.09514715699280904, + "grad_norm": 0.27155035734176636, + "learning_rate": 4.757076468103084e-06, + "loss": 0.0065, + "step": 5630 + }, + { + "epoch": 0.0953161572716595, + "grad_norm": 0.21099352836608887, + "learning_rate": 4.765525982256021e-06, + "loss": 0.0074, + "step": 5640 + }, + { + "epoch": 0.09548515755050996, + "grad_norm": 0.23718442022800446, + "learning_rate": 4.773975496408957e-06, + "loss": 0.0066, + "step": 5650 + }, + { + "epoch": 0.09565415782936042, + "grad_norm": 0.12414859235286713, + "learning_rate": 4.782425010561893e-06, + "loss": 0.0072, + "step": 5660 + }, + { + "epoch": 0.09582315810821088, + "grad_norm": 0.1519409865140915, + "learning_rate": 4.790874524714829e-06, + "loss": 0.0063, + "step": 5670 + }, + { + "epoch": 0.09599215838706134, + "grad_norm": 0.16129052639007568, + "learning_rate": 4.799324038867765e-06, + "loss": 0.0069, + "step": 5680 + }, + { + "epoch": 0.0961611586659118, + "grad_norm": 0.394161194562912, + "learning_rate": 4.8077735530207015e-06, + "loss": 0.0042, + "step": 5690 + }, + { + "epoch": 0.09633015894476225, + "grad_norm": 0.15623830258846283, + "learning_rate": 4.816223067173638e-06, + "loss": 0.0068, + "step": 5700 + }, + { + "epoch": 0.09649915922361271, + "grad_norm": 0.4517795145511627, + "learning_rate": 4.8246725813265745e-06, + "loss": 0.0098, + "step": 5710 + }, + { + "epoch": 0.09666815950246319, + "grad_norm": 0.37541139125823975, + "learning_rate": 4.83312209547951e-06, + "loss": 0.0087, + "step": 5720 + }, + { + "epoch": 0.09683715978131364, + "grad_norm": 0.23171207308769226, + "learning_rate": 4.841571609632447e-06, + "loss": 0.0046, + "step": 5730 + }, + { + "epoch": 0.0970061600601641, + "grad_norm": 0.23239940404891968, + "learning_rate": 4.850021123785383e-06, + "loss": 0.0051, + "step": 5740 + }, + { + "epoch": 0.09717516033901456, + "grad_norm": 0.16493850946426392, + "learning_rate": 4.858470637938319e-06, + "loss": 0.0063, + "step": 5750 + }, + { + "epoch": 0.09734416061786502, + "grad_norm": 0.3797301650047302, + "learning_rate": 4.866920152091255e-06, + "loss": 0.0086, + "step": 5760 + }, + { + "epoch": 0.09751316089671548, + "grad_norm": 0.35541290044784546, + "learning_rate": 4.875369666244191e-06, + "loss": 0.0075, + "step": 5770 + }, + { + "epoch": 0.09768216117556594, + "grad_norm": 0.24128615856170654, + "learning_rate": 4.8838191803971274e-06, + "loss": 0.0058, + "step": 5780 + }, + { + "epoch": 0.0978511614544164, + "grad_norm": 0.29352807998657227, + "learning_rate": 4.892268694550064e-06, + "loss": 0.0084, + "step": 5790 + }, + { + "epoch": 0.09802016173326686, + "grad_norm": 0.10192513465881348, + "learning_rate": 4.9007182087029996e-06, + "loss": 0.008, + "step": 5800 + }, + { + "epoch": 0.09818916201211732, + "grad_norm": 0.3217865228652954, + "learning_rate": 4.909167722855936e-06, + "loss": 0.0056, + "step": 5810 + }, + { + "epoch": 0.09835816229096778, + "grad_norm": 0.25088396668434143, + "learning_rate": 4.9176172370088726e-06, + "loss": 0.0054, + "step": 5820 + }, + { + "epoch": 0.09852716256981824, + "grad_norm": 0.6246805787086487, + "learning_rate": 4.926066751161809e-06, + "loss": 0.0077, + "step": 5830 + }, + { + 
"epoch": 0.0986961628486687, + "grad_norm": 0.5747209787368774, + "learning_rate": 4.934516265314745e-06, + "loss": 0.0054, + "step": 5840 + }, + { + "epoch": 0.09886516312751915, + "grad_norm": 0.22276253998279572, + "learning_rate": 4.942965779467681e-06, + "loss": 0.0076, + "step": 5850 + }, + { + "epoch": 0.09903416340636963, + "grad_norm": 0.26171597838401794, + "learning_rate": 4.951415293620617e-06, + "loss": 0.0062, + "step": 5860 + }, + { + "epoch": 0.09920316368522009, + "grad_norm": 0.4480033218860626, + "learning_rate": 4.959864807773553e-06, + "loss": 0.006, + "step": 5870 + }, + { + "epoch": 0.09937216396407055, + "grad_norm": 0.44686561822891235, + "learning_rate": 4.96831432192649e-06, + "loss": 0.0073, + "step": 5880 + }, + { + "epoch": 0.099541164242921, + "grad_norm": 0.186452716588974, + "learning_rate": 4.9767638360794255e-06, + "loss": 0.0104, + "step": 5890 + }, + { + "epoch": 0.09971016452177146, + "grad_norm": 0.3807360529899597, + "learning_rate": 4.985213350232362e-06, + "loss": 0.0086, + "step": 5900 + }, + { + "epoch": 0.09987916480062192, + "grad_norm": 0.4487653076648712, + "learning_rate": 4.993662864385298e-06, + "loss": 0.0073, + "step": 5910 + }, + { + "epoch": 0.10004816507947238, + "grad_norm": 0.2705078125, + "learning_rate": 5.002112378538234e-06, + "loss": 0.0114, + "step": 5920 + }, + { + "epoch": 0.10021716535832284, + "grad_norm": 0.33181098103523254, + "learning_rate": 5.010561892691171e-06, + "loss": 0.0046, + "step": 5930 + }, + { + "epoch": 0.1003861656371733, + "grad_norm": 0.25259125232696533, + "learning_rate": 5.019011406844106e-06, + "loss": 0.0089, + "step": 5940 + }, + { + "epoch": 0.10055516591602376, + "grad_norm": 0.5207515954971313, + "learning_rate": 5.027460920997043e-06, + "loss": 0.0089, + "step": 5950 + }, + { + "epoch": 0.10072416619487422, + "grad_norm": 0.1929161250591278, + "learning_rate": 5.035910435149978e-06, + "loss": 0.0055, + "step": 5960 + }, + { + "epoch": 0.10089316647372468, + "grad_norm": 0.28532758355140686, + "learning_rate": 5.044359949302915e-06, + "loss": 0.0056, + "step": 5970 + }, + { + "epoch": 0.10106216675257514, + "grad_norm": 0.0873856320977211, + "learning_rate": 5.052809463455852e-06, + "loss": 0.0061, + "step": 5980 + }, + { + "epoch": 0.1012311670314256, + "grad_norm": 0.2607017755508423, + "learning_rate": 5.061258977608789e-06, + "loss": 0.0064, + "step": 5990 + }, + { + "epoch": 0.10140016731027607, + "grad_norm": 0.3413243293762207, + "learning_rate": 5.069708491761724e-06, + "loss": 0.0064, + "step": 6000 + }, + { + "epoch": 0.10156916758912653, + "grad_norm": 0.5261040329933167, + "learning_rate": 5.078158005914661e-06, + "loss": 0.0074, + "step": 6010 + }, + { + "epoch": 0.10173816786797699, + "grad_norm": 0.21714960038661957, + "learning_rate": 5.0866075200675965e-06, + "loss": 0.005, + "step": 6020 + }, + { + "epoch": 0.10190716814682745, + "grad_norm": 0.26323312520980835, + "learning_rate": 5.095057034220533e-06, + "loss": 0.0079, + "step": 6030 + }, + { + "epoch": 0.1020761684256779, + "grad_norm": 0.2866646647453308, + "learning_rate": 5.1035065483734695e-06, + "loss": 0.0062, + "step": 6040 + }, + { + "epoch": 0.10224516870452836, + "grad_norm": 0.21184338629245758, + "learning_rate": 5.111956062526405e-06, + "loss": 0.0063, + "step": 6050 + }, + { + "epoch": 0.10241416898337882, + "grad_norm": 0.1997213065624237, + "learning_rate": 5.120405576679342e-06, + "loss": 0.0072, + "step": 6060 + }, + { + "epoch": 0.10258316926222928, + "grad_norm": 0.38983890414237976, + "learning_rate": 
5.128855090832277e-06, + "loss": 0.0065, + "step": 6070 + }, + { + "epoch": 0.10275216954107974, + "grad_norm": 0.23241905868053436, + "learning_rate": 5.137304604985214e-06, + "loss": 0.0059, + "step": 6080 + }, + { + "epoch": 0.1029211698199302, + "grad_norm": 0.44172483682632446, + "learning_rate": 5.14575411913815e-06, + "loss": 0.0053, + "step": 6090 + }, + { + "epoch": 0.10309017009878066, + "grad_norm": 0.33180946111679077, + "learning_rate": 5.154203633291086e-06, + "loss": 0.01, + "step": 6100 + }, + { + "epoch": 0.10325917037763112, + "grad_norm": 0.38379010558128357, + "learning_rate": 5.162653147444022e-06, + "loss": 0.0061, + "step": 6110 + }, + { + "epoch": 0.10342817065648158, + "grad_norm": 0.2291935533285141, + "learning_rate": 5.171102661596958e-06, + "loss": 0.0058, + "step": 6120 + }, + { + "epoch": 0.10359717093533205, + "grad_norm": 0.13137617707252502, + "learning_rate": 5.1795521757498946e-06, + "loss": 0.0055, + "step": 6130 + }, + { + "epoch": 0.10376617121418251, + "grad_norm": 0.3997475504875183, + "learning_rate": 5.188001689902831e-06, + "loss": 0.0071, + "step": 6140 + }, + { + "epoch": 0.10393517149303297, + "grad_norm": 0.4803593158721924, + "learning_rate": 5.196451204055767e-06, + "loss": 0.0078, + "step": 6150 + }, + { + "epoch": 0.10410417177188343, + "grad_norm": 0.2848051190376282, + "learning_rate": 5.204900718208703e-06, + "loss": 0.0064, + "step": 6160 + }, + { + "epoch": 0.10427317205073389, + "grad_norm": 0.4575238823890686, + "learning_rate": 5.213350232361639e-06, + "loss": 0.0077, + "step": 6170 + }, + { + "epoch": 0.10444217232958435, + "grad_norm": 0.2880443036556244, + "learning_rate": 5.221799746514575e-06, + "loss": 0.006, + "step": 6180 + }, + { + "epoch": 0.1046111726084348, + "grad_norm": 0.2867361605167389, + "learning_rate": 5.230249260667513e-06, + "loss": 0.0033, + "step": 6190 + }, + { + "epoch": 0.10478017288728526, + "grad_norm": 0.2843003273010254, + "learning_rate": 5.238698774820449e-06, + "loss": 0.0089, + "step": 6200 + }, + { + "epoch": 0.10494917316613572, + "grad_norm": 0.38200074434280396, + "learning_rate": 5.247148288973385e-06, + "loss": 0.0083, + "step": 6210 + }, + { + "epoch": 0.10511817344498618, + "grad_norm": 0.3250083029270172, + "learning_rate": 5.255597803126321e-06, + "loss": 0.0067, + "step": 6220 + }, + { + "epoch": 0.10528717372383664, + "grad_norm": 0.15679076313972473, + "learning_rate": 5.264047317279257e-06, + "loss": 0.0083, + "step": 6230 + }, + { + "epoch": 0.1054561740026871, + "grad_norm": 0.23474538326263428, + "learning_rate": 5.2724968314321934e-06, + "loss": 0.0071, + "step": 6240 + }, + { + "epoch": 0.10562517428153756, + "grad_norm": 0.21750317513942719, + "learning_rate": 5.28094634558513e-06, + "loss": 0.0075, + "step": 6250 + }, + { + "epoch": 0.10579417456038802, + "grad_norm": 0.31989774107933044, + "learning_rate": 5.289395859738066e-06, + "loss": 0.006, + "step": 6260 + }, + { + "epoch": 0.10596317483923849, + "grad_norm": 0.28282850980758667, + "learning_rate": 5.297845373891002e-06, + "loss": 0.0057, + "step": 6270 + }, + { + "epoch": 0.10613217511808895, + "grad_norm": 0.5023276209831238, + "learning_rate": 5.306294888043938e-06, + "loss": 0.0073, + "step": 6280 + }, + { + "epoch": 0.10630117539693941, + "grad_norm": 0.25754910707473755, + "learning_rate": 5.314744402196874e-06, + "loss": 0.0077, + "step": 6290 + }, + { + "epoch": 0.10647017567578987, + "grad_norm": 0.38880106806755066, + "learning_rate": 5.323193916349811e-06, + "loss": 0.0058, + "step": 6300 + }, + { + "epoch": 
0.10663917595464033, + "grad_norm": 0.3246299922466278, + "learning_rate": 5.331643430502746e-06, + "loss": 0.0058, + "step": 6310 + }, + { + "epoch": 0.10680817623349079, + "grad_norm": 0.27981093525886536, + "learning_rate": 5.340092944655683e-06, + "loss": 0.0058, + "step": 6320 + }, + { + "epoch": 0.10697717651234125, + "grad_norm": 0.22193048894405365, + "learning_rate": 5.3485424588086185e-06, + "loss": 0.0075, + "step": 6330 + }, + { + "epoch": 0.1071461767911917, + "grad_norm": 0.6009519100189209, + "learning_rate": 5.356991972961555e-06, + "loss": 0.0063, + "step": 6340 + }, + { + "epoch": 0.10731517707004216, + "grad_norm": 0.2525694966316223, + "learning_rate": 5.3654414871144915e-06, + "loss": 0.006, + "step": 6350 + }, + { + "epoch": 0.10748417734889262, + "grad_norm": 0.45865657925605774, + "learning_rate": 5.373891001267427e-06, + "loss": 0.0078, + "step": 6360 + }, + { + "epoch": 0.10765317762774308, + "grad_norm": 0.5306742191314697, + "learning_rate": 5.382340515420364e-06, + "loss": 0.0074, + "step": 6370 + }, + { + "epoch": 0.10782217790659354, + "grad_norm": 0.35447096824645996, + "learning_rate": 5.390790029573299e-06, + "loss": 0.0083, + "step": 6380 + }, + { + "epoch": 0.107991178185444, + "grad_norm": 0.32727372646331787, + "learning_rate": 5.399239543726236e-06, + "loss": 0.0052, + "step": 6390 + }, + { + "epoch": 0.10816017846429446, + "grad_norm": 0.2099214345216751, + "learning_rate": 5.407689057879172e-06, + "loss": 0.0044, + "step": 6400 + }, + { + "epoch": 0.10832917874314493, + "grad_norm": 0.18449358642101288, + "learning_rate": 5.416138572032109e-06, + "loss": 0.0068, + "step": 6410 + }, + { + "epoch": 0.10849817902199539, + "grad_norm": 0.24026605486869812, + "learning_rate": 5.424588086185045e-06, + "loss": 0.0067, + "step": 6420 + }, + { + "epoch": 0.10866717930084585, + "grad_norm": 0.2846910059452057, + "learning_rate": 5.433037600337982e-06, + "loss": 0.0052, + "step": 6430 + }, + { + "epoch": 0.10883617957969631, + "grad_norm": 0.3208889365196228, + "learning_rate": 5.441487114490917e-06, + "loss": 0.0063, + "step": 6440 + }, + { + "epoch": 0.10900517985854677, + "grad_norm": 0.2054966241121292, + "learning_rate": 5.449936628643854e-06, + "loss": 0.0067, + "step": 6450 + }, + { + "epoch": 0.10917418013739723, + "grad_norm": 0.33208614587783813, + "learning_rate": 5.4583861427967895e-06, + "loss": 0.0076, + "step": 6460 + }, + { + "epoch": 0.10934318041624769, + "grad_norm": 0.22714951634407043, + "learning_rate": 5.466835656949726e-06, + "loss": 0.0092, + "step": 6470 + }, + { + "epoch": 0.10951218069509815, + "grad_norm": 0.26246002316474915, + "learning_rate": 5.4752851711026625e-06, + "loss": 0.0042, + "step": 6480 + }, + { + "epoch": 0.1096811809739486, + "grad_norm": 0.46016496419906616, + "learning_rate": 5.483734685255598e-06, + "loss": 0.0112, + "step": 6490 + }, + { + "epoch": 0.10985018125279906, + "grad_norm": 0.17244432866573334, + "learning_rate": 5.492184199408535e-06, + "loss": 0.0054, + "step": 6500 + }, + { + "epoch": 0.11001918153164952, + "grad_norm": 0.1076434776186943, + "learning_rate": 5.50063371356147e-06, + "loss": 0.0057, + "step": 6510 + }, + { + "epoch": 0.11018818181049998, + "grad_norm": 0.332721471786499, + "learning_rate": 5.509083227714407e-06, + "loss": 0.005, + "step": 6520 + }, + { + "epoch": 0.11035718208935044, + "grad_norm": 0.12702441215515137, + "learning_rate": 5.517532741867343e-06, + "loss": 0.0071, + "step": 6530 + }, + { + "epoch": 0.1105261823682009, + "grad_norm": 0.13691650331020355, + 
"learning_rate": 5.525982256020279e-06, + "loss": 0.0048, + "step": 6540 + }, + { + "epoch": 0.11069518264705137, + "grad_norm": 0.16740848124027252, + "learning_rate": 5.5344317701732154e-06, + "loss": 0.0045, + "step": 6550 + }, + { + "epoch": 0.11086418292590183, + "grad_norm": 0.14890030026435852, + "learning_rate": 5.542881284326151e-06, + "loss": 0.0083, + "step": 6560 + }, + { + "epoch": 0.11103318320475229, + "grad_norm": 0.5786375999450684, + "learning_rate": 5.5513307984790876e-06, + "loss": 0.0069, + "step": 6570 + }, + { + "epoch": 0.11120218348360275, + "grad_norm": 0.30092403292655945, + "learning_rate": 5.559780312632024e-06, + "loss": 0.0044, + "step": 6580 + }, + { + "epoch": 0.11137118376245321, + "grad_norm": 0.21859614551067352, + "learning_rate": 5.56822982678496e-06, + "loss": 0.0075, + "step": 6590 + }, + { + "epoch": 0.11154018404130367, + "grad_norm": 0.22003114223480225, + "learning_rate": 5.576679340937896e-06, + "loss": 0.0095, + "step": 6600 + }, + { + "epoch": 0.11170918432015413, + "grad_norm": 0.28145134449005127, + "learning_rate": 5.585128855090832e-06, + "loss": 0.0063, + "step": 6610 + }, + { + "epoch": 0.11187818459900459, + "grad_norm": 0.09334838390350342, + "learning_rate": 5.593578369243768e-06, + "loss": 0.0068, + "step": 6620 + }, + { + "epoch": 0.11204718487785505, + "grad_norm": 0.40071040391921997, + "learning_rate": 5.602027883396706e-06, + "loss": 0.0068, + "step": 6630 + }, + { + "epoch": 0.1122161851567055, + "grad_norm": 0.2164887636899948, + "learning_rate": 5.610477397549642e-06, + "loss": 0.0044, + "step": 6640 + }, + { + "epoch": 0.11238518543555596, + "grad_norm": 0.2855166792869568, + "learning_rate": 5.618926911702578e-06, + "loss": 0.0062, + "step": 6650 + }, + { + "epoch": 0.11255418571440642, + "grad_norm": 0.2651625871658325, + "learning_rate": 5.627376425855514e-06, + "loss": 0.0053, + "step": 6660 + }, + { + "epoch": 0.11272318599325688, + "grad_norm": 0.2063087671995163, + "learning_rate": 5.63582594000845e-06, + "loss": 0.0073, + "step": 6670 + }, + { + "epoch": 0.11289218627210734, + "grad_norm": 0.07739928364753723, + "learning_rate": 5.6442754541613865e-06, + "loss": 0.0055, + "step": 6680 + }, + { + "epoch": 0.11306118655095782, + "grad_norm": 0.4230482876300812, + "learning_rate": 5.652724968314323e-06, + "loss": 0.0075, + "step": 6690 + }, + { + "epoch": 0.11323018682980827, + "grad_norm": 0.26819807291030884, + "learning_rate": 5.661174482467259e-06, + "loss": 0.0064, + "step": 6700 + }, + { + "epoch": 0.11339918710865873, + "grad_norm": 0.5605406761169434, + "learning_rate": 5.669623996620195e-06, + "loss": 0.0057, + "step": 6710 + }, + { + "epoch": 0.11356818738750919, + "grad_norm": 0.27150842547416687, + "learning_rate": 5.678073510773131e-06, + "loss": 0.0055, + "step": 6720 + }, + { + "epoch": 0.11373718766635965, + "grad_norm": 0.3509826362133026, + "learning_rate": 5.686523024926067e-06, + "loss": 0.0053, + "step": 6730 + }, + { + "epoch": 0.11390618794521011, + "grad_norm": 0.2064315527677536, + "learning_rate": 5.694972539079004e-06, + "loss": 0.0055, + "step": 6740 + }, + { + "epoch": 0.11407518822406057, + "grad_norm": 0.2994695007801056, + "learning_rate": 5.703422053231939e-06, + "loss": 0.0055, + "step": 6750 + }, + { + "epoch": 0.11424418850291103, + "grad_norm": 0.5383670330047607, + "learning_rate": 5.711871567384876e-06, + "loss": 0.0062, + "step": 6760 + }, + { + "epoch": 0.11441318878176149, + "grad_norm": 0.20319953560829163, + "learning_rate": 5.7203210815378115e-06, + "loss": 0.004, + "step": 
6770 + }, + { + "epoch": 0.11458218906061195, + "grad_norm": 0.28655731678009033, + "learning_rate": 5.728770595690748e-06, + "loss": 0.0032, + "step": 6780 + }, + { + "epoch": 0.1147511893394624, + "grad_norm": 0.225058451294899, + "learning_rate": 5.7372201098436845e-06, + "loss": 0.007, + "step": 6790 + }, + { + "epoch": 0.11492018961831287, + "grad_norm": 0.1450721025466919, + "learning_rate": 5.74566962399662e-06, + "loss": 0.0066, + "step": 6800 + }, + { + "epoch": 0.11508918989716332, + "grad_norm": 0.25479888916015625, + "learning_rate": 5.754119138149557e-06, + "loss": 0.0078, + "step": 6810 + }, + { + "epoch": 0.1152581901760138, + "grad_norm": 0.25367265939712524, + "learning_rate": 5.762568652302492e-06, + "loss": 0.0071, + "step": 6820 + }, + { + "epoch": 0.11542719045486426, + "grad_norm": 0.16044245660305023, + "learning_rate": 5.771018166455429e-06, + "loss": 0.0062, + "step": 6830 + }, + { + "epoch": 0.11559619073371472, + "grad_norm": 0.32665905356407166, + "learning_rate": 5.779467680608365e-06, + "loss": 0.0044, + "step": 6840 + }, + { + "epoch": 0.11576519101256517, + "grad_norm": 0.47442156076431274, + "learning_rate": 5.787917194761303e-06, + "loss": 0.0063, + "step": 6850 + }, + { + "epoch": 0.11593419129141563, + "grad_norm": 0.4188691973686218, + "learning_rate": 5.796366708914238e-06, + "loss": 0.0076, + "step": 6860 + }, + { + "epoch": 0.11610319157026609, + "grad_norm": 0.1815645545721054, + "learning_rate": 5.804816223067175e-06, + "loss": 0.0075, + "step": 6870 + }, + { + "epoch": 0.11627219184911655, + "grad_norm": 0.15755246579647064, + "learning_rate": 5.81326573722011e-06, + "loss": 0.0046, + "step": 6880 + }, + { + "epoch": 0.11644119212796701, + "grad_norm": 0.4051576852798462, + "learning_rate": 5.821715251373047e-06, + "loss": 0.0062, + "step": 6890 + }, + { + "epoch": 0.11661019240681747, + "grad_norm": 0.39070749282836914, + "learning_rate": 5.830164765525983e-06, + "loss": 0.0068, + "step": 6900 + }, + { + "epoch": 0.11677919268566793, + "grad_norm": 0.458823025226593, + "learning_rate": 5.838614279678919e-06, + "loss": 0.008, + "step": 6910 + }, + { + "epoch": 0.11694819296451839, + "grad_norm": 0.03617480769753456, + "learning_rate": 5.8470637938318555e-06, + "loss": 0.005, + "step": 6920 + }, + { + "epoch": 0.11711719324336885, + "grad_norm": 0.2267484962940216, + "learning_rate": 5.855513307984791e-06, + "loss": 0.0051, + "step": 6930 + }, + { + "epoch": 0.1172861935222193, + "grad_norm": 0.2612169682979584, + "learning_rate": 5.863962822137728e-06, + "loss": 0.0062, + "step": 6940 + }, + { + "epoch": 0.11745519380106977, + "grad_norm": 0.42087146639823914, + "learning_rate": 5.872412336290664e-06, + "loss": 0.0127, + "step": 6950 + }, + { + "epoch": 0.11762419407992024, + "grad_norm": 0.3855131268501282, + "learning_rate": 5.8808618504436e-06, + "loss": 0.0056, + "step": 6960 + }, + { + "epoch": 0.1177931943587707, + "grad_norm": 0.4098065197467804, + "learning_rate": 5.889311364596536e-06, + "loss": 0.0071, + "step": 6970 + }, + { + "epoch": 0.11796219463762116, + "grad_norm": 0.18783697485923767, + "learning_rate": 5.897760878749472e-06, + "loss": 0.0093, + "step": 6980 + }, + { + "epoch": 0.11813119491647162, + "grad_norm": 0.1892809122800827, + "learning_rate": 5.9062103929024085e-06, + "loss": 0.0102, + "step": 6990 + }, + { + "epoch": 0.11830019519532207, + "grad_norm": 0.14258983731269836, + "learning_rate": 5.914659907055345e-06, + "loss": 0.0059, + "step": 7000 + }, + { + "epoch": 0.11846919547417253, + "grad_norm": 
0.47493696212768555, + "learning_rate": 5.923109421208281e-06, + "loss": 0.005, + "step": 7010 + }, + { + "epoch": 0.11863819575302299, + "grad_norm": 0.12930482625961304, + "learning_rate": 5.931558935361217e-06, + "loss": 0.0053, + "step": 7020 + }, + { + "epoch": 0.11880719603187345, + "grad_norm": 0.26768070459365845, + "learning_rate": 5.940008449514153e-06, + "loss": 0.0052, + "step": 7030 + }, + { + "epoch": 0.11897619631072391, + "grad_norm": 0.4684838354587555, + "learning_rate": 5.948457963667089e-06, + "loss": 0.0037, + "step": 7040 + }, + { + "epoch": 0.11914519658957437, + "grad_norm": 0.33800092339515686, + "learning_rate": 5.956907477820026e-06, + "loss": 0.0094, + "step": 7050 + }, + { + "epoch": 0.11931419686842483, + "grad_norm": 0.1822284311056137, + "learning_rate": 5.965356991972962e-06, + "loss": 0.0069, + "step": 7060 + }, + { + "epoch": 0.11948319714727529, + "grad_norm": 0.3310371935367584, + "learning_rate": 5.973806506125899e-06, + "loss": 0.0073, + "step": 7070 + }, + { + "epoch": 0.11965219742612575, + "grad_norm": 0.1805610954761505, + "learning_rate": 5.982256020278835e-06, + "loss": 0.0067, + "step": 7080 + }, + { + "epoch": 0.1198211977049762, + "grad_norm": 0.22287240624427795, + "learning_rate": 5.990705534431771e-06, + "loss": 0.0062, + "step": 7090 + }, + { + "epoch": 0.11999019798382668, + "grad_norm": 0.22043073177337646, + "learning_rate": 5.999155048584707e-06, + "loss": 0.0054, + "step": 7100 + }, + { + "epoch": 0.12015919826267714, + "grad_norm": 0.36443981528282166, + "learning_rate": 6.007604562737643e-06, + "loss": 0.0065, + "step": 7110 + }, + { + "epoch": 0.1203281985415276, + "grad_norm": 0.22064130008220673, + "learning_rate": 6.0160540768905795e-06, + "loss": 0.0073, + "step": 7120 + }, + { + "epoch": 0.12049719882037806, + "grad_norm": 0.48764801025390625, + "learning_rate": 6.024503591043516e-06, + "loss": 0.0122, + "step": 7130 + }, + { + "epoch": 0.12066619909922852, + "grad_norm": 0.15102821588516235, + "learning_rate": 6.032953105196452e-06, + "loss": 0.0062, + "step": 7140 + }, + { + "epoch": 0.12083519937807898, + "grad_norm": 0.22233891487121582, + "learning_rate": 6.041402619349388e-06, + "loss": 0.0058, + "step": 7150 + }, + { + "epoch": 0.12100419965692943, + "grad_norm": 0.4343150854110718, + "learning_rate": 6.049852133502324e-06, + "loss": 0.0065, + "step": 7160 + }, + { + "epoch": 0.1211731999357799, + "grad_norm": 0.28188592195510864, + "learning_rate": 6.05830164765526e-06, + "loss": 0.0102, + "step": 7170 + }, + { + "epoch": 0.12134220021463035, + "grad_norm": 0.5624836683273315, + "learning_rate": 6.066751161808197e-06, + "loss": 0.0052, + "step": 7180 + }, + { + "epoch": 0.12151120049348081, + "grad_norm": 0.3889232873916626, + "learning_rate": 6.075200675961132e-06, + "loss": 0.0056, + "step": 7190 + }, + { + "epoch": 0.12168020077233127, + "grad_norm": 0.22911286354064941, + "learning_rate": 6.083650190114069e-06, + "loss": 0.0035, + "step": 7200 + }, + { + "epoch": 0.12184920105118173, + "grad_norm": 0.42093756794929504, + "learning_rate": 6.0920997042670045e-06, + "loss": 0.0062, + "step": 7210 + }, + { + "epoch": 0.12201820133003219, + "grad_norm": 0.39620158076286316, + "learning_rate": 6.100549218419941e-06, + "loss": 0.0049, + "step": 7220 + }, + { + "epoch": 0.12218720160888265, + "grad_norm": 0.25324249267578125, + "learning_rate": 6.1089987325728775e-06, + "loss": 0.0085, + "step": 7230 + }, + { + "epoch": 0.12235620188773312, + "grad_norm": 0.5439518094062805, + "learning_rate": 6.117448246725813e-06, + 
"loss": 0.0076, + "step": 7240 + }, + { + "epoch": 0.12252520216658358, + "grad_norm": 0.2995526194572449, + "learning_rate": 6.12589776087875e-06, + "loss": 0.0091, + "step": 7250 + }, + { + "epoch": 0.12269420244543404, + "grad_norm": 0.07601664960384369, + "learning_rate": 6.134347275031685e-06, + "loss": 0.0048, + "step": 7260 + }, + { + "epoch": 0.1228632027242845, + "grad_norm": 0.196809783577919, + "learning_rate": 6.142796789184622e-06, + "loss": 0.0073, + "step": 7270 + }, + { + "epoch": 0.12303220300313496, + "grad_norm": 0.1969756931066513, + "learning_rate": 6.151246303337559e-06, + "loss": 0.0057, + "step": 7280 + }, + { + "epoch": 0.12320120328198542, + "grad_norm": 0.21454773843288422, + "learning_rate": 6.159695817490496e-06, + "loss": 0.0037, + "step": 7290 + }, + { + "epoch": 0.12337020356083588, + "grad_norm": 0.21385742723941803, + "learning_rate": 6.168145331643431e-06, + "loss": 0.006, + "step": 7300 + }, + { + "epoch": 0.12353920383968633, + "grad_norm": 0.2406219244003296, + "learning_rate": 6.176594845796368e-06, + "loss": 0.0084, + "step": 7310 + }, + { + "epoch": 0.1237082041185368, + "grad_norm": 0.23890087008476257, + "learning_rate": 6.1850443599493034e-06, + "loss": 0.0097, + "step": 7320 + }, + { + "epoch": 0.12387720439738725, + "grad_norm": 0.4540402293205261, + "learning_rate": 6.19349387410224e-06, + "loss": 0.0056, + "step": 7330 + }, + { + "epoch": 0.12404620467623771, + "grad_norm": 0.2217201441526413, + "learning_rate": 6.2019433882551764e-06, + "loss": 0.0068, + "step": 7340 + }, + { + "epoch": 0.12421520495508817, + "grad_norm": 0.4099099040031433, + "learning_rate": 6.210392902408112e-06, + "loss": 0.0056, + "step": 7350 + }, + { + "epoch": 0.12438420523393863, + "grad_norm": 0.3341483175754547, + "learning_rate": 6.2188424165610486e-06, + "loss": 0.0051, + "step": 7360 + }, + { + "epoch": 0.1245532055127891, + "grad_norm": 0.23542355000972748, + "learning_rate": 6.227291930713984e-06, + "loss": 0.0039, + "step": 7370 + }, + { + "epoch": 0.12472220579163956, + "grad_norm": 0.24790506064891815, + "learning_rate": 6.235741444866921e-06, + "loss": 0.0063, + "step": 7380 + }, + { + "epoch": 0.12489120607049002, + "grad_norm": 0.5249748826026917, + "learning_rate": 6.244190959019857e-06, + "loss": 0.0057, + "step": 7390 + }, + { + "epoch": 0.12506020634934048, + "grad_norm": 0.3524332642555237, + "learning_rate": 6.252640473172793e-06, + "loss": 0.0049, + "step": 7400 + }, + { + "epoch": 0.12522920662819093, + "grad_norm": 0.21495512127876282, + "learning_rate": 6.261089987325729e-06, + "loss": 0.0055, + "step": 7410 + }, + { + "epoch": 0.1253982069070414, + "grad_norm": 0.17406326532363892, + "learning_rate": 6.269539501478665e-06, + "loss": 0.0059, + "step": 7420 + }, + { + "epoch": 0.12556720718589184, + "grad_norm": 0.17095540463924408, + "learning_rate": 6.2779890156316015e-06, + "loss": 0.005, + "step": 7430 + }, + { + "epoch": 0.12573620746474232, + "grad_norm": 0.12662187218666077, + "learning_rate": 6.286438529784538e-06, + "loss": 0.0072, + "step": 7440 + }, + { + "epoch": 0.1259052077435928, + "grad_norm": 0.14037641882896423, + "learning_rate": 6.294888043937474e-06, + "loss": 0.005, + "step": 7450 + }, + { + "epoch": 0.12607420802244323, + "grad_norm": 0.4467966854572296, + "learning_rate": 6.30333755809041e-06, + "loss": 0.0068, + "step": 7460 + }, + { + "epoch": 0.1262432083012937, + "grad_norm": 0.29418110847473145, + "learning_rate": 6.311787072243346e-06, + "loss": 0.0072, + "step": 7470 + }, + { + "epoch": 0.12641220858014415, + 
"grad_norm": 0.2855652868747711, + "learning_rate": 6.320236586396282e-06, + "loss": 0.0045, + "step": 7480 + }, + { + "epoch": 0.12658120885899463, + "grad_norm": 0.249783456325531, + "learning_rate": 6.328686100549219e-06, + "loss": 0.0053, + "step": 7490 + }, + { + "epoch": 0.12675020913784507, + "grad_norm": 0.1699150800704956, + "learning_rate": 6.337135614702155e-06, + "loss": 0.0074, + "step": 7500 + }, + { + "epoch": 0.12691920941669554, + "grad_norm": 0.1926853507757187, + "learning_rate": 6.345585128855092e-06, + "loss": 0.0058, + "step": 7510 + }, + { + "epoch": 0.127088209695546, + "grad_norm": 0.18150867521762848, + "learning_rate": 6.354034643008028e-06, + "loss": 0.0057, + "step": 7520 + }, + { + "epoch": 0.12725720997439646, + "grad_norm": 0.23561467230319977, + "learning_rate": 6.362484157160964e-06, + "loss": 0.0057, + "step": 7530 + }, + { + "epoch": 0.1274262102532469, + "grad_norm": 0.12343981117010117, + "learning_rate": 6.3709336713139e-06, + "loss": 0.006, + "step": 7540 + }, + { + "epoch": 0.12759521053209738, + "grad_norm": 0.23419040441513062, + "learning_rate": 6.379383185466836e-06, + "loss": 0.0041, + "step": 7550 + }, + { + "epoch": 0.12776421081094783, + "grad_norm": 0.2999023497104645, + "learning_rate": 6.3878326996197725e-06, + "loss": 0.0096, + "step": 7560 + }, + { + "epoch": 0.1279332110897983, + "grad_norm": 0.513978123664856, + "learning_rate": 6.396282213772709e-06, + "loss": 0.0049, + "step": 7570 + }, + { + "epoch": 0.12810221136864877, + "grad_norm": 0.49737265706062317, + "learning_rate": 6.404731727925645e-06, + "loss": 0.0061, + "step": 7580 + }, + { + "epoch": 0.12827121164749922, + "grad_norm": 0.2800723612308502, + "learning_rate": 6.413181242078581e-06, + "loss": 0.0079, + "step": 7590 + }, + { + "epoch": 0.1284402119263497, + "grad_norm": 0.2312825322151184, + "learning_rate": 6.421630756231517e-06, + "loss": 0.0068, + "step": 7600 + }, + { + "epoch": 0.12860921220520014, + "grad_norm": 0.10857295989990234, + "learning_rate": 6.430080270384453e-06, + "loss": 0.0045, + "step": 7610 + }, + { + "epoch": 0.1287782124840506, + "grad_norm": 0.2910762131214142, + "learning_rate": 6.43852978453739e-06, + "loss": 0.0064, + "step": 7620 + }, + { + "epoch": 0.12894721276290105, + "grad_norm": 0.09099793434143066, + "learning_rate": 6.4469792986903254e-06, + "loss": 0.0046, + "step": 7630 + }, + { + "epoch": 0.12911621304175153, + "grad_norm": 0.5014476180076599, + "learning_rate": 6.455428812843262e-06, + "loss": 0.0064, + "step": 7640 + }, + { + "epoch": 0.12928521332060197, + "grad_norm": 0.4255957305431366, + "learning_rate": 6.4638783269961976e-06, + "loss": 0.0084, + "step": 7650 + }, + { + "epoch": 0.12945421359945244, + "grad_norm": 0.4127286672592163, + "learning_rate": 6.472327841149134e-06, + "loss": 0.0055, + "step": 7660 + }, + { + "epoch": 0.1296232138783029, + "grad_norm": 0.3005797266960144, + "learning_rate": 6.4807773553020706e-06, + "loss": 0.0066, + "step": 7670 + }, + { + "epoch": 0.12979221415715336, + "grad_norm": 0.24604128301143646, + "learning_rate": 6.489226869455006e-06, + "loss": 0.0062, + "step": 7680 + }, + { + "epoch": 0.1299612144360038, + "grad_norm": 0.11351402848958969, + "learning_rate": 6.497676383607943e-06, + "loss": 0.0068, + "step": 7690 + }, + { + "epoch": 0.13013021471485428, + "grad_norm": 0.31498342752456665, + "learning_rate": 6.506125897760879e-06, + "loss": 0.0052, + "step": 7700 + }, + { + "epoch": 0.13029921499370473, + "grad_norm": 0.17923851311206818, + "learning_rate": 6.514575411913816e-06, + 
"loss": 0.0085, + "step": 7710 + }, + { + "epoch": 0.1304682152725552, + "grad_norm": 0.1601186990737915, + "learning_rate": 6.523024926066752e-06, + "loss": 0.0044, + "step": 7720 + }, + { + "epoch": 0.13063721555140567, + "grad_norm": 0.3751295506954193, + "learning_rate": 6.531474440219689e-06, + "loss": 0.0076, + "step": 7730 + }, + { + "epoch": 0.13080621583025612, + "grad_norm": 0.33075717091560364, + "learning_rate": 6.539923954372624e-06, + "loss": 0.0067, + "step": 7740 + }, + { + "epoch": 0.1309752161091066, + "grad_norm": 0.12268586456775665, + "learning_rate": 6.548373468525561e-06, + "loss": 0.0061, + "step": 7750 + }, + { + "epoch": 0.13114421638795704, + "grad_norm": 0.4134727418422699, + "learning_rate": 6.5568229826784965e-06, + "loss": 0.0084, + "step": 7760 + }, + { + "epoch": 0.1313132166668075, + "grad_norm": 0.030967149883508682, + "learning_rate": 6.565272496831433e-06, + "loss": 0.0048, + "step": 7770 + }, + { + "epoch": 0.13148221694565795, + "grad_norm": 0.10871484875679016, + "learning_rate": 6.5737220109843695e-06, + "loss": 0.007, + "step": 7780 + }, + { + "epoch": 0.13165121722450843, + "grad_norm": 0.3758116364479065, + "learning_rate": 6.582171525137305e-06, + "loss": 0.0055, + "step": 7790 + }, + { + "epoch": 0.13182021750335887, + "grad_norm": 0.90146404504776, + "learning_rate": 6.590621039290242e-06, + "loss": 0.0051, + "step": 7800 + }, + { + "epoch": 0.13198921778220934, + "grad_norm": 0.4192593991756439, + "learning_rate": 6.599070553443177e-06, + "loss": 0.0123, + "step": 7810 + }, + { + "epoch": 0.1321582180610598, + "grad_norm": 0.4820626378059387, + "learning_rate": 6.607520067596114e-06, + "loss": 0.0082, + "step": 7820 + }, + { + "epoch": 0.13232721833991026, + "grad_norm": 0.0934116318821907, + "learning_rate": 6.61596958174905e-06, + "loss": 0.0058, + "step": 7830 + }, + { + "epoch": 0.1324962186187607, + "grad_norm": 0.46475282311439514, + "learning_rate": 6.624419095901986e-06, + "loss": 0.0053, + "step": 7840 + }, + { + "epoch": 0.13266521889761118, + "grad_norm": 0.15889766812324524, + "learning_rate": 6.632868610054922e-06, + "loss": 0.0036, + "step": 7850 + }, + { + "epoch": 0.13283421917646165, + "grad_norm": 0.1389142870903015, + "learning_rate": 6.641318124207858e-06, + "loss": 0.0069, + "step": 7860 + }, + { + "epoch": 0.1330032194553121, + "grad_norm": 0.31071558594703674, + "learning_rate": 6.6497676383607945e-06, + "loss": 0.0075, + "step": 7870 + }, + { + "epoch": 0.13317221973416257, + "grad_norm": 0.2616838812828064, + "learning_rate": 6.658217152513731e-06, + "loss": 0.006, + "step": 7880 + }, + { + "epoch": 0.13334122001301302, + "grad_norm": 0.4653517007827759, + "learning_rate": 6.666666666666667e-06, + "loss": 0.0053, + "step": 7890 + }, + { + "epoch": 0.1335102202918635, + "grad_norm": 0.33546552062034607, + "learning_rate": 6.675116180819603e-06, + "loss": 0.0059, + "step": 7900 + }, + { + "epoch": 0.13367922057071394, + "grad_norm": 0.21979467570781708, + "learning_rate": 6.683565694972539e-06, + "loss": 0.0053, + "step": 7910 + }, + { + "epoch": 0.1338482208495644, + "grad_norm": 0.24753090739250183, + "learning_rate": 6.692015209125475e-06, + "loss": 0.0055, + "step": 7920 + }, + { + "epoch": 0.13401722112841485, + "grad_norm": 0.4232696294784546, + "learning_rate": 6.700464723278413e-06, + "loss": 0.0077, + "step": 7930 + }, + { + "epoch": 0.13418622140726533, + "grad_norm": 0.14672839641571045, + "learning_rate": 6.708914237431349e-06, + "loss": 0.0058, + "step": 7940 + }, + { + "epoch": 0.13435522168611577, + 
"grad_norm": 0.1414640247821808, + "learning_rate": 6.717363751584285e-06, + "loss": 0.0064, + "step": 7950 + }, + { + "epoch": 0.13452422196496625, + "grad_norm": 0.17650949954986572, + "learning_rate": 6.725813265737221e-06, + "loss": 0.0055, + "step": 7960 + }, + { + "epoch": 0.1346932222438167, + "grad_norm": 0.14567339420318604, + "learning_rate": 6.734262779890157e-06, + "loss": 0.0063, + "step": 7970 + }, + { + "epoch": 0.13486222252266716, + "grad_norm": 0.24944984912872314, + "learning_rate": 6.742712294043093e-06, + "loss": 0.0073, + "step": 7980 + }, + { + "epoch": 0.13503122280151764, + "grad_norm": 0.5359504818916321, + "learning_rate": 6.75116180819603e-06, + "loss": 0.0056, + "step": 7990 + }, + { + "epoch": 0.13520022308036808, + "grad_norm": 0.43457257747650146, + "learning_rate": 6.7596113223489655e-06, + "loss": 0.0068, + "step": 8000 + }, + { + "epoch": 0.13536922335921855, + "grad_norm": 0.38084739446640015, + "learning_rate": 6.768060836501902e-06, + "loss": 0.0065, + "step": 8010 + }, + { + "epoch": 0.135538223638069, + "grad_norm": 6.1073760986328125, + "learning_rate": 6.776510350654838e-06, + "loss": 0.0073, + "step": 8020 + }, + { + "epoch": 0.13570722391691947, + "grad_norm": 0.7895514369010925, + "learning_rate": 6.784959864807774e-06, + "loss": 0.0088, + "step": 8030 + }, + { + "epoch": 0.13587622419576992, + "grad_norm": 0.2781662046909332, + "learning_rate": 6.793409378960711e-06, + "loss": 0.0047, + "step": 8040 + }, + { + "epoch": 0.1360452244746204, + "grad_norm": 0.16183938086032867, + "learning_rate": 6.801858893113646e-06, + "loss": 0.0062, + "step": 8050 + }, + { + "epoch": 0.13621422475347084, + "grad_norm": 0.2602287828922272, + "learning_rate": 6.810308407266583e-06, + "loss": 0.0055, + "step": 8060 + }, + { + "epoch": 0.1363832250323213, + "grad_norm": 0.1746702939271927, + "learning_rate": 6.8187579214195185e-06, + "loss": 0.007, + "step": 8070 + }, + { + "epoch": 0.13655222531117175, + "grad_norm": 0.26238518953323364, + "learning_rate": 6.827207435572455e-06, + "loss": 0.0073, + "step": 8080 + }, + { + "epoch": 0.13672122559002223, + "grad_norm": 0.22383089363574982, + "learning_rate": 6.8356569497253914e-06, + "loss": 0.0064, + "step": 8090 + }, + { + "epoch": 0.13689022586887267, + "grad_norm": 0.33924341201782227, + "learning_rate": 6.844106463878327e-06, + "loss": 0.0068, + "step": 8100 + }, + { + "epoch": 0.13705922614772315, + "grad_norm": 0.25141218304634094, + "learning_rate": 6.852555978031264e-06, + "loss": 0.0041, + "step": 8110 + }, + { + "epoch": 0.1372282264265736, + "grad_norm": 0.15798458456993103, + "learning_rate": 6.861005492184199e-06, + "loss": 0.006, + "step": 8120 + }, + { + "epoch": 0.13739722670542406, + "grad_norm": 0.6393792629241943, + "learning_rate": 6.869455006337136e-06, + "loss": 0.0043, + "step": 8130 + }, + { + "epoch": 0.13756622698427454, + "grad_norm": 0.12887583673000336, + "learning_rate": 6.877904520490072e-06, + "loss": 0.0069, + "step": 8140 + }, + { + "epoch": 0.13773522726312498, + "grad_norm": 0.3371468484401703, + "learning_rate": 6.886354034643009e-06, + "loss": 0.0036, + "step": 8150 + }, + { + "epoch": 0.13790422754197545, + "grad_norm": 1.44239342212677, + "learning_rate": 6.894803548795945e-06, + "loss": 0.0063, + "step": 8160 + }, + { + "epoch": 0.1380732278208259, + "grad_norm": 0.2369224727153778, + "learning_rate": 6.903253062948882e-06, + "loss": 0.0062, + "step": 8170 + }, + { + "epoch": 0.13824222809967637, + "grad_norm": 0.30964913964271545, + "learning_rate": 6.911702577101817e-06, + 
"loss": 0.0074, + "step": 8180 + }, + { + "epoch": 0.13841122837852682, + "grad_norm": 0.27758997678756714, + "learning_rate": 6.920152091254754e-06, + "loss": 0.0073, + "step": 8190 + }, + { + "epoch": 0.1385802286573773, + "grad_norm": 0.295818030834198, + "learning_rate": 6.9286016054076895e-06, + "loss": 0.0076, + "step": 8200 + }, + { + "epoch": 0.13874922893622774, + "grad_norm": 0.2563367784023285, + "learning_rate": 6.937051119560626e-06, + "loss": 0.0062, + "step": 8210 + }, + { + "epoch": 0.1389182292150782, + "grad_norm": 0.406826913356781, + "learning_rate": 6.9455006337135625e-06, + "loss": 0.0056, + "step": 8220 + }, + { + "epoch": 0.13908722949392865, + "grad_norm": 0.34856709837913513, + "learning_rate": 6.953950147866498e-06, + "loss": 0.0057, + "step": 8230 + }, + { + "epoch": 0.13925622977277913, + "grad_norm": 0.04294797405600548, + "learning_rate": 6.962399662019435e-06, + "loss": 0.0047, + "step": 8240 + }, + { + "epoch": 0.13942523005162957, + "grad_norm": 0.28117257356643677, + "learning_rate": 6.97084917617237e-06, + "loss": 0.0088, + "step": 8250 + }, + { + "epoch": 0.13959423033048005, + "grad_norm": 0.15075592696666718, + "learning_rate": 6.979298690325307e-06, + "loss": 0.0051, + "step": 8260 + }, + { + "epoch": 0.13976323060933052, + "grad_norm": 0.2688157558441162, + "learning_rate": 6.987748204478243e-06, + "loss": 0.0093, + "step": 8270 + }, + { + "epoch": 0.13993223088818096, + "grad_norm": 0.277136892080307, + "learning_rate": 6.996197718631179e-06, + "loss": 0.0041, + "step": 8280 + }, + { + "epoch": 0.14010123116703144, + "grad_norm": 0.15421010553836823, + "learning_rate": 7.004647232784115e-06, + "loss": 0.0052, + "step": 8290 + }, + { + "epoch": 0.14027023144588188, + "grad_norm": 0.20295974612236023, + "learning_rate": 7.013096746937051e-06, + "loss": 0.006, + "step": 8300 + }, + { + "epoch": 0.14043923172473236, + "grad_norm": 0.24425384402275085, + "learning_rate": 7.0215462610899875e-06, + "loss": 0.0055, + "step": 8310 + }, + { + "epoch": 0.1406082320035828, + "grad_norm": 0.20600204169750214, + "learning_rate": 7.029995775242924e-06, + "loss": 0.0041, + "step": 8320 + }, + { + "epoch": 0.14077723228243327, + "grad_norm": 0.3156473636627197, + "learning_rate": 7.03844528939586e-06, + "loss": 0.0092, + "step": 8330 + }, + { + "epoch": 0.14094623256128372, + "grad_norm": 0.09433050453662872, + "learning_rate": 7.046894803548796e-06, + "loss": 0.0041, + "step": 8340 + }, + { + "epoch": 0.1411152328401342, + "grad_norm": 0.14726519584655762, + "learning_rate": 7.055344317701732e-06, + "loss": 0.0072, + "step": 8350 + }, + { + "epoch": 0.14128423311898464, + "grad_norm": 0.40286463499069214, + "learning_rate": 7.063793831854668e-06, + "loss": 0.0052, + "step": 8360 + }, + { + "epoch": 0.1414532333978351, + "grad_norm": 0.3082537055015564, + "learning_rate": 7.072243346007606e-06, + "loss": 0.0037, + "step": 8370 + }, + { + "epoch": 0.14162223367668555, + "grad_norm": 0.1920309066772461, + "learning_rate": 7.080692860160542e-06, + "loss": 0.008, + "step": 8380 + }, + { + "epoch": 0.14179123395553603, + "grad_norm": 0.30830442905426025, + "learning_rate": 7.089142374313478e-06, + "loss": 0.0067, + "step": 8390 + }, + { + "epoch": 0.14196023423438647, + "grad_norm": 0.23955096304416656, + "learning_rate": 7.097591888466414e-06, + "loss": 0.0042, + "step": 8400 + }, + { + "epoch": 0.14212923451323695, + "grad_norm": 0.44451770186424255, + "learning_rate": 7.10604140261935e-06, + "loss": 0.0055, + "step": 8410 + }, + { + "epoch": 0.14229823479208742, + 
"grad_norm": 0.15314729511737823, + "learning_rate": 7.114490916772286e-06, + "loss": 0.0032, + "step": 8420 + }, + { + "epoch": 0.14246723507093786, + "grad_norm": 0.27596771717071533, + "learning_rate": 7.122940430925223e-06, + "loss": 0.0051, + "step": 8430 + }, + { + "epoch": 0.14263623534978834, + "grad_norm": 0.11215230822563171, + "learning_rate": 7.1313899450781586e-06, + "loss": 0.0043, + "step": 8440 + }, + { + "epoch": 0.14280523562863878, + "grad_norm": 0.35231873393058777, + "learning_rate": 7.139839459231095e-06, + "loss": 0.0051, + "step": 8450 + }, + { + "epoch": 0.14297423590748926, + "grad_norm": 0.26749947667121887, + "learning_rate": 7.148288973384031e-06, + "loss": 0.0057, + "step": 8460 + }, + { + "epoch": 0.1431432361863397, + "grad_norm": 0.1803816854953766, + "learning_rate": 7.156738487536967e-06, + "loss": 0.0042, + "step": 8470 + }, + { + "epoch": 0.14331223646519017, + "grad_norm": 0.264035701751709, + "learning_rate": 7.165188001689904e-06, + "loss": 0.0067, + "step": 8480 + }, + { + "epoch": 0.14348123674404062, + "grad_norm": 0.22792071104049683, + "learning_rate": 7.173637515842839e-06, + "loss": 0.0047, + "step": 8490 + }, + { + "epoch": 0.1436502370228911, + "grad_norm": 0.0940081998705864, + "learning_rate": 7.182087029995776e-06, + "loss": 0.006, + "step": 8500 + }, + { + "epoch": 0.14381923730174154, + "grad_norm": 0.33512696623802185, + "learning_rate": 7.1905365441487115e-06, + "loss": 0.0049, + "step": 8510 + }, + { + "epoch": 0.143988237580592, + "grad_norm": 0.1594853699207306, + "learning_rate": 7.198986058301648e-06, + "loss": 0.0035, + "step": 8520 + }, + { + "epoch": 0.14415723785944246, + "grad_norm": 0.33963024616241455, + "learning_rate": 7.2074355724545845e-06, + "loss": 0.0043, + "step": 8530 + }, + { + "epoch": 0.14432623813829293, + "grad_norm": 2.5675406455993652, + "learning_rate": 7.21588508660752e-06, + "loss": 0.0066, + "step": 8540 + }, + { + "epoch": 0.1444952384171434, + "grad_norm": 0.38166311383247375, + "learning_rate": 7.224334600760457e-06, + "loss": 0.0055, + "step": 8550 + }, + { + "epoch": 0.14466423869599385, + "grad_norm": 0.15342292189598083, + "learning_rate": 7.232784114913392e-06, + "loss": 0.0049, + "step": 8560 + }, + { + "epoch": 0.14483323897484432, + "grad_norm": 0.20990873873233795, + "learning_rate": 7.241233629066329e-06, + "loss": 0.0051, + "step": 8570 + }, + { + "epoch": 0.14500223925369476, + "grad_norm": 0.17052221298217773, + "learning_rate": 7.249683143219266e-06, + "loss": 0.0049, + "step": 8580 + }, + { + "epoch": 0.14517123953254524, + "grad_norm": 0.4310913681983948, + "learning_rate": 7.258132657372203e-06, + "loss": 0.0063, + "step": 8590 + }, + { + "epoch": 0.14534023981139568, + "grad_norm": 0.25773683190345764, + "learning_rate": 7.266582171525138e-06, + "loss": 0.0108, + "step": 8600 + }, + { + "epoch": 0.14550924009024616, + "grad_norm": 0.20230819284915924, + "learning_rate": 7.275031685678075e-06, + "loss": 0.008, + "step": 8610 + }, + { + "epoch": 0.1456782403690966, + "grad_norm": 0.21330519020557404, + "learning_rate": 7.28348119983101e-06, + "loss": 0.0057, + "step": 8620 + }, + { + "epoch": 0.14584724064794707, + "grad_norm": 0.22225132584571838, + "learning_rate": 7.291930713983947e-06, + "loss": 0.0047, + "step": 8630 + }, + { + "epoch": 0.14601624092679752, + "grad_norm": 0.23057548701763153, + "learning_rate": 7.300380228136883e-06, + "loss": 0.0075, + "step": 8640 + }, + { + "epoch": 0.146185241205648, + "grad_norm": 0.10859983414411545, + "learning_rate": 
7.308829742289819e-06, + "loss": 0.0053, + "step": 8650 + }, + { + "epoch": 0.14635424148449844, + "grad_norm": 0.3728826642036438, + "learning_rate": 7.3172792564427555e-06, + "loss": 0.0076, + "step": 8660 + }, + { + "epoch": 0.1465232417633489, + "grad_norm": 0.24150754511356354, + "learning_rate": 7.325728770595691e-06, + "loss": 0.0065, + "step": 8670 + }, + { + "epoch": 0.14669224204219938, + "grad_norm": 0.2621629536151886, + "learning_rate": 7.334178284748628e-06, + "loss": 0.0038, + "step": 8680 + }, + { + "epoch": 0.14686124232104983, + "grad_norm": 0.1941870152950287, + "learning_rate": 7.342627798901564e-06, + "loss": 0.0056, + "step": 8690 + }, + { + "epoch": 0.1470302425999003, + "grad_norm": 0.3373866677284241, + "learning_rate": 7.3510773130545e-06, + "loss": 0.007, + "step": 8700 + }, + { + "epoch": 0.14719924287875075, + "grad_norm": 0.27150392532348633, + "learning_rate": 7.359526827207436e-06, + "loss": 0.0042, + "step": 8710 + }, + { + "epoch": 0.14736824315760122, + "grad_norm": 0.271064430475235, + "learning_rate": 7.367976341360372e-06, + "loss": 0.0072, + "step": 8720 + }, + { + "epoch": 0.14753724343645166, + "grad_norm": 0.11885469406843185, + "learning_rate": 7.376425855513308e-06, + "loss": 0.0064, + "step": 8730 + }, + { + "epoch": 0.14770624371530214, + "grad_norm": 0.36805516481399536, + "learning_rate": 7.384875369666245e-06, + "loss": 0.0046, + "step": 8740 + }, + { + "epoch": 0.14787524399415258, + "grad_norm": 0.3535545766353607, + "learning_rate": 7.3933248838191806e-06, + "loss": 0.0052, + "step": 8750 + }, + { + "epoch": 0.14804424427300306, + "grad_norm": 0.4129721224308014, + "learning_rate": 7.401774397972117e-06, + "loss": 0.0063, + "step": 8760 + }, + { + "epoch": 0.1482132445518535, + "grad_norm": 0.24997562170028687, + "learning_rate": 7.410223912125053e-06, + "loss": 0.0063, + "step": 8770 + }, + { + "epoch": 0.14838224483070397, + "grad_norm": 0.28022584319114685, + "learning_rate": 7.418673426277989e-06, + "loss": 0.0059, + "step": 8780 + }, + { + "epoch": 0.14855124510955442, + "grad_norm": 0.2782224118709564, + "learning_rate": 7.427122940430926e-06, + "loss": 0.0051, + "step": 8790 + }, + { + "epoch": 0.1487202453884049, + "grad_norm": 0.17080119252204895, + "learning_rate": 7.435572454583862e-06, + "loss": 0.0047, + "step": 8800 + }, + { + "epoch": 0.14888924566725534, + "grad_norm": 0.2720976769924164, + "learning_rate": 7.444021968736799e-06, + "loss": 0.0042, + "step": 8810 + }, + { + "epoch": 0.1490582459461058, + "grad_norm": 0.1649436503648758, + "learning_rate": 7.452471482889735e-06, + "loss": 0.0037, + "step": 8820 + }, + { + "epoch": 0.14922724622495628, + "grad_norm": 0.1506228744983673, + "learning_rate": 7.460920997042671e-06, + "loss": 0.0087, + "step": 8830 + }, + { + "epoch": 0.14939624650380673, + "grad_norm": 0.21330970525741577, + "learning_rate": 7.469370511195607e-06, + "loss": 0.0055, + "step": 8840 + }, + { + "epoch": 0.1495652467826572, + "grad_norm": 0.12506476044654846, + "learning_rate": 7.477820025348543e-06, + "loss": 0.0058, + "step": 8850 + }, + { + "epoch": 0.14973424706150765, + "grad_norm": 0.1709899604320526, + "learning_rate": 7.4862695395014794e-06, + "loss": 0.0043, + "step": 8860 + }, + { + "epoch": 0.14990324734035812, + "grad_norm": 0.16848011314868927, + "learning_rate": 7.494719053654416e-06, + "loss": 0.0042, + "step": 8870 + }, + { + "epoch": 0.15007224761920857, + "grad_norm": 0.12850339710712433, + "learning_rate": 7.503168567807352e-06, + "loss": 0.0056, + "step": 8880 + }, + { + "epoch": 
0.15024124789805904, + "grad_norm": 0.3136073648929596, + "learning_rate": 7.511618081960288e-06, + "loss": 0.0029, + "step": 8890 + }, + { + "epoch": 0.15041024817690948, + "grad_norm": 0.15144753456115723, + "learning_rate": 7.520067596113224e-06, + "loss": 0.0045, + "step": 8900 + }, + { + "epoch": 0.15057924845575996, + "grad_norm": 0.4413564205169678, + "learning_rate": 7.52851711026616e-06, + "loss": 0.0054, + "step": 8910 + }, + { + "epoch": 0.1507482487346104, + "grad_norm": 0.5044617056846619, + "learning_rate": 7.536966624419097e-06, + "loss": 0.008, + "step": 8920 + }, + { + "epoch": 0.15091724901346087, + "grad_norm": 0.16861020028591156, + "learning_rate": 7.545416138572032e-06, + "loss": 0.004, + "step": 8930 + }, + { + "epoch": 0.15108624929231132, + "grad_norm": 0.2561737298965454, + "learning_rate": 7.553865652724969e-06, + "loss": 0.0082, + "step": 8940 + }, + { + "epoch": 0.1512552495711618, + "grad_norm": 0.12116044759750366, + "learning_rate": 7.5623151668779045e-06, + "loss": 0.005, + "step": 8950 + }, + { + "epoch": 0.15142424985001227, + "grad_norm": 0.16940058767795563, + "learning_rate": 7.570764681030841e-06, + "loss": 0.0058, + "step": 8960 + }, + { + "epoch": 0.1515932501288627, + "grad_norm": 0.18350815773010254, + "learning_rate": 7.5792141951837775e-06, + "loss": 0.0041, + "step": 8970 + }, + { + "epoch": 0.15176225040771318, + "grad_norm": 0.1131502166390419, + "learning_rate": 7.587663709336713e-06, + "loss": 0.0044, + "step": 8980 + }, + { + "epoch": 0.15193125068656363, + "grad_norm": 0.15428772568702698, + "learning_rate": 7.59611322348965e-06, + "loss": 0.0053, + "step": 8990 + }, + { + "epoch": 0.1521002509654141, + "grad_norm": 0.19609490036964417, + "learning_rate": 7.604562737642585e-06, + "loss": 0.006, + "step": 9000 + }, + { + "epoch": 0.15226925124426455, + "grad_norm": 0.2686976194381714, + "learning_rate": 7.613012251795522e-06, + "loss": 0.0036, + "step": 9010 + }, + { + "epoch": 0.15243825152311502, + "grad_norm": 0.11909843981266022, + "learning_rate": 7.621461765948459e-06, + "loss": 0.0039, + "step": 9020 + }, + { + "epoch": 0.15260725180196547, + "grad_norm": 0.26848167181015015, + "learning_rate": 7.629911280101395e-06, + "loss": 0.0048, + "step": 9030 + }, + { + "epoch": 0.15277625208081594, + "grad_norm": 0.23602674901485443, + "learning_rate": 7.638360794254331e-06, + "loss": 0.0063, + "step": 9040 + }, + { + "epoch": 0.15294525235966638, + "grad_norm": 0.47805437445640564, + "learning_rate": 7.646810308407268e-06, + "loss": 0.0057, + "step": 9050 + }, + { + "epoch": 0.15311425263851686, + "grad_norm": 0.3113015294075012, + "learning_rate": 7.655259822560204e-06, + "loss": 0.0066, + "step": 9060 + }, + { + "epoch": 0.1532832529173673, + "grad_norm": 0.1774858981370926, + "learning_rate": 7.663709336713139e-06, + "loss": 0.0048, + "step": 9070 + }, + { + "epoch": 0.15345225319621777, + "grad_norm": 0.1720050722360611, + "learning_rate": 7.672158850866076e-06, + "loss": 0.0055, + "step": 9080 + }, + { + "epoch": 0.15362125347506822, + "grad_norm": 0.2195959836244583, + "learning_rate": 7.680608365019012e-06, + "loss": 0.0076, + "step": 9090 + }, + { + "epoch": 0.1537902537539187, + "grad_norm": 0.209056556224823, + "learning_rate": 7.689057879171949e-06, + "loss": 0.0069, + "step": 9100 + }, + { + "epoch": 0.15395925403276917, + "grad_norm": 0.31863802671432495, + "learning_rate": 7.697507393324885e-06, + "loss": 0.0036, + "step": 9110 + }, + { + "epoch": 0.1541282543116196, + "grad_norm": 0.1689879596233368, + "learning_rate": 
7.70595690747782e-06, + "loss": 0.0035, + "step": 9120 + }, + { + "epoch": 0.15429725459047008, + "grad_norm": 0.3629443049430847, + "learning_rate": 7.714406421630756e-06, + "loss": 0.0052, + "step": 9130 + }, + { + "epoch": 0.15446625486932053, + "grad_norm": 0.2640196681022644, + "learning_rate": 7.722855935783693e-06, + "loss": 0.0094, + "step": 9140 + }, + { + "epoch": 0.154635255148171, + "grad_norm": 0.06258325278759003, + "learning_rate": 7.73130544993663e-06, + "loss": 0.0059, + "step": 9150 + }, + { + "epoch": 0.15480425542702145, + "grad_norm": 0.02285955473780632, + "learning_rate": 7.739754964089566e-06, + "loss": 0.0054, + "step": 9160 + }, + { + "epoch": 0.15497325570587192, + "grad_norm": 0.1107051745057106, + "learning_rate": 7.7482044782425e-06, + "loss": 0.0037, + "step": 9170 + }, + { + "epoch": 0.15514225598472237, + "grad_norm": 0.24938778579235077, + "learning_rate": 7.756653992395437e-06, + "loss": 0.0074, + "step": 9180 + }, + { + "epoch": 0.15531125626357284, + "grad_norm": 0.18850697576999664, + "learning_rate": 7.765103506548374e-06, + "loss": 0.0047, + "step": 9190 + }, + { + "epoch": 0.15548025654242328, + "grad_norm": 0.2750691771507263, + "learning_rate": 7.77355302070131e-06, + "loss": 0.0075, + "step": 9200 + }, + { + "epoch": 0.15564925682127376, + "grad_norm": 0.13034598529338837, + "learning_rate": 7.782002534854247e-06, + "loss": 0.0044, + "step": 9210 + }, + { + "epoch": 0.1558182571001242, + "grad_norm": 0.21011145412921906, + "learning_rate": 7.790452049007183e-06, + "loss": 0.0053, + "step": 9220 + }, + { + "epoch": 0.15598725737897468, + "grad_norm": 0.30511417984962463, + "learning_rate": 7.79890156316012e-06, + "loss": 0.0055, + "step": 9230 + }, + { + "epoch": 0.15615625765782515, + "grad_norm": 0.17314879596233368, + "learning_rate": 7.807351077313056e-06, + "loss": 0.0039, + "step": 9240 + }, + { + "epoch": 0.1563252579366756, + "grad_norm": 0.2600672245025635, + "learning_rate": 7.815800591465993e-06, + "loss": 0.0065, + "step": 9250 + }, + { + "epoch": 0.15649425821552607, + "grad_norm": 0.30571845173835754, + "learning_rate": 7.824250105618927e-06, + "loss": 0.0046, + "step": 9260 + }, + { + "epoch": 0.1566632584943765, + "grad_norm": 0.2151174694299698, + "learning_rate": 7.832699619771864e-06, + "loss": 0.0062, + "step": 9270 + }, + { + "epoch": 0.15683225877322698, + "grad_norm": 0.14800691604614258, + "learning_rate": 7.8411491339248e-06, + "loss": 0.0058, + "step": 9280 + }, + { + "epoch": 0.15700125905207743, + "grad_norm": 0.2288718819618225, + "learning_rate": 7.849598648077737e-06, + "loss": 0.0062, + "step": 9290 + }, + { + "epoch": 0.1571702593309279, + "grad_norm": 0.24087797105312347, + "learning_rate": 7.858048162230673e-06, + "loss": 0.0059, + "step": 9300 + }, + { + "epoch": 0.15733925960977835, + "grad_norm": 0.2878643274307251, + "learning_rate": 7.866497676383608e-06, + "loss": 0.006, + "step": 9310 + }, + { + "epoch": 0.15750825988862882, + "grad_norm": 0.14686961472034454, + "learning_rate": 7.874947190536545e-06, + "loss": 0.0054, + "step": 9320 + }, + { + "epoch": 0.15767726016747927, + "grad_norm": 0.29242226481437683, + "learning_rate": 7.883396704689481e-06, + "loss": 0.0059, + "step": 9330 + }, + { + "epoch": 0.15784626044632974, + "grad_norm": 0.32249775528907776, + "learning_rate": 7.891846218842418e-06, + "loss": 0.005, + "step": 9340 + }, + { + "epoch": 0.15801526072518018, + "grad_norm": 0.6770492196083069, + "learning_rate": 7.900295732995354e-06, + "loss": 0.0064, + "step": 9350 + }, + { + "epoch": 
0.15818426100403066, + "grad_norm": 0.2366202175617218, + "learning_rate": 7.908745247148289e-06, + "loss": 0.0077, + "step": 9360 + }, + { + "epoch": 0.15835326128288113, + "grad_norm": 0.36044564843177795, + "learning_rate": 7.917194761301225e-06, + "loss": 0.0071, + "step": 9370 + }, + { + "epoch": 0.15852226156173158, + "grad_norm": 0.37434545159339905, + "learning_rate": 7.925644275454162e-06, + "loss": 0.0056, + "step": 9380 + }, + { + "epoch": 0.15869126184058205, + "grad_norm": 0.4788360893726349, + "learning_rate": 7.934093789607098e-06, + "loss": 0.006, + "step": 9390 + }, + { + "epoch": 0.1588602621194325, + "grad_norm": 0.28214511275291443, + "learning_rate": 7.942543303760035e-06, + "loss": 0.004, + "step": 9400 + }, + { + "epoch": 0.15902926239828297, + "grad_norm": 0.11522787064313889, + "learning_rate": 7.95099281791297e-06, + "loss": 0.0041, + "step": 9410 + }, + { + "epoch": 0.1591982626771334, + "grad_norm": 0.33369603753089905, + "learning_rate": 7.959442332065906e-06, + "loss": 0.0064, + "step": 9420 + }, + { + "epoch": 0.15936726295598388, + "grad_norm": 0.15886670351028442, + "learning_rate": 7.967891846218843e-06, + "loss": 0.0045, + "step": 9430 + }, + { + "epoch": 0.15953626323483433, + "grad_norm": 0.378985732793808, + "learning_rate": 7.976341360371779e-06, + "loss": 0.0095, + "step": 9440 + }, + { + "epoch": 0.1597052635136848, + "grad_norm": 0.7028583884239197, + "learning_rate": 7.984790874524716e-06, + "loss": 0.0075, + "step": 9450 + }, + { + "epoch": 0.15987426379253525, + "grad_norm": 0.5431110262870789, + "learning_rate": 7.993240388677652e-06, + "loss": 0.0071, + "step": 9460 + }, + { + "epoch": 0.16004326407138572, + "grad_norm": 0.21072594821453094, + "learning_rate": 8.001689902830589e-06, + "loss": 0.0059, + "step": 9470 + }, + { + "epoch": 0.16021226435023617, + "grad_norm": 0.2906375527381897, + "learning_rate": 8.010139416983525e-06, + "loss": 0.0061, + "step": 9480 + }, + { + "epoch": 0.16038126462908664, + "grad_norm": 0.5510354042053223, + "learning_rate": 8.01858893113646e-06, + "loss": 0.0108, + "step": 9490 + }, + { + "epoch": 0.16055026490793708, + "grad_norm": 0.0482487753033638, + "learning_rate": 8.027038445289396e-06, + "loss": 0.0063, + "step": 9500 + }, + { + "epoch": 0.16071926518678756, + "grad_norm": 0.34865421056747437, + "learning_rate": 8.035487959442333e-06, + "loss": 0.0073, + "step": 9510 + }, + { + "epoch": 0.16088826546563803, + "grad_norm": 0.1495426744222641, + "learning_rate": 8.04393747359527e-06, + "loss": 0.0062, + "step": 9520 + }, + { + "epoch": 0.16105726574448848, + "grad_norm": 0.3056929409503937, + "learning_rate": 8.052386987748206e-06, + "loss": 0.0077, + "step": 9530 + }, + { + "epoch": 0.16122626602333895, + "grad_norm": 0.13944317400455475, + "learning_rate": 8.06083650190114e-06, + "loss": 0.0044, + "step": 9540 + }, + { + "epoch": 0.1613952663021894, + "grad_norm": 0.2999503016471863, + "learning_rate": 8.069286016054077e-06, + "loss": 0.0073, + "step": 9550 + }, + { + "epoch": 0.16156426658103987, + "grad_norm": 0.22944676876068115, + "learning_rate": 8.077735530207014e-06, + "loss": 0.0043, + "step": 9560 + }, + { + "epoch": 0.1617332668598903, + "grad_norm": 0.18615588545799255, + "learning_rate": 8.08618504435995e-06, + "loss": 0.0032, + "step": 9570 + }, + { + "epoch": 0.16190226713874079, + "grad_norm": 0.10034358501434326, + "learning_rate": 8.094634558512887e-06, + "loss": 0.0052, + "step": 9580 + }, + { + "epoch": 0.16207126741759123, + "grad_norm": 0.19554342329502106, + "learning_rate": 
8.103084072665821e-06, + "loss": 0.0052, + "step": 9590 + }, + { + "epoch": 0.1622402676964417, + "grad_norm": 0.18776758015155792, + "learning_rate": 8.111533586818758e-06, + "loss": 0.0092, + "step": 9600 + }, + { + "epoch": 0.16240926797529215, + "grad_norm": 0.16730335354804993, + "learning_rate": 8.119983100971694e-06, + "loss": 0.0082, + "step": 9610 + }, + { + "epoch": 0.16257826825414262, + "grad_norm": 0.2268962264060974, + "learning_rate": 8.128432615124631e-06, + "loss": 0.0031, + "step": 9620 + }, + { + "epoch": 0.16274726853299307, + "grad_norm": 0.2297000288963318, + "learning_rate": 8.136882129277567e-06, + "loss": 0.0051, + "step": 9630 + }, + { + "epoch": 0.16291626881184354, + "grad_norm": 0.29321032762527466, + "learning_rate": 8.145331643430502e-06, + "loss": 0.0077, + "step": 9640 + }, + { + "epoch": 0.163085269090694, + "grad_norm": 0.10185467451810837, + "learning_rate": 8.153781157583439e-06, + "loss": 0.0075, + "step": 9650 + }, + { + "epoch": 0.16325426936954446, + "grad_norm": 0.23513555526733398, + "learning_rate": 8.162230671736375e-06, + "loss": 0.0052, + "step": 9660 + }, + { + "epoch": 0.16342326964839493, + "grad_norm": 0.29854118824005127, + "learning_rate": 8.170680185889312e-06, + "loss": 0.0057, + "step": 9670 + }, + { + "epoch": 0.16359226992724538, + "grad_norm": 0.11316808313131332, + "learning_rate": 8.179129700042248e-06, + "loss": 0.0039, + "step": 9680 + }, + { + "epoch": 0.16376127020609585, + "grad_norm": 0.11226405948400497, + "learning_rate": 8.187579214195185e-06, + "loss": 0.0053, + "step": 9690 + }, + { + "epoch": 0.1639302704849463, + "grad_norm": 0.5727311372756958, + "learning_rate": 8.196028728348121e-06, + "loss": 0.006, + "step": 9700 + }, + { + "epoch": 0.16409927076379677, + "grad_norm": 0.09775212407112122, + "learning_rate": 8.204478242501058e-06, + "loss": 0.0033, + "step": 9710 + }, + { + "epoch": 0.1642682710426472, + "grad_norm": 0.11045091599225998, + "learning_rate": 8.212927756653993e-06, + "loss": 0.0039, + "step": 9720 + }, + { + "epoch": 0.16443727132149769, + "grad_norm": 0.38140159845352173, + "learning_rate": 8.221377270806929e-06, + "loss": 0.0075, + "step": 9730 + }, + { + "epoch": 0.16460627160034813, + "grad_norm": 0.2585310935974121, + "learning_rate": 8.229826784959865e-06, + "loss": 0.0073, + "step": 9740 + }, + { + "epoch": 0.1647752718791986, + "grad_norm": 0.7000439167022705, + "learning_rate": 8.238276299112802e-06, + "loss": 0.0055, + "step": 9750 + }, + { + "epoch": 0.16494427215804905, + "grad_norm": 0.12472444027662277, + "learning_rate": 8.246725813265738e-06, + "loss": 0.0044, + "step": 9760 + }, + { + "epoch": 0.16511327243689952, + "grad_norm": 0.09951147437095642, + "learning_rate": 8.255175327418673e-06, + "loss": 0.0049, + "step": 9770 + }, + { + "epoch": 0.16528227271574997, + "grad_norm": 0.23709943890571594, + "learning_rate": 8.26362484157161e-06, + "loss": 0.0069, + "step": 9780 + }, + { + "epoch": 0.16545127299460044, + "grad_norm": 0.16604699194431305, + "learning_rate": 8.272074355724546e-06, + "loss": 0.005, + "step": 9790 + }, + { + "epoch": 0.1656202732734509, + "grad_norm": 0.4178698658943176, + "learning_rate": 8.280523869877483e-06, + "loss": 0.0061, + "step": 9800 + }, + { + "epoch": 0.16578927355230136, + "grad_norm": 1.2892835140228271, + "learning_rate": 8.28897338403042e-06, + "loss": 0.0044, + "step": 9810 + }, + { + "epoch": 0.16595827383115183, + "grad_norm": 0.6563330292701721, + "learning_rate": 8.297422898183354e-06, + "loss": 0.0104, + "step": 9820 + }, + { + "epoch": 
0.16612727411000228, + "grad_norm": 0.11363092809915543, + "learning_rate": 8.30587241233629e-06, + "loss": 0.0057, + "step": 9830 + }, + { + "epoch": 0.16629627438885275, + "grad_norm": 0.14816680550575256, + "learning_rate": 8.314321926489227e-06, + "loss": 0.0037, + "step": 9840 + }, + { + "epoch": 0.1664652746677032, + "grad_norm": 0.134018212556839, + "learning_rate": 8.322771440642164e-06, + "loss": 0.0058, + "step": 9850 + }, + { + "epoch": 0.16663427494655367, + "grad_norm": 0.20563648641109467, + "learning_rate": 8.3312209547951e-06, + "loss": 0.0073, + "step": 9860 + }, + { + "epoch": 0.1668032752254041, + "grad_norm": 0.10587208718061447, + "learning_rate": 8.339670468948035e-06, + "loss": 0.0059, + "step": 9870 + }, + { + "epoch": 0.16697227550425459, + "grad_norm": 0.2292458564043045, + "learning_rate": 8.348119983100971e-06, + "loss": 0.0078, + "step": 9880 + }, + { + "epoch": 0.16714127578310503, + "grad_norm": 0.24652674794197083, + "learning_rate": 8.35656949725391e-06, + "loss": 0.0054, + "step": 9890 + }, + { + "epoch": 0.1673102760619555, + "grad_norm": 0.15121500194072723, + "learning_rate": 8.365019011406846e-06, + "loss": 0.0047, + "step": 9900 + }, + { + "epoch": 0.16747927634080595, + "grad_norm": 0.07074388861656189, + "learning_rate": 8.37346852555978e-06, + "loss": 0.0068, + "step": 9910 + }, + { + "epoch": 0.16764827661965642, + "grad_norm": 0.0782100260257721, + "learning_rate": 8.381918039712717e-06, + "loss": 0.0041, + "step": 9920 + }, + { + "epoch": 0.1678172768985069, + "grad_norm": 0.14748837053775787, + "learning_rate": 8.390367553865654e-06, + "loss": 0.0043, + "step": 9930 + }, + { + "epoch": 0.16798627717735734, + "grad_norm": 0.1910974383354187, + "learning_rate": 8.39881706801859e-06, + "loss": 0.0063, + "step": 9940 + }, + { + "epoch": 0.1681552774562078, + "grad_norm": 0.26988935470581055, + "learning_rate": 8.407266582171527e-06, + "loss": 0.0045, + "step": 9950 + }, + { + "epoch": 0.16832427773505826, + "grad_norm": 0.22018760442733765, + "learning_rate": 8.415716096324462e-06, + "loss": 0.0067, + "step": 9960 + }, + { + "epoch": 0.16849327801390873, + "grad_norm": 0.15286549925804138, + "learning_rate": 8.424165610477398e-06, + "loss": 0.005, + "step": 9970 + }, + { + "epoch": 0.16866227829275918, + "grad_norm": 0.04976119101047516, + "learning_rate": 8.432615124630335e-06, + "loss": 0.0045, + "step": 9980 + }, + { + "epoch": 0.16883127857160965, + "grad_norm": 0.1893671154975891, + "learning_rate": 8.441064638783271e-06, + "loss": 0.0047, + "step": 9990 + }, + { + "epoch": 0.1690002788504601, + "grad_norm": 0.2919318974018097, + "learning_rate": 8.449514152936208e-06, + "loss": 0.0057, + "step": 10000 + }, + { + "epoch": 0.16916927912931057, + "grad_norm": 0.21773135662078857, + "learning_rate": 8.457963667089142e-06, + "loss": 0.0059, + "step": 10010 + }, + { + "epoch": 0.169338279408161, + "grad_norm": 0.13610824942588806, + "learning_rate": 8.466413181242079e-06, + "loss": 0.0052, + "step": 10020 + }, + { + "epoch": 0.1695072796870115, + "grad_norm": 0.17957353591918945, + "learning_rate": 8.474862695395015e-06, + "loss": 0.0049, + "step": 10030 + }, + { + "epoch": 0.16967627996586193, + "grad_norm": 0.2154211401939392, + "learning_rate": 8.483312209547952e-06, + "loss": 0.0064, + "step": 10040 + }, + { + "epoch": 0.1698452802447124, + "grad_norm": 0.24149611592292786, + "learning_rate": 8.491761723700888e-06, + "loss": 0.0035, + "step": 10050 + }, + { + "epoch": 0.17001428052356288, + "grad_norm": 0.43823128938674927, + "learning_rate": 
8.500211237853823e-06, + "loss": 0.0052, + "step": 10060 + }, + { + "epoch": 0.17018328080241332, + "grad_norm": 0.4004029333591461, + "learning_rate": 8.50866075200676e-06, + "loss": 0.004, + "step": 10070 + }, + { + "epoch": 0.1703522810812638, + "grad_norm": 0.23778338730335236, + "learning_rate": 8.517110266159696e-06, + "loss": 0.0046, + "step": 10080 + }, + { + "epoch": 0.17052128136011424, + "grad_norm": 0.09642806649208069, + "learning_rate": 8.525559780312633e-06, + "loss": 0.0073, + "step": 10090 + }, + { + "epoch": 0.1706902816389647, + "grad_norm": 0.1605379730463028, + "learning_rate": 8.534009294465569e-06, + "loss": 0.0043, + "step": 10100 + }, + { + "epoch": 0.17085928191781516, + "grad_norm": 0.24077445268630981, + "learning_rate": 8.542458808618506e-06, + "loss": 0.0057, + "step": 10110 + }, + { + "epoch": 0.17102828219666563, + "grad_norm": 0.43282195925712585, + "learning_rate": 8.550908322771442e-06, + "loss": 0.0053, + "step": 10120 + }, + { + "epoch": 0.17119728247551608, + "grad_norm": 0.23690132796764374, + "learning_rate": 8.559357836924379e-06, + "loss": 0.0047, + "step": 10130 + }, + { + "epoch": 0.17136628275436655, + "grad_norm": 0.1514607071876526, + "learning_rate": 8.567807351077313e-06, + "loss": 0.0049, + "step": 10140 + }, + { + "epoch": 0.171535283033217, + "grad_norm": 0.23836401104927063, + "learning_rate": 8.57625686523025e-06, + "loss": 0.0044, + "step": 10150 + }, + { + "epoch": 0.17170428331206747, + "grad_norm": 0.17873355746269226, + "learning_rate": 8.584706379383186e-06, + "loss": 0.0039, + "step": 10160 + }, + { + "epoch": 0.1718732835909179, + "grad_norm": 0.22357626259326935, + "learning_rate": 8.593155893536123e-06, + "loss": 0.0067, + "step": 10170 + }, + { + "epoch": 0.1720422838697684, + "grad_norm": 0.3310186266899109, + "learning_rate": 8.60160540768906e-06, + "loss": 0.0048, + "step": 10180 + }, + { + "epoch": 0.17221128414861883, + "grad_norm": 0.2950020134449005, + "learning_rate": 8.610054921841994e-06, + "loss": 0.0081, + "step": 10190 + }, + { + "epoch": 0.1723802844274693, + "grad_norm": 0.1528913527727127, + "learning_rate": 8.61850443599493e-06, + "loss": 0.0035, + "step": 10200 + }, + { + "epoch": 0.17254928470631978, + "grad_norm": 0.09008288383483887, + "learning_rate": 8.626953950147867e-06, + "loss": 0.0065, + "step": 10210 + }, + { + "epoch": 0.17271828498517022, + "grad_norm": 0.28624215722084045, + "learning_rate": 8.635403464300804e-06, + "loss": 0.0055, + "step": 10220 + }, + { + "epoch": 0.1728872852640207, + "grad_norm": 0.33840370178222656, + "learning_rate": 8.64385297845374e-06, + "loss": 0.0045, + "step": 10230 + }, + { + "epoch": 0.17305628554287114, + "grad_norm": 0.2384103536605835, + "learning_rate": 8.652302492606675e-06, + "loss": 0.0059, + "step": 10240 + }, + { + "epoch": 0.17322528582172161, + "grad_norm": 0.1607908308506012, + "learning_rate": 8.660752006759611e-06, + "loss": 0.0047, + "step": 10250 + }, + { + "epoch": 0.17339428610057206, + "grad_norm": 0.31546470522880554, + "learning_rate": 8.669201520912548e-06, + "loss": 0.0043, + "step": 10260 + }, + { + "epoch": 0.17356328637942253, + "grad_norm": 0.21398183703422546, + "learning_rate": 8.677651035065484e-06, + "loss": 0.0086, + "step": 10270 + }, + { + "epoch": 0.17373228665827298, + "grad_norm": 0.35206085443496704, + "learning_rate": 8.686100549218421e-06, + "loss": 0.0052, + "step": 10280 + }, + { + "epoch": 0.17390128693712345, + "grad_norm": 0.15468555688858032, + "learning_rate": 8.694550063371356e-06, + "loss": 0.004, + "step": 10290 + 
}, + { + "epoch": 0.1740702872159739, + "grad_norm": 0.2814697027206421, + "learning_rate": 8.702999577524292e-06, + "loss": 0.0064, + "step": 10300 + }, + { + "epoch": 0.17423928749482437, + "grad_norm": 0.3659762144088745, + "learning_rate": 8.711449091677229e-06, + "loss": 0.0088, + "step": 10310 + }, + { + "epoch": 0.1744082877736748, + "grad_norm": 0.11075379699468613, + "learning_rate": 8.719898605830165e-06, + "loss": 0.007, + "step": 10320 + }, + { + "epoch": 0.1745772880525253, + "grad_norm": 0.45485588908195496, + "learning_rate": 8.728348119983102e-06, + "loss": 0.0052, + "step": 10330 + }, + { + "epoch": 0.17474628833137576, + "grad_norm": 0.1501804143190384, + "learning_rate": 8.736797634136038e-06, + "loss": 0.0037, + "step": 10340 + }, + { + "epoch": 0.1749152886102262, + "grad_norm": 0.2205362170934677, + "learning_rate": 8.745247148288975e-06, + "loss": 0.007, + "step": 10350 + }, + { + "epoch": 0.17508428888907668, + "grad_norm": 0.18038053810596466, + "learning_rate": 8.753696662441911e-06, + "loss": 0.003, + "step": 10360 + }, + { + "epoch": 0.17525328916792712, + "grad_norm": 0.22610805928707123, + "learning_rate": 8.762146176594846e-06, + "loss": 0.0062, + "step": 10370 + }, + { + "epoch": 0.1754222894467776, + "grad_norm": 0.3337898254394531, + "learning_rate": 8.770595690747782e-06, + "loss": 0.0055, + "step": 10380 + }, + { + "epoch": 0.17559128972562804, + "grad_norm": 0.16095170378684998, + "learning_rate": 8.779045204900719e-06, + "loss": 0.0063, + "step": 10390 + }, + { + "epoch": 0.17576029000447851, + "grad_norm": 0.22648227214813232, + "learning_rate": 8.787494719053655e-06, + "loss": 0.0039, + "step": 10400 + }, + { + "epoch": 0.17592929028332896, + "grad_norm": 0.21391649544239044, + "learning_rate": 8.795944233206592e-06, + "loss": 0.0025, + "step": 10410 + }, + { + "epoch": 0.17609829056217943, + "grad_norm": 0.1315697729587555, + "learning_rate": 8.804393747359527e-06, + "loss": 0.0049, + "step": 10420 + }, + { + "epoch": 0.17626729084102988, + "grad_norm": 0.1340722292661667, + "learning_rate": 8.812843261512463e-06, + "loss": 0.0049, + "step": 10430 + }, + { + "epoch": 0.17643629111988035, + "grad_norm": 0.2515009045600891, + "learning_rate": 8.8212927756654e-06, + "loss": 0.0042, + "step": 10440 + }, + { + "epoch": 0.1766052913987308, + "grad_norm": 0.33529043197631836, + "learning_rate": 8.829742289818336e-06, + "loss": 0.0051, + "step": 10450 + }, + { + "epoch": 0.17677429167758127, + "grad_norm": 0.880221962928772, + "learning_rate": 8.838191803971273e-06, + "loss": 0.0053, + "step": 10460 + }, + { + "epoch": 0.17694329195643174, + "grad_norm": 0.22810405492782593, + "learning_rate": 8.846641318124208e-06, + "loss": 0.0056, + "step": 10470 + }, + { + "epoch": 0.1771122922352822, + "grad_norm": 0.5324404239654541, + "learning_rate": 8.855090832277144e-06, + "loss": 0.0042, + "step": 10480 + }, + { + "epoch": 0.17728129251413266, + "grad_norm": 0.30728843808174133, + "learning_rate": 8.86354034643008e-06, + "loss": 0.0052, + "step": 10490 + }, + { + "epoch": 0.1774502927929831, + "grad_norm": 0.26511019468307495, + "learning_rate": 8.871989860583017e-06, + "loss": 0.0065, + "step": 10500 + }, + { + "epoch": 0.17761929307183358, + "grad_norm": 0.3257712125778198, + "learning_rate": 8.880439374735953e-06, + "loss": 0.0076, + "step": 10510 + }, + { + "epoch": 0.17778829335068402, + "grad_norm": 0.40313950181007385, + "learning_rate": 8.888888888888888e-06, + "loss": 0.0057, + "step": 10520 + }, + { + "epoch": 0.1779572936295345, + "grad_norm": 
0.4945511519908905, + "learning_rate": 8.897338403041825e-06, + "loss": 0.0065, + "step": 10530 + }, + { + "epoch": 0.17812629390838494, + "grad_norm": 0.4786947965621948, + "learning_rate": 8.905787917194763e-06, + "loss": 0.0034, + "step": 10540 + }, + { + "epoch": 0.17829529418723541, + "grad_norm": 0.21327023208141327, + "learning_rate": 8.9142374313477e-06, + "loss": 0.0039, + "step": 10550 + }, + { + "epoch": 0.17846429446608586, + "grad_norm": 0.33647388219833374, + "learning_rate": 8.922686945500634e-06, + "loss": 0.0048, + "step": 10560 + }, + { + "epoch": 0.17863329474493633, + "grad_norm": 0.5618618726730347, + "learning_rate": 8.93113645965357e-06, + "loss": 0.0049, + "step": 10570 + }, + { + "epoch": 0.17880229502378678, + "grad_norm": 0.324091374874115, + "learning_rate": 8.939585973806507e-06, + "loss": 0.0055, + "step": 10580 + }, + { + "epoch": 0.17897129530263725, + "grad_norm": 0.09489540755748749, + "learning_rate": 8.948035487959444e-06, + "loss": 0.0037, + "step": 10590 + }, + { + "epoch": 0.1791402955814877, + "grad_norm": 0.1073186919093132, + "learning_rate": 8.95648500211238e-06, + "loss": 0.0056, + "step": 10600 + }, + { + "epoch": 0.17930929586033817, + "grad_norm": 0.027942942455410957, + "learning_rate": 8.964934516265315e-06, + "loss": 0.0046, + "step": 10610 + }, + { + "epoch": 0.17947829613918864, + "grad_norm": 0.21087293326854706, + "learning_rate": 8.973384030418252e-06, + "loss": 0.0036, + "step": 10620 + }, + { + "epoch": 0.1796472964180391, + "grad_norm": 0.2617759704589844, + "learning_rate": 8.981833544571188e-06, + "loss": 0.0051, + "step": 10630 + }, + { + "epoch": 0.17981629669688956, + "grad_norm": 0.3466041684150696, + "learning_rate": 8.990283058724125e-06, + "loss": 0.0122, + "step": 10640 + }, + { + "epoch": 0.17998529697574, + "grad_norm": 0.1437150090932846, + "learning_rate": 8.998732572877061e-06, + "loss": 0.0045, + "step": 10650 + }, + { + "epoch": 0.18015429725459048, + "grad_norm": 0.21957731246948242, + "learning_rate": 9.007182087029996e-06, + "loss": 0.0041, + "step": 10660 + }, + { + "epoch": 0.18032329753344092, + "grad_norm": 0.1508844494819641, + "learning_rate": 9.015631601182932e-06, + "loss": 0.0075, + "step": 10670 + }, + { + "epoch": 0.1804922978122914, + "grad_norm": 0.2619732916355133, + "learning_rate": 9.024081115335869e-06, + "loss": 0.0046, + "step": 10680 + }, + { + "epoch": 0.18066129809114184, + "grad_norm": 0.18197904527187347, + "learning_rate": 9.032530629488805e-06, + "loss": 0.0043, + "step": 10690 + }, + { + "epoch": 0.18083029836999231, + "grad_norm": 0.5457032322883606, + "learning_rate": 9.040980143641742e-06, + "loss": 0.0069, + "step": 10700 + }, + { + "epoch": 0.18099929864884276, + "grad_norm": 0.12650050222873688, + "learning_rate": 9.049429657794677e-06, + "loss": 0.0044, + "step": 10710 + }, + { + "epoch": 0.18116829892769323, + "grad_norm": 0.20195062458515167, + "learning_rate": 9.057879171947613e-06, + "loss": 0.0047, + "step": 10720 + }, + { + "epoch": 0.18133729920654368, + "grad_norm": 0.3241768479347229, + "learning_rate": 9.06632868610055e-06, + "loss": 0.0059, + "step": 10730 + }, + { + "epoch": 0.18150629948539415, + "grad_norm": 0.16222558915615082, + "learning_rate": 9.074778200253486e-06, + "loss": 0.0057, + "step": 10740 + }, + { + "epoch": 0.18167529976424462, + "grad_norm": 0.21460038423538208, + "learning_rate": 9.083227714406423e-06, + "loss": 0.0047, + "step": 10750 + }, + { + "epoch": 0.18184430004309507, + "grad_norm": 0.28541454672813416, + "learning_rate": 
9.091677228559359e-06, + "loss": 0.0047, + "step": 10760 + }, + { + "epoch": 0.18201330032194554, + "grad_norm": 0.20260149240493774, + "learning_rate": 9.100126742712296e-06, + "loss": 0.0083, + "step": 10770 + }, + { + "epoch": 0.182182300600796, + "grad_norm": 0.5409811735153198, + "learning_rate": 9.108576256865232e-06, + "loss": 0.0039, + "step": 10780 + }, + { + "epoch": 0.18235130087964646, + "grad_norm": 0.17389380931854248, + "learning_rate": 9.117025771018167e-06, + "loss": 0.0057, + "step": 10790 + }, + { + "epoch": 0.1825203011584969, + "grad_norm": 0.23779360949993134, + "learning_rate": 9.125475285171103e-06, + "loss": 0.0038, + "step": 10800 + }, + { + "epoch": 0.18268930143734738, + "grad_norm": 0.10513414442539215, + "learning_rate": 9.13392479932404e-06, + "loss": 0.0037, + "step": 10810 + }, + { + "epoch": 0.18285830171619782, + "grad_norm": 0.17357668280601501, + "learning_rate": 9.142374313476976e-06, + "loss": 0.0061, + "step": 10820 + }, + { + "epoch": 0.1830273019950483, + "grad_norm": 0.15187624096870422, + "learning_rate": 9.150823827629913e-06, + "loss": 0.0066, + "step": 10830 + }, + { + "epoch": 0.18319630227389874, + "grad_norm": 0.14482544362545013, + "learning_rate": 9.159273341782848e-06, + "loss": 0.0057, + "step": 10840 + }, + { + "epoch": 0.18336530255274922, + "grad_norm": 0.15263095498085022, + "learning_rate": 9.167722855935784e-06, + "loss": 0.0042, + "step": 10850 + }, + { + "epoch": 0.18353430283159966, + "grad_norm": 0.12884816527366638, + "learning_rate": 9.17617237008872e-06, + "loss": 0.004, + "step": 10860 + }, + { + "epoch": 0.18370330311045013, + "grad_norm": 0.1251261681318283, + "learning_rate": 9.184621884241657e-06, + "loss": 0.0049, + "step": 10870 + }, + { + "epoch": 0.18387230338930058, + "grad_norm": 0.1957172453403473, + "learning_rate": 9.193071398394594e-06, + "loss": 0.0043, + "step": 10880 + }, + { + "epoch": 0.18404130366815105, + "grad_norm": 0.3047395348548889, + "learning_rate": 9.201520912547528e-06, + "loss": 0.0054, + "step": 10890 + }, + { + "epoch": 0.18421030394700152, + "grad_norm": 0.6501251459121704, + "learning_rate": 9.209970426700465e-06, + "loss": 0.0055, + "step": 10900 + }, + { + "epoch": 0.18437930422585197, + "grad_norm": 0.10551861673593521, + "learning_rate": 9.218419940853401e-06, + "loss": 0.003, + "step": 10910 + }, + { + "epoch": 0.18454830450470244, + "grad_norm": 0.08184027671813965, + "learning_rate": 9.226869455006338e-06, + "loss": 0.0072, + "step": 10920 + }, + { + "epoch": 0.1847173047835529, + "grad_norm": 0.09533879905939102, + "learning_rate": 9.235318969159274e-06, + "loss": 0.0032, + "step": 10930 + }, + { + "epoch": 0.18488630506240336, + "grad_norm": 0.3011474311351776, + "learning_rate": 9.24376848331221e-06, + "loss": 0.0048, + "step": 10940 + }, + { + "epoch": 0.1850553053412538, + "grad_norm": 0.13552433252334595, + "learning_rate": 9.252217997465146e-06, + "loss": 0.0044, + "step": 10950 + }, + { + "epoch": 0.18522430562010428, + "grad_norm": 1.5320141315460205, + "learning_rate": 9.260667511618082e-06, + "loss": 0.0098, + "step": 10960 + }, + { + "epoch": 0.18539330589895472, + "grad_norm": 0.41396188735961914, + "learning_rate": 9.269117025771019e-06, + "loss": 0.0054, + "step": 10970 + }, + { + "epoch": 0.1855623061778052, + "grad_norm": 0.27680331468582153, + "learning_rate": 9.277566539923955e-06, + "loss": 0.0067, + "step": 10980 + }, + { + "epoch": 0.18573130645665564, + "grad_norm": 0.17900198698043823, + "learning_rate": 9.286016054076892e-06, + "loss": 0.0084, + "step": 
10990 + }, + { + "epoch": 0.18590030673550612, + "grad_norm": 0.09265404939651489, + "learning_rate": 9.294465568229828e-06, + "loss": 0.0049, + "step": 11000 + }, + { + "epoch": 0.18606930701435656, + "grad_norm": 0.22261707484722137, + "learning_rate": 9.302915082382765e-06, + "loss": 0.0057, + "step": 11010 + }, + { + "epoch": 0.18623830729320703, + "grad_norm": 0.32542920112609863, + "learning_rate": 9.3113645965357e-06, + "loss": 0.0043, + "step": 11020 + }, + { + "epoch": 0.1864073075720575, + "grad_norm": 0.054340165108442307, + "learning_rate": 9.319814110688636e-06, + "loss": 0.004, + "step": 11030 + }, + { + "epoch": 0.18657630785090795, + "grad_norm": 0.28003227710723877, + "learning_rate": 9.328263624841572e-06, + "loss": 0.006, + "step": 11040 + }, + { + "epoch": 0.18674530812975842, + "grad_norm": 0.263875275850296, + "learning_rate": 9.336713138994509e-06, + "loss": 0.0058, + "step": 11050 + }, + { + "epoch": 0.18691430840860887, + "grad_norm": 0.28672799468040466, + "learning_rate": 9.345162653147445e-06, + "loss": 0.0057, + "step": 11060 + }, + { + "epoch": 0.18708330868745934, + "grad_norm": 0.16641516983509064, + "learning_rate": 9.35361216730038e-06, + "loss": 0.0075, + "step": 11070 + }, + { + "epoch": 0.1872523089663098, + "grad_norm": 0.017938191071152687, + "learning_rate": 9.362061681453317e-06, + "loss": 0.0064, + "step": 11080 + }, + { + "epoch": 0.18742130924516026, + "grad_norm": 0.17037849128246307, + "learning_rate": 9.370511195606253e-06, + "loss": 0.0069, + "step": 11090 + }, + { + "epoch": 0.1875903095240107, + "grad_norm": 0.17609409987926483, + "learning_rate": 9.37896070975919e-06, + "loss": 0.0057, + "step": 11100 + }, + { + "epoch": 0.18775930980286118, + "grad_norm": 0.1867038458585739, + "learning_rate": 9.387410223912126e-06, + "loss": 0.0042, + "step": 11110 + }, + { + "epoch": 0.18792831008171162, + "grad_norm": 0.2825443744659424, + "learning_rate": 9.395859738065061e-06, + "loss": 0.005, + "step": 11120 + }, + { + "epoch": 0.1880973103605621, + "grad_norm": 0.7442421913146973, + "learning_rate": 9.404309252217997e-06, + "loss": 0.0066, + "step": 11130 + }, + { + "epoch": 0.18826631063941254, + "grad_norm": 0.43998825550079346, + "learning_rate": 9.412758766370934e-06, + "loss": 0.0054, + "step": 11140 + }, + { + "epoch": 0.18843531091826302, + "grad_norm": 0.3201293647289276, + "learning_rate": 9.42120828052387e-06, + "loss": 0.0047, + "step": 11150 + }, + { + "epoch": 0.1886043111971135, + "grad_norm": 0.541490912437439, + "learning_rate": 9.429657794676807e-06, + "loss": 0.0041, + "step": 11160 + }, + { + "epoch": 0.18877331147596393, + "grad_norm": 0.24210959672927856, + "learning_rate": 9.438107308829742e-06, + "loss": 0.0045, + "step": 11170 + }, + { + "epoch": 0.1889423117548144, + "grad_norm": 0.2670549154281616, + "learning_rate": 9.446556822982678e-06, + "loss": 0.0039, + "step": 11180 + }, + { + "epoch": 0.18911131203366485, + "grad_norm": 0.5198076367378235, + "learning_rate": 9.455006337135616e-06, + "loss": 0.0061, + "step": 11190 + }, + { + "epoch": 0.18928031231251533, + "grad_norm": 0.08111140131950378, + "learning_rate": 9.463455851288553e-06, + "loss": 0.0064, + "step": 11200 + }, + { + "epoch": 0.18944931259136577, + "grad_norm": 0.10334686934947968, + "learning_rate": 9.471905365441488e-06, + "loss": 0.0072, + "step": 11210 + }, + { + "epoch": 0.18961831287021624, + "grad_norm": 0.12224064022302628, + "learning_rate": 9.480354879594424e-06, + "loss": 0.005, + "step": 11220 + }, + { + "epoch": 0.1897873131490667, + 
"grad_norm": 0.16509070992469788, + "learning_rate": 9.48880439374736e-06, + "loss": 0.005, + "step": 11230 + }, + { + "epoch": 0.18995631342791716, + "grad_norm": 0.19058158993721008, + "learning_rate": 9.497253907900297e-06, + "loss": 0.0044, + "step": 11240 + }, + { + "epoch": 0.1901253137067676, + "grad_norm": 0.14141295850276947, + "learning_rate": 9.505703422053234e-06, + "loss": 0.005, + "step": 11250 + }, + { + "epoch": 0.19029431398561808, + "grad_norm": 0.6358665227890015, + "learning_rate": 9.514152936206169e-06, + "loss": 0.0063, + "step": 11260 + }, + { + "epoch": 0.19046331426446853, + "grad_norm": 0.11508552730083466, + "learning_rate": 9.522602450359105e-06, + "loss": 0.0079, + "step": 11270 + }, + { + "epoch": 0.190632314543319, + "grad_norm": 0.18003638088703156, + "learning_rate": 9.531051964512042e-06, + "loss": 0.0044, + "step": 11280 + }, + { + "epoch": 0.19080131482216944, + "grad_norm": 0.10557529330253601, + "learning_rate": 9.539501478664978e-06, + "loss": 0.0054, + "step": 11290 + }, + { + "epoch": 0.19097031510101992, + "grad_norm": 0.09687496721744537, + "learning_rate": 9.547950992817914e-06, + "loss": 0.007, + "step": 11300 + }, + { + "epoch": 0.1911393153798704, + "grad_norm": 0.25146037340164185, + "learning_rate": 9.55640050697085e-06, + "loss": 0.0046, + "step": 11310 + }, + { + "epoch": 0.19130831565872083, + "grad_norm": 0.15835073590278625, + "learning_rate": 9.564850021123786e-06, + "loss": 0.0037, + "step": 11320 + }, + { + "epoch": 0.1914773159375713, + "grad_norm": 0.053235359489917755, + "learning_rate": 9.573299535276722e-06, + "loss": 0.0029, + "step": 11330 + }, + { + "epoch": 0.19164631621642175, + "grad_norm": 0.16447119414806366, + "learning_rate": 9.581749049429659e-06, + "loss": 0.0043, + "step": 11340 + }, + { + "epoch": 0.19181531649527223, + "grad_norm": 0.05096621811389923, + "learning_rate": 9.590198563582595e-06, + "loss": 0.0033, + "step": 11350 + }, + { + "epoch": 0.19198431677412267, + "grad_norm": 0.11670669168233871, + "learning_rate": 9.59864807773553e-06, + "loss": 0.0055, + "step": 11360 + }, + { + "epoch": 0.19215331705297314, + "grad_norm": 0.22420817613601685, + "learning_rate": 9.607097591888467e-06, + "loss": 0.0081, + "step": 11370 + }, + { + "epoch": 0.1923223173318236, + "grad_norm": 0.18114367127418518, + "learning_rate": 9.615547106041403e-06, + "loss": 0.0052, + "step": 11380 + }, + { + "epoch": 0.19249131761067406, + "grad_norm": 0.1688271462917328, + "learning_rate": 9.62399662019434e-06, + "loss": 0.0044, + "step": 11390 + }, + { + "epoch": 0.1926603178895245, + "grad_norm": 0.2208949476480484, + "learning_rate": 9.632446134347276e-06, + "loss": 0.0045, + "step": 11400 + }, + { + "epoch": 0.19282931816837498, + "grad_norm": 0.04804624989628792, + "learning_rate": 9.640895648500213e-06, + "loss": 0.0052, + "step": 11410 + }, + { + "epoch": 0.19299831844722543, + "grad_norm": 0.037728212773799896, + "learning_rate": 9.649345162653149e-06, + "loss": 0.0049, + "step": 11420 + }, + { + "epoch": 0.1931673187260759, + "grad_norm": 0.0866350308060646, + "learning_rate": 9.657794676806086e-06, + "loss": 0.0038, + "step": 11430 + }, + { + "epoch": 0.19333631900492637, + "grad_norm": 0.18869557976722717, + "learning_rate": 9.66624419095902e-06, + "loss": 0.0041, + "step": 11440 + }, + { + "epoch": 0.19350531928377682, + "grad_norm": 0.2800125181674957, + "learning_rate": 9.674693705111957e-06, + "loss": 0.0081, + "step": 11450 + }, + { + "epoch": 0.1936743195626273, + "grad_norm": 0.07992298156023026, + "learning_rate": 
9.683143219264893e-06, + "loss": 0.0055, + "step": 11460 + }, + { + "epoch": 0.19384331984147773, + "grad_norm": 0.34909525513648987, + "learning_rate": 9.69159273341783e-06, + "loss": 0.0044, + "step": 11470 + }, + { + "epoch": 0.1940123201203282, + "grad_norm": 0.2653554677963257, + "learning_rate": 9.700042247570766e-06, + "loss": 0.0047, + "step": 11480 + }, + { + "epoch": 0.19418132039917865, + "grad_norm": 0.13063736259937286, + "learning_rate": 9.708491761723701e-06, + "loss": 0.0034, + "step": 11490 + }, + { + "epoch": 0.19435032067802913, + "grad_norm": 0.1563275158405304, + "learning_rate": 9.716941275876638e-06, + "loss": 0.0025, + "step": 11500 + }, + { + "epoch": 0.19451932095687957, + "grad_norm": 0.07389644533395767, + "learning_rate": 9.725390790029574e-06, + "loss": 0.0054, + "step": 11510 + }, + { + "epoch": 0.19468832123573004, + "grad_norm": 0.27305924892425537, + "learning_rate": 9.73384030418251e-06, + "loss": 0.0043, + "step": 11520 + }, + { + "epoch": 0.1948573215145805, + "grad_norm": 0.21362082660198212, + "learning_rate": 9.742289818335447e-06, + "loss": 0.0053, + "step": 11530 + }, + { + "epoch": 0.19502632179343096, + "grad_norm": 0.3026335835456848, + "learning_rate": 9.750739332488382e-06, + "loss": 0.0055, + "step": 11540 + }, + { + "epoch": 0.1951953220722814, + "grad_norm": 0.14857229590415955, + "learning_rate": 9.759188846641318e-06, + "loss": 0.0041, + "step": 11550 + }, + { + "epoch": 0.19536432235113188, + "grad_norm": 0.2065477967262268, + "learning_rate": 9.767638360794255e-06, + "loss": 0.0044, + "step": 11560 + }, + { + "epoch": 0.19553332262998233, + "grad_norm": 0.39100638031959534, + "learning_rate": 9.776087874947191e-06, + "loss": 0.0028, + "step": 11570 + }, + { + "epoch": 0.1957023229088328, + "grad_norm": 0.5789624452590942, + "learning_rate": 9.784537389100128e-06, + "loss": 0.0028, + "step": 11580 + }, + { + "epoch": 0.19587132318768327, + "grad_norm": 0.23838987946510315, + "learning_rate": 9.792986903253063e-06, + "loss": 0.0048, + "step": 11590 + }, + { + "epoch": 0.19604032346653372, + "grad_norm": 0.1580793559551239, + "learning_rate": 9.801436417405999e-06, + "loss": 0.0048, + "step": 11600 + }, + { + "epoch": 0.1962093237453842, + "grad_norm": 0.1848992258310318, + "learning_rate": 9.809885931558936e-06, + "loss": 0.0031, + "step": 11610 + }, + { + "epoch": 0.19637832402423464, + "grad_norm": 0.21419130265712738, + "learning_rate": 9.818335445711872e-06, + "loss": 0.0055, + "step": 11620 + }, + { + "epoch": 0.1965473243030851, + "grad_norm": 0.13937658071517944, + "learning_rate": 9.826784959864809e-06, + "loss": 0.0049, + "step": 11630 + }, + { + "epoch": 0.19671632458193555, + "grad_norm": 0.29598268866539, + "learning_rate": 9.835234474017745e-06, + "loss": 0.0031, + "step": 11640 + }, + { + "epoch": 0.19688532486078603, + "grad_norm": 0.18870386481285095, + "learning_rate": 9.843683988170682e-06, + "loss": 0.0043, + "step": 11650 + }, + { + "epoch": 0.19705432513963647, + "grad_norm": 0.2481774091720581, + "learning_rate": 9.852133502323618e-06, + "loss": 0.0048, + "step": 11660 + }, + { + "epoch": 0.19722332541848694, + "grad_norm": 0.19238494336605072, + "learning_rate": 9.860583016476553e-06, + "loss": 0.009, + "step": 11670 + }, + { + "epoch": 0.1973923256973374, + "grad_norm": 0.18605957925319672, + "learning_rate": 9.86903253062949e-06, + "loss": 0.0086, + "step": 11680 + }, + { + "epoch": 0.19756132597618786, + "grad_norm": 0.38150960206985474, + "learning_rate": 9.877482044782426e-06, + "loss": 0.0047, + "step": 11690 
+ }, + { + "epoch": 0.1977303262550383, + "grad_norm": 0.2200096994638443, + "learning_rate": 9.885931558935362e-06, + "loss": 0.0046, + "step": 11700 + }, + { + "epoch": 0.19789932653388878, + "grad_norm": 0.34150439500808716, + "learning_rate": 9.894381073088299e-06, + "loss": 0.0058, + "step": 11710 + }, + { + "epoch": 0.19806832681273925, + "grad_norm": 0.24692192673683167, + "learning_rate": 9.902830587241234e-06, + "loss": 0.0061, + "step": 11720 + }, + { + "epoch": 0.1982373270915897, + "grad_norm": 0.19490328431129456, + "learning_rate": 9.91128010139417e-06, + "loss": 0.0064, + "step": 11730 + }, + { + "epoch": 0.19840632737044017, + "grad_norm": 0.2527712881565094, + "learning_rate": 9.919729615547107e-06, + "loss": 0.0036, + "step": 11740 + }, + { + "epoch": 0.19857532764929062, + "grad_norm": 0.13580848276615143, + "learning_rate": 9.928179129700043e-06, + "loss": 0.0033, + "step": 11750 + }, + { + "epoch": 0.1987443279281411, + "grad_norm": 0.4252132773399353, + "learning_rate": 9.93662864385298e-06, + "loss": 0.0046, + "step": 11760 + }, + { + "epoch": 0.19891332820699154, + "grad_norm": 0.08987266570329666, + "learning_rate": 9.945078158005914e-06, + "loss": 0.004, + "step": 11770 + }, + { + "epoch": 0.199082328485842, + "grad_norm": 0.13477776944637299, + "learning_rate": 9.953527672158851e-06, + "loss": 0.006, + "step": 11780 + }, + { + "epoch": 0.19925132876469245, + "grad_norm": 0.3274911046028137, + "learning_rate": 9.961977186311787e-06, + "loss": 0.005, + "step": 11790 + }, + { + "epoch": 0.19942032904354293, + "grad_norm": 0.059423573315143585, + "learning_rate": 9.970426700464724e-06, + "loss": 0.0035, + "step": 11800 + }, + { + "epoch": 0.19958932932239337, + "grad_norm": 0.06708939373493195, + "learning_rate": 9.97887621461766e-06, + "loss": 0.005, + "step": 11810 + }, + { + "epoch": 0.19975832960124384, + "grad_norm": 0.05580686032772064, + "learning_rate": 9.987325728770595e-06, + "loss": 0.0049, + "step": 11820 + }, + { + "epoch": 0.1999273298800943, + "grad_norm": 0.09520850330591202, + "learning_rate": 9.995775242923532e-06, + "loss": 0.0036, + "step": 11830 + }, + { + "epoch": 0.20009633015894476, + "grad_norm": 0.23919424414634705, + "learning_rate": 9.999999945621971e-06, + "loss": 0.0068, + "step": 11840 + }, + { + "epoch": 0.20026533043779524, + "grad_norm": 0.14946545660495758, + "learning_rate": 9.99999951059775e-06, + "loss": 0.0044, + "step": 11850 + }, + { + "epoch": 0.20043433071664568, + "grad_norm": 0.25027787685394287, + "learning_rate": 9.999998640549348e-06, + "loss": 0.0048, + "step": 11860 + }, + { + "epoch": 0.20060333099549615, + "grad_norm": 0.1638702154159546, + "learning_rate": 9.999997335476836e-06, + "loss": 0.0077, + "step": 11870 + }, + { + "epoch": 0.2007723312743466, + "grad_norm": 0.42983323335647583, + "learning_rate": 9.999995595380332e-06, + "loss": 0.0062, + "step": 11880 + }, + { + "epoch": 0.20094133155319707, + "grad_norm": 0.23045586049556732, + "learning_rate": 9.999993420259984e-06, + "loss": 0.0055, + "step": 11890 + }, + { + "epoch": 0.20111033183204752, + "grad_norm": 0.27778342366218567, + "learning_rate": 9.999990810115985e-06, + "loss": 0.0068, + "step": 11900 + }, + { + "epoch": 0.201279332110898, + "grad_norm": 0.20811425149440765, + "learning_rate": 9.999987764948558e-06, + "loss": 0.006, + "step": 11910 + }, + { + "epoch": 0.20144833238974844, + "grad_norm": 0.4125745892524719, + "learning_rate": 9.999984284757971e-06, + "loss": 0.0061, + "step": 11920 + }, + { + "epoch": 0.2016173326685989, + "grad_norm": 
0.26201602816581726, + "learning_rate": 9.999980369544525e-06, + "loss": 0.0058, + "step": 11930 + }, + { + "epoch": 0.20178633294744935, + "grad_norm": 0.18778546154499054, + "learning_rate": 9.999976019308562e-06, + "loss": 0.006, + "step": 11940 + }, + { + "epoch": 0.20195533322629983, + "grad_norm": 0.19676972925662994, + "learning_rate": 9.99997123405046e-06, + "loss": 0.0065, + "step": 11950 + }, + { + "epoch": 0.20212433350515027, + "grad_norm": 0.24656665325164795, + "learning_rate": 9.999966013770634e-06, + "loss": 0.005, + "step": 11960 + }, + { + "epoch": 0.20229333378400075, + "grad_norm": 0.11270641535520554, + "learning_rate": 9.999960358469542e-06, + "loss": 0.0057, + "step": 11970 + }, + { + "epoch": 0.2024623340628512, + "grad_norm": 0.3879396915435791, + "learning_rate": 9.999954268147671e-06, + "loss": 0.0051, + "step": 11980 + }, + { + "epoch": 0.20263133434170166, + "grad_norm": 0.047397516667842865, + "learning_rate": 9.999947742805554e-06, + "loss": 0.0045, + "step": 11990 + }, + { + "epoch": 0.20280033462055214, + "grad_norm": 0.5076800584793091, + "learning_rate": 9.999940782443758e-06, + "loss": 0.0106, + "step": 12000 + }, + { + "epoch": 0.20296933489940258, + "grad_norm": 0.12176298350095749, + "learning_rate": 9.99993338706289e-06, + "loss": 0.0052, + "step": 12010 + }, + { + "epoch": 0.20313833517825305, + "grad_norm": 0.17117035388946533, + "learning_rate": 9.99992555666359e-06, + "loss": 0.0031, + "step": 12020 + }, + { + "epoch": 0.2033073354571035, + "grad_norm": 0.1444518268108368, + "learning_rate": 9.999917291246543e-06, + "loss": 0.0044, + "step": 12030 + }, + { + "epoch": 0.20347633573595397, + "grad_norm": 0.33203762769699097, + "learning_rate": 9.999908590812466e-06, + "loss": 0.0044, + "step": 12040 + }, + { + "epoch": 0.20364533601480442, + "grad_norm": 0.17868980765342712, + "learning_rate": 9.999899455362117e-06, + "loss": 0.004, + "step": 12050 + }, + { + "epoch": 0.2038143362936549, + "grad_norm": 0.03706150874495506, + "learning_rate": 9.999889884896288e-06, + "loss": 0.0063, + "step": 12060 + }, + { + "epoch": 0.20398333657250534, + "grad_norm": 0.12980827689170837, + "learning_rate": 9.999879879415816e-06, + "loss": 0.005, + "step": 12070 + }, + { + "epoch": 0.2041523368513558, + "grad_norm": 0.25462740659713745, + "learning_rate": 9.99986943892157e-06, + "loss": 0.0041, + "step": 12080 + }, + { + "epoch": 0.20432133713020625, + "grad_norm": 0.0007251466158777475, + "learning_rate": 9.999858563414457e-06, + "loss": 0.006, + "step": 12090 + }, + { + "epoch": 0.20449033740905673, + "grad_norm": 0.18278099596500397, + "learning_rate": 9.999847252895423e-06, + "loss": 0.0057, + "step": 12100 + }, + { + "epoch": 0.20465933768790717, + "grad_norm": 0.4333384335041046, + "learning_rate": 9.999835507365454e-06, + "loss": 0.0051, + "step": 12110 + }, + { + "epoch": 0.20482833796675765, + "grad_norm": 0.1187647208571434, + "learning_rate": 9.999823326825572e-06, + "loss": 0.0085, + "step": 12120 + }, + { + "epoch": 0.20499733824560812, + "grad_norm": 0.17793165147304535, + "learning_rate": 9.999810711276837e-06, + "loss": 0.0067, + "step": 12130 + }, + { + "epoch": 0.20516633852445856, + "grad_norm": 0.3019254207611084, + "learning_rate": 9.999797660720343e-06, + "loss": 0.0085, + "step": 12140 + }, + { + "epoch": 0.20533533880330904, + "grad_norm": 0.12040320783853531, + "learning_rate": 9.999784175157228e-06, + "loss": 0.0049, + "step": 12150 + }, + { + "epoch": 0.20550433908215948, + "grad_norm": 0.2843913733959198, + "learning_rate": 
9.999770254588666e-06, + "loss": 0.0058, + "step": 12160 + }, + { + "epoch": 0.20567333936100995, + "grad_norm": 0.25708165764808655, + "learning_rate": 9.999755899015866e-06, + "loss": 0.0053, + "step": 12170 + }, + { + "epoch": 0.2058423396398604, + "grad_norm": 0.28958919644355774, + "learning_rate": 9.99974110844008e-06, + "loss": 0.006, + "step": 12180 + }, + { + "epoch": 0.20601133991871087, + "grad_norm": 0.15175886452198029, + "learning_rate": 9.999725882862594e-06, + "loss": 0.0044, + "step": 12190 + }, + { + "epoch": 0.20618034019756132, + "grad_norm": 0.17259806394577026, + "learning_rate": 9.999710222284731e-06, + "loss": 0.0075, + "step": 12200 + }, + { + "epoch": 0.2063493404764118, + "grad_norm": 0.310881644487381, + "learning_rate": 9.999694126707856e-06, + "loss": 0.0054, + "step": 12210 + }, + { + "epoch": 0.20651834075526224, + "grad_norm": 0.09686554968357086, + "learning_rate": 9.999677596133364e-06, + "loss": 0.0053, + "step": 12220 + }, + { + "epoch": 0.2066873410341127, + "grad_norm": 0.17074330151081085, + "learning_rate": 9.999660630562699e-06, + "loss": 0.0036, + "step": 12230 + }, + { + "epoch": 0.20685634131296315, + "grad_norm": 0.41990748047828674, + "learning_rate": 9.999643229997337e-06, + "loss": 0.0053, + "step": 12240 + }, + { + "epoch": 0.20702534159181363, + "grad_norm": 0.23658278584480286, + "learning_rate": 9.99962539443879e-06, + "loss": 0.0059, + "step": 12250 + }, + { + "epoch": 0.2071943418706641, + "grad_norm": 0.38540852069854736, + "learning_rate": 9.999607123888608e-06, + "loss": 0.0063, + "step": 12260 + }, + { + "epoch": 0.20736334214951455, + "grad_norm": 0.14451487362384796, + "learning_rate": 9.999588418348384e-06, + "loss": 0.0065, + "step": 12270 + }, + { + "epoch": 0.20753234242836502, + "grad_norm": 0.19181889295578003, + "learning_rate": 9.999569277819743e-06, + "loss": 0.0044, + "step": 12280 + }, + { + "epoch": 0.20770134270721546, + "grad_norm": 0.29033470153808594, + "learning_rate": 9.999549702304351e-06, + "loss": 0.0039, + "step": 12290 + }, + { + "epoch": 0.20787034298606594, + "grad_norm": 0.13398291170597076, + "learning_rate": 9.999529691803913e-06, + "loss": 0.0059, + "step": 12300 + }, + { + "epoch": 0.20803934326491638, + "grad_norm": 0.39608412981033325, + "learning_rate": 9.999509246320169e-06, + "loss": 0.0062, + "step": 12310 + }, + { + "epoch": 0.20820834354376686, + "grad_norm": 0.03699997439980507, + "learning_rate": 9.999488365854894e-06, + "loss": 0.0053, + "step": 12320 + }, + { + "epoch": 0.2083773438226173, + "grad_norm": 0.14555566012859344, + "learning_rate": 9.999467050409912e-06, + "loss": 0.0038, + "step": 12330 + }, + { + "epoch": 0.20854634410146777, + "grad_norm": 0.20318925380706787, + "learning_rate": 9.999445299987072e-06, + "loss": 0.0061, + "step": 12340 + }, + { + "epoch": 0.20871534438031822, + "grad_norm": 0.29658663272857666, + "learning_rate": 9.999423114588268e-06, + "loss": 0.0059, + "step": 12350 + }, + { + "epoch": 0.2088843446591687, + "grad_norm": 0.1962607055902481, + "learning_rate": 9.99940049421543e-06, + "loss": 0.0042, + "step": 12360 + }, + { + "epoch": 0.20905334493801914, + "grad_norm": 0.12968648970127106, + "learning_rate": 9.999377438870528e-06, + "loss": 0.0033, + "step": 12370 + }, + { + "epoch": 0.2092223452168696, + "grad_norm": 1.8838653564453125, + "learning_rate": 9.999353948555563e-06, + "loss": 0.0058, + "step": 12380 + }, + { + "epoch": 0.20939134549572005, + "grad_norm": 0.09402038902044296, + "learning_rate": 9.999330023272584e-06, + "loss": 0.0041, + "step": 
12390 + }, + { + "epoch": 0.20956034577457053, + "grad_norm": 0.13466863334178925, + "learning_rate": 9.999305663023672e-06, + "loss": 0.0052, + "step": 12400 + }, + { + "epoch": 0.209729346053421, + "grad_norm": 0.32404276728630066, + "learning_rate": 9.999280867810943e-06, + "loss": 0.0065, + "step": 12410 + }, + { + "epoch": 0.20989834633227145, + "grad_norm": 0.14441674947738647, + "learning_rate": 9.999255637636558e-06, + "loss": 0.0055, + "step": 12420 + }, + { + "epoch": 0.21006734661112192, + "grad_norm": 0.39780494570732117, + "learning_rate": 9.999229972502708e-06, + "loss": 0.0066, + "step": 12430 + }, + { + "epoch": 0.21023634688997236, + "grad_norm": 0.31614312529563904, + "learning_rate": 9.999203872411632e-06, + "loss": 0.0062, + "step": 12440 + }, + { + "epoch": 0.21040534716882284, + "grad_norm": 0.23963358998298645, + "learning_rate": 9.999177337365596e-06, + "loss": 0.0046, + "step": 12450 + }, + { + "epoch": 0.21057434744767328, + "grad_norm": 0.15409396588802338, + "learning_rate": 9.99915036736691e-06, + "loss": 0.0046, + "step": 12460 + }, + { + "epoch": 0.21074334772652376, + "grad_norm": 0.12386608868837357, + "learning_rate": 9.999122962417919e-06, + "loss": 0.0033, + "step": 12470 + }, + { + "epoch": 0.2109123480053742, + "grad_norm": 0.1905480921268463, + "learning_rate": 9.999095122521012e-06, + "loss": 0.0061, + "step": 12480 + }, + { + "epoch": 0.21108134828422467, + "grad_norm": 0.27343010902404785, + "learning_rate": 9.999066847678606e-06, + "loss": 0.0117, + "step": 12490 + }, + { + "epoch": 0.21125034856307512, + "grad_norm": 0.10000277310609818, + "learning_rate": 9.999038137893163e-06, + "loss": 0.0034, + "step": 12500 + }, + { + "epoch": 0.2114193488419256, + "grad_norm": 0.2904250919818878, + "learning_rate": 9.999008993167183e-06, + "loss": 0.0053, + "step": 12510 + }, + { + "epoch": 0.21158834912077604, + "grad_norm": 0.10671574622392654, + "learning_rate": 9.998979413503199e-06, + "loss": 0.0038, + "step": 12520 + }, + { + "epoch": 0.2117573493996265, + "grad_norm": 0.04285205155611038, + "learning_rate": 9.998949398903784e-06, + "loss": 0.0053, + "step": 12530 + }, + { + "epoch": 0.21192634967847698, + "grad_norm": 0.1715831607580185, + "learning_rate": 9.998918949371552e-06, + "loss": 0.0054, + "step": 12540 + }, + { + "epoch": 0.21209534995732743, + "grad_norm": 0.12727899849414825, + "learning_rate": 9.998888064909152e-06, + "loss": 0.0064, + "step": 12550 + }, + { + "epoch": 0.2122643502361779, + "grad_norm": 0.20481939613819122, + "learning_rate": 9.998856745519269e-06, + "loss": 0.0041, + "step": 12560 + }, + { + "epoch": 0.21243335051502835, + "grad_norm": 0.20393306016921997, + "learning_rate": 9.998824991204628e-06, + "loss": 0.0031, + "step": 12570 + }, + { + "epoch": 0.21260235079387882, + "grad_norm": 0.3028099834918976, + "learning_rate": 9.998792801967996e-06, + "loss": 0.0097, + "step": 12580 + }, + { + "epoch": 0.21277135107272926, + "grad_norm": 0.20722444355487823, + "learning_rate": 9.998760177812167e-06, + "loss": 0.0032, + "step": 12590 + }, + { + "epoch": 0.21294035135157974, + "grad_norm": 0.2849743068218231, + "learning_rate": 9.998727118739986e-06, + "loss": 0.005, + "step": 12600 + }, + { + "epoch": 0.21310935163043018, + "grad_norm": 0.10578224062919617, + "learning_rate": 9.998693624754325e-06, + "loss": 0.0038, + "step": 12610 + }, + { + "epoch": 0.21327835190928066, + "grad_norm": 0.17871376872062683, + "learning_rate": 9.998659695858099e-06, + "loss": 0.0049, + "step": 12620 + }, + { + "epoch": 0.2134473521881311, + 
"grad_norm": 0.1357841044664383, + "learning_rate": 9.998625332054262e-06, + "loss": 0.0027, + "step": 12630 + }, + { + "epoch": 0.21361635246698157, + "grad_norm": 0.1257234364748001, + "learning_rate": 9.9985905333458e-06, + "loss": 0.0063, + "step": 12640 + }, + { + "epoch": 0.21378535274583202, + "grad_norm": 0.22507745027542114, + "learning_rate": 9.998555299735744e-06, + "loss": 0.005, + "step": 12650 + }, + { + "epoch": 0.2139543530246825, + "grad_norm": 0.1501859575510025, + "learning_rate": 9.998519631227158e-06, + "loss": 0.0045, + "step": 12660 + }, + { + "epoch": 0.21412335330353294, + "grad_norm": 0.13181540369987488, + "learning_rate": 9.998483527823146e-06, + "loss": 0.0036, + "step": 12670 + }, + { + "epoch": 0.2142923535823834, + "grad_norm": 0.11986613273620605, + "learning_rate": 9.998446989526849e-06, + "loss": 0.0042, + "step": 12680 + }, + { + "epoch": 0.21446135386123388, + "grad_norm": 0.18259811401367188, + "learning_rate": 9.998410016341447e-06, + "loss": 0.0049, + "step": 12690 + }, + { + "epoch": 0.21463035414008433, + "grad_norm": 0.17600387334823608, + "learning_rate": 9.998372608270152e-06, + "loss": 0.0053, + "step": 12700 + }, + { + "epoch": 0.2147993544189348, + "grad_norm": 0.11839409172534943, + "learning_rate": 9.998334765316226e-06, + "loss": 0.0041, + "step": 12710 + }, + { + "epoch": 0.21496835469778525, + "grad_norm": 0.10443863272666931, + "learning_rate": 9.998296487482956e-06, + "loss": 0.0043, + "step": 12720 + }, + { + "epoch": 0.21513735497663572, + "grad_norm": 0.3014087677001953, + "learning_rate": 9.998257774773676e-06, + "loss": 0.0054, + "step": 12730 + }, + { + "epoch": 0.21530635525548616, + "grad_norm": 0.06494636088609695, + "learning_rate": 9.99821862719175e-06, + "loss": 0.0044, + "step": 12740 + }, + { + "epoch": 0.21547535553433664, + "grad_norm": 0.1542501151561737, + "learning_rate": 9.99817904474059e-06, + "loss": 0.0068, + "step": 12750 + }, + { + "epoch": 0.21564435581318708, + "grad_norm": 0.21469393372535706, + "learning_rate": 9.998139027423635e-06, + "loss": 0.0048, + "step": 12760 + }, + { + "epoch": 0.21581335609203756, + "grad_norm": 0.20954947173595428, + "learning_rate": 9.998098575244367e-06, + "loss": 0.0073, + "step": 12770 + }, + { + "epoch": 0.215982356370888, + "grad_norm": 0.24559859931468964, + "learning_rate": 9.998057688206307e-06, + "loss": 0.0045, + "step": 12780 + }, + { + "epoch": 0.21615135664973847, + "grad_norm": 0.24553582072257996, + "learning_rate": 9.998016366313012e-06, + "loss": 0.0036, + "step": 12790 + }, + { + "epoch": 0.21632035692858892, + "grad_norm": 0.267829954624176, + "learning_rate": 9.997974609568077e-06, + "loss": 0.0041, + "step": 12800 + }, + { + "epoch": 0.2164893572074394, + "grad_norm": 0.16241054236888885, + "learning_rate": 9.997932417975135e-06, + "loss": 0.0038, + "step": 12810 + }, + { + "epoch": 0.21665835748628987, + "grad_norm": 0.10603076964616776, + "learning_rate": 9.997889791537859e-06, + "loss": 0.0027, + "step": 12820 + }, + { + "epoch": 0.2168273577651403, + "grad_norm": 0.020528987050056458, + "learning_rate": 9.997846730259955e-06, + "loss": 0.0046, + "step": 12830 + }, + { + "epoch": 0.21699635804399078, + "grad_norm": 0.18575718998908997, + "learning_rate": 9.99780323414517e-06, + "loss": 0.0039, + "step": 12840 + }, + { + "epoch": 0.21716535832284123, + "grad_norm": 0.21036140620708466, + "learning_rate": 9.997759303197287e-06, + "loss": 0.003, + "step": 12850 + }, + { + "epoch": 0.2173343586016917, + "grad_norm": 0.22635206580162048, + "learning_rate": 
9.99771493742013e-06, + "loss": 0.0051, + "step": 12860 + }, + { + "epoch": 0.21750335888054215, + "grad_norm": 0.116457998752594, + "learning_rate": 9.997670136817561e-06, + "loss": 0.005, + "step": 12870 + }, + { + "epoch": 0.21767235915939262, + "grad_norm": 0.17869815230369568, + "learning_rate": 9.997624901393477e-06, + "loss": 0.0035, + "step": 12880 + }, + { + "epoch": 0.21784135943824307, + "grad_norm": 0.2751922309398651, + "learning_rate": 9.99757923115181e-06, + "loss": 0.0049, + "step": 12890 + }, + { + "epoch": 0.21801035971709354, + "grad_norm": 0.4475092589855194, + "learning_rate": 9.997533126096537e-06, + "loss": 0.0066, + "step": 12900 + }, + { + "epoch": 0.21817935999594398, + "grad_norm": 0.48583754897117615, + "learning_rate": 9.997486586231668e-06, + "loss": 0.0025, + "step": 12910 + }, + { + "epoch": 0.21834836027479446, + "grad_norm": 0.20378488302230835, + "learning_rate": 9.997439611561252e-06, + "loss": 0.0034, + "step": 12920 + }, + { + "epoch": 0.2185173605536449, + "grad_norm": 0.058693524450063705, + "learning_rate": 9.997392202089378e-06, + "loss": 0.0057, + "step": 12930 + }, + { + "epoch": 0.21868636083249537, + "grad_norm": 0.1765138804912567, + "learning_rate": 9.997344357820168e-06, + "loss": 0.0042, + "step": 12940 + }, + { + "epoch": 0.21885536111134585, + "grad_norm": 0.2786969542503357, + "learning_rate": 9.997296078757788e-06, + "loss": 0.0047, + "step": 12950 + }, + { + "epoch": 0.2190243613901963, + "grad_norm": 0.1344185769557953, + "learning_rate": 9.997247364906435e-06, + "loss": 0.0039, + "step": 12960 + }, + { + "epoch": 0.21919336166904677, + "grad_norm": 0.14174804091453552, + "learning_rate": 9.99719821627035e-06, + "loss": 0.0056, + "step": 12970 + }, + { + "epoch": 0.2193623619478972, + "grad_norm": 0.055436067283153534, + "learning_rate": 9.99714863285381e-06, + "loss": 0.0027, + "step": 12980 + }, + { + "epoch": 0.21953136222674768, + "grad_norm": 0.20866861939430237, + "learning_rate": 9.997098614661124e-06, + "loss": 0.0069, + "step": 12990 + }, + { + "epoch": 0.21970036250559813, + "grad_norm": 0.08676794171333313, + "learning_rate": 9.997048161696649e-06, + "loss": 0.005, + "step": 13000 + }, + { + "epoch": 0.2198693627844486, + "grad_norm": 0.09381774812936783, + "learning_rate": 9.996997273964771e-06, + "loss": 0.0027, + "step": 13010 + }, + { + "epoch": 0.22003836306329905, + "grad_norm": 0.12107016891241074, + "learning_rate": 9.996945951469921e-06, + "loss": 0.0041, + "step": 13020 + }, + { + "epoch": 0.22020736334214952, + "grad_norm": 0.07253371924161911, + "learning_rate": 9.996894194216562e-06, + "loss": 0.0029, + "step": 13030 + }, + { + "epoch": 0.22037636362099997, + "grad_norm": 0.08685167133808136, + "learning_rate": 9.996842002209196e-06, + "loss": 0.0068, + "step": 13040 + }, + { + "epoch": 0.22054536389985044, + "grad_norm": 0.10493378341197968, + "learning_rate": 9.996789375452367e-06, + "loss": 0.0054, + "step": 13050 + }, + { + "epoch": 0.22071436417870088, + "grad_norm": 0.12263305485248566, + "learning_rate": 9.996736313950652e-06, + "loss": 0.0058, + "step": 13060 + }, + { + "epoch": 0.22088336445755136, + "grad_norm": 0.18619713187217712, + "learning_rate": 9.996682817708668e-06, + "loss": 0.0034, + "step": 13070 + }, + { + "epoch": 0.2210523647364018, + "grad_norm": 0.053724195808172226, + "learning_rate": 9.996628886731071e-06, + "loss": 0.0032, + "step": 13080 + }, + { + "epoch": 0.22122136501525227, + "grad_norm": 0.023195017129182816, + "learning_rate": 9.996574521022548e-06, + "loss": 0.0053, + 
"step": 13090 + }, + { + "epoch": 0.22139036529410275, + "grad_norm": 0.2828596830368042, + "learning_rate": 9.996519720587835e-06, + "loss": 0.0035, + "step": 13100 + }, + { + "epoch": 0.2215593655729532, + "grad_norm": 0.19304883480072021, + "learning_rate": 9.996464485431699e-06, + "loss": 0.004, + "step": 13110 + }, + { + "epoch": 0.22172836585180367, + "grad_norm": 0.15398164093494415, + "learning_rate": 9.996408815558941e-06, + "loss": 0.0043, + "step": 13120 + }, + { + "epoch": 0.2218973661306541, + "grad_norm": 0.1082804799079895, + "learning_rate": 9.99635271097441e-06, + "loss": 0.0053, + "step": 13130 + }, + { + "epoch": 0.22206636640950458, + "grad_norm": 0.09701705724000931, + "learning_rate": 9.996296171682984e-06, + "loss": 0.0043, + "step": 13140 + }, + { + "epoch": 0.22223536668835503, + "grad_norm": 0.28328144550323486, + "learning_rate": 9.996239197689586e-06, + "loss": 0.0054, + "step": 13150 + }, + { + "epoch": 0.2224043669672055, + "grad_norm": 0.1199091449379921, + "learning_rate": 9.996181788999168e-06, + "loss": 0.0038, + "step": 13160 + }, + { + "epoch": 0.22257336724605595, + "grad_norm": 0.1811678409576416, + "learning_rate": 9.996123945616727e-06, + "loss": 0.0034, + "step": 13170 + }, + { + "epoch": 0.22274236752490642, + "grad_norm": 0.05057886987924576, + "learning_rate": 9.996065667547298e-06, + "loss": 0.0039, + "step": 13180 + }, + { + "epoch": 0.22291136780375687, + "grad_norm": 0.11425051838159561, + "learning_rate": 9.996006954795949e-06, + "loss": 0.0028, + "step": 13190 + }, + { + "epoch": 0.22308036808260734, + "grad_norm": 0.1719987392425537, + "learning_rate": 9.995947807367787e-06, + "loss": 0.0037, + "step": 13200 + }, + { + "epoch": 0.22324936836145778, + "grad_norm": 0.1575050801038742, + "learning_rate": 9.99588822526796e-06, + "loss": 0.0032, + "step": 13210 + }, + { + "epoch": 0.22341836864030826, + "grad_norm": 0.3843485116958618, + "learning_rate": 9.995828208501654e-06, + "loss": 0.0034, + "step": 13220 + }, + { + "epoch": 0.22358736891915873, + "grad_norm": 0.17446254193782806, + "learning_rate": 9.995767757074087e-06, + "loss": 0.0036, + "step": 13230 + }, + { + "epoch": 0.22375636919800918, + "grad_norm": 0.059524331241846085, + "learning_rate": 9.99570687099052e-06, + "loss": 0.0047, + "step": 13240 + }, + { + "epoch": 0.22392536947685965, + "grad_norm": 0.15919871628284454, + "learning_rate": 9.995645550256251e-06, + "loss": 0.0094, + "step": 13250 + }, + { + "epoch": 0.2240943697557101, + "grad_norm": 0.22587934136390686, + "learning_rate": 9.995583794876616e-06, + "loss": 0.0054, + "step": 13260 + }, + { + "epoch": 0.22426337003456057, + "grad_norm": 0.13634823262691498, + "learning_rate": 9.995521604856983e-06, + "loss": 0.007, + "step": 13270 + }, + { + "epoch": 0.224432370313411, + "grad_norm": 0.06666050106287003, + "learning_rate": 9.99545898020277e-06, + "loss": 0.0052, + "step": 13280 + }, + { + "epoch": 0.22460137059226148, + "grad_norm": 0.3350073993206024, + "learning_rate": 9.99539592091942e-06, + "loss": 0.0062, + "step": 13290 + }, + { + "epoch": 0.22477037087111193, + "grad_norm": 0.10985402017831802, + "learning_rate": 9.995332427012422e-06, + "loss": 0.0033, + "step": 13300 + }, + { + "epoch": 0.2249393711499624, + "grad_norm": 0.12870152294635773, + "learning_rate": 9.9952684984873e-06, + "loss": 0.0066, + "step": 13310 + }, + { + "epoch": 0.22510837142881285, + "grad_norm": 0.09722672402858734, + "learning_rate": 9.995204135349617e-06, + "loss": 0.0038, + "step": 13320 + }, + { + "epoch": 0.22527737170766332, + 
"grad_norm": 0.49585485458374023, + "learning_rate": 9.995139337604971e-06, + "loss": 0.0064, + "step": 13330 + }, + { + "epoch": 0.22544637198651377, + "grad_norm": 0.21925857663154602, + "learning_rate": 9.995074105259001e-06, + "loss": 0.0032, + "step": 13340 + }, + { + "epoch": 0.22561537226536424, + "grad_norm": 0.18533477187156677, + "learning_rate": 9.995008438317384e-06, + "loss": 0.0038, + "step": 13350 + }, + { + "epoch": 0.22578437254421468, + "grad_norm": 0.18802869319915771, + "learning_rate": 9.994942336785829e-06, + "loss": 0.0033, + "step": 13360 + }, + { + "epoch": 0.22595337282306516, + "grad_norm": 0.17008714377880096, + "learning_rate": 9.99487580067009e-06, + "loss": 0.0039, + "step": 13370 + }, + { + "epoch": 0.22612237310191563, + "grad_norm": 0.15874530375003815, + "learning_rate": 9.994808829975955e-06, + "loss": 0.0054, + "step": 13380 + }, + { + "epoch": 0.22629137338076608, + "grad_norm": 0.16346663236618042, + "learning_rate": 9.994741424709253e-06, + "loss": 0.0061, + "step": 13390 + }, + { + "epoch": 0.22646037365961655, + "grad_norm": 0.21573509275913239, + "learning_rate": 9.994673584875846e-06, + "loss": 0.0048, + "step": 13400 + }, + { + "epoch": 0.226629373938467, + "grad_norm": 0.18036310374736786, + "learning_rate": 9.994605310481637e-06, + "loss": 0.0044, + "step": 13410 + }, + { + "epoch": 0.22679837421731747, + "grad_norm": 0.5115329623222351, + "learning_rate": 9.994536601532568e-06, + "loss": 0.0047, + "step": 13420 + }, + { + "epoch": 0.2269673744961679, + "grad_norm": 0.09441306442022324, + "learning_rate": 9.994467458034613e-06, + "loss": 0.0052, + "step": 13430 + }, + { + "epoch": 0.22713637477501838, + "grad_norm": 0.09542250633239746, + "learning_rate": 9.994397879993793e-06, + "loss": 0.0049, + "step": 13440 + }, + { + "epoch": 0.22730537505386883, + "grad_norm": 0.15195991098880768, + "learning_rate": 9.994327867416159e-06, + "loss": 0.0063, + "step": 13450 + }, + { + "epoch": 0.2274743753327193, + "grad_norm": 0.039866454899311066, + "learning_rate": 9.9942574203078e-06, + "loss": 0.0039, + "step": 13460 + }, + { + "epoch": 0.22764337561156975, + "grad_norm": 0.17334040999412537, + "learning_rate": 9.99418653867485e-06, + "loss": 0.004, + "step": 13470 + }, + { + "epoch": 0.22781237589042022, + "grad_norm": 0.1515452116727829, + "learning_rate": 9.994115222523472e-06, + "loss": 0.0044, + "step": 13480 + }, + { + "epoch": 0.22798137616927067, + "grad_norm": 0.11643579602241516, + "learning_rate": 9.994043471859875e-06, + "loss": 0.0074, + "step": 13490 + }, + { + "epoch": 0.22815037644812114, + "grad_norm": 0.14805810153484344, + "learning_rate": 9.993971286690299e-06, + "loss": 0.0057, + "step": 13500 + }, + { + "epoch": 0.2283193767269716, + "grad_norm": 0.09401725232601166, + "learning_rate": 9.993898667021023e-06, + "loss": 0.005, + "step": 13510 + }, + { + "epoch": 0.22848837700582206, + "grad_norm": 0.22201889753341675, + "learning_rate": 9.993825612858367e-06, + "loss": 0.0054, + "step": 13520 + }, + { + "epoch": 0.22865737728467253, + "grad_norm": 0.13076730072498322, + "learning_rate": 9.993752124208687e-06, + "loss": 0.0049, + "step": 13530 + }, + { + "epoch": 0.22882637756352298, + "grad_norm": 0.08481460809707642, + "learning_rate": 9.993678201078378e-06, + "loss": 0.0084, + "step": 13540 + }, + { + "epoch": 0.22899537784237345, + "grad_norm": 0.02569173462688923, + "learning_rate": 9.99360384347387e-06, + "loss": 0.0055, + "step": 13550 + }, + { + "epoch": 0.2291643781212239, + "grad_norm": 0.2832903563976288, + 
"learning_rate": 9.993529051401633e-06, + "loss": 0.0044, + "step": 13560 + }, + { + "epoch": 0.22933337840007437, + "grad_norm": 0.156660258769989, + "learning_rate": 9.993453824868175e-06, + "loss": 0.0042, + "step": 13570 + }, + { + "epoch": 0.2295023786789248, + "grad_norm": 0.3335116505622864, + "learning_rate": 9.993378163880038e-06, + "loss": 0.0073, + "step": 13580 + }, + { + "epoch": 0.22967137895777529, + "grad_norm": 0.055631548166275024, + "learning_rate": 9.993302068443811e-06, + "loss": 0.0034, + "step": 13590 + }, + { + "epoch": 0.22984037923662573, + "grad_norm": 0.18227708339691162, + "learning_rate": 9.993225538566109e-06, + "loss": 0.0073, + "step": 13600 + }, + { + "epoch": 0.2300093795154762, + "grad_norm": 0.28870803117752075, + "learning_rate": 9.993148574253592e-06, + "loss": 0.0063, + "step": 13610 + }, + { + "epoch": 0.23017837979432665, + "grad_norm": 0.09444483369588852, + "learning_rate": 9.993071175512957e-06, + "loss": 0.005, + "step": 13620 + }, + { + "epoch": 0.23034738007317712, + "grad_norm": 0.1265878528356552, + "learning_rate": 9.992993342350939e-06, + "loss": 0.0032, + "step": 13630 + }, + { + "epoch": 0.2305163803520276, + "grad_norm": 0.06479588150978088, + "learning_rate": 9.992915074774307e-06, + "loss": 0.0044, + "step": 13640 + }, + { + "epoch": 0.23068538063087804, + "grad_norm": 0.23029182851314545, + "learning_rate": 9.992836372789871e-06, + "loss": 0.0049, + "step": 13650 + }, + { + "epoch": 0.2308543809097285, + "grad_norm": 0.19470201432704926, + "learning_rate": 9.992757236404483e-06, + "loss": 0.0044, + "step": 13660 + }, + { + "epoch": 0.23102338118857896, + "grad_norm": 0.3662036657333374, + "learning_rate": 9.992677665625023e-06, + "loss": 0.0042, + "step": 13670 + }, + { + "epoch": 0.23119238146742943, + "grad_norm": 0.27756965160369873, + "learning_rate": 9.992597660458416e-06, + "loss": 0.0049, + "step": 13680 + }, + { + "epoch": 0.23136138174627988, + "grad_norm": 0.3882659673690796, + "learning_rate": 9.992517220911623e-06, + "loss": 0.0051, + "step": 13690 + }, + { + "epoch": 0.23153038202513035, + "grad_norm": 0.11957082152366638, + "learning_rate": 9.992436346991642e-06, + "loss": 0.0041, + "step": 13700 + }, + { + "epoch": 0.2316993823039808, + "grad_norm": 0.15464948117733002, + "learning_rate": 9.992355038705512e-06, + "loss": 0.0051, + "step": 13710 + }, + { + "epoch": 0.23186838258283127, + "grad_norm": 0.08093637973070145, + "learning_rate": 9.992273296060304e-06, + "loss": 0.0043, + "step": 13720 + }, + { + "epoch": 0.2320373828616817, + "grad_norm": 0.38809144496917725, + "learning_rate": 9.99219111906313e-06, + "loss": 0.006, + "step": 13730 + }, + { + "epoch": 0.23220638314053219, + "grad_norm": 0.18487603962421417, + "learning_rate": 9.992108507721143e-06, + "loss": 0.0062, + "step": 13740 + }, + { + "epoch": 0.23237538341938263, + "grad_norm": 0.12486666440963745, + "learning_rate": 9.992025462041526e-06, + "loss": 0.0048, + "step": 13750 + }, + { + "epoch": 0.2325443836982331, + "grad_norm": 0.21606610715389252, + "learning_rate": 9.99194198203151e-06, + "loss": 0.0052, + "step": 13760 + }, + { + "epoch": 0.23271338397708355, + "grad_norm": 0.11294318735599518, + "learning_rate": 9.991858067698353e-06, + "loss": 0.0031, + "step": 13770 + }, + { + "epoch": 0.23288238425593402, + "grad_norm": 0.15346220135688782, + "learning_rate": 9.991773719049358e-06, + "loss": 0.0054, + "step": 13780 + }, + { + "epoch": 0.2330513845347845, + "grad_norm": 0.19085946679115295, + "learning_rate": 9.991688936091863e-06, + "loss": 
0.0027, + "step": 13790 + }, + { + "epoch": 0.23322038481363494, + "grad_norm": 0.15518149733543396, + "learning_rate": 9.991603718833248e-06, + "loss": 0.0055, + "step": 13800 + }, + { + "epoch": 0.2333893850924854, + "grad_norm": 0.08052901178598404, + "learning_rate": 9.991518067280921e-06, + "loss": 0.0067, + "step": 13810 + }, + { + "epoch": 0.23355838537133586, + "grad_norm": 0.08903773874044418, + "learning_rate": 9.991431981442341e-06, + "loss": 0.0048, + "step": 13820 + }, + { + "epoch": 0.23372738565018633, + "grad_norm": 0.12610691785812378, + "learning_rate": 9.991345461324992e-06, + "loss": 0.0054, + "step": 13830 + }, + { + "epoch": 0.23389638592903678, + "grad_norm": 0.2848169505596161, + "learning_rate": 9.991258506936406e-06, + "loss": 0.0054, + "step": 13840 + }, + { + "epoch": 0.23406538620788725, + "grad_norm": 0.10816321521997452, + "learning_rate": 9.991171118284147e-06, + "loss": 0.0043, + "step": 13850 + }, + { + "epoch": 0.2342343864867377, + "grad_norm": 0.053277160972356796, + "learning_rate": 9.991083295375817e-06, + "loss": 0.0052, + "step": 13860 + }, + { + "epoch": 0.23440338676558817, + "grad_norm": 0.09209351986646652, + "learning_rate": 9.990995038219056e-06, + "loss": 0.0033, + "step": 13870 + }, + { + "epoch": 0.2345723870444386, + "grad_norm": 0.13402052223682404, + "learning_rate": 9.990906346821548e-06, + "loss": 0.0038, + "step": 13880 + }, + { + "epoch": 0.23474138732328909, + "grad_norm": 0.19042134284973145, + "learning_rate": 9.990817221191005e-06, + "loss": 0.0041, + "step": 13890 + }, + { + "epoch": 0.23491038760213953, + "grad_norm": 0.1634221225976944, + "learning_rate": 9.990727661335183e-06, + "loss": 0.0058, + "step": 13900 + }, + { + "epoch": 0.23507938788099, + "grad_norm": 0.06429363042116165, + "learning_rate": 9.990637667261873e-06, + "loss": 0.0052, + "step": 13910 + }, + { + "epoch": 0.23524838815984048, + "grad_norm": 0.07568903267383575, + "learning_rate": 9.990547238978907e-06, + "loss": 0.0033, + "step": 13920 + }, + { + "epoch": 0.23541738843869092, + "grad_norm": 0.23190951347351074, + "learning_rate": 9.99045637649415e-06, + "loss": 0.0061, + "step": 13930 + }, + { + "epoch": 0.2355863887175414, + "grad_norm": 0.03557845950126648, + "learning_rate": 9.990365079815509e-06, + "loss": 0.0037, + "step": 13940 + }, + { + "epoch": 0.23575538899639184, + "grad_norm": 0.10422530025243759, + "learning_rate": 9.990273348950928e-06, + "loss": 0.0051, + "step": 13950 + }, + { + "epoch": 0.2359243892752423, + "grad_norm": 0.2304789423942566, + "learning_rate": 9.990181183908387e-06, + "loss": 0.0036, + "step": 13960 + }, + { + "epoch": 0.23609338955409276, + "grad_norm": 0.1503400355577469, + "learning_rate": 9.990088584695905e-06, + "loss": 0.0067, + "step": 13970 + }, + { + "epoch": 0.23626238983294323, + "grad_norm": 0.2617582678794861, + "learning_rate": 9.989995551321538e-06, + "loss": 0.0047, + "step": 13980 + }, + { + "epoch": 0.23643139011179368, + "grad_norm": 0.2739869952201843, + "learning_rate": 9.989902083793383e-06, + "loss": 0.0047, + "step": 13990 + }, + { + "epoch": 0.23660039039064415, + "grad_norm": 0.12977376580238342, + "learning_rate": 9.989808182119569e-06, + "loss": 0.0034, + "step": 14000 + }, + { + "epoch": 0.2367693906694946, + "grad_norm": 0.3293931782245636, + "learning_rate": 9.989713846308267e-06, + "loss": 0.0039, + "step": 14010 + }, + { + "epoch": 0.23693839094834507, + "grad_norm": 0.11267578601837158, + "learning_rate": 9.989619076367683e-06, + "loss": 0.0038, + "step": 14020 + }, + { + "epoch": 
0.2371073912271955, + "grad_norm": 0.33272507786750793, + "learning_rate": 9.989523872306067e-06, + "loss": 0.0038, + "step": 14030 + }, + { + "epoch": 0.23727639150604599, + "grad_norm": 0.20745112001895905, + "learning_rate": 9.989428234131697e-06, + "loss": 0.0048, + "step": 14040 + }, + { + "epoch": 0.23744539178489643, + "grad_norm": 0.1488361805677414, + "learning_rate": 9.989332161852896e-06, + "loss": 0.0037, + "step": 14050 + }, + { + "epoch": 0.2376143920637469, + "grad_norm": 0.036361102014780045, + "learning_rate": 9.989235655478025e-06, + "loss": 0.0051, + "step": 14060 + }, + { + "epoch": 0.23778339234259738, + "grad_norm": 0.2059086710214615, + "learning_rate": 9.989138715015478e-06, + "loss": 0.0039, + "step": 14070 + }, + { + "epoch": 0.23795239262144782, + "grad_norm": 0.08130540698766708, + "learning_rate": 9.989041340473688e-06, + "loss": 0.0047, + "step": 14080 + }, + { + "epoch": 0.2381213929002983, + "grad_norm": 0.1256265938282013, + "learning_rate": 9.98894353186113e-06, + "loss": 0.0036, + "step": 14090 + }, + { + "epoch": 0.23829039317914874, + "grad_norm": 0.09473961591720581, + "learning_rate": 9.988845289186314e-06, + "loss": 0.0075, + "step": 14100 + }, + { + "epoch": 0.2384593934579992, + "grad_norm": 0.14110074937343597, + "learning_rate": 9.988746612457786e-06, + "loss": 0.0036, + "step": 14110 + }, + { + "epoch": 0.23862839373684966, + "grad_norm": 0.3486090302467346, + "learning_rate": 9.988647501684131e-06, + "loss": 0.0057, + "step": 14120 + }, + { + "epoch": 0.23879739401570013, + "grad_norm": 0.377188503742218, + "learning_rate": 9.988547956873974e-06, + "loss": 0.0035, + "step": 14130 + }, + { + "epoch": 0.23896639429455058, + "grad_norm": 0.2691023349761963, + "learning_rate": 9.988447978035974e-06, + "loss": 0.0036, + "step": 14140 + }, + { + "epoch": 0.23913539457340105, + "grad_norm": 0.24957741796970367, + "learning_rate": 9.988347565178828e-06, + "loss": 0.0042, + "step": 14150 + }, + { + "epoch": 0.2393043948522515, + "grad_norm": 0.2357136458158493, + "learning_rate": 9.988246718311276e-06, + "loss": 0.004, + "step": 14160 + }, + { + "epoch": 0.23947339513110197, + "grad_norm": 0.7160472273826599, + "learning_rate": 9.988145437442093e-06, + "loss": 0.0036, + "step": 14170 + }, + { + "epoch": 0.2396423954099524, + "grad_norm": 0.24182184040546417, + "learning_rate": 9.988043722580088e-06, + "loss": 0.0042, + "step": 14180 + }, + { + "epoch": 0.2398113956888029, + "grad_norm": 0.3829415440559387, + "learning_rate": 9.98794157373411e-06, + "loss": 0.0038, + "step": 14190 + }, + { + "epoch": 0.23998039596765336, + "grad_norm": 0.15229769051074982, + "learning_rate": 9.987838990913049e-06, + "loss": 0.0045, + "step": 14200 + }, + { + "epoch": 0.2401493962465038, + "grad_norm": 0.3212862014770508, + "learning_rate": 9.98773597412583e-06, + "loss": 0.0061, + "step": 14210 + }, + { + "epoch": 0.24031839652535428, + "grad_norm": 0.2428811639547348, + "learning_rate": 9.987632523381412e-06, + "loss": 0.0045, + "step": 14220 + }, + { + "epoch": 0.24048739680420472, + "grad_norm": 0.329699844121933, + "learning_rate": 9.9875286386888e-06, + "loss": 0.0074, + "step": 14230 + }, + { + "epoch": 0.2406563970830552, + "grad_norm": 0.148408442735672, + "learning_rate": 9.987424320057035e-06, + "loss": 0.0042, + "step": 14240 + }, + { + "epoch": 0.24082539736190564, + "grad_norm": 0.28386399149894714, + "learning_rate": 9.987319567495187e-06, + "loss": 0.0038, + "step": 14250 + }, + { + "epoch": 0.24099439764075611, + "grad_norm": 0.15869386494159698, + 
"learning_rate": 9.987214381012372e-06, + "loss": 0.0039, + "step": 14260 + }, + { + "epoch": 0.24116339791960656, + "grad_norm": 0.12310047447681427, + "learning_rate": 9.987108760617741e-06, + "loss": 0.003, + "step": 14270 + }, + { + "epoch": 0.24133239819845703, + "grad_norm": 0.14694136381149292, + "learning_rate": 9.987002706320488e-06, + "loss": 0.0042, + "step": 14280 + }, + { + "epoch": 0.24150139847730748, + "grad_norm": 0.07741039246320724, + "learning_rate": 9.986896218129834e-06, + "loss": 0.004, + "step": 14290 + }, + { + "epoch": 0.24167039875615795, + "grad_norm": 0.3411828875541687, + "learning_rate": 9.986789296055048e-06, + "loss": 0.0042, + "step": 14300 + }, + { + "epoch": 0.2418393990350084, + "grad_norm": 0.185929074883461, + "learning_rate": 9.986681940105432e-06, + "loss": 0.0046, + "step": 14310 + }, + { + "epoch": 0.24200839931385887, + "grad_norm": 0.06522829830646515, + "learning_rate": 9.986574150290327e-06, + "loss": 0.0048, + "step": 14320 + }, + { + "epoch": 0.24217739959270934, + "grad_norm": 0.23070041835308075, + "learning_rate": 9.986465926619109e-06, + "loss": 0.0044, + "step": 14330 + }, + { + "epoch": 0.2423463998715598, + "grad_norm": 0.07087959349155426, + "learning_rate": 9.986357269101197e-06, + "loss": 0.0026, + "step": 14340 + }, + { + "epoch": 0.24251540015041026, + "grad_norm": 0.13839539885520935, + "learning_rate": 9.986248177746041e-06, + "loss": 0.0041, + "step": 14350 + }, + { + "epoch": 0.2426844004292607, + "grad_norm": 0.3412225842475891, + "learning_rate": 9.986138652563135e-06, + "loss": 0.0033, + "step": 14360 + }, + { + "epoch": 0.24285340070811118, + "grad_norm": 0.11813254654407501, + "learning_rate": 9.98602869356201e-06, + "loss": 0.0035, + "step": 14370 + }, + { + "epoch": 0.24302240098696162, + "grad_norm": 0.12311132252216339, + "learning_rate": 9.985918300752229e-06, + "loss": 0.0029, + "step": 14380 + }, + { + "epoch": 0.2431914012658121, + "grad_norm": 0.008245767094194889, + "learning_rate": 9.985807474143398e-06, + "loss": 0.0034, + "step": 14390 + }, + { + "epoch": 0.24336040154466254, + "grad_norm": 0.19633473455905914, + "learning_rate": 9.985696213745162e-06, + "loss": 0.0063, + "step": 14400 + }, + { + "epoch": 0.24352940182351301, + "grad_norm": 0.1152830645442009, + "learning_rate": 9.985584519567197e-06, + "loss": 0.0034, + "step": 14410 + }, + { + "epoch": 0.24369840210236346, + "grad_norm": 0.30895453691482544, + "learning_rate": 9.985472391619225e-06, + "loss": 0.0049, + "step": 14420 + }, + { + "epoch": 0.24386740238121393, + "grad_norm": 0.09675145894289017, + "learning_rate": 9.985359829910998e-06, + "loss": 0.0049, + "step": 14430 + }, + { + "epoch": 0.24403640266006438, + "grad_norm": 0.15377745032310486, + "learning_rate": 9.98524683445231e-06, + "loss": 0.007, + "step": 14440 + }, + { + "epoch": 0.24420540293891485, + "grad_norm": 0.1542172133922577, + "learning_rate": 9.985133405252997e-06, + "loss": 0.0051, + "step": 14450 + }, + { + "epoch": 0.2443744032177653, + "grad_norm": 0.09837678074836731, + "learning_rate": 9.985019542322923e-06, + "loss": 0.0047, + "step": 14460 + }, + { + "epoch": 0.24454340349661577, + "grad_norm": 0.2775014638900757, + "learning_rate": 9.984905245671995e-06, + "loss": 0.004, + "step": 14470 + }, + { + "epoch": 0.24471240377546624, + "grad_norm": 0.11706257611513138, + "learning_rate": 9.984790515310158e-06, + "loss": 0.0027, + "step": 14480 + }, + { + "epoch": 0.2448814040543167, + "grad_norm": 0.20297011733055115, + "learning_rate": 9.984675351247395e-06, + "loss": 
0.0052, + "step": 14490 + }, + { + "epoch": 0.24505040433316716, + "grad_norm": 0.12076503783464432, + "learning_rate": 9.984559753493724e-06, + "loss": 0.0027, + "step": 14500 + }, + { + "epoch": 0.2452194046120176, + "grad_norm": 0.1017741933465004, + "learning_rate": 9.984443722059206e-06, + "loss": 0.0047, + "step": 14510 + }, + { + "epoch": 0.24538840489086808, + "grad_norm": 0.1515427678823471, + "learning_rate": 9.984327256953932e-06, + "loss": 0.0059, + "step": 14520 + }, + { + "epoch": 0.24555740516971852, + "grad_norm": 0.11836301535367966, + "learning_rate": 9.984210358188038e-06, + "loss": 0.003, + "step": 14530 + }, + { + "epoch": 0.245726405448569, + "grad_norm": 0.14873522520065308, + "learning_rate": 9.984093025771693e-06, + "loss": 0.0032, + "step": 14540 + }, + { + "epoch": 0.24589540572741944, + "grad_norm": 0.13684223592281342, + "learning_rate": 9.983975259715106e-06, + "loss": 0.0048, + "step": 14550 + }, + { + "epoch": 0.24606440600626991, + "grad_norm": 0.22442513704299927, + "learning_rate": 9.983857060028524e-06, + "loss": 0.0029, + "step": 14560 + }, + { + "epoch": 0.24623340628512036, + "grad_norm": 0.11228632926940918, + "learning_rate": 9.983738426722232e-06, + "loss": 0.0063, + "step": 14570 + }, + { + "epoch": 0.24640240656397083, + "grad_norm": 0.13161133229732513, + "learning_rate": 9.983619359806547e-06, + "loss": 0.0031, + "step": 14580 + }, + { + "epoch": 0.24657140684282128, + "grad_norm": 0.17749890685081482, + "learning_rate": 9.983499859291834e-06, + "loss": 0.005, + "step": 14590 + }, + { + "epoch": 0.24674040712167175, + "grad_norm": 0.1559005081653595, + "learning_rate": 9.983379925188488e-06, + "loss": 0.0049, + "step": 14600 + }, + { + "epoch": 0.24690940740052222, + "grad_norm": 0.11561734974384308, + "learning_rate": 9.983259557506941e-06, + "loss": 0.0076, + "step": 14610 + }, + { + "epoch": 0.24707840767937267, + "grad_norm": 0.20156532526016235, + "learning_rate": 9.98313875625767e-06, + "loss": 0.0062, + "step": 14620 + }, + { + "epoch": 0.24724740795822314, + "grad_norm": 0.08245956152677536, + "learning_rate": 9.983017521451183e-06, + "loss": 0.0033, + "step": 14630 + }, + { + "epoch": 0.2474164082370736, + "grad_norm": 0.09482322633266449, + "learning_rate": 9.982895853098027e-06, + "loss": 0.0037, + "step": 14640 + }, + { + "epoch": 0.24758540851592406, + "grad_norm": 0.09171206504106522, + "learning_rate": 9.98277375120879e-06, + "loss": 0.0037, + "step": 14650 + }, + { + "epoch": 0.2477544087947745, + "grad_norm": 0.049698662012815475, + "learning_rate": 9.982651215794096e-06, + "loss": 0.0028, + "step": 14660 + }, + { + "epoch": 0.24792340907362498, + "grad_norm": 0.1317257583141327, + "learning_rate": 9.982528246864603e-06, + "loss": 0.0029, + "step": 14670 + }, + { + "epoch": 0.24809240935247542, + "grad_norm": 0.2114088535308838, + "learning_rate": 9.982404844431013e-06, + "loss": 0.0031, + "step": 14680 + }, + { + "epoch": 0.2482614096313259, + "grad_norm": 0.08456574380397797, + "learning_rate": 9.98228100850406e-06, + "loss": 0.0031, + "step": 14690 + }, + { + "epoch": 0.24843040991017634, + "grad_norm": 0.26122376322746277, + "learning_rate": 9.98215673909452e-06, + "loss": 0.0055, + "step": 14700 + }, + { + "epoch": 0.24859941018902681, + "grad_norm": 0.3789493143558502, + "learning_rate": 9.982032036213202e-06, + "loss": 0.0029, + "step": 14710 + }, + { + "epoch": 0.24876841046787726, + "grad_norm": 0.2621791660785675, + "learning_rate": 9.981906899870962e-06, + "loss": 0.0056, + "step": 14720 + }, + { + "epoch": 
0.24893741074672773, + "grad_norm": 0.06250303238630295, + "learning_rate": 9.981781330078683e-06, + "loss": 0.0031, + "step": 14730 + }, + { + "epoch": 0.2491064110255782, + "grad_norm": 0.3181895613670349, + "learning_rate": 9.98165532684729e-06, + "loss": 0.0054, + "step": 14740 + }, + { + "epoch": 0.24927541130442865, + "grad_norm": 0.15799207985401154, + "learning_rate": 9.981528890187749e-06, + "loss": 0.0058, + "step": 14750 + }, + { + "epoch": 0.24944441158327912, + "grad_norm": 0.1720704287290573, + "learning_rate": 9.981402020111057e-06, + "loss": 0.0031, + "step": 14760 + }, + { + "epoch": 0.24961341186212957, + "grad_norm": 0.13233742117881775, + "learning_rate": 9.981274716628253e-06, + "loss": 0.004, + "step": 14770 + }, + { + "epoch": 0.24978241214098004, + "grad_norm": 0.15866127610206604, + "learning_rate": 9.981146979750414e-06, + "loss": 0.0043, + "step": 14780 + }, + { + "epoch": 0.2499514124198305, + "grad_norm": 0.08606252819299698, + "learning_rate": 9.981018809488655e-06, + "loss": 0.004, + "step": 14790 + }, + { + "epoch": 0.25012041269868096, + "grad_norm": 0.14118173718452454, + "learning_rate": 9.980890205854125e-06, + "loss": 0.0039, + "step": 14800 + }, + { + "epoch": 0.25028941297753143, + "grad_norm": 0.057821471244096756, + "learning_rate": 9.980761168858015e-06, + "loss": 0.0035, + "step": 14810 + }, + { + "epoch": 0.25045841325638185, + "grad_norm": 0.3060109615325928, + "learning_rate": 9.98063169851155e-06, + "loss": 0.0048, + "step": 14820 + }, + { + "epoch": 0.2506274135352323, + "grad_norm": 0.39351001381874084, + "learning_rate": 9.980501794825995e-06, + "loss": 0.0069, + "step": 14830 + }, + { + "epoch": 0.2507964138140828, + "grad_norm": 0.2610602378845215, + "learning_rate": 9.980371457812654e-06, + "loss": 0.0054, + "step": 14840 + }, + { + "epoch": 0.25096541409293327, + "grad_norm": 0.08033086359500885, + "learning_rate": 9.980240687482864e-06, + "loss": 0.0038, + "step": 14850 + }, + { + "epoch": 0.2511344143717837, + "grad_norm": 0.2523294985294342, + "learning_rate": 9.980109483848005e-06, + "loss": 0.0047, + "step": 14860 + }, + { + "epoch": 0.25130341465063416, + "grad_norm": 0.09582140296697617, + "learning_rate": 9.979977846919494e-06, + "loss": 0.0029, + "step": 14870 + }, + { + "epoch": 0.25147241492948463, + "grad_norm": 0.07006009668111801, + "learning_rate": 9.979845776708779e-06, + "loss": 0.0043, + "step": 14880 + }, + { + "epoch": 0.2516414152083351, + "grad_norm": 0.2001093178987503, + "learning_rate": 9.979713273227356e-06, + "loss": 0.0037, + "step": 14890 + }, + { + "epoch": 0.2518104154871856, + "grad_norm": 0.3163839876651764, + "learning_rate": 9.979580336486749e-06, + "loss": 0.0052, + "step": 14900 + }, + { + "epoch": 0.251979415766036, + "grad_norm": 0.15247350931167603, + "learning_rate": 9.979446966498527e-06, + "loss": 0.004, + "step": 14910 + }, + { + "epoch": 0.25214841604488647, + "grad_norm": 0.207497701048851, + "learning_rate": 9.979313163274293e-06, + "loss": 0.0054, + "step": 14920 + }, + { + "epoch": 0.25231741632373694, + "grad_norm": 0.05913986265659332, + "learning_rate": 9.979178926825688e-06, + "loss": 0.0025, + "step": 14930 + }, + { + "epoch": 0.2524864166025874, + "grad_norm": 0.11547628045082092, + "learning_rate": 9.979044257164391e-06, + "loss": 0.0053, + "step": 14940 + }, + { + "epoch": 0.25265541688143783, + "grad_norm": 0.4229731857776642, + "learning_rate": 9.978909154302122e-06, + "loss": 0.0034, + "step": 14950 + }, + { + "epoch": 0.2528244171602883, + "grad_norm": 0.09081882983446121, + 
"learning_rate": 9.97877361825063e-06, + "loss": 0.0111, + "step": 14960 + }, + { + "epoch": 0.2529934174391388, + "grad_norm": 0.1778619885444641, + "learning_rate": 9.978637649021712e-06, + "loss": 0.0062, + "step": 14970 + }, + { + "epoch": 0.25316241771798925, + "grad_norm": 0.22037698328495026, + "learning_rate": 9.978501246627197e-06, + "loss": 0.0048, + "step": 14980 + }, + { + "epoch": 0.25333141799683967, + "grad_norm": 0.22217440605163574, + "learning_rate": 9.978364411078951e-06, + "loss": 0.0035, + "step": 14990 + }, + { + "epoch": 0.25350041827569014, + "grad_norm": 0.1691211760044098, + "learning_rate": 9.97822714238888e-06, + "loss": 0.0059, + "step": 15000 + }, + { + "epoch": 0.2536694185545406, + "grad_norm": 0.3209209740161896, + "learning_rate": 9.97808944056893e-06, + "loss": 0.0058, + "step": 15010 + }, + { + "epoch": 0.2538384188333911, + "grad_norm": 0.17985154688358307, + "learning_rate": 9.97795130563108e-06, + "loss": 0.0034, + "step": 15020 + }, + { + "epoch": 0.25400741911224156, + "grad_norm": 0.31680458784103394, + "learning_rate": 9.977812737587345e-06, + "loss": 0.0048, + "step": 15030 + }, + { + "epoch": 0.254176419391092, + "grad_norm": 0.3769959807395935, + "learning_rate": 9.977673736449783e-06, + "loss": 0.003, + "step": 15040 + }, + { + "epoch": 0.25434541966994245, + "grad_norm": 0.18421614170074463, + "learning_rate": 9.97753430223049e-06, + "loss": 0.0032, + "step": 15050 + }, + { + "epoch": 0.2545144199487929, + "grad_norm": 0.24074816703796387, + "learning_rate": 9.977394434941597e-06, + "loss": 0.0049, + "step": 15060 + }, + { + "epoch": 0.2546834202276434, + "grad_norm": 0.34638261795043945, + "learning_rate": 9.977254134595271e-06, + "loss": 0.0047, + "step": 15070 + }, + { + "epoch": 0.2548524205064938, + "grad_norm": 0.0570184588432312, + "learning_rate": 9.97711340120372e-06, + "loss": 0.0046, + "step": 15080 + }, + { + "epoch": 0.2550214207853443, + "grad_norm": 0.15112407505512238, + "learning_rate": 9.976972234779188e-06, + "loss": 0.0074, + "step": 15090 + }, + { + "epoch": 0.25519042106419476, + "grad_norm": 0.0630689412355423, + "learning_rate": 9.976830635333958e-06, + "loss": 0.005, + "step": 15100 + }, + { + "epoch": 0.25535942134304523, + "grad_norm": 0.07248848676681519, + "learning_rate": 9.97668860288035e-06, + "loss": 0.0033, + "step": 15110 + }, + { + "epoch": 0.25552842162189565, + "grad_norm": 0.321814626455307, + "learning_rate": 9.97654613743072e-06, + "loss": 0.0048, + "step": 15120 + }, + { + "epoch": 0.2556974219007461, + "grad_norm": 0.09801140427589417, + "learning_rate": 9.976403238997466e-06, + "loss": 0.0043, + "step": 15130 + }, + { + "epoch": 0.2558664221795966, + "grad_norm": 0.09544934332370758, + "learning_rate": 9.976259907593017e-06, + "loss": 0.0035, + "step": 15140 + }, + { + "epoch": 0.25603542245844707, + "grad_norm": 0.16570298373699188, + "learning_rate": 9.976116143229846e-06, + "loss": 0.003, + "step": 15150 + }, + { + "epoch": 0.25620442273729754, + "grad_norm": 0.26754939556121826, + "learning_rate": 9.975971945920459e-06, + "loss": 0.0046, + "step": 15160 + }, + { + "epoch": 0.25637342301614796, + "grad_norm": 0.3961745798587799, + "learning_rate": 9.975827315677406e-06, + "loss": 0.004, + "step": 15170 + }, + { + "epoch": 0.25654242329499843, + "grad_norm": 0.1366022229194641, + "learning_rate": 9.975682252513269e-06, + "loss": 0.0052, + "step": 15180 + }, + { + "epoch": 0.2567114235738489, + "grad_norm": 0.3830682337284088, + "learning_rate": 9.975536756440666e-06, + "loss": 0.0032, + "step": 
15190 + }, + { + "epoch": 0.2568804238526994, + "grad_norm": 0.07213328778743744, + "learning_rate": 9.975390827472258e-06, + "loss": 0.0035, + "step": 15200 + }, + { + "epoch": 0.2570494241315498, + "grad_norm": 0.14138446748256683, + "learning_rate": 9.975244465620744e-06, + "loss": 0.0038, + "step": 15210 + }, + { + "epoch": 0.25721842441040027, + "grad_norm": 0.1475197672843933, + "learning_rate": 9.975097670898855e-06, + "loss": 0.0031, + "step": 15220 + }, + { + "epoch": 0.25738742468925074, + "grad_norm": 0.013520384207367897, + "learning_rate": 9.974950443319362e-06, + "loss": 0.0044, + "step": 15230 + }, + { + "epoch": 0.2575564249681012, + "grad_norm": 0.14274832606315613, + "learning_rate": 9.974802782895077e-06, + "loss": 0.003, + "step": 15240 + }, + { + "epoch": 0.25772542524695163, + "grad_norm": 0.19510887563228607, + "learning_rate": 9.974654689638847e-06, + "loss": 0.0048, + "step": 15250 + }, + { + "epoch": 0.2578944255258021, + "grad_norm": 0.12843619287014008, + "learning_rate": 9.974506163563557e-06, + "loss": 0.0047, + "step": 15260 + }, + { + "epoch": 0.2580634258046526, + "grad_norm": 0.1665114313364029, + "learning_rate": 9.974357204682127e-06, + "loss": 0.004, + "step": 15270 + }, + { + "epoch": 0.25823242608350305, + "grad_norm": 0.17540499567985535, + "learning_rate": 9.97420781300752e-06, + "loss": 0.0041, + "step": 15280 + }, + { + "epoch": 0.25840142636235347, + "grad_norm": 0.061821773648262024, + "learning_rate": 9.974057988552733e-06, + "loss": 0.0047, + "step": 15290 + }, + { + "epoch": 0.25857042664120394, + "grad_norm": 0.13247564435005188, + "learning_rate": 9.9739077313308e-06, + "loss": 0.0037, + "step": 15300 + }, + { + "epoch": 0.2587394269200544, + "grad_norm": 0.21004728972911835, + "learning_rate": 9.973757041354795e-06, + "loss": 0.0037, + "step": 15310 + }, + { + "epoch": 0.2589084271989049, + "grad_norm": 0.06903041154146194, + "learning_rate": 9.97360591863783e-06, + "loss": 0.0059, + "step": 15320 + }, + { + "epoch": 0.25907742747775536, + "grad_norm": 0.11489465087652206, + "learning_rate": 9.973454363193053e-06, + "loss": 0.0039, + "step": 15330 + }, + { + "epoch": 0.2592464277566058, + "grad_norm": 0.28296104073524475, + "learning_rate": 9.973302375033648e-06, + "loss": 0.0035, + "step": 15340 + }, + { + "epoch": 0.25941542803545625, + "grad_norm": 0.038037847727537155, + "learning_rate": 9.97314995417284e-06, + "loss": 0.0036, + "step": 15350 + }, + { + "epoch": 0.2595844283143067, + "grad_norm": 0.24455541372299194, + "learning_rate": 9.972997100623892e-06, + "loss": 0.0051, + "step": 15360 + }, + { + "epoch": 0.2597534285931572, + "grad_norm": 0.1739683598279953, + "learning_rate": 9.9728438144001e-06, + "loss": 0.0057, + "step": 15370 + }, + { + "epoch": 0.2599224288720076, + "grad_norm": 0.014664621092379093, + "learning_rate": 9.972690095514802e-06, + "loss": 0.0034, + "step": 15380 + }, + { + "epoch": 0.2600914291508581, + "grad_norm": 0.13587848842144012, + "learning_rate": 9.972535943981374e-06, + "loss": 0.0029, + "step": 15390 + }, + { + "epoch": 0.26026042942970856, + "grad_norm": 0.09054923802614212, + "learning_rate": 9.972381359813227e-06, + "loss": 0.0051, + "step": 15400 + }, + { + "epoch": 0.26042942970855903, + "grad_norm": 0.14758436381816864, + "learning_rate": 9.972226343023809e-06, + "loss": 0.0034, + "step": 15410 + }, + { + "epoch": 0.26059842998740945, + "grad_norm": 0.23704244196414948, + "learning_rate": 9.972070893626607e-06, + "loss": 0.0048, + "step": 15420 + }, + { + "epoch": 0.2607674302662599, + 
"grad_norm": 0.11782558262348175, + "learning_rate": 9.971915011635149e-06, + "loss": 0.0038, + "step": 15430 + }, + { + "epoch": 0.2609364305451104, + "grad_norm": 0.19053632020950317, + "learning_rate": 9.971758697062994e-06, + "loss": 0.0038, + "step": 15440 + }, + { + "epoch": 0.26110543082396087, + "grad_norm": 0.02710811421275139, + "learning_rate": 9.971601949923744e-06, + "loss": 0.0044, + "step": 15450 + }, + { + "epoch": 0.26127443110281134, + "grad_norm": 0.18848317861557007, + "learning_rate": 9.971444770231036e-06, + "loss": 0.0058, + "step": 15460 + }, + { + "epoch": 0.26144343138166176, + "grad_norm": 0.09840314090251923, + "learning_rate": 9.971287157998546e-06, + "loss": 0.0035, + "step": 15470 + }, + { + "epoch": 0.26161243166051223, + "grad_norm": 0.17986223101615906, + "learning_rate": 9.971129113239988e-06, + "loss": 0.0056, + "step": 15480 + }, + { + "epoch": 0.2617814319393627, + "grad_norm": 0.10968855768442154, + "learning_rate": 9.970970635969111e-06, + "loss": 0.0032, + "step": 15490 + }, + { + "epoch": 0.2619504322182132, + "grad_norm": 0.06092964857816696, + "learning_rate": 9.970811726199702e-06, + "loss": 0.0094, + "step": 15500 + }, + { + "epoch": 0.2621194324970636, + "grad_norm": 0.04663922265172005, + "learning_rate": 9.970652383945591e-06, + "loss": 0.0034, + "step": 15510 + }, + { + "epoch": 0.26228843277591407, + "grad_norm": 0.06795930862426758, + "learning_rate": 9.970492609220638e-06, + "loss": 0.0035, + "step": 15520 + }, + { + "epoch": 0.26245743305476454, + "grad_norm": 0.2160760760307312, + "learning_rate": 9.970332402038745e-06, + "loss": 0.0046, + "step": 15530 + }, + { + "epoch": 0.262626433333615, + "grad_norm": 0.15789270401000977, + "learning_rate": 9.970171762413852e-06, + "loss": 0.0044, + "step": 15540 + }, + { + "epoch": 0.26279543361246543, + "grad_norm": 0.3337605893611908, + "learning_rate": 9.970010690359935e-06, + "loss": 0.0051, + "step": 15550 + }, + { + "epoch": 0.2629644338913159, + "grad_norm": 0.013609246350824833, + "learning_rate": 9.969849185891007e-06, + "loss": 0.0034, + "step": 15560 + }, + { + "epoch": 0.2631334341701664, + "grad_norm": 0.1721629798412323, + "learning_rate": 9.96968724902112e-06, + "loss": 0.0052, + "step": 15570 + }, + { + "epoch": 0.26330243444901685, + "grad_norm": 0.07175387442111969, + "learning_rate": 9.969524879764364e-06, + "loss": 0.0045, + "step": 15580 + }, + { + "epoch": 0.2634714347278673, + "grad_norm": 0.09726123511791229, + "learning_rate": 9.969362078134867e-06, + "loss": 0.006, + "step": 15590 + }, + { + "epoch": 0.26364043500671774, + "grad_norm": 0.16029754281044006, + "learning_rate": 9.96919884414679e-06, + "loss": 0.0029, + "step": 15600 + }, + { + "epoch": 0.2638094352855682, + "grad_norm": 0.14958487451076508, + "learning_rate": 9.96903517781434e-06, + "loss": 0.0041, + "step": 15610 + }, + { + "epoch": 0.2639784355644187, + "grad_norm": 0.2625289857387543, + "learning_rate": 9.96887107915175e-06, + "loss": 0.0043, + "step": 15620 + }, + { + "epoch": 0.26414743584326916, + "grad_norm": 0.011520381085574627, + "learning_rate": 9.968706548173304e-06, + "loss": 0.004, + "step": 15630 + }, + { + "epoch": 0.2643164361221196, + "grad_norm": 0.38226380944252014, + "learning_rate": 9.968541584893314e-06, + "loss": 0.0042, + "step": 15640 + }, + { + "epoch": 0.26448543640097005, + "grad_norm": 0.09610099345445633, + "learning_rate": 9.968376189326131e-06, + "loss": 0.0028, + "step": 15650 + }, + { + "epoch": 0.2646544366798205, + "grad_norm": 0.21713967621326447, + "learning_rate": 
9.96821036148615e-06, + "loss": 0.0039, + "step": 15660 + }, + { + "epoch": 0.264823436958671, + "grad_norm": 0.4078729450702667, + "learning_rate": 9.968044101387794e-06, + "loss": 0.0071, + "step": 15670 + }, + { + "epoch": 0.2649924372375214, + "grad_norm": 0.15719065070152283, + "learning_rate": 9.967877409045533e-06, + "loss": 0.0045, + "step": 15680 + }, + { + "epoch": 0.2651614375163719, + "grad_norm": 0.22879549860954285, + "learning_rate": 9.967710284473866e-06, + "loss": 0.0042, + "step": 15690 + }, + { + "epoch": 0.26533043779522236, + "grad_norm": 0.3180147409439087, + "learning_rate": 9.967542727687334e-06, + "loss": 0.0077, + "step": 15700 + }, + { + "epoch": 0.26549943807407284, + "grad_norm": 0.05374595522880554, + "learning_rate": 9.967374738700518e-06, + "loss": 0.0026, + "step": 15710 + }, + { + "epoch": 0.2656684383529233, + "grad_norm": 0.38689932227134705, + "learning_rate": 9.967206317528032e-06, + "loss": 0.0033, + "step": 15720 + }, + { + "epoch": 0.2658374386317737, + "grad_norm": 0.09185245633125305, + "learning_rate": 9.967037464184529e-06, + "loss": 0.0041, + "step": 15730 + }, + { + "epoch": 0.2660064389106242, + "grad_norm": 0.2640765905380249, + "learning_rate": 9.9668681786847e-06, + "loss": 0.0045, + "step": 15740 + }, + { + "epoch": 0.26617543918947467, + "grad_norm": 0.12624865770339966, + "learning_rate": 9.966698461043277e-06, + "loss": 0.005, + "step": 15750 + }, + { + "epoch": 0.26634443946832514, + "grad_norm": 0.20609816908836365, + "learning_rate": 9.966528311275022e-06, + "loss": 0.0037, + "step": 15760 + }, + { + "epoch": 0.26651343974717556, + "grad_norm": 0.1872451901435852, + "learning_rate": 9.96635772939474e-06, + "loss": 0.0063, + "step": 15770 + }, + { + "epoch": 0.26668244002602604, + "grad_norm": 0.11002341657876968, + "learning_rate": 9.966186715417274e-06, + "loss": 0.0052, + "step": 15780 + }, + { + "epoch": 0.2668514403048765, + "grad_norm": 0.08211161196231842, + "learning_rate": 9.966015269357502e-06, + "loss": 0.0076, + "step": 15790 + }, + { + "epoch": 0.267020440583727, + "grad_norm": 0.08170771598815918, + "learning_rate": 9.96584339123034e-06, + "loss": 0.0029, + "step": 15800 + }, + { + "epoch": 0.2671894408625774, + "grad_norm": 0.11849153786897659, + "learning_rate": 9.965671081050745e-06, + "loss": 0.0057, + "step": 15810 + }, + { + "epoch": 0.26735844114142787, + "grad_norm": 0.09837999939918518, + "learning_rate": 9.965498338833705e-06, + "loss": 0.0043, + "step": 15820 + }, + { + "epoch": 0.26752744142027834, + "grad_norm": 0.4591045081615448, + "learning_rate": 9.96532516459425e-06, + "loss": 0.007, + "step": 15830 + }, + { + "epoch": 0.2676964416991288, + "grad_norm": 0.29004165530204773, + "learning_rate": 9.96515155834745e-06, + "loss": 0.0033, + "step": 15840 + }, + { + "epoch": 0.2678654419779793, + "grad_norm": 0.16533708572387695, + "learning_rate": 9.964977520108407e-06, + "loss": 0.0034, + "step": 15850 + }, + { + "epoch": 0.2680344422568297, + "grad_norm": 0.26187756657600403, + "learning_rate": 9.964803049892265e-06, + "loss": 0.0049, + "step": 15860 + }, + { + "epoch": 0.2682034425356802, + "grad_norm": 0.1001354232430458, + "learning_rate": 9.964628147714202e-06, + "loss": 0.004, + "step": 15870 + }, + { + "epoch": 0.26837244281453065, + "grad_norm": 0.17999950051307678, + "learning_rate": 9.964452813589435e-06, + "loss": 0.0053, + "step": 15880 + }, + { + "epoch": 0.2685414430933811, + "grad_norm": 0.1562132090330124, + "learning_rate": 9.96427704753322e-06, + "loss": 0.0032, + "step": 15890 + }, + { + 
"epoch": 0.26871044337223154, + "grad_norm": 0.04589414224028587, + "learning_rate": 9.96410084956085e-06, + "loss": 0.0016, + "step": 15900 + }, + { + "epoch": 0.268879443651082, + "grad_norm": 0.2619129419326782, + "learning_rate": 9.963924219687655e-06, + "loss": 0.005, + "step": 15910 + }, + { + "epoch": 0.2690484439299325, + "grad_norm": 0.1960720419883728, + "learning_rate": 9.963747157929e-06, + "loss": 0.0046, + "step": 15920 + }, + { + "epoch": 0.26921744420878296, + "grad_norm": 0.056682687252759933, + "learning_rate": 9.963569664300294e-06, + "loss": 0.0034, + "step": 15930 + }, + { + "epoch": 0.2693864444876334, + "grad_norm": 0.1986301988363266, + "learning_rate": 9.963391738816979e-06, + "loss": 0.0027, + "step": 15940 + }, + { + "epoch": 0.26955544476648385, + "grad_norm": 0.21491877734661102, + "learning_rate": 9.963213381494532e-06, + "loss": 0.006, + "step": 15950 + }, + { + "epoch": 0.2697244450453343, + "grad_norm": 0.14494657516479492, + "learning_rate": 9.963034592348476e-06, + "loss": 0.0042, + "step": 15960 + }, + { + "epoch": 0.2698934453241848, + "grad_norm": 0.11323204636573792, + "learning_rate": 9.962855371394362e-06, + "loss": 0.0042, + "step": 15970 + }, + { + "epoch": 0.2700624456030353, + "grad_norm": 0.14095667004585266, + "learning_rate": 9.962675718647788e-06, + "loss": 0.0035, + "step": 15980 + }, + { + "epoch": 0.2702314458818857, + "grad_norm": 0.14305680990219116, + "learning_rate": 9.962495634124378e-06, + "loss": 0.0026, + "step": 15990 + }, + { + "epoch": 0.27040044616073616, + "grad_norm": 0.2864258289337158, + "learning_rate": 9.962315117839807e-06, + "loss": 0.0041, + "step": 16000 + }, + { + "epoch": 0.27056944643958664, + "grad_norm": 0.07068732380867004, + "learning_rate": 9.962134169809776e-06, + "loss": 0.0027, + "step": 16010 + }, + { + "epoch": 0.2707384467184371, + "grad_norm": 0.0339294970035553, + "learning_rate": 9.961952790050031e-06, + "loss": 0.0058, + "step": 16020 + }, + { + "epoch": 0.2709074469972875, + "grad_norm": 0.15254400670528412, + "learning_rate": 9.961770978576352e-06, + "loss": 0.0059, + "step": 16030 + }, + { + "epoch": 0.271076447276138, + "grad_norm": 0.18165895342826843, + "learning_rate": 9.961588735404557e-06, + "loss": 0.0042, + "step": 16040 + }, + { + "epoch": 0.2712454475549885, + "grad_norm": 0.2015973925590515, + "learning_rate": 9.961406060550503e-06, + "loss": 0.0036, + "step": 16050 + }, + { + "epoch": 0.27141444783383895, + "grad_norm": 0.14254651963710785, + "learning_rate": 9.961222954030084e-06, + "loss": 0.0029, + "step": 16060 + }, + { + "epoch": 0.27158344811268936, + "grad_norm": 0.3237738013267517, + "learning_rate": 9.961039415859228e-06, + "loss": 0.0047, + "step": 16070 + }, + { + "epoch": 0.27175244839153984, + "grad_norm": 0.03651570901274681, + "learning_rate": 9.960855446053908e-06, + "loss": 0.0059, + "step": 16080 + }, + { + "epoch": 0.2719214486703903, + "grad_norm": 0.13540002703666687, + "learning_rate": 9.960671044630129e-06, + "loss": 0.0028, + "step": 16090 + }, + { + "epoch": 0.2720904489492408, + "grad_norm": 0.1004335880279541, + "learning_rate": 9.960486211603932e-06, + "loss": 0.0044, + "step": 16100 + }, + { + "epoch": 0.2722594492280912, + "grad_norm": 0.03541584312915802, + "learning_rate": 9.960300946991402e-06, + "loss": 0.0029, + "step": 16110 + }, + { + "epoch": 0.2724284495069417, + "grad_norm": 0.2536197602748871, + "learning_rate": 9.960115250808654e-06, + "loss": 0.003, + "step": 16120 + }, + { + "epoch": 0.27259744978579215, + "grad_norm": 0.3499818742275238, + 
"learning_rate": 9.959929123071848e-06, + "loss": 0.0057, + "step": 16130 + }, + { + "epoch": 0.2727664500646426, + "grad_norm": 0.018729379400610924, + "learning_rate": 9.959742563797178e-06, + "loss": 0.0041, + "step": 16140 + }, + { + "epoch": 0.2729354503434931, + "grad_norm": 0.20778696238994598, + "learning_rate": 9.959555573000874e-06, + "loss": 0.0038, + "step": 16150 + }, + { + "epoch": 0.2731044506223435, + "grad_norm": 0.03607981279492378, + "learning_rate": 9.959368150699205e-06, + "loss": 0.0048, + "step": 16160 + }, + { + "epoch": 0.273273450901194, + "grad_norm": 0.04568514972925186, + "learning_rate": 9.95918029690848e-06, + "loss": 0.0036, + "step": 16170 + }, + { + "epoch": 0.27344245118004445, + "grad_norm": 0.08559437096118927, + "learning_rate": 9.958992011645038e-06, + "loss": 0.0034, + "step": 16180 + }, + { + "epoch": 0.2736114514588949, + "grad_norm": 0.027680907398462296, + "learning_rate": 9.958803294925268e-06, + "loss": 0.0036, + "step": 16190 + }, + { + "epoch": 0.27378045173774535, + "grad_norm": 0.092922143638134, + "learning_rate": 9.958614146765583e-06, + "loss": 0.0029, + "step": 16200 + }, + { + "epoch": 0.2739494520165958, + "grad_norm": 0.14137724041938782, + "learning_rate": 9.958424567182443e-06, + "loss": 0.0056, + "step": 16210 + }, + { + "epoch": 0.2741184522954463, + "grad_norm": 0.19524511694908142, + "learning_rate": 9.95823455619234e-06, + "loss": 0.0022, + "step": 16220 + }, + { + "epoch": 0.27428745257429676, + "grad_norm": 0.0908576250076294, + "learning_rate": 9.95804411381181e-06, + "loss": 0.0026, + "step": 16230 + }, + { + "epoch": 0.2744564528531472, + "grad_norm": 0.16146492958068848, + "learning_rate": 9.957853240057418e-06, + "loss": 0.0034, + "step": 16240 + }, + { + "epoch": 0.27462545313199765, + "grad_norm": 0.23391632735729218, + "learning_rate": 9.957661934945773e-06, + "loss": 0.0046, + "step": 16250 + }, + { + "epoch": 0.2747944534108481, + "grad_norm": 0.20969712734222412, + "learning_rate": 9.95747019849352e-06, + "loss": 0.0047, + "step": 16260 + }, + { + "epoch": 0.2749634536896986, + "grad_norm": 0.026646453887224197, + "learning_rate": 9.957278030717338e-06, + "loss": 0.0022, + "step": 16270 + }, + { + "epoch": 0.2751324539685491, + "grad_norm": 0.10912485420703888, + "learning_rate": 9.957085431633953e-06, + "loss": 0.0026, + "step": 16280 + }, + { + "epoch": 0.2753014542473995, + "grad_norm": 0.29427626729011536, + "learning_rate": 9.956892401260115e-06, + "loss": 0.0045, + "step": 16290 + }, + { + "epoch": 0.27547045452624996, + "grad_norm": 0.12857367098331451, + "learning_rate": 9.956698939612622e-06, + "loss": 0.0033, + "step": 16300 + }, + { + "epoch": 0.27563945480510044, + "grad_norm": 0.16636571288108826, + "learning_rate": 9.956505046708304e-06, + "loss": 0.0052, + "step": 16310 + }, + { + "epoch": 0.2758084550839509, + "grad_norm": 0.10839074850082397, + "learning_rate": 9.956310722564033e-06, + "loss": 0.0036, + "step": 16320 + }, + { + "epoch": 0.2759774553628013, + "grad_norm": 0.11550406366586685, + "learning_rate": 9.956115967196716e-06, + "loss": 0.0037, + "step": 16330 + }, + { + "epoch": 0.2761464556416518, + "grad_norm": 0.10233989357948303, + "learning_rate": 9.955920780623296e-06, + "loss": 0.0042, + "step": 16340 + }, + { + "epoch": 0.2763154559205023, + "grad_norm": 0.20577624440193176, + "learning_rate": 9.955725162860758e-06, + "loss": 0.0025, + "step": 16350 + }, + { + "epoch": 0.27648445619935275, + "grad_norm": 0.16312482953071594, + "learning_rate": 9.955529113926119e-06, + "loss": 
0.0054, + "step": 16360 + }, + { + "epoch": 0.27665345647820316, + "grad_norm": 0.14550970494747162, + "learning_rate": 9.955332633836435e-06, + "loss": 0.0054, + "step": 16370 + }, + { + "epoch": 0.27682245675705364, + "grad_norm": 0.35984915494918823, + "learning_rate": 9.955135722608804e-06, + "loss": 0.0052, + "step": 16380 + }, + { + "epoch": 0.2769914570359041, + "grad_norm": 0.199117511510849, + "learning_rate": 9.954938380260357e-06, + "loss": 0.0045, + "step": 16390 + }, + { + "epoch": 0.2771604573147546, + "grad_norm": 0.2786180377006531, + "learning_rate": 9.954740606808265e-06, + "loss": 0.0045, + "step": 16400 + }, + { + "epoch": 0.27732945759360506, + "grad_norm": 0.08469782024621964, + "learning_rate": 9.95454240226973e-06, + "loss": 0.0043, + "step": 16410 + }, + { + "epoch": 0.2774984578724555, + "grad_norm": 0.10433948785066605, + "learning_rate": 9.954343766662004e-06, + "loss": 0.0037, + "step": 16420 + }, + { + "epoch": 0.27766745815130595, + "grad_norm": 0.3268999457359314, + "learning_rate": 9.954144700002367e-06, + "loss": 0.0046, + "step": 16430 + }, + { + "epoch": 0.2778364584301564, + "grad_norm": 0.06011407449841499, + "learning_rate": 9.953945202308135e-06, + "loss": 0.0039, + "step": 16440 + }, + { + "epoch": 0.2780054587090069, + "grad_norm": 0.05640736222267151, + "learning_rate": 9.953745273596669e-06, + "loss": 0.004, + "step": 16450 + }, + { + "epoch": 0.2781744589878573, + "grad_norm": 0.11180564761161804, + "learning_rate": 9.953544913885362e-06, + "loss": 0.0052, + "step": 16460 + }, + { + "epoch": 0.2783434592667078, + "grad_norm": 0.030079906806349754, + "learning_rate": 9.953344123191649e-06, + "loss": 0.0026, + "step": 16470 + }, + { + "epoch": 0.27851245954555826, + "grad_norm": 0.22607122361660004, + "learning_rate": 9.953142901532996e-06, + "loss": 0.004, + "step": 16480 + }, + { + "epoch": 0.27868145982440873, + "grad_norm": 0.04031490907073021, + "learning_rate": 9.952941248926913e-06, + "loss": 0.0056, + "step": 16490 + }, + { + "epoch": 0.27885046010325915, + "grad_norm": 0.047156427055597305, + "learning_rate": 9.952739165390944e-06, + "loss": 0.0033, + "step": 16500 + }, + { + "epoch": 0.2790194603821096, + "grad_norm": 0.01596965454518795, + "learning_rate": 9.95253665094267e-06, + "loss": 0.0017, + "step": 16510 + }, + { + "epoch": 0.2791884606609601, + "grad_norm": 0.19963178038597107, + "learning_rate": 9.952333705599712e-06, + "loss": 0.0047, + "step": 16520 + }, + { + "epoch": 0.27935746093981056, + "grad_norm": 0.07075642049312592, + "learning_rate": 9.952130329379728e-06, + "loss": 0.0033, + "step": 16530 + }, + { + "epoch": 0.27952646121866104, + "grad_norm": 0.15401625633239746, + "learning_rate": 9.95192652230041e-06, + "loss": 0.0042, + "step": 16540 + }, + { + "epoch": 0.27969546149751146, + "grad_norm": 0.15883556008338928, + "learning_rate": 9.951722284379493e-06, + "loss": 0.0035, + "step": 16550 + }, + { + "epoch": 0.27986446177636193, + "grad_norm": 0.3158673346042633, + "learning_rate": 9.951517615634745e-06, + "loss": 0.0031, + "step": 16560 + }, + { + "epoch": 0.2800334620552124, + "grad_norm": 0.07894056290388107, + "learning_rate": 9.951312516083975e-06, + "loss": 0.0033, + "step": 16570 + }, + { + "epoch": 0.2802024623340629, + "grad_norm": 0.1073017567396164, + "learning_rate": 9.951106985745024e-06, + "loss": 0.0032, + "step": 16580 + }, + { + "epoch": 0.2803714626129133, + "grad_norm": 0.0701015368103981, + "learning_rate": 9.95090102463578e-06, + "loss": 0.0037, + "step": 16590 + }, + { + "epoch": 
0.28054046289176376, + "grad_norm": 0.08924991637468338, + "learning_rate": 9.950694632774157e-06, + "loss": 0.0047, + "step": 16600 + }, + { + "epoch": 0.28070946317061424, + "grad_norm": 0.1637372523546219, + "learning_rate": 9.950487810178115e-06, + "loss": 0.0028, + "step": 16610 + }, + { + "epoch": 0.2808784634494647, + "grad_norm": 0.04116513207554817, + "learning_rate": 9.950280556865649e-06, + "loss": 0.0053, + "step": 16620 + }, + { + "epoch": 0.28104746372831513, + "grad_norm": 0.14847958087921143, + "learning_rate": 9.950072872854787e-06, + "loss": 0.0024, + "step": 16630 + }, + { + "epoch": 0.2812164640071656, + "grad_norm": 0.06650839000940323, + "learning_rate": 9.949864758163603e-06, + "loss": 0.0027, + "step": 16640 + }, + { + "epoch": 0.2813854642860161, + "grad_norm": 0.0613737478852272, + "learning_rate": 9.949656212810201e-06, + "loss": 0.0046, + "step": 16650 + }, + { + "epoch": 0.28155446456486655, + "grad_norm": 0.13115599751472473, + "learning_rate": 9.94944723681273e-06, + "loss": 0.0041, + "step": 16660 + }, + { + "epoch": 0.281723464843717, + "grad_norm": 0.14419174194335938, + "learning_rate": 9.949237830189366e-06, + "loss": 0.0045, + "step": 16670 + }, + { + "epoch": 0.28189246512256744, + "grad_norm": 0.12032968550920486, + "learning_rate": 9.949027992958333e-06, + "loss": 0.0029, + "step": 16680 + }, + { + "epoch": 0.2820614654014179, + "grad_norm": 0.2870495319366455, + "learning_rate": 9.948817725137884e-06, + "loss": 0.0047, + "step": 16690 + }, + { + "epoch": 0.2822304656802684, + "grad_norm": 0.1272762268781662, + "learning_rate": 9.948607026746316e-06, + "loss": 0.0047, + "step": 16700 + }, + { + "epoch": 0.28239946595911886, + "grad_norm": 0.2602250874042511, + "learning_rate": 9.948395897801962e-06, + "loss": 0.0046, + "step": 16710 + }, + { + "epoch": 0.2825684662379693, + "grad_norm": 0.01893269270658493, + "learning_rate": 9.948184338323188e-06, + "loss": 0.0033, + "step": 16720 + }, + { + "epoch": 0.28273746651681975, + "grad_norm": 0.1346733719110489, + "learning_rate": 9.9479723483284e-06, + "loss": 0.0032, + "step": 16730 + }, + { + "epoch": 0.2829064667956702, + "grad_norm": 0.08700961619615555, + "learning_rate": 9.947759927836048e-06, + "loss": 0.0035, + "step": 16740 + }, + { + "epoch": 0.2830754670745207, + "grad_norm": 0.15652678906917572, + "learning_rate": 9.947547076864607e-06, + "loss": 0.0038, + "step": 16750 + }, + { + "epoch": 0.2832444673533711, + "grad_norm": 0.20715713500976562, + "learning_rate": 9.9473337954326e-06, + "loss": 0.0062, + "step": 16760 + }, + { + "epoch": 0.2834134676322216, + "grad_norm": 0.2449064552783966, + "learning_rate": 9.947120083558582e-06, + "loss": 0.0028, + "step": 16770 + }, + { + "epoch": 0.28358246791107206, + "grad_norm": 0.11783338338136673, + "learning_rate": 9.946905941261148e-06, + "loss": 0.0056, + "step": 16780 + }, + { + "epoch": 0.28375146818992253, + "grad_norm": 0.1863008588552475, + "learning_rate": 9.946691368558929e-06, + "loss": 0.0034, + "step": 16790 + }, + { + "epoch": 0.28392046846877295, + "grad_norm": 0.23544996976852417, + "learning_rate": 9.946476365470591e-06, + "loss": 0.0068, + "step": 16800 + }, + { + "epoch": 0.2840894687476234, + "grad_norm": 0.2011137753725052, + "learning_rate": 9.946260932014847e-06, + "loss": 0.0038, + "step": 16810 + }, + { + "epoch": 0.2842584690264739, + "grad_norm": 0.17312456667423248, + "learning_rate": 9.946045068210434e-06, + "loss": 0.0035, + "step": 16820 + }, + { + "epoch": 0.28442746930532437, + "grad_norm": 0.1152682825922966, + 
"learning_rate": 9.945828774076138e-06, + "loss": 0.0022, + "step": 16830 + }, + { + "epoch": 0.28459646958417484, + "grad_norm": 0.1757705807685852, + "learning_rate": 9.945612049630774e-06, + "loss": 0.0036, + "step": 16840 + }, + { + "epoch": 0.28476546986302526, + "grad_norm": 0.06967198848724365, + "learning_rate": 9.9453948948932e-06, + "loss": 0.0026, + "step": 16850 + }, + { + "epoch": 0.28493447014187573, + "grad_norm": 0.0472136065363884, + "learning_rate": 9.94517730988231e-06, + "loss": 0.0037, + "step": 16860 + }, + { + "epoch": 0.2851034704207262, + "grad_norm": 0.10674723237752914, + "learning_rate": 9.944959294617031e-06, + "loss": 0.0043, + "step": 16870 + }, + { + "epoch": 0.2852724706995767, + "grad_norm": 0.24035988748073578, + "learning_rate": 9.944740849116338e-06, + "loss": 0.0041, + "step": 16880 + }, + { + "epoch": 0.2854414709784271, + "grad_norm": 0.3217427730560303, + "learning_rate": 9.944521973399233e-06, + "loss": 0.0041, + "step": 16890 + }, + { + "epoch": 0.28561047125727757, + "grad_norm": 0.21625342965126038, + "learning_rate": 9.944302667484757e-06, + "loss": 0.0033, + "step": 16900 + }, + { + "epoch": 0.28577947153612804, + "grad_norm": 0.18237721920013428, + "learning_rate": 9.944082931391997e-06, + "loss": 0.0042, + "step": 16910 + }, + { + "epoch": 0.2859484718149785, + "grad_norm": 0.11755727231502533, + "learning_rate": 9.943862765140065e-06, + "loss": 0.004, + "step": 16920 + }, + { + "epoch": 0.28611747209382893, + "grad_norm": 0.1553824096918106, + "learning_rate": 9.943642168748117e-06, + "loss": 0.0064, + "step": 16930 + }, + { + "epoch": 0.2862864723726794, + "grad_norm": 0.1014246940612793, + "learning_rate": 9.94342114223535e-06, + "loss": 0.0065, + "step": 16940 + }, + { + "epoch": 0.2864554726515299, + "grad_norm": 0.13518716394901276, + "learning_rate": 9.943199685620992e-06, + "loss": 0.0041, + "step": 16950 + }, + { + "epoch": 0.28662447293038035, + "grad_norm": 0.2961231768131256, + "learning_rate": 9.942977798924312e-06, + "loss": 0.0026, + "step": 16960 + }, + { + "epoch": 0.2867934732092308, + "grad_norm": 0.24852749705314636, + "learning_rate": 9.942755482164612e-06, + "loss": 0.0048, + "step": 16970 + }, + { + "epoch": 0.28696247348808124, + "grad_norm": 0.09043409675359726, + "learning_rate": 9.94253273536124e-06, + "loss": 0.0025, + "step": 16980 + }, + { + "epoch": 0.2871314737669317, + "grad_norm": 0.08916544169187546, + "learning_rate": 9.942309558533569e-06, + "loss": 0.0062, + "step": 16990 + }, + { + "epoch": 0.2873004740457822, + "grad_norm": 0.1598898470401764, + "learning_rate": 9.942085951701024e-06, + "loss": 0.0044, + "step": 17000 + }, + { + "epoch": 0.28746947432463266, + "grad_norm": 0.13179609179496765, + "learning_rate": 9.941861914883055e-06, + "loss": 0.003, + "step": 17010 + }, + { + "epoch": 0.2876384746034831, + "grad_norm": 0.1741129755973816, + "learning_rate": 9.941637448099155e-06, + "loss": 0.0036, + "step": 17020 + }, + { + "epoch": 0.28780747488233355, + "grad_norm": 0.08615001291036606, + "learning_rate": 9.941412551368854e-06, + "loss": 0.0174, + "step": 17030 + }, + { + "epoch": 0.287976475161184, + "grad_norm": 0.17396552860736847, + "learning_rate": 9.941187224711719e-06, + "loss": 0.0055, + "step": 17040 + }, + { + "epoch": 0.2881454754400345, + "grad_norm": 0.08446299284696579, + "learning_rate": 9.940961468147356e-06, + "loss": 0.0025, + "step": 17050 + }, + { + "epoch": 0.2883144757188849, + "grad_norm": 0.18342605233192444, + "learning_rate": 9.940735281695406e-06, + "loss": 0.0037, + 
"step": 17060 + }, + { + "epoch": 0.2884834759977354, + "grad_norm": 0.1513669490814209, + "learning_rate": 9.940508665375547e-06, + "loss": 0.0038, + "step": 17070 + }, + { + "epoch": 0.28865247627658586, + "grad_norm": 0.13163068890571594, + "learning_rate": 9.940281619207497e-06, + "loss": 0.0026, + "step": 17080 + }, + { + "epoch": 0.28882147655543633, + "grad_norm": 0.09194418042898178, + "learning_rate": 9.94005414321101e-06, + "loss": 0.0026, + "step": 17090 + }, + { + "epoch": 0.2889904768342868, + "grad_norm": 0.10978283733129501, + "learning_rate": 9.939826237405878e-06, + "loss": 0.0032, + "step": 17100 + }, + { + "epoch": 0.2891594771131372, + "grad_norm": 0.09779514372348785, + "learning_rate": 9.939597901811929e-06, + "loss": 0.0055, + "step": 17110 + }, + { + "epoch": 0.2893284773919877, + "grad_norm": 0.13635870814323425, + "learning_rate": 9.939369136449029e-06, + "loss": 0.0034, + "step": 17120 + }, + { + "epoch": 0.28949747767083817, + "grad_norm": 0.1415141522884369, + "learning_rate": 9.939139941337084e-06, + "loss": 0.0028, + "step": 17130 + }, + { + "epoch": 0.28966647794968864, + "grad_norm": 0.8230068683624268, + "learning_rate": 9.93891031649603e-06, + "loss": 0.0057, + "step": 17140 + }, + { + "epoch": 0.28983547822853906, + "grad_norm": 0.15510357916355133, + "learning_rate": 9.938680261945853e-06, + "loss": 0.0047, + "step": 17150 + }, + { + "epoch": 0.29000447850738953, + "grad_norm": 0.07704704254865646, + "learning_rate": 9.938449777706562e-06, + "loss": 0.0046, + "step": 17160 + }, + { + "epoch": 0.29017347878624, + "grad_norm": 0.1460547000169754, + "learning_rate": 9.938218863798215e-06, + "loss": 0.0042, + "step": 17170 + }, + { + "epoch": 0.2903424790650905, + "grad_norm": 0.16609102487564087, + "learning_rate": 9.937987520240899e-06, + "loss": 0.0043, + "step": 17180 + }, + { + "epoch": 0.2905114793439409, + "grad_norm": 0.05784421041607857, + "learning_rate": 9.937755747054745e-06, + "loss": 0.0033, + "step": 17190 + }, + { + "epoch": 0.29068047962279137, + "grad_norm": 0.23304054141044617, + "learning_rate": 9.937523544259915e-06, + "loss": 0.0037, + "step": 17200 + }, + { + "epoch": 0.29084947990164184, + "grad_norm": 0.0587453655898571, + "learning_rate": 9.937290911876616e-06, + "loss": 0.0041, + "step": 17210 + }, + { + "epoch": 0.2910184801804923, + "grad_norm": 0.14351072907447815, + "learning_rate": 9.937057849925084e-06, + "loss": 0.0033, + "step": 17220 + }, + { + "epoch": 0.2911874804593428, + "grad_norm": 0.07545598596334457, + "learning_rate": 9.9368243584256e-06, + "loss": 0.0028, + "step": 17230 + }, + { + "epoch": 0.2913564807381932, + "grad_norm": 0.21592868864536285, + "learning_rate": 9.936590437398476e-06, + "loss": 0.0038, + "step": 17240 + }, + { + "epoch": 0.2915254810170437, + "grad_norm": 0.18980614840984344, + "learning_rate": 9.936356086864067e-06, + "loss": 0.0042, + "step": 17250 + }, + { + "epoch": 0.29169448129589415, + "grad_norm": 0.09946509450674057, + "learning_rate": 9.936121306842761e-06, + "loss": 0.0034, + "step": 17260 + }, + { + "epoch": 0.2918634815747446, + "grad_norm": 0.09308329969644547, + "learning_rate": 9.935886097354985e-06, + "loss": 0.0039, + "step": 17270 + }, + { + "epoch": 0.29203248185359504, + "grad_norm": 0.11000710725784302, + "learning_rate": 9.935650458421202e-06, + "loss": 0.0029, + "step": 17280 + }, + { + "epoch": 0.2922014821324455, + "grad_norm": 0.12374405562877655, + "learning_rate": 9.935414390061918e-06, + "loss": 0.0041, + "step": 17290 + }, + { + "epoch": 0.292370482411296, + 
"grad_norm": 0.08498592674732208, + "learning_rate": 9.935177892297668e-06, + "loss": 0.004, + "step": 17300 + }, + { + "epoch": 0.29253948269014646, + "grad_norm": 0.1327739804983139, + "learning_rate": 9.93494096514903e-06, + "loss": 0.0042, + "step": 17310 + }, + { + "epoch": 0.2927084829689969, + "grad_norm": 0.0903698205947876, + "learning_rate": 9.934703608636618e-06, + "loss": 0.0042, + "step": 17320 + }, + { + "epoch": 0.29287748324784735, + "grad_norm": 0.1082349494099617, + "learning_rate": 9.934465822781083e-06, + "loss": 0.0023, + "step": 17330 + }, + { + "epoch": 0.2930464835266978, + "grad_norm": 0.14297062158584595, + "learning_rate": 9.934227607603112e-06, + "loss": 0.0048, + "step": 17340 + }, + { + "epoch": 0.2932154838055483, + "grad_norm": 0.013389919884502888, + "learning_rate": 9.933988963123435e-06, + "loss": 0.0043, + "step": 17350 + }, + { + "epoch": 0.29338448408439877, + "grad_norm": 0.30560749769210815, + "learning_rate": 9.93374988936281e-06, + "loss": 0.0037, + "step": 17360 + }, + { + "epoch": 0.2935534843632492, + "grad_norm": 0.1624475121498108, + "learning_rate": 9.93351038634204e-06, + "loss": 0.0025, + "step": 17370 + }, + { + "epoch": 0.29372248464209966, + "grad_norm": 0.04842815920710564, + "learning_rate": 9.933270454081964e-06, + "loss": 0.0026, + "step": 17380 + }, + { + "epoch": 0.29389148492095013, + "grad_norm": 0.12428752332925797, + "learning_rate": 9.933030092603458e-06, + "loss": 0.0034, + "step": 17390 + }, + { + "epoch": 0.2940604851998006, + "grad_norm": 0.05903220921754837, + "learning_rate": 9.93278930192743e-06, + "loss": 0.0022, + "step": 17400 + }, + { + "epoch": 0.294229485478651, + "grad_norm": 0.15623825788497925, + "learning_rate": 9.932548082074833e-06, + "loss": 0.0051, + "step": 17410 + }, + { + "epoch": 0.2943984857575015, + "grad_norm": 0.16055922210216522, + "learning_rate": 9.932306433066656e-06, + "loss": 0.0041, + "step": 17420 + }, + { + "epoch": 0.29456748603635197, + "grad_norm": 0.3821702301502228, + "learning_rate": 9.932064354923921e-06, + "loss": 0.0057, + "step": 17430 + }, + { + "epoch": 0.29473648631520244, + "grad_norm": 0.37602075934410095, + "learning_rate": 9.93182184766769e-06, + "loss": 0.0031, + "step": 17440 + }, + { + "epoch": 0.29490548659405286, + "grad_norm": 0.11960271745920181, + "learning_rate": 9.931578911319063e-06, + "loss": 0.0039, + "step": 17450 + }, + { + "epoch": 0.29507448687290333, + "grad_norm": 0.4761727452278137, + "learning_rate": 9.931335545899177e-06, + "loss": 0.0043, + "step": 17460 + }, + { + "epoch": 0.2952434871517538, + "grad_norm": 0.17506860196590424, + "learning_rate": 9.931091751429207e-06, + "loss": 0.0032, + "step": 17470 + }, + { + "epoch": 0.2954124874306043, + "grad_norm": 0.12806014716625214, + "learning_rate": 9.93084752793036e-06, + "loss": 0.0046, + "step": 17480 + }, + { + "epoch": 0.2955814877094547, + "grad_norm": 0.11130823940038681, + "learning_rate": 9.930602875423889e-06, + "loss": 0.0039, + "step": 17490 + }, + { + "epoch": 0.29575048798830517, + "grad_norm": 0.09847419708967209, + "learning_rate": 9.930357793931081e-06, + "loss": 0.004, + "step": 17500 + }, + { + "epoch": 0.29591948826715564, + "grad_norm": 0.2048054039478302, + "learning_rate": 9.930112283473253e-06, + "loss": 0.0032, + "step": 17510 + }, + { + "epoch": 0.2960884885460061, + "grad_norm": 0.41881263256073, + "learning_rate": 9.92986634407177e-06, + "loss": 0.0033, + "step": 17520 + }, + { + "epoch": 0.2962574888248566, + "grad_norm": 0.13665935397148132, + "learning_rate": 
9.92961997574803e-06, + "loss": 0.0041, + "step": 17530 + }, + { + "epoch": 0.296426489103707, + "grad_norm": 0.1357094794511795, + "learning_rate": 9.929373178523469e-06, + "loss": 0.0034, + "step": 17540 + }, + { + "epoch": 0.2965954893825575, + "grad_norm": 0.11714345961809158, + "learning_rate": 9.929125952419558e-06, + "loss": 0.0031, + "step": 17550 + }, + { + "epoch": 0.29676448966140795, + "grad_norm": 0.21937525272369385, + "learning_rate": 9.928878297457804e-06, + "loss": 0.0038, + "step": 17560 + }, + { + "epoch": 0.2969334899402584, + "grad_norm": 0.5983049273490906, + "learning_rate": 9.92863021365976e-06, + "loss": 0.0035, + "step": 17570 + }, + { + "epoch": 0.29710249021910884, + "grad_norm": 0.06767556816339493, + "learning_rate": 9.928381701047006e-06, + "loss": 0.0049, + "step": 17580 + }, + { + "epoch": 0.2972714904979593, + "grad_norm": 0.13499832153320312, + "learning_rate": 9.928132759641166e-06, + "loss": 0.0045, + "step": 17590 + }, + { + "epoch": 0.2974404907768098, + "grad_norm": 0.26578205823898315, + "learning_rate": 9.9278833894639e-06, + "loss": 0.0046, + "step": 17600 + }, + { + "epoch": 0.29760949105566026, + "grad_norm": 0.06498241424560547, + "learning_rate": 9.9276335905369e-06, + "loss": 0.0046, + "step": 17610 + }, + { + "epoch": 0.2977784913345107, + "grad_norm": 0.1358170062303543, + "learning_rate": 9.927383362881906e-06, + "loss": 0.0038, + "step": 17620 + }, + { + "epoch": 0.29794749161336115, + "grad_norm": 0.16412805020809174, + "learning_rate": 9.927132706520684e-06, + "loss": 0.003, + "step": 17630 + }, + { + "epoch": 0.2981164918922116, + "grad_norm": 0.07152877002954483, + "learning_rate": 9.926881621475044e-06, + "loss": 0.0044, + "step": 17640 + }, + { + "epoch": 0.2982854921710621, + "grad_norm": 0.12229660153388977, + "learning_rate": 9.92663010776683e-06, + "loss": 0.0027, + "step": 17650 + }, + { + "epoch": 0.29845449244991257, + "grad_norm": 0.2264779806137085, + "learning_rate": 9.926378165417928e-06, + "loss": 0.0025, + "step": 17660 + }, + { + "epoch": 0.298623492728763, + "grad_norm": 0.18397444486618042, + "learning_rate": 9.926125794450258e-06, + "loss": 0.002, + "step": 17670 + }, + { + "epoch": 0.29879249300761346, + "grad_norm": 0.022286036983132362, + "learning_rate": 9.925872994885776e-06, + "loss": 0.0028, + "step": 17680 + }, + { + "epoch": 0.29896149328646393, + "grad_norm": 0.18695835769176483, + "learning_rate": 9.925619766746476e-06, + "loss": 0.0033, + "step": 17690 + }, + { + "epoch": 0.2991304935653144, + "grad_norm": 0.14758239686489105, + "learning_rate": 9.925366110054391e-06, + "loss": 0.0044, + "step": 17700 + }, + { + "epoch": 0.2992994938441648, + "grad_norm": 0.0414218008518219, + "learning_rate": 9.925112024831591e-06, + "loss": 0.0029, + "step": 17710 + }, + { + "epoch": 0.2994684941230153, + "grad_norm": 0.23307713866233826, + "learning_rate": 9.924857511100181e-06, + "loss": 0.0042, + "step": 17720 + }, + { + "epoch": 0.29963749440186577, + "grad_norm": 0.11337589472532272, + "learning_rate": 9.924602568882308e-06, + "loss": 0.0031, + "step": 17730 + }, + { + "epoch": 0.29980649468071624, + "grad_norm": 0.10292692482471466, + "learning_rate": 9.92434719820015e-06, + "loss": 0.0041, + "step": 17740 + }, + { + "epoch": 0.29997549495956666, + "grad_norm": 0.1608804613351822, + "learning_rate": 9.924091399075928e-06, + "loss": 0.0048, + "step": 17750 + }, + { + "epoch": 0.30014449523841713, + "grad_norm": 0.12451475858688354, + "learning_rate": 9.923835171531896e-06, + "loss": 0.0039, + "step": 17760 + }, + 
{ + "epoch": 0.3003134955172676, + "grad_norm": 0.35020482540130615, + "learning_rate": 9.923578515590347e-06, + "loss": 0.0028, + "step": 17770 + }, + { + "epoch": 0.3004824957961181, + "grad_norm": 0.14740000665187836, + "learning_rate": 9.923321431273611e-06, + "loss": 0.0063, + "step": 17780 + }, + { + "epoch": 0.30065149607496855, + "grad_norm": 0.1438693404197693, + "learning_rate": 9.923063918604057e-06, + "loss": 0.0032, + "step": 17790 + }, + { + "epoch": 0.30082049635381897, + "grad_norm": 0.15545758605003357, + "learning_rate": 9.92280597760409e-06, + "loss": 0.0055, + "step": 17800 + }, + { + "epoch": 0.30098949663266944, + "grad_norm": 0.057372089475393295, + "learning_rate": 9.922547608296151e-06, + "loss": 0.0027, + "step": 17810 + }, + { + "epoch": 0.3011584969115199, + "grad_norm": 0.08195595443248749, + "learning_rate": 9.92228881070272e-06, + "loss": 0.0024, + "step": 17820 + }, + { + "epoch": 0.3013274971903704, + "grad_norm": 0.13404421508312225, + "learning_rate": 9.922029584846314e-06, + "loss": 0.0027, + "step": 17830 + }, + { + "epoch": 0.3014964974692208, + "grad_norm": 0.0030572651885449886, + "learning_rate": 9.921769930749485e-06, + "loss": 0.0032, + "step": 17840 + }, + { + "epoch": 0.3016654977480713, + "grad_norm": 0.08229197561740875, + "learning_rate": 9.921509848434827e-06, + "loss": 0.0036, + "step": 17850 + }, + { + "epoch": 0.30183449802692175, + "grad_norm": 0.07791959494352341, + "learning_rate": 9.921249337924966e-06, + "loss": 0.0031, + "step": 17860 + }, + { + "epoch": 0.3020034983057722, + "grad_norm": 0.23877891898155212, + "learning_rate": 9.920988399242569e-06, + "loss": 0.0087, + "step": 17870 + }, + { + "epoch": 0.30217249858462264, + "grad_norm": 0.0586976483464241, + "learning_rate": 9.920727032410337e-06, + "loss": 0.0033, + "step": 17880 + }, + { + "epoch": 0.3023414988634731, + "grad_norm": 0.06728573143482208, + "learning_rate": 9.920465237451013e-06, + "loss": 0.003, + "step": 17890 + }, + { + "epoch": 0.3025104991423236, + "grad_norm": 0.1478877067565918, + "learning_rate": 9.920203014387373e-06, + "loss": 0.0033, + "step": 17900 + }, + { + "epoch": 0.30267949942117406, + "grad_norm": 0.08358483016490936, + "learning_rate": 9.919940363242233e-06, + "loss": 0.0041, + "step": 17910 + }, + { + "epoch": 0.30284849970002453, + "grad_norm": 0.12691207230091095, + "learning_rate": 9.919677284038443e-06, + "loss": 0.0036, + "step": 17920 + }, + { + "epoch": 0.30301749997887495, + "grad_norm": 0.056970302015542984, + "learning_rate": 9.919413776798892e-06, + "loss": 0.0034, + "step": 17930 + }, + { + "epoch": 0.3031865002577254, + "grad_norm": 0.11025747656822205, + "learning_rate": 9.919149841546509e-06, + "loss": 0.0033, + "step": 17940 + }, + { + "epoch": 0.3033555005365759, + "grad_norm": 0.15607964992523193, + "learning_rate": 9.918885478304253e-06, + "loss": 0.0033, + "step": 17950 + }, + { + "epoch": 0.30352450081542637, + "grad_norm": 0.1555921882390976, + "learning_rate": 9.91862068709513e-06, + "loss": 0.0025, + "step": 17960 + }, + { + "epoch": 0.3036935010942768, + "grad_norm": 0.09082618355751038, + "learning_rate": 9.918355467942176e-06, + "loss": 0.0023, + "step": 17970 + }, + { + "epoch": 0.30386250137312726, + "grad_norm": 0.3090972900390625, + "learning_rate": 9.918089820868466e-06, + "loss": 0.0019, + "step": 17980 + }, + { + "epoch": 0.30403150165197773, + "grad_norm": 0.057993724942207336, + "learning_rate": 9.917823745897113e-06, + "loss": 0.0031, + "step": 17990 + }, + { + "epoch": 0.3042005019308282, + "grad_norm": 
0.06888581812381744, + "learning_rate": 9.917557243051266e-06, + "loss": 0.004, + "step": 18000 + }, + { + "epoch": 0.3043695022096786, + "grad_norm": 0.13699199259281158, + "learning_rate": 9.917290312354113e-06, + "loss": 0.0028, + "step": 18010 + }, + { + "epoch": 0.3045385024885291, + "grad_norm": 0.1704409122467041, + "learning_rate": 9.917022953828879e-06, + "loss": 0.0052, + "step": 18020 + }, + { + "epoch": 0.30470750276737957, + "grad_norm": 0.24415592849254608, + "learning_rate": 9.916755167498824e-06, + "loss": 0.0046, + "step": 18030 + }, + { + "epoch": 0.30487650304623004, + "grad_norm": 0.09228435158729553, + "learning_rate": 9.916486953387247e-06, + "loss": 0.0037, + "step": 18040 + }, + { + "epoch": 0.3050455033250805, + "grad_norm": 0.24966703355312347, + "learning_rate": 9.916218311517484e-06, + "loss": 0.0032, + "step": 18050 + }, + { + "epoch": 0.30521450360393093, + "grad_norm": 0.07386928051710129, + "learning_rate": 9.915949241912908e-06, + "loss": 0.0035, + "step": 18060 + }, + { + "epoch": 0.3053835038827814, + "grad_norm": 0.183248370885849, + "learning_rate": 9.91567974459693e-06, + "loss": 0.006, + "step": 18070 + }, + { + "epoch": 0.3055525041616319, + "grad_norm": 0.3374176621437073, + "learning_rate": 9.915409819592998e-06, + "loss": 0.0043, + "step": 18080 + }, + { + "epoch": 0.30572150444048235, + "grad_norm": 0.034306954592466354, + "learning_rate": 9.915139466924594e-06, + "loss": 0.0044, + "step": 18090 + }, + { + "epoch": 0.30589050471933277, + "grad_norm": 0.2904483675956726, + "learning_rate": 9.914868686615244e-06, + "loss": 0.0037, + "step": 18100 + }, + { + "epoch": 0.30605950499818324, + "grad_norm": 0.18893156945705414, + "learning_rate": 9.914597478688503e-06, + "loss": 0.0033, + "step": 18110 + }, + { + "epoch": 0.3062285052770337, + "grad_norm": 0.22676680982112885, + "learning_rate": 9.91432584316797e-06, + "loss": 0.0047, + "step": 18120 + }, + { + "epoch": 0.3063975055558842, + "grad_norm": 0.13738246262073517, + "learning_rate": 9.91405378007728e-06, + "loss": 0.0047, + "step": 18130 + }, + { + "epoch": 0.3065665058347346, + "grad_norm": 0.08430465310811996, + "learning_rate": 9.913781289440102e-06, + "loss": 0.0024, + "step": 18140 + }, + { + "epoch": 0.3067355061135851, + "grad_norm": 0.05394160374999046, + "learning_rate": 9.913508371280143e-06, + "loss": 0.0036, + "step": 18150 + }, + { + "epoch": 0.30690450639243555, + "grad_norm": 0.12102994322776794, + "learning_rate": 9.913235025621148e-06, + "loss": 0.0035, + "step": 18160 + }, + { + "epoch": 0.307073506671286, + "grad_norm": 0.20373289287090302, + "learning_rate": 9.912961252486903e-06, + "loss": 0.003, + "step": 18170 + }, + { + "epoch": 0.30724250695013644, + "grad_norm": 0.33118996024131775, + "learning_rate": 9.912687051901224e-06, + "loss": 0.004, + "step": 18180 + }, + { + "epoch": 0.3074115072289869, + "grad_norm": 0.1756182610988617, + "learning_rate": 9.91241242388797e-06, + "loss": 0.003, + "step": 18190 + }, + { + "epoch": 0.3075805075078374, + "grad_norm": 0.3151039183139801, + "learning_rate": 9.912137368471032e-06, + "loss": 0.0035, + "step": 18200 + }, + { + "epoch": 0.30774950778668786, + "grad_norm": 0.08164257556200027, + "learning_rate": 9.911861885674344e-06, + "loss": 0.004, + "step": 18210 + }, + { + "epoch": 0.30791850806553833, + "grad_norm": 0.2701294422149658, + "learning_rate": 9.911585975521873e-06, + "loss": 0.0043, + "step": 18220 + }, + { + "epoch": 0.30808750834438875, + "grad_norm": 0.16288606822490692, + "learning_rate": 9.911309638037626e-06, + 
"loss": 0.0034, + "step": 18230 + }, + { + "epoch": 0.3082565086232392, + "grad_norm": 0.08081130683422089, + "learning_rate": 9.911032873245645e-06, + "loss": 0.0033, + "step": 18240 + }, + { + "epoch": 0.3084255089020897, + "grad_norm": 0.1643926352262497, + "learning_rate": 9.910755681170009e-06, + "loss": 0.0051, + "step": 18250 + }, + { + "epoch": 0.30859450918094017, + "grad_norm": 0.0930451974272728, + "learning_rate": 9.910478061834834e-06, + "loss": 0.0039, + "step": 18260 + }, + { + "epoch": 0.3087635094597906, + "grad_norm": 0.12892429530620575, + "learning_rate": 9.910200015264278e-06, + "loss": 0.0049, + "step": 18270 + }, + { + "epoch": 0.30893250973864106, + "grad_norm": 0.14059489965438843, + "learning_rate": 9.909921541482527e-06, + "loss": 0.0049, + "step": 18280 + }, + { + "epoch": 0.30910151001749153, + "grad_norm": 0.028928019106388092, + "learning_rate": 9.909642640513817e-06, + "loss": 0.0031, + "step": 18290 + }, + { + "epoch": 0.309270510296342, + "grad_norm": 0.08975011855363846, + "learning_rate": 9.909363312382408e-06, + "loss": 0.0036, + "step": 18300 + }, + { + "epoch": 0.3094395105751924, + "grad_norm": 0.2675141990184784, + "learning_rate": 9.909083557112602e-06, + "loss": 0.0116, + "step": 18310 + }, + { + "epoch": 0.3096085108540429, + "grad_norm": 0.08267785608768463, + "learning_rate": 9.908803374728744e-06, + "loss": 0.0043, + "step": 18320 + }, + { + "epoch": 0.30977751113289337, + "grad_norm": 0.17032286524772644, + "learning_rate": 9.908522765255208e-06, + "loss": 0.0026, + "step": 18330 + }, + { + "epoch": 0.30994651141174384, + "grad_norm": 0.12598468363285065, + "learning_rate": 9.90824172871641e-06, + "loss": 0.0033, + "step": 18340 + }, + { + "epoch": 0.3101155116905943, + "grad_norm": 0.14041945338249207, + "learning_rate": 9.907960265136801e-06, + "loss": 0.0043, + "step": 18350 + }, + { + "epoch": 0.31028451196944473, + "grad_norm": 0.09343992173671722, + "learning_rate": 9.907678374540866e-06, + "loss": 0.0017, + "step": 18360 + }, + { + "epoch": 0.3104535122482952, + "grad_norm": 0.080118827521801, + "learning_rate": 9.907396056953137e-06, + "loss": 0.003, + "step": 18370 + }, + { + "epoch": 0.3106225125271457, + "grad_norm": 0.05156289041042328, + "learning_rate": 9.907113312398173e-06, + "loss": 0.0037, + "step": 18380 + }, + { + "epoch": 0.31079151280599615, + "grad_norm": 0.17726033926010132, + "learning_rate": 9.906830140900577e-06, + "loss": 0.0066, + "step": 18390 + }, + { + "epoch": 0.31096051308484657, + "grad_norm": 0.01746257022023201, + "learning_rate": 9.906546542484984e-06, + "loss": 0.0033, + "step": 18400 + }, + { + "epoch": 0.31112951336369704, + "grad_norm": 0.14338521659374237, + "learning_rate": 9.906262517176066e-06, + "loss": 0.0019, + "step": 18410 + }, + { + "epoch": 0.3112985136425475, + "grad_norm": 0.04118502512574196, + "learning_rate": 9.90597806499854e-06, + "loss": 0.0024, + "step": 18420 + }, + { + "epoch": 0.311467513921398, + "grad_norm": 0.25993072986602783, + "learning_rate": 9.905693185977152e-06, + "loss": 0.0045, + "step": 18430 + }, + { + "epoch": 0.3116365142002484, + "grad_norm": 0.2973000407218933, + "learning_rate": 9.90540788013669e-06, + "loss": 0.0035, + "step": 18440 + }, + { + "epoch": 0.3118055144790989, + "grad_norm": 0.11956703662872314, + "learning_rate": 9.905122147501973e-06, + "loss": 0.0023, + "step": 18450 + }, + { + "epoch": 0.31197451475794935, + "grad_norm": 0.11619943380355835, + "learning_rate": 9.904835988097865e-06, + "loss": 0.0022, + "step": 18460 + }, + { + "epoch": 
0.3121435150367998, + "grad_norm": 0.28381380438804626, + "learning_rate": 9.90454940194926e-06, + "loss": 0.0048, + "step": 18470 + }, + { + "epoch": 0.3123125153156503, + "grad_norm": 0.403537392616272, + "learning_rate": 9.904262389081093e-06, + "loss": 0.0049, + "step": 18480 + }, + { + "epoch": 0.3124815155945007, + "grad_norm": 0.042656026780605316, + "learning_rate": 9.90397494951834e-06, + "loss": 0.0039, + "step": 18490 + }, + { + "epoch": 0.3126505158733512, + "grad_norm": 0.25615885853767395, + "learning_rate": 9.903687083286003e-06, + "loss": 0.0048, + "step": 18500 + }, + { + "epoch": 0.31281951615220166, + "grad_norm": 0.16681556403636932, + "learning_rate": 9.903398790409132e-06, + "loss": 0.0039, + "step": 18510 + }, + { + "epoch": 0.31298851643105213, + "grad_norm": 0.08900097012519836, + "learning_rate": 9.90311007091281e-06, + "loss": 0.0022, + "step": 18520 + }, + { + "epoch": 0.31315751670990255, + "grad_norm": 0.26374733448028564, + "learning_rate": 9.902820924822153e-06, + "loss": 0.0039, + "step": 18530 + }, + { + "epoch": 0.313326516988753, + "grad_norm": 0.11339973658323288, + "learning_rate": 9.902531352162322e-06, + "loss": 0.0041, + "step": 18540 + }, + { + "epoch": 0.3134955172676035, + "grad_norm": 0.15740644931793213, + "learning_rate": 9.902241352958511e-06, + "loss": 0.0047, + "step": 18550 + }, + { + "epoch": 0.31366451754645397, + "grad_norm": 0.1276281625032425, + "learning_rate": 9.90195092723595e-06, + "loss": 0.0031, + "step": 18560 + }, + { + "epoch": 0.3138335178253044, + "grad_norm": 0.07520712167024612, + "learning_rate": 9.901660075019907e-06, + "loss": 0.0035, + "step": 18570 + }, + { + "epoch": 0.31400251810415486, + "grad_norm": 0.09208130836486816, + "learning_rate": 9.901368796335688e-06, + "loss": 0.0026, + "step": 18580 + }, + { + "epoch": 0.31417151838300533, + "grad_norm": 0.10288451611995697, + "learning_rate": 9.901077091208638e-06, + "loss": 0.0049, + "step": 18590 + }, + { + "epoch": 0.3143405186618558, + "grad_norm": 0.2334715723991394, + "learning_rate": 9.900784959664134e-06, + "loss": 0.004, + "step": 18600 + }, + { + "epoch": 0.3145095189407063, + "grad_norm": 0.11997061967849731, + "learning_rate": 9.900492401727592e-06, + "loss": 0.0022, + "step": 18610 + }, + { + "epoch": 0.3146785192195567, + "grad_norm": 0.019103042781352997, + "learning_rate": 9.90019941742447e-06, + "loss": 0.0023, + "step": 18620 + }, + { + "epoch": 0.31484751949840717, + "grad_norm": 0.15194158256053925, + "learning_rate": 9.899906006780256e-06, + "loss": 0.0024, + "step": 18630 + }, + { + "epoch": 0.31501651977725764, + "grad_norm": 0.12645916640758514, + "learning_rate": 9.899612169820477e-06, + "loss": 0.004, + "step": 18640 + }, + { + "epoch": 0.3151855200561081, + "grad_norm": 0.11526061594486237, + "learning_rate": 9.899317906570702e-06, + "loss": 0.0048, + "step": 18650 + }, + { + "epoch": 0.31535452033495853, + "grad_norm": 0.03650401905179024, + "learning_rate": 9.89902321705653e-06, + "loss": 0.0041, + "step": 18660 + }, + { + "epoch": 0.315523520613809, + "grad_norm": 0.14329461753368378, + "learning_rate": 9.898728101303603e-06, + "loss": 0.0025, + "step": 18670 + }, + { + "epoch": 0.3156925208926595, + "grad_norm": 0.21272212266921997, + "learning_rate": 9.898432559337596e-06, + "loss": 0.0046, + "step": 18680 + }, + { + "epoch": 0.31586152117150995, + "grad_norm": 0.15306775271892548, + "learning_rate": 9.898136591184222e-06, + "loss": 0.004, + "step": 18690 + }, + { + "epoch": 0.31603052145036037, + "grad_norm": 0.13471972942352295, + 
"learning_rate": 9.897840196869235e-06, + "loss": 0.0029, + "step": 18700 + }, + { + "epoch": 0.31619952172921084, + "grad_norm": 0.2979624271392822, + "learning_rate": 9.897543376418419e-06, + "loss": 0.0038, + "step": 18710 + }, + { + "epoch": 0.3163685220080613, + "grad_norm": 0.11547307670116425, + "learning_rate": 9.897246129857598e-06, + "loss": 0.0033, + "step": 18720 + }, + { + "epoch": 0.3165375222869118, + "grad_norm": 0.30266544222831726, + "learning_rate": 9.896948457212638e-06, + "loss": 0.0057, + "step": 18730 + }, + { + "epoch": 0.31670652256576226, + "grad_norm": 0.08800787478685379, + "learning_rate": 9.896650358509437e-06, + "loss": 0.0021, + "step": 18740 + }, + { + "epoch": 0.3168755228446127, + "grad_norm": 0.09709448367357254, + "learning_rate": 9.896351833773929e-06, + "loss": 0.0039, + "step": 18750 + }, + { + "epoch": 0.31704452312346315, + "grad_norm": 0.07401283830404282, + "learning_rate": 9.896052883032087e-06, + "loss": 0.003, + "step": 18760 + }, + { + "epoch": 0.3172135234023136, + "grad_norm": 0.14612315595149994, + "learning_rate": 9.895753506309923e-06, + "loss": 0.003, + "step": 18770 + }, + { + "epoch": 0.3173825236811641, + "grad_norm": 0.06939349323511124, + "learning_rate": 9.895453703633485e-06, + "loss": 0.0043, + "step": 18780 + }, + { + "epoch": 0.3175515239600145, + "grad_norm": 0.1797635704278946, + "learning_rate": 9.895153475028853e-06, + "loss": 0.0033, + "step": 18790 + }, + { + "epoch": 0.317720524238865, + "grad_norm": 0.12291466444730759, + "learning_rate": 9.894852820522152e-06, + "loss": 0.0037, + "step": 18800 + }, + { + "epoch": 0.31788952451771546, + "grad_norm": 0.1714431196451187, + "learning_rate": 9.89455174013954e-06, + "loss": 0.0031, + "step": 18810 + }, + { + "epoch": 0.31805852479656593, + "grad_norm": 0.2693570554256439, + "learning_rate": 9.894250233907213e-06, + "loss": 0.0048, + "step": 18820 + }, + { + "epoch": 0.31822752507541635, + "grad_norm": 0.1704714298248291, + "learning_rate": 9.893948301851402e-06, + "loss": 0.0029, + "step": 18830 + }, + { + "epoch": 0.3183965253542668, + "grad_norm": 0.23571054637432098, + "learning_rate": 9.893645943998376e-06, + "loss": 0.0038, + "step": 18840 + }, + { + "epoch": 0.3185655256331173, + "grad_norm": 0.1295132339000702, + "learning_rate": 9.893343160374442e-06, + "loss": 0.0046, + "step": 18850 + }, + { + "epoch": 0.31873452591196777, + "grad_norm": 0.2537764310836792, + "learning_rate": 9.893039951005945e-06, + "loss": 0.0044, + "step": 18860 + }, + { + "epoch": 0.3189035261908182, + "grad_norm": 0.08305542916059494, + "learning_rate": 9.892736315919265e-06, + "loss": 0.0022, + "step": 18870 + }, + { + "epoch": 0.31907252646966866, + "grad_norm": 0.04880817234516144, + "learning_rate": 9.892432255140821e-06, + "loss": 0.0027, + "step": 18880 + }, + { + "epoch": 0.31924152674851913, + "grad_norm": 0.11052796244621277, + "learning_rate": 9.892127768697064e-06, + "loss": 0.0035, + "step": 18890 + }, + { + "epoch": 0.3194105270273696, + "grad_norm": 0.19982464611530304, + "learning_rate": 9.89182285661449e-06, + "loss": 0.0026, + "step": 18900 + }, + { + "epoch": 0.3195795273062201, + "grad_norm": 0.46795204281806946, + "learning_rate": 9.891517518919627e-06, + "loss": 0.0036, + "step": 18910 + }, + { + "epoch": 0.3197485275850705, + "grad_norm": 0.03222557529807091, + "learning_rate": 9.891211755639037e-06, + "loss": 0.0022, + "step": 18920 + }, + { + "epoch": 0.31991752786392097, + "grad_norm": 0.1941870152950287, + "learning_rate": 9.890905566799328e-06, + "loss": 0.0044, + 
"step": 18930 + }, + { + "epoch": 0.32008652814277144, + "grad_norm": 0.17623929679393768, + "learning_rate": 9.890598952427137e-06, + "loss": 0.0048, + "step": 18940 + }, + { + "epoch": 0.3202555284216219, + "grad_norm": 0.014388482086360455, + "learning_rate": 9.890291912549141e-06, + "loss": 0.0032, + "step": 18950 + }, + { + "epoch": 0.32042452870047233, + "grad_norm": 0.07924115657806396, + "learning_rate": 9.889984447192056e-06, + "loss": 0.0051, + "step": 18960 + }, + { + "epoch": 0.3205935289793228, + "grad_norm": 0.11756382137537003, + "learning_rate": 9.88967655638263e-06, + "loss": 0.0046, + "step": 18970 + }, + { + "epoch": 0.3207625292581733, + "grad_norm": 0.08961915224790573, + "learning_rate": 9.889368240147654e-06, + "loss": 0.0038, + "step": 18980 + }, + { + "epoch": 0.32093152953702375, + "grad_norm": 0.16066645085811615, + "learning_rate": 9.889059498513951e-06, + "loss": 0.0033, + "step": 18990 + }, + { + "epoch": 0.32110052981587417, + "grad_norm": 0.27878034114837646, + "learning_rate": 9.888750331508383e-06, + "loss": 0.0022, + "step": 19000 + }, + { + "epoch": 0.32126953009472464, + "grad_norm": 0.1295364946126938, + "learning_rate": 9.88844073915785e-06, + "loss": 0.0026, + "step": 19010 + }, + { + "epoch": 0.3214385303735751, + "grad_norm": 0.21508057415485382, + "learning_rate": 9.888130721489288e-06, + "loss": 0.0033, + "step": 19020 + }, + { + "epoch": 0.3216075306524256, + "grad_norm": 0.17439287900924683, + "learning_rate": 9.88782027852967e-06, + "loss": 0.0033, + "step": 19030 + }, + { + "epoch": 0.32177653093127606, + "grad_norm": 0.10152928531169891, + "learning_rate": 9.887509410306005e-06, + "loss": 0.0054, + "step": 19040 + }, + { + "epoch": 0.3219455312101265, + "grad_norm": 0.06065661087632179, + "learning_rate": 9.887198116845341e-06, + "loss": 0.0023, + "step": 19050 + }, + { + "epoch": 0.32211453148897695, + "grad_norm": 0.10322712361812592, + "learning_rate": 9.88688639817476e-06, + "loss": 0.0037, + "step": 19060 + }, + { + "epoch": 0.3222835317678274, + "grad_norm": 0.23588989675045013, + "learning_rate": 9.886574254321388e-06, + "loss": 0.0035, + "step": 19070 + }, + { + "epoch": 0.3224525320466779, + "grad_norm": 0.10690174996852875, + "learning_rate": 9.886261685312378e-06, + "loss": 0.0043, + "step": 19080 + }, + { + "epoch": 0.3226215323255283, + "grad_norm": 0.18640218675136566, + "learning_rate": 9.885948691174927e-06, + "loss": 0.0029, + "step": 19090 + }, + { + "epoch": 0.3227905326043788, + "grad_norm": 0.12204626947641373, + "learning_rate": 9.885635271936268e-06, + "loss": 0.0042, + "step": 19100 + }, + { + "epoch": 0.32295953288322926, + "grad_norm": 0.09853368997573853, + "learning_rate": 9.88532142762367e-06, + "loss": 0.0039, + "step": 19110 + }, + { + "epoch": 0.32312853316207973, + "grad_norm": 0.06033041328191757, + "learning_rate": 9.885007158264435e-06, + "loss": 0.0024, + "step": 19120 + }, + { + "epoch": 0.32329753344093015, + "grad_norm": 0.03601764515042305, + "learning_rate": 9.88469246388591e-06, + "loss": 0.0021, + "step": 19130 + }, + { + "epoch": 0.3234665337197806, + "grad_norm": 0.27800238132476807, + "learning_rate": 9.884377344515473e-06, + "loss": 0.0042, + "step": 19140 + }, + { + "epoch": 0.3236355339986311, + "grad_norm": 0.37190690636634827, + "learning_rate": 9.884061800180542e-06, + "loss": 0.0044, + "step": 19150 + }, + { + "epoch": 0.32380453427748157, + "grad_norm": 0.2755439281463623, + "learning_rate": 9.88374583090857e-06, + "loss": 0.0036, + "step": 19160 + }, + { + "epoch": 0.32397353455633204, 
+ "grad_norm": 0.334343284368515, + "learning_rate": 9.88342943672705e-06, + "loss": 0.0044, + "step": 19170 + }, + { + "epoch": 0.32414253483518246, + "grad_norm": 0.12950769066810608, + "learning_rate": 9.883112617663508e-06, + "loss": 0.0031, + "step": 19180 + }, + { + "epoch": 0.32431153511403293, + "grad_norm": 0.07064332067966461, + "learning_rate": 9.882795373745508e-06, + "loss": 0.0027, + "step": 19190 + }, + { + "epoch": 0.3244805353928834, + "grad_norm": 0.2044609934091568, + "learning_rate": 9.882477705000654e-06, + "loss": 0.0051, + "step": 19200 + }, + { + "epoch": 0.3246495356717339, + "grad_norm": 0.18798178434371948, + "learning_rate": 9.882159611456584e-06, + "loss": 0.0031, + "step": 19210 + }, + { + "epoch": 0.3248185359505843, + "grad_norm": 0.10369912534952164, + "learning_rate": 9.88184109314097e-06, + "loss": 0.0041, + "step": 19220 + }, + { + "epoch": 0.32498753622943477, + "grad_norm": 0.39374813437461853, + "learning_rate": 9.881522150081531e-06, + "loss": 0.0027, + "step": 19230 + }, + { + "epoch": 0.32515653650828524, + "grad_norm": 0.1317203789949417, + "learning_rate": 9.881202782306011e-06, + "loss": 0.013, + "step": 19240 + }, + { + "epoch": 0.3253255367871357, + "grad_norm": 0.08882055431604385, + "learning_rate": 9.880882989842202e-06, + "loss": 0.0032, + "step": 19250 + }, + { + "epoch": 0.32549453706598613, + "grad_norm": 0.27533435821533203, + "learning_rate": 9.880562772717923e-06, + "loss": 0.004, + "step": 19260 + }, + { + "epoch": 0.3256635373448366, + "grad_norm": 0.2720194458961487, + "learning_rate": 9.880242130961035e-06, + "loss": 0.0043, + "step": 19270 + }, + { + "epoch": 0.3258325376236871, + "grad_norm": 0.06423293799161911, + "learning_rate": 9.879921064599438e-06, + "loss": 0.0029, + "step": 19280 + }, + { + "epoch": 0.32600153790253755, + "grad_norm": 0.13293801248073578, + "learning_rate": 9.879599573661063e-06, + "loss": 0.0032, + "step": 19290 + }, + { + "epoch": 0.326170538181388, + "grad_norm": 0.48314961791038513, + "learning_rate": 9.879277658173883e-06, + "loss": 0.0026, + "step": 19300 + }, + { + "epoch": 0.32633953846023844, + "grad_norm": 0.1730279177427292, + "learning_rate": 9.878955318165907e-06, + "loss": 0.0043, + "step": 19310 + }, + { + "epoch": 0.3265085387390889, + "grad_norm": 0.0645437240600586, + "learning_rate": 9.87863255366518e-06, + "loss": 0.0056, + "step": 19320 + }, + { + "epoch": 0.3266775390179394, + "grad_norm": 0.3200550973415375, + "learning_rate": 9.878309364699781e-06, + "loss": 0.0049, + "step": 19330 + }, + { + "epoch": 0.32684653929678986, + "grad_norm": 0.0750180333852768, + "learning_rate": 9.877985751297831e-06, + "loss": 0.0054, + "step": 19340 + }, + { + "epoch": 0.3270155395756403, + "grad_norm": 0.14632025361061096, + "learning_rate": 9.87766171348749e-06, + "loss": 0.003, + "step": 19350 + }, + { + "epoch": 0.32718453985449075, + "grad_norm": 0.07658658921718597, + "learning_rate": 9.877337251296943e-06, + "loss": 0.0026, + "step": 19360 + }, + { + "epoch": 0.3273535401333412, + "grad_norm": 0.03774444758892059, + "learning_rate": 9.877012364754425e-06, + "loss": 0.0039, + "step": 19370 + }, + { + "epoch": 0.3275225404121917, + "grad_norm": 0.38548749685287476, + "learning_rate": 9.876687053888203e-06, + "loss": 0.0044, + "step": 19380 + }, + { + "epoch": 0.3276915406910421, + "grad_norm": 0.09892215579748154, + "learning_rate": 9.87636131872658e-06, + "loss": 0.0038, + "step": 19390 + }, + { + "epoch": 0.3278605409698926, + "grad_norm": 0.167256698012352, + "learning_rate": 
9.876035159297894e-06, + "loss": 0.0039, + "step": 19400 + }, + { + "epoch": 0.32802954124874306, + "grad_norm": 0.19858378171920776, + "learning_rate": 9.875708575630524e-06, + "loss": 0.003, + "step": 19410 + }, + { + "epoch": 0.32819854152759353, + "grad_norm": 0.05991840735077858, + "learning_rate": 9.875381567752884e-06, + "loss": 0.0026, + "step": 19420 + }, + { + "epoch": 0.328367541806444, + "grad_norm": 0.2287953794002533, + "learning_rate": 9.875054135693428e-06, + "loss": 0.0063, + "step": 19430 + }, + { + "epoch": 0.3285365420852944, + "grad_norm": 0.09663691371679306, + "learning_rate": 9.87472627948064e-06, + "loss": 0.0039, + "step": 19440 + }, + { + "epoch": 0.3287055423641449, + "grad_norm": 0.13738645613193512, + "learning_rate": 9.874397999143048e-06, + "loss": 0.0022, + "step": 19450 + }, + { + "epoch": 0.32887454264299537, + "grad_norm": 0.1286608725786209, + "learning_rate": 9.874069294709215e-06, + "loss": 0.0026, + "step": 19460 + }, + { + "epoch": 0.32904354292184584, + "grad_norm": 0.12891557812690735, + "learning_rate": 9.873740166207735e-06, + "loss": 0.0025, + "step": 19470 + }, + { + "epoch": 0.32921254320069626, + "grad_norm": 0.10180335491895676, + "learning_rate": 9.873410613667247e-06, + "loss": 0.0027, + "step": 19480 + }, + { + "epoch": 0.32938154347954673, + "grad_norm": 0.06524354964494705, + "learning_rate": 9.873080637116425e-06, + "loss": 0.0028, + "step": 19490 + }, + { + "epoch": 0.3295505437583972, + "grad_norm": 0.22627593576908112, + "learning_rate": 9.872750236583976e-06, + "loss": 0.0045, + "step": 19500 + }, + { + "epoch": 0.3297195440372477, + "grad_norm": 0.18337315320968628, + "learning_rate": 9.872419412098648e-06, + "loss": 0.0046, + "step": 19510 + }, + { + "epoch": 0.3298885443160981, + "grad_norm": 0.2440752238035202, + "learning_rate": 9.872088163689222e-06, + "loss": 0.0037, + "step": 19520 + }, + { + "epoch": 0.33005754459494857, + "grad_norm": 0.17850267887115479, + "learning_rate": 9.871756491384522e-06, + "loss": 0.0039, + "step": 19530 + }, + { + "epoch": 0.33022654487379904, + "grad_norm": 0.21067939698696136, + "learning_rate": 9.871424395213402e-06, + "loss": 0.003, + "step": 19540 + }, + { + "epoch": 0.3303955451526495, + "grad_norm": 0.046429593116045, + "learning_rate": 9.871091875204756e-06, + "loss": 0.0027, + "step": 19550 + }, + { + "epoch": 0.33056454543149993, + "grad_norm": 0.062241218984127045, + "learning_rate": 9.870758931387519e-06, + "loss": 0.0048, + "step": 19560 + }, + { + "epoch": 0.3307335457103504, + "grad_norm": 0.055095065385103226, + "learning_rate": 9.870425563790654e-06, + "loss": 0.0036, + "step": 19570 + }, + { + "epoch": 0.3309025459892009, + "grad_norm": 0.24390147626399994, + "learning_rate": 9.870091772443166e-06, + "loss": 0.0032, + "step": 19580 + }, + { + "epoch": 0.33107154626805135, + "grad_norm": 0.04408691078424454, + "learning_rate": 9.869757557374099e-06, + "loss": 0.0034, + "step": 19590 + }, + { + "epoch": 0.3312405465469018, + "grad_norm": 0.300178200006485, + "learning_rate": 9.86942291861253e-06, + "loss": 0.0035, + "step": 19600 + }, + { + "epoch": 0.33140954682575224, + "grad_norm": 0.4546404778957367, + "learning_rate": 9.869087856187574e-06, + "loss": 0.0029, + "step": 19610 + }, + { + "epoch": 0.3315785471046027, + "grad_norm": 0.18573160469532013, + "learning_rate": 9.868752370128383e-06, + "loss": 0.0035, + "step": 19620 + }, + { + "epoch": 0.3317475473834532, + "grad_norm": 0.38010624051094055, + "learning_rate": 9.868416460464145e-06, + "loss": 0.0047, + "step": 19630 + 
}, + { + "epoch": 0.33191654766230366, + "grad_norm": 0.2465725839138031, + "learning_rate": 9.86808012722409e-06, + "loss": 0.002, + "step": 19640 + }, + { + "epoch": 0.3320855479411541, + "grad_norm": 0.07605840265750885, + "learning_rate": 9.867743370437476e-06, + "loss": 0.0033, + "step": 19650 + }, + { + "epoch": 0.33225454822000455, + "grad_norm": 0.34282565116882324, + "learning_rate": 9.867406190133602e-06, + "loss": 0.0038, + "step": 19660 + }, + { + "epoch": 0.332423548498855, + "grad_norm": 0.08095477521419525, + "learning_rate": 9.867068586341808e-06, + "loss": 0.0044, + "step": 19670 + }, + { + "epoch": 0.3325925487777055, + "grad_norm": 0.22404147684574127, + "learning_rate": 9.866730559091465e-06, + "loss": 0.0035, + "step": 19680 + }, + { + "epoch": 0.3327615490565559, + "grad_norm": 0.1436876505613327, + "learning_rate": 9.866392108411985e-06, + "loss": 0.0037, + "step": 19690 + }, + { + "epoch": 0.3329305493354064, + "grad_norm": 0.103852778673172, + "learning_rate": 9.86605323433281e-06, + "loss": 0.0034, + "step": 19700 + }, + { + "epoch": 0.33309954961425686, + "grad_norm": 0.35399582982063293, + "learning_rate": 9.865713936883429e-06, + "loss": 0.0042, + "step": 19710 + }, + { + "epoch": 0.33326854989310734, + "grad_norm": 0.025398628786206245, + "learning_rate": 9.865374216093363e-06, + "loss": 0.0049, + "step": 19720 + }, + { + "epoch": 0.3334375501719578, + "grad_norm": 0.16962707042694092, + "learning_rate": 9.865034071992165e-06, + "loss": 0.0079, + "step": 19730 + }, + { + "epoch": 0.3336065504508082, + "grad_norm": 0.3511894643306732, + "learning_rate": 9.86469350460943e-06, + "loss": 0.0044, + "step": 19740 + }, + { + "epoch": 0.3337755507296587, + "grad_norm": 0.012258529663085938, + "learning_rate": 9.86435251397479e-06, + "loss": 0.0044, + "step": 19750 + }, + { + "epoch": 0.33394455100850917, + "grad_norm": 0.20916761457920074, + "learning_rate": 9.864011100117916e-06, + "loss": 0.0043, + "step": 19760 + }, + { + "epoch": 0.33411355128735964, + "grad_norm": 0.24661147594451904, + "learning_rate": 9.863669263068506e-06, + "loss": 0.0042, + "step": 19770 + }, + { + "epoch": 0.33428255156621006, + "grad_norm": 0.3546825349330902, + "learning_rate": 9.863327002856308e-06, + "loss": 0.004, + "step": 19780 + }, + { + "epoch": 0.33445155184506054, + "grad_norm": 0.06682464480400085, + "learning_rate": 9.862984319511095e-06, + "loss": 0.0035, + "step": 19790 + }, + { + "epoch": 0.334620552123911, + "grad_norm": 0.14287163317203522, + "learning_rate": 9.862641213062688e-06, + "loss": 0.0037, + "step": 19800 + }, + { + "epoch": 0.3347895524027615, + "grad_norm": 0.19845755398273468, + "learning_rate": 9.862297683540932e-06, + "loss": 0.0026, + "step": 19810 + }, + { + "epoch": 0.3349585526816119, + "grad_norm": 0.4925055205821991, + "learning_rate": 9.86195373097572e-06, + "loss": 0.0042, + "step": 19820 + }, + { + "epoch": 0.33512755296046237, + "grad_norm": 0.14126281440258026, + "learning_rate": 9.861609355396978e-06, + "loss": 0.0039, + "step": 19830 + }, + { + "epoch": 0.33529655323931284, + "grad_norm": 0.08112350106239319, + "learning_rate": 9.861264556834665e-06, + "loss": 0.0032, + "step": 19840 + }, + { + "epoch": 0.3354655535181633, + "grad_norm": 0.07324805855751038, + "learning_rate": 9.860919335318783e-06, + "loss": 0.0032, + "step": 19850 + }, + { + "epoch": 0.3356345537970138, + "grad_norm": 0.15092162787914276, + "learning_rate": 9.860573690879367e-06, + "loss": 0.0046, + "step": 19860 + }, + { + "epoch": 0.3358035540758642, + "grad_norm": 
0.07325884699821472, + "learning_rate": 9.86022762354649e-06, + "loss": 0.005, + "step": 19870 + }, + { + "epoch": 0.3359725543547147, + "grad_norm": 0.27018794417381287, + "learning_rate": 9.859881133350262e-06, + "loss": 0.0041, + "step": 19880 + }, + { + "epoch": 0.33614155463356515, + "grad_norm": 0.27263039350509644, + "learning_rate": 9.859534220320828e-06, + "loss": 0.004, + "step": 19890 + }, + { + "epoch": 0.3363105549124156, + "grad_norm": 0.15515533089637756, + "learning_rate": 9.859186884488372e-06, + "loss": 0.0031, + "step": 19900 + }, + { + "epoch": 0.33647955519126604, + "grad_norm": 0.10629349946975708, + "learning_rate": 9.858839125883112e-06, + "loss": 0.0036, + "step": 19910 + }, + { + "epoch": 0.3366485554701165, + "grad_norm": 0.2211560159921646, + "learning_rate": 9.858490944535311e-06, + "loss": 0.0034, + "step": 19920 + }, + { + "epoch": 0.336817555748967, + "grad_norm": 0.12569260597229004, + "learning_rate": 9.858142340475254e-06, + "loss": 0.005, + "step": 19930 + }, + { + "epoch": 0.33698655602781746, + "grad_norm": 0.10294578224420547, + "learning_rate": 9.857793313733277e-06, + "loss": 0.0037, + "step": 19940 + }, + { + "epoch": 0.3371555563066679, + "grad_norm": 0.4229937195777893, + "learning_rate": 9.857443864339744e-06, + "loss": 0.0033, + "step": 19950 + }, + { + "epoch": 0.33732455658551835, + "grad_norm": 0.15797141194343567, + "learning_rate": 9.85709399232506e-06, + "loss": 0.0032, + "step": 19960 + }, + { + "epoch": 0.3374935568643688, + "grad_norm": 0.23842860758304596, + "learning_rate": 9.856743697719666e-06, + "loss": 0.0044, + "step": 19970 + }, + { + "epoch": 0.3376625571432193, + "grad_norm": 0.17768318951129913, + "learning_rate": 9.85639298055404e-06, + "loss": 0.0037, + "step": 19980 + }, + { + "epoch": 0.3378315574220698, + "grad_norm": 0.011220584623515606, + "learning_rate": 9.856041840858693e-06, + "loss": 0.0029, + "step": 19990 + }, + { + "epoch": 0.3380005577009202, + "grad_norm": 0.144292950630188, + "learning_rate": 9.855690278664179e-06, + "loss": 0.0041, + "step": 20000 + }, + { + "epoch": 0.33816955797977066, + "grad_norm": 0.14557820558547974, + "learning_rate": 9.855338294001083e-06, + "loss": 0.0033, + "step": 20010 + }, + { + "epoch": 0.33833855825862114, + "grad_norm": 0.08057982474565506, + "learning_rate": 9.854985886900032e-06, + "loss": 0.005, + "step": 20020 + }, + { + "epoch": 0.3385075585374716, + "grad_norm": 0.10894902795553207, + "learning_rate": 9.854633057391685e-06, + "loss": 0.004, + "step": 20030 + }, + { + "epoch": 0.338676558816322, + "grad_norm": 0.2983936071395874, + "learning_rate": 9.854279805506741e-06, + "loss": 0.0037, + "step": 20040 + }, + { + "epoch": 0.3388455590951725, + "grad_norm": 0.12957635521888733, + "learning_rate": 9.853926131275934e-06, + "loss": 0.0038, + "step": 20050 + }, + { + "epoch": 0.339014559374023, + "grad_norm": 0.17585639655590057, + "learning_rate": 9.853572034730036e-06, + "loss": 0.0037, + "step": 20060 + }, + { + "epoch": 0.33918355965287345, + "grad_norm": 0.7723053097724915, + "learning_rate": 9.853217515899857e-06, + "loss": 0.0079, + "step": 20070 + }, + { + "epoch": 0.33935255993172386, + "grad_norm": 0.10039085894823074, + "learning_rate": 9.852862574816237e-06, + "loss": 0.0036, + "step": 20080 + }, + { + "epoch": 0.33952156021057434, + "grad_norm": 0.1289125382900238, + "learning_rate": 9.852507211510063e-06, + "loss": 0.0035, + "step": 20090 + }, + { + "epoch": 0.3396905604894248, + "grad_norm": 0.18636161088943481, + "learning_rate": 9.852151426012249e-06, + 
"loss": 0.0042, + "step": 20100 + }, + { + "epoch": 0.3398595607682753, + "grad_norm": 0.023119311779737473, + "learning_rate": 9.851795218353751e-06, + "loss": 0.0037, + "step": 20110 + }, + { + "epoch": 0.34002856104712575, + "grad_norm": 0.09347156435251236, + "learning_rate": 9.851438588565562e-06, + "loss": 0.0018, + "step": 20120 + }, + { + "epoch": 0.34019756132597617, + "grad_norm": 0.10636084526777267, + "learning_rate": 9.851081536678711e-06, + "loss": 0.0031, + "step": 20130 + }, + { + "epoch": 0.34036656160482665, + "grad_norm": 0.0867602601647377, + "learning_rate": 9.850724062724261e-06, + "loss": 0.0031, + "step": 20140 + }, + { + "epoch": 0.3405355618836771, + "grad_norm": 0.31015679240226746, + "learning_rate": 9.850366166733316e-06, + "loss": 0.0051, + "step": 20150 + }, + { + "epoch": 0.3407045621625276, + "grad_norm": 0.11224543303251266, + "learning_rate": 9.850007848737013e-06, + "loss": 0.0035, + "step": 20160 + }, + { + "epoch": 0.340873562441378, + "grad_norm": 0.14716555178165436, + "learning_rate": 9.84964910876653e-06, + "loss": 0.0024, + "step": 20170 + }, + { + "epoch": 0.3410425627202285, + "grad_norm": 0.4090542197227478, + "learning_rate": 9.849289946853077e-06, + "loss": 0.005, + "step": 20180 + }, + { + "epoch": 0.34121156299907895, + "grad_norm": 0.13513973355293274, + "learning_rate": 9.848930363027901e-06, + "loss": 0.0025, + "step": 20190 + }, + { + "epoch": 0.3413805632779294, + "grad_norm": 0.3776029646396637, + "learning_rate": 9.848570357322294e-06, + "loss": 0.0056, + "step": 20200 + }, + { + "epoch": 0.34154956355677984, + "grad_norm": 0.07301806658506393, + "learning_rate": 9.848209929767571e-06, + "loss": 0.005, + "step": 20210 + }, + { + "epoch": 0.3417185638356303, + "grad_norm": 0.03997489809989929, + "learning_rate": 9.847849080395094e-06, + "loss": 0.0047, + "step": 20220 + }, + { + "epoch": 0.3418875641144808, + "grad_norm": 0.33692535758018494, + "learning_rate": 9.84748780923626e-06, + "loss": 0.004, + "step": 20230 + }, + { + "epoch": 0.34205656439333126, + "grad_norm": 0.09017347544431686, + "learning_rate": 9.847126116322498e-06, + "loss": 0.0037, + "step": 20240 + }, + { + "epoch": 0.34222556467218174, + "grad_norm": 0.13404417037963867, + "learning_rate": 9.84676400168528e-06, + "loss": 0.0047, + "step": 20250 + }, + { + "epoch": 0.34239456495103215, + "grad_norm": 0.11601067334413528, + "learning_rate": 9.846401465356112e-06, + "loss": 0.0036, + "step": 20260 + }, + { + "epoch": 0.3425635652298826, + "grad_norm": 0.17043401300907135, + "learning_rate": 9.846038507366536e-06, + "loss": 0.0038, + "step": 20270 + }, + { + "epoch": 0.3427325655087331, + "grad_norm": 0.31567105650901794, + "learning_rate": 9.845675127748126e-06, + "loss": 0.0045, + "step": 20280 + }, + { + "epoch": 0.3429015657875836, + "grad_norm": 0.09197935461997986, + "learning_rate": 9.845311326532504e-06, + "loss": 0.0026, + "step": 20290 + }, + { + "epoch": 0.343070566066434, + "grad_norm": 0.10412884503602982, + "learning_rate": 9.844947103751324e-06, + "loss": 0.0051, + "step": 20300 + }, + { + "epoch": 0.34323956634528446, + "grad_norm": 0.16309162974357605, + "learning_rate": 9.844582459436267e-06, + "loss": 0.0035, + "step": 20310 + }, + { + "epoch": 0.34340856662413494, + "grad_norm": 0.020650912076234818, + "learning_rate": 9.844217393619066e-06, + "loss": 0.0029, + "step": 20320 + }, + { + "epoch": 0.3435775669029854, + "grad_norm": 0.06039857119321823, + "learning_rate": 9.84385190633148e-06, + "loss": 0.0025, + "step": 20330 + }, + { + "epoch": 
0.3437465671818358, + "grad_norm": 0.21292872726917267, + "learning_rate": 9.843485997605309e-06, + "loss": 0.0034, + "step": 20340 + }, + { + "epoch": 0.3439155674606863, + "grad_norm": 0.19261842966079712, + "learning_rate": 9.843119667472389e-06, + "loss": 0.0028, + "step": 20350 + }, + { + "epoch": 0.3440845677395368, + "grad_norm": 0.04296991974115372, + "learning_rate": 9.842752915964592e-06, + "loss": 0.0027, + "step": 20360 + }, + { + "epoch": 0.34425356801838725, + "grad_norm": 0.4843842089176178, + "learning_rate": 9.842385743113829e-06, + "loss": 0.0055, + "step": 20370 + }, + { + "epoch": 0.34442256829723766, + "grad_norm": 0.5954232811927795, + "learning_rate": 9.842018148952044e-06, + "loss": 0.0051, + "step": 20380 + }, + { + "epoch": 0.34459156857608814, + "grad_norm": 0.10331881791353226, + "learning_rate": 9.84165013351122e-06, + "loss": 0.0033, + "step": 20390 + }, + { + "epoch": 0.3447605688549386, + "grad_norm": 0.16192494332790375, + "learning_rate": 9.841281696823377e-06, + "loss": 0.0034, + "step": 20400 + }, + { + "epoch": 0.3449295691337891, + "grad_norm": 0.10578694939613342, + "learning_rate": 9.840912838920568e-06, + "loss": 0.0028, + "step": 20410 + }, + { + "epoch": 0.34509856941263956, + "grad_norm": 0.1332312375307083, + "learning_rate": 9.840543559834888e-06, + "loss": 0.0032, + "step": 20420 + }, + { + "epoch": 0.34526756969149, + "grad_norm": 0.0674179196357727, + "learning_rate": 9.840173859598465e-06, + "loss": 0.0023, + "step": 20430 + }, + { + "epoch": 0.34543656997034045, + "grad_norm": 0.22473275661468506, + "learning_rate": 9.839803738243466e-06, + "loss": 0.0042, + "step": 20440 + }, + { + "epoch": 0.3456055702491909, + "grad_norm": 0.05025557801127434, + "learning_rate": 9.83943319580209e-06, + "loss": 0.0045, + "step": 20450 + }, + { + "epoch": 0.3457745705280414, + "grad_norm": 0.03869166225194931, + "learning_rate": 9.83906223230658e-06, + "loss": 0.0029, + "step": 20460 + }, + { + "epoch": 0.3459435708068918, + "grad_norm": 0.09461043030023575, + "learning_rate": 9.838690847789211e-06, + "loss": 0.0047, + "step": 20470 + }, + { + "epoch": 0.3461125710857423, + "grad_norm": 0.2004118263721466, + "learning_rate": 9.838319042282293e-06, + "loss": 0.007, + "step": 20480 + }, + { + "epoch": 0.34628157136459276, + "grad_norm": 0.12171358615159988, + "learning_rate": 9.837946815818176e-06, + "loss": 0.0041, + "step": 20490 + }, + { + "epoch": 0.34645057164344323, + "grad_norm": 0.12448956072330475, + "learning_rate": 9.837574168429247e-06, + "loss": 0.0032, + "step": 20500 + }, + { + "epoch": 0.34661957192229365, + "grad_norm": 0.17204810678958893, + "learning_rate": 9.837201100147925e-06, + "loss": 0.0033, + "step": 20510 + }, + { + "epoch": 0.3467885722011441, + "grad_norm": 0.08777932077646255, + "learning_rate": 9.836827611006672e-06, + "loss": 0.0027, + "step": 20520 + }, + { + "epoch": 0.3469575724799946, + "grad_norm": 0.14681030809879303, + "learning_rate": 9.83645370103798e-06, + "loss": 0.0032, + "step": 20530 + }, + { + "epoch": 0.34712657275884506, + "grad_norm": 0.09471159428358078, + "learning_rate": 9.836079370274385e-06, + "loss": 0.0022, + "step": 20540 + }, + { + "epoch": 0.34729557303769554, + "grad_norm": 0.09500308334827423, + "learning_rate": 9.835704618748451e-06, + "loss": 0.0019, + "step": 20550 + }, + { + "epoch": 0.34746457331654595, + "grad_norm": 0.3364561200141907, + "learning_rate": 9.835329446492788e-06, + "loss": 0.0018, + "step": 20560 + }, + { + "epoch": 0.34763357359539643, + "grad_norm": 0.192582905292511, + 
"learning_rate": 9.834953853540035e-06, + "loss": 0.0031, + "step": 20570 + }, + { + "epoch": 0.3478025738742469, + "grad_norm": 0.05873227119445801, + "learning_rate": 9.834577839922869e-06, + "loss": 0.0029, + "step": 20580 + }, + { + "epoch": 0.3479715741530974, + "grad_norm": 0.09160672128200531, + "learning_rate": 9.834201405674008e-06, + "loss": 0.004, + "step": 20590 + }, + { + "epoch": 0.3481405744319478, + "grad_norm": 0.14228111505508423, + "learning_rate": 9.833824550826203e-06, + "loss": 0.0035, + "step": 20600 + }, + { + "epoch": 0.34830957471079826, + "grad_norm": 0.05157274380326271, + "learning_rate": 9.833447275412243e-06, + "loss": 0.0028, + "step": 20610 + }, + { + "epoch": 0.34847857498964874, + "grad_norm": 0.06791481375694275, + "learning_rate": 9.833069579464949e-06, + "loss": 0.0031, + "step": 20620 + }, + { + "epoch": 0.3486475752684992, + "grad_norm": 0.03289634734392166, + "learning_rate": 9.832691463017186e-06, + "loss": 0.002, + "step": 20630 + }, + { + "epoch": 0.3488165755473496, + "grad_norm": 0.11063172668218613, + "learning_rate": 9.83231292610185e-06, + "loss": 0.0039, + "step": 20640 + }, + { + "epoch": 0.3489855758262001, + "grad_norm": 0.24149391055107117, + "learning_rate": 9.831933968751877e-06, + "loss": 0.0029, + "step": 20650 + }, + { + "epoch": 0.3491545761050506, + "grad_norm": 0.19239841401576996, + "learning_rate": 9.831554591000236e-06, + "loss": 0.0028, + "step": 20660 + }, + { + "epoch": 0.34932357638390105, + "grad_norm": 0.14838945865631104, + "learning_rate": 9.831174792879938e-06, + "loss": 0.0029, + "step": 20670 + }, + { + "epoch": 0.3494925766627515, + "grad_norm": 0.258220911026001, + "learning_rate": 9.830794574424026e-06, + "loss": 0.0017, + "step": 20680 + }, + { + "epoch": 0.34966157694160194, + "grad_norm": 0.2164478600025177, + "learning_rate": 9.83041393566558e-06, + "loss": 0.0026, + "step": 20690 + }, + { + "epoch": 0.3498305772204524, + "grad_norm": 0.01935494877398014, + "learning_rate": 9.830032876637714e-06, + "loss": 0.0032, + "step": 20700 + }, + { + "epoch": 0.3499995774993029, + "grad_norm": 0.22248001396656036, + "learning_rate": 9.829651397373589e-06, + "loss": 0.0065, + "step": 20710 + }, + { + "epoch": 0.35016857777815336, + "grad_norm": 0.08571802824735641, + "learning_rate": 9.829269497906393e-06, + "loss": 0.0039, + "step": 20720 + }, + { + "epoch": 0.3503375780570038, + "grad_norm": 0.060072824358940125, + "learning_rate": 9.82888717826935e-06, + "loss": 0.003, + "step": 20730 + }, + { + "epoch": 0.35050657833585425, + "grad_norm": 0.06358876824378967, + "learning_rate": 9.828504438495728e-06, + "loss": 0.0027, + "step": 20740 + }, + { + "epoch": 0.3506755786147047, + "grad_norm": 0.01844388246536255, + "learning_rate": 9.828121278618824e-06, + "loss": 0.0025, + "step": 20750 + }, + { + "epoch": 0.3508445788935552, + "grad_norm": 0.07648131251335144, + "learning_rate": 9.827737698671976e-06, + "loss": 0.0026, + "step": 20760 + }, + { + "epoch": 0.3510135791724056, + "grad_norm": 0.04285912215709686, + "learning_rate": 9.827353698688557e-06, + "loss": 0.0042, + "step": 20770 + }, + { + "epoch": 0.3511825794512561, + "grad_norm": 0.0519292838871479, + "learning_rate": 9.82696927870198e-06, + "loss": 0.0023, + "step": 20780 + }, + { + "epoch": 0.35135157973010656, + "grad_norm": 0.11216392368078232, + "learning_rate": 9.826584438745685e-06, + "loss": 0.0034, + "step": 20790 + }, + { + "epoch": 0.35152058000895703, + "grad_norm": 0.029105642810463905, + "learning_rate": 9.82619917885316e-06, + "loss": 0.0024, + 
"step": 20800 + }, + { + "epoch": 0.3516895802878075, + "grad_norm": 0.19928911328315735, + "learning_rate": 9.825813499057922e-06, + "loss": 0.0019, + "step": 20810 + }, + { + "epoch": 0.3518585805666579, + "grad_norm": 0.04720110446214676, + "learning_rate": 9.825427399393527e-06, + "loss": 0.0018, + "step": 20820 + }, + { + "epoch": 0.3520275808455084, + "grad_norm": 0.1498241126537323, + "learning_rate": 9.825040879893571e-06, + "loss": 0.0035, + "step": 20830 + }, + { + "epoch": 0.35219658112435887, + "grad_norm": 0.10597819089889526, + "learning_rate": 9.824653940591679e-06, + "loss": 0.0055, + "step": 20840 + }, + { + "epoch": 0.35236558140320934, + "grad_norm": 0.14786836504936218, + "learning_rate": 9.82426658152152e-06, + "loss": 0.0051, + "step": 20850 + }, + { + "epoch": 0.35253458168205976, + "grad_norm": 0.09650187194347382, + "learning_rate": 9.823878802716792e-06, + "loss": 0.0031, + "step": 20860 + }, + { + "epoch": 0.35270358196091023, + "grad_norm": 0.06578878313302994, + "learning_rate": 9.823490604211238e-06, + "loss": 0.0036, + "step": 20870 + }, + { + "epoch": 0.3528725822397607, + "grad_norm": 0.09055297821760178, + "learning_rate": 9.823101986038632e-06, + "loss": 0.0042, + "step": 20880 + }, + { + "epoch": 0.3530415825186112, + "grad_norm": 0.35521236062049866, + "learning_rate": 9.822712948232782e-06, + "loss": 0.003, + "step": 20890 + }, + { + "epoch": 0.3532105827974616, + "grad_norm": 0.17009131610393524, + "learning_rate": 9.82232349082754e-06, + "loss": 0.0037, + "step": 20900 + }, + { + "epoch": 0.35337958307631206, + "grad_norm": 0.052262403070926666, + "learning_rate": 9.82193361385679e-06, + "loss": 0.0028, + "step": 20910 + }, + { + "epoch": 0.35354858335516254, + "grad_norm": 0.09378935396671295, + "learning_rate": 9.821543317354451e-06, + "loss": 0.0052, + "step": 20920 + }, + { + "epoch": 0.353717583634013, + "grad_norm": 0.11036661267280579, + "learning_rate": 9.821152601354484e-06, + "loss": 0.0037, + "step": 20930 + }, + { + "epoch": 0.3538865839128635, + "grad_norm": 0.05563582852482796, + "learning_rate": 9.820761465890882e-06, + "loss": 0.0031, + "step": 20940 + }, + { + "epoch": 0.3540555841917139, + "grad_norm": 0.04262761399149895, + "learning_rate": 9.820369910997674e-06, + "loss": 0.0031, + "step": 20950 + }, + { + "epoch": 0.3542245844705644, + "grad_norm": 0.059218067675828934, + "learning_rate": 9.819977936708931e-06, + "loss": 0.0036, + "step": 20960 + }, + { + "epoch": 0.35439358474941485, + "grad_norm": 0.16169999539852142, + "learning_rate": 9.819585543058752e-06, + "loss": 0.0011, + "step": 20970 + }, + { + "epoch": 0.3545625850282653, + "grad_norm": 0.07854767888784409, + "learning_rate": 9.81919273008128e-06, + "loss": 0.0024, + "step": 20980 + }, + { + "epoch": 0.35473158530711574, + "grad_norm": 0.33886852860450745, + "learning_rate": 9.818799497810691e-06, + "loss": 0.0053, + "step": 20990 + }, + { + "epoch": 0.3549005855859662, + "grad_norm": 0.10044369101524353, + "learning_rate": 9.818405846281196e-06, + "loss": 0.0047, + "step": 21000 + }, + { + "epoch": 0.3550695858648167, + "grad_norm": 0.05758042261004448, + "learning_rate": 9.81801177552705e-06, + "loss": 0.0018, + "step": 21010 + }, + { + "epoch": 0.35523858614366716, + "grad_norm": 0.18058626353740692, + "learning_rate": 9.817617285582534e-06, + "loss": 0.0042, + "step": 21020 + }, + { + "epoch": 0.3554075864225176, + "grad_norm": 0.2607985734939575, + "learning_rate": 9.817222376481972e-06, + "loss": 0.0043, + "step": 21030 + }, + { + "epoch": 0.35557658670136805, + 
"grad_norm": 0.030365170910954475, + "learning_rate": 9.816827048259724e-06, + "loss": 0.002, + "step": 21040 + }, + { + "epoch": 0.3557455869802185, + "grad_norm": 0.12924058735370636, + "learning_rate": 9.816431300950184e-06, + "loss": 0.0038, + "step": 21050 + }, + { + "epoch": 0.355914587259069, + "grad_norm": 0.193020299077034, + "learning_rate": 9.816035134587785e-06, + "loss": 0.0034, + "step": 21060 + }, + { + "epoch": 0.3560835875379194, + "grad_norm": 0.13917414844036102, + "learning_rate": 9.815638549206997e-06, + "loss": 0.0057, + "step": 21070 + }, + { + "epoch": 0.3562525878167699, + "grad_norm": 0.11824040114879608, + "learning_rate": 9.81524154484232e-06, + "loss": 0.0033, + "step": 21080 + }, + { + "epoch": 0.35642158809562036, + "grad_norm": 0.04004885256290436, + "learning_rate": 9.8148441215283e-06, + "loss": 0.0028, + "step": 21090 + }, + { + "epoch": 0.35659058837447083, + "grad_norm": 0.08424954116344452, + "learning_rate": 9.814446279299512e-06, + "loss": 0.0044, + "step": 21100 + }, + { + "epoch": 0.3567595886533213, + "grad_norm": 0.34200525283813477, + "learning_rate": 9.814048018190572e-06, + "loss": 0.0046, + "step": 21110 + }, + { + "epoch": 0.3569285889321717, + "grad_norm": 0.09797846525907516, + "learning_rate": 9.813649338236129e-06, + "loss": 0.0057, + "step": 21120 + }, + { + "epoch": 0.3570975892110222, + "grad_norm": 0.036891937255859375, + "learning_rate": 9.81325023947087e-06, + "loss": 0.0022, + "step": 21130 + }, + { + "epoch": 0.35726658948987267, + "grad_norm": 0.2412370890378952, + "learning_rate": 9.81285072192952e-06, + "loss": 0.0034, + "step": 21140 + }, + { + "epoch": 0.35743558976872314, + "grad_norm": 0.12464900314807892, + "learning_rate": 9.812450785646841e-06, + "loss": 0.002, + "step": 21150 + }, + { + "epoch": 0.35760459004757356, + "grad_norm": 0.08267778158187866, + "learning_rate": 9.812050430657624e-06, + "loss": 0.0028, + "step": 21160 + }, + { + "epoch": 0.35777359032642403, + "grad_norm": 0.09670425951480865, + "learning_rate": 9.811649656996706e-06, + "loss": 0.0048, + "step": 21170 + }, + { + "epoch": 0.3579425906052745, + "grad_norm": 0.02184341289103031, + "learning_rate": 9.811248464698954e-06, + "loss": 0.0048, + "step": 21180 + }, + { + "epoch": 0.358111590884125, + "grad_norm": 0.12013767659664154, + "learning_rate": 9.810846853799275e-06, + "loss": 0.0043, + "step": 21190 + }, + { + "epoch": 0.3582805911629754, + "grad_norm": 0.10406313091516495, + "learning_rate": 9.81044482433261e-06, + "loss": 0.0021, + "step": 21200 + }, + { + "epoch": 0.35844959144182587, + "grad_norm": 0.083363838493824, + "learning_rate": 9.810042376333939e-06, + "loss": 0.0038, + "step": 21210 + }, + { + "epoch": 0.35861859172067634, + "grad_norm": 0.08557987213134766, + "learning_rate": 9.809639509838276e-06, + "loss": 0.0026, + "step": 21220 + }, + { + "epoch": 0.3587875919995268, + "grad_norm": 0.02609414793550968, + "learning_rate": 9.809236224880672e-06, + "loss": 0.0011, + "step": 21230 + }, + { + "epoch": 0.3589565922783773, + "grad_norm": 0.15575864911079407, + "learning_rate": 9.808832521496214e-06, + "loss": 0.0047, + "step": 21240 + }, + { + "epoch": 0.3591255925572277, + "grad_norm": 0.2338981032371521, + "learning_rate": 9.808428399720029e-06, + "loss": 0.0029, + "step": 21250 + }, + { + "epoch": 0.3592945928360782, + "grad_norm": 0.18020887672901154, + "learning_rate": 9.808023859587276e-06, + "loss": 0.004, + "step": 21260 + }, + { + "epoch": 0.35946359311492865, + "grad_norm": 0.07131248712539673, + "learning_rate": 
9.807618901133152e-06, + "loss": 0.0026, + "step": 21270 + }, + { + "epoch": 0.3596325933937791, + "grad_norm": 0.09082590788602829, + "learning_rate": 9.807213524392889e-06, + "loss": 0.002, + "step": 21280 + }, + { + "epoch": 0.35980159367262954, + "grad_norm": 0.17046932876110077, + "learning_rate": 9.806807729401756e-06, + "loss": 0.003, + "step": 21290 + }, + { + "epoch": 0.35997059395148, + "grad_norm": 0.155225470662117, + "learning_rate": 9.806401516195066e-06, + "loss": 0.0029, + "step": 21300 + }, + { + "epoch": 0.3601395942303305, + "grad_norm": 0.03638127073645592, + "learning_rate": 9.805994884808153e-06, + "loss": 0.004, + "step": 21310 + }, + { + "epoch": 0.36030859450918096, + "grad_norm": 0.12270484119653702, + "learning_rate": 9.8055878352764e-06, + "loss": 0.0022, + "step": 21320 + }, + { + "epoch": 0.3604775947880314, + "grad_norm": 0.1206061840057373, + "learning_rate": 9.805180367635222e-06, + "loss": 0.003, + "step": 21330 + }, + { + "epoch": 0.36064659506688185, + "grad_norm": 0.021965859457850456, + "learning_rate": 9.804772481920071e-06, + "loss": 0.0033, + "step": 21340 + }, + { + "epoch": 0.3608155953457323, + "grad_norm": 0.14499451220035553, + "learning_rate": 9.804364178166432e-06, + "loss": 0.0041, + "step": 21350 + }, + { + "epoch": 0.3609845956245828, + "grad_norm": 0.3501626253128052, + "learning_rate": 9.803955456409834e-06, + "loss": 0.0029, + "step": 21360 + }, + { + "epoch": 0.36115359590343327, + "grad_norm": 0.19067667424678802, + "learning_rate": 9.803546316685835e-06, + "loss": 0.0158, + "step": 21370 + }, + { + "epoch": 0.3613225961822837, + "grad_norm": 0.017335981130599976, + "learning_rate": 9.803136759030034e-06, + "loss": 0.0045, + "step": 21380 + }, + { + "epoch": 0.36149159646113416, + "grad_norm": 0.04304839298129082, + "learning_rate": 9.802726783478062e-06, + "loss": 0.0027, + "step": 21390 + }, + { + "epoch": 0.36166059673998463, + "grad_norm": 0.17117752134799957, + "learning_rate": 9.802316390065589e-06, + "loss": 0.0031, + "step": 21400 + }, + { + "epoch": 0.3618295970188351, + "grad_norm": 0.09694608300924301, + "learning_rate": 9.801905578828325e-06, + "loss": 0.0033, + "step": 21410 + }, + { + "epoch": 0.3619985972976855, + "grad_norm": 0.15579736232757568, + "learning_rate": 9.801494349802008e-06, + "loss": 0.0036, + "step": 21420 + }, + { + "epoch": 0.362167597576536, + "grad_norm": 0.28429025411605835, + "learning_rate": 9.80108270302242e-06, + "loss": 0.0035, + "step": 21430 + }, + { + "epoch": 0.36233659785538647, + "grad_norm": 0.01713419519364834, + "learning_rate": 9.800670638525374e-06, + "loss": 0.0031, + "step": 21440 + }, + { + "epoch": 0.36250559813423694, + "grad_norm": 0.07234758883714676, + "learning_rate": 9.800258156346722e-06, + "loss": 0.0042, + "step": 21450 + }, + { + "epoch": 0.36267459841308736, + "grad_norm": 0.08492135256528854, + "learning_rate": 9.799845256522353e-06, + "loss": 0.004, + "step": 21460 + }, + { + "epoch": 0.36284359869193783, + "grad_norm": 0.18330654501914978, + "learning_rate": 9.799431939088193e-06, + "loss": 0.0027, + "step": 21470 + }, + { + "epoch": 0.3630125989707883, + "grad_norm": 0.07547181844711304, + "learning_rate": 9.799018204080198e-06, + "loss": 0.0011, + "step": 21480 + }, + { + "epoch": 0.3631815992496388, + "grad_norm": 0.044233452528715134, + "learning_rate": 9.798604051534368e-06, + "loss": 0.0024, + "step": 21490 + }, + { + "epoch": 0.36335059952848925, + "grad_norm": 0.13739806413650513, + "learning_rate": 9.798189481486738e-06, + "loss": 0.0034, + "step": 21500 + 
}, + { + "epoch": 0.36351959980733967, + "grad_norm": 0.024653052911162376, + "learning_rate": 9.797774493973372e-06, + "loss": 0.0019, + "step": 21510 + }, + { + "epoch": 0.36368860008619014, + "grad_norm": 0.10872801393270493, + "learning_rate": 9.797359089030381e-06, + "loss": 0.0037, + "step": 21520 + }, + { + "epoch": 0.3638576003650406, + "grad_norm": 0.17600472271442413, + "learning_rate": 9.796943266693906e-06, + "loss": 0.0022, + "step": 21530 + }, + { + "epoch": 0.3640266006438911, + "grad_norm": 0.18142275512218475, + "learning_rate": 9.796527027000123e-06, + "loss": 0.0035, + "step": 21540 + }, + { + "epoch": 0.3641956009227415, + "grad_norm": 0.08506744354963303, + "learning_rate": 9.796110369985252e-06, + "loss": 0.005, + "step": 21550 + }, + { + "epoch": 0.364364601201592, + "grad_norm": 0.05040203779935837, + "learning_rate": 9.795693295685538e-06, + "loss": 0.0015, + "step": 21560 + }, + { + "epoch": 0.36453360148044245, + "grad_norm": 0.060695599764585495, + "learning_rate": 9.795275804137273e-06, + "loss": 0.0022, + "step": 21570 + }, + { + "epoch": 0.3647026017592929, + "grad_norm": 0.30580392479896545, + "learning_rate": 9.79485789537678e-06, + "loss": 0.0025, + "step": 21580 + }, + { + "epoch": 0.36487160203814334, + "grad_norm": 0.20160751044750214, + "learning_rate": 9.794439569440417e-06, + "loss": 0.0025, + "step": 21590 + }, + { + "epoch": 0.3650406023169938, + "grad_norm": 0.1361100822687149, + "learning_rate": 9.794020826364584e-06, + "loss": 0.0036, + "step": 21600 + }, + { + "epoch": 0.3652096025958443, + "grad_norm": 0.06608045846223831, + "learning_rate": 9.79360166618571e-06, + "loss": 0.0032, + "step": 21610 + }, + { + "epoch": 0.36537860287469476, + "grad_norm": 0.010083463042974472, + "learning_rate": 9.793182088940266e-06, + "loss": 0.0044, + "step": 21620 + }, + { + "epoch": 0.36554760315354523, + "grad_norm": 0.09272641688585281, + "learning_rate": 9.792762094664756e-06, + "loss": 0.0034, + "step": 21630 + }, + { + "epoch": 0.36571660343239565, + "grad_norm": 0.0999511182308197, + "learning_rate": 9.792341683395723e-06, + "loss": 0.003, + "step": 21640 + }, + { + "epoch": 0.3658856037112461, + "grad_norm": 0.07313133031129837, + "learning_rate": 9.791920855169744e-06, + "loss": 0.0031, + "step": 21650 + }, + { + "epoch": 0.3660546039900966, + "grad_norm": 0.07365882396697998, + "learning_rate": 9.791499610023433e-06, + "loss": 0.0026, + "step": 21660 + }, + { + "epoch": 0.36622360426894707, + "grad_norm": 0.05429621785879135, + "learning_rate": 9.79107794799344e-06, + "loss": 0.0024, + "step": 21670 + }, + { + "epoch": 0.3663926045477975, + "grad_norm": 0.13175803422927856, + "learning_rate": 9.790655869116454e-06, + "loss": 0.0033, + "step": 21680 + }, + { + "epoch": 0.36656160482664796, + "grad_norm": 0.043493472039699554, + "learning_rate": 9.790233373429195e-06, + "loss": 0.0028, + "step": 21690 + }, + { + "epoch": 0.36673060510549843, + "grad_norm": 0.10219153761863708, + "learning_rate": 9.789810460968423e-06, + "loss": 0.0035, + "step": 21700 + }, + { + "epoch": 0.3668996053843489, + "grad_norm": 0.13374842703342438, + "learning_rate": 9.789387131770936e-06, + "loss": 0.003, + "step": 21710 + }, + { + "epoch": 0.3670686056631993, + "grad_norm": 0.10844600200653076, + "learning_rate": 9.78896338587356e-06, + "loss": 0.0037, + "step": 21720 + }, + { + "epoch": 0.3672376059420498, + "grad_norm": 0.03692597150802612, + "learning_rate": 9.788539223313168e-06, + "loss": 0.002, + "step": 21730 + }, + { + "epoch": 0.36740660622090027, + "grad_norm": 
0.06032506749033928, + "learning_rate": 9.788114644126662e-06, + "loss": 0.006, + "step": 21740 + }, + { + "epoch": 0.36757560649975074, + "grad_norm": 0.017542537301778793, + "learning_rate": 9.787689648350984e-06, + "loss": 0.0017, + "step": 21750 + }, + { + "epoch": 0.36774460677860116, + "grad_norm": 0.11518267542123795, + "learning_rate": 9.78726423602311e-06, + "loss": 0.0032, + "step": 21760 + }, + { + "epoch": 0.36791360705745163, + "grad_norm": 0.25193750858306885, + "learning_rate": 9.786838407180052e-06, + "loss": 0.004, + "step": 21770 + }, + { + "epoch": 0.3680826073363021, + "grad_norm": 0.5318714380264282, + "learning_rate": 9.78641216185886e-06, + "loss": 0.004, + "step": 21780 + }, + { + "epoch": 0.3682516076151526, + "grad_norm": 0.16579753160476685, + "learning_rate": 9.785985500096617e-06, + "loss": 0.0024, + "step": 21790 + }, + { + "epoch": 0.36842060789400305, + "grad_norm": 0.2203885316848755, + "learning_rate": 9.78555842193045e-06, + "loss": 0.0018, + "step": 21800 + }, + { + "epoch": 0.36858960817285347, + "grad_norm": 0.09116717427968979, + "learning_rate": 9.785130927397513e-06, + "loss": 0.0067, + "step": 21810 + }, + { + "epoch": 0.36875860845170394, + "grad_norm": 0.0862962082028389, + "learning_rate": 9.784703016535e-06, + "loss": 0.0036, + "step": 21820 + }, + { + "epoch": 0.3689276087305544, + "grad_norm": 0.0602206252515316, + "learning_rate": 9.784274689380142e-06, + "loss": 0.0015, + "step": 21830 + }, + { + "epoch": 0.3690966090094049, + "grad_norm": 0.11045362055301666, + "learning_rate": 9.783845945970205e-06, + "loss": 0.003, + "step": 21840 + }, + { + "epoch": 0.3692656092882553, + "grad_norm": 0.12604741752147675, + "learning_rate": 9.783416786342495e-06, + "loss": 0.0032, + "step": 21850 + }, + { + "epoch": 0.3694346095671058, + "grad_norm": 0.06938070058822632, + "learning_rate": 9.782987210534348e-06, + "loss": 0.003, + "step": 21860 + }, + { + "epoch": 0.36960360984595625, + "grad_norm": 0.11994098871946335, + "learning_rate": 9.782557218583138e-06, + "loss": 0.0026, + "step": 21870 + }, + { + "epoch": 0.3697726101248067, + "grad_norm": 0.43083980679512024, + "learning_rate": 9.782126810526278e-06, + "loss": 0.0031, + "step": 21880 + }, + { + "epoch": 0.36994161040365714, + "grad_norm": 0.1065066009759903, + "learning_rate": 9.781695986401217e-06, + "loss": 0.0028, + "step": 21890 + }, + { + "epoch": 0.3701106106825076, + "grad_norm": 0.04962790757417679, + "learning_rate": 9.781264746245434e-06, + "loss": 0.0032, + "step": 21900 + }, + { + "epoch": 0.3702796109613581, + "grad_norm": 0.071550652384758, + "learning_rate": 9.780833090096455e-06, + "loss": 0.0024, + "step": 21910 + }, + { + "epoch": 0.37044861124020856, + "grad_norm": 0.17351196706295013, + "learning_rate": 9.780401017991833e-06, + "loss": 0.004, + "step": 21920 + }, + { + "epoch": 0.37061761151905903, + "grad_norm": 0.16681891679763794, + "learning_rate": 9.77996852996916e-06, + "loss": 0.0022, + "step": 21930 + }, + { + "epoch": 0.37078661179790945, + "grad_norm": 0.078296959400177, + "learning_rate": 9.779535626066067e-06, + "loss": 0.004, + "step": 21940 + }, + { + "epoch": 0.3709556120767599, + "grad_norm": 0.1080959290266037, + "learning_rate": 9.779102306320219e-06, + "loss": 0.0028, + "step": 21950 + }, + { + "epoch": 0.3711246123556104, + "grad_norm": 0.19295884668827057, + "learning_rate": 9.778668570769312e-06, + "loss": 0.0036, + "step": 21960 + }, + { + "epoch": 0.37129361263446087, + "grad_norm": 0.06714986264705658, + "learning_rate": 9.778234419451087e-06, + 
"loss": 0.0029, + "step": 21970 + }, + { + "epoch": 0.3714626129133113, + "grad_norm": 0.15599657595157623, + "learning_rate": 9.777799852403316e-06, + "loss": 0.0033, + "step": 21980 + }, + { + "epoch": 0.37163161319216176, + "grad_norm": 0.160843625664711, + "learning_rate": 9.777364869663808e-06, + "loss": 0.0047, + "step": 21990 + }, + { + "epoch": 0.37180061347101223, + "grad_norm": 0.12721359729766846, + "learning_rate": 9.776929471270411e-06, + "loss": 0.0018, + "step": 22000 + }, + { + "epoch": 0.3719696137498627, + "grad_norm": 0.08139853179454803, + "learning_rate": 9.776493657261005e-06, + "loss": 0.0044, + "step": 22010 + }, + { + "epoch": 0.3721386140287131, + "grad_norm": 0.23349185287952423, + "learning_rate": 9.776057427673508e-06, + "loss": 0.0039, + "step": 22020 + }, + { + "epoch": 0.3723076143075636, + "grad_norm": 0.13932804763317108, + "learning_rate": 9.775620782545874e-06, + "loss": 0.0043, + "step": 22030 + }, + { + "epoch": 0.37247661458641407, + "grad_norm": 0.14340350031852722, + "learning_rate": 9.775183721916094e-06, + "loss": 0.0036, + "step": 22040 + }, + { + "epoch": 0.37264561486526454, + "grad_norm": 0.14557211101055145, + "learning_rate": 9.774746245822193e-06, + "loss": 0.0025, + "step": 22050 + }, + { + "epoch": 0.372814615144115, + "grad_norm": 0.06957050412893295, + "learning_rate": 9.774308354302236e-06, + "loss": 0.0024, + "step": 22060 + }, + { + "epoch": 0.37298361542296543, + "grad_norm": 0.059090156108140945, + "learning_rate": 9.77387004739432e-06, + "loss": 0.0028, + "step": 22070 + }, + { + "epoch": 0.3731526157018159, + "grad_norm": 0.42358991503715515, + "learning_rate": 9.773431325136577e-06, + "loss": 0.0051, + "step": 22080 + }, + { + "epoch": 0.3733216159806664, + "grad_norm": 0.0769067034125328, + "learning_rate": 9.772992187567183e-06, + "loss": 0.0047, + "step": 22090 + }, + { + "epoch": 0.37349061625951685, + "grad_norm": 0.15835249423980713, + "learning_rate": 9.772552634724345e-06, + "loss": 0.0024, + "step": 22100 + }, + { + "epoch": 0.37365961653836727, + "grad_norm": 0.04665343463420868, + "learning_rate": 9.772112666646302e-06, + "loss": 0.0029, + "step": 22110 + }, + { + "epoch": 0.37382861681721774, + "grad_norm": 0.0746210366487503, + "learning_rate": 9.771672283371337e-06, + "loss": 0.0033, + "step": 22120 + }, + { + "epoch": 0.3739976170960682, + "grad_norm": 0.022807767614722252, + "learning_rate": 9.771231484937762e-06, + "loss": 0.002, + "step": 22130 + }, + { + "epoch": 0.3741666173749187, + "grad_norm": 0.014665382914245129, + "learning_rate": 9.770790271383931e-06, + "loss": 0.0027, + "step": 22140 + }, + { + "epoch": 0.3743356176537691, + "grad_norm": 0.09234453737735748, + "learning_rate": 9.770348642748231e-06, + "loss": 0.0021, + "step": 22150 + }, + { + "epoch": 0.3745046179326196, + "grad_norm": 0.0824594646692276, + "learning_rate": 9.769906599069088e-06, + "loss": 0.0033, + "step": 22160 + }, + { + "epoch": 0.37467361821147005, + "grad_norm": 0.0873909667134285, + "learning_rate": 9.76946414038496e-06, + "loss": 0.0043, + "step": 22170 + }, + { + "epoch": 0.3748426184903205, + "grad_norm": 0.10788635909557343, + "learning_rate": 9.769021266734343e-06, + "loss": 0.0015, + "step": 22180 + }, + { + "epoch": 0.375011618769171, + "grad_norm": 0.10120099037885666, + "learning_rate": 9.768577978155769e-06, + "loss": 0.0029, + "step": 22190 + }, + { + "epoch": 0.3751806190480214, + "grad_norm": 0.09097573161125183, + "learning_rate": 9.768134274687806e-06, + "loss": 0.0031, + "step": 22200 + }, + { + "epoch": 
0.3753496193268719, + "grad_norm": 0.11442361027002335, + "learning_rate": 9.767690156369059e-06, + "loss": 0.0026, + "step": 22210 + }, + { + "epoch": 0.37551861960572236, + "grad_norm": 0.19592778384685516, + "learning_rate": 9.767245623238169e-06, + "loss": 0.0027, + "step": 22220 + }, + { + "epoch": 0.37568761988457283, + "grad_norm": 0.09860577434301376, + "learning_rate": 9.766800675333812e-06, + "loss": 0.0033, + "step": 22230 + }, + { + "epoch": 0.37585662016342325, + "grad_norm": 0.06487202644348145, + "learning_rate": 9.7663553126947e-06, + "loss": 0.0027, + "step": 22240 + }, + { + "epoch": 0.3760256204422737, + "grad_norm": 0.24659186601638794, + "learning_rate": 9.765909535359584e-06, + "loss": 0.0025, + "step": 22250 + }, + { + "epoch": 0.3761946207211242, + "grad_norm": 0.05501728877425194, + "learning_rate": 9.765463343367245e-06, + "loss": 0.0029, + "step": 22260 + }, + { + "epoch": 0.37636362099997467, + "grad_norm": 0.04128387197852135, + "learning_rate": 9.765016736756506e-06, + "loss": 0.0024, + "step": 22270 + }, + { + "epoch": 0.3765326212788251, + "grad_norm": 0.4396528899669647, + "learning_rate": 9.764569715566224e-06, + "loss": 0.0033, + "step": 22280 + }, + { + "epoch": 0.37670162155767556, + "grad_norm": 0.09631740301847458, + "learning_rate": 9.764122279835293e-06, + "loss": 0.0037, + "step": 22290 + }, + { + "epoch": 0.37687062183652603, + "grad_norm": 0.04098780080676079, + "learning_rate": 9.76367442960264e-06, + "loss": 0.0029, + "step": 22300 + }, + { + "epoch": 0.3770396221153765, + "grad_norm": 0.03507139906287193, + "learning_rate": 9.763226164907231e-06, + "loss": 0.0034, + "step": 22310 + }, + { + "epoch": 0.377208622394227, + "grad_norm": 0.06853745877742767, + "learning_rate": 9.762777485788069e-06, + "loss": 0.0017, + "step": 22320 + }, + { + "epoch": 0.3773776226730774, + "grad_norm": 0.12593597173690796, + "learning_rate": 9.762328392284188e-06, + "loss": 0.0032, + "step": 22330 + }, + { + "epoch": 0.37754662295192787, + "grad_norm": 0.12185325473546982, + "learning_rate": 9.761878884434663e-06, + "loss": 0.0023, + "step": 22340 + }, + { + "epoch": 0.37771562323077834, + "grad_norm": 0.024328766390681267, + "learning_rate": 9.761428962278603e-06, + "loss": 0.0024, + "step": 22350 + }, + { + "epoch": 0.3778846235096288, + "grad_norm": 0.23012223839759827, + "learning_rate": 9.760978625855154e-06, + "loss": 0.0019, + "step": 22360 + }, + { + "epoch": 0.37805362378847923, + "grad_norm": 0.03638390824198723, + "learning_rate": 9.760527875203498e-06, + "loss": 0.0041, + "step": 22370 + }, + { + "epoch": 0.3782226240673297, + "grad_norm": 0.10978236794471741, + "learning_rate": 9.76007671036285e-06, + "loss": 0.0025, + "step": 22380 + }, + { + "epoch": 0.3783916243461802, + "grad_norm": 0.02126036025583744, + "learning_rate": 9.759625131372466e-06, + "loss": 0.0025, + "step": 22390 + }, + { + "epoch": 0.37856062462503065, + "grad_norm": 0.04835692048072815, + "learning_rate": 9.759173138271634e-06, + "loss": 0.0027, + "step": 22400 + }, + { + "epoch": 0.37872962490388107, + "grad_norm": 0.12906229496002197, + "learning_rate": 9.75872073109968e-06, + "loss": 0.0028, + "step": 22410 + }, + { + "epoch": 0.37889862518273154, + "grad_norm": 0.3933924436569214, + "learning_rate": 9.758267909895966e-06, + "loss": 0.0021, + "step": 22420 + }, + { + "epoch": 0.379067625461582, + "grad_norm": 0.08228648453950882, + "learning_rate": 9.757814674699891e-06, + "loss": 0.0037, + "step": 22430 + }, + { + "epoch": 0.3792366257404325, + "grad_norm": 0.1131923496723175, 
+ "learning_rate": 9.757361025550887e-06, + "loss": 0.0032, + "step": 22440 + }, + { + "epoch": 0.3794056260192829, + "grad_norm": 0.11813594400882721, + "learning_rate": 9.756906962488421e-06, + "loss": 0.0029, + "step": 22450 + }, + { + "epoch": 0.3795746262981334, + "grad_norm": 0.05745278298854828, + "learning_rate": 9.756452485552005e-06, + "loss": 0.0016, + "step": 22460 + }, + { + "epoch": 0.37974362657698385, + "grad_norm": 0.07140239328145981, + "learning_rate": 9.755997594781175e-06, + "loss": 0.0034, + "step": 22470 + }, + { + "epoch": 0.3799126268558343, + "grad_norm": 0.06921084225177765, + "learning_rate": 9.755542290215512e-06, + "loss": 0.003, + "step": 22480 + }, + { + "epoch": 0.3800816271346848, + "grad_norm": 0.050568293780088425, + "learning_rate": 9.755086571894629e-06, + "loss": 0.0024, + "step": 22490 + }, + { + "epoch": 0.3802506274135352, + "grad_norm": 0.19204530119895935, + "learning_rate": 9.754630439858175e-06, + "loss": 0.0035, + "step": 22500 + }, + { + "epoch": 0.3804196276923857, + "grad_norm": 0.016198497265577316, + "learning_rate": 9.754173894145835e-06, + "loss": 0.0018, + "step": 22510 + }, + { + "epoch": 0.38058862797123616, + "grad_norm": 0.14448107779026031, + "learning_rate": 9.753716934797333e-06, + "loss": 0.003, + "step": 22520 + }, + { + "epoch": 0.38075762825008663, + "grad_norm": 0.11803316324949265, + "learning_rate": 9.753259561852424e-06, + "loss": 0.0044, + "step": 22530 + }, + { + "epoch": 0.38092662852893705, + "grad_norm": 0.3361509144306183, + "learning_rate": 9.752801775350904e-06, + "loss": 0.0043, + "step": 22540 + }, + { + "epoch": 0.3810956288077875, + "grad_norm": 0.09761517494916916, + "learning_rate": 9.752343575332602e-06, + "loss": 0.0036, + "step": 22550 + }, + { + "epoch": 0.381264629086638, + "grad_norm": 0.05435268580913544, + "learning_rate": 9.75188496183738e-06, + "loss": 0.0015, + "step": 22560 + }, + { + "epoch": 0.38143362936548847, + "grad_norm": 0.15113788843154907, + "learning_rate": 9.751425934905148e-06, + "loss": 0.0027, + "step": 22570 + }, + { + "epoch": 0.3816026296443389, + "grad_norm": 0.21489611268043518, + "learning_rate": 9.750966494575835e-06, + "loss": 0.0021, + "step": 22580 + }, + { + "epoch": 0.38177162992318936, + "grad_norm": 0.12411852180957794, + "learning_rate": 9.750506640889418e-06, + "loss": 0.0036, + "step": 22590 + }, + { + "epoch": 0.38194063020203983, + "grad_norm": 0.06043042987585068, + "learning_rate": 9.750046373885909e-06, + "loss": 0.003, + "step": 22600 + }, + { + "epoch": 0.3821096304808903, + "grad_norm": 0.04677782580256462, + "learning_rate": 9.74958569360535e-06, + "loss": 0.0026, + "step": 22610 + }, + { + "epoch": 0.3822786307597408, + "grad_norm": 0.1353975385427475, + "learning_rate": 9.749124600087822e-06, + "loss": 0.0024, + "step": 22620 + }, + { + "epoch": 0.3824476310385912, + "grad_norm": 0.19661273062229156, + "learning_rate": 9.748663093373445e-06, + "loss": 0.0042, + "step": 22630 + }, + { + "epoch": 0.38261663131744167, + "grad_norm": 0.0980839729309082, + "learning_rate": 9.748201173502372e-06, + "loss": 0.0033, + "step": 22640 + }, + { + "epoch": 0.38278563159629214, + "grad_norm": 0.17554502189159393, + "learning_rate": 9.747738840514789e-06, + "loss": 0.0024, + "step": 22650 + }, + { + "epoch": 0.3829546318751426, + "grad_norm": 0.09802938252687454, + "learning_rate": 9.747276094450924e-06, + "loss": 0.0022, + "step": 22660 + }, + { + "epoch": 0.38312363215399303, + "grad_norm": 0.11497267335653305, + "learning_rate": 9.74681293535104e-06, + "loss": 
0.004, + "step": 22670 + }, + { + "epoch": 0.3832926324328435, + "grad_norm": 0.17812427878379822, + "learning_rate": 9.74634936325543e-06, + "loss": 0.007, + "step": 22680 + }, + { + "epoch": 0.383461632711694, + "grad_norm": 0.21699292957782745, + "learning_rate": 9.745885378204427e-06, + "loss": 0.002, + "step": 22690 + }, + { + "epoch": 0.38363063299054445, + "grad_norm": 0.18406936526298523, + "learning_rate": 9.745420980238405e-06, + "loss": 0.0044, + "step": 22700 + }, + { + "epoch": 0.38379963326939487, + "grad_norm": 0.039109811186790466, + "learning_rate": 9.744956169397764e-06, + "loss": 0.0017, + "step": 22710 + }, + { + "epoch": 0.38396863354824534, + "grad_norm": 0.23905335366725922, + "learning_rate": 9.744490945722947e-06, + "loss": 0.003, + "step": 22720 + }, + { + "epoch": 0.3841376338270958, + "grad_norm": 0.17291510105133057, + "learning_rate": 9.74402530925443e-06, + "loss": 0.0041, + "step": 22730 + }, + { + "epoch": 0.3843066341059463, + "grad_norm": 0.21678727865219116, + "learning_rate": 9.743559260032727e-06, + "loss": 0.0031, + "step": 22740 + }, + { + "epoch": 0.38447563438479676, + "grad_norm": 0.23328426480293274, + "learning_rate": 9.743092798098383e-06, + "loss": 0.0052, + "step": 22750 + }, + { + "epoch": 0.3846446346636472, + "grad_norm": 0.19061240553855896, + "learning_rate": 9.742625923491986e-06, + "loss": 0.0027, + "step": 22760 + }, + { + "epoch": 0.38481363494249765, + "grad_norm": 0.03589209169149399, + "learning_rate": 9.742158636254155e-06, + "loss": 0.003, + "step": 22770 + }, + { + "epoch": 0.3849826352213481, + "grad_norm": 0.03044353425502777, + "learning_rate": 9.741690936425545e-06, + "loss": 0.0024, + "step": 22780 + }, + { + "epoch": 0.3851516355001986, + "grad_norm": 0.09414897114038467, + "learning_rate": 9.741222824046853e-06, + "loss": 0.002, + "step": 22790 + }, + { + "epoch": 0.385320635779049, + "grad_norm": 0.09415625780820847, + "learning_rate": 9.740754299158799e-06, + "loss": 0.0024, + "step": 22800 + }, + { + "epoch": 0.3854896360578995, + "grad_norm": 0.1006363034248352, + "learning_rate": 9.740285361802154e-06, + "loss": 0.003, + "step": 22810 + }, + { + "epoch": 0.38565863633674996, + "grad_norm": 0.21563363075256348, + "learning_rate": 9.739816012017715e-06, + "loss": 0.0042, + "step": 22820 + }, + { + "epoch": 0.38582763661560043, + "grad_norm": 0.31482458114624023, + "learning_rate": 9.73934624984632e-06, + "loss": 0.0038, + "step": 22830 + }, + { + "epoch": 0.38599663689445085, + "grad_norm": 0.13808608055114746, + "learning_rate": 9.738876075328836e-06, + "loss": 0.0033, + "step": 22840 + }, + { + "epoch": 0.3861656371733013, + "grad_norm": 0.0663992166519165, + "learning_rate": 9.738405488506173e-06, + "loss": 0.0031, + "step": 22850 + }, + { + "epoch": 0.3863346374521518, + "grad_norm": 0.0355796180665493, + "learning_rate": 9.737934489419276e-06, + "loss": 0.0029, + "step": 22860 + }, + { + "epoch": 0.38650363773100227, + "grad_norm": 0.12239819765090942, + "learning_rate": 9.737463078109121e-06, + "loss": 0.003, + "step": 22870 + }, + { + "epoch": 0.38667263800985274, + "grad_norm": 0.08600953966379166, + "learning_rate": 9.736991254616725e-06, + "loss": 0.0017, + "step": 22880 + }, + { + "epoch": 0.38684163828870316, + "grad_norm": 0.08243556320667267, + "learning_rate": 9.73651901898314e-06, + "loss": 0.0027, + "step": 22890 + }, + { + "epoch": 0.38701063856755363, + "grad_norm": 0.1894107311964035, + "learning_rate": 9.73604637124945e-06, + "loss": 0.0034, + "step": 22900 + }, + { + "epoch": 0.3871796388464041, 
+ "grad_norm": 0.09860440343618393, + "learning_rate": 9.73557331145678e-06, + "loss": 0.0029, + "step": 22910 + }, + { + "epoch": 0.3873486391252546, + "grad_norm": 0.09986410290002823, + "learning_rate": 9.735099839646286e-06, + "loss": 0.0031, + "step": 22920 + }, + { + "epoch": 0.387517639404105, + "grad_norm": 0.0639401376247406, + "learning_rate": 9.734625955859164e-06, + "loss": 0.0016, + "step": 22930 + }, + { + "epoch": 0.38768663968295547, + "grad_norm": 0.06922442466020584, + "learning_rate": 9.734151660136645e-06, + "loss": 0.003, + "step": 22940 + }, + { + "epoch": 0.38785563996180594, + "grad_norm": 0.020561840385198593, + "learning_rate": 9.733676952519995e-06, + "loss": 0.0044, + "step": 22950 + }, + { + "epoch": 0.3880246402406564, + "grad_norm": 0.09675197303295135, + "learning_rate": 9.733201833050513e-06, + "loss": 0.0019, + "step": 22960 + }, + { + "epoch": 0.38819364051950683, + "grad_norm": 0.0909353494644165, + "learning_rate": 9.73272630176954e-06, + "loss": 0.005, + "step": 22970 + }, + { + "epoch": 0.3883626407983573, + "grad_norm": 0.07992875576019287, + "learning_rate": 9.732250358718448e-06, + "loss": 0.0026, + "step": 22980 + }, + { + "epoch": 0.3885316410772078, + "grad_norm": 0.0822548121213913, + "learning_rate": 9.731774003938645e-06, + "loss": 0.0042, + "step": 22990 + }, + { + "epoch": 0.38870064135605825, + "grad_norm": 0.09815090894699097, + "learning_rate": 9.73129723747158e-06, + "loss": 0.0039, + "step": 23000 + }, + { + "epoch": 0.3888696416349087, + "grad_norm": 0.15999223291873932, + "learning_rate": 9.73082005935873e-06, + "loss": 0.0038, + "step": 23010 + }, + { + "epoch": 0.38903864191375914, + "grad_norm": 0.14762641489505768, + "learning_rate": 9.730342469641614e-06, + "loss": 0.0036, + "step": 23020 + }, + { + "epoch": 0.3892076421926096, + "grad_norm": 0.1708577424287796, + "learning_rate": 9.729864468361786e-06, + "loss": 0.0024, + "step": 23030 + }, + { + "epoch": 0.3893766424714601, + "grad_norm": 0.12733496725559235, + "learning_rate": 9.72938605556083e-06, + "loss": 0.0033, + "step": 23040 + }, + { + "epoch": 0.38954564275031056, + "grad_norm": 0.20779503881931305, + "learning_rate": 9.728907231280373e-06, + "loss": 0.003, + "step": 23050 + }, + { + "epoch": 0.389714643029161, + "grad_norm": 0.17399100959300995, + "learning_rate": 9.728427995562076e-06, + "loss": 0.0016, + "step": 23060 + }, + { + "epoch": 0.38988364330801145, + "grad_norm": 0.022260790690779686, + "learning_rate": 9.727948348447632e-06, + "loss": 0.0022, + "step": 23070 + }, + { + "epoch": 0.3900526435868619, + "grad_norm": 0.1329025775194168, + "learning_rate": 9.727468289978774e-06, + "loss": 0.0035, + "step": 23080 + }, + { + "epoch": 0.3902216438657124, + "grad_norm": 0.12162362039089203, + "learning_rate": 9.726987820197271e-06, + "loss": 0.0034, + "step": 23090 + }, + { + "epoch": 0.3903906441445628, + "grad_norm": 0.12319975346326828, + "learning_rate": 9.726506939144925e-06, + "loss": 0.0037, + "step": 23100 + }, + { + "epoch": 0.3905596444234133, + "grad_norm": 0.2242601066827774, + "learning_rate": 9.726025646863574e-06, + "loss": 0.0038, + "step": 23110 + }, + { + "epoch": 0.39072864470226376, + "grad_norm": 0.23295743763446808, + "learning_rate": 9.725543943395094e-06, + "loss": 0.0023, + "step": 23120 + }, + { + "epoch": 0.39089764498111423, + "grad_norm": 0.20486986637115479, + "learning_rate": 9.725061828781396e-06, + "loss": 0.0031, + "step": 23130 + }, + { + "epoch": 0.39106664525996465, + "grad_norm": 0.10565141588449478, + "learning_rate": 
9.724579303064425e-06, + "loss": 0.002, + "step": 23140 + }, + { + "epoch": 0.3912356455388151, + "grad_norm": 0.07504316419363022, + "learning_rate": 9.724096366286162e-06, + "loss": 0.0038, + "step": 23150 + }, + { + "epoch": 0.3914046458176656, + "grad_norm": 0.11156802624464035, + "learning_rate": 9.723613018488629e-06, + "loss": 0.0034, + "step": 23160 + }, + { + "epoch": 0.39157364609651607, + "grad_norm": 0.09637878090143204, + "learning_rate": 9.723129259713876e-06, + "loss": 0.0029, + "step": 23170 + }, + { + "epoch": 0.39174264637536654, + "grad_norm": 0.18587429821491241, + "learning_rate": 9.722645090003992e-06, + "loss": 0.0035, + "step": 23180 + }, + { + "epoch": 0.39191164665421696, + "grad_norm": 0.08628827333450317, + "learning_rate": 9.722160509401104e-06, + "loss": 0.0035, + "step": 23190 + }, + { + "epoch": 0.39208064693306743, + "grad_norm": 0.060949645936489105, + "learning_rate": 9.721675517947373e-06, + "loss": 0.002, + "step": 23200 + }, + { + "epoch": 0.3922496472119179, + "grad_norm": 0.13335323333740234, + "learning_rate": 9.721190115684994e-06, + "loss": 0.0026, + "step": 23210 + }, + { + "epoch": 0.3924186474907684, + "grad_norm": 0.10148081183433533, + "learning_rate": 9.720704302656201e-06, + "loss": 0.0044, + "step": 23220 + }, + { + "epoch": 0.3925876477696188, + "grad_norm": 0.11652295291423798, + "learning_rate": 9.72021807890326e-06, + "loss": 0.0023, + "step": 23230 + }, + { + "epoch": 0.39275664804846927, + "grad_norm": 0.16628910601139069, + "learning_rate": 9.719731444468478e-06, + "loss": 0.0063, + "step": 23240 + }, + { + "epoch": 0.39292564832731974, + "grad_norm": 0.08759075403213501, + "learning_rate": 9.719244399394192e-06, + "loss": 0.0035, + "step": 23250 + }, + { + "epoch": 0.3930946486061702, + "grad_norm": 0.20623010396957397, + "learning_rate": 9.718756943722779e-06, + "loss": 0.0031, + "step": 23260 + }, + { + "epoch": 0.39326364888502063, + "grad_norm": 0.15852336585521698, + "learning_rate": 9.718269077496645e-06, + "loss": 0.0029, + "step": 23270 + }, + { + "epoch": 0.3934326491638711, + "grad_norm": 0.1342359483242035, + "learning_rate": 9.717780800758245e-06, + "loss": 0.0033, + "step": 23280 + }, + { + "epoch": 0.3936016494427216, + "grad_norm": 0.05740807205438614, + "learning_rate": 9.717292113550055e-06, + "loss": 0.0045, + "step": 23290 + }, + { + "epoch": 0.39377064972157205, + "grad_norm": 0.08184656500816345, + "learning_rate": 9.716803015914597e-06, + "loss": 0.0026, + "step": 23300 + }, + { + "epoch": 0.3939396500004225, + "grad_norm": 0.10684201121330261, + "learning_rate": 9.716313507894422e-06, + "loss": 0.0026, + "step": 23310 + }, + { + "epoch": 0.39410865027927294, + "grad_norm": 0.19640758633613586, + "learning_rate": 9.715823589532121e-06, + "loss": 0.0034, + "step": 23320 + }, + { + "epoch": 0.3942776505581234, + "grad_norm": 0.059934161603450775, + "learning_rate": 9.715333260870319e-06, + "loss": 0.0021, + "step": 23330 + }, + { + "epoch": 0.3944466508369739, + "grad_norm": 0.08852091431617737, + "learning_rate": 9.714842521951676e-06, + "loss": 0.0022, + "step": 23340 + }, + { + "epoch": 0.39461565111582436, + "grad_norm": 0.1293201893568039, + "learning_rate": 9.714351372818891e-06, + "loss": 0.002, + "step": 23350 + }, + { + "epoch": 0.3947846513946748, + "grad_norm": 0.08041828870773315, + "learning_rate": 9.713859813514695e-06, + "loss": 0.0022, + "step": 23360 + }, + { + "epoch": 0.39495365167352525, + "grad_norm": 0.0484473817050457, + "learning_rate": 9.713367844081856e-06, + "loss": 0.0042, + "step": 
23370 + }, + { + "epoch": 0.3951226519523757, + "grad_norm": 0.06702147424221039, + "learning_rate": 9.712875464563177e-06, + "loss": 0.0033, + "step": 23380 + }, + { + "epoch": 0.3952916522312262, + "grad_norm": 0.08013001084327698, + "learning_rate": 9.712382675001499e-06, + "loss": 0.0028, + "step": 23390 + }, + { + "epoch": 0.3954606525100766, + "grad_norm": 0.035971228033304214, + "learning_rate": 9.711889475439696e-06, + "loss": 0.0033, + "step": 23400 + }, + { + "epoch": 0.3956296527889271, + "grad_norm": 0.060159262269735336, + "learning_rate": 9.711395865920679e-06, + "loss": 0.0028, + "step": 23410 + }, + { + "epoch": 0.39579865306777756, + "grad_norm": 0.15951140224933624, + "learning_rate": 9.710901846487394e-06, + "loss": 0.0039, + "step": 23420 + }, + { + "epoch": 0.39596765334662803, + "grad_norm": 0.18661275506019592, + "learning_rate": 9.710407417182824e-06, + "loss": 0.003, + "step": 23430 + }, + { + "epoch": 0.3961366536254785, + "grad_norm": 0.19150663912296295, + "learning_rate": 9.709912578049986e-06, + "loss": 0.0048, + "step": 23440 + }, + { + "epoch": 0.3963056539043289, + "grad_norm": 0.02610347792506218, + "learning_rate": 9.709417329131933e-06, + "loss": 0.0022, + "step": 23450 + }, + { + "epoch": 0.3964746541831794, + "grad_norm": 0.16545186936855316, + "learning_rate": 9.708921670471755e-06, + "loss": 0.0039, + "step": 23460 + }, + { + "epoch": 0.39664365446202987, + "grad_norm": 0.07641608268022537, + "learning_rate": 9.708425602112576e-06, + "loss": 0.002, + "step": 23470 + }, + { + "epoch": 0.39681265474088034, + "grad_norm": 0.04412617161870003, + "learning_rate": 9.707929124097559e-06, + "loss": 0.002, + "step": 23480 + }, + { + "epoch": 0.39698165501973076, + "grad_norm": 0.15296491980552673, + "learning_rate": 9.707432236469897e-06, + "loss": 0.0034, + "step": 23490 + }, + { + "epoch": 0.39715065529858123, + "grad_norm": 0.07034008949995041, + "learning_rate": 9.70693493927282e-06, + "loss": 0.0038, + "step": 23500 + }, + { + "epoch": 0.3973196555774317, + "grad_norm": 0.13851159811019897, + "learning_rate": 9.706437232549601e-06, + "loss": 0.0035, + "step": 23510 + }, + { + "epoch": 0.3974886558562822, + "grad_norm": 0.13486424088478088, + "learning_rate": 9.705939116343538e-06, + "loss": 0.0028, + "step": 23520 + }, + { + "epoch": 0.3976576561351326, + "grad_norm": 0.0902167558670044, + "learning_rate": 9.70544059069797e-06, + "loss": 0.0043, + "step": 23530 + }, + { + "epoch": 0.39782665641398307, + "grad_norm": 0.0866253599524498, + "learning_rate": 9.704941655656274e-06, + "loss": 0.0035, + "step": 23540 + }, + { + "epoch": 0.39799565669283354, + "grad_norm": 0.06707198917865753, + "learning_rate": 9.704442311261856e-06, + "loss": 0.0035, + "step": 23550 + }, + { + "epoch": 0.398164656971684, + "grad_norm": 0.11433298140764236, + "learning_rate": 9.703942557558166e-06, + "loss": 0.0038, + "step": 23560 + }, + { + "epoch": 0.3983336572505345, + "grad_norm": 0.14999189972877502, + "learning_rate": 9.70344239458868e-06, + "loss": 0.0045, + "step": 23570 + }, + { + "epoch": 0.3985026575293849, + "grad_norm": 0.08040648698806763, + "learning_rate": 9.702941822396918e-06, + "loss": 0.0024, + "step": 23580 + }, + { + "epoch": 0.3986716578082354, + "grad_norm": 0.05942732095718384, + "learning_rate": 9.702440841026433e-06, + "loss": 0.0024, + "step": 23590 + }, + { + "epoch": 0.39884065808708585, + "grad_norm": 0.055566608905792236, + "learning_rate": 9.701939450520808e-06, + "loss": 0.0038, + "step": 23600 + }, + { + "epoch": 0.3990096583659363, + 
"grad_norm": 0.10234656929969788, + "learning_rate": 9.70143765092367e-06, + "loss": 0.0047, + "step": 23610 + }, + { + "epoch": 0.39917865864478674, + "grad_norm": 0.04624078422784805, + "learning_rate": 9.700935442278678e-06, + "loss": 0.0024, + "step": 23620 + }, + { + "epoch": 0.3993476589236372, + "grad_norm": 0.09292671829462051, + "learning_rate": 9.700432824629526e-06, + "loss": 0.0026, + "step": 23630 + }, + { + "epoch": 0.3995166592024877, + "grad_norm": 0.12578187882900238, + "learning_rate": 9.699929798019944e-06, + "loss": 0.0041, + "step": 23640 + }, + { + "epoch": 0.39968565948133816, + "grad_norm": 0.08320990949869156, + "learning_rate": 9.699426362493698e-06, + "loss": 0.0033, + "step": 23650 + }, + { + "epoch": 0.3998546597601886, + "grad_norm": 0.38111990690231323, + "learning_rate": 9.698922518094588e-06, + "loss": 0.0086, + "step": 23660 + }, + { + "epoch": 0.40002366003903905, + "grad_norm": 0.10134875774383545, + "learning_rate": 9.698418264866455e-06, + "loss": 0.0041, + "step": 23670 + }, + { + "epoch": 0.4001926603178895, + "grad_norm": 0.1464819759130478, + "learning_rate": 9.697913602853165e-06, + "loss": 0.0028, + "step": 23680 + }, + { + "epoch": 0.40036166059674, + "grad_norm": 0.06998094916343689, + "learning_rate": 9.697408532098633e-06, + "loss": 0.0036, + "step": 23690 + }, + { + "epoch": 0.40053066087559047, + "grad_norm": 0.14025792479515076, + "learning_rate": 9.696903052646798e-06, + "loss": 0.0035, + "step": 23700 + }, + { + "epoch": 0.4006996611544409, + "grad_norm": 0.09845133870840073, + "learning_rate": 9.69639716454164e-06, + "loss": 0.0037, + "step": 23710 + }, + { + "epoch": 0.40086866143329136, + "grad_norm": 0.11155934631824493, + "learning_rate": 9.695890867827173e-06, + "loss": 0.0038, + "step": 23720 + }, + { + "epoch": 0.40103766171214184, + "grad_norm": 0.20432689785957336, + "learning_rate": 9.69538416254745e-06, + "loss": 0.0049, + "step": 23730 + }, + { + "epoch": 0.4012066619909923, + "grad_norm": 0.011552118696272373, + "learning_rate": 9.694877048746556e-06, + "loss": 0.0036, + "step": 23740 + }, + { + "epoch": 0.4013756622698427, + "grad_norm": 0.053301118314266205, + "learning_rate": 9.69436952646861e-06, + "loss": 0.0023, + "step": 23750 + }, + { + "epoch": 0.4015446625486932, + "grad_norm": 0.09333498030900955, + "learning_rate": 9.693861595757771e-06, + "loss": 0.0019, + "step": 23760 + }, + { + "epoch": 0.40171366282754367, + "grad_norm": 0.06840863823890686, + "learning_rate": 9.693353256658231e-06, + "loss": 0.0029, + "step": 23770 + }, + { + "epoch": 0.40188266310639414, + "grad_norm": 0.02834627963602543, + "learning_rate": 9.692844509214216e-06, + "loss": 0.0012, + "step": 23780 + }, + { + "epoch": 0.40205166338524456, + "grad_norm": 0.16074758768081665, + "learning_rate": 9.692335353469994e-06, + "loss": 0.0031, + "step": 23790 + }, + { + "epoch": 0.40222066366409504, + "grad_norm": 0.0556454062461853, + "learning_rate": 9.69182578946986e-06, + "loss": 0.0035, + "step": 23800 + }, + { + "epoch": 0.4023896639429455, + "grad_norm": 0.1124231368303299, + "learning_rate": 9.691315817258151e-06, + "loss": 0.0036, + "step": 23810 + }, + { + "epoch": 0.402558664221796, + "grad_norm": 0.09787849336862564, + "learning_rate": 9.690805436879234e-06, + "loss": 0.0069, + "step": 23820 + }, + { + "epoch": 0.4027276645006464, + "grad_norm": 0.06661677360534668, + "learning_rate": 9.690294648377519e-06, + "loss": 0.0029, + "step": 23830 + }, + { + "epoch": 0.40289666477949687, + "grad_norm": 0.1677347719669342, + "learning_rate": 
9.689783451797444e-06, + "loss": 0.0019, + "step": 23840 + }, + { + "epoch": 0.40306566505834734, + "grad_norm": 0.09581337124109268, + "learning_rate": 9.689271847183483e-06, + "loss": 0.0016, + "step": 23850 + }, + { + "epoch": 0.4032346653371978, + "grad_norm": 0.05288088321685791, + "learning_rate": 9.688759834580154e-06, + "loss": 0.0024, + "step": 23860 + }, + { + "epoch": 0.4034036656160483, + "grad_norm": 0.192446768283844, + "learning_rate": 9.688247414032002e-06, + "loss": 0.0025, + "step": 23870 + }, + { + "epoch": 0.4035726658948987, + "grad_norm": 0.11666693538427353, + "learning_rate": 9.687734585583609e-06, + "loss": 0.004, + "step": 23880 + }, + { + "epoch": 0.4037416661737492, + "grad_norm": 0.10450201481580734, + "learning_rate": 9.687221349279596e-06, + "loss": 0.0038, + "step": 23890 + }, + { + "epoch": 0.40391066645259965, + "grad_norm": 0.07050692290067673, + "learning_rate": 9.686707705164613e-06, + "loss": 0.0032, + "step": 23900 + }, + { + "epoch": 0.4040796667314501, + "grad_norm": 0.16578009724617004, + "learning_rate": 9.686193653283354e-06, + "loss": 0.0034, + "step": 23910 + }, + { + "epoch": 0.40424866701030054, + "grad_norm": 0.15062333643436432, + "learning_rate": 9.685679193680542e-06, + "loss": 0.0027, + "step": 23920 + }, + { + "epoch": 0.404417667289151, + "grad_norm": 0.06264977157115936, + "learning_rate": 9.685164326400936e-06, + "loss": 0.0016, + "step": 23930 + }, + { + "epoch": 0.4045866675680015, + "grad_norm": 0.32454973459243774, + "learning_rate": 9.684649051489335e-06, + "loss": 0.0028, + "step": 23940 + }, + { + "epoch": 0.40475566784685196, + "grad_norm": 0.08096280694007874, + "learning_rate": 9.684133368990567e-06, + "loss": 0.0028, + "step": 23950 + }, + { + "epoch": 0.4049246681257024, + "grad_norm": 0.21466168761253357, + "learning_rate": 9.683617278949501e-06, + "loss": 0.0031, + "step": 23960 + }, + { + "epoch": 0.40509366840455285, + "grad_norm": 0.16156120598316193, + "learning_rate": 9.68310078141104e-06, + "loss": 0.0027, + "step": 23970 + }, + { + "epoch": 0.4052626686834033, + "grad_norm": 0.09032265841960907, + "learning_rate": 9.682583876420121e-06, + "loss": 0.0028, + "step": 23980 + }, + { + "epoch": 0.4054316689622538, + "grad_norm": 0.25376391410827637, + "learning_rate": 9.68206656402172e-06, + "loss": 0.0024, + "step": 23990 + }, + { + "epoch": 0.4056006692411043, + "grad_norm": 0.23052674531936646, + "learning_rate": 9.681548844260839e-06, + "loss": 0.0041, + "step": 24000 + }, + { + "epoch": 0.4057696695199547, + "grad_norm": 0.1080017164349556, + "learning_rate": 9.681030717182527e-06, + "loss": 0.0037, + "step": 24010 + }, + { + "epoch": 0.40593866979880516, + "grad_norm": 0.07792221754789352, + "learning_rate": 9.680512182831861e-06, + "loss": 0.0033, + "step": 24020 + }, + { + "epoch": 0.40610767007765564, + "grad_norm": 0.24158845841884613, + "learning_rate": 9.67999324125396e-06, + "loss": 0.0043, + "step": 24030 + }, + { + "epoch": 0.4062766703565061, + "grad_norm": 0.135731503367424, + "learning_rate": 9.67947389249397e-06, + "loss": 0.003, + "step": 24040 + }, + { + "epoch": 0.4064456706353565, + "grad_norm": 0.07237989455461502, + "learning_rate": 9.678954136597079e-06, + "loss": 0.0035, + "step": 24050 + }, + { + "epoch": 0.406614670914207, + "grad_norm": 0.04045663774013519, + "learning_rate": 9.678433973608508e-06, + "loss": 0.0033, + "step": 24060 + }, + { + "epoch": 0.40678367119305747, + "grad_norm": 0.8070462346076965, + "learning_rate": 9.677913403573516e-06, + "loss": 0.0027, + "step": 24070 + }, + 
{ + "epoch": 0.40695267147190795, + "grad_norm": 0.2314949482679367, + "learning_rate": 9.677392426537391e-06, + "loss": 0.0038, + "step": 24080 + }, + { + "epoch": 0.40712167175075836, + "grad_norm": 0.04682336747646332, + "learning_rate": 9.676871042545462e-06, + "loss": 0.0018, + "step": 24090 + }, + { + "epoch": 0.40729067202960884, + "grad_norm": 0.11390011757612228, + "learning_rate": 9.676349251643094e-06, + "loss": 0.0026, + "step": 24100 + }, + { + "epoch": 0.4074596723084593, + "grad_norm": 0.07852821052074432, + "learning_rate": 9.675827053875682e-06, + "loss": 0.0028, + "step": 24110 + }, + { + "epoch": 0.4076286725873098, + "grad_norm": 0.13662543892860413, + "learning_rate": 9.675304449288662e-06, + "loss": 0.0026, + "step": 24120 + }, + { + "epoch": 0.40779767286616025, + "grad_norm": 0.6542212963104248, + "learning_rate": 9.674781437927501e-06, + "loss": 0.003, + "step": 24130 + }, + { + "epoch": 0.40796667314501067, + "grad_norm": 0.15434002876281738, + "learning_rate": 9.674258019837708e-06, + "loss": 0.0035, + "step": 24140 + }, + { + "epoch": 0.40813567342386115, + "grad_norm": 0.20408910512924194, + "learning_rate": 9.673734195064818e-06, + "loss": 0.0057, + "step": 24150 + }, + { + "epoch": 0.4083046737027116, + "grad_norm": 0.15168999135494232, + "learning_rate": 9.673209963654409e-06, + "loss": 0.0039, + "step": 24160 + }, + { + "epoch": 0.4084736739815621, + "grad_norm": 0.018376335501670837, + "learning_rate": 9.672685325652091e-06, + "loss": 0.0037, + "step": 24170 + }, + { + "epoch": 0.4086426742604125, + "grad_norm": 0.1481887549161911, + "learning_rate": 9.672160281103509e-06, + "loss": 0.003, + "step": 24180 + }, + { + "epoch": 0.408811674539263, + "grad_norm": 0.23799261450767517, + "learning_rate": 9.671634830054347e-06, + "loss": 0.0027, + "step": 24190 + }, + { + "epoch": 0.40898067481811345, + "grad_norm": 0.05132792517542839, + "learning_rate": 9.671108972550318e-06, + "loss": 0.0026, + "step": 24200 + }, + { + "epoch": 0.4091496750969639, + "grad_norm": 0.05585364252328873, + "learning_rate": 9.670582708637179e-06, + "loss": 0.0027, + "step": 24210 + }, + { + "epoch": 0.40931867537581434, + "grad_norm": 0.11600292474031448, + "learning_rate": 9.670056038360713e-06, + "loss": 0.0019, + "step": 24220 + }, + { + "epoch": 0.4094876756546648, + "grad_norm": 0.12043534219264984, + "learning_rate": 9.669528961766746e-06, + "loss": 0.0013, + "step": 24230 + }, + { + "epoch": 0.4096566759335153, + "grad_norm": 0.0870441123843193, + "learning_rate": 9.669001478901135e-06, + "loss": 0.0021, + "step": 24240 + }, + { + "epoch": 0.40982567621236576, + "grad_norm": 0.12131226807832718, + "learning_rate": 9.668473589809772e-06, + "loss": 0.0079, + "step": 24250 + }, + { + "epoch": 0.40999467649121624, + "grad_norm": 0.05351598933339119, + "learning_rate": 9.667945294538588e-06, + "loss": 0.0033, + "step": 24260 + }, + { + "epoch": 0.41016367677006665, + "grad_norm": 0.22449712455272675, + "learning_rate": 9.667416593133546e-06, + "loss": 0.0042, + "step": 24270 + }, + { + "epoch": 0.4103326770489171, + "grad_norm": 0.03976152464747429, + "learning_rate": 9.666887485640647e-06, + "loss": 0.0025, + "step": 24280 + }, + { + "epoch": 0.4105016773277676, + "grad_norm": 0.058581795543432236, + "learning_rate": 9.666357972105926e-06, + "loss": 0.0025, + "step": 24290 + }, + { + "epoch": 0.4106706776066181, + "grad_norm": 0.15372496843338013, + "learning_rate": 9.665828052575452e-06, + "loss": 0.0049, + "step": 24300 + }, + { + "epoch": 0.4108396778854685, + "grad_norm": 
0.3144594132900238, + "learning_rate": 9.665297727095329e-06, + "loss": 0.002, + "step": 24310 + }, + { + "epoch": 0.41100867816431896, + "grad_norm": 0.04498997703194618, + "learning_rate": 9.664766995711702e-06, + "loss": 0.0024, + "step": 24320 + }, + { + "epoch": 0.41117767844316944, + "grad_norm": 0.12133470922708511, + "learning_rate": 9.664235858470744e-06, + "loss": 0.0028, + "step": 24330 + }, + { + "epoch": 0.4113466787220199, + "grad_norm": 0.08657453209161758, + "learning_rate": 9.663704315418669e-06, + "loss": 0.002, + "step": 24340 + }, + { + "epoch": 0.4115156790008703, + "grad_norm": 0.18983878195285797, + "learning_rate": 9.66317236660172e-06, + "loss": 0.0049, + "step": 24350 + }, + { + "epoch": 0.4116846792797208, + "grad_norm": 0.11060302704572678, + "learning_rate": 9.662640012066185e-06, + "loss": 0.0032, + "step": 24360 + }, + { + "epoch": 0.4118536795585713, + "grad_norm": 0.04116309806704521, + "learning_rate": 9.662107251858375e-06, + "loss": 0.0036, + "step": 24370 + }, + { + "epoch": 0.41202267983742175, + "grad_norm": 0.11360263079404831, + "learning_rate": 9.661574086024645e-06, + "loss": 0.0042, + "step": 24380 + }, + { + "epoch": 0.4121916801162722, + "grad_norm": 0.038537509739398956, + "learning_rate": 9.661040514611386e-06, + "loss": 0.003, + "step": 24390 + }, + { + "epoch": 0.41236068039512264, + "grad_norm": 0.13446074724197388, + "learning_rate": 9.660506537665019e-06, + "loss": 0.0037, + "step": 24400 + }, + { + "epoch": 0.4125296806739731, + "grad_norm": 0.05725783109664917, + "learning_rate": 9.659972155232002e-06, + "loss": 0.003, + "step": 24410 + }, + { + "epoch": 0.4126986809528236, + "grad_norm": 0.09010022133588791, + "learning_rate": 9.659437367358828e-06, + "loss": 0.0035, + "step": 24420 + }, + { + "epoch": 0.41286768123167406, + "grad_norm": 0.12016736716032028, + "learning_rate": 9.658902174092029e-06, + "loss": 0.0037, + "step": 24430 + }, + { + "epoch": 0.4130366815105245, + "grad_norm": 0.5030607581138611, + "learning_rate": 9.658366575478168e-06, + "loss": 0.0026, + "step": 24440 + }, + { + "epoch": 0.41320568178937495, + "grad_norm": 0.04519381374120712, + "learning_rate": 9.657830571563845e-06, + "loss": 0.003, + "step": 24450 + }, + { + "epoch": 0.4133746820682254, + "grad_norm": 0.12689000368118286, + "learning_rate": 9.657294162395693e-06, + "loss": 0.0029, + "step": 24460 + }, + { + "epoch": 0.4135436823470759, + "grad_norm": 0.17634360492229462, + "learning_rate": 9.656757348020384e-06, + "loss": 0.003, + "step": 24470 + }, + { + "epoch": 0.4137126826259263, + "grad_norm": 0.06941474229097366, + "learning_rate": 9.656220128484624e-06, + "loss": 0.0037, + "step": 24480 + }, + { + "epoch": 0.4138816829047768, + "grad_norm": 0.2054547667503357, + "learning_rate": 9.655682503835154e-06, + "loss": 0.0026, + "step": 24490 + }, + { + "epoch": 0.41405068318362725, + "grad_norm": 0.028921693563461304, + "learning_rate": 9.655144474118748e-06, + "loss": 0.0031, + "step": 24500 + }, + { + "epoch": 0.41421968346247773, + "grad_norm": 0.13475167751312256, + "learning_rate": 9.654606039382216e-06, + "loss": 0.0061, + "step": 24510 + }, + { + "epoch": 0.4143886837413282, + "grad_norm": 0.08045545965433121, + "learning_rate": 9.654067199672408e-06, + "loss": 0.0033, + "step": 24520 + }, + { + "epoch": 0.4145576840201786, + "grad_norm": 0.13410677015781403, + "learning_rate": 9.653527955036205e-06, + "loss": 0.0039, + "step": 24530 + }, + { + "epoch": 0.4147266842990291, + "grad_norm": 0.15886163711547852, + "learning_rate": 
9.652988305520521e-06, + "loss": 0.0034, + "step": 24540 + }, + { + "epoch": 0.41489568457787956, + "grad_norm": 0.1001209244132042, + "learning_rate": 9.652448251172314e-06, + "loss": 0.0027, + "step": 24550 + }, + { + "epoch": 0.41506468485673004, + "grad_norm": 0.14588508009910583, + "learning_rate": 9.651907792038565e-06, + "loss": 0.0028, + "step": 24560 + }, + { + "epoch": 0.41523368513558045, + "grad_norm": 0.07480347901582718, + "learning_rate": 9.651366928166297e-06, + "loss": 0.0017, + "step": 24570 + }, + { + "epoch": 0.41540268541443093, + "grad_norm": 0.19968412816524506, + "learning_rate": 9.650825659602572e-06, + "loss": 0.0037, + "step": 24580 + }, + { + "epoch": 0.4155716856932814, + "grad_norm": 0.25834447145462036, + "learning_rate": 9.650283986394482e-06, + "loss": 0.0044, + "step": 24590 + }, + { + "epoch": 0.4157406859721319, + "grad_norm": 0.23417113721370697, + "learning_rate": 9.649741908589151e-06, + "loss": 0.004, + "step": 24600 + }, + { + "epoch": 0.4159096862509823, + "grad_norm": 0.06393170356750488, + "learning_rate": 9.649199426233748e-06, + "loss": 0.0022, + "step": 24610 + }, + { + "epoch": 0.41607868652983276, + "grad_norm": 0.12674586474895477, + "learning_rate": 9.648656539375469e-06, + "loss": 0.003, + "step": 24620 + }, + { + "epoch": 0.41624768680868324, + "grad_norm": 0.04374230280518532, + "learning_rate": 9.648113248061548e-06, + "loss": 0.0024, + "step": 24630 + }, + { + "epoch": 0.4164166870875337, + "grad_norm": 0.14106465876102448, + "learning_rate": 9.64756955233925e-06, + "loss": 0.0034, + "step": 24640 + }, + { + "epoch": 0.4165856873663841, + "grad_norm": 0.11494036763906479, + "learning_rate": 9.647025452255888e-06, + "loss": 0.0044, + "step": 24650 + }, + { + "epoch": 0.4167546876452346, + "grad_norm": 0.07348564267158508, + "learning_rate": 9.646480947858794e-06, + "loss": 0.0026, + "step": 24660 + }, + { + "epoch": 0.4169236879240851, + "grad_norm": 0.0473347082734108, + "learning_rate": 9.645936039195346e-06, + "loss": 0.0027, + "step": 24670 + }, + { + "epoch": 0.41709268820293555, + "grad_norm": 0.1133309155702591, + "learning_rate": 9.645390726312951e-06, + "loss": 0.0037, + "step": 24680 + }, + { + "epoch": 0.417261688481786, + "grad_norm": 0.05429339036345482, + "learning_rate": 9.644845009259055e-06, + "loss": 0.002, + "step": 24690 + }, + { + "epoch": 0.41743068876063644, + "grad_norm": 0.08009226620197296, + "learning_rate": 9.64429888808114e-06, + "loss": 0.0034, + "step": 24700 + }, + { + "epoch": 0.4175996890394869, + "grad_norm": 0.03397831693291664, + "learning_rate": 9.643752362826718e-06, + "loss": 0.0021, + "step": 24710 + }, + { + "epoch": 0.4177686893183374, + "grad_norm": 0.06480947881937027, + "learning_rate": 9.643205433543343e-06, + "loss": 0.0041, + "step": 24720 + }, + { + "epoch": 0.41793768959718786, + "grad_norm": 0.04352713003754616, + "learning_rate": 9.642658100278598e-06, + "loss": 0.0024, + "step": 24730 + }, + { + "epoch": 0.4181066898760383, + "grad_norm": 0.12258494645357132, + "learning_rate": 9.642110363080101e-06, + "loss": 0.0019, + "step": 24740 + }, + { + "epoch": 0.41827569015488875, + "grad_norm": 0.04337688535451889, + "learning_rate": 9.641562221995515e-06, + "loss": 0.0021, + "step": 24750 + }, + { + "epoch": 0.4184446904337392, + "grad_norm": 0.010867961682379246, + "learning_rate": 9.641013677072524e-06, + "loss": 0.0024, + "step": 24760 + }, + { + "epoch": 0.4186136907125897, + "grad_norm": 0.18787983059883118, + "learning_rate": 9.640464728358858e-06, + "loss": 0.003, + "step": 24770 
+ }, + { + "epoch": 0.4187826909914401, + "grad_norm": 0.16595998406410217, + "learning_rate": 9.639915375902277e-06, + "loss": 0.0026, + "step": 24780 + }, + { + "epoch": 0.4189516912702906, + "grad_norm": 0.058716196566820145, + "learning_rate": 9.639365619750577e-06, + "loss": 0.0043, + "step": 24790 + }, + { + "epoch": 0.41912069154914106, + "grad_norm": 0.07995070517063141, + "learning_rate": 9.63881545995159e-06, + "loss": 0.003, + "step": 24800 + }, + { + "epoch": 0.41928969182799153, + "grad_norm": 0.05202421545982361, + "learning_rate": 9.638264896553182e-06, + "loss": 0.0127, + "step": 24810 + }, + { + "epoch": 0.419458692106842, + "grad_norm": 0.13160359859466553, + "learning_rate": 9.637713929603257e-06, + "loss": 0.0034, + "step": 24820 + }, + { + "epoch": 0.4196276923856924, + "grad_norm": 0.08078698068857193, + "learning_rate": 9.637162559149748e-06, + "loss": 0.0038, + "step": 24830 + }, + { + "epoch": 0.4197966926645429, + "grad_norm": 0.08044783771038055, + "learning_rate": 9.63661078524063e-06, + "loss": 0.0031, + "step": 24840 + }, + { + "epoch": 0.41996569294339336, + "grad_norm": 0.11060716956853867, + "learning_rate": 9.636058607923907e-06, + "loss": 0.0046, + "step": 24850 + }, + { + "epoch": 0.42013469322224384, + "grad_norm": 0.22302891314029694, + "learning_rate": 9.635506027247624e-06, + "loss": 0.0035, + "step": 24860 + }, + { + "epoch": 0.42030369350109426, + "grad_norm": 0.19783833622932434, + "learning_rate": 9.634953043259858e-06, + "loss": 0.004, + "step": 24870 + }, + { + "epoch": 0.42047269377994473, + "grad_norm": 0.05085299164056778, + "learning_rate": 9.63439965600872e-06, + "loss": 0.008, + "step": 24880 + }, + { + "epoch": 0.4206416940587952, + "grad_norm": 0.11641545593738556, + "learning_rate": 9.633845865542356e-06, + "loss": 0.0039, + "step": 24890 + }, + { + "epoch": 0.4208106943376457, + "grad_norm": 0.06848058104515076, + "learning_rate": 9.633291671908952e-06, + "loss": 0.0042, + "step": 24900 + }, + { + "epoch": 0.4209796946164961, + "grad_norm": 0.1078142449259758, + "learning_rate": 9.632737075156721e-06, + "loss": 0.0034, + "step": 24910 + }, + { + "epoch": 0.42114869489534656, + "grad_norm": 0.2299955040216446, + "learning_rate": 9.632182075333923e-06, + "loss": 0.0047, + "step": 24920 + }, + { + "epoch": 0.42131769517419704, + "grad_norm": 0.036223720759153366, + "learning_rate": 9.631626672488838e-06, + "loss": 0.0024, + "step": 24930 + }, + { + "epoch": 0.4214866954530475, + "grad_norm": 0.04823785647749901, + "learning_rate": 9.631070866669791e-06, + "loss": 0.0028, + "step": 24940 + }, + { + "epoch": 0.421655695731898, + "grad_norm": 0.06334184855222702, + "learning_rate": 9.630514657925143e-06, + "loss": 0.0032, + "step": 24950 + }, + { + "epoch": 0.4218246960107484, + "grad_norm": 0.14863380789756775, + "learning_rate": 9.629958046303282e-06, + "loss": 0.003, + "step": 24960 + }, + { + "epoch": 0.4219936962895989, + "grad_norm": 0.30553850531578064, + "learning_rate": 9.629401031852639e-06, + "loss": 0.0042, + "step": 24970 + }, + { + "epoch": 0.42216269656844935, + "grad_norm": 0.11199302971363068, + "learning_rate": 9.628843614621678e-06, + "loss": 0.0033, + "step": 24980 + }, + { + "epoch": 0.4223316968472998, + "grad_norm": 0.057078536599874496, + "learning_rate": 9.628285794658894e-06, + "loss": 0.0017, + "step": 24990 + }, + { + "epoch": 0.42250069712615024, + "grad_norm": 0.445557177066803, + "learning_rate": 9.627727572012821e-06, + "loss": 0.0032, + "step": 25000 + }, + { + "epoch": 0.4226696974050007, + "grad_norm": 
0.3197193145751953, + "learning_rate": 9.627168946732028e-06, + "loss": 0.0035, + "step": 25010 + }, + { + "epoch": 0.4228386976838512, + "grad_norm": 0.020992321893572807, + "learning_rate": 9.626609918865117e-06, + "loss": 0.0027, + "step": 25020 + }, + { + "epoch": 0.42300769796270166, + "grad_norm": 0.08679168671369553, + "learning_rate": 9.626050488460727e-06, + "loss": 0.0023, + "step": 25030 + }, + { + "epoch": 0.4231766982415521, + "grad_norm": 0.08856311440467834, + "learning_rate": 9.62549065556753e-06, + "loss": 0.0033, + "step": 25040 + }, + { + "epoch": 0.42334569852040255, + "grad_norm": 0.10782845318317413, + "learning_rate": 9.624930420234235e-06, + "loss": 0.0039, + "step": 25050 + }, + { + "epoch": 0.423514698799253, + "grad_norm": 0.14478425681591034, + "learning_rate": 9.624369782509586e-06, + "loss": 0.002, + "step": 25060 + }, + { + "epoch": 0.4236836990781035, + "grad_norm": 0.1290246993303299, + "learning_rate": 9.62380874244236e-06, + "loss": 0.003, + "step": 25070 + }, + { + "epoch": 0.42385269935695397, + "grad_norm": 0.11611910164356232, + "learning_rate": 9.62324730008137e-06, + "loss": 0.0017, + "step": 25080 + }, + { + "epoch": 0.4240216996358044, + "grad_norm": 0.08613353967666626, + "learning_rate": 9.622685455475466e-06, + "loss": 0.003, + "step": 25090 + }, + { + "epoch": 0.42419069991465486, + "grad_norm": 0.06108185276389122, + "learning_rate": 9.622123208673528e-06, + "loss": 0.0019, + "step": 25100 + }, + { + "epoch": 0.42435970019350533, + "grad_norm": 0.09997698664665222, + "learning_rate": 9.62156055972448e-06, + "loss": 0.0026, + "step": 25110 + }, + { + "epoch": 0.4245287004723558, + "grad_norm": 0.11060816794633865, + "learning_rate": 9.620997508677268e-06, + "loss": 0.0029, + "step": 25120 + }, + { + "epoch": 0.4246977007512062, + "grad_norm": 0.1168910413980484, + "learning_rate": 9.620434055580882e-06, + "loss": 0.0031, + "step": 25130 + }, + { + "epoch": 0.4248667010300567, + "grad_norm": 0.08170294016599655, + "learning_rate": 9.61987020048435e-06, + "loss": 0.0025, + "step": 25140 + }, + { + "epoch": 0.42503570130890717, + "grad_norm": 0.2618868052959442, + "learning_rate": 9.619305943436725e-06, + "loss": 0.0024, + "step": 25150 + }, + { + "epoch": 0.42520470158775764, + "grad_norm": 0.028670979663729668, + "learning_rate": 9.618741284487103e-06, + "loss": 0.0038, + "step": 25160 + }, + { + "epoch": 0.42537370186660806, + "grad_norm": 0.16440579295158386, + "learning_rate": 9.61817622368461e-06, + "loss": 0.0037, + "step": 25170 + }, + { + "epoch": 0.42554270214545853, + "grad_norm": 0.11584070324897766, + "learning_rate": 9.61761076107841e-06, + "loss": 0.0032, + "step": 25180 + }, + { + "epoch": 0.425711702424309, + "grad_norm": 0.2152673900127411, + "learning_rate": 9.617044896717703e-06, + "loss": 0.0038, + "step": 25190 + }, + { + "epoch": 0.4258807027031595, + "grad_norm": 0.028066881000995636, + "learning_rate": 9.616478630651718e-06, + "loss": 0.0032, + "step": 25200 + }, + { + "epoch": 0.42604970298200995, + "grad_norm": 0.19883617758750916, + "learning_rate": 9.615911962929725e-06, + "loss": 0.0037, + "step": 25210 + }, + { + "epoch": 0.42621870326086037, + "grad_norm": 0.15827932953834534, + "learning_rate": 9.615344893601026e-06, + "loss": 0.0033, + "step": 25220 + }, + { + "epoch": 0.42638770353971084, + "grad_norm": 0.07252488285303116, + "learning_rate": 9.61477742271496e-06, + "loss": 0.0037, + "step": 25230 + }, + { + "epoch": 0.4265567038185613, + "grad_norm": 0.28482991456985474, + "learning_rate": 
9.614209550320899e-06, + "loss": 0.0018, + "step": 25240 + }, + { + "epoch": 0.4267257040974118, + "grad_norm": 0.016567599028348923, + "learning_rate": 9.61364127646825e-06, + "loss": 0.0037, + "step": 25250 + }, + { + "epoch": 0.4268947043762622, + "grad_norm": 0.11594730615615845, + "learning_rate": 9.613072601206459e-06, + "loss": 0.0032, + "step": 25260 + }, + { + "epoch": 0.4270637046551127, + "grad_norm": 0.12419804185628891, + "learning_rate": 9.612503524584997e-06, + "loss": 0.0054, + "step": 25270 + }, + { + "epoch": 0.42723270493396315, + "grad_norm": 0.06168608367443085, + "learning_rate": 9.611934046653384e-06, + "loss": 0.0025, + "step": 25280 + }, + { + "epoch": 0.4274017052128136, + "grad_norm": 0.0676799863576889, + "learning_rate": 9.61136416746116e-06, + "loss": 0.0018, + "step": 25290 + }, + { + "epoch": 0.42757070549166404, + "grad_norm": 0.08644477277994156, + "learning_rate": 9.610793887057914e-06, + "loss": 0.0049, + "step": 25300 + }, + { + "epoch": 0.4277397057705145, + "grad_norm": 0.12501251697540283, + "learning_rate": 9.610223205493259e-06, + "loss": 0.0042, + "step": 25310 + }, + { + "epoch": 0.427908706049365, + "grad_norm": 0.022200902923941612, + "learning_rate": 9.609652122816847e-06, + "loss": 0.0026, + "step": 25320 + }, + { + "epoch": 0.42807770632821546, + "grad_norm": 0.19189688563346863, + "learning_rate": 9.609080639078367e-06, + "loss": 0.0019, + "step": 25330 + }, + { + "epoch": 0.4282467066070659, + "grad_norm": 0.08449879288673401, + "learning_rate": 9.60850875432754e-06, + "loss": 0.0027, + "step": 25340 + }, + { + "epoch": 0.42841570688591635, + "grad_norm": 0.5087281465530396, + "learning_rate": 9.607936468614122e-06, + "loss": 0.0036, + "step": 25350 + }, + { + "epoch": 0.4285847071647668, + "grad_norm": 0.0032684323377907276, + "learning_rate": 9.607363781987906e-06, + "loss": 0.0021, + "step": 25360 + }, + { + "epoch": 0.4287537074436173, + "grad_norm": 0.15610577166080475, + "learning_rate": 9.606790694498717e-06, + "loss": 0.0037, + "step": 25370 + }, + { + "epoch": 0.42892270772246777, + "grad_norm": 0.02182059921324253, + "learning_rate": 9.606217206196418e-06, + "loss": 0.0022, + "step": 25380 + }, + { + "epoch": 0.4290917080013182, + "grad_norm": 0.08796156942844391, + "learning_rate": 9.605643317130903e-06, + "loss": 0.0043, + "step": 25390 + }, + { + "epoch": 0.42926070828016866, + "grad_norm": 0.07977686822414398, + "learning_rate": 9.605069027352108e-06, + "loss": 0.0024, + "step": 25400 + }, + { + "epoch": 0.42942970855901913, + "grad_norm": 0.08197636157274246, + "learning_rate": 9.604494336909994e-06, + "loss": 0.0029, + "step": 25410 + }, + { + "epoch": 0.4295987088378696, + "grad_norm": 0.062118370085954666, + "learning_rate": 9.60391924585456e-06, + "loss": 0.0033, + "step": 25420 + }, + { + "epoch": 0.42976770911672, + "grad_norm": 0.13398393988609314, + "learning_rate": 9.603343754235849e-06, + "loss": 0.0032, + "step": 25430 + }, + { + "epoch": 0.4299367093955705, + "grad_norm": 0.1671101152896881, + "learning_rate": 9.602767862103925e-06, + "loss": 0.0041, + "step": 25440 + }, + { + "epoch": 0.43010570967442097, + "grad_norm": 0.02333880588412285, + "learning_rate": 9.602191569508899e-06, + "loss": 0.003, + "step": 25450 + }, + { + "epoch": 0.43027470995327144, + "grad_norm": 0.05902350693941116, + "learning_rate": 9.601614876500906e-06, + "loss": 0.0028, + "step": 25460 + }, + { + "epoch": 0.43044371023212186, + "grad_norm": 0.09615401923656464, + "learning_rate": 9.601037783130123e-06, + "loss": 0.0021, + "step": 
25470 + }, + { + "epoch": 0.43061271051097233, + "grad_norm": 0.11068742722272873, + "learning_rate": 9.600460289446762e-06, + "loss": 0.0027, + "step": 25480 + }, + { + "epoch": 0.4307817107898228, + "grad_norm": 0.08340872824192047, + "learning_rate": 9.599882395501066e-06, + "loss": 0.0025, + "step": 25490 + }, + { + "epoch": 0.4309507110686733, + "grad_norm": 0.4351450800895691, + "learning_rate": 9.599304101343314e-06, + "loss": 0.005, + "step": 25500 + }, + { + "epoch": 0.43111971134752375, + "grad_norm": 0.07268258184194565, + "learning_rate": 9.59872540702382e-06, + "loss": 0.0032, + "step": 25510 + }, + { + "epoch": 0.43128871162637417, + "grad_norm": 0.15467266738414764, + "learning_rate": 9.598146312592938e-06, + "loss": 0.003, + "step": 25520 + }, + { + "epoch": 0.43145771190522464, + "grad_norm": 0.158308744430542, + "learning_rate": 9.597566818101046e-06, + "loss": 0.0022, + "step": 25530 + }, + { + "epoch": 0.4316267121840751, + "grad_norm": 0.06377403438091278, + "learning_rate": 9.596986923598564e-06, + "loss": 0.0033, + "step": 25540 + }, + { + "epoch": 0.4317957124629256, + "grad_norm": 0.17815333604812622, + "learning_rate": 9.596406629135947e-06, + "loss": 0.003, + "step": 25550 + }, + { + "epoch": 0.431964712741776, + "grad_norm": 0.10275933146476746, + "learning_rate": 9.595825934763684e-06, + "loss": 0.0037, + "step": 25560 + }, + { + "epoch": 0.4321337130206265, + "grad_norm": 0.06398187577724457, + "learning_rate": 9.595244840532296e-06, + "loss": 0.0011, + "step": 25570 + }, + { + "epoch": 0.43230271329947695, + "grad_norm": 0.06439651548862457, + "learning_rate": 9.594663346492344e-06, + "loss": 0.003, + "step": 25580 + }, + { + "epoch": 0.4324717135783274, + "grad_norm": 0.019858231768012047, + "learning_rate": 9.594081452694419e-06, + "loss": 0.0027, + "step": 25590 + }, + { + "epoch": 0.43264071385717784, + "grad_norm": 0.33359295129776, + "learning_rate": 9.593499159189147e-06, + "loss": 0.003, + "step": 25600 + }, + { + "epoch": 0.4328097141360283, + "grad_norm": 0.13325978815555573, + "learning_rate": 9.592916466027195e-06, + "loss": 0.0043, + "step": 25610 + }, + { + "epoch": 0.4329787144148788, + "grad_norm": 0.05250495672225952, + "learning_rate": 9.592333373259256e-06, + "loss": 0.0024, + "step": 25620 + }, + { + "epoch": 0.43314771469372926, + "grad_norm": 0.09613601863384247, + "learning_rate": 9.591749880936063e-06, + "loss": 0.0012, + "step": 25630 + }, + { + "epoch": 0.43331671497257973, + "grad_norm": 0.056316081434488297, + "learning_rate": 9.591165989108384e-06, + "loss": 0.0042, + "step": 25640 + }, + { + "epoch": 0.43348571525143015, + "grad_norm": 0.08078259974718094, + "learning_rate": 9.590581697827017e-06, + "loss": 0.0015, + "step": 25650 + }, + { + "epoch": 0.4336547155302806, + "grad_norm": 0.23672063648700714, + "learning_rate": 9.589997007142802e-06, + "loss": 0.0028, + "step": 25660 + }, + { + "epoch": 0.4338237158091311, + "grad_norm": 0.028148150071501732, + "learning_rate": 9.589411917106608e-06, + "loss": 0.0018, + "step": 25670 + }, + { + "epoch": 0.43399271608798157, + "grad_norm": 0.11672691255807877, + "learning_rate": 9.58882642776934e-06, + "loss": 0.0021, + "step": 25680 + }, + { + "epoch": 0.434161716366832, + "grad_norm": 0.10163123905658722, + "learning_rate": 9.588240539181942e-06, + "loss": 0.0026, + "step": 25690 + }, + { + "epoch": 0.43433071664568246, + "grad_norm": 0.06551125645637512, + "learning_rate": 9.587654251395385e-06, + "loss": 0.0093, + "step": 25700 + }, + { + "epoch": 0.43449971692453293, + 
"grad_norm": 0.04481912776827812, + "learning_rate": 9.587067564460679e-06, + "loss": 0.0034, + "step": 25710 + }, + { + "epoch": 0.4346687172033834, + "grad_norm": 0.13800600171089172, + "learning_rate": 9.586480478428872e-06, + "loss": 0.0048, + "step": 25720 + }, + { + "epoch": 0.4348377174822338, + "grad_norm": 0.09977370500564575, + "learning_rate": 9.58589299335104e-06, + "loss": 0.0012, + "step": 25730 + }, + { + "epoch": 0.4350067177610843, + "grad_norm": 0.0055341655388474464, + "learning_rate": 9.585305109278299e-06, + "loss": 0.0031, + "step": 25740 + }, + { + "epoch": 0.43517571803993477, + "grad_norm": 0.16339772939682007, + "learning_rate": 9.584716826261797e-06, + "loss": 0.0051, + "step": 25750 + }, + { + "epoch": 0.43534471831878524, + "grad_norm": 0.10466916114091873, + "learning_rate": 9.584128144352717e-06, + "loss": 0.0036, + "step": 25760 + }, + { + "epoch": 0.4355137185976357, + "grad_norm": 0.13969078660011292, + "learning_rate": 9.58353906360228e-06, + "loss": 0.0018, + "step": 25770 + }, + { + "epoch": 0.43568271887648613, + "grad_norm": 0.10388576984405518, + "learning_rate": 9.582949584061736e-06, + "loss": 0.0026, + "step": 25780 + }, + { + "epoch": 0.4358517191553366, + "grad_norm": 0.031867023557424545, + "learning_rate": 9.582359705782371e-06, + "loss": 0.003, + "step": 25790 + }, + { + "epoch": 0.4360207194341871, + "grad_norm": 0.2640696167945862, + "learning_rate": 9.581769428815512e-06, + "loss": 0.0039, + "step": 25800 + }, + { + "epoch": 0.43618971971303755, + "grad_norm": 0.07732165604829788, + "learning_rate": 9.581178753212514e-06, + "loss": 0.0035, + "step": 25810 + }, + { + "epoch": 0.43635871999188797, + "grad_norm": 0.0999775156378746, + "learning_rate": 9.580587679024766e-06, + "loss": 0.0025, + "step": 25820 + }, + { + "epoch": 0.43652772027073844, + "grad_norm": 0.04713275283575058, + "learning_rate": 9.579996206303696e-06, + "loss": 0.0026, + "step": 25830 + }, + { + "epoch": 0.4366967205495889, + "grad_norm": 0.11159471422433853, + "learning_rate": 9.579404335100768e-06, + "loss": 0.0033, + "step": 25840 + }, + { + "epoch": 0.4368657208284394, + "grad_norm": 0.16928553581237793, + "learning_rate": 9.578812065467475e-06, + "loss": 0.0027, + "step": 25850 + }, + { + "epoch": 0.4370347211072898, + "grad_norm": 0.03889685869216919, + "learning_rate": 9.578219397455346e-06, + "loss": 0.0054, + "step": 25860 + }, + { + "epoch": 0.4372037213861403, + "grad_norm": 0.10739434510469437, + "learning_rate": 9.577626331115947e-06, + "loss": 0.0023, + "step": 25870 + }, + { + "epoch": 0.43737272166499075, + "grad_norm": 0.23763492703437805, + "learning_rate": 9.577032866500879e-06, + "loss": 0.0027, + "step": 25880 + }, + { + "epoch": 0.4375417219438412, + "grad_norm": 0.1545303910970688, + "learning_rate": 9.576439003661776e-06, + "loss": 0.0026, + "step": 25890 + }, + { + "epoch": 0.4377107222226917, + "grad_norm": 0.08356676250696182, + "learning_rate": 9.575844742650305e-06, + "loss": 0.0023, + "step": 25900 + }, + { + "epoch": 0.4378797225015421, + "grad_norm": 0.1397261917591095, + "learning_rate": 9.575250083518173e-06, + "loss": 0.0031, + "step": 25910 + }, + { + "epoch": 0.4380487227803926, + "grad_norm": 0.122606061398983, + "learning_rate": 9.574655026317114e-06, + "loss": 0.0027, + "step": 25920 + }, + { + "epoch": 0.43821772305924306, + "grad_norm": 0.11674268543720245, + "learning_rate": 9.574059571098903e-06, + "loss": 0.0022, + "step": 25930 + }, + { + "epoch": 0.43838672333809353, + "grad_norm": 0.07671211659908295, + "learning_rate": 
9.573463717915349e-06, + "loss": 0.0022, + "step": 25940 + }, + { + "epoch": 0.43855572361694395, + "grad_norm": 0.1337328404188156, + "learning_rate": 9.572867466818291e-06, + "loss": 0.0019, + "step": 25950 + }, + { + "epoch": 0.4387247238957944, + "grad_norm": 0.09576038271188736, + "learning_rate": 9.572270817859607e-06, + "loss": 0.0031, + "step": 25960 + }, + { + "epoch": 0.4388937241746449, + "grad_norm": 0.021519066765904427, + "learning_rate": 9.571673771091212e-06, + "loss": 0.0014, + "step": 25970 + }, + { + "epoch": 0.43906272445349537, + "grad_norm": 0.01436109934002161, + "learning_rate": 9.571076326565045e-06, + "loss": 0.0028, + "step": 25980 + }, + { + "epoch": 0.4392317247323458, + "grad_norm": 0.1742318868637085, + "learning_rate": 9.570478484333093e-06, + "loss": 0.0024, + "step": 25990 + }, + { + "epoch": 0.43940072501119626, + "grad_norm": 0.05830789729952812, + "learning_rate": 9.569880244447368e-06, + "loss": 0.0029, + "step": 26000 + }, + { + "epoch": 0.43956972529004673, + "grad_norm": 0.12230129539966583, + "learning_rate": 9.569281606959917e-06, + "loss": 0.0023, + "step": 26010 + }, + { + "epoch": 0.4397387255688972, + "grad_norm": 0.033528443425893784, + "learning_rate": 9.56868257192283e-06, + "loss": 0.0029, + "step": 26020 + }, + { + "epoch": 0.4399077258477476, + "grad_norm": 0.18434548377990723, + "learning_rate": 9.568083139388224e-06, + "loss": 0.0036, + "step": 26030 + }, + { + "epoch": 0.4400767261265981, + "grad_norm": 0.14409662783145905, + "learning_rate": 9.56748330940825e-06, + "loss": 0.0047, + "step": 26040 + }, + { + "epoch": 0.44024572640544857, + "grad_norm": 0.06758307665586472, + "learning_rate": 9.5668830820351e-06, + "loss": 0.0027, + "step": 26050 + }, + { + "epoch": 0.44041472668429904, + "grad_norm": 0.09830377250909805, + "learning_rate": 9.566282457320994e-06, + "loss": 0.0031, + "step": 26060 + }, + { + "epoch": 0.4405837269631495, + "grad_norm": 0.09198874235153198, + "learning_rate": 9.56568143531819e-06, + "loss": 0.0026, + "step": 26070 + }, + { + "epoch": 0.44075272724199993, + "grad_norm": 0.02963336929678917, + "learning_rate": 9.56508001607898e-06, + "loss": 0.0051, + "step": 26080 + }, + { + "epoch": 0.4409217275208504, + "grad_norm": 0.07761721312999725, + "learning_rate": 9.56447819965569e-06, + "loss": 0.0038, + "step": 26090 + }, + { + "epoch": 0.4410907277997009, + "grad_norm": 0.2935218811035156, + "learning_rate": 9.563875986100682e-06, + "loss": 0.0034, + "step": 26100 + }, + { + "epoch": 0.44125972807855135, + "grad_norm": 0.22003482282161713, + "learning_rate": 9.563273375466351e-06, + "loss": 0.0027, + "step": 26110 + }, + { + "epoch": 0.44142872835740177, + "grad_norm": 0.05516747757792473, + "learning_rate": 9.562670367805127e-06, + "loss": 0.0031, + "step": 26120 + }, + { + "epoch": 0.44159772863625224, + "grad_norm": 0.06797683238983154, + "learning_rate": 9.562066963169472e-06, + "loss": 0.0033, + "step": 26130 + }, + { + "epoch": 0.4417667289151027, + "grad_norm": 0.07337122410535812, + "learning_rate": 9.56146316161189e-06, + "loss": 0.0033, + "step": 26140 + }, + { + "epoch": 0.4419357291939532, + "grad_norm": 0.14874686300754547, + "learning_rate": 9.56085896318491e-06, + "loss": 0.0035, + "step": 26150 + }, + { + "epoch": 0.4421047294728036, + "grad_norm": 0.16632644832134247, + "learning_rate": 9.560254367941104e-06, + "loss": 0.0031, + "step": 26160 + }, + { + "epoch": 0.4422737297516541, + "grad_norm": 0.08548545092344284, + "learning_rate": 9.559649375933073e-06, + "loss": 0.0023, + "step": 26170 + 
}, + { + "epoch": 0.44244273003050455, + "grad_norm": 0.24352887272834778, + "learning_rate": 9.559043987213451e-06, + "loss": 0.0019, + "step": 26180 + }, + { + "epoch": 0.442611730309355, + "grad_norm": 0.03823421150445938, + "learning_rate": 9.558438201834918e-06, + "loss": 0.0024, + "step": 26190 + }, + { + "epoch": 0.4427807305882055, + "grad_norm": 0.2297850400209427, + "learning_rate": 9.557832019850172e-06, + "loss": 0.0041, + "step": 26200 + }, + { + "epoch": 0.4429497308670559, + "grad_norm": 0.06309180706739426, + "learning_rate": 9.557225441311959e-06, + "loss": 0.0024, + "step": 26210 + }, + { + "epoch": 0.4431187311459064, + "grad_norm": 0.1206027939915657, + "learning_rate": 9.55661846627305e-06, + "loss": 0.0026, + "step": 26220 + }, + { + "epoch": 0.44328773142475686, + "grad_norm": 0.052939776331186295, + "learning_rate": 9.55601109478626e-06, + "loss": 0.0029, + "step": 26230 + }, + { + "epoch": 0.44345673170360733, + "grad_norm": 0.05884641408920288, + "learning_rate": 9.55540332690443e-06, + "loss": 0.0037, + "step": 26240 + }, + { + "epoch": 0.44362573198245775, + "grad_norm": 0.0946325957775116, + "learning_rate": 9.554795162680436e-06, + "loss": 0.0048, + "step": 26250 + }, + { + "epoch": 0.4437947322613082, + "grad_norm": 0.09997012466192245, + "learning_rate": 9.554186602167197e-06, + "loss": 0.0026, + "step": 26260 + }, + { + "epoch": 0.4439637325401587, + "grad_norm": 0.023280398920178413, + "learning_rate": 9.553577645417657e-06, + "loss": 0.0028, + "step": 26270 + }, + { + "epoch": 0.44413273281900917, + "grad_norm": 0.16902558505535126, + "learning_rate": 9.5529682924848e-06, + "loss": 0.0019, + "step": 26280 + }, + { + "epoch": 0.4443017330978596, + "grad_norm": 0.06520242989063263, + "learning_rate": 9.552358543421643e-06, + "loss": 0.0029, + "step": 26290 + }, + { + "epoch": 0.44447073337671006, + "grad_norm": 0.10236934572458267, + "learning_rate": 9.551748398281233e-06, + "loss": 0.002, + "step": 26300 + }, + { + "epoch": 0.44463973365556053, + "grad_norm": 0.039478711783885956, + "learning_rate": 9.551137857116661e-06, + "loss": 0.0023, + "step": 26310 + }, + { + "epoch": 0.444808733934411, + "grad_norm": 0.05753898248076439, + "learning_rate": 9.550526919981045e-06, + "loss": 0.0024, + "step": 26320 + }, + { + "epoch": 0.4449777342132615, + "grad_norm": 0.06465510278940201, + "learning_rate": 9.549915586927536e-06, + "loss": 0.0017, + "step": 26330 + }, + { + "epoch": 0.4451467344921119, + "grad_norm": 0.322773277759552, + "learning_rate": 9.549303858009329e-06, + "loss": 0.002, + "step": 26340 + }, + { + "epoch": 0.44531573477096237, + "grad_norm": 0.16484954953193665, + "learning_rate": 9.548691733279644e-06, + "loss": 0.0029, + "step": 26350 + }, + { + "epoch": 0.44548473504981284, + "grad_norm": 0.09908819943666458, + "learning_rate": 9.548079212791739e-06, + "loss": 0.0019, + "step": 26360 + }, + { + "epoch": 0.4456537353286633, + "grad_norm": 0.08926418423652649, + "learning_rate": 9.547466296598907e-06, + "loss": 0.0018, + "step": 26370 + }, + { + "epoch": 0.44582273560751373, + "grad_norm": 0.14144323766231537, + "learning_rate": 9.546852984754474e-06, + "loss": 0.0024, + "step": 26380 + }, + { + "epoch": 0.4459917358863642, + "grad_norm": 0.09844768047332764, + "learning_rate": 9.546239277311799e-06, + "loss": 0.0023, + "step": 26390 + }, + { + "epoch": 0.4461607361652147, + "grad_norm": 0.02975483052432537, + "learning_rate": 9.545625174324282e-06, + "loss": 0.0023, + "step": 26400 + }, + { + "epoch": 0.44632973644406515, + "grad_norm": 
0.11824537813663483, + "learning_rate": 9.545010675845352e-06, + "loss": 0.0035, + "step": 26410 + }, + { + "epoch": 0.44649873672291557, + "grad_norm": 0.20971493422985077, + "learning_rate": 9.544395781928471e-06, + "loss": 0.0035, + "step": 26420 + }, + { + "epoch": 0.44666773700176604, + "grad_norm": 0.06645579636096954, + "learning_rate": 9.543780492627137e-06, + "loss": 0.0026, + "step": 26430 + }, + { + "epoch": 0.4468367372806165, + "grad_norm": 0.08542334288358688, + "learning_rate": 9.543164807994886e-06, + "loss": 0.0018, + "step": 26440 + }, + { + "epoch": 0.447005737559467, + "grad_norm": 0.2023836076259613, + "learning_rate": 9.542548728085286e-06, + "loss": 0.0034, + "step": 26450 + }, + { + "epoch": 0.44717473783831746, + "grad_norm": 0.052794910967350006, + "learning_rate": 9.541932252951938e-06, + "loss": 0.0011, + "step": 26460 + }, + { + "epoch": 0.4473437381171679, + "grad_norm": 0.22299179434776306, + "learning_rate": 9.541315382648476e-06, + "loss": 0.0035, + "step": 26470 + }, + { + "epoch": 0.44751273839601835, + "grad_norm": 0.06551665812730789, + "learning_rate": 9.540698117228573e-06, + "loss": 0.0021, + "step": 26480 + }, + { + "epoch": 0.4476817386748688, + "grad_norm": 0.12201642245054245, + "learning_rate": 9.540080456745934e-06, + "loss": 0.0016, + "step": 26490 + }, + { + "epoch": 0.4478507389537193, + "grad_norm": 0.08223738521337509, + "learning_rate": 9.539462401254298e-06, + "loss": 0.0019, + "step": 26500 + }, + { + "epoch": 0.4480197392325697, + "grad_norm": 0.2150903344154358, + "learning_rate": 9.538843950807438e-06, + "loss": 0.0041, + "step": 26510 + }, + { + "epoch": 0.4481887395114202, + "grad_norm": 0.1299905776977539, + "learning_rate": 9.538225105459164e-06, + "loss": 0.0036, + "step": 26520 + }, + { + "epoch": 0.44835773979027066, + "grad_norm": 0.020779894664883614, + "learning_rate": 9.537605865263318e-06, + "loss": 0.0021, + "step": 26530 + }, + { + "epoch": 0.44852674006912113, + "grad_norm": 0.04517320543527603, + "learning_rate": 9.536986230273774e-06, + "loss": 0.0034, + "step": 26540 + }, + { + "epoch": 0.44869574034797155, + "grad_norm": 0.12831257283687592, + "learning_rate": 9.536366200544448e-06, + "loss": 0.0033, + "step": 26550 + }, + { + "epoch": 0.448864740626822, + "grad_norm": 0.13628539443016052, + "learning_rate": 9.535745776129284e-06, + "loss": 0.0031, + "step": 26560 + }, + { + "epoch": 0.4490337409056725, + "grad_norm": 0.09149462729692459, + "learning_rate": 9.53512495708226e-06, + "loss": 0.0023, + "step": 26570 + }, + { + "epoch": 0.44920274118452297, + "grad_norm": 0.07784470915794373, + "learning_rate": 9.534503743457392e-06, + "loss": 0.0025, + "step": 26580 + }, + { + "epoch": 0.44937174146337344, + "grad_norm": 0.15072597563266754, + "learning_rate": 9.533882135308727e-06, + "loss": 0.0039, + "step": 26590 + }, + { + "epoch": 0.44954074174222386, + "grad_norm": 0.10618377476930618, + "learning_rate": 9.53326013269035e-06, + "loss": 0.0043, + "step": 26600 + }, + { + "epoch": 0.44970974202107433, + "grad_norm": 0.0775023028254509, + "learning_rate": 9.532637735656379e-06, + "loss": 0.0025, + "step": 26610 + }, + { + "epoch": 0.4498787422999248, + "grad_norm": 0.10800741612911224, + "learning_rate": 9.532014944260962e-06, + "loss": 0.0028, + "step": 26620 + }, + { + "epoch": 0.4500477425787753, + "grad_norm": 0.04061252623796463, + "learning_rate": 9.531391758558286e-06, + "loss": 0.0034, + "step": 26630 + }, + { + "epoch": 0.4502167428576257, + "grad_norm": 0.004851445555686951, + "learning_rate": 
9.530768178602573e-06, + "loss": 0.0032, + "step": 26640 + }, + { + "epoch": 0.45038574313647617, + "grad_norm": 0.0552886426448822, + "learning_rate": 9.530144204448076e-06, + "loss": 0.0018, + "step": 26650 + }, + { + "epoch": 0.45055474341532664, + "grad_norm": 0.14879478514194489, + "learning_rate": 9.529519836149083e-06, + "loss": 0.0021, + "step": 26660 + }, + { + "epoch": 0.4507237436941771, + "grad_norm": 0.06371227651834488, + "learning_rate": 9.528895073759921e-06, + "loss": 0.0025, + "step": 26670 + }, + { + "epoch": 0.45089274397302753, + "grad_norm": 1.3603676557540894, + "learning_rate": 9.528269917334942e-06, + "loss": 0.0047, + "step": 26680 + }, + { + "epoch": 0.451061744251878, + "grad_norm": 0.04017437994480133, + "learning_rate": 9.527644366928542e-06, + "loss": 0.0027, + "step": 26690 + }, + { + "epoch": 0.4512307445307285, + "grad_norm": 0.12146834284067154, + "learning_rate": 9.527018422595144e-06, + "loss": 0.005, + "step": 26700 + }, + { + "epoch": 0.45139974480957895, + "grad_norm": 0.0878925770521164, + "learning_rate": 9.52639208438921e-06, + "loss": 0.0032, + "step": 26710 + }, + { + "epoch": 0.45156874508842937, + "grad_norm": 0.04248873144388199, + "learning_rate": 9.525765352365236e-06, + "loss": 0.0024, + "step": 26720 + }, + { + "epoch": 0.45173774536727984, + "grad_norm": 0.11376402527093887, + "learning_rate": 9.525138226577744e-06, + "loss": 0.0036, + "step": 26730 + }, + { + "epoch": 0.4519067456461303, + "grad_norm": 0.1192072182893753, + "learning_rate": 9.524510707081304e-06, + "loss": 0.0023, + "step": 26740 + }, + { + "epoch": 0.4520757459249808, + "grad_norm": 0.049709390848875046, + "learning_rate": 9.52388279393051e-06, + "loss": 0.0011, + "step": 26750 + }, + { + "epoch": 0.45224474620383126, + "grad_norm": 0.1261884570121765, + "learning_rate": 9.523254487179997e-06, + "loss": 0.0028, + "step": 26760 + }, + { + "epoch": 0.4524137464826817, + "grad_norm": 0.07239934056997299, + "learning_rate": 9.522625786884424e-06, + "loss": 0.0022, + "step": 26770 + }, + { + "epoch": 0.45258274676153215, + "grad_norm": 0.010885490104556084, + "learning_rate": 9.521996693098496e-06, + "loss": 0.0028, + "step": 26780 + }, + { + "epoch": 0.4527517470403826, + "grad_norm": 0.21607480943202972, + "learning_rate": 9.521367205876946e-06, + "loss": 0.002, + "step": 26790 + }, + { + "epoch": 0.4529207473192331, + "grad_norm": 0.21466338634490967, + "learning_rate": 9.520737325274544e-06, + "loss": 0.0046, + "step": 26800 + }, + { + "epoch": 0.4530897475980835, + "grad_norm": 0.21052201092243195, + "learning_rate": 9.52010705134609e-06, + "loss": 0.0019, + "step": 26810 + }, + { + "epoch": 0.453258747876934, + "grad_norm": 0.07716619968414307, + "learning_rate": 9.519476384146421e-06, + "loss": 0.0051, + "step": 26820 + }, + { + "epoch": 0.45342774815578446, + "grad_norm": 0.22380301356315613, + "learning_rate": 9.518845323730413e-06, + "loss": 0.0033, + "step": 26830 + }, + { + "epoch": 0.45359674843463493, + "grad_norm": 0.035338010638952255, + "learning_rate": 9.518213870152964e-06, + "loss": 0.0024, + "step": 26840 + }, + { + "epoch": 0.45376574871348535, + "grad_norm": 0.17700858414173126, + "learning_rate": 9.517582023469019e-06, + "loss": 0.0021, + "step": 26850 + }, + { + "epoch": 0.4539347489923358, + "grad_norm": 0.08837666362524033, + "learning_rate": 9.51694978373355e-06, + "loss": 0.0033, + "step": 26860 + }, + { + "epoch": 0.4541037492711863, + "grad_norm": 0.08255530148744583, + "learning_rate": 9.516317151001562e-06, + "loss": 0.0048, + "step": 26870 
+ }, + { + "epoch": 0.45427274955003677, + "grad_norm": 0.18214929103851318, + "learning_rate": 9.515684125328102e-06, + "loss": 0.0029, + "step": 26880 + }, + { + "epoch": 0.45444174982888724, + "grad_norm": 0.16345560550689697, + "learning_rate": 9.515050706768243e-06, + "loss": 0.0018, + "step": 26890 + }, + { + "epoch": 0.45461075010773766, + "grad_norm": 0.05697440356016159, + "learning_rate": 9.514416895377097e-06, + "loss": 0.0017, + "step": 26900 + }, + { + "epoch": 0.45477975038658813, + "grad_norm": 0.04156593605875969, + "learning_rate": 9.513782691209808e-06, + "loss": 0.0048, + "step": 26910 + }, + { + "epoch": 0.4549487506654386, + "grad_norm": 0.10291267931461334, + "learning_rate": 9.513148094321556e-06, + "loss": 0.0028, + "step": 26920 + }, + { + "epoch": 0.4551177509442891, + "grad_norm": 0.11824605613946915, + "learning_rate": 9.512513104767553e-06, + "loss": 0.0068, + "step": 26930 + }, + { + "epoch": 0.4552867512231395, + "grad_norm": 0.14371328055858612, + "learning_rate": 9.511877722603045e-06, + "loss": 0.002, + "step": 26940 + }, + { + "epoch": 0.45545575150198997, + "grad_norm": 0.054665789008140564, + "learning_rate": 9.511241947883316e-06, + "loss": 0.0025, + "step": 26950 + }, + { + "epoch": 0.45562475178084044, + "grad_norm": 0.1597243696451187, + "learning_rate": 9.51060578066368e-06, + "loss": 0.0031, + "step": 26960 + }, + { + "epoch": 0.4557937520596909, + "grad_norm": 0.027206890285015106, + "learning_rate": 9.509969220999485e-06, + "loss": 0.0033, + "step": 26970 + }, + { + "epoch": 0.45596275233854133, + "grad_norm": 0.21192127466201782, + "learning_rate": 9.509332268946118e-06, + "loss": 0.0027, + "step": 26980 + }, + { + "epoch": 0.4561317526173918, + "grad_norm": 0.17130592465400696, + "learning_rate": 9.508694924558996e-06, + "loss": 0.004, + "step": 26990 + }, + { + "epoch": 0.4563007528962423, + "grad_norm": 0.08280270546674728, + "learning_rate": 9.508057187893568e-06, + "loss": 0.0024, + "step": 27000 + }, + { + "epoch": 0.45646975317509275, + "grad_norm": 0.10956969857215881, + "learning_rate": 9.507419059005325e-06, + "loss": 0.0033, + "step": 27010 + }, + { + "epoch": 0.4566387534539432, + "grad_norm": 0.07973030209541321, + "learning_rate": 9.506780537949785e-06, + "loss": 0.0024, + "step": 27020 + }, + { + "epoch": 0.45680775373279364, + "grad_norm": 0.04599049314856529, + "learning_rate": 9.5061416247825e-06, + "loss": 0.003, + "step": 27030 + }, + { + "epoch": 0.4569767540116441, + "grad_norm": 0.0669611468911171, + "learning_rate": 9.505502319559062e-06, + "loss": 0.0023, + "step": 27040 + }, + { + "epoch": 0.4571457542904946, + "grad_norm": 0.18551360070705414, + "learning_rate": 9.504862622335093e-06, + "loss": 0.0031, + "step": 27050 + }, + { + "epoch": 0.45731475456934506, + "grad_norm": 0.23695415258407593, + "learning_rate": 9.50422253316625e-06, + "loss": 0.0025, + "step": 27060 + }, + { + "epoch": 0.4574837548481955, + "grad_norm": 0.23178884387016296, + "learning_rate": 9.503582052108222e-06, + "loss": 0.0039, + "step": 27070 + }, + { + "epoch": 0.45765275512704595, + "grad_norm": 0.04792691767215729, + "learning_rate": 9.502941179216734e-06, + "loss": 0.002, + "step": 27080 + }, + { + "epoch": 0.4578217554058964, + "grad_norm": 0.051211390644311905, + "learning_rate": 9.502299914547547e-06, + "loss": 0.0027, + "step": 27090 + }, + { + "epoch": 0.4579907556847469, + "grad_norm": 0.08183260262012482, + "learning_rate": 9.501658258156455e-06, + "loss": 0.0027, + "step": 27100 + }, + { + "epoch": 0.4581597559635973, + "grad_norm": 
0.10788262635469437, + "learning_rate": 9.501016210099284e-06, + "loss": 0.0023, + "step": 27110 + }, + { + "epoch": 0.4583287562424478, + "grad_norm": 0.05273312330245972, + "learning_rate": 9.500373770431893e-06, + "loss": 0.0018, + "step": 27120 + }, + { + "epoch": 0.45849775652129826, + "grad_norm": 0.13591207563877106, + "learning_rate": 9.499730939210178e-06, + "loss": 0.0029, + "step": 27130 + }, + { + "epoch": 0.45866675680014873, + "grad_norm": 0.13457539677619934, + "learning_rate": 9.49908771649007e-06, + "loss": 0.0041, + "step": 27140 + }, + { + "epoch": 0.4588357570789992, + "grad_norm": 0.04396795853972435, + "learning_rate": 9.498444102327534e-06, + "loss": 0.0031, + "step": 27150 + }, + { + "epoch": 0.4590047573578496, + "grad_norm": 0.03095605969429016, + "learning_rate": 9.497800096778565e-06, + "loss": 0.0015, + "step": 27160 + }, + { + "epoch": 0.4591737576367001, + "grad_norm": 0.10350055247545242, + "learning_rate": 9.497155699899194e-06, + "loss": 0.0019, + "step": 27170 + }, + { + "epoch": 0.45934275791555057, + "grad_norm": 0.07381950318813324, + "learning_rate": 9.496510911745489e-06, + "loss": 0.0023, + "step": 27180 + }, + { + "epoch": 0.45951175819440104, + "grad_norm": 0.07345439493656158, + "learning_rate": 9.495865732373549e-06, + "loss": 0.0026, + "step": 27190 + }, + { + "epoch": 0.45968075847325146, + "grad_norm": 0.10080543160438538, + "learning_rate": 9.495220161839505e-06, + "loss": 0.0029, + "step": 27200 + }, + { + "epoch": 0.45984975875210193, + "grad_norm": 0.07151289284229279, + "learning_rate": 9.49457420019953e-06, + "loss": 0.0018, + "step": 27210 + }, + { + "epoch": 0.4600187590309524, + "grad_norm": 0.08651597052812576, + "learning_rate": 9.493927847509821e-06, + "loss": 0.0028, + "step": 27220 + }, + { + "epoch": 0.4601877593098029, + "grad_norm": 0.013782293535768986, + "learning_rate": 9.493281103826614e-06, + "loss": 0.0028, + "step": 27230 + }, + { + "epoch": 0.4603567595886533, + "grad_norm": 0.14195019006729126, + "learning_rate": 9.492633969206184e-06, + "loss": 0.0032, + "step": 27240 + }, + { + "epoch": 0.46052575986750377, + "grad_norm": 0.2378387153148651, + "learning_rate": 9.49198644370483e-06, + "loss": 0.0042, + "step": 27250 + }, + { + "epoch": 0.46069476014635424, + "grad_norm": 0.06230916827917099, + "learning_rate": 9.491338527378892e-06, + "loss": 0.003, + "step": 27260 + }, + { + "epoch": 0.4608637604252047, + "grad_norm": 0.10277201235294342, + "learning_rate": 9.49069022028474e-06, + "loss": 0.0029, + "step": 27270 + }, + { + "epoch": 0.4610327607040552, + "grad_norm": 0.2792683243751526, + "learning_rate": 9.490041522478781e-06, + "loss": 0.0034, + "step": 27280 + }, + { + "epoch": 0.4612017609829056, + "grad_norm": 0.07100880146026611, + "learning_rate": 9.489392434017455e-06, + "loss": 0.0015, + "step": 27290 + }, + { + "epoch": 0.4613707612617561, + "grad_norm": 0.11998149007558823, + "learning_rate": 9.488742954957236e-06, + "loss": 0.0029, + "step": 27300 + }, + { + "epoch": 0.46153976154060655, + "grad_norm": 0.09208308160305023, + "learning_rate": 9.488093085354632e-06, + "loss": 0.0025, + "step": 27310 + }, + { + "epoch": 0.461708761819457, + "grad_norm": 0.11172691732645035, + "learning_rate": 9.487442825266182e-06, + "loss": 0.0019, + "step": 27320 + }, + { + "epoch": 0.46187776209830744, + "grad_norm": 0.06742815673351288, + "learning_rate": 9.486792174748467e-06, + "loss": 0.002, + "step": 27330 + }, + { + "epoch": 0.4620467623771579, + "grad_norm": 0.1170259416103363, + "learning_rate": 
9.486141133858092e-06, + "loss": 0.0022, + "step": 27340 + }, + { + "epoch": 0.4622157626560084, + "grad_norm": 0.02968103438615799, + "learning_rate": 9.485489702651703e-06, + "loss": 0.0011, + "step": 27350 + }, + { + "epoch": 0.46238476293485886, + "grad_norm": 0.08396390825510025, + "learning_rate": 9.484837881185977e-06, + "loss": 0.0019, + "step": 27360 + }, + { + "epoch": 0.4625537632137093, + "grad_norm": 0.120454341173172, + "learning_rate": 9.484185669517625e-06, + "loss": 0.0023, + "step": 27370 + }, + { + "epoch": 0.46272276349255975, + "grad_norm": 0.04382321983575821, + "learning_rate": 9.483533067703394e-06, + "loss": 0.0015, + "step": 27380 + }, + { + "epoch": 0.4628917637714102, + "grad_norm": 0.12909847497940063, + "learning_rate": 9.482880075800064e-06, + "loss": 0.0055, + "step": 27390 + }, + { + "epoch": 0.4630607640502607, + "grad_norm": 0.17938898503780365, + "learning_rate": 9.482226693864445e-06, + "loss": 0.0042, + "step": 27400 + }, + { + "epoch": 0.4632297643291111, + "grad_norm": 0.05244031921029091, + "learning_rate": 9.481572921953388e-06, + "loss": 0.0022, + "step": 27410 + }, + { + "epoch": 0.4633987646079616, + "grad_norm": 0.02196231298148632, + "learning_rate": 9.480918760123774e-06, + "loss": 0.0025, + "step": 27420 + }, + { + "epoch": 0.46356776488681206, + "grad_norm": 0.1432347148656845, + "learning_rate": 9.480264208432517e-06, + "loss": 0.0028, + "step": 27430 + }, + { + "epoch": 0.46373676516566253, + "grad_norm": 0.1046581119298935, + "learning_rate": 9.479609266936566e-06, + "loss": 0.0035, + "step": 27440 + }, + { + "epoch": 0.463905765444513, + "grad_norm": 0.018662603572010994, + "learning_rate": 9.478953935692906e-06, + "loss": 0.0032, + "step": 27450 + }, + { + "epoch": 0.4640747657233634, + "grad_norm": 0.020825045183300972, + "learning_rate": 9.47829821475855e-06, + "loss": 0.0025, + "step": 27460 + }, + { + "epoch": 0.4642437660022139, + "grad_norm": 0.07902387529611588, + "learning_rate": 9.477642104190552e-06, + "loss": 0.0023, + "step": 27470 + }, + { + "epoch": 0.46441276628106437, + "grad_norm": 0.01250784657895565, + "learning_rate": 9.476985604045998e-06, + "loss": 0.0029, + "step": 27480 + }, + { + "epoch": 0.46458176655991484, + "grad_norm": 0.039467018097639084, + "learning_rate": 9.476328714382003e-06, + "loss": 0.0025, + "step": 27490 + }, + { + "epoch": 0.46475076683876526, + "grad_norm": 0.16227799654006958, + "learning_rate": 9.475671435255722e-06, + "loss": 0.0028, + "step": 27500 + }, + { + "epoch": 0.46491976711761573, + "grad_norm": 0.21521276235580444, + "learning_rate": 9.475013766724341e-06, + "loss": 0.0033, + "step": 27510 + }, + { + "epoch": 0.4650887673964662, + "grad_norm": 0.17984019219875336, + "learning_rate": 9.474355708845082e-06, + "loss": 0.0066, + "step": 27520 + }, + { + "epoch": 0.4652577676753167, + "grad_norm": 0.1133570596575737, + "learning_rate": 9.473697261675195e-06, + "loss": 0.0034, + "step": 27530 + }, + { + "epoch": 0.4654267679541671, + "grad_norm": 0.03497983515262604, + "learning_rate": 9.473038425271973e-06, + "loss": 0.0042, + "step": 27540 + }, + { + "epoch": 0.46559576823301757, + "grad_norm": 0.03304870426654816, + "learning_rate": 9.472379199692734e-06, + "loss": 0.0032, + "step": 27550 + }, + { + "epoch": 0.46576476851186804, + "grad_norm": 0.019517891108989716, + "learning_rate": 9.471719584994836e-06, + "loss": 0.0016, + "step": 27560 + }, + { + "epoch": 0.4659337687907185, + "grad_norm": 0.05855069309473038, + "learning_rate": 9.471059581235668e-06, + "loss": 0.0018, + "step": 
27570 + }, + { + "epoch": 0.466102769069569, + "grad_norm": 0.13207753002643585, + "learning_rate": 9.470399188472655e-06, + "loss": 0.0032, + "step": 27580 + }, + { + "epoch": 0.4662717693484194, + "grad_norm": 0.23710985481739044, + "learning_rate": 9.469738406763252e-06, + "loss": 0.0032, + "step": 27590 + }, + { + "epoch": 0.4664407696272699, + "grad_norm": 0.0779089406132698, + "learning_rate": 9.46907723616495e-06, + "loss": 0.0031, + "step": 27600 + }, + { + "epoch": 0.46660976990612035, + "grad_norm": 0.030405018478631973, + "learning_rate": 9.468415676735276e-06, + "loss": 0.0027, + "step": 27610 + }, + { + "epoch": 0.4667787701849708, + "grad_norm": 0.062397025525569916, + "learning_rate": 9.467753728531789e-06, + "loss": 0.0019, + "step": 27620 + }, + { + "epoch": 0.46694777046382124, + "grad_norm": 0.058743417263031006, + "learning_rate": 9.46709139161208e-06, + "loss": 0.0016, + "step": 27630 + }, + { + "epoch": 0.4671167707426717, + "grad_norm": 0.13921846449375153, + "learning_rate": 9.466428666033778e-06, + "loss": 0.0027, + "step": 27640 + }, + { + "epoch": 0.4672857710215222, + "grad_norm": 0.025780148804187775, + "learning_rate": 9.46576555185454e-06, + "loss": 0.0032, + "step": 27650 + }, + { + "epoch": 0.46745477130037266, + "grad_norm": 0.02071690745651722, + "learning_rate": 9.465102049132062e-06, + "loss": 0.0023, + "step": 27660 + }, + { + "epoch": 0.4676237715792231, + "grad_norm": 0.19302938878536224, + "learning_rate": 9.464438157924071e-06, + "loss": 0.0039, + "step": 27670 + }, + { + "epoch": 0.46779277185807355, + "grad_norm": 0.05434525012969971, + "learning_rate": 9.463773878288331e-06, + "loss": 0.0023, + "step": 27680 + }, + { + "epoch": 0.467961772136924, + "grad_norm": 0.0356968455016613, + "learning_rate": 9.463109210282637e-06, + "loss": 0.0019, + "step": 27690 + }, + { + "epoch": 0.4681307724157745, + "grad_norm": 0.08952018618583679, + "learning_rate": 9.462444153964816e-06, + "loss": 0.0024, + "step": 27700 + }, + { + "epoch": 0.46829977269462497, + "grad_norm": 0.10880987346172333, + "learning_rate": 9.461778709392732e-06, + "loss": 0.0024, + "step": 27710 + }, + { + "epoch": 0.4684687729734754, + "grad_norm": 0.009157329797744751, + "learning_rate": 9.461112876624283e-06, + "loss": 0.0031, + "step": 27720 + }, + { + "epoch": 0.46863777325232586, + "grad_norm": 0.06396792829036713, + "learning_rate": 9.460446655717401e-06, + "loss": 0.0025, + "step": 27730 + }, + { + "epoch": 0.46880677353117634, + "grad_norm": 0.0792291983962059, + "learning_rate": 9.459780046730046e-06, + "loss": 0.0029, + "step": 27740 + }, + { + "epoch": 0.4689757738100268, + "grad_norm": 0.17137271165847778, + "learning_rate": 9.45911304972022e-06, + "loss": 0.0023, + "step": 27750 + }, + { + "epoch": 0.4691447740888772, + "grad_norm": 0.16492405533790588, + "learning_rate": 9.458445664745954e-06, + "loss": 0.0041, + "step": 27760 + }, + { + "epoch": 0.4693137743677277, + "grad_norm": 0.08875801414251328, + "learning_rate": 9.457777891865312e-06, + "loss": 0.0019, + "step": 27770 + }, + { + "epoch": 0.46948277464657817, + "grad_norm": 0.035520289093256, + "learning_rate": 9.457109731136396e-06, + "loss": 0.0023, + "step": 27780 + }, + { + "epoch": 0.46965177492542864, + "grad_norm": 0.03769254311919212, + "learning_rate": 9.456441182617339e-06, + "loss": 0.0032, + "step": 27790 + }, + { + "epoch": 0.46982077520427906, + "grad_norm": 0.022853732109069824, + "learning_rate": 9.455772246366306e-06, + "loss": 0.0029, + "step": 27800 + }, + { + "epoch": 0.46998977548312953, + 
"grad_norm": 0.08009804040193558, + "learning_rate": 9.4551029224415e-06, + "loss": 0.0017, + "step": 27810 + }, + { + "epoch": 0.47015877576198, + "grad_norm": 0.1740979701280594, + "learning_rate": 9.454433210901152e-06, + "loss": 0.0039, + "step": 27820 + }, + { + "epoch": 0.4703277760408305, + "grad_norm": 0.014562606811523438, + "learning_rate": 9.453763111803536e-06, + "loss": 0.0036, + "step": 27830 + }, + { + "epoch": 0.47049677631968095, + "grad_norm": 0.1988329291343689, + "learning_rate": 9.453092625206947e-06, + "loss": 0.0039, + "step": 27840 + }, + { + "epoch": 0.47066577659853137, + "grad_norm": 0.09475607424974442, + "learning_rate": 9.452421751169724e-06, + "loss": 0.0031, + "step": 27850 + }, + { + "epoch": 0.47083477687738184, + "grad_norm": 0.05441892519593239, + "learning_rate": 9.451750489750238e-06, + "loss": 0.0031, + "step": 27860 + }, + { + "epoch": 0.4710037771562323, + "grad_norm": 0.1322525590658188, + "learning_rate": 9.45107884100689e-06, + "loss": 0.0019, + "step": 27870 + }, + { + "epoch": 0.4711727774350828, + "grad_norm": 0.23945532739162445, + "learning_rate": 9.450406804998116e-06, + "loss": 0.0034, + "step": 27880 + }, + { + "epoch": 0.4713417777139332, + "grad_norm": 0.09466557949781418, + "learning_rate": 9.449734381782388e-06, + "loss": 0.0018, + "step": 27890 + }, + { + "epoch": 0.4715107779927837, + "grad_norm": 0.10149737447500229, + "learning_rate": 9.449061571418208e-06, + "loss": 0.0021, + "step": 27900 + }, + { + "epoch": 0.47167977827163415, + "grad_norm": 0.11542762815952301, + "learning_rate": 9.448388373964118e-06, + "loss": 0.0027, + "step": 27910 + }, + { + "epoch": 0.4718487785504846, + "grad_norm": 0.09331060945987701, + "learning_rate": 9.447714789478684e-06, + "loss": 0.0028, + "step": 27920 + }, + { + "epoch": 0.47201777882933504, + "grad_norm": 0.21507014334201813, + "learning_rate": 9.447040818020514e-06, + "loss": 0.0031, + "step": 27930 + }, + { + "epoch": 0.4721867791081855, + "grad_norm": 0.18056412041187286, + "learning_rate": 9.446366459648246e-06, + "loss": 0.0024, + "step": 27940 + }, + { + "epoch": 0.472355779387036, + "grad_norm": 0.17236560583114624, + "learning_rate": 9.445691714420553e-06, + "loss": 0.0059, + "step": 27950 + }, + { + "epoch": 0.47252477966588646, + "grad_norm": 0.06868729740381241, + "learning_rate": 9.445016582396143e-06, + "loss": 0.0023, + "step": 27960 + }, + { + "epoch": 0.47269377994473694, + "grad_norm": 0.04553517326712608, + "learning_rate": 9.44434106363375e-06, + "loss": 0.0024, + "step": 27970 + }, + { + "epoch": 0.47286278022358735, + "grad_norm": 0.14323654770851135, + "learning_rate": 9.443665158192154e-06, + "loss": 0.0034, + "step": 27980 + }, + { + "epoch": 0.4730317805024378, + "grad_norm": 0.08195087313652039, + "learning_rate": 9.442988866130159e-06, + "loss": 0.0029, + "step": 27990 + }, + { + "epoch": 0.4732007807812883, + "grad_norm": 0.04136926308274269, + "learning_rate": 9.442312187506605e-06, + "loss": 0.0026, + "step": 28000 + }, + { + "epoch": 0.4733697810601388, + "grad_norm": 0.09844597429037094, + "learning_rate": 9.441635122380367e-06, + "loss": 0.003, + "step": 28010 + }, + { + "epoch": 0.4735387813389892, + "grad_norm": 0.02087082341313362, + "learning_rate": 9.440957670810354e-06, + "loss": 0.0026, + "step": 28020 + }, + { + "epoch": 0.47370778161783966, + "grad_norm": 0.14074547588825226, + "learning_rate": 9.440279832855507e-06, + "loss": 0.0018, + "step": 28030 + }, + { + "epoch": 0.47387678189669014, + "grad_norm": 0.11037290841341019, + "learning_rate": 
9.4396016085748e-06, + "loss": 0.0035, + "step": 28040 + }, + { + "epoch": 0.4740457821755406, + "grad_norm": 0.13119235634803772, + "learning_rate": 9.438922998027242e-06, + "loss": 0.0043, + "step": 28050 + }, + { + "epoch": 0.474214782454391, + "grad_norm": 0.27193567156791687, + "learning_rate": 9.438244001271878e-06, + "loss": 0.0019, + "step": 28060 + }, + { + "epoch": 0.4743837827332415, + "grad_norm": 0.061256974935531616, + "learning_rate": 9.437564618367781e-06, + "loss": 0.0023, + "step": 28070 + }, + { + "epoch": 0.47455278301209197, + "grad_norm": 0.06362022459506989, + "learning_rate": 9.436884849374062e-06, + "loss": 0.004, + "step": 28080 + }, + { + "epoch": 0.47472178329094245, + "grad_norm": 0.09642446041107178, + "learning_rate": 9.436204694349863e-06, + "loss": 0.0019, + "step": 28090 + }, + { + "epoch": 0.47489078356979286, + "grad_norm": 0.06854920089244843, + "learning_rate": 9.435524153354363e-06, + "loss": 0.0068, + "step": 28100 + }, + { + "epoch": 0.47505978384864334, + "grad_norm": 0.1118801087141037, + "learning_rate": 9.43484322644677e-06, + "loss": 0.0048, + "step": 28110 + }, + { + "epoch": 0.4752287841274938, + "grad_norm": 0.10855915397405624, + "learning_rate": 9.434161913686331e-06, + "loss": 0.0021, + "step": 28120 + }, + { + "epoch": 0.4753977844063443, + "grad_norm": 0.1947670429944992, + "learning_rate": 9.433480215132319e-06, + "loss": 0.002, + "step": 28130 + }, + { + "epoch": 0.47556678468519475, + "grad_norm": 0.2410636693239212, + "learning_rate": 9.43279813084405e-06, + "loss": 0.0034, + "step": 28140 + }, + { + "epoch": 0.47573578496404517, + "grad_norm": 0.1882900595664978, + "learning_rate": 9.432115660880862e-06, + "loss": 0.0022, + "step": 28150 + }, + { + "epoch": 0.47590478524289564, + "grad_norm": 0.17083005607128143, + "learning_rate": 9.431432805302141e-06, + "loss": 0.0023, + "step": 28160 + }, + { + "epoch": 0.4760737855217461, + "grad_norm": 0.15488651394844055, + "learning_rate": 9.430749564167294e-06, + "loss": 0.0041, + "step": 28170 + }, + { + "epoch": 0.4762427858005966, + "grad_norm": 0.05121308192610741, + "learning_rate": 9.430065937535769e-06, + "loss": 0.0025, + "step": 28180 + }, + { + "epoch": 0.476411786079447, + "grad_norm": 0.07457858324050903, + "learning_rate": 9.429381925467042e-06, + "loss": 0.0018, + "step": 28190 + }, + { + "epoch": 0.4765807863582975, + "grad_norm": 0.16733551025390625, + "learning_rate": 9.428697528020626e-06, + "loss": 0.0018, + "step": 28200 + }, + { + "epoch": 0.47674978663714795, + "grad_norm": 0.05104765668511391, + "learning_rate": 9.428012745256068e-06, + "loss": 0.0031, + "step": 28210 + }, + { + "epoch": 0.4769187869159984, + "grad_norm": 0.1416759192943573, + "learning_rate": 9.427327577232948e-06, + "loss": 0.0013, + "step": 28220 + }, + { + "epoch": 0.47708778719484884, + "grad_norm": 0.08597423881292343, + "learning_rate": 9.426642024010877e-06, + "loss": 0.0013, + "step": 28230 + }, + { + "epoch": 0.4772567874736993, + "grad_norm": 0.11198081821203232, + "learning_rate": 9.425956085649502e-06, + "loss": 0.0034, + "step": 28240 + }, + { + "epoch": 0.4774257877525498, + "grad_norm": 0.14100314676761627, + "learning_rate": 9.425269762208504e-06, + "loss": 0.0023, + "step": 28250 + }, + { + "epoch": 0.47759478803140026, + "grad_norm": 0.041467033326625824, + "learning_rate": 9.424583053747596e-06, + "loss": 0.0034, + "step": 28260 + }, + { + "epoch": 0.47776378831025074, + "grad_norm": 0.020220177248120308, + "learning_rate": 9.423895960326524e-06, + "loss": 0.0036, + "step": 28270 
+ }, + { + "epoch": 0.47793278858910115, + "grad_norm": 0.030264656990766525, + "learning_rate": 9.423208482005068e-06, + "loss": 0.0018, + "step": 28280 + }, + { + "epoch": 0.4781017888679516, + "grad_norm": 0.05260373651981354, + "learning_rate": 9.422520618843045e-06, + "loss": 0.003, + "step": 28290 + }, + { + "epoch": 0.4782707891468021, + "grad_norm": 0.05647336319088936, + "learning_rate": 9.4218323709003e-06, + "loss": 0.0044, + "step": 28300 + }, + { + "epoch": 0.4784397894256526, + "grad_norm": 0.2026774287223816, + "learning_rate": 9.421143738236715e-06, + "loss": 0.0034, + "step": 28310 + }, + { + "epoch": 0.478608789704503, + "grad_norm": 0.07637016475200653, + "learning_rate": 9.420454720912203e-06, + "loss": 0.0046, + "step": 28320 + }, + { + "epoch": 0.47877778998335346, + "grad_norm": 0.3951351046562195, + "learning_rate": 9.419765318986713e-06, + "loss": 0.0024, + "step": 28330 + }, + { + "epoch": 0.47894679026220394, + "grad_norm": 0.03452563285827637, + "learning_rate": 9.419075532520225e-06, + "loss": 0.0034, + "step": 28340 + }, + { + "epoch": 0.4791157905410544, + "grad_norm": 0.017014149576425552, + "learning_rate": 9.418385361572758e-06, + "loss": 0.0025, + "step": 28350 + }, + { + "epoch": 0.4792847908199048, + "grad_norm": 0.1143660619854927, + "learning_rate": 9.417694806204353e-06, + "loss": 0.002, + "step": 28360 + }, + { + "epoch": 0.4794537910987553, + "grad_norm": 0.1327093243598938, + "learning_rate": 9.417003866475099e-06, + "loss": 0.0025, + "step": 28370 + }, + { + "epoch": 0.4796227913776058, + "grad_norm": 0.06873395293951035, + "learning_rate": 9.416312542445105e-06, + "loss": 0.0031, + "step": 28380 + }, + { + "epoch": 0.47979179165645625, + "grad_norm": 0.18769247829914093, + "learning_rate": 9.415620834174524e-06, + "loss": 0.0032, + "step": 28390 + }, + { + "epoch": 0.4799607919353067, + "grad_norm": 0.05309249460697174, + "learning_rate": 9.414928741723535e-06, + "loss": 0.0018, + "step": 28400 + }, + { + "epoch": 0.48012979221415714, + "grad_norm": 0.07335963100194931, + "learning_rate": 9.414236265152355e-06, + "loss": 0.0026, + "step": 28410 + }, + { + "epoch": 0.4802987924930076, + "grad_norm": 0.02062637358903885, + "learning_rate": 9.413543404521233e-06, + "loss": 0.0012, + "step": 28420 + }, + { + "epoch": 0.4804677927718581, + "grad_norm": 0.2966996431350708, + "learning_rate": 9.41285015989045e-06, + "loss": 0.0034, + "step": 28430 + }, + { + "epoch": 0.48063679305070856, + "grad_norm": 0.11095491051673889, + "learning_rate": 9.412156531320323e-06, + "loss": 0.0026, + "step": 28440 + }, + { + "epoch": 0.480805793329559, + "grad_norm": 0.09372120350599289, + "learning_rate": 9.4114625188712e-06, + "loss": 0.0023, + "step": 28450 + }, + { + "epoch": 0.48097479360840945, + "grad_norm": 0.05419611930847168, + "learning_rate": 9.410768122603464e-06, + "loss": 0.0043, + "step": 28460 + }, + { + "epoch": 0.4811437938872599, + "grad_norm": 0.08735162019729614, + "learning_rate": 9.410073342577532e-06, + "loss": 0.002, + "step": 28470 + }, + { + "epoch": 0.4813127941661104, + "grad_norm": 0.1224854364991188, + "learning_rate": 9.40937817885385e-06, + "loss": 0.0026, + "step": 28480 + }, + { + "epoch": 0.4814817944449608, + "grad_norm": 0.03866802901029587, + "learning_rate": 9.408682631492902e-06, + "loss": 0.0019, + "step": 28490 + }, + { + "epoch": 0.4816507947238113, + "grad_norm": 0.06974063068628311, + "learning_rate": 9.407986700555206e-06, + "loss": 0.0043, + "step": 28500 + }, + { + "epoch": 0.48181979500266175, + "grad_norm": 
0.06814968585968018, + "learning_rate": 9.40729038610131e-06, + "loss": 0.0022, + "step": 28510 + }, + { + "epoch": 0.48198879528151223, + "grad_norm": 0.022219954058527946, + "learning_rate": 9.406593688191796e-06, + "loss": 0.0019, + "step": 28520 + }, + { + "epoch": 0.4821577955603627, + "grad_norm": 0.1143420934677124, + "learning_rate": 9.405896606887282e-06, + "loss": 0.0036, + "step": 28530 + }, + { + "epoch": 0.4823267958392131, + "grad_norm": 0.12258830666542053, + "learning_rate": 9.405199142248414e-06, + "loss": 0.006, + "step": 28540 + }, + { + "epoch": 0.4824957961180636, + "grad_norm": 0.3413972556591034, + "learning_rate": 9.404501294335878e-06, + "loss": 0.0025, + "step": 28550 + }, + { + "epoch": 0.48266479639691406, + "grad_norm": 0.15560147166252136, + "learning_rate": 9.403803063210389e-06, + "loss": 0.0015, + "step": 28560 + }, + { + "epoch": 0.48283379667576454, + "grad_norm": 0.04303400591015816, + "learning_rate": 9.403104448932696e-06, + "loss": 0.0023, + "step": 28570 + }, + { + "epoch": 0.48300279695461495, + "grad_norm": 0.10534984618425369, + "learning_rate": 9.402405451563583e-06, + "loss": 0.0028, + "step": 28580 + }, + { + "epoch": 0.4831717972334654, + "grad_norm": 0.0458071269094944, + "learning_rate": 9.401706071163866e-06, + "loss": 0.0027, + "step": 28590 + }, + { + "epoch": 0.4833407975123159, + "grad_norm": 0.017634080722928047, + "learning_rate": 9.401006307794394e-06, + "loss": 0.0014, + "step": 28600 + }, + { + "epoch": 0.4835097977911664, + "grad_norm": 0.07521199434995651, + "learning_rate": 9.400306161516049e-06, + "loss": 0.0019, + "step": 28610 + }, + { + "epoch": 0.4836787980700168, + "grad_norm": 0.10925111174583435, + "learning_rate": 9.399605632389748e-06, + "loss": 0.0033, + "step": 28620 + }, + { + "epoch": 0.48384779834886726, + "grad_norm": 0.01852216199040413, + "learning_rate": 9.39890472047644e-06, + "loss": 0.0021, + "step": 28630 + }, + { + "epoch": 0.48401679862771774, + "grad_norm": 0.09024424850940704, + "learning_rate": 9.39820342583711e-06, + "loss": 0.0028, + "step": 28640 + }, + { + "epoch": 0.4841857989065682, + "grad_norm": 0.08193477988243103, + "learning_rate": 9.39750174853277e-06, + "loss": 0.0017, + "step": 28650 + }, + { + "epoch": 0.4843547991854187, + "grad_norm": 0.08656947314739227, + "learning_rate": 9.396799688624473e-06, + "loss": 0.0023, + "step": 28660 + }, + { + "epoch": 0.4845237994642691, + "grad_norm": 0.08420667797327042, + "learning_rate": 9.396097246173299e-06, + "loss": 0.0016, + "step": 28670 + }, + { + "epoch": 0.4846927997431196, + "grad_norm": 0.13190427422523499, + "learning_rate": 9.395394421240366e-06, + "loss": 0.0016, + "step": 28680 + }, + { + "epoch": 0.48486180002197005, + "grad_norm": 0.13366137444972992, + "learning_rate": 9.394691213886823e-06, + "loss": 0.0014, + "step": 28690 + }, + { + "epoch": 0.4850308003008205, + "grad_norm": 0.0756131261587143, + "learning_rate": 9.393987624173849e-06, + "loss": 0.0032, + "step": 28700 + }, + { + "epoch": 0.48519980057967094, + "grad_norm": 0.034523122012615204, + "learning_rate": 9.393283652162664e-06, + "loss": 0.0032, + "step": 28710 + }, + { + "epoch": 0.4853688008585214, + "grad_norm": 0.05466236546635628, + "learning_rate": 9.392579297914515e-06, + "loss": 0.0028, + "step": 28720 + }, + { + "epoch": 0.4855378011373719, + "grad_norm": 0.16884277760982513, + "learning_rate": 9.391874561490685e-06, + "loss": 0.0026, + "step": 28730 + }, + { + "epoch": 0.48570680141622236, + "grad_norm": 0.11400584131479263, + "learning_rate": 
9.391169442952488e-06, + "loss": 0.0038, + "step": 28740 + }, + { + "epoch": 0.4858758016950728, + "grad_norm": 0.04285375028848648, + "learning_rate": 9.390463942361276e-06, + "loss": 0.0019, + "step": 28750 + }, + { + "epoch": 0.48604480197392325, + "grad_norm": 0.6482422947883606, + "learning_rate": 9.389758059778427e-06, + "loss": 0.0043, + "step": 28760 + }, + { + "epoch": 0.4862138022527737, + "grad_norm": 0.04816224426031113, + "learning_rate": 9.389051795265359e-06, + "loss": 0.0029, + "step": 28770 + }, + { + "epoch": 0.4863828025316242, + "grad_norm": 0.031575001776218414, + "learning_rate": 9.388345148883517e-06, + "loss": 0.0013, + "step": 28780 + }, + { + "epoch": 0.4865518028104746, + "grad_norm": 0.06918296962976456, + "learning_rate": 9.387638120694387e-06, + "loss": 0.0042, + "step": 28790 + }, + { + "epoch": 0.4867208030893251, + "grad_norm": 0.0319158211350441, + "learning_rate": 9.386930710759482e-06, + "loss": 0.0048, + "step": 28800 + }, + { + "epoch": 0.48688980336817556, + "grad_norm": 0.11078489571809769, + "learning_rate": 9.386222919140349e-06, + "loss": 0.0024, + "step": 28810 + }, + { + "epoch": 0.48705880364702603, + "grad_norm": 0.16688930988311768, + "learning_rate": 9.385514745898569e-06, + "loss": 0.0033, + "step": 28820 + }, + { + "epoch": 0.4872278039258765, + "grad_norm": 0.11452493071556091, + "learning_rate": 9.384806191095761e-06, + "loss": 0.0021, + "step": 28830 + }, + { + "epoch": 0.4873968042047269, + "grad_norm": 0.1142348200082779, + "learning_rate": 9.384097254793567e-06, + "loss": 0.0038, + "step": 28840 + }, + { + "epoch": 0.4875658044835774, + "grad_norm": 0.08110999315977097, + "learning_rate": 9.383387937053671e-06, + "loss": 0.0029, + "step": 28850 + }, + { + "epoch": 0.48773480476242786, + "grad_norm": 0.08010591566562653, + "learning_rate": 9.382678237937788e-06, + "loss": 0.0028, + "step": 28860 + }, + { + "epoch": 0.48790380504127834, + "grad_norm": 0.025130346417427063, + "learning_rate": 9.381968157507662e-06, + "loss": 0.0015, + "step": 28870 + }, + { + "epoch": 0.48807280532012876, + "grad_norm": 0.08091887086629868, + "learning_rate": 9.381257695825076e-06, + "loss": 0.0049, + "step": 28880 + }, + { + "epoch": 0.48824180559897923, + "grad_norm": 0.07320184260606766, + "learning_rate": 9.380546852951841e-06, + "loss": 0.0026, + "step": 28890 + }, + { + "epoch": 0.4884108058778297, + "grad_norm": 0.08067914098501205, + "learning_rate": 9.379835628949809e-06, + "loss": 0.0031, + "step": 28900 + }, + { + "epoch": 0.4885798061566802, + "grad_norm": 0.03999374806880951, + "learning_rate": 9.379124023880854e-06, + "loss": 0.0018, + "step": 28910 + }, + { + "epoch": 0.4887488064355306, + "grad_norm": 0.07922464609146118, + "learning_rate": 9.378412037806891e-06, + "loss": 0.0034, + "step": 28920 + }, + { + "epoch": 0.48891780671438106, + "grad_norm": 0.13327257335186005, + "learning_rate": 9.377699670789869e-06, + "loss": 0.0021, + "step": 28930 + }, + { + "epoch": 0.48908680699323154, + "grad_norm": 0.10939807444810867, + "learning_rate": 9.376986922891765e-06, + "loss": 0.0018, + "step": 28940 + }, + { + "epoch": 0.489255807272082, + "grad_norm": 0.07308842241764069, + "learning_rate": 9.376273794174591e-06, + "loss": 0.0038, + "step": 28950 + }, + { + "epoch": 0.4894248075509325, + "grad_norm": 0.1312398463487625, + "learning_rate": 9.375560284700394e-06, + "loss": 0.0043, + "step": 28960 + }, + { + "epoch": 0.4895938078297829, + "grad_norm": 0.05857406184077263, + "learning_rate": 9.37484639453125e-06, + "loss": 0.0023, + "step": 
28970 + }, + { + "epoch": 0.4897628081086334, + "grad_norm": 0.15974996984004974, + "learning_rate": 9.374132123729274e-06, + "loss": 0.0019, + "step": 28980 + }, + { + "epoch": 0.48993180838748385, + "grad_norm": 0.17144306004047394, + "learning_rate": 9.373417472356612e-06, + "loss": 0.004, + "step": 28990 + }, + { + "epoch": 0.4901008086663343, + "grad_norm": 0.08801712095737457, + "learning_rate": 9.372702440475438e-06, + "loss": 0.0021, + "step": 29000 + }, + { + "epoch": 0.49026980894518474, + "grad_norm": 0.07644796371459961, + "learning_rate": 9.371987028147966e-06, + "loss": 0.0019, + "step": 29010 + }, + { + "epoch": 0.4904388092240352, + "grad_norm": 0.11791327595710754, + "learning_rate": 9.37127123543644e-06, + "loss": 0.0021, + "step": 29020 + }, + { + "epoch": 0.4906078095028857, + "grad_norm": 0.11407289654016495, + "learning_rate": 9.370555062403136e-06, + "loss": 0.0033, + "step": 29030 + }, + { + "epoch": 0.49077680978173616, + "grad_norm": 0.12483616918325424, + "learning_rate": 9.369838509110368e-06, + "loss": 0.0031, + "step": 29040 + }, + { + "epoch": 0.4909458100605866, + "grad_norm": 0.031986385583877563, + "learning_rate": 9.369121575620476e-06, + "loss": 0.0029, + "step": 29050 + }, + { + "epoch": 0.49111481033943705, + "grad_norm": 0.18724696338176727, + "learning_rate": 9.368404261995838e-06, + "loss": 0.0047, + "step": 29060 + }, + { + "epoch": 0.4912838106182875, + "grad_norm": 0.09730450809001923, + "learning_rate": 9.367686568298867e-06, + "loss": 0.0054, + "step": 29070 + }, + { + "epoch": 0.491452810897138, + "grad_norm": 0.10546234995126724, + "learning_rate": 9.366968494591997e-06, + "loss": 0.0044, + "step": 29080 + }, + { + "epoch": 0.49162181117598847, + "grad_norm": 0.025785325095057487, + "learning_rate": 9.366250040937713e-06, + "loss": 0.0025, + "step": 29090 + }, + { + "epoch": 0.4917908114548389, + "grad_norm": 0.06503026932477951, + "learning_rate": 9.36553120739852e-06, + "loss": 0.0029, + "step": 29100 + }, + { + "epoch": 0.49195981173368936, + "grad_norm": 0.047761380672454834, + "learning_rate": 9.36481199403696e-06, + "loss": 0.0027, + "step": 29110 + }, + { + "epoch": 0.49212881201253983, + "grad_norm": 0.05150453746318817, + "learning_rate": 9.364092400915609e-06, + "loss": 0.0016, + "step": 29120 + }, + { + "epoch": 0.4922978122913903, + "grad_norm": 0.011763407848775387, + "learning_rate": 9.363372428097075e-06, + "loss": 0.0021, + "step": 29130 + }, + { + "epoch": 0.4924668125702407, + "grad_norm": 0.16468170285224915, + "learning_rate": 9.362652075643998e-06, + "loss": 0.0031, + "step": 29140 + }, + { + "epoch": 0.4926358128490912, + "grad_norm": 0.12033814936876297, + "learning_rate": 9.361931343619053e-06, + "loss": 0.002, + "step": 29150 + }, + { + "epoch": 0.49280481312794167, + "grad_norm": 0.10892493277788162, + "learning_rate": 9.361210232084946e-06, + "loss": 0.0045, + "step": 29160 + }, + { + "epoch": 0.49297381340679214, + "grad_norm": 0.03691662847995758, + "learning_rate": 9.36048874110442e-06, + "loss": 0.0037, + "step": 29170 + }, + { + "epoch": 0.49314281368564256, + "grad_norm": 0.05275307223200798, + "learning_rate": 9.359766870740246e-06, + "loss": 0.0015, + "step": 29180 + }, + { + "epoch": 0.49331181396449303, + "grad_norm": 0.02527184970676899, + "learning_rate": 9.35904462105523e-06, + "loss": 0.0017, + "step": 29190 + }, + { + "epoch": 0.4934808142433435, + "grad_norm": 0.12803104519844055, + "learning_rate": 9.358321992112212e-06, + "loss": 0.0027, + "step": 29200 + }, + { + "epoch": 0.493649814522194, + 
"grad_norm": 0.11234404146671295, + "learning_rate": 9.357598983974066e-06, + "loss": 0.0015, + "step": 29210 + }, + { + "epoch": 0.49381881480104445, + "grad_norm": 0.04649844393134117, + "learning_rate": 9.356875596703693e-06, + "loss": 0.0029, + "step": 29220 + }, + { + "epoch": 0.49398781507989487, + "grad_norm": 0.11385609209537506, + "learning_rate": 9.356151830364035e-06, + "loss": 0.0019, + "step": 29230 + }, + { + "epoch": 0.49415681535874534, + "grad_norm": 0.12828443944454193, + "learning_rate": 9.355427685018061e-06, + "loss": 0.0043, + "step": 29240 + }, + { + "epoch": 0.4943258156375958, + "grad_norm": 0.4393269121646881, + "learning_rate": 9.354703160728774e-06, + "loss": 0.0046, + "step": 29250 + }, + { + "epoch": 0.4944948159164463, + "grad_norm": 0.2788711190223694, + "learning_rate": 9.353978257559216e-06, + "loss": 0.0027, + "step": 29260 + }, + { + "epoch": 0.4946638161952967, + "grad_norm": 0.0992347002029419, + "learning_rate": 9.353252975572453e-06, + "loss": 0.0026, + "step": 29270 + }, + { + "epoch": 0.4948328164741472, + "grad_norm": 0.08816232532262802, + "learning_rate": 9.35252731483159e-06, + "loss": 0.0026, + "step": 29280 + }, + { + "epoch": 0.49500181675299765, + "grad_norm": 0.05659950524568558, + "learning_rate": 9.351801275399761e-06, + "loss": 0.0024, + "step": 29290 + }, + { + "epoch": 0.4951708170318481, + "grad_norm": 0.1687520444393158, + "learning_rate": 9.351074857340137e-06, + "loss": 0.0039, + "step": 29300 + }, + { + "epoch": 0.49533981731069854, + "grad_norm": 0.07601363956928253, + "learning_rate": 9.350348060715917e-06, + "loss": 0.0019, + "step": 29310 + }, + { + "epoch": 0.495508817589549, + "grad_norm": 0.09154291450977325, + "learning_rate": 9.34962088559034e-06, + "loss": 0.0014, + "step": 29320 + }, + { + "epoch": 0.4956778178683995, + "grad_norm": 0.10395083576440811, + "learning_rate": 9.34889333202667e-06, + "loss": 0.0026, + "step": 29330 + }, + { + "epoch": 0.49584681814724996, + "grad_norm": 0.07902251183986664, + "learning_rate": 9.34816540008821e-06, + "loss": 0.0026, + "step": 29340 + }, + { + "epoch": 0.49601581842610043, + "grad_norm": 0.18695111572742462, + "learning_rate": 9.347437089838294e-06, + "loss": 0.0039, + "step": 29350 + }, + { + "epoch": 0.49618481870495085, + "grad_norm": 0.10042164474725723, + "learning_rate": 9.346708401340285e-06, + "loss": 0.0033, + "step": 29360 + }, + { + "epoch": 0.4963538189838013, + "grad_norm": 0.0964827910065651, + "learning_rate": 9.345979334657587e-06, + "loss": 0.0028, + "step": 29370 + }, + { + "epoch": 0.4965228192626518, + "grad_norm": 0.05580337718129158, + "learning_rate": 9.34524988985363e-06, + "loss": 0.0026, + "step": 29380 + }, + { + "epoch": 0.49669181954150227, + "grad_norm": 0.06996355950832367, + "learning_rate": 9.344520066991878e-06, + "loss": 0.0017, + "step": 29390 + }, + { + "epoch": 0.4968608198203527, + "grad_norm": 0.14571979641914368, + "learning_rate": 9.34378986613583e-06, + "loss": 0.0033, + "step": 29400 + }, + { + "epoch": 0.49702982009920316, + "grad_norm": 0.04337066411972046, + "learning_rate": 9.343059287349019e-06, + "loss": 0.0015, + "step": 29410 + }, + { + "epoch": 0.49719882037805363, + "grad_norm": 0.06206294894218445, + "learning_rate": 9.342328330695005e-06, + "loss": 0.003, + "step": 29420 + }, + { + "epoch": 0.4973678206569041, + "grad_norm": 0.036904092878103256, + "learning_rate": 9.34159699623739e-06, + "loss": 0.0025, + "step": 29430 + }, + { + "epoch": 0.4975368209357545, + "grad_norm": 0.24361184239387512, + "learning_rate": 
9.3408652840398e-06, + "loss": 0.0031, + "step": 29440 + }, + { + "epoch": 0.497705821214605, + "grad_norm": 0.045649971812963486, + "learning_rate": 9.340133194165899e-06, + "loss": 0.0034, + "step": 29450 + }, + { + "epoch": 0.49787482149345547, + "grad_norm": 0.23143626749515533, + "learning_rate": 9.33940072667938e-06, + "loss": 0.0046, + "step": 29460 + }, + { + "epoch": 0.49804382177230594, + "grad_norm": 0.012344293296337128, + "learning_rate": 9.338667881643973e-06, + "loss": 0.0021, + "step": 29470 + }, + { + "epoch": 0.4982128220511564, + "grad_norm": 0.08606468141078949, + "learning_rate": 9.33793465912344e-06, + "loss": 0.0026, + "step": 29480 + }, + { + "epoch": 0.49838182233000683, + "grad_norm": 0.15866480767726898, + "learning_rate": 9.337201059181572e-06, + "loss": 0.0019, + "step": 29490 + }, + { + "epoch": 0.4985508226088573, + "grad_norm": 0.10740579664707184, + "learning_rate": 9.336467081882202e-06, + "loss": 0.0031, + "step": 29500 + }, + { + "epoch": 0.4987198228877078, + "grad_norm": 0.06434217095375061, + "learning_rate": 9.335732727289182e-06, + "loss": 0.003, + "step": 29510 + }, + { + "epoch": 0.49888882316655825, + "grad_norm": 0.1749943196773529, + "learning_rate": 9.334997995466408e-06, + "loss": 0.0027, + "step": 29520 + }, + { + "epoch": 0.49905782344540867, + "grad_norm": 0.16625481843948364, + "learning_rate": 9.334262886477804e-06, + "loss": 0.0016, + "step": 29530 + }, + { + "epoch": 0.49922682372425914, + "grad_norm": 0.19593088328838348, + "learning_rate": 9.33352740038733e-06, + "loss": 0.0015, + "step": 29540 + }, + { + "epoch": 0.4993958240031096, + "grad_norm": 0.07355517894029617, + "learning_rate": 9.332791537258977e-06, + "loss": 0.002, + "step": 29550 + }, + { + "epoch": 0.4995648242819601, + "grad_norm": 0.07157208025455475, + "learning_rate": 9.332055297156763e-06, + "loss": 0.0027, + "step": 29560 + }, + { + "epoch": 0.4997338245608105, + "grad_norm": 0.10493667423725128, + "learning_rate": 9.331318680144754e-06, + "loss": 0.0043, + "step": 29570 + }, + { + "epoch": 0.499902824839661, + "grad_norm": 0.10629831999540329, + "learning_rate": 9.33058168628703e-06, + "loss": 0.0026, + "step": 29580 + }, + { + "epoch": 0.5000718251185114, + "grad_norm": 0.115446075797081, + "learning_rate": 9.329844315647721e-06, + "loss": 0.0026, + "step": 29590 + }, + { + "epoch": 0.5002408253973619, + "grad_norm": 0.08104785531759262, + "learning_rate": 9.329106568290976e-06, + "loss": 0.0032, + "step": 29600 + }, + { + "epoch": 0.5004098256762124, + "grad_norm": 0.07581508904695511, + "learning_rate": 9.328368444280983e-06, + "loss": 0.0025, + "step": 29610 + }, + { + "epoch": 0.5005788259550629, + "grad_norm": 0.1760362833738327, + "learning_rate": 9.327629943681966e-06, + "loss": 0.0041, + "step": 29620 + }, + { + "epoch": 0.5007478262339133, + "grad_norm": 0.0963960513472557, + "learning_rate": 9.326891066558174e-06, + "loss": 0.0047, + "step": 29630 + }, + { + "epoch": 0.5009168265127637, + "grad_norm": 0.14027276635169983, + "learning_rate": 9.326151812973898e-06, + "loss": 0.0018, + "step": 29640 + }, + { + "epoch": 0.5010858267916142, + "grad_norm": 0.2154625505208969, + "learning_rate": 9.325412182993452e-06, + "loss": 0.0023, + "step": 29650 + }, + { + "epoch": 0.5012548270704646, + "grad_norm": 0.06819602102041245, + "learning_rate": 9.324672176681189e-06, + "loss": 0.0023, + "step": 29660 + }, + { + "epoch": 0.5014238273493151, + "grad_norm": 0.047595296055078506, + "learning_rate": 9.323931794101493e-06, + "loss": 0.0014, + "step": 29670 + }, + { 
+ "epoch": 0.5015928276281656, + "grad_norm": 0.14946642518043518, + "learning_rate": 9.32319103531878e-06, + "loss": 0.0019, + "step": 29680 + }, + { + "epoch": 0.5017618279070161, + "grad_norm": 0.10646338760852814, + "learning_rate": 9.322449900397502e-06, + "loss": 0.0024, + "step": 29690 + }, + { + "epoch": 0.5019308281858665, + "grad_norm": 0.10761136561632156, + "learning_rate": 9.32170838940214e-06, + "loss": 0.003, + "step": 29700 + }, + { + "epoch": 0.502099828464717, + "grad_norm": 0.15118083357810974, + "learning_rate": 9.320966502397208e-06, + "loss": 0.0034, + "step": 29710 + }, + { + "epoch": 0.5022688287435674, + "grad_norm": 0.04990677163004875, + "learning_rate": 9.320224239447256e-06, + "loss": 0.0024, + "step": 29720 + }, + { + "epoch": 0.5024378290224178, + "grad_norm": 0.10470911860466003, + "learning_rate": 9.319481600616862e-06, + "loss": 0.0022, + "step": 29730 + }, + { + "epoch": 0.5026068293012683, + "grad_norm": 0.08429449051618576, + "learning_rate": 9.318738585970642e-06, + "loss": 0.0038, + "step": 29740 + }, + { + "epoch": 0.5027758295801188, + "grad_norm": 0.056416355073451996, + "learning_rate": 9.317995195573237e-06, + "loss": 0.0027, + "step": 29750 + }, + { + "epoch": 0.5029448298589693, + "grad_norm": 0.1960950344800949, + "learning_rate": 9.31725142948933e-06, + "loss": 0.0027, + "step": 29760 + }, + { + "epoch": 0.5031138301378197, + "grad_norm": 0.06585361063480377, + "learning_rate": 9.316507287783634e-06, + "loss": 0.0029, + "step": 29770 + }, + { + "epoch": 0.5032828304166702, + "grad_norm": 0.03832526504993439, + "learning_rate": 9.315762770520887e-06, + "loss": 0.0023, + "step": 29780 + }, + { + "epoch": 0.5034518306955207, + "grad_norm": 0.07613620162010193, + "learning_rate": 9.31501787776587e-06, + "loss": 0.0015, + "step": 29790 + }, + { + "epoch": 0.5036208309743712, + "grad_norm": 0.0848083421587944, + "learning_rate": 9.31427260958339e-06, + "loss": 0.0025, + "step": 29800 + }, + { + "epoch": 0.5037898312532215, + "grad_norm": 0.12784676253795624, + "learning_rate": 9.31352696603829e-06, + "loss": 0.0025, + "step": 29810 + }, + { + "epoch": 0.503958831532072, + "grad_norm": 0.05923425406217575, + "learning_rate": 9.312780947195446e-06, + "loss": 0.002, + "step": 29820 + }, + { + "epoch": 0.5041278318109225, + "grad_norm": 0.0461299754679203, + "learning_rate": 9.312034553119761e-06, + "loss": 0.0031, + "step": 29830 + }, + { + "epoch": 0.5042968320897729, + "grad_norm": 0.19783492386341095, + "learning_rate": 9.311287783876181e-06, + "loss": 0.0025, + "step": 29840 + }, + { + "epoch": 0.5044658323686234, + "grad_norm": 0.02259715087711811, + "learning_rate": 9.310540639529673e-06, + "loss": 0.002, + "step": 29850 + }, + { + "epoch": 0.5046348326474739, + "grad_norm": 0.11579112708568573, + "learning_rate": 9.309793120145245e-06, + "loss": 0.0024, + "step": 29860 + }, + { + "epoch": 0.5048038329263244, + "grad_norm": 0.35851532220840454, + "learning_rate": 9.309045225787933e-06, + "loss": 0.0031, + "step": 29870 + }, + { + "epoch": 0.5049728332051748, + "grad_norm": 0.09024239331483841, + "learning_rate": 9.308296956522809e-06, + "loss": 0.0035, + "step": 29880 + }, + { + "epoch": 0.5051418334840252, + "grad_norm": 0.04510599747300148, + "learning_rate": 9.307548312414975e-06, + "loss": 0.0026, + "step": 29890 + }, + { + "epoch": 0.5053108337628757, + "grad_norm": 0.013859073631465435, + "learning_rate": 9.306799293529569e-06, + "loss": 0.0019, + "step": 29900 + }, + { + "epoch": 0.5054798340417261, + "grad_norm": 0.018246594816446304, + 
"learning_rate": 9.306049899931755e-06, + "loss": 0.0015, + "step": 29910 + }, + { + "epoch": 0.5056488343205766, + "grad_norm": 0.02640846185386181, + "learning_rate": 9.305300131686739e-06, + "loss": 0.0023, + "step": 29920 + }, + { + "epoch": 0.5058178345994271, + "grad_norm": 0.05299566313624382, + "learning_rate": 9.30454998885975e-06, + "loss": 0.0018, + "step": 29930 + }, + { + "epoch": 0.5059868348782776, + "grad_norm": 0.06415662169456482, + "learning_rate": 9.303799471516057e-06, + "loss": 0.0021, + "step": 29940 + }, + { + "epoch": 0.506155835157128, + "grad_norm": 0.05302102863788605, + "learning_rate": 9.303048579720959e-06, + "loss": 0.0026, + "step": 29950 + }, + { + "epoch": 0.5063248354359785, + "grad_norm": 0.06410418450832367, + "learning_rate": 9.302297313539783e-06, + "loss": 0.0024, + "step": 29960 + }, + { + "epoch": 0.506493835714829, + "grad_norm": 0.02581281214952469, + "learning_rate": 9.301545673037898e-06, + "loss": 0.0041, + "step": 29970 + }, + { + "epoch": 0.5066628359936793, + "grad_norm": 0.11411970108747482, + "learning_rate": 9.300793658280696e-06, + "loss": 0.0023, + "step": 29980 + }, + { + "epoch": 0.5068318362725298, + "grad_norm": 0.10526223480701447, + "learning_rate": 9.300041269333609e-06, + "loss": 0.0025, + "step": 29990 + }, + { + "epoch": 0.5070008365513803, + "grad_norm": 0.06469008326530457, + "learning_rate": 9.299288506262097e-06, + "loss": 0.0032, + "step": 30000 + }, + { + "epoch": 0.5071698368302308, + "grad_norm": 0.10594971477985382, + "learning_rate": 9.298535369131654e-06, + "loss": 0.0024, + "step": 30010 + }, + { + "epoch": 0.5073388371090812, + "grad_norm": 0.12533098459243774, + "learning_rate": 9.297781858007808e-06, + "loss": 0.0032, + "step": 30020 + }, + { + "epoch": 0.5075078373879317, + "grad_norm": 0.12551948428153992, + "learning_rate": 9.297027972956116e-06, + "loss": 0.0018, + "step": 30030 + }, + { + "epoch": 0.5076768376667822, + "grad_norm": 0.035782407969236374, + "learning_rate": 9.296273714042172e-06, + "loss": 0.0037, + "step": 30040 + }, + { + "epoch": 0.5078458379456326, + "grad_norm": 0.17860619723796844, + "learning_rate": 9.295519081331598e-06, + "loss": 0.0038, + "step": 30050 + }, + { + "epoch": 0.5080148382244831, + "grad_norm": 0.1742449700832367, + "learning_rate": 9.294764074890051e-06, + "loss": 0.0031, + "step": 30060 + }, + { + "epoch": 0.5081838385033335, + "grad_norm": 0.09593408554792404, + "learning_rate": 9.294008694783223e-06, + "loss": 0.0019, + "step": 30070 + }, + { + "epoch": 0.508352838782184, + "grad_norm": 0.014105924405157566, + "learning_rate": 9.293252941076832e-06, + "loss": 0.0016, + "step": 30080 + }, + { + "epoch": 0.5085218390610344, + "grad_norm": 0.1020912230014801, + "learning_rate": 9.292496813836634e-06, + "loss": 0.0022, + "step": 30090 + }, + { + "epoch": 0.5086908393398849, + "grad_norm": 0.12688863277435303, + "learning_rate": 9.291740313128416e-06, + "loss": 0.0033, + "step": 30100 + }, + { + "epoch": 0.5088598396187354, + "grad_norm": 0.0973917618393898, + "learning_rate": 9.290983439017998e-06, + "loss": 0.0024, + "step": 30110 + }, + { + "epoch": 0.5090288398975858, + "grad_norm": 0.14230580627918243, + "learning_rate": 9.290226191571228e-06, + "loss": 0.0023, + "step": 30120 + }, + { + "epoch": 0.5091978401764363, + "grad_norm": 0.13736513257026672, + "learning_rate": 9.289468570853995e-06, + "loss": 0.0063, + "step": 30130 + }, + { + "epoch": 0.5093668404552868, + "grad_norm": 0.3065262734889984, + "learning_rate": 9.288710576932211e-06, + "loss": 0.0029, + 
"step": 30140 + }, + { + "epoch": 0.5095358407341372, + "grad_norm": 0.03719579428434372, + "learning_rate": 9.287952209871829e-06, + "loss": 0.0034, + "step": 30150 + }, + { + "epoch": 0.5097048410129876, + "grad_norm": 0.12979014217853546, + "learning_rate": 9.287193469738828e-06, + "loss": 0.0032, + "step": 30160 + }, + { + "epoch": 0.5098738412918381, + "grad_norm": 0.025296248495578766, + "learning_rate": 9.286434356599225e-06, + "loss": 0.003, + "step": 30170 + }, + { + "epoch": 0.5100428415706886, + "grad_norm": 0.02095586247742176, + "learning_rate": 9.285674870519064e-06, + "loss": 0.0024, + "step": 30180 + }, + { + "epoch": 0.510211841849539, + "grad_norm": 0.0356566496193409, + "learning_rate": 9.284915011564423e-06, + "loss": 0.0015, + "step": 30190 + }, + { + "epoch": 0.5103808421283895, + "grad_norm": 0.10899782180786133, + "learning_rate": 9.284154779801417e-06, + "loss": 0.0021, + "step": 30200 + }, + { + "epoch": 0.51054984240724, + "grad_norm": 0.11044955253601074, + "learning_rate": 9.28339417529619e-06, + "loss": 0.0026, + "step": 30210 + }, + { + "epoch": 0.5107188426860905, + "grad_norm": 0.05205506831407547, + "learning_rate": 9.282633198114913e-06, + "loss": 0.0023, + "step": 30220 + }, + { + "epoch": 0.5108878429649409, + "grad_norm": 0.09147424250841141, + "learning_rate": 9.281871848323797e-06, + "loss": 0.0021, + "step": 30230 + }, + { + "epoch": 0.5110568432437913, + "grad_norm": 0.07484003156423569, + "learning_rate": 9.281110125989087e-06, + "loss": 0.002, + "step": 30240 + }, + { + "epoch": 0.5112258435226418, + "grad_norm": 0.09416460990905762, + "learning_rate": 9.280348031177053e-06, + "loss": 0.0025, + "step": 30250 + }, + { + "epoch": 0.5113948438014922, + "grad_norm": 0.13577759265899658, + "learning_rate": 9.279585563954002e-06, + "loss": 0.0023, + "step": 30260 + }, + { + "epoch": 0.5115638440803427, + "grad_norm": 0.08064518868923187, + "learning_rate": 9.27882272438627e-06, + "loss": 0.0035, + "step": 30270 + }, + { + "epoch": 0.5117328443591932, + "grad_norm": 0.032194700092077255, + "learning_rate": 9.278059512540229e-06, + "loss": 0.0021, + "step": 30280 + }, + { + "epoch": 0.5119018446380437, + "grad_norm": 0.032202497124671936, + "learning_rate": 9.277295928482285e-06, + "loss": 0.0027, + "step": 30290 + }, + { + "epoch": 0.5120708449168941, + "grad_norm": 0.05126439407467842, + "learning_rate": 9.27653197227887e-06, + "loss": 0.0022, + "step": 30300 + }, + { + "epoch": 0.5122398451957446, + "grad_norm": 0.06987911462783813, + "learning_rate": 9.275767643996454e-06, + "loss": 0.0029, + "step": 30310 + }, + { + "epoch": 0.5124088454745951, + "grad_norm": 0.3274784982204437, + "learning_rate": 9.275002943701537e-06, + "loss": 0.0028, + "step": 30320 + }, + { + "epoch": 0.5125778457534454, + "grad_norm": 0.10538214445114136, + "learning_rate": 9.27423787146065e-06, + "loss": 0.0025, + "step": 30330 + }, + { + "epoch": 0.5127468460322959, + "grad_norm": 0.12387062609195709, + "learning_rate": 9.273472427340357e-06, + "loss": 0.0011, + "step": 30340 + }, + { + "epoch": 0.5129158463111464, + "grad_norm": 0.09943099319934845, + "learning_rate": 9.27270661140726e-06, + "loss": 0.0022, + "step": 30350 + }, + { + "epoch": 0.5130848465899969, + "grad_norm": 0.16701321303844452, + "learning_rate": 9.271940423727986e-06, + "loss": 0.0034, + "step": 30360 + }, + { + "epoch": 0.5132538468688473, + "grad_norm": 0.16028109192848206, + "learning_rate": 9.271173864369196e-06, + "loss": 0.0031, + "step": 30370 + }, + { + "epoch": 0.5134228471476978, + 
"grad_norm": 0.050224725157022476, + "learning_rate": 9.270406933397587e-06, + "loss": 0.0063, + "step": 30380 + }, + { + "epoch": 0.5135918474265483, + "grad_norm": 0.06589037925004959, + "learning_rate": 9.269639630879884e-06, + "loss": 0.002, + "step": 30390 + }, + { + "epoch": 0.5137608477053988, + "grad_norm": 0.24337688088417053, + "learning_rate": 9.268871956882844e-06, + "loss": 0.0051, + "step": 30400 + }, + { + "epoch": 0.5139298479842491, + "grad_norm": 0.08122189342975616, + "learning_rate": 9.268103911473262e-06, + "loss": 0.0019, + "step": 30410 + }, + { + "epoch": 0.5140988482630996, + "grad_norm": 0.14038895070552826, + "learning_rate": 9.267335494717959e-06, + "loss": 0.0034, + "step": 30420 + }, + { + "epoch": 0.5142678485419501, + "grad_norm": 0.19843675196170807, + "learning_rate": 9.266566706683795e-06, + "loss": 0.0021, + "step": 30430 + }, + { + "epoch": 0.5144368488208005, + "grad_norm": 0.09629879146814346, + "learning_rate": 9.265797547437653e-06, + "loss": 0.0037, + "step": 30440 + }, + { + "epoch": 0.514605849099651, + "grad_norm": 0.13716882467269897, + "learning_rate": 9.265028017046459e-06, + "loss": 0.0029, + "step": 30450 + }, + { + "epoch": 0.5147748493785015, + "grad_norm": 0.09429722279310226, + "learning_rate": 9.26425811557716e-06, + "loss": 0.0033, + "step": 30460 + }, + { + "epoch": 0.514943849657352, + "grad_norm": 0.07236813008785248, + "learning_rate": 9.263487843096746e-06, + "loss": 0.0031, + "step": 30470 + }, + { + "epoch": 0.5151128499362024, + "grad_norm": 0.07731340080499649, + "learning_rate": 9.26271719967223e-06, + "loss": 0.0013, + "step": 30480 + }, + { + "epoch": 0.5152818502150529, + "grad_norm": 0.1405019909143448, + "learning_rate": 9.261946185370668e-06, + "loss": 0.0042, + "step": 30490 + }, + { + "epoch": 0.5154508504939033, + "grad_norm": 0.04756263270974159, + "learning_rate": 9.261174800259137e-06, + "loss": 0.0042, + "step": 30500 + }, + { + "epoch": 0.5156198507727537, + "grad_norm": 0.054839082062244415, + "learning_rate": 9.26040304440475e-06, + "loss": 0.0026, + "step": 30510 + }, + { + "epoch": 0.5157888510516042, + "grad_norm": 0.01461437065154314, + "learning_rate": 9.259630917874659e-06, + "loss": 0.0021, + "step": 30520 + }, + { + "epoch": 0.5159578513304547, + "grad_norm": 0.1616632491350174, + "learning_rate": 9.25885842073604e-06, + "loss": 0.0029, + "step": 30530 + }, + { + "epoch": 0.5161268516093052, + "grad_norm": 0.07244772464036942, + "learning_rate": 9.258085553056101e-06, + "loss": 0.002, + "step": 30540 + }, + { + "epoch": 0.5162958518881556, + "grad_norm": 0.023395681753754616, + "learning_rate": 9.257312314902089e-06, + "loss": 0.0019, + "step": 30550 + }, + { + "epoch": 0.5164648521670061, + "grad_norm": 0.1325421929359436, + "learning_rate": 9.256538706341279e-06, + "loss": 0.0034, + "step": 30560 + }, + { + "epoch": 0.5166338524458566, + "grad_norm": 0.0765732079744339, + "learning_rate": 9.255764727440977e-06, + "loss": 0.0029, + "step": 30570 + }, + { + "epoch": 0.5168028527247069, + "grad_norm": 0.056597087532281876, + "learning_rate": 9.254990378268527e-06, + "loss": 0.0015, + "step": 30580 + }, + { + "epoch": 0.5169718530035574, + "grad_norm": 0.13077285885810852, + "learning_rate": 9.254215658891296e-06, + "loss": 0.0063, + "step": 30590 + }, + { + "epoch": 0.5171408532824079, + "grad_norm": 0.12355636805295944, + "learning_rate": 9.25344056937669e-06, + "loss": 0.0042, + "step": 30600 + }, + { + "epoch": 0.5173098535612584, + "grad_norm": 0.17257574200630188, + "learning_rate": 
9.252665109792148e-06, + "loss": 0.0047, + "step": 30610 + }, + { + "epoch": 0.5174788538401088, + "grad_norm": 0.06446636468172073, + "learning_rate": 9.251889280205135e-06, + "loss": 0.0023, + "step": 30620 + }, + { + "epoch": 0.5176478541189593, + "grad_norm": 0.032882850617170334, + "learning_rate": 9.251113080683154e-06, + "loss": 0.0034, + "step": 30630 + }, + { + "epoch": 0.5178168543978098, + "grad_norm": 0.2541007399559021, + "learning_rate": 9.250336511293738e-06, + "loss": 0.0025, + "step": 30640 + }, + { + "epoch": 0.5179858546766603, + "grad_norm": 0.07943384349346161, + "learning_rate": 9.249559572104453e-06, + "loss": 0.0023, + "step": 30650 + }, + { + "epoch": 0.5181548549555107, + "grad_norm": 0.1908251792192459, + "learning_rate": 9.248782263182896e-06, + "loss": 0.0034, + "step": 30660 + }, + { + "epoch": 0.5183238552343611, + "grad_norm": 0.0682080015540123, + "learning_rate": 9.248004584596695e-06, + "loss": 0.0019, + "step": 30670 + }, + { + "epoch": 0.5184928555132116, + "grad_norm": 0.05401146411895752, + "learning_rate": 9.247226536413514e-06, + "loss": 0.0018, + "step": 30680 + }, + { + "epoch": 0.518661855792062, + "grad_norm": 0.09396155923604965, + "learning_rate": 9.246448118701044e-06, + "loss": 0.0027, + "step": 30690 + }, + { + "epoch": 0.5188308560709125, + "grad_norm": 0.11234533041715622, + "learning_rate": 9.245669331527016e-06, + "loss": 0.0018, + "step": 30700 + }, + { + "epoch": 0.518999856349763, + "grad_norm": 0.11173015832901001, + "learning_rate": 9.244890174959183e-06, + "loss": 0.0031, + "step": 30710 + }, + { + "epoch": 0.5191688566286135, + "grad_norm": 0.04353360831737518, + "learning_rate": 9.24411064906534e-06, + "loss": 0.0013, + "step": 30720 + }, + { + "epoch": 0.5193378569074639, + "grad_norm": 0.0787418782711029, + "learning_rate": 9.243330753913305e-06, + "loss": 0.0034, + "step": 30730 + }, + { + "epoch": 0.5195068571863144, + "grad_norm": 0.17237931489944458, + "learning_rate": 9.242550489570935e-06, + "loss": 0.0019, + "step": 30740 + }, + { + "epoch": 0.5196758574651649, + "grad_norm": 0.08603113889694214, + "learning_rate": 9.241769856106119e-06, + "loss": 0.0026, + "step": 30750 + }, + { + "epoch": 0.5198448577440152, + "grad_norm": 0.1021459698677063, + "learning_rate": 9.240988853586774e-06, + "loss": 0.0025, + "step": 30760 + }, + { + "epoch": 0.5200138580228657, + "grad_norm": 0.14208252727985382, + "learning_rate": 9.24020748208085e-06, + "loss": 0.0032, + "step": 30770 + }, + { + "epoch": 0.5201828583017162, + "grad_norm": 0.04353468865156174, + "learning_rate": 9.23942574165633e-06, + "loss": 0.0022, + "step": 30780 + }, + { + "epoch": 0.5203518585805667, + "grad_norm": 0.053181033581495285, + "learning_rate": 9.23864363238123e-06, + "loss": 0.0013, + "step": 30790 + }, + { + "epoch": 0.5205208588594171, + "grad_norm": 0.14026151597499847, + "learning_rate": 9.237861154323596e-06, + "loss": 0.0029, + "step": 30800 + }, + { + "epoch": 0.5206898591382676, + "grad_norm": 0.006315328646451235, + "learning_rate": 9.23707830755151e-06, + "loss": 0.0037, + "step": 30810 + }, + { + "epoch": 0.5208588594171181, + "grad_norm": 0.04784843325614929, + "learning_rate": 9.236295092133083e-06, + "loss": 0.004, + "step": 30820 + }, + { + "epoch": 0.5210278596959685, + "grad_norm": 0.06517542898654938, + "learning_rate": 9.235511508136456e-06, + "loss": 0.0024, + "step": 30830 + }, + { + "epoch": 0.5211968599748189, + "grad_norm": 0.38778600096702576, + "learning_rate": 9.234727555629807e-06, + "loss": 0.0029, + "step": 30840 + }, + { + 
"epoch": 0.5213658602536694, + "grad_norm": 0.23829753696918488, + "learning_rate": 9.233943234681345e-06, + "loss": 0.0028, + "step": 30850 + }, + { + "epoch": 0.5215348605325199, + "grad_norm": 0.06499862670898438, + "learning_rate": 9.233158545359304e-06, + "loss": 0.0025, + "step": 30860 + }, + { + "epoch": 0.5217038608113703, + "grad_norm": 0.170320063829422, + "learning_rate": 9.232373487731962e-06, + "loss": 0.0045, + "step": 30870 + }, + { + "epoch": 0.5218728610902208, + "grad_norm": 0.07646413147449493, + "learning_rate": 9.231588061867619e-06, + "loss": 0.0022, + "step": 30880 + }, + { + "epoch": 0.5220418613690713, + "grad_norm": 0.010970628820359707, + "learning_rate": 9.230802267834613e-06, + "loss": 0.0032, + "step": 30890 + }, + { + "epoch": 0.5222108616479217, + "grad_norm": 0.061763424426317215, + "learning_rate": 9.23001610570131e-06, + "loss": 0.0035, + "step": 30900 + }, + { + "epoch": 0.5223798619267722, + "grad_norm": 0.050179943442344666, + "learning_rate": 9.229229575536112e-06, + "loss": 0.0063, + "step": 30910 + }, + { + "epoch": 0.5225488622056227, + "grad_norm": 0.09505718946456909, + "learning_rate": 9.228442677407448e-06, + "loss": 0.0026, + "step": 30920 + }, + { + "epoch": 0.522717862484473, + "grad_norm": 0.05893668159842491, + "learning_rate": 9.227655411383785e-06, + "loss": 0.0014, + "step": 30930 + }, + { + "epoch": 0.5228868627633235, + "grad_norm": 0.040056515485048294, + "learning_rate": 9.226867777533619e-06, + "loss": 0.0031, + "step": 30940 + }, + { + "epoch": 0.523055863042174, + "grad_norm": 0.406801700592041, + "learning_rate": 9.226079775925475e-06, + "loss": 0.003, + "step": 30950 + }, + { + "epoch": 0.5232248633210245, + "grad_norm": 0.15354953706264496, + "learning_rate": 9.225291406627914e-06, + "loss": 0.0036, + "step": 30960 + }, + { + "epoch": 0.5233938635998749, + "grad_norm": 0.026319092139601707, + "learning_rate": 9.22450266970953e-06, + "loss": 0.0024, + "step": 30970 + }, + { + "epoch": 0.5235628638787254, + "grad_norm": 0.08271504193544388, + "learning_rate": 9.223713565238947e-06, + "loss": 0.003, + "step": 30980 + }, + { + "epoch": 0.5237318641575759, + "grad_norm": 0.054755691438913345, + "learning_rate": 9.222924093284818e-06, + "loss": 0.0032, + "step": 30990 + }, + { + "epoch": 0.5239008644364264, + "grad_norm": 0.05949968844652176, + "learning_rate": 9.222134253915831e-06, + "loss": 0.0026, + "step": 31000 + }, + { + "epoch": 0.5240698647152768, + "grad_norm": 0.03801442310214043, + "learning_rate": 9.221344047200709e-06, + "loss": 0.0031, + "step": 31010 + }, + { + "epoch": 0.5242388649941272, + "grad_norm": 0.027336781844496727, + "learning_rate": 9.220553473208201e-06, + "loss": 0.0044, + "step": 31020 + }, + { + "epoch": 0.5244078652729777, + "grad_norm": 0.03937138244509697, + "learning_rate": 9.219762532007093e-06, + "loss": 0.002, + "step": 31030 + }, + { + "epoch": 0.5245768655518281, + "grad_norm": 0.015671947970986366, + "learning_rate": 9.218971223666197e-06, + "loss": 0.0018, + "step": 31040 + }, + { + "epoch": 0.5247458658306786, + "grad_norm": 0.05549074336886406, + "learning_rate": 9.218179548254364e-06, + "loss": 0.0046, + "step": 31050 + }, + { + "epoch": 0.5249148661095291, + "grad_norm": 0.14241427183151245, + "learning_rate": 9.217387505840473e-06, + "loss": 0.0024, + "step": 31060 + }, + { + "epoch": 0.5250838663883796, + "grad_norm": 0.04867669567465782, + "learning_rate": 9.216595096493436e-06, + "loss": 0.0028, + "step": 31070 + }, + { + "epoch": 0.52525286666723, + "grad_norm": 
0.09113096445798874, + "learning_rate": 9.215802320282197e-06, + "loss": 0.0031, + "step": 31080 + }, + { + "epoch": 0.5254218669460805, + "grad_norm": 0.027196591719985008, + "learning_rate": 9.215009177275729e-06, + "loss": 0.0029, + "step": 31090 + }, + { + "epoch": 0.5255908672249309, + "grad_norm": 0.06898143142461777, + "learning_rate": 9.214215667543039e-06, + "loss": 0.0032, + "step": 31100 + }, + { + "epoch": 0.5257598675037813, + "grad_norm": 0.07773678749799728, + "learning_rate": 9.21342179115317e-06, + "loss": 0.0026, + "step": 31110 + }, + { + "epoch": 0.5259288677826318, + "grad_norm": 0.12087377160787582, + "learning_rate": 9.212627548175188e-06, + "loss": 0.0028, + "step": 31120 + }, + { + "epoch": 0.5260978680614823, + "grad_norm": 0.047073110938072205, + "learning_rate": 9.2118329386782e-06, + "loss": 0.0024, + "step": 31130 + }, + { + "epoch": 0.5262668683403328, + "grad_norm": 0.05412263050675392, + "learning_rate": 9.211037962731339e-06, + "loss": 0.0022, + "step": 31140 + }, + { + "epoch": 0.5264358686191832, + "grad_norm": 0.1352180540561676, + "learning_rate": 9.210242620403773e-06, + "loss": 0.0029, + "step": 31150 + }, + { + "epoch": 0.5266048688980337, + "grad_norm": 0.11758013814687729, + "learning_rate": 9.2094469117647e-06, + "loss": 0.0023, + "step": 31160 + }, + { + "epoch": 0.5267738691768842, + "grad_norm": 0.02222493104636669, + "learning_rate": 9.20865083688335e-06, + "loss": 0.0017, + "step": 31170 + }, + { + "epoch": 0.5269428694557347, + "grad_norm": 0.10861939936876297, + "learning_rate": 9.207854395828985e-06, + "loss": 0.0024, + "step": 31180 + }, + { + "epoch": 0.527111869734585, + "grad_norm": 0.11261676996946335, + "learning_rate": 9.207057588670903e-06, + "loss": 0.0026, + "step": 31190 + }, + { + "epoch": 0.5272808700134355, + "grad_norm": 0.04202567785978317, + "learning_rate": 9.206260415478425e-06, + "loss": 0.0016, + "step": 31200 + }, + { + "epoch": 0.527449870292286, + "grad_norm": 0.03397662192583084, + "learning_rate": 9.20546287632091e-06, + "loss": 0.0021, + "step": 31210 + }, + { + "epoch": 0.5276188705711364, + "grad_norm": 0.004616164602339268, + "learning_rate": 9.20466497126775e-06, + "loss": 0.0017, + "step": 31220 + }, + { + "epoch": 0.5277878708499869, + "grad_norm": 0.09319368749856949, + "learning_rate": 9.203866700388366e-06, + "loss": 0.0016, + "step": 31230 + }, + { + "epoch": 0.5279568711288374, + "grad_norm": 0.16816814243793488, + "learning_rate": 9.20306806375221e-06, + "loss": 0.0027, + "step": 31240 + }, + { + "epoch": 0.5281258714076879, + "grad_norm": 0.04165370762348175, + "learning_rate": 9.202269061428768e-06, + "loss": 0.0022, + "step": 31250 + }, + { + "epoch": 0.5282948716865383, + "grad_norm": 0.0498676560819149, + "learning_rate": 9.201469693487558e-06, + "loss": 0.0022, + "step": 31260 + }, + { + "epoch": 0.5284638719653888, + "grad_norm": 0.2199142873287201, + "learning_rate": 9.200669959998128e-06, + "loss": 0.0054, + "step": 31270 + }, + { + "epoch": 0.5286328722442392, + "grad_norm": 0.08191534131765366, + "learning_rate": 9.199869861030057e-06, + "loss": 0.0017, + "step": 31280 + }, + { + "epoch": 0.5288018725230896, + "grad_norm": 0.03749081864953041, + "learning_rate": 9.199069396652963e-06, + "loss": 0.0015, + "step": 31290 + }, + { + "epoch": 0.5289708728019401, + "grad_norm": 0.4414420425891876, + "learning_rate": 9.198268566936484e-06, + "loss": 0.0022, + "step": 31300 + }, + { + "epoch": 0.5291398730807906, + "grad_norm": 0.035784609615802765, + "learning_rate": 9.197467371950296e-06, + 
"loss": 0.0019, + "step": 31310 + }, + { + "epoch": 0.529308873359641, + "grad_norm": 0.17147906124591827, + "learning_rate": 9.196665811764114e-06, + "loss": 0.0032, + "step": 31320 + }, + { + "epoch": 0.5294778736384915, + "grad_norm": 0.17997874319553375, + "learning_rate": 9.19586388644767e-06, + "loss": 0.0016, + "step": 31330 + }, + { + "epoch": 0.529646873917342, + "grad_norm": 0.13828697800636292, + "learning_rate": 9.19506159607074e-06, + "loss": 0.0024, + "step": 31340 + }, + { + "epoch": 0.5298158741961925, + "grad_norm": 0.14012043178081512, + "learning_rate": 9.194258940703125e-06, + "loss": 0.0025, + "step": 31350 + }, + { + "epoch": 0.5299848744750428, + "grad_norm": 0.18596386909484863, + "learning_rate": 9.193455920414662e-06, + "loss": 0.0017, + "step": 31360 + }, + { + "epoch": 0.5301538747538933, + "grad_norm": 0.06577977538108826, + "learning_rate": 9.192652535275215e-06, + "loss": 0.0023, + "step": 31370 + }, + { + "epoch": 0.5303228750327438, + "grad_norm": 0.12237042933702469, + "learning_rate": 9.191848785354683e-06, + "loss": 0.0028, + "step": 31380 + }, + { + "epoch": 0.5304918753115943, + "grad_norm": 0.05757032707333565, + "learning_rate": 9.191044670722998e-06, + "loss": 0.0035, + "step": 31390 + }, + { + "epoch": 0.5306608755904447, + "grad_norm": 0.0789322629570961, + "learning_rate": 9.190240191450119e-06, + "loss": 0.0046, + "step": 31400 + }, + { + "epoch": 0.5308298758692952, + "grad_norm": 0.12790855765342712, + "learning_rate": 9.189435347606043e-06, + "loss": 0.0033, + "step": 31410 + }, + { + "epoch": 0.5309988761481457, + "grad_norm": 0.05954355373978615, + "learning_rate": 9.188630139260793e-06, + "loss": 0.0019, + "step": 31420 + }, + { + "epoch": 0.5311678764269961, + "grad_norm": 0.0388394370675087, + "learning_rate": 9.187824566484425e-06, + "loss": 0.0026, + "step": 31430 + }, + { + "epoch": 0.5313368767058466, + "grad_norm": 0.16026939451694489, + "learning_rate": 9.18701862934703e-06, + "loss": 0.0037, + "step": 31440 + }, + { + "epoch": 0.531505876984697, + "grad_norm": 0.0926368236541748, + "learning_rate": 9.186212327918729e-06, + "loss": 0.0043, + "step": 31450 + }, + { + "epoch": 0.5316748772635475, + "grad_norm": 0.17913147807121277, + "learning_rate": 9.185405662269671e-06, + "loss": 0.0018, + "step": 31460 + }, + { + "epoch": 0.5318438775423979, + "grad_norm": 0.03901561349630356, + "learning_rate": 9.184598632470042e-06, + "loss": 0.0026, + "step": 31470 + }, + { + "epoch": 0.5320128778212484, + "grad_norm": 0.08872038125991821, + "learning_rate": 9.183791238590057e-06, + "loss": 0.002, + "step": 31480 + }, + { + "epoch": 0.5321818781000989, + "grad_norm": 0.20491473376750946, + "learning_rate": 9.182983480699965e-06, + "loss": 0.0029, + "step": 31490 + }, + { + "epoch": 0.5323508783789493, + "grad_norm": 0.04137842357158661, + "learning_rate": 9.182175358870041e-06, + "loss": 0.0014, + "step": 31500 + }, + { + "epoch": 0.5325198786577998, + "grad_norm": 0.15952403843402863, + "learning_rate": 9.181366873170599e-06, + "loss": 0.0027, + "step": 31510 + }, + { + "epoch": 0.5326888789366503, + "grad_norm": 0.10556403547525406, + "learning_rate": 9.180558023671979e-06, + "loss": 0.0028, + "step": 31520 + }, + { + "epoch": 0.5328578792155007, + "grad_norm": 0.0331096388399601, + "learning_rate": 9.179748810444557e-06, + "loss": 0.0019, + "step": 31530 + }, + { + "epoch": 0.5330268794943511, + "grad_norm": 0.07562128454446793, + "learning_rate": 9.178939233558735e-06, + "loss": 0.0017, + "step": 31540 + }, + { + "epoch": 
0.5331958797732016, + "grad_norm": 0.14268942177295685, + "learning_rate": 9.178129293084954e-06, + "loss": 0.0022, + "step": 31550 + }, + { + "epoch": 0.5333648800520521, + "grad_norm": 0.12696851789951324, + "learning_rate": 9.177318989093681e-06, + "loss": 0.0026, + "step": 31560 + }, + { + "epoch": 0.5335338803309025, + "grad_norm": 0.015331567265093327, + "learning_rate": 9.176508321655415e-06, + "loss": 0.0016, + "step": 31570 + }, + { + "epoch": 0.533702880609753, + "grad_norm": 0.060741398483514786, + "learning_rate": 9.17569729084069e-06, + "loss": 0.0017, + "step": 31580 + }, + { + "epoch": 0.5338718808886035, + "grad_norm": 0.007240627892315388, + "learning_rate": 9.17488589672007e-06, + "loss": 0.002, + "step": 31590 + }, + { + "epoch": 0.534040881167454, + "grad_norm": 0.19343307614326477, + "learning_rate": 9.174074139364148e-06, + "loss": 0.0033, + "step": 31600 + }, + { + "epoch": 0.5342098814463044, + "grad_norm": 0.009958441369235516, + "learning_rate": 9.173262018843552e-06, + "loss": 0.0031, + "step": 31610 + }, + { + "epoch": 0.5343788817251548, + "grad_norm": 0.04688011482357979, + "learning_rate": 9.172449535228941e-06, + "loss": 0.0017, + "step": 31620 + }, + { + "epoch": 0.5345478820040053, + "grad_norm": 0.11201368272304535, + "learning_rate": 9.171636688591005e-06, + "loss": 0.0017, + "step": 31630 + }, + { + "epoch": 0.5347168822828557, + "grad_norm": 0.13468189537525177, + "learning_rate": 9.170823479000464e-06, + "loss": 0.0035, + "step": 31640 + }, + { + "epoch": 0.5348858825617062, + "grad_norm": 0.05479159206151962, + "learning_rate": 9.170009906528074e-06, + "loss": 0.0021, + "step": 31650 + }, + { + "epoch": 0.5350548828405567, + "grad_norm": 0.10649165511131287, + "learning_rate": 9.169195971244615e-06, + "loss": 0.0031, + "step": 31660 + }, + { + "epoch": 0.5352238831194072, + "grad_norm": 0.08019914478063583, + "learning_rate": 9.168381673220909e-06, + "loss": 0.0021, + "step": 31670 + }, + { + "epoch": 0.5353928833982576, + "grad_norm": 0.03295615315437317, + "learning_rate": 9.1675670125278e-06, + "loss": 0.0025, + "step": 31680 + }, + { + "epoch": 0.5355618836771081, + "grad_norm": 0.10095320641994476, + "learning_rate": 9.166751989236168e-06, + "loss": 0.0028, + "step": 31690 + }, + { + "epoch": 0.5357308839559586, + "grad_norm": 0.16818812489509583, + "learning_rate": 9.165936603416925e-06, + "loss": 0.0013, + "step": 31700 + }, + { + "epoch": 0.5358998842348089, + "grad_norm": 0.16022954881191254, + "learning_rate": 9.165120855141014e-06, + "loss": 0.0049, + "step": 31710 + }, + { + "epoch": 0.5360688845136594, + "grad_norm": 0.09274117648601532, + "learning_rate": 9.164304744479406e-06, + "loss": 0.0027, + "step": 31720 + }, + { + "epoch": 0.5362378847925099, + "grad_norm": 0.04743310436606407, + "learning_rate": 9.163488271503111e-06, + "loss": 0.002, + "step": 31730 + }, + { + "epoch": 0.5364068850713604, + "grad_norm": 0.12824346125125885, + "learning_rate": 9.162671436283164e-06, + "loss": 0.0022, + "step": 31740 + }, + { + "epoch": 0.5365758853502108, + "grad_norm": 0.11546172946691513, + "learning_rate": 9.16185423889063e-06, + "loss": 0.0028, + "step": 31750 + }, + { + "epoch": 0.5367448856290613, + "grad_norm": 0.28843066096305847, + "learning_rate": 9.161036679396615e-06, + "loss": 0.0032, + "step": 31760 + }, + { + "epoch": 0.5369138859079118, + "grad_norm": 0.09800495207309723, + "learning_rate": 9.160218757872248e-06, + "loss": 0.0018, + "step": 31770 + }, + { + "epoch": 0.5370828861867623, + "grad_norm": 0.05644332244992256, + 
"learning_rate": 9.159400474388694e-06, + "loss": 0.0022, + "step": 31780 + }, + { + "epoch": 0.5372518864656126, + "grad_norm": 0.20425622165203094, + "learning_rate": 9.158581829017143e-06, + "loss": 0.0031, + "step": 31790 + }, + { + "epoch": 0.5374208867444631, + "grad_norm": 0.026468642055988312, + "learning_rate": 9.157762821828826e-06, + "loss": 0.0026, + "step": 31800 + }, + { + "epoch": 0.5375898870233136, + "grad_norm": 0.08805709332227707, + "learning_rate": 9.156943452894998e-06, + "loss": 0.0045, + "step": 31810 + }, + { + "epoch": 0.537758887302164, + "grad_norm": 0.09152763336896896, + "learning_rate": 9.156123722286949e-06, + "loss": 0.0043, + "step": 31820 + }, + { + "epoch": 0.5379278875810145, + "grad_norm": 0.10086562484502792, + "learning_rate": 9.155303630076e-06, + "loss": 0.0031, + "step": 31830 + }, + { + "epoch": 0.538096887859865, + "grad_norm": 0.1953403353691101, + "learning_rate": 9.154483176333502e-06, + "loss": 0.0027, + "step": 31840 + }, + { + "epoch": 0.5382658881387155, + "grad_norm": 0.05798371136188507, + "learning_rate": 9.153662361130838e-06, + "loss": 0.0029, + "step": 31850 + }, + { + "epoch": 0.5384348884175659, + "grad_norm": 0.23227271437644958, + "learning_rate": 9.152841184539423e-06, + "loss": 0.0016, + "step": 31860 + }, + { + "epoch": 0.5386038886964164, + "grad_norm": 0.03487158566713333, + "learning_rate": 9.152019646630705e-06, + "loss": 0.0031, + "step": 31870 + }, + { + "epoch": 0.5387728889752668, + "grad_norm": 0.12952357530593872, + "learning_rate": 9.15119774747616e-06, + "loss": 0.0015, + "step": 31880 + }, + { + "epoch": 0.5389418892541172, + "grad_norm": 0.049278002232313156, + "learning_rate": 9.1503754871473e-06, + "loss": 0.0025, + "step": 31890 + }, + { + "epoch": 0.5391108895329677, + "grad_norm": 0.13247987627983093, + "learning_rate": 9.14955286571566e-06, + "loss": 0.003, + "step": 31900 + }, + { + "epoch": 0.5392798898118182, + "grad_norm": 0.02803332731127739, + "learning_rate": 9.148729883252818e-06, + "loss": 0.0024, + "step": 31910 + }, + { + "epoch": 0.5394488900906687, + "grad_norm": 0.17933359742164612, + "learning_rate": 9.147906539830376e-06, + "loss": 0.0027, + "step": 31920 + }, + { + "epoch": 0.5396178903695191, + "grad_norm": 0.05666980519890785, + "learning_rate": 9.147082835519967e-06, + "loss": 0.0024, + "step": 31930 + }, + { + "epoch": 0.5397868906483696, + "grad_norm": 0.02192831039428711, + "learning_rate": 9.146258770393256e-06, + "loss": 0.003, + "step": 31940 + }, + { + "epoch": 0.5399558909272201, + "grad_norm": 0.058615997433662415, + "learning_rate": 9.145434344521946e-06, + "loss": 0.002, + "step": 31950 + }, + { + "epoch": 0.5401248912060705, + "grad_norm": 0.09989965707063675, + "learning_rate": 9.144609557977762e-06, + "loss": 0.0026, + "step": 31960 + }, + { + "epoch": 0.5402938914849209, + "grad_norm": 0.11751866340637207, + "learning_rate": 9.143784410832465e-06, + "loss": 0.0031, + "step": 31970 + }, + { + "epoch": 0.5404628917637714, + "grad_norm": 0.07591240853071213, + "learning_rate": 9.142958903157849e-06, + "loss": 0.002, + "step": 31980 + }, + { + "epoch": 0.5406318920426219, + "grad_norm": 0.145902618765831, + "learning_rate": 9.142133035025733e-06, + "loss": 0.0017, + "step": 31990 + }, + { + "epoch": 0.5408008923214723, + "grad_norm": 0.16412098705768585, + "learning_rate": 9.141306806507974e-06, + "loss": 0.0031, + "step": 32000 + }, + { + "epoch": 0.5409698926003228, + "grad_norm": 0.14947715401649475, + "learning_rate": 9.140480217676458e-06, + "loss": 0.003, + "step": 
32010 + }, + { + "epoch": 0.5411388928791733, + "grad_norm": 0.0731707289814949, + "learning_rate": 9.139653268603102e-06, + "loss": 0.0029, + "step": 32020 + }, + { + "epoch": 0.5413078931580237, + "grad_norm": 0.15577909350395203, + "learning_rate": 9.138825959359855e-06, + "loss": 0.0021, + "step": 32030 + }, + { + "epoch": 0.5414768934368742, + "grad_norm": 0.035175248980522156, + "learning_rate": 9.137998290018697e-06, + "loss": 0.0022, + "step": 32040 + }, + { + "epoch": 0.5416458937157246, + "grad_norm": 0.08923418819904327, + "learning_rate": 9.137170260651637e-06, + "loss": 0.0024, + "step": 32050 + }, + { + "epoch": 0.541814893994575, + "grad_norm": 0.0423198901116848, + "learning_rate": 9.13634187133072e-06, + "loss": 0.0012, + "step": 32060 + }, + { + "epoch": 0.5419838942734255, + "grad_norm": 0.08710048347711563, + "learning_rate": 9.13551312212802e-06, + "loss": 0.0024, + "step": 32070 + }, + { + "epoch": 0.542152894552276, + "grad_norm": 0.07039597630500793, + "learning_rate": 9.13468401311564e-06, + "loss": 0.0023, + "step": 32080 + }, + { + "epoch": 0.5423218948311265, + "grad_norm": 0.03258507326245308, + "learning_rate": 9.133854544365719e-06, + "loss": 0.0018, + "step": 32090 + }, + { + "epoch": 0.542490895109977, + "grad_norm": 0.04773247241973877, + "learning_rate": 9.133024715950422e-06, + "loss": 0.002, + "step": 32100 + }, + { + "epoch": 0.5426598953888274, + "grad_norm": 0.013408832252025604, + "learning_rate": 9.132194527941952e-06, + "loss": 0.0012, + "step": 32110 + }, + { + "epoch": 0.5428288956676779, + "grad_norm": 0.16576145589351654, + "learning_rate": 9.131363980412536e-06, + "loss": 0.002, + "step": 32120 + }, + { + "epoch": 0.5429978959465284, + "grad_norm": 0.023107144981622696, + "learning_rate": 9.130533073434438e-06, + "loss": 0.0012, + "step": 32130 + }, + { + "epoch": 0.5431668962253787, + "grad_norm": 0.09261453151702881, + "learning_rate": 9.129701807079948e-06, + "loss": 0.0018, + "step": 32140 + }, + { + "epoch": 0.5433358965042292, + "grad_norm": 0.0984678715467453, + "learning_rate": 9.128870181421392e-06, + "loss": 0.0019, + "step": 32150 + }, + { + "epoch": 0.5435048967830797, + "grad_norm": 0.1755990982055664, + "learning_rate": 9.128038196531126e-06, + "loss": 0.0031, + "step": 32160 + }, + { + "epoch": 0.5436738970619301, + "grad_norm": 0.07609538733959198, + "learning_rate": 9.127205852481536e-06, + "loss": 0.0029, + "step": 32170 + }, + { + "epoch": 0.5438428973407806, + "grad_norm": 0.07591988146305084, + "learning_rate": 9.12637314934504e-06, + "loss": 0.0029, + "step": 32180 + }, + { + "epoch": 0.5440118976196311, + "grad_norm": 0.08974188566207886, + "learning_rate": 9.125540087194089e-06, + "loss": 0.004, + "step": 32190 + }, + { + "epoch": 0.5441808978984816, + "grad_norm": 0.08013894408941269, + "learning_rate": 9.124706666101159e-06, + "loss": 0.0034, + "step": 32200 + }, + { + "epoch": 0.544349898177332, + "grad_norm": 0.2126358449459076, + "learning_rate": 9.123872886138765e-06, + "loss": 0.0029, + "step": 32210 + }, + { + "epoch": 0.5445188984561824, + "grad_norm": 0.19131335616111755, + "learning_rate": 9.12303874737945e-06, + "loss": 0.0036, + "step": 32220 + }, + { + "epoch": 0.5446878987350329, + "grad_norm": 0.08860641717910767, + "learning_rate": 9.122204249895787e-06, + "loss": 0.0017, + "step": 32230 + }, + { + "epoch": 0.5448568990138833, + "grad_norm": 0.045733124017715454, + "learning_rate": 9.121369393760381e-06, + "loss": 0.0016, + "step": 32240 + }, + { + "epoch": 0.5450258992927338, + "grad_norm": 
0.031239159405231476, + "learning_rate": 9.120534179045872e-06, + "loss": 0.0024, + "step": 32250 + }, + { + "epoch": 0.5451948995715843, + "grad_norm": 0.07091531157493591, + "learning_rate": 9.119698605824923e-06, + "loss": 0.001, + "step": 32260 + }, + { + "epoch": 0.5453638998504348, + "grad_norm": 0.06964509934186935, + "learning_rate": 9.118862674170234e-06, + "loss": 0.0044, + "step": 32270 + }, + { + "epoch": 0.5455329001292852, + "grad_norm": 0.13213202357292175, + "learning_rate": 9.118026384154537e-06, + "loss": 0.0025, + "step": 32280 + }, + { + "epoch": 0.5457019004081357, + "grad_norm": 0.13697420060634613, + "learning_rate": 9.117189735850592e-06, + "loss": 0.0026, + "step": 32290 + }, + { + "epoch": 0.5458709006869862, + "grad_norm": 0.05169123411178589, + "learning_rate": 9.116352729331192e-06, + "loss": 0.0026, + "step": 32300 + }, + { + "epoch": 0.5460399009658365, + "grad_norm": 0.0705161914229393, + "learning_rate": 9.115515364669162e-06, + "loss": 0.0014, + "step": 32310 + }, + { + "epoch": 0.546208901244687, + "grad_norm": 0.19334140419960022, + "learning_rate": 9.114677641937353e-06, + "loss": 0.0024, + "step": 32320 + }, + { + "epoch": 0.5463779015235375, + "grad_norm": 0.07870922982692719, + "learning_rate": 9.113839561208653e-06, + "loss": 0.0018, + "step": 32330 + }, + { + "epoch": 0.546546901802388, + "grad_norm": 0.1923825740814209, + "learning_rate": 9.113001122555981e-06, + "loss": 0.0027, + "step": 32340 + }, + { + "epoch": 0.5467159020812384, + "grad_norm": 0.06787336617708206, + "learning_rate": 9.11216232605228e-06, + "loss": 0.0016, + "step": 32350 + }, + { + "epoch": 0.5468849023600889, + "grad_norm": 0.03022446669638157, + "learning_rate": 9.111323171770535e-06, + "loss": 0.0019, + "step": 32360 + }, + { + "epoch": 0.5470539026389394, + "grad_norm": 0.1334880292415619, + "learning_rate": 9.110483659783755e-06, + "loss": 0.0025, + "step": 32370 + }, + { + "epoch": 0.5472229029177899, + "grad_norm": 0.04222896322607994, + "learning_rate": 9.109643790164979e-06, + "loss": 0.0022, + "step": 32380 + }, + { + "epoch": 0.5473919031966403, + "grad_norm": 0.11605576425790787, + "learning_rate": 9.108803562987282e-06, + "loss": 0.006, + "step": 32390 + }, + { + "epoch": 0.5475609034754907, + "grad_norm": 0.09948180615901947, + "learning_rate": 9.107962978323768e-06, + "loss": 0.002, + "step": 32400 + }, + { + "epoch": 0.5477299037543412, + "grad_norm": 0.06817879527807236, + "learning_rate": 9.107122036247572e-06, + "loss": 0.0028, + "step": 32410 + }, + { + "epoch": 0.5478989040331916, + "grad_norm": 0.11131305992603302, + "learning_rate": 9.106280736831858e-06, + "loss": 0.0015, + "step": 32420 + }, + { + "epoch": 0.5480679043120421, + "grad_norm": 0.05916478484869003, + "learning_rate": 9.105439080149826e-06, + "loss": 0.003, + "step": 32430 + }, + { + "epoch": 0.5482369045908926, + "grad_norm": 0.10021656006574631, + "learning_rate": 9.104597066274701e-06, + "loss": 0.002, + "step": 32440 + }, + { + "epoch": 0.548405904869743, + "grad_norm": 0.1359965056180954, + "learning_rate": 9.103754695279746e-06, + "loss": 0.0024, + "step": 32450 + }, + { + "epoch": 0.5485749051485935, + "grad_norm": 0.0387590229511261, + "learning_rate": 9.102911967238248e-06, + "loss": 0.0029, + "step": 32460 + }, + { + "epoch": 0.548743905427444, + "grad_norm": 0.10812094807624817, + "learning_rate": 9.10206888222353e-06, + "loss": 0.0037, + "step": 32470 + }, + { + "epoch": 0.5489129057062944, + "grad_norm": 0.05625665932893753, + "learning_rate": 9.101225440308944e-06, + "loss": 
0.0034, + "step": 32480 + }, + { + "epoch": 0.5490819059851448, + "grad_norm": 0.06848200410604477, + "learning_rate": 9.100381641567875e-06, + "loss": 0.0019, + "step": 32490 + }, + { + "epoch": 0.5492509062639953, + "grad_norm": 0.12069488316774368, + "learning_rate": 9.099537486073736e-06, + "loss": 0.0091, + "step": 32500 + }, + { + "epoch": 0.5494199065428458, + "grad_norm": 0.044651955366134644, + "learning_rate": 9.098692973899971e-06, + "loss": 0.002, + "step": 32510 + }, + { + "epoch": 0.5495889068216963, + "grad_norm": 0.1285020112991333, + "learning_rate": 9.097848105120061e-06, + "loss": 0.0029, + "step": 32520 + }, + { + "epoch": 0.5497579071005467, + "grad_norm": 0.09652730077505112, + "learning_rate": 9.09700287980751e-06, + "loss": 0.0018, + "step": 32530 + }, + { + "epoch": 0.5499269073793972, + "grad_norm": 0.06952265650033951, + "learning_rate": 9.09615729803586e-06, + "loss": 0.0015, + "step": 32540 + }, + { + "epoch": 0.5500959076582477, + "grad_norm": 0.214644655585289, + "learning_rate": 9.095311359878676e-06, + "loss": 0.0023, + "step": 32550 + }, + { + "epoch": 0.5502649079370981, + "grad_norm": 0.06943915039300919, + "learning_rate": 9.094465065409563e-06, + "loss": 0.0049, + "step": 32560 + }, + { + "epoch": 0.5504339082159485, + "grad_norm": 0.06609531491994858, + "learning_rate": 9.09361841470215e-06, + "loss": 0.0019, + "step": 32570 + }, + { + "epoch": 0.550602908494799, + "grad_norm": 0.13681089878082275, + "learning_rate": 9.0927714078301e-06, + "loss": 0.0021, + "step": 32580 + }, + { + "epoch": 0.5507719087736495, + "grad_norm": 0.06775745004415512, + "learning_rate": 9.09192404486711e-06, + "loss": 0.003, + "step": 32590 + }, + { + "epoch": 0.5509409090524999, + "grad_norm": 0.051594868302345276, + "learning_rate": 9.091076325886903e-06, + "loss": 0.0035, + "step": 32600 + }, + { + "epoch": 0.5511099093313504, + "grad_norm": 0.018013061955571175, + "learning_rate": 9.090228250963233e-06, + "loss": 0.0026, + "step": 32610 + }, + { + "epoch": 0.5512789096102009, + "grad_norm": 0.06551416218280792, + "learning_rate": 9.089379820169886e-06, + "loss": 0.0025, + "step": 32620 + }, + { + "epoch": 0.5514479098890513, + "grad_norm": 0.25022977590560913, + "learning_rate": 9.088531033580681e-06, + "loss": 0.0041, + "step": 32630 + }, + { + "epoch": 0.5516169101679018, + "grad_norm": 0.16517165303230286, + "learning_rate": 9.087681891269469e-06, + "loss": 0.0035, + "step": 32640 + }, + { + "epoch": 0.5517859104467523, + "grad_norm": 0.24520257115364075, + "learning_rate": 9.086832393310125e-06, + "loss": 0.0021, + "step": 32650 + }, + { + "epoch": 0.5519549107256027, + "grad_norm": 0.47518280148506165, + "learning_rate": 9.085982539776563e-06, + "loss": 0.001, + "step": 32660 + }, + { + "epoch": 0.5521239110044531, + "grad_norm": 0.013922463171184063, + "learning_rate": 9.085132330742723e-06, + "loss": 0.003, + "step": 32670 + }, + { + "epoch": 0.5522929112833036, + "grad_norm": 0.07399224489927292, + "learning_rate": 9.084281766282578e-06, + "loss": 0.0014, + "step": 32680 + }, + { + "epoch": 0.5524619115621541, + "grad_norm": 0.08443097025156021, + "learning_rate": 9.08343084647013e-06, + "loss": 0.0023, + "step": 32690 + }, + { + "epoch": 0.5526309118410045, + "grad_norm": 0.0952787771821022, + "learning_rate": 9.082579571379413e-06, + "loss": 0.0018, + "step": 32700 + }, + { + "epoch": 0.552799912119855, + "grad_norm": 0.1248018890619278, + "learning_rate": 9.081727941084493e-06, + "loss": 0.0019, + "step": 32710 + }, + { + "epoch": 0.5529689123987055, + 
"grad_norm": 0.12016724795103073, + "learning_rate": 9.080875955659466e-06, + "loss": 0.0022, + "step": 32720 + }, + { + "epoch": 0.553137912677556, + "grad_norm": 0.04242364317178726, + "learning_rate": 9.080023615178456e-06, + "loss": 0.002, + "step": 32730 + }, + { + "epoch": 0.5533069129564063, + "grad_norm": 0.05166156217455864, + "learning_rate": 9.079170919715627e-06, + "loss": 0.0038, + "step": 32740 + }, + { + "epoch": 0.5534759132352568, + "grad_norm": 0.1383553296327591, + "learning_rate": 9.078317869345161e-06, + "loss": 0.0024, + "step": 32750 + }, + { + "epoch": 0.5536449135141073, + "grad_norm": 0.022725990042090416, + "learning_rate": 9.077464464141284e-06, + "loss": 0.0024, + "step": 32760 + }, + { + "epoch": 0.5538139137929577, + "grad_norm": 0.06411861628293991, + "learning_rate": 9.07661070417824e-06, + "loss": 0.0038, + "step": 32770 + }, + { + "epoch": 0.5539829140718082, + "grad_norm": 0.00547699024900794, + "learning_rate": 9.075756589530313e-06, + "loss": 0.0019, + "step": 32780 + }, + { + "epoch": 0.5541519143506587, + "grad_norm": 0.03633767366409302, + "learning_rate": 9.074902120271816e-06, + "loss": 0.0028, + "step": 32790 + }, + { + "epoch": 0.5543209146295092, + "grad_norm": 0.1565227508544922, + "learning_rate": 9.074047296477092e-06, + "loss": 0.0027, + "step": 32800 + }, + { + "epoch": 0.5544899149083596, + "grad_norm": 0.12397000938653946, + "learning_rate": 9.073192118220513e-06, + "loss": 0.0021, + "step": 32810 + }, + { + "epoch": 0.5546589151872101, + "grad_norm": 0.0234407689422369, + "learning_rate": 9.072336585576485e-06, + "loss": 0.0032, + "step": 32820 + }, + { + "epoch": 0.5548279154660605, + "grad_norm": 0.06488706916570663, + "learning_rate": 9.071480698619442e-06, + "loss": 0.0014, + "step": 32830 + }, + { + "epoch": 0.554996915744911, + "grad_norm": 0.09103720635175705, + "learning_rate": 9.070624457423853e-06, + "loss": 0.0027, + "step": 32840 + }, + { + "epoch": 0.5551659160237614, + "grad_norm": 0.09234441816806793, + "learning_rate": 9.069767862064213e-06, + "loss": 0.0029, + "step": 32850 + }, + { + "epoch": 0.5553349163026119, + "grad_norm": 0.85475093126297, + "learning_rate": 9.06891091261505e-06, + "loss": 0.0029, + "step": 32860 + }, + { + "epoch": 0.5555039165814624, + "grad_norm": 0.05765415355563164, + "learning_rate": 9.068053609150924e-06, + "loss": 0.0013, + "step": 32870 + }, + { + "epoch": 0.5556729168603128, + "grad_norm": 0.05210142582654953, + "learning_rate": 9.067195951746423e-06, + "loss": 0.0034, + "step": 32880 + }, + { + "epoch": 0.5558419171391633, + "grad_norm": 0.029150711372494698, + "learning_rate": 9.066337940476171e-06, + "loss": 0.0027, + "step": 32890 + }, + { + "epoch": 0.5560109174180138, + "grad_norm": 0.33490970730781555, + "learning_rate": 9.065479575414813e-06, + "loss": 0.0023, + "step": 32900 + }, + { + "epoch": 0.5561799176968641, + "grad_norm": 0.019613586366176605, + "learning_rate": 9.064620856637035e-06, + "loss": 0.0009, + "step": 32910 + }, + { + "epoch": 0.5563489179757146, + "grad_norm": 0.06637836247682571, + "learning_rate": 9.063761784217551e-06, + "loss": 0.0027, + "step": 32920 + }, + { + "epoch": 0.5565179182545651, + "grad_norm": 0.07676636427640915, + "learning_rate": 9.0629023582311e-06, + "loss": 0.0013, + "step": 32930 + }, + { + "epoch": 0.5566869185334156, + "grad_norm": 0.04984048381447792, + "learning_rate": 9.06204257875246e-06, + "loss": 0.0021, + "step": 32940 + }, + { + "epoch": 0.556855918812266, + "grad_norm": 0.11220373213291168, + "learning_rate": 
9.061182445856434e-06, + "loss": 0.0024, + "step": 32950 + }, + { + "epoch": 0.5570249190911165, + "grad_norm": 0.11472593247890472, + "learning_rate": 9.060321959617857e-06, + "loss": 0.0021, + "step": 32960 + }, + { + "epoch": 0.557193919369967, + "grad_norm": 0.07053294032812119, + "learning_rate": 9.059461120111598e-06, + "loss": 0.0025, + "step": 32970 + }, + { + "epoch": 0.5573629196488175, + "grad_norm": 0.03769908845424652, + "learning_rate": 9.058599927412553e-06, + "loss": 0.0015, + "step": 32980 + }, + { + "epoch": 0.5575319199276679, + "grad_norm": 0.0317186638712883, + "learning_rate": 9.057738381595651e-06, + "loss": 0.0023, + "step": 32990 + }, + { + "epoch": 0.5577009202065183, + "grad_norm": 0.04581945016980171, + "learning_rate": 9.056876482735848e-06, + "loss": 0.0021, + "step": 33000 + }, + { + "epoch": 0.5578699204853688, + "grad_norm": 0.04762342572212219, + "learning_rate": 9.056014230908135e-06, + "loss": 0.0022, + "step": 33010 + }, + { + "epoch": 0.5580389207642192, + "grad_norm": 0.026327649131417274, + "learning_rate": 9.055151626187533e-06, + "loss": 0.0034, + "step": 33020 + }, + { + "epoch": 0.5582079210430697, + "grad_norm": 0.05236377194523811, + "learning_rate": 9.054288668649092e-06, + "loss": 0.0021, + "step": 33030 + }, + { + "epoch": 0.5583769213219202, + "grad_norm": 0.014177908189594746, + "learning_rate": 9.053425358367894e-06, + "loss": 0.0026, + "step": 33040 + }, + { + "epoch": 0.5585459216007707, + "grad_norm": 0.014622579328715801, + "learning_rate": 9.05256169541905e-06, + "loss": 0.0014, + "step": 33050 + }, + { + "epoch": 0.5587149218796211, + "grad_norm": 0.06770174205303192, + "learning_rate": 9.0516976798777e-06, + "loss": 0.0023, + "step": 33060 + }, + { + "epoch": 0.5588839221584716, + "grad_norm": 0.0714285597205162, + "learning_rate": 9.050833311819025e-06, + "loss": 0.001, + "step": 33070 + }, + { + "epoch": 0.5590529224373221, + "grad_norm": 0.12493888288736343, + "learning_rate": 9.049968591318224e-06, + "loss": 0.0032, + "step": 33080 + }, + { + "epoch": 0.5592219227161724, + "grad_norm": 0.24726036190986633, + "learning_rate": 9.049103518450533e-06, + "loss": 0.0018, + "step": 33090 + }, + { + "epoch": 0.5593909229950229, + "grad_norm": 0.06651870161294937, + "learning_rate": 9.048238093291218e-06, + "loss": 0.0021, + "step": 33100 + }, + { + "epoch": 0.5595599232738734, + "grad_norm": 0.03772980347275734, + "learning_rate": 9.047372315915572e-06, + "loss": 0.0025, + "step": 33110 + }, + { + "epoch": 0.5597289235527239, + "grad_norm": 0.028655195608735085, + "learning_rate": 9.046506186398926e-06, + "loss": 0.0013, + "step": 33120 + }, + { + "epoch": 0.5598979238315743, + "grad_norm": 0.1510440707206726, + "learning_rate": 9.045639704816637e-06, + "loss": 0.0032, + "step": 33130 + }, + { + "epoch": 0.5600669241104248, + "grad_norm": 0.03013589419424534, + "learning_rate": 9.044772871244093e-06, + "loss": 0.0019, + "step": 33140 + }, + { + "epoch": 0.5602359243892753, + "grad_norm": 0.01859009638428688, + "learning_rate": 9.04390568575671e-06, + "loss": 0.002, + "step": 33150 + }, + { + "epoch": 0.5604049246681257, + "grad_norm": 0.1631273627281189, + "learning_rate": 9.04303814842994e-06, + "loss": 0.0036, + "step": 33160 + }, + { + "epoch": 0.5605739249469761, + "grad_norm": 0.134274423122406, + "learning_rate": 9.042170259339263e-06, + "loss": 0.0019, + "step": 33170 + }, + { + "epoch": 0.5607429252258266, + "grad_norm": 0.038943417370319366, + "learning_rate": 9.041302018560187e-06, + "loss": 0.0019, + "step": 33180 + }, + { + 
"epoch": 0.5609119255046771, + "grad_norm": 0.10986550152301788, + "learning_rate": 9.040433426168255e-06, + "loss": 0.0015, + "step": 33190 + }, + { + "epoch": 0.5610809257835275, + "grad_norm": 0.13235639035701752, + "learning_rate": 9.039564482239039e-06, + "loss": 0.0011, + "step": 33200 + }, + { + "epoch": 0.561249926062378, + "grad_norm": 0.06512071937322617, + "learning_rate": 9.038695186848141e-06, + "loss": 0.0021, + "step": 33210 + }, + { + "epoch": 0.5614189263412285, + "grad_norm": 0.14054960012435913, + "learning_rate": 9.037825540071194e-06, + "loss": 0.0025, + "step": 33220 + }, + { + "epoch": 0.561587926620079, + "grad_norm": 0.07978338748216629, + "learning_rate": 9.03695554198386e-06, + "loss": 0.0044, + "step": 33230 + }, + { + "epoch": 0.5617569268989294, + "grad_norm": 0.058655884116888046, + "learning_rate": 9.036085192661837e-06, + "loss": 0.004, + "step": 33240 + }, + { + "epoch": 0.5619259271777799, + "grad_norm": 0.06768335402011871, + "learning_rate": 9.035214492180843e-06, + "loss": 0.0015, + "step": 33250 + }, + { + "epoch": 0.5620949274566303, + "grad_norm": 0.16143356263637543, + "learning_rate": 9.034343440616641e-06, + "loss": 0.0025, + "step": 33260 + }, + { + "epoch": 0.5622639277354807, + "grad_norm": 0.06286460161209106, + "learning_rate": 9.033472038045011e-06, + "loss": 0.0026, + "step": 33270 + }, + { + "epoch": 0.5624329280143312, + "grad_norm": 0.05834538862109184, + "learning_rate": 9.032600284541771e-06, + "loss": 0.0028, + "step": 33280 + }, + { + "epoch": 0.5626019282931817, + "grad_norm": 0.09082058072090149, + "learning_rate": 9.031728180182768e-06, + "loss": 0.003, + "step": 33290 + }, + { + "epoch": 0.5627709285720321, + "grad_norm": 0.07078733295202255, + "learning_rate": 9.03085572504388e-06, + "loss": 0.0023, + "step": 33300 + }, + { + "epoch": 0.5629399288508826, + "grad_norm": 0.078410305082798, + "learning_rate": 9.029982919201012e-06, + "loss": 0.0022, + "step": 33310 + }, + { + "epoch": 0.5631089291297331, + "grad_norm": 0.04747273400425911, + "learning_rate": 9.029109762730105e-06, + "loss": 0.0024, + "step": 33320 + }, + { + "epoch": 0.5632779294085836, + "grad_norm": 0.07112249732017517, + "learning_rate": 9.02823625570713e-06, + "loss": 0.0023, + "step": 33330 + }, + { + "epoch": 0.563446929687434, + "grad_norm": 0.10954030603170395, + "learning_rate": 9.02736239820808e-06, + "loss": 0.0025, + "step": 33340 + }, + { + "epoch": 0.5636159299662844, + "grad_norm": 0.13121449947357178, + "learning_rate": 9.026488190308989e-06, + "loss": 0.0015, + "step": 33350 + }, + { + "epoch": 0.5637849302451349, + "grad_norm": 0.03465167433023453, + "learning_rate": 9.025613632085919e-06, + "loss": 0.0021, + "step": 33360 + }, + { + "epoch": 0.5639539305239853, + "grad_norm": 0.055887263268232346, + "learning_rate": 9.024738723614957e-06, + "loss": 0.002, + "step": 33370 + }, + { + "epoch": 0.5641229308028358, + "grad_norm": 0.25826144218444824, + "learning_rate": 9.023863464972225e-06, + "loss": 0.0025, + "step": 33380 + }, + { + "epoch": 0.5642919310816863, + "grad_norm": 0.07254608720541, + "learning_rate": 9.022987856233876e-06, + "loss": 0.0093, + "step": 33390 + }, + { + "epoch": 0.5644609313605368, + "grad_norm": 0.18888425827026367, + "learning_rate": 9.022111897476092e-06, + "loss": 0.0042, + "step": 33400 + }, + { + "epoch": 0.5646299316393872, + "grad_norm": 0.11029987782239914, + "learning_rate": 9.021235588775083e-06, + "loss": 0.0026, + "step": 33410 + }, + { + "epoch": 0.5647989319182377, + "grad_norm": 0.1506171077489853, + 
"learning_rate": 9.020358930207098e-06, + "loss": 0.0027, + "step": 33420 + }, + { + "epoch": 0.5649679321970881, + "grad_norm": 0.007704961579293013, + "learning_rate": 9.019481921848405e-06, + "loss": 0.0023, + "step": 33430 + }, + { + "epoch": 0.5651369324759385, + "grad_norm": 0.05094357952475548, + "learning_rate": 9.01860456377531e-06, + "loss": 0.0012, + "step": 33440 + }, + { + "epoch": 0.565305932754789, + "grad_norm": 0.04711581766605377, + "learning_rate": 9.017726856064148e-06, + "loss": 0.0019, + "step": 33450 + }, + { + "epoch": 0.5654749330336395, + "grad_norm": 0.14686250686645508, + "learning_rate": 9.016848798791283e-06, + "loss": 0.0022, + "step": 33460 + }, + { + "epoch": 0.56564393331249, + "grad_norm": 0.0039303540252149105, + "learning_rate": 9.01597039203311e-06, + "loss": 0.0034, + "step": 33470 + }, + { + "epoch": 0.5658129335913404, + "grad_norm": 0.01116950437426567, + "learning_rate": 9.015091635866056e-06, + "loss": 0.0018, + "step": 33480 + }, + { + "epoch": 0.5659819338701909, + "grad_norm": 0.12378732860088348, + "learning_rate": 9.014212530366575e-06, + "loss": 0.0032, + "step": 33490 + }, + { + "epoch": 0.5661509341490414, + "grad_norm": 0.3053177297115326, + "learning_rate": 9.013333075611154e-06, + "loss": 0.0037, + "step": 33500 + }, + { + "epoch": 0.5663199344278919, + "grad_norm": 0.052048180252313614, + "learning_rate": 9.012453271676311e-06, + "loss": 0.0022, + "step": 33510 + }, + { + "epoch": 0.5664889347067422, + "grad_norm": 0.14100271463394165, + "learning_rate": 9.011573118638595e-06, + "loss": 0.0034, + "step": 33520 + }, + { + "epoch": 0.5666579349855927, + "grad_norm": 0.08364363014698029, + "learning_rate": 9.01069261657458e-06, + "loss": 0.0022, + "step": 33530 + }, + { + "epoch": 0.5668269352644432, + "grad_norm": 0.09455575048923492, + "learning_rate": 9.009811765560871e-06, + "loss": 0.0028, + "step": 33540 + }, + { + "epoch": 0.5669959355432936, + "grad_norm": 0.1075403094291687, + "learning_rate": 9.008930565674115e-06, + "loss": 0.0031, + "step": 33550 + }, + { + "epoch": 0.5671649358221441, + "grad_norm": 0.08358623832464218, + "learning_rate": 9.008049016990974e-06, + "loss": 0.0021, + "step": 33560 + }, + { + "epoch": 0.5673339361009946, + "grad_norm": 0.09312465786933899, + "learning_rate": 9.00716711958815e-06, + "loss": 0.0022, + "step": 33570 + }, + { + "epoch": 0.5675029363798451, + "grad_norm": 0.23224703967571259, + "learning_rate": 9.006284873542373e-06, + "loss": 0.0019, + "step": 33580 + }, + { + "epoch": 0.5676719366586955, + "grad_norm": 0.08911000937223434, + "learning_rate": 9.005402278930398e-06, + "loss": 0.0022, + "step": 33590 + }, + { + "epoch": 0.5678409369375459, + "grad_norm": 0.04529863968491554, + "learning_rate": 9.00451933582902e-06, + "loss": 0.0016, + "step": 33600 + }, + { + "epoch": 0.5680099372163964, + "grad_norm": 0.11283257603645325, + "learning_rate": 9.003636044315056e-06, + "loss": 0.0031, + "step": 33610 + }, + { + "epoch": 0.5681789374952468, + "grad_norm": 0.04146464169025421, + "learning_rate": 9.00275240446536e-06, + "loss": 0.0029, + "step": 33620 + }, + { + "epoch": 0.5683479377740973, + "grad_norm": 0.2886311113834381, + "learning_rate": 9.00186841635681e-06, + "loss": 0.0026, + "step": 33630 + }, + { + "epoch": 0.5685169380529478, + "grad_norm": 0.03935188800096512, + "learning_rate": 9.000984080066316e-06, + "loss": 0.0031, + "step": 33640 + }, + { + "epoch": 0.5686859383317983, + "grad_norm": 0.09145606309175491, + "learning_rate": 9.000099395670826e-06, + "loss": 0.0047, + "step": 
33650 + }, + { + "epoch": 0.5688549386106487, + "grad_norm": 0.11949055641889572, + "learning_rate": 8.999214363247306e-06, + "loss": 0.0031, + "step": 33660 + }, + { + "epoch": 0.5690239388894992, + "grad_norm": 0.043543096631765366, + "learning_rate": 8.99832898287276e-06, + "loss": 0.0029, + "step": 33670 + }, + { + "epoch": 0.5691929391683497, + "grad_norm": 0.08801903575658798, + "learning_rate": 8.997443254624218e-06, + "loss": 0.0028, + "step": 33680 + }, + { + "epoch": 0.5693619394472, + "grad_norm": 0.2695542275905609, + "learning_rate": 8.996557178578747e-06, + "loss": 0.005, + "step": 33690 + }, + { + "epoch": 0.5695309397260505, + "grad_norm": 0.06001931428909302, + "learning_rate": 8.995670754813437e-06, + "loss": 0.0024, + "step": 33700 + }, + { + "epoch": 0.569699940004901, + "grad_norm": 0.031208530068397522, + "learning_rate": 8.994783983405414e-06, + "loss": 0.004, + "step": 33710 + }, + { + "epoch": 0.5698689402837515, + "grad_norm": 0.03699713572859764, + "learning_rate": 8.993896864431825e-06, + "loss": 0.0039, + "step": 33720 + }, + { + "epoch": 0.5700379405626019, + "grad_norm": 0.06721201539039612, + "learning_rate": 8.993009397969861e-06, + "loss": 0.0016, + "step": 33730 + }, + { + "epoch": 0.5702069408414524, + "grad_norm": 0.13690564036369324, + "learning_rate": 8.992121584096731e-06, + "loss": 0.0017, + "step": 33740 + }, + { + "epoch": 0.5703759411203029, + "grad_norm": 0.06848353147506714, + "learning_rate": 8.991233422889683e-06, + "loss": 0.0025, + "step": 33750 + }, + { + "epoch": 0.5705449413991533, + "grad_norm": 0.06637344509363174, + "learning_rate": 8.990344914425988e-06, + "loss": 0.003, + "step": 33760 + }, + { + "epoch": 0.5707139416780038, + "grad_norm": 0.07482563704252243, + "learning_rate": 8.989456058782952e-06, + "loss": 0.0019, + "step": 33770 + }, + { + "epoch": 0.5708829419568542, + "grad_norm": 0.02254422754049301, + "learning_rate": 8.988566856037908e-06, + "loss": 0.0008, + "step": 33780 + }, + { + "epoch": 0.5710519422357047, + "grad_norm": 0.1611790657043457, + "learning_rate": 8.987677306268226e-06, + "loss": 0.0023, + "step": 33790 + }, + { + "epoch": 0.5712209425145551, + "grad_norm": 0.07324439287185669, + "learning_rate": 8.986787409551294e-06, + "loss": 0.0015, + "step": 33800 + }, + { + "epoch": 0.5713899427934056, + "grad_norm": 0.36423519253730774, + "learning_rate": 8.985897165964544e-06, + "loss": 0.0025, + "step": 33810 + }, + { + "epoch": 0.5715589430722561, + "grad_norm": 0.09435511380434036, + "learning_rate": 8.985006575585426e-06, + "loss": 0.002, + "step": 33820 + }, + { + "epoch": 0.5717279433511065, + "grad_norm": 0.1214037612080574, + "learning_rate": 8.984115638491428e-06, + "loss": 0.0023, + "step": 33830 + }, + { + "epoch": 0.571896943629957, + "grad_norm": 0.1913721114397049, + "learning_rate": 8.983224354760068e-06, + "loss": 0.0029, + "step": 33840 + }, + { + "epoch": 0.5720659439088075, + "grad_norm": 0.026578059419989586, + "learning_rate": 8.98233272446889e-06, + "loss": 0.0027, + "step": 33850 + }, + { + "epoch": 0.5722349441876579, + "grad_norm": 0.19247399270534515, + "learning_rate": 8.981440747695467e-06, + "loss": 0.0023, + "step": 33860 + }, + { + "epoch": 0.5724039444665083, + "grad_norm": 0.1015344113111496, + "learning_rate": 8.980548424517412e-06, + "loss": 0.0046, + "step": 33870 + }, + { + "epoch": 0.5725729447453588, + "grad_norm": 0.0901235044002533, + "learning_rate": 8.979655755012355e-06, + "loss": 0.0026, + "step": 33880 + }, + { + "epoch": 0.5727419450242093, + "grad_norm": 
0.21112436056137085, + "learning_rate": 8.978762739257966e-06, + "loss": 0.0032, + "step": 33890 + }, + { + "epoch": 0.5729109453030597, + "grad_norm": 0.3893822431564331, + "learning_rate": 8.977869377331944e-06, + "loss": 0.0022, + "step": 33900 + }, + { + "epoch": 0.5730799455819102, + "grad_norm": 0.19143308699131012, + "learning_rate": 8.976975669312012e-06, + "loss": 0.0022, + "step": 33910 + }, + { + "epoch": 0.5732489458607607, + "grad_norm": 0.06548358500003815, + "learning_rate": 8.976081615275924e-06, + "loss": 0.007, + "step": 33920 + }, + { + "epoch": 0.5734179461396112, + "grad_norm": 0.04744194447994232, + "learning_rate": 8.975187215301475e-06, + "loss": 0.0019, + "step": 33930 + }, + { + "epoch": 0.5735869464184616, + "grad_norm": 0.07531792670488358, + "learning_rate": 8.974292469466477e-06, + "loss": 0.0022, + "step": 33940 + }, + { + "epoch": 0.573755946697312, + "grad_norm": 0.05589604005217552, + "learning_rate": 8.973397377848778e-06, + "loss": 0.0023, + "step": 33950 + }, + { + "epoch": 0.5739249469761625, + "grad_norm": 0.07739026844501495, + "learning_rate": 8.972501940526256e-06, + "loss": 0.0021, + "step": 33960 + }, + { + "epoch": 0.574093947255013, + "grad_norm": 0.027860552072525024, + "learning_rate": 8.971606157576818e-06, + "loss": 0.0024, + "step": 33970 + }, + { + "epoch": 0.5742629475338634, + "grad_norm": 0.034402504563331604, + "learning_rate": 8.9707100290784e-06, + "loss": 0.0018, + "step": 33980 + }, + { + "epoch": 0.5744319478127139, + "grad_norm": 0.12433133274316788, + "learning_rate": 8.969813555108972e-06, + "loss": 0.0022, + "step": 33990 + }, + { + "epoch": 0.5746009480915644, + "grad_norm": 0.0021321179810911417, + "learning_rate": 8.96891673574653e-06, + "loss": 0.0024, + "step": 34000 + }, + { + "epoch": 0.5747699483704148, + "grad_norm": 0.09629344195127487, + "learning_rate": 8.968019571069101e-06, + "loss": 0.0029, + "step": 34010 + }, + { + "epoch": 0.5749389486492653, + "grad_norm": 0.05528600513935089, + "learning_rate": 8.967122061154747e-06, + "loss": 0.002, + "step": 34020 + }, + { + "epoch": 0.5751079489281158, + "grad_norm": 0.05657871440052986, + "learning_rate": 8.96622420608155e-06, + "loss": 0.0015, + "step": 34030 + }, + { + "epoch": 0.5752769492069661, + "grad_norm": 0.03395133092999458, + "learning_rate": 8.965326005927633e-06, + "loss": 0.0018, + "step": 34040 + }, + { + "epoch": 0.5754459494858166, + "grad_norm": 0.08920653909444809, + "learning_rate": 8.96442746077114e-06, + "loss": 0.0022, + "step": 34050 + }, + { + "epoch": 0.5756149497646671, + "grad_norm": 0.06838784366846085, + "learning_rate": 8.963528570690247e-06, + "loss": 0.0017, + "step": 34060 + }, + { + "epoch": 0.5757839500435176, + "grad_norm": 0.0616769976913929, + "learning_rate": 8.962629335763166e-06, + "loss": 0.0044, + "step": 34070 + }, + { + "epoch": 0.575952950322368, + "grad_norm": 0.05100078135728836, + "learning_rate": 8.961729756068135e-06, + "loss": 0.003, + "step": 34080 + }, + { + "epoch": 0.5761219506012185, + "grad_norm": 0.08292622119188309, + "learning_rate": 8.96082983168342e-06, + "loss": 0.0039, + "step": 34090 + }, + { + "epoch": 0.576290950880069, + "grad_norm": 0.02434578351676464, + "learning_rate": 8.959929562687317e-06, + "loss": 0.0038, + "step": 34100 + }, + { + "epoch": 0.5764599511589195, + "grad_norm": 0.10119115561246872, + "learning_rate": 8.959028949158158e-06, + "loss": 0.0018, + "step": 34110 + }, + { + "epoch": 0.5766289514377698, + "grad_norm": 0.13875305652618408, + "learning_rate": 8.958127991174298e-06, + 
"loss": 0.0024, + "step": 34120 + }, + { + "epoch": 0.5767979517166203, + "grad_norm": 0.15823498368263245, + "learning_rate": 8.957226688814124e-06, + "loss": 0.0014, + "step": 34130 + }, + { + "epoch": 0.5769669519954708, + "grad_norm": 0.15111730992794037, + "learning_rate": 8.956325042156055e-06, + "loss": 0.0023, + "step": 34140 + }, + { + "epoch": 0.5771359522743212, + "grad_norm": 0.1382988840341568, + "learning_rate": 8.95542305127854e-06, + "loss": 0.0045, + "step": 34150 + }, + { + "epoch": 0.5773049525531717, + "grad_norm": 0.011094818823039532, + "learning_rate": 8.954520716260054e-06, + "loss": 0.0038, + "step": 34160 + }, + { + "epoch": 0.5774739528320222, + "grad_norm": 0.043708279728889465, + "learning_rate": 8.953618037179105e-06, + "loss": 0.0016, + "step": 34170 + }, + { + "epoch": 0.5776429531108727, + "grad_norm": 0.07289635390043259, + "learning_rate": 8.952715014114231e-06, + "loss": 0.0026, + "step": 34180 + }, + { + "epoch": 0.5778119533897231, + "grad_norm": 0.03761967271566391, + "learning_rate": 8.951811647144002e-06, + "loss": 0.0019, + "step": 34190 + }, + { + "epoch": 0.5779809536685736, + "grad_norm": 0.15979503095149994, + "learning_rate": 8.95090793634701e-06, + "loss": 0.0022, + "step": 34200 + }, + { + "epoch": 0.578149953947424, + "grad_norm": 0.10363156348466873, + "learning_rate": 8.950003881801885e-06, + "loss": 0.0035, + "step": 34210 + }, + { + "epoch": 0.5783189542262744, + "grad_norm": 0.1256801337003708, + "learning_rate": 8.949099483587286e-06, + "loss": 0.0016, + "step": 34220 + }, + { + "epoch": 0.5784879545051249, + "grad_norm": 0.11050673574209213, + "learning_rate": 8.948194741781897e-06, + "loss": 0.0022, + "step": 34230 + }, + { + "epoch": 0.5786569547839754, + "grad_norm": 0.06849426031112671, + "learning_rate": 8.947289656464437e-06, + "loss": 0.0023, + "step": 34240 + }, + { + "epoch": 0.5788259550628259, + "grad_norm": 0.20660637319087982, + "learning_rate": 8.94638422771365e-06, + "loss": 0.0035, + "step": 34250 + }, + { + "epoch": 0.5789949553416763, + "grad_norm": 0.13702276349067688, + "learning_rate": 8.945478455608313e-06, + "loss": 0.0029, + "step": 34260 + }, + { + "epoch": 0.5791639556205268, + "grad_norm": 0.19022051990032196, + "learning_rate": 8.944572340227239e-06, + "loss": 0.0026, + "step": 34270 + }, + { + "epoch": 0.5793329558993773, + "grad_norm": 0.01711823046207428, + "learning_rate": 8.943665881649257e-06, + "loss": 0.002, + "step": 34280 + }, + { + "epoch": 0.5795019561782276, + "grad_norm": 0.03700932487845421, + "learning_rate": 8.942759079953235e-06, + "loss": 0.0019, + "step": 34290 + }, + { + "epoch": 0.5796709564570781, + "grad_norm": 0.1668715626001358, + "learning_rate": 8.941851935218072e-06, + "loss": 0.0019, + "step": 34300 + }, + { + "epoch": 0.5798399567359286, + "grad_norm": 0.0773654580116272, + "learning_rate": 8.94094444752269e-06, + "loss": 0.0025, + "step": 34310 + }, + { + "epoch": 0.5800089570147791, + "grad_norm": 0.38987037539482117, + "learning_rate": 8.940036616946047e-06, + "loss": 0.0051, + "step": 34320 + }, + { + "epoch": 0.5801779572936295, + "grad_norm": 0.03946467489004135, + "learning_rate": 8.939128443567128e-06, + "loss": 0.0013, + "step": 34330 + }, + { + "epoch": 0.58034695757248, + "grad_norm": 0.036311425268650055, + "learning_rate": 8.93821992746495e-06, + "loss": 0.0021, + "step": 34340 + }, + { + "epoch": 0.5805159578513305, + "grad_norm": 0.07707275450229645, + "learning_rate": 8.937311068718557e-06, + "loss": 0.002, + "step": 34350 + }, + { + "epoch": 
0.580684958130181, + "grad_norm": 0.0606757290661335, + "learning_rate": 8.936401867407025e-06, + "loss": 0.0022, + "step": 34360 + }, + { + "epoch": 0.5808539584090314, + "grad_norm": 0.029825543984770775, + "learning_rate": 8.935492323609457e-06, + "loss": 0.0025, + "step": 34370 + }, + { + "epoch": 0.5810229586878818, + "grad_norm": 0.09910440444946289, + "learning_rate": 8.93458243740499e-06, + "loss": 0.0018, + "step": 34380 + }, + { + "epoch": 0.5811919589667323, + "grad_norm": 0.01993374340236187, + "learning_rate": 8.933672208872786e-06, + "loss": 0.0035, + "step": 34390 + }, + { + "epoch": 0.5813609592455827, + "grad_norm": 0.2147493213415146, + "learning_rate": 8.932761638092042e-06, + "loss": 0.0027, + "step": 34400 + }, + { + "epoch": 0.5815299595244332, + "grad_norm": 0.03388592600822449, + "learning_rate": 8.931850725141979e-06, + "loss": 0.0041, + "step": 34410 + }, + { + "epoch": 0.5816989598032837, + "grad_norm": 0.18381360173225403, + "learning_rate": 8.930939470101855e-06, + "loss": 0.0034, + "step": 34420 + }, + { + "epoch": 0.5818679600821342, + "grad_norm": 0.24191924929618835, + "learning_rate": 8.93002787305095e-06, + "loss": 0.0028, + "step": 34430 + }, + { + "epoch": 0.5820369603609846, + "grad_norm": 0.19341941177845, + "learning_rate": 8.92911593406858e-06, + "loss": 0.003, + "step": 34440 + }, + { + "epoch": 0.5822059606398351, + "grad_norm": 0.11159269511699677, + "learning_rate": 8.928203653234084e-06, + "loss": 0.002, + "step": 34450 + }, + { + "epoch": 0.5823749609186856, + "grad_norm": 0.0494178868830204, + "learning_rate": 8.927291030626842e-06, + "loss": 0.002, + "step": 34460 + }, + { + "epoch": 0.5825439611975359, + "grad_norm": 0.1472940593957901, + "learning_rate": 8.926378066326247e-06, + "loss": 0.003, + "step": 34470 + }, + { + "epoch": 0.5827129614763864, + "grad_norm": 0.09544549882411957, + "learning_rate": 8.925464760411739e-06, + "loss": 0.0035, + "step": 34480 + }, + { + "epoch": 0.5828819617552369, + "grad_norm": 0.09508192539215088, + "learning_rate": 8.924551112962779e-06, + "loss": 0.0026, + "step": 34490 + }, + { + "epoch": 0.5830509620340874, + "grad_norm": 0.0262440275400877, + "learning_rate": 8.923637124058854e-06, + "loss": 0.0035, + "step": 34500 + }, + { + "epoch": 0.5832199623129378, + "grad_norm": 0.07030729204416275, + "learning_rate": 8.922722793779492e-06, + "loss": 0.0027, + "step": 34510 + }, + { + "epoch": 0.5833889625917883, + "grad_norm": 0.006284997798502445, + "learning_rate": 8.921808122204239e-06, + "loss": 0.0026, + "step": 34520 + }, + { + "epoch": 0.5835579628706388, + "grad_norm": 0.03774796798825264, + "learning_rate": 8.920893109412678e-06, + "loss": 0.0027, + "step": 34530 + }, + { + "epoch": 0.5837269631494892, + "grad_norm": 0.09865015000104904, + "learning_rate": 8.91997775548442e-06, + "loss": 0.003, + "step": 34540 + }, + { + "epoch": 0.5838959634283396, + "grad_norm": 0.28293201327323914, + "learning_rate": 8.919062060499105e-06, + "loss": 0.0025, + "step": 34550 + }, + { + "epoch": 0.5840649637071901, + "grad_norm": 0.06183357164263725, + "learning_rate": 8.918146024536401e-06, + "loss": 0.0015, + "step": 34560 + }, + { + "epoch": 0.5842339639860405, + "grad_norm": 0.0637751892209053, + "learning_rate": 8.917229647676009e-06, + "loss": 0.0026, + "step": 34570 + }, + { + "epoch": 0.584402964264891, + "grad_norm": 0.10728228837251663, + "learning_rate": 8.916312929997659e-06, + "loss": 0.0046, + "step": 34580 + }, + { + "epoch": 0.5845719645437415, + "grad_norm": 0.13044388592243195, + "learning_rate": 
8.915395871581108e-06, + "loss": 0.0018, + "step": 34590 + }, + { + "epoch": 0.584740964822592, + "grad_norm": 0.18705812096595764, + "learning_rate": 8.914478472506146e-06, + "loss": 0.0022, + "step": 34600 + }, + { + "epoch": 0.5849099651014424, + "grad_norm": 0.3285347819328308, + "learning_rate": 8.913560732852592e-06, + "loss": 0.0014, + "step": 34610 + }, + { + "epoch": 0.5850789653802929, + "grad_norm": 0.2867709696292877, + "learning_rate": 8.912642652700292e-06, + "loss": 0.0028, + "step": 34620 + }, + { + "epoch": 0.5852479656591434, + "grad_norm": 0.024372834712266922, + "learning_rate": 8.911724232129124e-06, + "loss": 0.0021, + "step": 34630 + }, + { + "epoch": 0.5854169659379937, + "grad_norm": 0.031246446073055267, + "learning_rate": 8.910805471218994e-06, + "loss": 0.0015, + "step": 34640 + }, + { + "epoch": 0.5855859662168442, + "grad_norm": 0.15418541431427002, + "learning_rate": 8.90988637004984e-06, + "loss": 0.0021, + "step": 34650 + }, + { + "epoch": 0.5857549664956947, + "grad_norm": 0.21680156886577606, + "learning_rate": 8.90896692870163e-06, + "loss": 0.0018, + "step": 34660 + }, + { + "epoch": 0.5859239667745452, + "grad_norm": 0.10197052359580994, + "learning_rate": 8.908047147254356e-06, + "loss": 0.0014, + "step": 34670 + }, + { + "epoch": 0.5860929670533956, + "grad_norm": 0.1428227722644806, + "learning_rate": 8.907127025788045e-06, + "loss": 0.004, + "step": 34680 + }, + { + "epoch": 0.5862619673322461, + "grad_norm": 0.02672131545841694, + "learning_rate": 8.906206564382753e-06, + "loss": 0.0021, + "step": 34690 + }, + { + "epoch": 0.5864309676110966, + "grad_norm": 0.06392728537321091, + "learning_rate": 8.905285763118565e-06, + "loss": 0.002, + "step": 34700 + }, + { + "epoch": 0.5865999678899471, + "grad_norm": 0.08085457235574722, + "learning_rate": 8.904364622075593e-06, + "loss": 0.0034, + "step": 34710 + }, + { + "epoch": 0.5867689681687975, + "grad_norm": 0.1856919229030609, + "learning_rate": 8.903443141333982e-06, + "loss": 0.0027, + "step": 34720 + }, + { + "epoch": 0.5869379684476479, + "grad_norm": 0.12695810198783875, + "learning_rate": 8.902521320973905e-06, + "loss": 0.0039, + "step": 34730 + }, + { + "epoch": 0.5871069687264984, + "grad_norm": 0.043028876185417175, + "learning_rate": 8.901599161075565e-06, + "loss": 0.0015, + "step": 34740 + }, + { + "epoch": 0.5872759690053488, + "grad_norm": 0.04045486077666283, + "learning_rate": 8.900676661719195e-06, + "loss": 0.002, + "step": 34750 + }, + { + "epoch": 0.5874449692841993, + "grad_norm": 0.10769476741552353, + "learning_rate": 8.899753822985054e-06, + "loss": 0.0032, + "step": 34760 + }, + { + "epoch": 0.5876139695630498, + "grad_norm": 0.0701104998588562, + "learning_rate": 8.898830644953436e-06, + "loss": 0.0017, + "step": 34770 + }, + { + "epoch": 0.5877829698419003, + "grad_norm": 0.13843099772930145, + "learning_rate": 8.897907127704663e-06, + "loss": 0.0023, + "step": 34780 + }, + { + "epoch": 0.5879519701207507, + "grad_norm": 0.12599579989910126, + "learning_rate": 8.896983271319085e-06, + "loss": 0.0022, + "step": 34790 + }, + { + "epoch": 0.5881209703996012, + "grad_norm": 0.16534028947353363, + "learning_rate": 8.89605907587708e-06, + "loss": 0.0029, + "step": 34800 + }, + { + "epoch": 0.5882899706784516, + "grad_norm": 0.06567218899726868, + "learning_rate": 8.895134541459058e-06, + "loss": 0.0019, + "step": 34810 + }, + { + "epoch": 0.588458970957302, + "grad_norm": 0.283370703458786, + "learning_rate": 8.894209668145459e-06, + "loss": 0.002, + "step": 34820 + }, + { + 
"epoch": 0.5886279712361525, + "grad_norm": 0.026961514726281166, + "learning_rate": 8.89328445601675e-06, + "loss": 0.0021, + "step": 34830 + }, + { + "epoch": 0.588796971515003, + "grad_norm": 0.1643812507390976, + "learning_rate": 8.89235890515343e-06, + "loss": 0.002, + "step": 34840 + }, + { + "epoch": 0.5889659717938535, + "grad_norm": 0.06348308175802231, + "learning_rate": 8.891433015636028e-06, + "loss": 0.0027, + "step": 34850 + }, + { + "epoch": 0.5891349720727039, + "grad_norm": 0.042579248547554016, + "learning_rate": 8.890506787545099e-06, + "loss": 0.0018, + "step": 34860 + }, + { + "epoch": 0.5893039723515544, + "grad_norm": 0.06588687747716904, + "learning_rate": 8.889580220961228e-06, + "loss": 0.0026, + "step": 34870 + }, + { + "epoch": 0.5894729726304049, + "grad_norm": 0.13126246631145477, + "learning_rate": 8.888653315965033e-06, + "loss": 0.0015, + "step": 34880 + }, + { + "epoch": 0.5896419729092554, + "grad_norm": 0.1396174430847168, + "learning_rate": 8.887726072637159e-06, + "loss": 0.0025, + "step": 34890 + }, + { + "epoch": 0.5898109731881057, + "grad_norm": 0.1821875274181366, + "learning_rate": 8.88679849105828e-06, + "loss": 0.0028, + "step": 34900 + }, + { + "epoch": 0.5899799734669562, + "grad_norm": 0.017610182985663414, + "learning_rate": 8.8858705713091e-06, + "loss": 0.0019, + "step": 34910 + }, + { + "epoch": 0.5901489737458067, + "grad_norm": 0.14475102722644806, + "learning_rate": 8.884942313470353e-06, + "loss": 0.0019, + "step": 34920 + }, + { + "epoch": 0.5903179740246571, + "grad_norm": 0.07700800150632858, + "learning_rate": 8.884013717622802e-06, + "loss": 0.0021, + "step": 34930 + }, + { + "epoch": 0.5904869743035076, + "grad_norm": 0.08157818764448166, + "learning_rate": 8.88308478384724e-06, + "loss": 0.0021, + "step": 34940 + }, + { + "epoch": 0.5906559745823581, + "grad_norm": 0.08447717875242233, + "learning_rate": 8.882155512224486e-06, + "loss": 0.0022, + "step": 34950 + }, + { + "epoch": 0.5908249748612086, + "grad_norm": 0.10168961435556412, + "learning_rate": 8.881225902835394e-06, + "loss": 0.0019, + "step": 34960 + }, + { + "epoch": 0.590993975140059, + "grad_norm": 0.160582035779953, + "learning_rate": 8.880295955760842e-06, + "loss": 0.0019, + "step": 34970 + }, + { + "epoch": 0.5911629754189094, + "grad_norm": 0.11416268348693848, + "learning_rate": 8.879365671081743e-06, + "loss": 0.0021, + "step": 34980 + }, + { + "epoch": 0.5913319756977599, + "grad_norm": 0.5007433891296387, + "learning_rate": 8.878435048879034e-06, + "loss": 0.005, + "step": 34990 + }, + { + "epoch": 0.5915009759766103, + "grad_norm": 0.0692245215177536, + "learning_rate": 8.877504089233685e-06, + "loss": 0.0023, + "step": 35000 + }, + { + "epoch": 0.5916699762554608, + "grad_norm": 0.06784098595380783, + "learning_rate": 8.876572792226693e-06, + "loss": 0.002, + "step": 35010 + }, + { + "epoch": 0.5918389765343113, + "grad_norm": 0.10995849967002869, + "learning_rate": 8.875641157939085e-06, + "loss": 0.0018, + "step": 35020 + }, + { + "epoch": 0.5920079768131618, + "grad_norm": 0.03497052937746048, + "learning_rate": 8.87470918645192e-06, + "loss": 0.0025, + "step": 35030 + }, + { + "epoch": 0.5921769770920122, + "grad_norm": 0.07584590464830399, + "learning_rate": 8.87377687784628e-06, + "loss": 0.0015, + "step": 35040 + }, + { + "epoch": 0.5923459773708627, + "grad_norm": 0.05822953209280968, + "learning_rate": 8.872844232203284e-06, + "loss": 0.0024, + "step": 35050 + }, + { + "epoch": 0.5925149776497132, + "grad_norm": 0.09714366495609283, + 
"learning_rate": 8.871911249604076e-06, + "loss": 0.0032, + "step": 35060 + }, + { + "epoch": 0.5926839779285635, + "grad_norm": 0.045151591300964355, + "learning_rate": 8.870977930129828e-06, + "loss": 0.0025, + "step": 35070 + }, + { + "epoch": 0.592852978207414, + "grad_norm": 0.0487397275865078, + "learning_rate": 8.870044273861746e-06, + "loss": 0.0025, + "step": 35080 + }, + { + "epoch": 0.5930219784862645, + "grad_norm": 0.03342911973595619, + "learning_rate": 8.869110280881062e-06, + "loss": 0.002, + "step": 35090 + }, + { + "epoch": 0.593190978765115, + "grad_norm": 0.012296173721551895, + "learning_rate": 8.868175951269036e-06, + "loss": 0.0018, + "step": 35100 + }, + { + "epoch": 0.5933599790439654, + "grad_norm": 0.12815845012664795, + "learning_rate": 8.86724128510696e-06, + "loss": 0.0038, + "step": 35110 + }, + { + "epoch": 0.5935289793228159, + "grad_norm": 0.07406541705131531, + "learning_rate": 8.866306282476155e-06, + "loss": 0.0018, + "step": 35120 + }, + { + "epoch": 0.5936979796016664, + "grad_norm": 0.0447167307138443, + "learning_rate": 8.865370943457973e-06, + "loss": 0.0027, + "step": 35130 + }, + { + "epoch": 0.5938669798805168, + "grad_norm": 0.11929440498352051, + "learning_rate": 8.864435268133789e-06, + "loss": 0.0019, + "step": 35140 + }, + { + "epoch": 0.5940359801593673, + "grad_norm": 0.11725620925426483, + "learning_rate": 8.863499256585012e-06, + "loss": 0.0017, + "step": 35150 + }, + { + "epoch": 0.5942049804382177, + "grad_norm": 0.02146654948592186, + "learning_rate": 8.862562908893081e-06, + "loss": 0.0033, + "step": 35160 + }, + { + "epoch": 0.5943739807170682, + "grad_norm": 0.027879882603883743, + "learning_rate": 8.861626225139465e-06, + "loss": 0.002, + "step": 35170 + }, + { + "epoch": 0.5945429809959186, + "grad_norm": 0.04303164780139923, + "learning_rate": 8.860689205405655e-06, + "loss": 0.0024, + "step": 35180 + }, + { + "epoch": 0.5947119812747691, + "grad_norm": 0.08739340305328369, + "learning_rate": 8.859751849773179e-06, + "loss": 0.0033, + "step": 35190 + }, + { + "epoch": 0.5948809815536196, + "grad_norm": 0.08744668960571289, + "learning_rate": 8.858814158323593e-06, + "loss": 0.002, + "step": 35200 + }, + { + "epoch": 0.59504998183247, + "grad_norm": 0.07492510974407196, + "learning_rate": 8.857876131138476e-06, + "loss": 0.003, + "step": 35210 + }, + { + "epoch": 0.5952189821113205, + "grad_norm": 0.08802341669797897, + "learning_rate": 8.856937768299445e-06, + "loss": 0.0015, + "step": 35220 + }, + { + "epoch": 0.595387982390171, + "grad_norm": 0.11602721363306046, + "learning_rate": 8.855999069888141e-06, + "loss": 0.0025, + "step": 35230 + }, + { + "epoch": 0.5955569826690214, + "grad_norm": 0.07800030708312988, + "learning_rate": 8.855060035986235e-06, + "loss": 0.0012, + "step": 35240 + }, + { + "epoch": 0.5957259829478718, + "grad_norm": 0.052270859479904175, + "learning_rate": 8.85412066667543e-06, + "loss": 0.0015, + "step": 35250 + }, + { + "epoch": 0.5958949832267223, + "grad_norm": 0.04862003028392792, + "learning_rate": 8.85318096203745e-06, + "loss": 0.0024, + "step": 35260 + }, + { + "epoch": 0.5960639835055728, + "grad_norm": 0.7330689430236816, + "learning_rate": 8.852240922154059e-06, + "loss": 0.0024, + "step": 35270 + }, + { + "epoch": 0.5962329837844232, + "grad_norm": 0.056989800184965134, + "learning_rate": 8.851300547107042e-06, + "loss": 0.0022, + "step": 35280 + }, + { + "epoch": 0.5964019840632737, + "grad_norm": 0.09635084867477417, + "learning_rate": 8.850359836978218e-06, + "loss": 0.0023, + "step": 
35290 + }, + { + "epoch": 0.5965709843421242, + "grad_norm": 0.10977847874164581, + "learning_rate": 8.849418791849433e-06, + "loss": 0.002, + "step": 35300 + }, + { + "epoch": 0.5967399846209747, + "grad_norm": 0.03430016711354256, + "learning_rate": 8.84847741180256e-06, + "loss": 0.0021, + "step": 35310 + }, + { + "epoch": 0.5969089848998251, + "grad_norm": 0.026696454733610153, + "learning_rate": 8.847535696919509e-06, + "loss": 0.0018, + "step": 35320 + }, + { + "epoch": 0.5970779851786755, + "grad_norm": 0.15002593398094177, + "learning_rate": 8.84659364728221e-06, + "loss": 0.0024, + "step": 35330 + }, + { + "epoch": 0.597246985457526, + "grad_norm": 0.05427820608019829, + "learning_rate": 8.845651262972625e-06, + "loss": 0.0024, + "step": 35340 + }, + { + "epoch": 0.5974159857363764, + "grad_norm": 0.10184963792562485, + "learning_rate": 8.844708544072749e-06, + "loss": 0.0015, + "step": 35350 + }, + { + "epoch": 0.5975849860152269, + "grad_norm": 0.07781493663787842, + "learning_rate": 8.843765490664601e-06, + "loss": 0.0029, + "step": 35360 + }, + { + "epoch": 0.5977539862940774, + "grad_norm": 0.11765439808368683, + "learning_rate": 8.842822102830233e-06, + "loss": 0.0042, + "step": 35370 + }, + { + "epoch": 0.5979229865729279, + "grad_norm": 0.29873672127723694, + "learning_rate": 8.841878380651721e-06, + "loss": 0.0016, + "step": 35380 + }, + { + "epoch": 0.5980919868517783, + "grad_norm": 0.059736013412475586, + "learning_rate": 8.840934324211178e-06, + "loss": 0.0019, + "step": 35390 + }, + { + "epoch": 0.5982609871306288, + "grad_norm": 0.08907829970121384, + "learning_rate": 8.839989933590738e-06, + "loss": 0.0029, + "step": 35400 + }, + { + "epoch": 0.5984299874094793, + "grad_norm": 0.05392741784453392, + "learning_rate": 8.839045208872568e-06, + "loss": 0.0032, + "step": 35410 + }, + { + "epoch": 0.5985989876883296, + "grad_norm": 0.033009592443704605, + "learning_rate": 8.838100150138864e-06, + "loss": 0.0016, + "step": 35420 + }, + { + "epoch": 0.5987679879671801, + "grad_norm": 0.07848300039768219, + "learning_rate": 8.83715475747185e-06, + "loss": 0.0028, + "step": 35430 + }, + { + "epoch": 0.5989369882460306, + "grad_norm": 0.08762569725513458, + "learning_rate": 8.836209030953784e-06, + "loss": 0.0029, + "step": 35440 + }, + { + "epoch": 0.5991059885248811, + "grad_norm": 0.03356378898024559, + "learning_rate": 8.835262970666943e-06, + "loss": 0.0021, + "step": 35450 + }, + { + "epoch": 0.5992749888037315, + "grad_norm": 0.09711670875549316, + "learning_rate": 8.834316576693642e-06, + "loss": 0.0026, + "step": 35460 + }, + { + "epoch": 0.599443989082582, + "grad_norm": 0.05829253047704697, + "learning_rate": 8.83336984911622e-06, + "loss": 0.0022, + "step": 35470 + }, + { + "epoch": 0.5996129893614325, + "grad_norm": 0.09206686168909073, + "learning_rate": 8.83242278801705e-06, + "loss": 0.0022, + "step": 35480 + }, + { + "epoch": 0.599781989640283, + "grad_norm": 0.05280447378754616, + "learning_rate": 8.831475393478529e-06, + "loss": 0.0018, + "step": 35490 + }, + { + "epoch": 0.5999509899191333, + "grad_norm": 0.1475137323141098, + "learning_rate": 8.830527665583083e-06, + "loss": 0.0032, + "step": 35500 + }, + { + "epoch": 0.6001199901979838, + "grad_norm": 0.04320276901125908, + "learning_rate": 8.829579604413172e-06, + "loss": 0.0028, + "step": 35510 + }, + { + "epoch": 0.6002889904768343, + "grad_norm": 0.07844381034374237, + "learning_rate": 8.82863121005128e-06, + "loss": 0.0019, + "step": 35520 + }, + { + "epoch": 0.6004579907556847, + "grad_norm": 
0.07419198751449585, + "learning_rate": 8.827682482579923e-06, + "loss": 0.0019, + "step": 35530 + }, + { + "epoch": 0.6006269910345352, + "grad_norm": 0.09416607767343521, + "learning_rate": 8.826733422081644e-06, + "loss": 0.0023, + "step": 35540 + }, + { + "epoch": 0.6007959913133857, + "grad_norm": 0.020704537630081177, + "learning_rate": 8.825784028639016e-06, + "loss": 0.002, + "step": 35550 + }, + { + "epoch": 0.6009649915922362, + "grad_norm": 0.15058861672878265, + "learning_rate": 8.824834302334641e-06, + "loss": 0.002, + "step": 35560 + }, + { + "epoch": 0.6011339918710866, + "grad_norm": 0.32956650853157043, + "learning_rate": 8.823884243251152e-06, + "loss": 0.0032, + "step": 35570 + }, + { + "epoch": 0.6013029921499371, + "grad_norm": 0.24377502501010895, + "learning_rate": 8.822933851471205e-06, + "loss": 0.0047, + "step": 35580 + }, + { + "epoch": 0.6014719924287875, + "grad_norm": 0.11893951892852783, + "learning_rate": 8.821983127077492e-06, + "loss": 0.0018, + "step": 35590 + }, + { + "epoch": 0.6016409927076379, + "grad_norm": 0.1588331013917923, + "learning_rate": 8.821032070152726e-06, + "loss": 0.0021, + "step": 35600 + }, + { + "epoch": 0.6018099929864884, + "grad_norm": 0.04255605861544609, + "learning_rate": 8.820080680779659e-06, + "loss": 0.0031, + "step": 35610 + }, + { + "epoch": 0.6019789932653389, + "grad_norm": 0.09891010075807571, + "learning_rate": 8.819128959041064e-06, + "loss": 0.002, + "step": 35620 + }, + { + "epoch": 0.6021479935441894, + "grad_norm": 0.16634584963321686, + "learning_rate": 8.818176905019744e-06, + "loss": 0.0045, + "step": 35630 + }, + { + "epoch": 0.6023169938230398, + "grad_norm": 0.16242848336696625, + "learning_rate": 8.817224518798535e-06, + "loss": 0.003, + "step": 35640 + }, + { + "epoch": 0.6024859941018903, + "grad_norm": 0.028616510331630707, + "learning_rate": 8.816271800460297e-06, + "loss": 0.0044, + "step": 35650 + }, + { + "epoch": 0.6026549943807408, + "grad_norm": 0.02689216285943985, + "learning_rate": 8.815318750087923e-06, + "loss": 0.0032, + "step": 35660 + }, + { + "epoch": 0.6028239946595911, + "grad_norm": 0.044815417379140854, + "learning_rate": 8.814365367764332e-06, + "loss": 0.0016, + "step": 35670 + }, + { + "epoch": 0.6029929949384416, + "grad_norm": 0.06668156385421753, + "learning_rate": 8.813411653572473e-06, + "loss": 0.0014, + "step": 35680 + }, + { + "epoch": 0.6031619952172921, + "grad_norm": 0.10036733001470566, + "learning_rate": 8.812457607595324e-06, + "loss": 0.002, + "step": 35690 + }, + { + "epoch": 0.6033309954961426, + "grad_norm": 0.1022668406367302, + "learning_rate": 8.811503229915889e-06, + "loss": 0.0024, + "step": 35700 + }, + { + "epoch": 0.603499995774993, + "grad_norm": 0.04607870429754257, + "learning_rate": 8.81054852061721e-06, + "loss": 0.0033, + "step": 35710 + }, + { + "epoch": 0.6036689960538435, + "grad_norm": 0.04547217860817909, + "learning_rate": 8.809593479782343e-06, + "loss": 0.0022, + "step": 35720 + }, + { + "epoch": 0.603837996332694, + "grad_norm": 0.16747385263442993, + "learning_rate": 8.808638107494388e-06, + "loss": 0.0034, + "step": 35730 + }, + { + "epoch": 0.6040069966115444, + "grad_norm": 0.10530160367488861, + "learning_rate": 8.807682403836464e-06, + "loss": 0.0081, + "step": 35740 + }, + { + "epoch": 0.6041759968903949, + "grad_norm": 0.12396273016929626, + "learning_rate": 8.806726368891723e-06, + "loss": 0.0028, + "step": 35750 + }, + { + "epoch": 0.6043449971692453, + "grad_norm": 0.13143059611320496, + "learning_rate": 8.80577000274334e-06, + 
"loss": 0.0032, + "step": 35760 + }, + { + "epoch": 0.6045139974480958, + "grad_norm": 0.17417097091674805, + "learning_rate": 8.804813305474533e-06, + "loss": 0.0022, + "step": 35770 + }, + { + "epoch": 0.6046829977269462, + "grad_norm": 0.24094782769680023, + "learning_rate": 8.80385627716853e-06, + "loss": 0.0025, + "step": 35780 + }, + { + "epoch": 0.6048519980057967, + "grad_norm": 0.16232720017433167, + "learning_rate": 8.802898917908601e-06, + "loss": 0.0037, + "step": 35790 + }, + { + "epoch": 0.6050209982846472, + "grad_norm": 0.040980543941259384, + "learning_rate": 8.801941227778042e-06, + "loss": 0.0019, + "step": 35800 + }, + { + "epoch": 0.6051899985634976, + "grad_norm": 0.08733880519866943, + "learning_rate": 8.800983206860173e-06, + "loss": 0.0037, + "step": 35810 + }, + { + "epoch": 0.6053589988423481, + "grad_norm": 0.0999969094991684, + "learning_rate": 8.800024855238351e-06, + "loss": 0.0022, + "step": 35820 + }, + { + "epoch": 0.6055279991211986, + "grad_norm": 0.2854050099849701, + "learning_rate": 8.799066172995955e-06, + "loss": 0.0032, + "step": 35830 + }, + { + "epoch": 0.6056969994000491, + "grad_norm": 0.06074146181344986, + "learning_rate": 8.798107160216394e-06, + "loss": 0.0016, + "step": 35840 + }, + { + "epoch": 0.6058659996788994, + "grad_norm": 0.27209043502807617, + "learning_rate": 8.79714781698311e-06, + "loss": 0.0023, + "step": 35850 + }, + { + "epoch": 0.6060349999577499, + "grad_norm": 0.1067693680524826, + "learning_rate": 8.796188143379565e-06, + "loss": 0.0028, + "step": 35860 + }, + { + "epoch": 0.6062040002366004, + "grad_norm": 0.12572214007377625, + "learning_rate": 8.79522813948926e-06, + "loss": 0.0015, + "step": 35870 + }, + { + "epoch": 0.6063730005154508, + "grad_norm": 0.08308408409357071, + "learning_rate": 8.79426780539572e-06, + "loss": 0.0026, + "step": 35880 + }, + { + "epoch": 0.6065420007943013, + "grad_norm": 0.053814616054296494, + "learning_rate": 8.793307141182496e-06, + "loss": 0.0037, + "step": 35890 + }, + { + "epoch": 0.6067110010731518, + "grad_norm": 0.15410888195037842, + "learning_rate": 8.792346146933172e-06, + "loss": 0.0011, + "step": 35900 + }, + { + "epoch": 0.6068800013520023, + "grad_norm": 0.02498161606490612, + "learning_rate": 8.791384822731358e-06, + "loss": 0.0027, + "step": 35910 + }, + { + "epoch": 0.6070490016308527, + "grad_norm": 0.10394150763750076, + "learning_rate": 8.790423168660695e-06, + "loss": 0.0032, + "step": 35920 + }, + { + "epoch": 0.6072180019097031, + "grad_norm": 0.02300155907869339, + "learning_rate": 8.789461184804853e-06, + "loss": 0.0021, + "step": 35930 + }, + { + "epoch": 0.6073870021885536, + "grad_norm": 0.061770204454660416, + "learning_rate": 8.788498871247526e-06, + "loss": 0.0034, + "step": 35940 + }, + { + "epoch": 0.607556002467404, + "grad_norm": 0.07594380527734756, + "learning_rate": 8.787536228072442e-06, + "loss": 0.0021, + "step": 35950 + }, + { + "epoch": 0.6077250027462545, + "grad_norm": 0.10542141646146774, + "learning_rate": 8.786573255363355e-06, + "loss": 0.0016, + "step": 35960 + }, + { + "epoch": 0.607894003025105, + "grad_norm": 0.14784705638885498, + "learning_rate": 8.785609953204047e-06, + "loss": 0.003, + "step": 35970 + }, + { + "epoch": 0.6080630033039555, + "grad_norm": 0.034316789358854294, + "learning_rate": 8.784646321678332e-06, + "loss": 0.0016, + "step": 35980 + }, + { + "epoch": 0.6082320035828059, + "grad_norm": 0.10311736911535263, + "learning_rate": 8.783682360870052e-06, + "loss": 0.0018, + "step": 35990 + }, + { + "epoch": 
0.6084010038616564, + "grad_norm": 0.11101002246141434, + "learning_rate": 8.782718070863072e-06, + "loss": 0.0023, + "step": 36000 + }, + { + "epoch": 0.6085700041405069, + "grad_norm": 0.029666326940059662, + "learning_rate": 8.781753451741295e-06, + "loss": 0.0023, + "step": 36010 + }, + { + "epoch": 0.6087390044193572, + "grad_norm": 0.07296469062566757, + "learning_rate": 8.780788503588642e-06, + "loss": 0.0028, + "step": 36020 + }, + { + "epoch": 0.6089080046982077, + "grad_norm": 0.0563102550804615, + "learning_rate": 8.77982322648907e-06, + "loss": 0.0008, + "step": 36030 + }, + { + "epoch": 0.6090770049770582, + "grad_norm": 0.0819505974650383, + "learning_rate": 8.778857620526566e-06, + "loss": 0.0026, + "step": 36040 + }, + { + "epoch": 0.6092460052559087, + "grad_norm": 0.021597327664494514, + "learning_rate": 8.77789168578514e-06, + "loss": 0.0014, + "step": 36050 + }, + { + "epoch": 0.6094150055347591, + "grad_norm": 0.07086276262998581, + "learning_rate": 8.776925422348833e-06, + "loss": 0.0016, + "step": 36060 + }, + { + "epoch": 0.6095840058136096, + "grad_norm": 0.06087581813335419, + "learning_rate": 8.775958830301713e-06, + "loss": 0.0024, + "step": 36070 + }, + { + "epoch": 0.6097530060924601, + "grad_norm": 0.264974981546402, + "learning_rate": 8.77499190972788e-06, + "loss": 0.005, + "step": 36080 + }, + { + "epoch": 0.6099220063713106, + "grad_norm": 0.29691457748413086, + "learning_rate": 8.774024660711462e-06, + "loss": 0.0027, + "step": 36090 + }, + { + "epoch": 0.610091006650161, + "grad_norm": 0.1828441023826599, + "learning_rate": 8.773057083336612e-06, + "loss": 0.0026, + "step": 36100 + }, + { + "epoch": 0.6102600069290114, + "grad_norm": 0.06337442249059677, + "learning_rate": 8.772089177687516e-06, + "loss": 0.0033, + "step": 36110 + }, + { + "epoch": 0.6104290072078619, + "grad_norm": 0.11669375747442245, + "learning_rate": 8.771120943848384e-06, + "loss": 0.0013, + "step": 36120 + }, + { + "epoch": 0.6105980074867123, + "grad_norm": 0.20372651517391205, + "learning_rate": 8.77015238190346e-06, + "loss": 0.0023, + "step": 36130 + }, + { + "epoch": 0.6107670077655628, + "grad_norm": 0.24548251926898956, + "learning_rate": 8.76918349193701e-06, + "loss": 0.005, + "step": 36140 + }, + { + "epoch": 0.6109360080444133, + "grad_norm": 0.12593404948711395, + "learning_rate": 8.768214274033334e-06, + "loss": 0.0025, + "step": 36150 + }, + { + "epoch": 0.6111050083232638, + "grad_norm": 0.1450646072626114, + "learning_rate": 8.76724472827676e-06, + "loss": 0.0019, + "step": 36160 + }, + { + "epoch": 0.6112740086021142, + "grad_norm": 0.11664585769176483, + "learning_rate": 8.766274854751642e-06, + "loss": 0.0019, + "step": 36170 + }, + { + "epoch": 0.6114430088809647, + "grad_norm": 0.20358210802078247, + "learning_rate": 8.765304653542362e-06, + "loss": 0.0031, + "step": 36180 + }, + { + "epoch": 0.6116120091598151, + "grad_norm": 0.08079025894403458, + "learning_rate": 8.764334124733336e-06, + "loss": 0.0043, + "step": 36190 + }, + { + "epoch": 0.6117810094386655, + "grad_norm": 0.18731054663658142, + "learning_rate": 8.763363268409002e-06, + "loss": 0.003, + "step": 36200 + }, + { + "epoch": 0.611950009717516, + "grad_norm": 0.023311221972107887, + "learning_rate": 8.762392084653829e-06, + "loss": 0.004, + "step": 36210 + }, + { + "epoch": 0.6121190099963665, + "grad_norm": 0.029529789462685585, + "learning_rate": 8.761420573552314e-06, + "loss": 0.0028, + "step": 36220 + }, + { + "epoch": 0.612288010275217, + "grad_norm": 0.05020727589726448, + 
"learning_rate": 8.760448735188987e-06, + "loss": 0.0056, + "step": 36230 + }, + { + "epoch": 0.6124570105540674, + "grad_norm": 0.04046209901571274, + "learning_rate": 8.7594765696484e-06, + "loss": 0.0025, + "step": 36240 + }, + { + "epoch": 0.6126260108329179, + "grad_norm": 0.04337368533015251, + "learning_rate": 8.758504077015136e-06, + "loss": 0.0034, + "step": 36250 + }, + { + "epoch": 0.6127950111117684, + "grad_norm": 0.05420010909438133, + "learning_rate": 8.757531257373806e-06, + "loss": 0.0022, + "step": 36260 + }, + { + "epoch": 0.6129640113906188, + "grad_norm": 0.016965752467513084, + "learning_rate": 8.756558110809052e-06, + "loss": 0.0043, + "step": 36270 + }, + { + "epoch": 0.6131330116694692, + "grad_norm": 0.08924896270036697, + "learning_rate": 8.755584637405541e-06, + "loss": 0.0031, + "step": 36280 + }, + { + "epoch": 0.6133020119483197, + "grad_norm": 0.017866497859358788, + "learning_rate": 8.75461083724797e-06, + "loss": 0.0023, + "step": 36290 + }, + { + "epoch": 0.6134710122271702, + "grad_norm": 0.10966131091117859, + "learning_rate": 8.753636710421067e-06, + "loss": 0.0036, + "step": 36300 + }, + { + "epoch": 0.6136400125060206, + "grad_norm": 0.022142933681607246, + "learning_rate": 8.75266225700958e-06, + "loss": 0.0024, + "step": 36310 + }, + { + "epoch": 0.6138090127848711, + "grad_norm": 0.11732402443885803, + "learning_rate": 8.751687477098296e-06, + "loss": 0.002, + "step": 36320 + }, + { + "epoch": 0.6139780130637216, + "grad_norm": 0.07567041367292404, + "learning_rate": 8.750712370772025e-06, + "loss": 0.0013, + "step": 36330 + }, + { + "epoch": 0.614147013342572, + "grad_norm": 0.07736122608184814, + "learning_rate": 8.749736938115605e-06, + "loss": 0.0032, + "step": 36340 + }, + { + "epoch": 0.6143160136214225, + "grad_norm": 0.023870807141065598, + "learning_rate": 8.748761179213903e-06, + "loss": 0.0017, + "step": 36350 + }, + { + "epoch": 0.6144850139002729, + "grad_norm": 0.16306602954864502, + "learning_rate": 8.747785094151815e-06, + "loss": 0.0043, + "step": 36360 + }, + { + "epoch": 0.6146540141791234, + "grad_norm": 0.08596381545066833, + "learning_rate": 8.746808683014266e-06, + "loss": 0.005, + "step": 36370 + }, + { + "epoch": 0.6148230144579738, + "grad_norm": 0.16764451563358307, + "learning_rate": 8.745831945886207e-06, + "loss": 0.0028, + "step": 36380 + }, + { + "epoch": 0.6149920147368243, + "grad_norm": 0.054501231759786606, + "learning_rate": 8.74485488285262e-06, + "loss": 0.0017, + "step": 36390 + }, + { + "epoch": 0.6151610150156748, + "grad_norm": 0.07855256646871567, + "learning_rate": 8.743877493998514e-06, + "loss": 0.0025, + "step": 36400 + }, + { + "epoch": 0.6153300152945252, + "grad_norm": 0.14877623319625854, + "learning_rate": 8.742899779408926e-06, + "loss": 0.0018, + "step": 36410 + }, + { + "epoch": 0.6154990155733757, + "grad_norm": 0.03570941835641861, + "learning_rate": 8.741921739168923e-06, + "loss": 0.0049, + "step": 36420 + }, + { + "epoch": 0.6156680158522262, + "grad_norm": 0.11335615813732147, + "learning_rate": 8.7409433733636e-06, + "loss": 0.0023, + "step": 36430 + }, + { + "epoch": 0.6158370161310767, + "grad_norm": 0.23981893062591553, + "learning_rate": 8.739964682078076e-06, + "loss": 0.0026, + "step": 36440 + }, + { + "epoch": 0.616006016409927, + "grad_norm": 0.09835696965456009, + "learning_rate": 8.738985665397505e-06, + "loss": 0.0034, + "step": 36450 + }, + { + "epoch": 0.6161750166887775, + "grad_norm": 0.08674298226833344, + "learning_rate": 8.738006323407064e-06, + "loss": 0.0084, + 
"step": 36460 + }, + { + "epoch": 0.616344016967628, + "grad_norm": 0.04812869057059288, + "learning_rate": 8.737026656191964e-06, + "loss": 0.0019, + "step": 36470 + }, + { + "epoch": 0.6165130172464784, + "grad_norm": 0.0043332562781870365, + "learning_rate": 8.736046663837439e-06, + "loss": 0.0029, + "step": 36480 + }, + { + "epoch": 0.6166820175253289, + "grad_norm": 0.10359267890453339, + "learning_rate": 8.735066346428751e-06, + "loss": 0.0017, + "step": 36490 + }, + { + "epoch": 0.6168510178041794, + "grad_norm": 0.06938648223876953, + "learning_rate": 8.734085704051194e-06, + "loss": 0.0025, + "step": 36500 + }, + { + "epoch": 0.6170200180830299, + "grad_norm": 0.09565950185060501, + "learning_rate": 8.733104736790088e-06, + "loss": 0.0028, + "step": 36510 + }, + { + "epoch": 0.6171890183618803, + "grad_norm": 0.19001342356204987, + "learning_rate": 8.732123444730785e-06, + "loss": 0.0013, + "step": 36520 + }, + { + "epoch": 0.6173580186407308, + "grad_norm": 0.22249983251094818, + "learning_rate": 8.731141827958659e-06, + "loss": 0.0024, + "step": 36530 + }, + { + "epoch": 0.6175270189195812, + "grad_norm": 0.06101768836379051, + "learning_rate": 8.730159886559116e-06, + "loss": 0.0018, + "step": 36540 + }, + { + "epoch": 0.6176960191984316, + "grad_norm": 0.05670422315597534, + "learning_rate": 8.729177620617588e-06, + "loss": 0.0024, + "step": 36550 + }, + { + "epoch": 0.6178650194772821, + "grad_norm": 0.10868832468986511, + "learning_rate": 8.72819503021954e-06, + "loss": 0.0022, + "step": 36560 + }, + { + "epoch": 0.6180340197561326, + "grad_norm": 0.1556289792060852, + "learning_rate": 8.727212115450462e-06, + "loss": 0.0029, + "step": 36570 + }, + { + "epoch": 0.6182030200349831, + "grad_norm": 0.06256671994924545, + "learning_rate": 8.72622887639587e-06, + "loss": 0.0023, + "step": 36580 + }, + { + "epoch": 0.6183720203138335, + "grad_norm": 0.04170137643814087, + "learning_rate": 8.725245313141313e-06, + "loss": 0.0015, + "step": 36590 + }, + { + "epoch": 0.618541020592684, + "grad_norm": 0.07276473194360733, + "learning_rate": 8.724261425772362e-06, + "loss": 0.0034, + "step": 36600 + }, + { + "epoch": 0.6187100208715345, + "grad_norm": 0.08967496454715729, + "learning_rate": 8.723277214374625e-06, + "loss": 0.0022, + "step": 36610 + }, + { + "epoch": 0.6188790211503848, + "grad_norm": 0.051659129559993744, + "learning_rate": 8.722292679033731e-06, + "loss": 0.0023, + "step": 36620 + }, + { + "epoch": 0.6190480214292353, + "grad_norm": 0.04656399413943291, + "learning_rate": 8.721307819835336e-06, + "loss": 0.0017, + "step": 36630 + }, + { + "epoch": 0.6192170217080858, + "grad_norm": 0.025773653760552406, + "learning_rate": 8.720322636865132e-06, + "loss": 0.0016, + "step": 36640 + }, + { + "epoch": 0.6193860219869363, + "grad_norm": 0.08894651383161545, + "learning_rate": 8.719337130208833e-06, + "loss": 0.0028, + "step": 36650 + }, + { + "epoch": 0.6195550222657867, + "grad_norm": 0.05207120627164841, + "learning_rate": 8.718351299952185e-06, + "loss": 0.0017, + "step": 36660 + }, + { + "epoch": 0.6197240225446372, + "grad_norm": 0.1420729160308838, + "learning_rate": 8.717365146180956e-06, + "loss": 0.0025, + "step": 36670 + }, + { + "epoch": 0.6198930228234877, + "grad_norm": 0.105345718562603, + "learning_rate": 8.71637866898095e-06, + "loss": 0.0026, + "step": 36680 + }, + { + "epoch": 0.6200620231023382, + "grad_norm": 0.16853417456150055, + "learning_rate": 8.715391868437992e-06, + "loss": 0.0017, + "step": 36690 + }, + { + "epoch": 0.6202310233811886, + 
"grad_norm": 0.07919266819953918, + "learning_rate": 8.714404744637938e-06, + "loss": 0.0018, + "step": 36700 + }, + { + "epoch": 0.620400023660039, + "grad_norm": 0.05420541763305664, + "learning_rate": 8.713417297666678e-06, + "loss": 0.0035, + "step": 36710 + }, + { + "epoch": 0.6205690239388895, + "grad_norm": 0.12405819445848465, + "learning_rate": 8.71242952761012e-06, + "loss": 0.0014, + "step": 36720 + }, + { + "epoch": 0.6207380242177399, + "grad_norm": 0.14918090403079987, + "learning_rate": 8.711441434554207e-06, + "loss": 0.0017, + "step": 36730 + }, + { + "epoch": 0.6209070244965904, + "grad_norm": 0.1024555116891861, + "learning_rate": 8.710453018584906e-06, + "loss": 0.0017, + "step": 36740 + }, + { + "epoch": 0.6210760247754409, + "grad_norm": 0.0839439108967781, + "learning_rate": 8.709464279788213e-06, + "loss": 0.0018, + "step": 36750 + }, + { + "epoch": 0.6212450250542914, + "grad_norm": 0.11152900755405426, + "learning_rate": 8.708475218250158e-06, + "loss": 0.0024, + "step": 36760 + }, + { + "epoch": 0.6214140253331418, + "grad_norm": 0.0031347458716481924, + "learning_rate": 8.707485834056789e-06, + "loss": 0.0015, + "step": 36770 + }, + { + "epoch": 0.6215830256119923, + "grad_norm": 0.1972360461950302, + "learning_rate": 8.706496127294191e-06, + "loss": 0.0034, + "step": 36780 + }, + { + "epoch": 0.6217520258908428, + "grad_norm": 0.057392384856939316, + "learning_rate": 8.705506098048468e-06, + "loss": 0.0021, + "step": 36790 + }, + { + "epoch": 0.6219210261696931, + "grad_norm": 0.2517576813697815, + "learning_rate": 8.704515746405764e-06, + "loss": 0.0029, + "step": 36800 + }, + { + "epoch": 0.6220900264485436, + "grad_norm": 0.08012565225362778, + "learning_rate": 8.703525072452241e-06, + "loss": 0.0024, + "step": 36810 + }, + { + "epoch": 0.6222590267273941, + "grad_norm": 0.023475486785173416, + "learning_rate": 8.702534076274092e-06, + "loss": 0.0027, + "step": 36820 + }, + { + "epoch": 0.6224280270062446, + "grad_norm": 0.04848853126168251, + "learning_rate": 8.701542757957539e-06, + "loss": 0.0025, + "step": 36830 + }, + { + "epoch": 0.622597027285095, + "grad_norm": 0.02437884360551834, + "learning_rate": 8.700551117588834e-06, + "loss": 0.0022, + "step": 36840 + }, + { + "epoch": 0.6227660275639455, + "grad_norm": 0.07362344115972519, + "learning_rate": 8.69955915525425e-06, + "loss": 0.0043, + "step": 36850 + }, + { + "epoch": 0.622935027842796, + "grad_norm": 0.06642599403858185, + "learning_rate": 8.698566871040094e-06, + "loss": 0.0022, + "step": 36860 + }, + { + "epoch": 0.6231040281216464, + "grad_norm": 0.07961343228816986, + "learning_rate": 8.697574265032701e-06, + "loss": 0.0024, + "step": 36870 + }, + { + "epoch": 0.6232730284004968, + "grad_norm": 0.05048443377017975, + "learning_rate": 8.696581337318432e-06, + "loss": 0.0011, + "step": 36880 + }, + { + "epoch": 0.6234420286793473, + "grad_norm": 0.12879827618598938, + "learning_rate": 8.695588087983676e-06, + "loss": 0.0016, + "step": 36890 + }, + { + "epoch": 0.6236110289581978, + "grad_norm": 0.20827510952949524, + "learning_rate": 8.694594517114851e-06, + "loss": 0.0023, + "step": 36900 + }, + { + "epoch": 0.6237800292370482, + "grad_norm": 0.11347052454948425, + "learning_rate": 8.693600624798403e-06, + "loss": 0.0018, + "step": 36910 + }, + { + "epoch": 0.6239490295158987, + "grad_norm": 0.17409923672676086, + "learning_rate": 8.692606411120804e-06, + "loss": 0.0021, + "step": 36920 + }, + { + "epoch": 0.6241180297947492, + "grad_norm": 0.06025707721710205, + "learning_rate": 
8.691611876168556e-06, + "loss": 0.0017, + "step": 36930 + }, + { + "epoch": 0.6242870300735996, + "grad_norm": 0.07102333009243011, + "learning_rate": 8.69061702002819e-06, + "loss": 0.0019, + "step": 36940 + }, + { + "epoch": 0.6244560303524501, + "grad_norm": 0.16395600140094757, + "learning_rate": 8.68962184278626e-06, + "loss": 0.0035, + "step": 36950 + }, + { + "epoch": 0.6246250306313006, + "grad_norm": 0.09424368292093277, + "learning_rate": 8.688626344529353e-06, + "loss": 0.001, + "step": 36960 + }, + { + "epoch": 0.624794030910151, + "grad_norm": 0.04473784193396568, + "learning_rate": 8.687630525344084e-06, + "loss": 0.0013, + "step": 36970 + }, + { + "epoch": 0.6249630311890014, + "grad_norm": 0.06145351380109787, + "learning_rate": 8.686634385317089e-06, + "loss": 0.0028, + "step": 36980 + }, + { + "epoch": 0.6251320314678519, + "grad_norm": 0.08815675973892212, + "learning_rate": 8.685637924535044e-06, + "loss": 0.0028, + "step": 36990 + }, + { + "epoch": 0.6253010317467024, + "grad_norm": 0.12052954733371735, + "learning_rate": 8.68464114308464e-06, + "loss": 0.0023, + "step": 37000 + }, + { + "epoch": 0.6254700320255528, + "grad_norm": 0.09614243358373642, + "learning_rate": 8.683644041052605e-06, + "loss": 0.0022, + "step": 37010 + }, + { + "epoch": 0.6256390323044033, + "grad_norm": 0.07599113136529922, + "learning_rate": 8.682646618525692e-06, + "loss": 0.0031, + "step": 37020 + }, + { + "epoch": 0.6258080325832538, + "grad_norm": 0.04539747163653374, + "learning_rate": 8.681648875590678e-06, + "loss": 0.001, + "step": 37030 + }, + { + "epoch": 0.6259770328621043, + "grad_norm": 0.15155501663684845, + "learning_rate": 8.680650812334374e-06, + "loss": 0.0043, + "step": 37040 + }, + { + "epoch": 0.6261460331409546, + "grad_norm": 0.012156975455582142, + "learning_rate": 8.679652428843618e-06, + "loss": 0.0012, + "step": 37050 + }, + { + "epoch": 0.6263150334198051, + "grad_norm": 0.028095301240682602, + "learning_rate": 8.678653725205271e-06, + "loss": 0.0025, + "step": 37060 + }, + { + "epoch": 0.6264840336986556, + "grad_norm": 0.11469139158725739, + "learning_rate": 8.677654701506227e-06, + "loss": 0.0028, + "step": 37070 + }, + { + "epoch": 0.626653033977506, + "grad_norm": 0.09888265281915665, + "learning_rate": 8.676655357833405e-06, + "loss": 0.0031, + "step": 37080 + }, + { + "epoch": 0.6268220342563565, + "grad_norm": 0.041696254163980484, + "learning_rate": 8.675655694273753e-06, + "loss": 0.0013, + "step": 37090 + }, + { + "epoch": 0.626991034535207, + "grad_norm": 0.08160720765590668, + "learning_rate": 8.674655710914246e-06, + "loss": 0.0018, + "step": 37100 + }, + { + "epoch": 0.6271600348140575, + "grad_norm": 0.050693582743406296, + "learning_rate": 8.673655407841888e-06, + "loss": 0.0022, + "step": 37110 + }, + { + "epoch": 0.6273290350929079, + "grad_norm": 0.058973561972379684, + "learning_rate": 8.672654785143712e-06, + "loss": 0.0022, + "step": 37120 + }, + { + "epoch": 0.6274980353717584, + "grad_norm": 0.05065792426466942, + "learning_rate": 8.671653842906774e-06, + "loss": 0.0022, + "step": 37130 + }, + { + "epoch": 0.6276670356506088, + "grad_norm": 0.17974597215652466, + "learning_rate": 8.670652581218162e-06, + "loss": 0.0019, + "step": 37140 + }, + { + "epoch": 0.6278360359294592, + "grad_norm": 0.0987791046500206, + "learning_rate": 8.669651000164992e-06, + "loss": 0.0032, + "step": 37150 + }, + { + "epoch": 0.6280050362083097, + "grad_norm": 0.08784514665603638, + "learning_rate": 8.668649099834404e-06, + "loss": 0.0016, + "step": 37160 + }, 
+ { + "epoch": 0.6281740364871602, + "grad_norm": 0.1562742441892624, + "learning_rate": 8.667646880313569e-06, + "loss": 0.0037, + "step": 37170 + }, + { + "epoch": 0.6283430367660107, + "grad_norm": 0.0067938147112727165, + "learning_rate": 8.666644341689686e-06, + "loss": 0.0016, + "step": 37180 + }, + { + "epoch": 0.6285120370448611, + "grad_norm": 0.07273447513580322, + "learning_rate": 8.66564148404998e-06, + "loss": 0.0029, + "step": 37190 + }, + { + "epoch": 0.6286810373237116, + "grad_norm": 0.11638252437114716, + "learning_rate": 8.664638307481704e-06, + "loss": 0.0017, + "step": 37200 + }, + { + "epoch": 0.6288500376025621, + "grad_norm": 0.18697509169578552, + "learning_rate": 8.66363481207214e-06, + "loss": 0.0032, + "step": 37210 + }, + { + "epoch": 0.6290190378814126, + "grad_norm": 0.16037853062152863, + "learning_rate": 8.662630997908597e-06, + "loss": 0.0024, + "step": 37220 + }, + { + "epoch": 0.6291880381602629, + "grad_norm": 0.07233048975467682, + "learning_rate": 8.66162686507841e-06, + "loss": 0.0019, + "step": 37230 + }, + { + "epoch": 0.6293570384391134, + "grad_norm": 0.1072436049580574, + "learning_rate": 8.660622413668945e-06, + "loss": 0.0023, + "step": 37240 + }, + { + "epoch": 0.6295260387179639, + "grad_norm": 0.09999535977840424, + "learning_rate": 8.659617643767595e-06, + "loss": 0.0022, + "step": 37250 + }, + { + "epoch": 0.6296950389968143, + "grad_norm": 0.06610429286956787, + "learning_rate": 8.658612555461779e-06, + "loss": 0.0023, + "step": 37260 + }, + { + "epoch": 0.6298640392756648, + "grad_norm": 0.05709943547844887, + "learning_rate": 8.657607148838943e-06, + "loss": 0.0023, + "step": 37270 + }, + { + "epoch": 0.6300330395545153, + "grad_norm": 0.040123891085386276, + "learning_rate": 8.656601423986564e-06, + "loss": 0.0019, + "step": 37280 + }, + { + "epoch": 0.6302020398333658, + "grad_norm": 0.04873102530837059, + "learning_rate": 8.655595380992144e-06, + "loss": 0.0017, + "step": 37290 + }, + { + "epoch": 0.6303710401122162, + "grad_norm": 0.13486704230308533, + "learning_rate": 8.654589019943215e-06, + "loss": 0.0027, + "step": 37300 + }, + { + "epoch": 0.6305400403910666, + "grad_norm": 0.03334526717662811, + "learning_rate": 8.653582340927333e-06, + "loss": 0.0019, + "step": 37310 + }, + { + "epoch": 0.6307090406699171, + "grad_norm": 0.2030312567949295, + "learning_rate": 8.652575344032085e-06, + "loss": 0.002, + "step": 37320 + }, + { + "epoch": 0.6308780409487675, + "grad_norm": 0.06566629558801651, + "learning_rate": 8.651568029345088e-06, + "loss": 0.0031, + "step": 37330 + }, + { + "epoch": 0.631047041227618, + "grad_norm": 0.13266269862651825, + "learning_rate": 8.650560396953978e-06, + "loss": 0.0019, + "step": 37340 + }, + { + "epoch": 0.6312160415064685, + "grad_norm": 0.16177508234977722, + "learning_rate": 8.649552446946426e-06, + "loss": 0.0029, + "step": 37350 + }, + { + "epoch": 0.631385041785319, + "grad_norm": 0.02120721898972988, + "learning_rate": 8.64854417941013e-06, + "loss": 0.0017, + "step": 37360 + }, + { + "epoch": 0.6315540420641694, + "grad_norm": 0.06453455984592438, + "learning_rate": 8.647535594432812e-06, + "loss": 0.0013, + "step": 37370 + }, + { + "epoch": 0.6317230423430199, + "grad_norm": 0.01956728659570217, + "learning_rate": 8.646526692102224e-06, + "loss": 0.0023, + "step": 37380 + }, + { + "epoch": 0.6318920426218704, + "grad_norm": 0.09439927339553833, + "learning_rate": 8.645517472506146e-06, + "loss": 0.0019, + "step": 37390 + }, + { + "epoch": 0.6320610429007207, + "grad_norm": 
0.17421653866767883, + "learning_rate": 8.644507935732385e-06, + "loss": 0.003, + "step": 37400 + }, + { + "epoch": 0.6322300431795712, + "grad_norm": 0.02730964496731758, + "learning_rate": 8.643498081868778e-06, + "loss": 0.0047, + "step": 37410 + }, + { + "epoch": 0.6323990434584217, + "grad_norm": 0.043696578592061996, + "learning_rate": 8.642487911003181e-06, + "loss": 0.0024, + "step": 37420 + }, + { + "epoch": 0.6325680437372722, + "grad_norm": 0.019823472946882248, + "learning_rate": 8.64147742322349e-06, + "loss": 0.0018, + "step": 37430 + }, + { + "epoch": 0.6327370440161226, + "grad_norm": 0.013545677065849304, + "learning_rate": 8.640466618617618e-06, + "loss": 0.0017, + "step": 37440 + }, + { + "epoch": 0.6329060442949731, + "grad_norm": 0.06836103647947311, + "learning_rate": 8.639455497273512e-06, + "loss": 0.0038, + "step": 37450 + }, + { + "epoch": 0.6330750445738236, + "grad_norm": 0.028289545327425003, + "learning_rate": 8.638444059279146e-06, + "loss": 0.0014, + "step": 37460 + }, + { + "epoch": 0.633244044852674, + "grad_norm": 0.04600983485579491, + "learning_rate": 8.637432304722517e-06, + "loss": 0.002, + "step": 37470 + }, + { + "epoch": 0.6334130451315245, + "grad_norm": 0.08732181042432785, + "learning_rate": 8.636420233691654e-06, + "loss": 0.002, + "step": 37480 + }, + { + "epoch": 0.6335820454103749, + "grad_norm": 0.045885950326919556, + "learning_rate": 8.63540784627461e-06, + "loss": 0.0037, + "step": 37490 + }, + { + "epoch": 0.6337510456892254, + "grad_norm": 0.09269632399082184, + "learning_rate": 8.634395142559471e-06, + "loss": 0.0021, + "step": 37500 + }, + { + "epoch": 0.6339200459680758, + "grad_norm": 0.1494239717721939, + "learning_rate": 8.633382122634347e-06, + "loss": 0.003, + "step": 37510 + }, + { + "epoch": 0.6340890462469263, + "grad_norm": 0.05048111826181412, + "learning_rate": 8.632368786587371e-06, + "loss": 0.0025, + "step": 37520 + }, + { + "epoch": 0.6342580465257768, + "grad_norm": 0.04324718937277794, + "learning_rate": 8.631355134506713e-06, + "loss": 0.0021, + "step": 37530 + }, + { + "epoch": 0.6344270468046272, + "grad_norm": 0.039146389812231064, + "learning_rate": 8.630341166480565e-06, + "loss": 0.0012, + "step": 37540 + }, + { + "epoch": 0.6345960470834777, + "grad_norm": 0.0289743822067976, + "learning_rate": 8.629326882597145e-06, + "loss": 0.0021, + "step": 37550 + }, + { + "epoch": 0.6347650473623282, + "grad_norm": 0.08949195593595505, + "learning_rate": 8.628312282944701e-06, + "loss": 0.0036, + "step": 37560 + }, + { + "epoch": 0.6349340476411786, + "grad_norm": 0.16662773489952087, + "learning_rate": 8.62729736761151e-06, + "loss": 0.0019, + "step": 37570 + }, + { + "epoch": 0.635103047920029, + "grad_norm": 0.06934000551700592, + "learning_rate": 8.626282136685873e-06, + "loss": 0.002, + "step": 37580 + }, + { + "epoch": 0.6352720481988795, + "grad_norm": 0.005404521245509386, + "learning_rate": 8.625266590256121e-06, + "loss": 0.0016, + "step": 37590 + }, + { + "epoch": 0.63544104847773, + "grad_norm": 0.029986826702952385, + "learning_rate": 8.62425072841061e-06, + "loss": 0.0022, + "step": 37600 + }, + { + "epoch": 0.6356100487565804, + "grad_norm": 0.06996724754571915, + "learning_rate": 8.623234551237724e-06, + "loss": 0.002, + "step": 37610 + }, + { + "epoch": 0.6357790490354309, + "grad_norm": 0.046281080693006516, + "learning_rate": 8.62221805882588e-06, + "loss": 0.0014, + "step": 37620 + }, + { + "epoch": 0.6359480493142814, + "grad_norm": 0.03734259307384491, + "learning_rate": 8.621201251263514e-06, + 
"loss": 0.0011, + "step": 37630 + }, + { + "epoch": 0.6361170495931319, + "grad_norm": 0.04263751208782196, + "learning_rate": 8.620184128639094e-06, + "loss": 0.0027, + "step": 37640 + }, + { + "epoch": 0.6362860498719823, + "grad_norm": 0.05749150365591049, + "learning_rate": 8.619166691041114e-06, + "loss": 0.0032, + "step": 37650 + }, + { + "epoch": 0.6364550501508327, + "grad_norm": 0.148232102394104, + "learning_rate": 8.618148938558097e-06, + "loss": 0.0035, + "step": 37660 + }, + { + "epoch": 0.6366240504296832, + "grad_norm": 0.01768629066646099, + "learning_rate": 8.617130871278592e-06, + "loss": 0.0014, + "step": 37670 + }, + { + "epoch": 0.6367930507085336, + "grad_norm": 0.02394348382949829, + "learning_rate": 8.616112489291177e-06, + "loss": 0.0032, + "step": 37680 + }, + { + "epoch": 0.6369620509873841, + "grad_norm": 0.03687208145856857, + "learning_rate": 8.615093792684453e-06, + "loss": 0.0016, + "step": 37690 + }, + { + "epoch": 0.6371310512662346, + "grad_norm": 0.07785465568304062, + "learning_rate": 8.614074781547055e-06, + "loss": 0.0024, + "step": 37700 + }, + { + "epoch": 0.6373000515450851, + "grad_norm": 0.067860446870327, + "learning_rate": 8.61305545596764e-06, + "loss": 0.0012, + "step": 37710 + }, + { + "epoch": 0.6374690518239355, + "grad_norm": 0.05987097695469856, + "learning_rate": 8.612035816034895e-06, + "loss": 0.0023, + "step": 37720 + }, + { + "epoch": 0.637638052102786, + "grad_norm": 0.06388407200574875, + "learning_rate": 8.611015861837532e-06, + "loss": 0.0019, + "step": 37730 + }, + { + "epoch": 0.6378070523816364, + "grad_norm": 0.05277885124087334, + "learning_rate": 8.609995593464294e-06, + "loss": 0.0022, + "step": 37740 + }, + { + "epoch": 0.6379760526604868, + "grad_norm": 0.04513770341873169, + "learning_rate": 8.608975011003949e-06, + "loss": 0.0017, + "step": 37750 + }, + { + "epoch": 0.6381450529393373, + "grad_norm": 0.03364074230194092, + "learning_rate": 8.60795411454529e-06, + "loss": 0.0026, + "step": 37760 + }, + { + "epoch": 0.6383140532181878, + "grad_norm": 0.09462929517030716, + "learning_rate": 8.606932904177144e-06, + "loss": 0.0019, + "step": 37770 + }, + { + "epoch": 0.6384830534970383, + "grad_norm": 0.07569252699613571, + "learning_rate": 8.605911379988359e-06, + "loss": 0.0023, + "step": 37780 + }, + { + "epoch": 0.6386520537758887, + "grad_norm": 0.06281714886426926, + "learning_rate": 8.604889542067814e-06, + "loss": 0.0017, + "step": 37790 + }, + { + "epoch": 0.6388210540547392, + "grad_norm": 0.06001376360654831, + "learning_rate": 8.60386739050441e-06, + "loss": 0.0034, + "step": 37800 + }, + { + "epoch": 0.6389900543335897, + "grad_norm": 0.02849130518734455, + "learning_rate": 8.602844925387083e-06, + "loss": 0.0035, + "step": 37810 + }, + { + "epoch": 0.6391590546124402, + "grad_norm": 0.15602053701877594, + "learning_rate": 8.601822146804792e-06, + "loss": 0.0011, + "step": 37820 + }, + { + "epoch": 0.6393280548912905, + "grad_norm": 0.19311657547950745, + "learning_rate": 8.600799054846525e-06, + "loss": 0.0024, + "step": 37830 + }, + { + "epoch": 0.639497055170141, + "grad_norm": 0.05133889615535736, + "learning_rate": 8.599775649601292e-06, + "loss": 0.0029, + "step": 37840 + }, + { + "epoch": 0.6396660554489915, + "grad_norm": 0.09188152849674225, + "learning_rate": 8.598751931158136e-06, + "loss": 0.0015, + "step": 37850 + }, + { + "epoch": 0.6398350557278419, + "grad_norm": 0.05758042261004448, + "learning_rate": 8.597727899606125e-06, + "loss": 0.0029, + "step": 37860 + }, + { + "epoch": 
0.6400040560066924, + "grad_norm": 0.040008142590522766, + "learning_rate": 8.596703555034356e-06, + "loss": 0.0021, + "step": 37870 + }, + { + "epoch": 0.6401730562855429, + "grad_norm": 0.06656382232904434, + "learning_rate": 8.595678897531952e-06, + "loss": 0.002, + "step": 37880 + }, + { + "epoch": 0.6403420565643934, + "grad_norm": 0.03702574595808983, + "learning_rate": 8.594653927188062e-06, + "loss": 0.0017, + "step": 37890 + }, + { + "epoch": 0.6405110568432438, + "grad_norm": 0.0633983165025711, + "learning_rate": 8.593628644091863e-06, + "loss": 0.0018, + "step": 37900 + }, + { + "epoch": 0.6406800571220943, + "grad_norm": 0.0489104762673378, + "learning_rate": 8.59260304833256e-06, + "loss": 0.0024, + "step": 37910 + }, + { + "epoch": 0.6408490574009447, + "grad_norm": 0.024015096947550774, + "learning_rate": 8.591577139999387e-06, + "loss": 0.0016, + "step": 37920 + }, + { + "epoch": 0.6410180576797951, + "grad_norm": 0.1253378987312317, + "learning_rate": 8.590550919181601e-06, + "loss": 0.0025, + "step": 37930 + }, + { + "epoch": 0.6411870579586456, + "grad_norm": 0.049682728946208954, + "learning_rate": 8.589524385968486e-06, + "loss": 0.0016, + "step": 37940 + }, + { + "epoch": 0.6413560582374961, + "grad_norm": 0.04882590472698212, + "learning_rate": 8.588497540449361e-06, + "loss": 0.0016, + "step": 37950 + }, + { + "epoch": 0.6415250585163466, + "grad_norm": 0.19600225985050201, + "learning_rate": 8.587470382713562e-06, + "loss": 0.0024, + "step": 37960 + }, + { + "epoch": 0.641694058795197, + "grad_norm": 0.06281403452157974, + "learning_rate": 8.586442912850456e-06, + "loss": 0.0037, + "step": 37970 + }, + { + "epoch": 0.6418630590740475, + "grad_norm": 0.2796551287174225, + "learning_rate": 8.585415130949444e-06, + "loss": 0.0014, + "step": 37980 + }, + { + "epoch": 0.642032059352898, + "grad_norm": 0.1508016139268875, + "learning_rate": 8.584387037099941e-06, + "loss": 0.0036, + "step": 37990 + }, + { + "epoch": 0.6422010596317483, + "grad_norm": 0.13536739349365234, + "learning_rate": 8.5833586313914e-06, + "loss": 0.0021, + "step": 38000 + }, + { + "epoch": 0.6423700599105988, + "grad_norm": 0.024195270612835884, + "learning_rate": 8.582329913913297e-06, + "loss": 0.0052, + "step": 38010 + }, + { + "epoch": 0.6425390601894493, + "grad_norm": 0.07989949733018875, + "learning_rate": 8.581300884755133e-06, + "loss": 0.0025, + "step": 38020 + }, + { + "epoch": 0.6427080604682998, + "grad_norm": 0.051182933151721954, + "learning_rate": 8.58027154400644e-06, + "loss": 0.0026, + "step": 38030 + }, + { + "epoch": 0.6428770607471502, + "grad_norm": 0.04734310135245323, + "learning_rate": 8.579241891756778e-06, + "loss": 0.0017, + "step": 38040 + }, + { + "epoch": 0.6430460610260007, + "grad_norm": 0.03388788551092148, + "learning_rate": 8.578211928095727e-06, + "loss": 0.0009, + "step": 38050 + }, + { + "epoch": 0.6432150613048512, + "grad_norm": 0.04434414207935333, + "learning_rate": 8.577181653112904e-06, + "loss": 0.0018, + "step": 38060 + }, + { + "epoch": 0.6433840615837016, + "grad_norm": 0.05806152522563934, + "learning_rate": 8.576151066897944e-06, + "loss": 0.0019, + "step": 38070 + }, + { + "epoch": 0.6435530618625521, + "grad_norm": 0.059645067900419235, + "learning_rate": 8.575120169540514e-06, + "loss": 0.0038, + "step": 38080 + }, + { + "epoch": 0.6437220621414025, + "grad_norm": 0.05072898417711258, + "learning_rate": 8.574088961130308e-06, + "loss": 0.0029, + "step": 38090 + }, + { + "epoch": 0.643891062420253, + "grad_norm": 0.14522813260555267, + 
"learning_rate": 8.573057441757045e-06, + "loss": 0.0019, + "step": 38100 + }, + { + "epoch": 0.6440600626991034, + "grad_norm": 0.041980892419815063, + "learning_rate": 8.572025611510473e-06, + "loss": 0.0031, + "step": 38110 + }, + { + "epoch": 0.6442290629779539, + "grad_norm": 0.09430034458637238, + "learning_rate": 8.570993470480367e-06, + "loss": 0.0025, + "step": 38120 + }, + { + "epoch": 0.6443980632568044, + "grad_norm": 0.1635764241218567, + "learning_rate": 8.569961018756526e-06, + "loss": 0.0016, + "step": 38130 + }, + { + "epoch": 0.6445670635356548, + "grad_norm": 0.08032064884901047, + "learning_rate": 8.56892825642878e-06, + "loss": 0.0042, + "step": 38140 + }, + { + "epoch": 0.6447360638145053, + "grad_norm": 0.1031501516699791, + "learning_rate": 8.567895183586983e-06, + "loss": 0.0024, + "step": 38150 + }, + { + "epoch": 0.6449050640933558, + "grad_norm": 0.09169802814722061, + "learning_rate": 8.566861800321019e-06, + "loss": 0.0024, + "step": 38160 + }, + { + "epoch": 0.6450740643722063, + "grad_norm": 0.059827886521816254, + "learning_rate": 8.565828106720797e-06, + "loss": 0.0024, + "step": 38170 + }, + { + "epoch": 0.6452430646510566, + "grad_norm": 0.04389607533812523, + "learning_rate": 8.564794102876251e-06, + "loss": 0.0013, + "step": 38180 + }, + { + "epoch": 0.6454120649299071, + "grad_norm": 0.07341597229242325, + "learning_rate": 8.563759788877348e-06, + "loss": 0.0017, + "step": 38190 + }, + { + "epoch": 0.6455810652087576, + "grad_norm": 0.062065500766038895, + "learning_rate": 8.562725164814077e-06, + "loss": 0.0029, + "step": 38200 + }, + { + "epoch": 0.645750065487608, + "grad_norm": 0.0988144800066948, + "learning_rate": 8.561690230776451e-06, + "loss": 0.0023, + "step": 38210 + }, + { + "epoch": 0.6459190657664585, + "grad_norm": 0.032794203609228134, + "learning_rate": 8.560654986854522e-06, + "loss": 0.0015, + "step": 38220 + }, + { + "epoch": 0.646088066045309, + "grad_norm": 0.06741363555192947, + "learning_rate": 8.559619433138357e-06, + "loss": 0.0037, + "step": 38230 + }, + { + "epoch": 0.6462570663241595, + "grad_norm": 0.5312454700469971, + "learning_rate": 8.558583569718053e-06, + "loss": 0.0019, + "step": 38240 + }, + { + "epoch": 0.6464260666030099, + "grad_norm": 0.27058297395706177, + "learning_rate": 8.557547396683738e-06, + "loss": 0.0015, + "step": 38250 + }, + { + "epoch": 0.6465950668818603, + "grad_norm": 0.0890686959028244, + "learning_rate": 8.556510914125562e-06, + "loss": 0.0023, + "step": 38260 + }, + { + "epoch": 0.6467640671607108, + "grad_norm": 0.081411212682724, + "learning_rate": 8.555474122133706e-06, + "loss": 0.0028, + "step": 38270 + }, + { + "epoch": 0.6469330674395612, + "grad_norm": 0.017591990530490875, + "learning_rate": 8.554437020798374e-06, + "loss": 0.001, + "step": 38280 + }, + { + "epoch": 0.6471020677184117, + "grad_norm": 0.042150210589170456, + "learning_rate": 8.553399610209798e-06, + "loss": 0.0012, + "step": 38290 + }, + { + "epoch": 0.6472710679972622, + "grad_norm": 0.09767927974462509, + "learning_rate": 8.552361890458242e-06, + "loss": 0.0016, + "step": 38300 + }, + { + "epoch": 0.6474400682761127, + "grad_norm": 0.27567175030708313, + "learning_rate": 8.55132386163399e-06, + "loss": 0.002, + "step": 38310 + }, + { + "epoch": 0.6476090685549631, + "grad_norm": 0.026824578642845154, + "learning_rate": 8.550285523827352e-06, + "loss": 0.0023, + "step": 38320 + }, + { + "epoch": 0.6477780688338136, + "grad_norm": 0.14262062311172485, + "learning_rate": 8.549246877128674e-06, + "loss": 0.0033, + 
"step": 38330 + }, + { + "epoch": 0.6479470691126641, + "grad_norm": 0.08674229681491852, + "learning_rate": 8.548207921628321e-06, + "loss": 0.0018, + "step": 38340 + }, + { + "epoch": 0.6481160693915144, + "grad_norm": 0.03979768604040146, + "learning_rate": 8.547168657416688e-06, + "loss": 0.0018, + "step": 38350 + }, + { + "epoch": 0.6482850696703649, + "grad_norm": 0.02881024219095707, + "learning_rate": 8.546129084584196e-06, + "loss": 0.0015, + "step": 38360 + }, + { + "epoch": 0.6484540699492154, + "grad_norm": 0.03815798461437225, + "learning_rate": 8.54508920322129e-06, + "loss": 0.0019, + "step": 38370 + }, + { + "epoch": 0.6486230702280659, + "grad_norm": 0.04694780707359314, + "learning_rate": 8.544049013418447e-06, + "loss": 0.0017, + "step": 38380 + }, + { + "epoch": 0.6487920705069163, + "grad_norm": 0.12879303097724915, + "learning_rate": 8.54300851526617e-06, + "loss": 0.002, + "step": 38390 + }, + { + "epoch": 0.6489610707857668, + "grad_norm": 0.0995473712682724, + "learning_rate": 8.541967708854986e-06, + "loss": 0.0015, + "step": 38400 + }, + { + "epoch": 0.6491300710646173, + "grad_norm": 0.17174296081066132, + "learning_rate": 8.54092659427545e-06, + "loss": 0.0035, + "step": 38410 + }, + { + "epoch": 0.6492990713434678, + "grad_norm": 0.017600564286112785, + "learning_rate": 8.539885171618143e-06, + "loss": 0.0019, + "step": 38420 + }, + { + "epoch": 0.6494680716223181, + "grad_norm": 0.1489916890859604, + "learning_rate": 8.538843440973677e-06, + "loss": 0.0023, + "step": 38430 + }, + { + "epoch": 0.6496370719011686, + "grad_norm": 0.008243445307016373, + "learning_rate": 8.537801402432684e-06, + "loss": 0.002, + "step": 38440 + }, + { + "epoch": 0.6498060721800191, + "grad_norm": 0.10142067819833755, + "learning_rate": 8.536759056085828e-06, + "loss": 0.002, + "step": 38450 + }, + { + "epoch": 0.6499750724588695, + "grad_norm": 0.07499175518751144, + "learning_rate": 8.535716402023798e-06, + "loss": 0.0038, + "step": 38460 + }, + { + "epoch": 0.65014407273772, + "grad_norm": 0.026488911360502243, + "learning_rate": 8.53467344033731e-06, + "loss": 0.0025, + "step": 38470 + }, + { + "epoch": 0.6503130730165705, + "grad_norm": 0.05331513658165932, + "learning_rate": 8.533630171117108e-06, + "loss": 0.0022, + "step": 38480 + }, + { + "epoch": 0.650482073295421, + "grad_norm": 0.09920284897089005, + "learning_rate": 8.53258659445396e-06, + "loss": 0.0019, + "step": 38490 + }, + { + "epoch": 0.6506510735742714, + "grad_norm": 0.03672245889902115, + "learning_rate": 8.531542710438662e-06, + "loss": 0.0028, + "step": 38500 + }, + { + "epoch": 0.6508200738531219, + "grad_norm": 0.16491785645484924, + "learning_rate": 8.530498519162037e-06, + "loss": 0.0082, + "step": 38510 + }, + { + "epoch": 0.6509890741319723, + "grad_norm": 0.11625287681818008, + "learning_rate": 8.529454020714936e-06, + "loss": 0.0019, + "step": 38520 + }, + { + "epoch": 0.6511580744108227, + "grad_norm": 0.09387943893671036, + "learning_rate": 8.528409215188233e-06, + "loss": 0.0018, + "step": 38530 + }, + { + "epoch": 0.6513270746896732, + "grad_norm": 0.12414376437664032, + "learning_rate": 8.527364102672835e-06, + "loss": 0.005, + "step": 38540 + }, + { + "epoch": 0.6514960749685237, + "grad_norm": 0.10484426468610764, + "learning_rate": 8.526318683259668e-06, + "loss": 0.0026, + "step": 38550 + }, + { + "epoch": 0.6516650752473742, + "grad_norm": 0.035917624831199646, + "learning_rate": 8.525272957039692e-06, + "loss": 0.0025, + "step": 38560 + }, + { + "epoch": 0.6518340755262246, + "grad_norm": 
0.0756058618426323, + "learning_rate": 8.524226924103887e-06, + "loss": 0.0024, + "step": 38570 + }, + { + "epoch": 0.6520030758050751, + "grad_norm": 0.04967297613620758, + "learning_rate": 8.523180584543265e-06, + "loss": 0.0022, + "step": 38580 + }, + { + "epoch": 0.6521720760839256, + "grad_norm": 0.0803271010518074, + "learning_rate": 8.52213393844886e-06, + "loss": 0.003, + "step": 38590 + }, + { + "epoch": 0.652341076362776, + "grad_norm": 0.0883374959230423, + "learning_rate": 8.52108698591174e-06, + "loss": 0.0023, + "step": 38600 + }, + { + "epoch": 0.6525100766416264, + "grad_norm": 0.05122964084148407, + "learning_rate": 8.52003972702299e-06, + "loss": 0.0021, + "step": 38610 + }, + { + "epoch": 0.6526790769204769, + "grad_norm": 0.02245727740228176, + "learning_rate": 8.51899216187373e-06, + "loss": 0.0012, + "step": 38620 + }, + { + "epoch": 0.6528480771993274, + "grad_norm": 0.21799735724925995, + "learning_rate": 8.517944290555102e-06, + "loss": 0.0026, + "step": 38630 + }, + { + "epoch": 0.6530170774781778, + "grad_norm": 0.020439540967345238, + "learning_rate": 8.516896113158274e-06, + "loss": 0.0016, + "step": 38640 + }, + { + "epoch": 0.6531860777570283, + "grad_norm": 0.029102226719260216, + "learning_rate": 8.515847629774445e-06, + "loss": 0.0025, + "step": 38650 + }, + { + "epoch": 0.6533550780358788, + "grad_norm": 0.049179356545209885, + "learning_rate": 8.51479884049484e-06, + "loss": 0.0024, + "step": 38660 + }, + { + "epoch": 0.6535240783147293, + "grad_norm": 0.22065278887748718, + "learning_rate": 8.513749745410705e-06, + "loss": 0.0038, + "step": 38670 + }, + { + "epoch": 0.6536930785935797, + "grad_norm": 0.046514589339494705, + "learning_rate": 8.512700344613316e-06, + "loss": 0.0025, + "step": 38680 + }, + { + "epoch": 0.6538620788724301, + "grad_norm": 0.022628309205174446, + "learning_rate": 8.51165063819398e-06, + "loss": 0.0017, + "step": 38690 + }, + { + "epoch": 0.6540310791512806, + "grad_norm": 0.150055930018425, + "learning_rate": 8.510600626244024e-06, + "loss": 0.0017, + "step": 38700 + }, + { + "epoch": 0.654200079430131, + "grad_norm": 0.1109904795885086, + "learning_rate": 8.509550308854801e-06, + "loss": 0.0022, + "step": 38710 + }, + { + "epoch": 0.6543690797089815, + "grad_norm": 0.0615401454269886, + "learning_rate": 8.5084996861177e-06, + "loss": 0.0013, + "step": 38720 + }, + { + "epoch": 0.654538079987832, + "grad_norm": 0.01564824767410755, + "learning_rate": 8.507448758124126e-06, + "loss": 0.0019, + "step": 38730 + }, + { + "epoch": 0.6547070802666825, + "grad_norm": 0.07921988517045975, + "learning_rate": 8.506397524965517e-06, + "loss": 0.0022, + "step": 38740 + }, + { + "epoch": 0.6548760805455329, + "grad_norm": 0.11442530155181885, + "learning_rate": 8.505345986733335e-06, + "loss": 0.0018, + "step": 38750 + }, + { + "epoch": 0.6550450808243834, + "grad_norm": 0.1136259213089943, + "learning_rate": 8.504294143519067e-06, + "loss": 0.0033, + "step": 38760 + }, + { + "epoch": 0.6552140811032339, + "grad_norm": 0.1569293588399887, + "learning_rate": 8.50324199541423e-06, + "loss": 0.0033, + "step": 38770 + }, + { + "epoch": 0.6553830813820842, + "grad_norm": 0.044599708169698715, + "learning_rate": 8.502189542510365e-06, + "loss": 0.0029, + "step": 38780 + }, + { + "epoch": 0.6555520816609347, + "grad_norm": 0.09630095213651657, + "learning_rate": 8.501136784899043e-06, + "loss": 0.0021, + "step": 38790 + }, + { + "epoch": 0.6557210819397852, + "grad_norm": 0.030088450759649277, + "learning_rate": 8.500083722671857e-06, + "loss": 
0.002, + "step": 38800 + }, + { + "epoch": 0.6558900822186357, + "grad_norm": 0.075492262840271, + "learning_rate": 8.499030355920429e-06, + "loss": 0.0021, + "step": 38810 + }, + { + "epoch": 0.6560590824974861, + "grad_norm": 0.012088959105312824, + "learning_rate": 8.497976684736407e-06, + "loss": 0.0009, + "step": 38820 + }, + { + "epoch": 0.6562280827763366, + "grad_norm": 0.06977924704551697, + "learning_rate": 8.496922709211464e-06, + "loss": 0.0077, + "step": 38830 + }, + { + "epoch": 0.6563970830551871, + "grad_norm": 0.06464297324419022, + "learning_rate": 8.495868429437302e-06, + "loss": 0.0021, + "step": 38840 + }, + { + "epoch": 0.6565660833340375, + "grad_norm": 0.13068783283233643, + "learning_rate": 8.49481384550565e-06, + "loss": 0.0034, + "step": 38850 + }, + { + "epoch": 0.656735083612888, + "grad_norm": 0.0852997675538063, + "learning_rate": 8.49375895750826e-06, + "loss": 0.0024, + "step": 38860 + }, + { + "epoch": 0.6569040838917384, + "grad_norm": 0.08365332335233688, + "learning_rate": 8.492703765536913e-06, + "loss": 0.0013, + "step": 38870 + }, + { + "epoch": 0.6570730841705889, + "grad_norm": 0.028204182162880898, + "learning_rate": 8.491648269683416e-06, + "loss": 0.0023, + "step": 38880 + }, + { + "epoch": 0.6572420844494393, + "grad_norm": 0.13570831716060638, + "learning_rate": 8.490592470039605e-06, + "loss": 0.0024, + "step": 38890 + }, + { + "epoch": 0.6574110847282898, + "grad_norm": 0.07442338764667511, + "learning_rate": 8.489536366697333e-06, + "loss": 0.0037, + "step": 38900 + }, + { + "epoch": 0.6575800850071403, + "grad_norm": 0.2090086191892624, + "learning_rate": 8.48847995974849e-06, + "loss": 0.0027, + "step": 38910 + }, + { + "epoch": 0.6577490852859907, + "grad_norm": 0.06681293994188309, + "learning_rate": 8.487423249284989e-06, + "loss": 0.0014, + "step": 38920 + }, + { + "epoch": 0.6579180855648412, + "grad_norm": 0.10129017382860184, + "learning_rate": 8.486366235398771e-06, + "loss": 0.0032, + "step": 38930 + }, + { + "epoch": 0.6580870858436917, + "grad_norm": 0.09688436985015869, + "learning_rate": 8.485308918181796e-06, + "loss": 0.0026, + "step": 38940 + }, + { + "epoch": 0.658256086122542, + "grad_norm": 0.08046122640371323, + "learning_rate": 8.484251297726059e-06, + "loss": 0.0017, + "step": 38950 + }, + { + "epoch": 0.6584250864013925, + "grad_norm": 0.04880860820412636, + "learning_rate": 8.483193374123576e-06, + "loss": 0.0013, + "step": 38960 + }, + { + "epoch": 0.658594086680243, + "grad_norm": 0.07618697732686996, + "learning_rate": 8.482135147466395e-06, + "loss": 0.001, + "step": 38970 + }, + { + "epoch": 0.6587630869590935, + "grad_norm": 0.022066382691264153, + "learning_rate": 8.481076617846586e-06, + "loss": 0.0015, + "step": 38980 + }, + { + "epoch": 0.6589320872379439, + "grad_norm": 0.030474252998828888, + "learning_rate": 8.480017785356243e-06, + "loss": 0.0024, + "step": 38990 + }, + { + "epoch": 0.6591010875167944, + "grad_norm": 0.057772617787122726, + "learning_rate": 8.478958650087492e-06, + "loss": 0.0032, + "step": 39000 + }, + { + "epoch": 0.6592700877956449, + "grad_norm": 0.023182455450296402, + "learning_rate": 8.477899212132483e-06, + "loss": 0.0018, + "step": 39010 + }, + { + "epoch": 0.6594390880744954, + "grad_norm": 0.03691640868782997, + "learning_rate": 8.476839471583391e-06, + "loss": 0.0011, + "step": 39020 + }, + { + "epoch": 0.6596080883533458, + "grad_norm": 0.08493918925523758, + "learning_rate": 8.475779428532418e-06, + "loss": 0.0027, + "step": 39030 + }, + { + "epoch": 0.6597770886321962, 
+ "grad_norm": 0.05697249621152878, + "learning_rate": 8.474719083071796e-06, + "loss": 0.0014, + "step": 39040 + }, + { + "epoch": 0.6599460889110467, + "grad_norm": 0.0171417985111475, + "learning_rate": 8.473658435293779e-06, + "loss": 0.0031, + "step": 39050 + }, + { + "epoch": 0.6601150891898971, + "grad_norm": 0.3440181314945221, + "learning_rate": 8.472597485290647e-06, + "loss": 0.0017, + "step": 39060 + }, + { + "epoch": 0.6602840894687476, + "grad_norm": 0.03360786661505699, + "learning_rate": 8.47153623315471e-06, + "loss": 0.0023, + "step": 39070 + }, + { + "epoch": 0.6604530897475981, + "grad_norm": 0.037310730665922165, + "learning_rate": 8.470474678978297e-06, + "loss": 0.0024, + "step": 39080 + }, + { + "epoch": 0.6606220900264486, + "grad_norm": 0.06788239628076553, + "learning_rate": 8.469412822853775e-06, + "loss": 0.0027, + "step": 39090 + }, + { + "epoch": 0.660791090305299, + "grad_norm": 0.11595677584409714, + "learning_rate": 8.468350664873526e-06, + "loss": 0.0029, + "step": 39100 + }, + { + "epoch": 0.6609600905841495, + "grad_norm": 0.020321834832429886, + "learning_rate": 8.467288205129967e-06, + "loss": 0.0025, + "step": 39110 + }, + { + "epoch": 0.6611290908629999, + "grad_norm": 0.008805109187960625, + "learning_rate": 8.466225443715535e-06, + "loss": 0.0025, + "step": 39120 + }, + { + "epoch": 0.6612980911418503, + "grad_norm": 0.08032660186290741, + "learning_rate": 8.465162380722693e-06, + "loss": 0.002, + "step": 39130 + }, + { + "epoch": 0.6614670914207008, + "grad_norm": 0.1384652853012085, + "learning_rate": 8.464099016243933e-06, + "loss": 0.0025, + "step": 39140 + }, + { + "epoch": 0.6616360916995513, + "grad_norm": 0.06193441525101662, + "learning_rate": 8.46303535037178e-06, + "loss": 0.0022, + "step": 39150 + }, + { + "epoch": 0.6618050919784018, + "grad_norm": 0.035007353872060776, + "learning_rate": 8.461971383198768e-06, + "loss": 0.0031, + "step": 39160 + }, + { + "epoch": 0.6619740922572522, + "grad_norm": 0.0404779352247715, + "learning_rate": 8.460907114817474e-06, + "loss": 0.0026, + "step": 39170 + }, + { + "epoch": 0.6621430925361027, + "grad_norm": 0.055109139531850815, + "learning_rate": 8.459842545320492e-06, + "loss": 0.0017, + "step": 39180 + }, + { + "epoch": 0.6623120928149532, + "grad_norm": 0.013876045122742653, + "learning_rate": 8.458777674800446e-06, + "loss": 0.0016, + "step": 39190 + }, + { + "epoch": 0.6624810930938037, + "grad_norm": 0.11018216609954834, + "learning_rate": 8.457712503349984e-06, + "loss": 0.0047, + "step": 39200 + }, + { + "epoch": 0.662650093372654, + "grad_norm": 0.15153026580810547, + "learning_rate": 8.45664703106178e-06, + "loss": 0.0027, + "step": 39210 + }, + { + "epoch": 0.6628190936515045, + "grad_norm": 0.08052218705415726, + "learning_rate": 8.455581258028539e-06, + "loss": 0.0029, + "step": 39220 + }, + { + "epoch": 0.662988093930355, + "grad_norm": 0.05759026110172272, + "learning_rate": 8.454515184342983e-06, + "loss": 0.0022, + "step": 39230 + }, + { + "epoch": 0.6631570942092054, + "grad_norm": 0.08084557205438614, + "learning_rate": 8.453448810097871e-06, + "loss": 0.002, + "step": 39240 + }, + { + "epoch": 0.6633260944880559, + "grad_norm": 0.10249501466751099, + "learning_rate": 8.452382135385978e-06, + "loss": 0.0024, + "step": 39250 + }, + { + "epoch": 0.6634950947669064, + "grad_norm": 0.13480144739151, + "learning_rate": 8.451315160300114e-06, + "loss": 0.002, + "step": 39260 + }, + { + "epoch": 0.6636640950457569, + "grad_norm": 0.013405361212790012, + "learning_rate": 
8.450247884933107e-06, + "loss": 0.0019, + "step": 39270 + }, + { + "epoch": 0.6638330953246073, + "grad_norm": 0.0013330960646271706, + "learning_rate": 8.449180309377817e-06, + "loss": 0.0009, + "step": 39280 + }, + { + "epoch": 0.6640020956034578, + "grad_norm": 0.17702990770339966, + "learning_rate": 8.44811243372713e-06, + "loss": 0.0023, + "step": 39290 + }, + { + "epoch": 0.6641710958823082, + "grad_norm": 0.15253756940364838, + "learning_rate": 8.447044258073955e-06, + "loss": 0.0034, + "step": 39300 + }, + { + "epoch": 0.6643400961611586, + "grad_norm": 0.15157701075077057, + "learning_rate": 8.445975782511227e-06, + "loss": 0.0015, + "step": 39310 + }, + { + "epoch": 0.6645090964400091, + "grad_norm": 0.08215749263763428, + "learning_rate": 8.444907007131911e-06, + "loss": 0.0026, + "step": 39320 + }, + { + "epoch": 0.6646780967188596, + "grad_norm": 0.13002920150756836, + "learning_rate": 8.443837932028995e-06, + "loss": 0.0014, + "step": 39330 + }, + { + "epoch": 0.66484709699771, + "grad_norm": 0.09273547679185867, + "learning_rate": 8.442768557295491e-06, + "loss": 0.0012, + "step": 39340 + }, + { + "epoch": 0.6650160972765605, + "grad_norm": 0.059002723544836044, + "learning_rate": 8.441698883024443e-06, + "loss": 0.0041, + "step": 39350 + }, + { + "epoch": 0.665185097555411, + "grad_norm": 0.030401628464460373, + "learning_rate": 8.440628909308917e-06, + "loss": 0.0038, + "step": 39360 + }, + { + "epoch": 0.6653540978342615, + "grad_norm": 0.06252733618021011, + "learning_rate": 8.439558636242005e-06, + "loss": 0.0027, + "step": 39370 + }, + { + "epoch": 0.6655230981131118, + "grad_norm": 0.04471417888998985, + "learning_rate": 8.438488063916826e-06, + "loss": 0.0013, + "step": 39380 + }, + { + "epoch": 0.6656920983919623, + "grad_norm": 0.12027346342802048, + "learning_rate": 8.437417192426527e-06, + "loss": 0.0021, + "step": 39390 + }, + { + "epoch": 0.6658610986708128, + "grad_norm": 0.21675223112106323, + "learning_rate": 8.436346021864277e-06, + "loss": 0.0024, + "step": 39400 + }, + { + "epoch": 0.6660300989496633, + "grad_norm": 0.14156877994537354, + "learning_rate": 8.435274552323274e-06, + "loss": 0.0026, + "step": 39410 + }, + { + "epoch": 0.6661990992285137, + "grad_norm": 0.0528615340590477, + "learning_rate": 8.43420278389674e-06, + "loss": 0.0023, + "step": 39420 + }, + { + "epoch": 0.6663680995073642, + "grad_norm": 0.040200572460889816, + "learning_rate": 8.433130716677923e-06, + "loss": 0.0021, + "step": 39430 + }, + { + "epoch": 0.6665370997862147, + "grad_norm": 0.04540213569998741, + "learning_rate": 8.432058350760103e-06, + "loss": 0.0008, + "step": 39440 + }, + { + "epoch": 0.6667061000650651, + "grad_norm": 0.06027652695775032, + "learning_rate": 8.430985686236577e-06, + "loss": 0.0039, + "step": 39450 + }, + { + "epoch": 0.6668751003439156, + "grad_norm": 0.060570668429136276, + "learning_rate": 8.429912723200672e-06, + "loss": 0.0016, + "step": 39460 + }, + { + "epoch": 0.667044100622766, + "grad_norm": 0.05765363201498985, + "learning_rate": 8.428839461745742e-06, + "loss": 0.002, + "step": 39470 + }, + { + "epoch": 0.6672131009016165, + "grad_norm": 0.07702869176864624, + "learning_rate": 8.427765901965165e-06, + "loss": 0.0019, + "step": 39480 + }, + { + "epoch": 0.6673821011804669, + "grad_norm": 0.10805605351924896, + "learning_rate": 8.42669204395235e-06, + "loss": 0.0022, + "step": 39490 + }, + { + "epoch": 0.6675511014593174, + "grad_norm": 0.06277460604906082, + "learning_rate": 8.425617887800722e-06, + "loss": 0.0018, + "step": 39500 + }, 
+ { + "epoch": 0.6677201017381679, + "grad_norm": 0.0981348529458046, + "learning_rate": 8.42454343360374e-06, + "loss": 0.0032, + "step": 39510 + }, + { + "epoch": 0.6678891020170183, + "grad_norm": 0.06740453839302063, + "learning_rate": 8.423468681454886e-06, + "loss": 0.0024, + "step": 39520 + }, + { + "epoch": 0.6680581022958688, + "grad_norm": 0.029674388468265533, + "learning_rate": 8.42239363144767e-06, + "loss": 0.0024, + "step": 39530 + }, + { + "epoch": 0.6682271025747193, + "grad_norm": 0.3581937551498413, + "learning_rate": 8.421318283675628e-06, + "loss": 0.0019, + "step": 39540 + }, + { + "epoch": 0.6683961028535698, + "grad_norm": 0.13029687106609344, + "learning_rate": 8.420242638232318e-06, + "loss": 0.0018, + "step": 39550 + }, + { + "epoch": 0.6685651031324201, + "grad_norm": 0.0804767906665802, + "learning_rate": 8.419166695211325e-06, + "loss": 0.0011, + "step": 39560 + }, + { + "epoch": 0.6687341034112706, + "grad_norm": 0.0854073241353035, + "learning_rate": 8.418090454706267e-06, + "loss": 0.0018, + "step": 39570 + }, + { + "epoch": 0.6689031036901211, + "grad_norm": 0.1427302360534668, + "learning_rate": 8.417013916810774e-06, + "loss": 0.0017, + "step": 39580 + }, + { + "epoch": 0.6690721039689715, + "grad_norm": 0.13676531612873077, + "learning_rate": 8.415937081618519e-06, + "loss": 0.0013, + "step": 39590 + }, + { + "epoch": 0.669241104247822, + "grad_norm": 0.03680437058210373, + "learning_rate": 8.414859949223184e-06, + "loss": 0.0014, + "step": 39600 + }, + { + "epoch": 0.6694101045266725, + "grad_norm": 0.23977738618850708, + "learning_rate": 8.41378251971849e-06, + "loss": 0.0037, + "step": 39610 + }, + { + "epoch": 0.669579104805523, + "grad_norm": 0.18916615843772888, + "learning_rate": 8.412704793198175e-06, + "loss": 0.0029, + "step": 39620 + }, + { + "epoch": 0.6697481050843734, + "grad_norm": 0.10192802548408508, + "learning_rate": 8.41162676975601e-06, + "loss": 0.0025, + "step": 39630 + }, + { + "epoch": 0.6699171053632238, + "grad_norm": 0.07449067384004593, + "learning_rate": 8.410548449485785e-06, + "loss": 0.0027, + "step": 39640 + }, + { + "epoch": 0.6700861056420743, + "grad_norm": 0.03700610250234604, + "learning_rate": 8.40946983248132e-06, + "loss": 0.0029, + "step": 39650 + }, + { + "epoch": 0.6702551059209247, + "grad_norm": 0.07545451074838638, + "learning_rate": 8.40839091883646e-06, + "loss": 0.0015, + "step": 39660 + }, + { + "epoch": 0.6704241061997752, + "grad_norm": 0.01644122786819935, + "learning_rate": 8.407311708645075e-06, + "loss": 0.0016, + "step": 39670 + }, + { + "epoch": 0.6705931064786257, + "grad_norm": 0.044411513954401016, + "learning_rate": 8.406232202001066e-06, + "loss": 0.002, + "step": 39680 + }, + { + "epoch": 0.6707621067574762, + "grad_norm": 0.04547111690044403, + "learning_rate": 8.40515239899835e-06, + "loss": 0.0026, + "step": 39690 + }, + { + "epoch": 0.6709311070363266, + "grad_norm": 0.14344197511672974, + "learning_rate": 8.404072299730877e-06, + "loss": 0.0024, + "step": 39700 + }, + { + "epoch": 0.6711001073151771, + "grad_norm": 0.032576240599155426, + "learning_rate": 8.402991904292621e-06, + "loss": 0.001, + "step": 39710 + }, + { + "epoch": 0.6712691075940276, + "grad_norm": 0.11407008767127991, + "learning_rate": 8.401911212777583e-06, + "loss": 0.0022, + "step": 39720 + }, + { + "epoch": 0.6714381078728779, + "grad_norm": 0.02802203968167305, + "learning_rate": 8.400830225279784e-06, + "loss": 0.0026, + "step": 39730 + }, + { + "epoch": 0.6716071081517284, + "grad_norm": 0.16928566992282867, 
+ "learning_rate": 8.39974894189328e-06, + "loss": 0.0019, + "step": 39740 + }, + { + "epoch": 0.6717761084305789, + "grad_norm": 0.08227849751710892, + "learning_rate": 8.398667362712145e-06, + "loss": 0.0026, + "step": 39750 + }, + { + "epoch": 0.6719451087094294, + "grad_norm": 0.06275780498981476, + "learning_rate": 8.397585487830482e-06, + "loss": 0.0027, + "step": 39760 + }, + { + "epoch": 0.6721141089882798, + "grad_norm": 0.14453285932540894, + "learning_rate": 8.396503317342422e-06, + "loss": 0.0021, + "step": 39770 + }, + { + "epoch": 0.6722831092671303, + "grad_norm": 0.09584707766771317, + "learning_rate": 8.395420851342115e-06, + "loss": 0.0027, + "step": 39780 + }, + { + "epoch": 0.6724521095459808, + "grad_norm": 0.07883055508136749, + "learning_rate": 8.394338089923744e-06, + "loss": 0.002, + "step": 39790 + }, + { + "epoch": 0.6726211098248313, + "grad_norm": 0.12426584213972092, + "learning_rate": 8.393255033181511e-06, + "loss": 0.0021, + "step": 39800 + }, + { + "epoch": 0.6727901101036816, + "grad_norm": 0.16997219622135162, + "learning_rate": 8.39217168120965e-06, + "loss": 0.001, + "step": 39810 + }, + { + "epoch": 0.6729591103825321, + "grad_norm": 0.09920654445886612, + "learning_rate": 8.39108803410242e-06, + "loss": 0.0019, + "step": 39820 + }, + { + "epoch": 0.6731281106613826, + "grad_norm": 0.06657091528177261, + "learning_rate": 8.390004091954099e-06, + "loss": 0.0022, + "step": 39830 + }, + { + "epoch": 0.673297110940233, + "grad_norm": 0.087304025888443, + "learning_rate": 8.388919854858997e-06, + "loss": 0.0015, + "step": 39840 + }, + { + "epoch": 0.6734661112190835, + "grad_norm": 0.09787190705537796, + "learning_rate": 8.387835322911449e-06, + "loss": 0.0026, + "step": 39850 + }, + { + "epoch": 0.673635111497934, + "grad_norm": 0.08263973146677017, + "learning_rate": 8.386750496205811e-06, + "loss": 0.0018, + "step": 39860 + }, + { + "epoch": 0.6738041117767845, + "grad_norm": 0.12377431243658066, + "learning_rate": 8.385665374836473e-06, + "loss": 0.0014, + "step": 39870 + }, + { + "epoch": 0.6739731120556349, + "grad_norm": 0.06167079880833626, + "learning_rate": 8.384579958897843e-06, + "loss": 0.0016, + "step": 39880 + }, + { + "epoch": 0.6741421123344854, + "grad_norm": 0.03726949170231819, + "learning_rate": 8.383494248484356e-06, + "loss": 0.002, + "step": 39890 + }, + { + "epoch": 0.6743111126133358, + "grad_norm": 0.19137848913669586, + "learning_rate": 8.38240824369048e-06, + "loss": 0.0027, + "step": 39900 + }, + { + "epoch": 0.6744801128921862, + "grad_norm": 0.033898577094078064, + "learning_rate": 8.381321944610693e-06, + "loss": 0.0024, + "step": 39910 + }, + { + "epoch": 0.6746491131710367, + "grad_norm": 0.07841331511735916, + "learning_rate": 8.380235351339518e-06, + "loss": 0.0022, + "step": 39920 + }, + { + "epoch": 0.6748181134498872, + "grad_norm": 0.08964890986680984, + "learning_rate": 8.37914846397149e-06, + "loss": 0.002, + "step": 39930 + }, + { + "epoch": 0.6749871137287377, + "grad_norm": 0.013514422811567783, + "learning_rate": 8.378061282601171e-06, + "loss": 0.0007, + "step": 39940 + }, + { + "epoch": 0.6751561140075881, + "grad_norm": 0.144585520029068, + "learning_rate": 8.376973807323154e-06, + "loss": 0.0025, + "step": 39950 + }, + { + "epoch": 0.6753251142864386, + "grad_norm": 0.14480367302894592, + "learning_rate": 8.375886038232056e-06, + "loss": 0.0012, + "step": 39960 + }, + { + "epoch": 0.6754941145652891, + "grad_norm": 0.07895231246948242, + "learning_rate": 8.374797975422513e-06, + "loss": 0.0019, + "step": 
39970 + }, + { + "epoch": 0.6756631148441395, + "grad_norm": 0.08148206025362015, + "learning_rate": 8.373709618989196e-06, + "loss": 0.0031, + "step": 39980 + }, + { + "epoch": 0.6758321151229899, + "grad_norm": 0.040260862559080124, + "learning_rate": 8.372620969026798e-06, + "loss": 0.0027, + "step": 39990 + }, + { + "epoch": 0.6760011154018404, + "grad_norm": 0.12580999732017517, + "learning_rate": 8.371532025630033e-06, + "loss": 0.0032, + "step": 40000 + }, + { + "epoch": 0.6761701156806909, + "grad_norm": 0.0271195899695158, + "learning_rate": 8.370442788893646e-06, + "loss": 0.0027, + "step": 40010 + }, + { + "epoch": 0.6763391159595413, + "grad_norm": 0.18015502393245697, + "learning_rate": 8.369353258912408e-06, + "loss": 0.0036, + "step": 40020 + }, + { + "epoch": 0.6765081162383918, + "grad_norm": 0.06827221810817719, + "learning_rate": 8.36826343578111e-06, + "loss": 0.0054, + "step": 40030 + }, + { + "epoch": 0.6766771165172423, + "grad_norm": 0.00251463963650167, + "learning_rate": 8.367173319594575e-06, + "loss": 0.0017, + "step": 40040 + }, + { + "epoch": 0.6768461167960927, + "grad_norm": 0.025977712124586105, + "learning_rate": 8.366082910447646e-06, + "loss": 0.0023, + "step": 40050 + }, + { + "epoch": 0.6770151170749432, + "grad_norm": 0.04994544759392738, + "learning_rate": 8.364992208435195e-06, + "loss": 0.0019, + "step": 40060 + }, + { + "epoch": 0.6771841173537936, + "grad_norm": 0.45482775568962097, + "learning_rate": 8.363901213652119e-06, + "loss": 0.0025, + "step": 40070 + }, + { + "epoch": 0.677353117632644, + "grad_norm": 0.060207583010196686, + "learning_rate": 8.362809926193337e-06, + "loss": 0.0013, + "step": 40080 + }, + { + "epoch": 0.6775221179114945, + "grad_norm": 0.13387027382850647, + "learning_rate": 8.3617183461538e-06, + "loss": 0.003, + "step": 40090 + }, + { + "epoch": 0.677691118190345, + "grad_norm": 0.09276372194290161, + "learning_rate": 8.360626473628476e-06, + "loss": 0.0019, + "step": 40100 + }, + { + "epoch": 0.6778601184691955, + "grad_norm": 0.05634180083870888, + "learning_rate": 8.359534308712369e-06, + "loss": 0.0018, + "step": 40110 + }, + { + "epoch": 0.678029118748046, + "grad_norm": 0.07601413130760193, + "learning_rate": 8.358441851500499e-06, + "loss": 0.002, + "step": 40120 + }, + { + "epoch": 0.6781981190268964, + "grad_norm": 0.04013429954648018, + "learning_rate": 8.357349102087915e-06, + "loss": 0.0013, + "step": 40130 + }, + { + "epoch": 0.6783671193057469, + "grad_norm": 0.13309259712696075, + "learning_rate": 8.356256060569694e-06, + "loss": 0.0012, + "step": 40140 + }, + { + "epoch": 0.6785361195845974, + "grad_norm": 0.05572674050927162, + "learning_rate": 8.355162727040934e-06, + "loss": 0.0022, + "step": 40150 + }, + { + "epoch": 0.6787051198634477, + "grad_norm": 0.0784897431731224, + "learning_rate": 8.35406910159676e-06, + "loss": 0.004, + "step": 40160 + }, + { + "epoch": 0.6788741201422982, + "grad_norm": 0.06939193606376648, + "learning_rate": 8.352975184332324e-06, + "loss": 0.0026, + "step": 40170 + }, + { + "epoch": 0.6790431204211487, + "grad_norm": 0.11883770674467087, + "learning_rate": 8.351880975342802e-06, + "loss": 0.0019, + "step": 40180 + }, + { + "epoch": 0.6792121206999991, + "grad_norm": 0.04164041578769684, + "learning_rate": 8.350786474723393e-06, + "loss": 0.0009, + "step": 40190 + }, + { + "epoch": 0.6793811209788496, + "grad_norm": 0.06501810997724533, + "learning_rate": 8.349691682569325e-06, + "loss": 0.0019, + "step": 40200 + }, + { + "epoch": 0.6795501212577001, + "grad_norm": 
0.1461230218410492, + "learning_rate": 8.348596598975853e-06, + "loss": 0.0015, + "step": 40210 + }, + { + "epoch": 0.6797191215365506, + "grad_norm": 0.05555432662367821, + "learning_rate": 8.347501224038253e-06, + "loss": 0.0014, + "step": 40220 + }, + { + "epoch": 0.679888121815401, + "grad_norm": 0.028255827724933624, + "learning_rate": 8.346405557851827e-06, + "loss": 0.0011, + "step": 40230 + }, + { + "epoch": 0.6800571220942515, + "grad_norm": 0.06110149621963501, + "learning_rate": 8.345309600511903e-06, + "loss": 0.0032, + "step": 40240 + }, + { + "epoch": 0.6802261223731019, + "grad_norm": 0.06957794725894928, + "learning_rate": 8.344213352113835e-06, + "loss": 0.0025, + "step": 40250 + }, + { + "epoch": 0.6803951226519523, + "grad_norm": 0.027158791199326515, + "learning_rate": 8.343116812753004e-06, + "loss": 0.0027, + "step": 40260 + }, + { + "epoch": 0.6805641229308028, + "grad_norm": 0.07340021431446075, + "learning_rate": 8.342019982524811e-06, + "loss": 0.0019, + "step": 40270 + }, + { + "epoch": 0.6807331232096533, + "grad_norm": 0.06970573961734772, + "learning_rate": 8.340922861524687e-06, + "loss": 0.0013, + "step": 40280 + }, + { + "epoch": 0.6809021234885038, + "grad_norm": 0.4278048574924469, + "learning_rate": 8.33982544984809e-06, + "loss": 0.0011, + "step": 40290 + }, + { + "epoch": 0.6810711237673542, + "grad_norm": 0.10096825659275055, + "learning_rate": 8.338727747590494e-06, + "loss": 0.0017, + "step": 40300 + }, + { + "epoch": 0.6812401240462047, + "grad_norm": 0.027060270309448242, + "learning_rate": 8.337629754847408e-06, + "loss": 0.0014, + "step": 40310 + }, + { + "epoch": 0.6814091243250552, + "grad_norm": 0.0714111328125, + "learning_rate": 8.336531471714361e-06, + "loss": 0.0021, + "step": 40320 + }, + { + "epoch": 0.6815781246039055, + "grad_norm": 0.07323069870471954, + "learning_rate": 8.335432898286913e-06, + "loss": 0.0027, + "step": 40330 + }, + { + "epoch": 0.681747124882756, + "grad_norm": 0.04784776642918587, + "learning_rate": 8.33433403466064e-06, + "loss": 0.0023, + "step": 40340 + }, + { + "epoch": 0.6819161251616065, + "grad_norm": 0.03423130884766579, + "learning_rate": 8.333234880931151e-06, + "loss": 0.0022, + "step": 40350 + }, + { + "epoch": 0.682085125440457, + "grad_norm": 0.03832382336258888, + "learning_rate": 8.332135437194077e-06, + "loss": 0.0013, + "step": 40360 + }, + { + "epoch": 0.6822541257193074, + "grad_norm": 0.07870305329561234, + "learning_rate": 8.331035703545076e-06, + "loss": 0.0022, + "step": 40370 + }, + { + "epoch": 0.6824231259981579, + "grad_norm": 0.06901619583368301, + "learning_rate": 8.32993568007983e-06, + "loss": 0.0023, + "step": 40380 + }, + { + "epoch": 0.6825921262770084, + "grad_norm": 0.0540190152823925, + "learning_rate": 8.328835366894045e-06, + "loss": 0.0013, + "step": 40390 + }, + { + "epoch": 0.6827611265558589, + "grad_norm": 0.17458319664001465, + "learning_rate": 8.327734764083458e-06, + "loss": 0.0017, + "step": 40400 + }, + { + "epoch": 0.6829301268347093, + "grad_norm": 0.03551612049341202, + "learning_rate": 8.326633871743818e-06, + "loss": 0.0024, + "step": 40410 + }, + { + "epoch": 0.6830991271135597, + "grad_norm": 0.07492489367723465, + "learning_rate": 8.325532689970917e-06, + "loss": 0.0026, + "step": 40420 + }, + { + "epoch": 0.6832681273924102, + "grad_norm": 0.048070840537548065, + "learning_rate": 8.324431218860558e-06, + "loss": 0.0013, + "step": 40430 + }, + { + "epoch": 0.6834371276712606, + "grad_norm": 0.00637691980227828, + "learning_rate": 8.323329458508575e-06, + 
"loss": 0.0019, + "step": 40440 + }, + { + "epoch": 0.6836061279501111, + "grad_norm": 0.07666812092065811, + "learning_rate": 8.322227409010828e-06, + "loss": 0.0023, + "step": 40450 + }, + { + "epoch": 0.6837751282289616, + "grad_norm": 0.055106133222579956, + "learning_rate": 8.3211250704632e-06, + "loss": 0.0019, + "step": 40460 + }, + { + "epoch": 0.683944128507812, + "grad_norm": 0.09392855316400528, + "learning_rate": 8.320022442961599e-06, + "loss": 0.0019, + "step": 40470 + }, + { + "epoch": 0.6841131287866625, + "grad_norm": 0.1125345304608345, + "learning_rate": 8.31891952660196e-06, + "loss": 0.002, + "step": 40480 + }, + { + "epoch": 0.684282129065513, + "grad_norm": 0.18436716496944427, + "learning_rate": 8.317816321480243e-06, + "loss": 0.0017, + "step": 40490 + }, + { + "epoch": 0.6844511293443635, + "grad_norm": 0.03023306466639042, + "learning_rate": 8.31671282769243e-06, + "loss": 0.0017, + "step": 40500 + }, + { + "epoch": 0.6846201296232138, + "grad_norm": 0.10193860530853271, + "learning_rate": 8.31560904533453e-06, + "loss": 0.0018, + "step": 40510 + }, + { + "epoch": 0.6847891299020643, + "grad_norm": 0.10619567334651947, + "learning_rate": 8.31450497450258e-06, + "loss": 0.0021, + "step": 40520 + }, + { + "epoch": 0.6849581301809148, + "grad_norm": 0.06131768599152565, + "learning_rate": 8.313400615292636e-06, + "loss": 0.0012, + "step": 40530 + }, + { + "epoch": 0.6851271304597653, + "grad_norm": 0.047531671822071075, + "learning_rate": 8.312295967800787e-06, + "loss": 0.002, + "step": 40540 + }, + { + "epoch": 0.6852961307386157, + "grad_norm": 0.0296910610049963, + "learning_rate": 8.31119103212314e-06, + "loss": 0.0015, + "step": 40550 + }, + { + "epoch": 0.6854651310174662, + "grad_norm": 0.005351419560611248, + "learning_rate": 8.31008580835583e-06, + "loss": 0.002, + "step": 40560 + }, + { + "epoch": 0.6856341312963167, + "grad_norm": 0.032045409083366394, + "learning_rate": 8.308980296595015e-06, + "loss": 0.0018, + "step": 40570 + }, + { + "epoch": 0.6858031315751671, + "grad_norm": 0.11112460494041443, + "learning_rate": 8.307874496936882e-06, + "loss": 0.0028, + "step": 40580 + }, + { + "epoch": 0.6859721318540175, + "grad_norm": 0.12762129306793213, + "learning_rate": 8.306768409477643e-06, + "loss": 0.0016, + "step": 40590 + }, + { + "epoch": 0.686141132132868, + "grad_norm": 0.07278034836053848, + "learning_rate": 8.30566203431353e-06, + "loss": 0.0016, + "step": 40600 + }, + { + "epoch": 0.6863101324117185, + "grad_norm": 0.022609353065490723, + "learning_rate": 8.304555371540803e-06, + "loss": 0.0015, + "step": 40610 + }, + { + "epoch": 0.6864791326905689, + "grad_norm": 0.1110212653875351, + "learning_rate": 8.303448421255748e-06, + "loss": 0.0021, + "step": 40620 + }, + { + "epoch": 0.6866481329694194, + "grad_norm": 0.03907875344157219, + "learning_rate": 8.302341183554676e-06, + "loss": 0.0018, + "step": 40630 + }, + { + "epoch": 0.6868171332482699, + "grad_norm": 0.09183419495820999, + "learning_rate": 8.30123365853392e-06, + "loss": 0.0023, + "step": 40640 + }, + { + "epoch": 0.6869861335271203, + "grad_norm": 0.0381489135324955, + "learning_rate": 8.30012584628984e-06, + "loss": 0.0015, + "step": 40650 + }, + { + "epoch": 0.6871551338059708, + "grad_norm": 0.02272479236125946, + "learning_rate": 8.299017746918823e-06, + "loss": 0.0025, + "step": 40660 + }, + { + "epoch": 0.6873241340848213, + "grad_norm": 0.13928182423114777, + "learning_rate": 8.297909360517279e-06, + "loss": 0.0022, + "step": 40670 + }, + { + "epoch": 0.6874931343636717, + 
"grad_norm": 0.051701080054044724, + "learning_rate": 8.296800687181638e-06, + "loss": 0.0019, + "step": 40680 + }, + { + "epoch": 0.6876621346425221, + "grad_norm": 0.06480807065963745, + "learning_rate": 8.295691727008366e-06, + "loss": 0.0027, + "step": 40690 + }, + { + "epoch": 0.6878311349213726, + "grad_norm": 0.030916791409254074, + "learning_rate": 8.294582480093947e-06, + "loss": 0.0026, + "step": 40700 + }, + { + "epoch": 0.6880001352002231, + "grad_norm": 0.024703755974769592, + "learning_rate": 8.293472946534888e-06, + "loss": 0.0022, + "step": 40710 + }, + { + "epoch": 0.6881691354790735, + "grad_norm": 0.028602372854948044, + "learning_rate": 8.292363126427725e-06, + "loss": 0.0015, + "step": 40720 + }, + { + "epoch": 0.688338135757924, + "grad_norm": 0.23439855873584747, + "learning_rate": 8.29125301986902e-06, + "loss": 0.0015, + "step": 40730 + }, + { + "epoch": 0.6885071360367745, + "grad_norm": 0.10362856090068817, + "learning_rate": 8.290142626955355e-06, + "loss": 0.0016, + "step": 40740 + }, + { + "epoch": 0.688676136315625, + "grad_norm": 0.07249779999256134, + "learning_rate": 8.28903194778334e-06, + "loss": 0.0017, + "step": 40750 + }, + { + "epoch": 0.6888451365944753, + "grad_norm": 0.10326391458511353, + "learning_rate": 8.287920982449611e-06, + "loss": 0.0016, + "step": 40760 + }, + { + "epoch": 0.6890141368733258, + "grad_norm": 0.1046016737818718, + "learning_rate": 8.286809731050824e-06, + "loss": 0.0031, + "step": 40770 + }, + { + "epoch": 0.6891831371521763, + "grad_norm": 0.06414433568716049, + "learning_rate": 8.285698193683667e-06, + "loss": 0.0019, + "step": 40780 + }, + { + "epoch": 0.6893521374310267, + "grad_norm": 0.0254384633153677, + "learning_rate": 8.284586370444847e-06, + "loss": 0.0015, + "step": 40790 + }, + { + "epoch": 0.6895211377098772, + "grad_norm": 0.11462149024009705, + "learning_rate": 8.2834742614311e-06, + "loss": 0.0018, + "step": 40800 + }, + { + "epoch": 0.6896901379887277, + "grad_norm": 0.07652153074741364, + "learning_rate": 8.282361866739181e-06, + "loss": 0.0017, + "step": 40810 + }, + { + "epoch": 0.6898591382675782, + "grad_norm": 0.290982186794281, + "learning_rate": 8.281249186465879e-06, + "loss": 0.0021, + "step": 40820 + }, + { + "epoch": 0.6900281385464286, + "grad_norm": 0.101005919277668, + "learning_rate": 8.280136220707998e-06, + "loss": 0.0044, + "step": 40830 + }, + { + "epoch": 0.6901971388252791, + "grad_norm": 0.01687195710837841, + "learning_rate": 8.279022969562374e-06, + "loss": 0.0023, + "step": 40840 + }, + { + "epoch": 0.6903661391041295, + "grad_norm": 0.038489919155836105, + "learning_rate": 8.277909433125863e-06, + "loss": 0.0027, + "step": 40850 + }, + { + "epoch": 0.69053513938298, + "grad_norm": 0.05620067939162254, + "learning_rate": 8.276795611495351e-06, + "loss": 0.0021, + "step": 40860 + }, + { + "epoch": 0.6907041396618304, + "grad_norm": 0.05303415283560753, + "learning_rate": 8.275681504767742e-06, + "loss": 0.0016, + "step": 40870 + }, + { + "epoch": 0.6908731399406809, + "grad_norm": 0.03398628905415535, + "learning_rate": 8.274567113039974e-06, + "loss": 0.0012, + "step": 40880 + }, + { + "epoch": 0.6910421402195314, + "grad_norm": 0.04521585628390312, + "learning_rate": 8.273452436409e-06, + "loss": 0.0027, + "step": 40890 + }, + { + "epoch": 0.6912111404983818, + "grad_norm": 0.1029910147190094, + "learning_rate": 8.272337474971804e-06, + "loss": 0.0057, + "step": 40900 + }, + { + "epoch": 0.6913801407772323, + "grad_norm": 0.09532245248556137, + "learning_rate": 
8.271222228825393e-06, + "loss": 0.0013, + "step": 40910 + }, + { + "epoch": 0.6915491410560828, + "grad_norm": 0.12107133120298386, + "learning_rate": 8.270106698066798e-06, + "loss": 0.003, + "step": 40920 + }, + { + "epoch": 0.6917181413349333, + "grad_norm": 0.054512280970811844, + "learning_rate": 8.268990882793078e-06, + "loss": 0.0031, + "step": 40930 + }, + { + "epoch": 0.6918871416137836, + "grad_norm": 0.0330926850438118, + "learning_rate": 8.26787478310131e-06, + "loss": 0.01, + "step": 40940 + }, + { + "epoch": 0.6920561418926341, + "grad_norm": 0.044844236224889755, + "learning_rate": 8.266758399088603e-06, + "loss": 0.0019, + "step": 40950 + }, + { + "epoch": 0.6922251421714846, + "grad_norm": 0.05058744177222252, + "learning_rate": 8.26564173085209e-06, + "loss": 0.0018, + "step": 40960 + }, + { + "epoch": 0.692394142450335, + "grad_norm": 0.04564560204744339, + "learning_rate": 8.264524778488923e-06, + "loss": 0.0014, + "step": 40970 + }, + { + "epoch": 0.6925631427291855, + "grad_norm": 0.008947601541876793, + "learning_rate": 8.263407542096282e-06, + "loss": 0.0015, + "step": 40980 + }, + { + "epoch": 0.692732143008036, + "grad_norm": 0.13389982283115387, + "learning_rate": 8.262290021771374e-06, + "loss": 0.003, + "step": 40990 + }, + { + "epoch": 0.6929011432868865, + "grad_norm": 0.08067138493061066, + "learning_rate": 8.261172217611429e-06, + "loss": 0.0025, + "step": 41000 + }, + { + "epoch": 0.6930701435657369, + "grad_norm": 0.026305221021175385, + "learning_rate": 8.260054129713699e-06, + "loss": 0.0016, + "step": 41010 + }, + { + "epoch": 0.6932391438445873, + "grad_norm": 0.1137649267911911, + "learning_rate": 8.258935758175463e-06, + "loss": 0.0025, + "step": 41020 + }, + { + "epoch": 0.6934081441234378, + "grad_norm": 0.07438289374113083, + "learning_rate": 8.257817103094027e-06, + "loss": 0.0017, + "step": 41030 + }, + { + "epoch": 0.6935771444022882, + "grad_norm": 0.029260028153657913, + "learning_rate": 8.25669816456672e-06, + "loss": 0.0021, + "step": 41040 + }, + { + "epoch": 0.6937461446811387, + "grad_norm": 0.1459071934223175, + "learning_rate": 8.255578942690895e-06, + "loss": 0.0057, + "step": 41050 + }, + { + "epoch": 0.6939151449599892, + "grad_norm": 0.07163554430007935, + "learning_rate": 8.254459437563927e-06, + "loss": 0.0019, + "step": 41060 + }, + { + "epoch": 0.6940841452388397, + "grad_norm": 0.11349696666002274, + "learning_rate": 8.253339649283217e-06, + "loss": 0.0019, + "step": 41070 + }, + { + "epoch": 0.6942531455176901, + "grad_norm": 0.15381790697574615, + "learning_rate": 8.252219577946196e-06, + "loss": 0.0027, + "step": 41080 + }, + { + "epoch": 0.6944221457965406, + "grad_norm": 0.11119570583105087, + "learning_rate": 8.251099223650317e-06, + "loss": 0.0021, + "step": 41090 + }, + { + "epoch": 0.6945911460753911, + "grad_norm": 0.08238446712493896, + "learning_rate": 8.249978586493052e-06, + "loss": 0.002, + "step": 41100 + }, + { + "epoch": 0.6947601463542414, + "grad_norm": 0.02086692675948143, + "learning_rate": 8.248857666571903e-06, + "loss": 0.0019, + "step": 41110 + }, + { + "epoch": 0.6949291466330919, + "grad_norm": 0.20051395893096924, + "learning_rate": 8.247736463984395e-06, + "loss": 0.0019, + "step": 41120 + }, + { + "epoch": 0.6950981469119424, + "grad_norm": 0.12535539269447327, + "learning_rate": 8.24661497882808e-06, + "loss": 0.0022, + "step": 41130 + }, + { + "epoch": 0.6952671471907929, + "grad_norm": 0.14362570643424988, + "learning_rate": 8.245493211200532e-06, + "loss": 0.0026, + "step": 41140 + }, + { + 
"epoch": 0.6954361474696433, + "grad_norm": 0.21121813356876373, + "learning_rate": 8.244371161199351e-06, + "loss": 0.0062, + "step": 41150 + }, + { + "epoch": 0.6956051477484938, + "grad_norm": 0.21628457307815552, + "learning_rate": 8.243248828922157e-06, + "loss": 0.002, + "step": 41160 + }, + { + "epoch": 0.6957741480273443, + "grad_norm": 0.07503283023834229, + "learning_rate": 8.242126214466602e-06, + "loss": 0.0019, + "step": 41170 + }, + { + "epoch": 0.6959431483061947, + "grad_norm": 0.11699981987476349, + "learning_rate": 8.241003317930359e-06, + "loss": 0.001, + "step": 41180 + }, + { + "epoch": 0.6961121485850452, + "grad_norm": 0.2051926851272583, + "learning_rate": 8.239880139411122e-06, + "loss": 0.0019, + "step": 41190 + }, + { + "epoch": 0.6962811488638956, + "grad_norm": 0.1709057241678238, + "learning_rate": 8.238756679006618e-06, + "loss": 0.004, + "step": 41200 + }, + { + "epoch": 0.6964501491427461, + "grad_norm": 0.10599850118160248, + "learning_rate": 8.237632936814589e-06, + "loss": 0.0032, + "step": 41210 + }, + { + "epoch": 0.6966191494215965, + "grad_norm": 0.12379378080368042, + "learning_rate": 8.236508912932809e-06, + "loss": 0.0011, + "step": 41220 + }, + { + "epoch": 0.696788149700447, + "grad_norm": 0.03391696885228157, + "learning_rate": 8.23538460745907e-06, + "loss": 0.0016, + "step": 41230 + }, + { + "epoch": 0.6969571499792975, + "grad_norm": 0.026621650904417038, + "learning_rate": 8.234260020491196e-06, + "loss": 0.0024, + "step": 41240 + }, + { + "epoch": 0.697126150258148, + "grad_norm": 0.10871417820453644, + "learning_rate": 8.233135152127029e-06, + "loss": 0.0019, + "step": 41250 + }, + { + "epoch": 0.6972951505369984, + "grad_norm": 0.03548885136842728, + "learning_rate": 8.232010002464441e-06, + "loss": 0.0015, + "step": 41260 + }, + { + "epoch": 0.6974641508158489, + "grad_norm": 0.06144818663597107, + "learning_rate": 8.230884571601322e-06, + "loss": 0.001, + "step": 41270 + }, + { + "epoch": 0.6976331510946993, + "grad_norm": 0.06657146662473679, + "learning_rate": 8.229758859635592e-06, + "loss": 0.0025, + "step": 41280 + }, + { + "epoch": 0.6978021513735497, + "grad_norm": 0.03593665733933449, + "learning_rate": 8.228632866665191e-06, + "loss": 0.002, + "step": 41290 + }, + { + "epoch": 0.6979711516524002, + "grad_norm": 0.05789535865187645, + "learning_rate": 8.227506592788089e-06, + "loss": 0.0023, + "step": 41300 + }, + { + "epoch": 0.6981401519312507, + "grad_norm": 0.09060919284820557, + "learning_rate": 8.226380038102274e-06, + "loss": 0.0017, + "step": 41310 + }, + { + "epoch": 0.6983091522101011, + "grad_norm": 0.04523943364620209, + "learning_rate": 8.225253202705766e-06, + "loss": 0.002, + "step": 41320 + }, + { + "epoch": 0.6984781524889516, + "grad_norm": 0.01592904143035412, + "learning_rate": 8.224126086696603e-06, + "loss": 0.0018, + "step": 41330 + }, + { + "epoch": 0.6986471527678021, + "grad_norm": 0.055878762155771255, + "learning_rate": 8.222998690172847e-06, + "loss": 0.0017, + "step": 41340 + }, + { + "epoch": 0.6988161530466526, + "grad_norm": 0.028551537543535233, + "learning_rate": 8.22187101323259e-06, + "loss": 0.0028, + "step": 41350 + }, + { + "epoch": 0.698985153325503, + "grad_norm": 0.013488137163221836, + "learning_rate": 8.220743055973943e-06, + "loss": 0.0017, + "step": 41360 + }, + { + "epoch": 0.6991541536043534, + "grad_norm": 0.011277561075985432, + "learning_rate": 8.219614818495047e-06, + "loss": 0.0013, + "step": 41370 + }, + { + "epoch": 0.6993231538832039, + "grad_norm": 0.039525020867586136, 
+ "learning_rate": 8.218486300894061e-06, + "loss": 0.0025, + "step": 41380 + }, + { + "epoch": 0.6994921541620543, + "grad_norm": 0.027259863913059235, + "learning_rate": 8.217357503269175e-06, + "loss": 0.0017, + "step": 41390 + }, + { + "epoch": 0.6996611544409048, + "grad_norm": 0.050522465258836746, + "learning_rate": 8.216228425718596e-06, + "loss": 0.0021, + "step": 41400 + }, + { + "epoch": 0.6998301547197553, + "grad_norm": 0.04927310720086098, + "learning_rate": 8.215099068340562e-06, + "loss": 0.0011, + "step": 41410 + }, + { + "epoch": 0.6999991549986058, + "grad_norm": 0.03349223732948303, + "learning_rate": 8.213969431233332e-06, + "loss": 0.0011, + "step": 41420 + }, + { + "epoch": 0.7001681552774562, + "grad_norm": 0.0998213067650795, + "learning_rate": 8.212839514495189e-06, + "loss": 0.0018, + "step": 41430 + }, + { + "epoch": 0.7003371555563067, + "grad_norm": 0.12260545790195465, + "learning_rate": 8.211709318224442e-06, + "loss": 0.0018, + "step": 41440 + }, + { + "epoch": 0.7005061558351571, + "grad_norm": 0.16003373265266418, + "learning_rate": 8.21057884251942e-06, + "loss": 0.0017, + "step": 41450 + }, + { + "epoch": 0.7006751561140075, + "grad_norm": 0.07196174561977386, + "learning_rate": 8.209448087478486e-06, + "loss": 0.0036, + "step": 41460 + }, + { + "epoch": 0.700844156392858, + "grad_norm": 0.05935538932681084, + "learning_rate": 8.208317053200017e-06, + "loss": 0.0019, + "step": 41470 + }, + { + "epoch": 0.7010131566717085, + "grad_norm": 0.1089865043759346, + "learning_rate": 8.207185739782422e-06, + "loss": 0.0014, + "step": 41480 + }, + { + "epoch": 0.701182156950559, + "grad_norm": 0.10229632258415222, + "learning_rate": 8.206054147324127e-06, + "loss": 0.0013, + "step": 41490 + }, + { + "epoch": 0.7013511572294094, + "grad_norm": 0.053856879472732544, + "learning_rate": 8.204922275923587e-06, + "loss": 0.0019, + "step": 41500 + }, + { + "epoch": 0.7015201575082599, + "grad_norm": 0.07578303664922714, + "learning_rate": 8.203790125679281e-06, + "loss": 0.0022, + "step": 41510 + }, + { + "epoch": 0.7016891577871104, + "grad_norm": 0.38969898223876953, + "learning_rate": 8.202657696689713e-06, + "loss": 0.0024, + "step": 41520 + }, + { + "epoch": 0.7018581580659609, + "grad_norm": 0.04809711128473282, + "learning_rate": 8.201524989053406e-06, + "loss": 0.0017, + "step": 41530 + }, + { + "epoch": 0.7020271583448112, + "grad_norm": 0.0871298536658287, + "learning_rate": 8.200392002868914e-06, + "loss": 0.0028, + "step": 41540 + }, + { + "epoch": 0.7021961586236617, + "grad_norm": 0.07256130129098892, + "learning_rate": 8.199258738234812e-06, + "loss": 0.0012, + "step": 41550 + }, + { + "epoch": 0.7023651589025122, + "grad_norm": 0.04088551551103592, + "learning_rate": 8.198125195249697e-06, + "loss": 0.0022, + "step": 41560 + }, + { + "epoch": 0.7025341591813626, + "grad_norm": 0.06312688440084457, + "learning_rate": 8.196991374012197e-06, + "loss": 0.0018, + "step": 41570 + }, + { + "epoch": 0.7027031594602131, + "grad_norm": 0.08025973290205002, + "learning_rate": 8.195857274620958e-06, + "loss": 0.0018, + "step": 41580 + }, + { + "epoch": 0.7028721597390636, + "grad_norm": 0.0011194974649697542, + "learning_rate": 8.19472289717465e-06, + "loss": 0.0035, + "step": 41590 + }, + { + "epoch": 0.7030411600179141, + "grad_norm": 0.13875657320022583, + "learning_rate": 8.193588241771973e-06, + "loss": 0.0048, + "step": 41600 + }, + { + "epoch": 0.7032101602967645, + "grad_norm": 0.244681254029274, + "learning_rate": 8.192453308511644e-06, + "loss": 0.0031, + 
"step": 41610 + }, + { + "epoch": 0.703379160575615, + "grad_norm": 0.09452541172504425, + "learning_rate": 8.19131809749241e-06, + "loss": 0.0033, + "step": 41620 + }, + { + "epoch": 0.7035481608544654, + "grad_norm": 0.034787777811288834, + "learning_rate": 8.19018260881304e-06, + "loss": 0.0029, + "step": 41630 + }, + { + "epoch": 0.7037171611333158, + "grad_norm": 0.04553893581032753, + "learning_rate": 8.189046842572324e-06, + "loss": 0.0027, + "step": 41640 + }, + { + "epoch": 0.7038861614121663, + "grad_norm": 0.05115741491317749, + "learning_rate": 8.187910798869082e-06, + "loss": 0.0021, + "step": 41650 + }, + { + "epoch": 0.7040551616910168, + "grad_norm": 0.026538031175732613, + "learning_rate": 8.186774477802158e-06, + "loss": 0.0018, + "step": 41660 + }, + { + "epoch": 0.7042241619698673, + "grad_norm": 0.10690360516309738, + "learning_rate": 8.185637879470412e-06, + "loss": 0.0016, + "step": 41670 + }, + { + "epoch": 0.7043931622487177, + "grad_norm": 0.04989476874470711, + "learning_rate": 8.184501003972734e-06, + "loss": 0.0024, + "step": 41680 + }, + { + "epoch": 0.7045621625275682, + "grad_norm": 0.045074157416820526, + "learning_rate": 8.183363851408041e-06, + "loss": 0.0014, + "step": 41690 + }, + { + "epoch": 0.7047311628064187, + "grad_norm": 0.06971005350351334, + "learning_rate": 8.18222642187527e-06, + "loss": 0.002, + "step": 41700 + }, + { + "epoch": 0.704900163085269, + "grad_norm": 0.08040384203195572, + "learning_rate": 8.181088715473381e-06, + "loss": 0.0022, + "step": 41710 + }, + { + "epoch": 0.7050691633641195, + "grad_norm": 0.08214668184518814, + "learning_rate": 8.17995073230136e-06, + "loss": 0.0016, + "step": 41720 + }, + { + "epoch": 0.70523816364297, + "grad_norm": 0.008188508450984955, + "learning_rate": 8.17881247245822e-06, + "loss": 0.0008, + "step": 41730 + }, + { + "epoch": 0.7054071639218205, + "grad_norm": 0.036291953176259995, + "learning_rate": 8.177673936042992e-06, + "loss": 0.0023, + "step": 41740 + }, + { + "epoch": 0.7055761642006709, + "grad_norm": 0.08641888946294785, + "learning_rate": 8.176535123154736e-06, + "loss": 0.002, + "step": 41750 + }, + { + "epoch": 0.7057451644795214, + "grad_norm": 0.025727005675435066, + "learning_rate": 8.175396033892533e-06, + "loss": 0.0023, + "step": 41760 + }, + { + "epoch": 0.7059141647583719, + "grad_norm": 0.07313386350870132, + "learning_rate": 8.174256668355491e-06, + "loss": 0.0024, + "step": 41770 + }, + { + "epoch": 0.7060831650372223, + "grad_norm": 0.326588898897171, + "learning_rate": 8.173117026642738e-06, + "loss": 0.0016, + "step": 41780 + }, + { + "epoch": 0.7062521653160728, + "grad_norm": 0.10961807519197464, + "learning_rate": 8.17197710885343e-06, + "loss": 0.0021, + "step": 41790 + }, + { + "epoch": 0.7064211655949232, + "grad_norm": 0.033026713877916336, + "learning_rate": 8.170836915086744e-06, + "loss": 0.0022, + "step": 41800 + }, + { + "epoch": 0.7065901658737737, + "grad_norm": 0.07057605683803558, + "learning_rate": 8.169696445441886e-06, + "loss": 0.0018, + "step": 41810 + }, + { + "epoch": 0.7067591661526241, + "grad_norm": 0.17566239833831787, + "learning_rate": 8.168555700018077e-06, + "loss": 0.0017, + "step": 41820 + }, + { + "epoch": 0.7069281664314746, + "grad_norm": 0.1394510418176651, + "learning_rate": 8.167414678914572e-06, + "loss": 0.0024, + "step": 41830 + }, + { + "epoch": 0.7070971667103251, + "grad_norm": 0.03784767538309097, + "learning_rate": 8.166273382230642e-06, + "loss": 0.003, + "step": 41840 + }, + { + "epoch": 0.7072661669891755, + 
"grad_norm": 0.10037712007761002, + "learning_rate": 8.165131810065587e-06, + "loss": 0.0032, + "step": 41850 + }, + { + "epoch": 0.707435167268026, + "grad_norm": 0.04333595931529999, + "learning_rate": 8.16398996251873e-06, + "loss": 0.004, + "step": 41860 + }, + { + "epoch": 0.7076041675468765, + "grad_norm": 0.20991283655166626, + "learning_rate": 8.162847839689417e-06, + "loss": 0.0024, + "step": 41870 + }, + { + "epoch": 0.707773167825727, + "grad_norm": 0.1015588790178299, + "learning_rate": 8.161705441677015e-06, + "loss": 0.0032, + "step": 41880 + }, + { + "epoch": 0.7079421681045773, + "grad_norm": 0.007563309278339148, + "learning_rate": 8.160562768580922e-06, + "loss": 0.0025, + "step": 41890 + }, + { + "epoch": 0.7081111683834278, + "grad_norm": 0.05943568795919418, + "learning_rate": 8.159419820500555e-06, + "loss": 0.0021, + "step": 41900 + }, + { + "epoch": 0.7082801686622783, + "grad_norm": 0.03560509532690048, + "learning_rate": 8.158276597535358e-06, + "loss": 0.0029, + "step": 41910 + }, + { + "epoch": 0.7084491689411287, + "grad_norm": 0.09235816448926926, + "learning_rate": 8.157133099784791e-06, + "loss": 0.0019, + "step": 41920 + }, + { + "epoch": 0.7086181692199792, + "grad_norm": 0.3876766264438629, + "learning_rate": 8.15598932734835e-06, + "loss": 0.0021, + "step": 41930 + }, + { + "epoch": 0.7087871694988297, + "grad_norm": 0.077243372797966, + "learning_rate": 8.154845280325545e-06, + "loss": 0.0018, + "step": 41940 + }, + { + "epoch": 0.7089561697776802, + "grad_norm": 0.10611037909984589, + "learning_rate": 8.153700958815917e-06, + "loss": 0.0013, + "step": 41950 + }, + { + "epoch": 0.7091251700565306, + "grad_norm": 0.1063697338104248, + "learning_rate": 8.152556362919024e-06, + "loss": 0.0012, + "step": 41960 + }, + { + "epoch": 0.709294170335381, + "grad_norm": 0.08025922626256943, + "learning_rate": 8.151411492734454e-06, + "loss": 0.0011, + "step": 41970 + }, + { + "epoch": 0.7094631706142315, + "grad_norm": 0.027767019346356392, + "learning_rate": 8.150266348361814e-06, + "loss": 0.0018, + "step": 41980 + }, + { + "epoch": 0.709632170893082, + "grad_norm": 0.08533176779747009, + "learning_rate": 8.149120929900738e-06, + "loss": 0.0026, + "step": 41990 + }, + { + "epoch": 0.7098011711719324, + "grad_norm": 0.060593731701374054, + "learning_rate": 8.147975237450885e-06, + "loss": 0.0012, + "step": 42000 + }, + { + "epoch": 0.7099701714507829, + "grad_norm": 0.08707921206951141, + "learning_rate": 8.146829271111933e-06, + "loss": 0.0018, + "step": 42010 + }, + { + "epoch": 0.7101391717296334, + "grad_norm": 0.05134487897157669, + "learning_rate": 8.145683030983588e-06, + "loss": 0.0016, + "step": 42020 + }, + { + "epoch": 0.7103081720084838, + "grad_norm": 0.06112463027238846, + "learning_rate": 8.144536517165578e-06, + "loss": 0.0026, + "step": 42030 + }, + { + "epoch": 0.7104771722873343, + "grad_norm": 0.1436644345521927, + "learning_rate": 8.143389729757655e-06, + "loss": 0.0019, + "step": 42040 + }, + { + "epoch": 0.7106461725661848, + "grad_norm": 0.07123779505491257, + "learning_rate": 8.142242668859597e-06, + "loss": 0.0019, + "step": 42050 + }, + { + "epoch": 0.7108151728450351, + "grad_norm": 0.03608975559473038, + "learning_rate": 8.141095334571201e-06, + "loss": 0.0014, + "step": 42060 + }, + { + "epoch": 0.7109841731238856, + "grad_norm": 0.047314874827861786, + "learning_rate": 8.139947726992292e-06, + "loss": 0.0014, + "step": 42070 + }, + { + "epoch": 0.7111531734027361, + "grad_norm": 0.027236802503466606, + "learning_rate": 
8.138799846222716e-06, + "loss": 0.0011, + "step": 42080 + }, + { + "epoch": 0.7113221736815866, + "grad_norm": 0.04754794016480446, + "learning_rate": 8.137651692362347e-06, + "loss": 0.0038, + "step": 42090 + }, + { + "epoch": 0.711491173960437, + "grad_norm": 0.1344657838344574, + "learning_rate": 8.13650326551108e-06, + "loss": 0.0019, + "step": 42100 + }, + { + "epoch": 0.7116601742392875, + "grad_norm": 0.0192538034170866, + "learning_rate": 8.13535456576883e-06, + "loss": 0.0024, + "step": 42110 + }, + { + "epoch": 0.711829174518138, + "grad_norm": 0.1750638335943222, + "learning_rate": 8.134205593235543e-06, + "loss": 0.0041, + "step": 42120 + }, + { + "epoch": 0.7119981747969885, + "grad_norm": 0.030514687299728394, + "learning_rate": 8.13305634801118e-06, + "loss": 0.0022, + "step": 42130 + }, + { + "epoch": 0.7121671750758388, + "grad_norm": 0.14213408529758453, + "learning_rate": 8.131906830195739e-06, + "loss": 0.0041, + "step": 42140 + }, + { + "epoch": 0.7123361753546893, + "grad_norm": 0.0784611701965332, + "learning_rate": 8.130757039889229e-06, + "loss": 0.0024, + "step": 42150 + }, + { + "epoch": 0.7125051756335398, + "grad_norm": 0.02702018804848194, + "learning_rate": 8.129606977191686e-06, + "loss": 0.003, + "step": 42160 + }, + { + "epoch": 0.7126741759123902, + "grad_norm": 0.0408581905066967, + "learning_rate": 8.128456642203174e-06, + "loss": 0.0034, + "step": 42170 + }, + { + "epoch": 0.7128431761912407, + "grad_norm": 0.05134117975831032, + "learning_rate": 8.127306035023776e-06, + "loss": 0.0015, + "step": 42180 + }, + { + "epoch": 0.7130121764700912, + "grad_norm": 0.17410869896411896, + "learning_rate": 8.126155155753601e-06, + "loss": 0.0038, + "step": 42190 + }, + { + "epoch": 0.7131811767489417, + "grad_norm": 0.01948716677725315, + "learning_rate": 8.12500400449278e-06, + "loss": 0.003, + "step": 42200 + }, + { + "epoch": 0.7133501770277921, + "grad_norm": 0.22991220653057098, + "learning_rate": 8.12385258134147e-06, + "loss": 0.0037, + "step": 42210 + }, + { + "epoch": 0.7135191773066426, + "grad_norm": 0.1062299907207489, + "learning_rate": 8.12270088639985e-06, + "loss": 0.0014, + "step": 42220 + }, + { + "epoch": 0.713688177585493, + "grad_norm": 0.018809964880347252, + "learning_rate": 8.121548919768124e-06, + "loss": 0.0023, + "step": 42230 + }, + { + "epoch": 0.7138571778643434, + "grad_norm": 0.06033628433942795, + "learning_rate": 8.120396681546516e-06, + "loss": 0.001, + "step": 42240 + }, + { + "epoch": 0.7140261781431939, + "grad_norm": 0.03717151656746864, + "learning_rate": 8.119244171835279e-06, + "loss": 0.0021, + "step": 42250 + }, + { + "epoch": 0.7141951784220444, + "grad_norm": 0.07586640119552612, + "learning_rate": 8.118091390734686e-06, + "loss": 0.0018, + "step": 42260 + }, + { + "epoch": 0.7143641787008949, + "grad_norm": 0.039691995829343796, + "learning_rate": 8.116938338345035e-06, + "loss": 0.0017, + "step": 42270 + }, + { + "epoch": 0.7145331789797453, + "grad_norm": 0.01930839754641056, + "learning_rate": 8.115785014766646e-06, + "loss": 0.0022, + "step": 42280 + }, + { + "epoch": 0.7147021792585958, + "grad_norm": 0.13970836997032166, + "learning_rate": 8.114631420099865e-06, + "loss": 0.0019, + "step": 42290 + }, + { + "epoch": 0.7148711795374463, + "grad_norm": 0.021033965051174164, + "learning_rate": 8.113477554445058e-06, + "loss": 0.0012, + "step": 42300 + }, + { + "epoch": 0.7150401798162968, + "grad_norm": 0.14079996943473816, + "learning_rate": 8.11232341790262e-06, + "loss": 0.0021, + "step": 42310 + }, + { + 
"epoch": 0.7152091800951471, + "grad_norm": 0.06294325739145279, + "learning_rate": 8.111169010572967e-06, + "loss": 0.0032, + "step": 42320 + }, + { + "epoch": 0.7153781803739976, + "grad_norm": 0.08401281386613846, + "learning_rate": 8.110014332556533e-06, + "loss": 0.0015, + "step": 42330 + }, + { + "epoch": 0.7155471806528481, + "grad_norm": 0.04065125808119774, + "learning_rate": 8.108859383953785e-06, + "loss": 0.0013, + "step": 42340 + }, + { + "epoch": 0.7157161809316985, + "grad_norm": 0.08111557364463806, + "learning_rate": 8.107704164865207e-06, + "loss": 0.0027, + "step": 42350 + }, + { + "epoch": 0.715885181210549, + "grad_norm": 0.2308748960494995, + "learning_rate": 8.10654867539131e-06, + "loss": 0.0041, + "step": 42360 + }, + { + "epoch": 0.7160541814893995, + "grad_norm": 0.04631703719496727, + "learning_rate": 8.105392915632626e-06, + "loss": 0.0017, + "step": 42370 + }, + { + "epoch": 0.71622318176825, + "grad_norm": 0.036768991500139236, + "learning_rate": 8.104236885689713e-06, + "loss": 0.0023, + "step": 42380 + }, + { + "epoch": 0.7163921820471004, + "grad_norm": 0.04694832116365433, + "learning_rate": 8.103080585663151e-06, + "loss": 0.0015, + "step": 42390 + }, + { + "epoch": 0.7165611823259508, + "grad_norm": 0.0976409986615181, + "learning_rate": 8.101924015653543e-06, + "loss": 0.0017, + "step": 42400 + }, + { + "epoch": 0.7167301826048013, + "grad_norm": 0.29701071977615356, + "learning_rate": 8.100767175761517e-06, + "loss": 0.0045, + "step": 42410 + }, + { + "epoch": 0.7168991828836517, + "grad_norm": 0.08202585577964783, + "learning_rate": 8.099610066087721e-06, + "loss": 0.0017, + "step": 42420 + }, + { + "epoch": 0.7170681831625022, + "grad_norm": 0.06088138371706009, + "learning_rate": 8.098452686732834e-06, + "loss": 0.0023, + "step": 42430 + }, + { + "epoch": 0.7172371834413527, + "grad_norm": 0.08535436540842056, + "learning_rate": 8.09729503779755e-06, + "loss": 0.0027, + "step": 42440 + }, + { + "epoch": 0.7174061837202031, + "grad_norm": 0.1157093346118927, + "learning_rate": 8.09613711938259e-06, + "loss": 0.0016, + "step": 42450 + }, + { + "epoch": 0.7175751839990536, + "grad_norm": 0.07748487591743469, + "learning_rate": 8.0949789315887e-06, + "loss": 0.0023, + "step": 42460 + }, + { + "epoch": 0.7177441842779041, + "grad_norm": 0.06741594523191452, + "learning_rate": 8.093820474516648e-06, + "loss": 0.0015, + "step": 42470 + }, + { + "epoch": 0.7179131845567546, + "grad_norm": 0.3889932632446289, + "learning_rate": 8.092661748267223e-06, + "loss": 0.006, + "step": 42480 + }, + { + "epoch": 0.7180821848356049, + "grad_norm": 0.17957502603530884, + "learning_rate": 8.091502752941245e-06, + "loss": 0.0031, + "step": 42490 + }, + { + "epoch": 0.7182511851144554, + "grad_norm": 0.043044958263635635, + "learning_rate": 8.090343488639547e-06, + "loss": 0.0013, + "step": 42500 + }, + { + "epoch": 0.7184201853933059, + "grad_norm": 0.04258865863084793, + "learning_rate": 8.08918395546299e-06, + "loss": 0.0012, + "step": 42510 + }, + { + "epoch": 0.7185891856721563, + "grad_norm": 0.06326818466186523, + "learning_rate": 8.088024153512465e-06, + "loss": 0.0019, + "step": 42520 + }, + { + "epoch": 0.7187581859510068, + "grad_norm": 0.2828580141067505, + "learning_rate": 8.086864082888876e-06, + "loss": 0.0035, + "step": 42530 + }, + { + "epoch": 0.7189271862298573, + "grad_norm": 0.1426171511411667, + "learning_rate": 8.085703743693155e-06, + "loss": 0.0019, + "step": 42540 + }, + { + "epoch": 0.7190961865087078, + "grad_norm": 0.039468914270401, + 
"learning_rate": 8.084543136026257e-06, + "loss": 0.0018, + "step": 42550 + }, + { + "epoch": 0.7192651867875582, + "grad_norm": 0.046568624675273895, + "learning_rate": 8.083382259989164e-06, + "loss": 0.003, + "step": 42560 + }, + { + "epoch": 0.7194341870664087, + "grad_norm": 0.16173051297664642, + "learning_rate": 8.082221115682872e-06, + "loss": 0.0015, + "step": 42570 + }, + { + "epoch": 0.7196031873452591, + "grad_norm": 0.07199832797050476, + "learning_rate": 8.08105970320841e-06, + "loss": 0.0023, + "step": 42580 + }, + { + "epoch": 0.7197721876241095, + "grad_norm": 0.2582061290740967, + "learning_rate": 8.079898022666827e-06, + "loss": 0.0021, + "step": 42590 + }, + { + "epoch": 0.71994118790296, + "grad_norm": 0.0317123606801033, + "learning_rate": 8.078736074159193e-06, + "loss": 0.0016, + "step": 42600 + }, + { + "epoch": 0.7201101881818105, + "grad_norm": 0.03768402338027954, + "learning_rate": 8.077573857786603e-06, + "loss": 0.0012, + "step": 42610 + }, + { + "epoch": 0.720279188460661, + "grad_norm": 0.15136878192424774, + "learning_rate": 8.076411373650177e-06, + "loss": 0.0067, + "step": 42620 + }, + { + "epoch": 0.7204481887395114, + "grad_norm": 0.05533058941364288, + "learning_rate": 8.075248621851056e-06, + "loss": 0.0011, + "step": 42630 + }, + { + "epoch": 0.7206171890183619, + "grad_norm": 0.10668498277664185, + "learning_rate": 8.074085602490404e-06, + "loss": 0.001, + "step": 42640 + }, + { + "epoch": 0.7207861892972124, + "grad_norm": 0.0986466333270073, + "learning_rate": 8.072922315669408e-06, + "loss": 0.0019, + "step": 42650 + }, + { + "epoch": 0.7209551895760627, + "grad_norm": 0.5844259262084961, + "learning_rate": 8.071758761489287e-06, + "loss": 0.003, + "step": 42660 + }, + { + "epoch": 0.7211241898549132, + "grad_norm": 0.08260132372379303, + "learning_rate": 8.070594940051269e-06, + "loss": 0.0018, + "step": 42670 + }, + { + "epoch": 0.7212931901337637, + "grad_norm": 0.045917097479104996, + "learning_rate": 8.069430851456612e-06, + "loss": 0.0025, + "step": 42680 + }, + { + "epoch": 0.7214621904126142, + "grad_norm": 0.0059732492081820965, + "learning_rate": 8.068266495806601e-06, + "loss": 0.0025, + "step": 42690 + }, + { + "epoch": 0.7216311906914646, + "grad_norm": 0.09748756140470505, + "learning_rate": 8.067101873202539e-06, + "loss": 0.0027, + "step": 42700 + }, + { + "epoch": 0.7218001909703151, + "grad_norm": 0.0393015593290329, + "learning_rate": 8.065936983745753e-06, + "loss": 0.0017, + "step": 42710 + }, + { + "epoch": 0.7219691912491656, + "grad_norm": 0.04428603872656822, + "learning_rate": 8.064771827537595e-06, + "loss": 0.0012, + "step": 42720 + }, + { + "epoch": 0.7221381915280161, + "grad_norm": 0.09002875536680222, + "learning_rate": 8.063606404679437e-06, + "loss": 0.0021, + "step": 42730 + }, + { + "epoch": 0.7223071918068665, + "grad_norm": 0.1135852113366127, + "learning_rate": 8.06244071527268e-06, + "loss": 0.0019, + "step": 42740 + }, + { + "epoch": 0.7224761920857169, + "grad_norm": 0.02696043998003006, + "learning_rate": 8.06127475941874e-06, + "loss": 0.0029, + "step": 42750 + }, + { + "epoch": 0.7226451923645674, + "grad_norm": 0.03349917009472847, + "learning_rate": 8.060108537219067e-06, + "loss": 0.0022, + "step": 42760 + }, + { + "epoch": 0.7228141926434178, + "grad_norm": 0.03234555572271347, + "learning_rate": 8.058942048775125e-06, + "loss": 0.001, + "step": 42770 + }, + { + "epoch": 0.7229831929222683, + "grad_norm": 0.20898035168647766, + "learning_rate": 8.057775294188401e-06, + "loss": 0.0013, + "step": 
42780 + }, + { + "epoch": 0.7231521932011188, + "grad_norm": 0.2018376737833023, + "learning_rate": 8.056608273560414e-06, + "loss": 0.0035, + "step": 42790 + }, + { + "epoch": 0.7233211934799693, + "grad_norm": 0.13457611203193665, + "learning_rate": 8.055440986992696e-06, + "loss": 0.0032, + "step": 42800 + }, + { + "epoch": 0.7234901937588197, + "grad_norm": 0.11162465065717697, + "learning_rate": 8.054273434586808e-06, + "loss": 0.0016, + "step": 42810 + }, + { + "epoch": 0.7236591940376702, + "grad_norm": 0.030697602778673172, + "learning_rate": 8.053105616444334e-06, + "loss": 0.0013, + "step": 42820 + }, + { + "epoch": 0.7238281943165206, + "grad_norm": 0.12240386754274368, + "learning_rate": 8.051937532666878e-06, + "loss": 0.0015, + "step": 42830 + }, + { + "epoch": 0.723997194595371, + "grad_norm": 0.0318816602230072, + "learning_rate": 8.050769183356071e-06, + "loss": 0.0019, + "step": 42840 + }, + { + "epoch": 0.7241661948742215, + "grad_norm": 0.02436286211013794, + "learning_rate": 8.049600568613563e-06, + "loss": 0.0025, + "step": 42850 + }, + { + "epoch": 0.724335195153072, + "grad_norm": 0.11259996145963669, + "learning_rate": 8.048431688541028e-06, + "loss": 0.0019, + "step": 42860 + }, + { + "epoch": 0.7245041954319225, + "grad_norm": 0.05099119618535042, + "learning_rate": 8.047262543240169e-06, + "loss": 0.0016, + "step": 42870 + }, + { + "epoch": 0.7246731957107729, + "grad_norm": 0.1111067682504654, + "learning_rate": 8.046093132812703e-06, + "loss": 0.002, + "step": 42880 + }, + { + "epoch": 0.7248421959896234, + "grad_norm": 0.037433598190546036, + "learning_rate": 8.044923457360376e-06, + "loss": 0.0014, + "step": 42890 + }, + { + "epoch": 0.7250111962684739, + "grad_norm": 0.10939794778823853, + "learning_rate": 8.043753516984954e-06, + "loss": 0.0018, + "step": 42900 + }, + { + "epoch": 0.7251801965473244, + "grad_norm": 0.09101684391498566, + "learning_rate": 8.04258331178823e-06, + "loss": 0.0014, + "step": 42910 + }, + { + "epoch": 0.7253491968261747, + "grad_norm": 0.006667679641395807, + "learning_rate": 8.041412841872016e-06, + "loss": 0.0011, + "step": 42920 + }, + { + "epoch": 0.7255181971050252, + "grad_norm": 0.10800191760063171, + "learning_rate": 8.040242107338147e-06, + "loss": 0.002, + "step": 42930 + }, + { + "epoch": 0.7256871973838757, + "grad_norm": 0.0515403188765049, + "learning_rate": 8.039071108288488e-06, + "loss": 0.002, + "step": 42940 + }, + { + "epoch": 0.7258561976627261, + "grad_norm": 0.24728050827980042, + "learning_rate": 8.037899844824916e-06, + "loss": 0.001, + "step": 42950 + }, + { + "epoch": 0.7260251979415766, + "grad_norm": 0.05296405404806137, + "learning_rate": 8.036728317049339e-06, + "loss": 0.0011, + "step": 42960 + }, + { + "epoch": 0.7261941982204271, + "grad_norm": 0.0076844641007483006, + "learning_rate": 8.035556525063684e-06, + "loss": 0.0026, + "step": 42970 + }, + { + "epoch": 0.7263631984992776, + "grad_norm": 0.07447580993175507, + "learning_rate": 8.034384468969905e-06, + "loss": 0.002, + "step": 42980 + }, + { + "epoch": 0.726532198778128, + "grad_norm": 0.0800366997718811, + "learning_rate": 8.033212148869973e-06, + "loss": 0.0039, + "step": 42990 + }, + { + "epoch": 0.7267011990569785, + "grad_norm": 0.0012301752576604486, + "learning_rate": 8.03203956486589e-06, + "loss": 0.0014, + "step": 43000 + }, + { + "epoch": 0.7268701993358289, + "grad_norm": 0.026477621868252754, + "learning_rate": 8.030866717059673e-06, + "loss": 0.0015, + "step": 43010 + }, + { + "epoch": 0.7270391996146793, + "grad_norm": 
0.05160536244511604, + "learning_rate": 8.029693605553367e-06, + "loss": 0.0021, + "step": 43020 + }, + { + "epoch": 0.7272081998935298, + "grad_norm": 0.055757176131010056, + "learning_rate": 8.02852023044904e-06, + "loss": 0.0017, + "step": 43030 + }, + { + "epoch": 0.7273772001723803, + "grad_norm": 0.05494154617190361, + "learning_rate": 8.027346591848777e-06, + "loss": 0.0026, + "step": 43040 + }, + { + "epoch": 0.7275462004512308, + "grad_norm": 4.639435768127441, + "learning_rate": 8.026172689854694e-06, + "loss": 0.0164, + "step": 43050 + }, + { + "epoch": 0.7277152007300812, + "grad_norm": 0.10933412611484528, + "learning_rate": 8.024998524568925e-06, + "loss": 0.0013, + "step": 43060 + }, + { + "epoch": 0.7278842010089317, + "grad_norm": 0.15748070180416107, + "learning_rate": 8.023824096093628e-06, + "loss": 0.0018, + "step": 43070 + }, + { + "epoch": 0.7280532012877822, + "grad_norm": 0.08826566487550735, + "learning_rate": 8.022649404530981e-06, + "loss": 0.0015, + "step": 43080 + }, + { + "epoch": 0.7282222015666325, + "grad_norm": 0.06301755458116531, + "learning_rate": 8.021474449983195e-06, + "loss": 0.0023, + "step": 43090 + }, + { + "epoch": 0.728391201845483, + "grad_norm": 0.05063905566930771, + "learning_rate": 8.020299232552491e-06, + "loss": 0.0019, + "step": 43100 + }, + { + "epoch": 0.7285602021243335, + "grad_norm": 0.024068308994174004, + "learning_rate": 8.019123752341119e-06, + "loss": 0.003, + "step": 43110 + }, + { + "epoch": 0.728729202403184, + "grad_norm": 0.07127765566110611, + "learning_rate": 8.017948009451352e-06, + "loss": 0.0016, + "step": 43120 + }, + { + "epoch": 0.7288982026820344, + "grad_norm": 0.05042535439133644, + "learning_rate": 8.016772003985488e-06, + "loss": 0.002, + "step": 43130 + }, + { + "epoch": 0.7290672029608849, + "grad_norm": 0.11436517536640167, + "learning_rate": 8.015595736045842e-06, + "loss": 0.0013, + "step": 43140 + }, + { + "epoch": 0.7292362032397354, + "grad_norm": 0.3207210302352905, + "learning_rate": 8.014419205734756e-06, + "loss": 0.0021, + "step": 43150 + }, + { + "epoch": 0.7294052035185858, + "grad_norm": 0.01475997269153595, + "learning_rate": 8.013242413154596e-06, + "loss": 0.0011, + "step": 43160 + }, + { + "epoch": 0.7295742037974363, + "grad_norm": 0.09151560068130493, + "learning_rate": 8.012065358407743e-06, + "loss": 0.0021, + "step": 43170 + }, + { + "epoch": 0.7297432040762867, + "grad_norm": 0.25620636343955994, + "learning_rate": 8.010888041596611e-06, + "loss": 0.0017, + "step": 43180 + }, + { + "epoch": 0.7299122043551372, + "grad_norm": 0.1682850569486618, + "learning_rate": 8.009710462823632e-06, + "loss": 0.0015, + "step": 43190 + }, + { + "epoch": 0.7300812046339876, + "grad_norm": 0.04165157303214073, + "learning_rate": 8.00853262219126e-06, + "loss": 0.0027, + "step": 43200 + }, + { + "epoch": 0.7302502049128381, + "grad_norm": 0.012091743759810925, + "learning_rate": 8.007354519801975e-06, + "loss": 0.0014, + "step": 43210 + }, + { + "epoch": 0.7304192051916886, + "grad_norm": 0.03053271770477295, + "learning_rate": 8.006176155758274e-06, + "loss": 0.0013, + "step": 43220 + }, + { + "epoch": 0.730588205470539, + "grad_norm": 0.006675877142697573, + "learning_rate": 8.004997530162683e-06, + "loss": 0.0022, + "step": 43230 + }, + { + "epoch": 0.7307572057493895, + "grad_norm": 0.08256799727678299, + "learning_rate": 8.003818643117746e-06, + "loss": 0.0023, + "step": 43240 + }, + { + "epoch": 0.73092620602824, + "grad_norm": 0.03869280219078064, + "learning_rate": 8.002639494726034e-06, + 
"loss": 0.0007, + "step": 43250 + }, + { + "epoch": 0.7310952063070905, + "grad_norm": 0.064326211810112, + "learning_rate": 8.001460085090138e-06, + "loss": 0.0013, + "step": 43260 + }, + { + "epoch": 0.7312642065859408, + "grad_norm": 0.128001868724823, + "learning_rate": 8.000280414312672e-06, + "loss": 0.002, + "step": 43270 + }, + { + "epoch": 0.7314332068647913, + "grad_norm": 0.04683956503868103, + "learning_rate": 7.999100482496273e-06, + "loss": 0.002, + "step": 43280 + }, + { + "epoch": 0.7316022071436418, + "grad_norm": 0.02895090915262699, + "learning_rate": 7.997920289743601e-06, + "loss": 0.0023, + "step": 43290 + }, + { + "epoch": 0.7317712074224922, + "grad_norm": 0.16094285249710083, + "learning_rate": 7.996739836157338e-06, + "loss": 0.0021, + "step": 43300 + }, + { + "epoch": 0.7319402077013427, + "grad_norm": 0.11246848106384277, + "learning_rate": 7.995559121840192e-06, + "loss": 0.0018, + "step": 43310 + }, + { + "epoch": 0.7321092079801932, + "grad_norm": 0.03383997455239296, + "learning_rate": 7.994378146894887e-06, + "loss": 0.0018, + "step": 43320 + }, + { + "epoch": 0.7322782082590437, + "grad_norm": 0.09549947082996368, + "learning_rate": 7.993196911424174e-06, + "loss": 0.004, + "step": 43330 + }, + { + "epoch": 0.7324472085378941, + "grad_norm": 0.019019361585378647, + "learning_rate": 7.99201541553083e-06, + "loss": 0.0014, + "step": 43340 + }, + { + "epoch": 0.7326162088167445, + "grad_norm": 0.21534746885299683, + "learning_rate": 7.990833659317648e-06, + "loss": 0.0062, + "step": 43350 + }, + { + "epoch": 0.732785209095595, + "grad_norm": 0.03729574382305145, + "learning_rate": 7.989651642887445e-06, + "loss": 0.002, + "step": 43360 + }, + { + "epoch": 0.7329542093744454, + "grad_norm": 0.15042176842689514, + "learning_rate": 7.988469366343063e-06, + "loss": 0.0018, + "step": 43370 + }, + { + "epoch": 0.7331232096532959, + "grad_norm": 0.06487035751342773, + "learning_rate": 7.987286829787369e-06, + "loss": 0.0011, + "step": 43380 + }, + { + "epoch": 0.7332922099321464, + "grad_norm": 0.07418685406446457, + "learning_rate": 7.986104033323246e-06, + "loss": 0.0017, + "step": 43390 + }, + { + "epoch": 0.7334612102109969, + "grad_norm": 0.10712029784917831, + "learning_rate": 7.984920977053606e-06, + "loss": 0.0028, + "step": 43400 + }, + { + "epoch": 0.7336302104898473, + "grad_norm": 0.04626644402742386, + "learning_rate": 7.983737661081376e-06, + "loss": 0.0016, + "step": 43410 + }, + { + "epoch": 0.7337992107686978, + "grad_norm": 0.04742048308253288, + "learning_rate": 7.982554085509512e-06, + "loss": 0.0016, + "step": 43420 + }, + { + "epoch": 0.7339682110475483, + "grad_norm": 0.0983252003788948, + "learning_rate": 7.981370250440996e-06, + "loss": 0.0025, + "step": 43430 + }, + { + "epoch": 0.7341372113263986, + "grad_norm": 0.138091579079628, + "learning_rate": 7.98018615597882e-06, + "loss": 0.0016, + "step": 43440 + }, + { + "epoch": 0.7343062116052491, + "grad_norm": 0.06672213226556778, + "learning_rate": 7.97900180222601e-06, + "loss": 0.002, + "step": 43450 + }, + { + "epoch": 0.7344752118840996, + "grad_norm": 0.06542443484067917, + "learning_rate": 7.977817189285609e-06, + "loss": 0.0018, + "step": 43460 + }, + { + "epoch": 0.7346442121629501, + "grad_norm": 0.04221392050385475, + "learning_rate": 7.976632317260686e-06, + "loss": 0.0019, + "step": 43470 + }, + { + "epoch": 0.7348132124418005, + "grad_norm": 0.06752992421388626, + "learning_rate": 7.975447186254327e-06, + "loss": 0.0027, + "step": 43480 + }, + { + "epoch": 0.734982212720651, + 
"grad_norm": 0.0305518489331007, + "learning_rate": 7.97426179636965e-06, + "loss": 0.0014, + "step": 43490 + }, + { + "epoch": 0.7351512129995015, + "grad_norm": 0.0345865860581398, + "learning_rate": 7.973076147709782e-06, + "loss": 0.0018, + "step": 43500 + }, + { + "epoch": 0.735320213278352, + "grad_norm": 0.11416380852460861, + "learning_rate": 7.971890240377886e-06, + "loss": 0.0019, + "step": 43510 + }, + { + "epoch": 0.7354892135572023, + "grad_norm": 0.2210749387741089, + "learning_rate": 7.97070407447714e-06, + "loss": 0.0036, + "step": 43520 + }, + { + "epoch": 0.7356582138360528, + "grad_norm": 0.04454043507575989, + "learning_rate": 7.969517650110747e-06, + "loss": 0.0021, + "step": 43530 + }, + { + "epoch": 0.7358272141149033, + "grad_norm": 0.1723904013633728, + "learning_rate": 7.968330967381931e-06, + "loss": 0.0032, + "step": 43540 + }, + { + "epoch": 0.7359962143937537, + "grad_norm": 0.036471135914325714, + "learning_rate": 7.967144026393939e-06, + "loss": 0.0014, + "step": 43550 + }, + { + "epoch": 0.7361652146726042, + "grad_norm": 0.026985328644514084, + "learning_rate": 7.965956827250038e-06, + "loss": 0.0011, + "step": 43560 + }, + { + "epoch": 0.7363342149514547, + "grad_norm": 0.14341233670711517, + "learning_rate": 7.964769370053525e-06, + "loss": 0.0028, + "step": 43570 + }, + { + "epoch": 0.7365032152303052, + "grad_norm": 0.04602760449051857, + "learning_rate": 7.96358165490771e-06, + "loss": 0.0025, + "step": 43580 + }, + { + "epoch": 0.7366722155091556, + "grad_norm": 0.09611208736896515, + "learning_rate": 7.962393681915934e-06, + "loss": 0.0012, + "step": 43590 + }, + { + "epoch": 0.7368412157880061, + "grad_norm": 0.04702535644173622, + "learning_rate": 7.961205451181555e-06, + "loss": 0.0011, + "step": 43600 + }, + { + "epoch": 0.7370102160668565, + "grad_norm": 0.08078736811876297, + "learning_rate": 7.960016962807952e-06, + "loss": 0.0015, + "step": 43610 + }, + { + "epoch": 0.7371792163457069, + "grad_norm": 0.029118545353412628, + "learning_rate": 7.958828216898535e-06, + "loss": 0.0014, + "step": 43620 + }, + { + "epoch": 0.7373482166245574, + "grad_norm": 0.03096819296479225, + "learning_rate": 7.957639213556725e-06, + "loss": 0.0018, + "step": 43630 + }, + { + "epoch": 0.7375172169034079, + "grad_norm": 0.10239429771900177, + "learning_rate": 7.956449952885973e-06, + "loss": 0.0026, + "step": 43640 + }, + { + "epoch": 0.7376862171822584, + "grad_norm": 0.16455641388893127, + "learning_rate": 7.955260434989752e-06, + "loss": 0.0022, + "step": 43650 + }, + { + "epoch": 0.7378552174611088, + "grad_norm": 0.08374723047018051, + "learning_rate": 7.954070659971554e-06, + "loss": 0.0012, + "step": 43660 + }, + { + "epoch": 0.7380242177399593, + "grad_norm": 0.03553815931081772, + "learning_rate": 7.952880627934896e-06, + "loss": 0.0044, + "step": 43670 + }, + { + "epoch": 0.7381932180188098, + "grad_norm": 0.020006325095891953, + "learning_rate": 7.951690338983317e-06, + "loss": 0.0016, + "step": 43680 + }, + { + "epoch": 0.7383622182976602, + "grad_norm": 0.19382953643798828, + "learning_rate": 7.950499793220377e-06, + "loss": 0.005, + "step": 43690 + }, + { + "epoch": 0.7385312185765106, + "grad_norm": 0.0368359349668026, + "learning_rate": 7.949308990749658e-06, + "loss": 0.0026, + "step": 43700 + }, + { + "epoch": 0.7387002188553611, + "grad_norm": 0.059048112481832504, + "learning_rate": 7.948117931674769e-06, + "loss": 0.0013, + "step": 43710 + }, + { + "epoch": 0.7388692191342116, + "grad_norm": 0.025906303897500038, + "learning_rate": 
7.946926616099334e-06, + "loss": 0.0017, + "step": 43720 + }, + { + "epoch": 0.739038219413062, + "grad_norm": 0.09001726657152176, + "learning_rate": 7.945735044127006e-06, + "loss": 0.0021, + "step": 43730 + }, + { + "epoch": 0.7392072196919125, + "grad_norm": 0.06536002457141876, + "learning_rate": 7.944543215861458e-06, + "loss": 0.0016, + "step": 43740 + }, + { + "epoch": 0.739376219970763, + "grad_norm": 0.021984895691275597, + "learning_rate": 7.943351131406381e-06, + "loss": 0.0013, + "step": 43750 + }, + { + "epoch": 0.7395452202496134, + "grad_norm": 0.07657831907272339, + "learning_rate": 7.942158790865496e-06, + "loss": 0.0015, + "step": 43760 + }, + { + "epoch": 0.7397142205284639, + "grad_norm": 0.32660388946533203, + "learning_rate": 7.94096619434254e-06, + "loss": 0.0037, + "step": 43770 + }, + { + "epoch": 0.7398832208073143, + "grad_norm": 0.024858536198735237, + "learning_rate": 7.93977334194128e-06, + "loss": 0.0013, + "step": 43780 + }, + { + "epoch": 0.7400522210861648, + "grad_norm": 0.08623042702674866, + "learning_rate": 7.938580233765492e-06, + "loss": 0.0017, + "step": 43790 + }, + { + "epoch": 0.7402212213650152, + "grad_norm": 0.05644140765070915, + "learning_rate": 7.937386869918986e-06, + "loss": 0.002, + "step": 43800 + }, + { + "epoch": 0.7403902216438657, + "grad_norm": 0.03610311076045036, + "learning_rate": 7.93619325050559e-06, + "loss": 0.0021, + "step": 43810 + }, + { + "epoch": 0.7405592219227162, + "grad_norm": 0.040945619344711304, + "learning_rate": 7.934999375629158e-06, + "loss": 0.0009, + "step": 43820 + }, + { + "epoch": 0.7407282222015666, + "grad_norm": 0.16894613206386566, + "learning_rate": 7.933805245393558e-06, + "loss": 0.0033, + "step": 43830 + }, + { + "epoch": 0.7408972224804171, + "grad_norm": 0.12098273634910583, + "learning_rate": 7.932610859902688e-06, + "loss": 0.0018, + "step": 43840 + }, + { + "epoch": 0.7410662227592676, + "grad_norm": 0.027642671018838882, + "learning_rate": 7.931416219260462e-06, + "loss": 0.0014, + "step": 43850 + }, + { + "epoch": 0.7412352230381181, + "grad_norm": 0.03056027740240097, + "learning_rate": 7.930221323570824e-06, + "loss": 0.0017, + "step": 43860 + }, + { + "epoch": 0.7414042233169684, + "grad_norm": 0.05739610642194748, + "learning_rate": 7.929026172937732e-06, + "loss": 0.001, + "step": 43870 + }, + { + "epoch": 0.7415732235958189, + "grad_norm": 0.09340202063322067, + "learning_rate": 7.927830767465174e-06, + "loss": 0.0025, + "step": 43880 + }, + { + "epoch": 0.7417422238746694, + "grad_norm": 0.06758419424295425, + "learning_rate": 7.926635107257152e-06, + "loss": 0.0012, + "step": 43890 + }, + { + "epoch": 0.7419112241535198, + "grad_norm": 0.06023062393069267, + "learning_rate": 7.925439192417695e-06, + "loss": 0.0021, + "step": 43900 + }, + { + "epoch": 0.7420802244323703, + "grad_norm": 0.04593726247549057, + "learning_rate": 7.924243023050855e-06, + "loss": 0.0014, + "step": 43910 + }, + { + "epoch": 0.7422492247112208, + "grad_norm": 0.09606166929006577, + "learning_rate": 7.923046599260704e-06, + "loss": 0.0034, + "step": 43920 + }, + { + "epoch": 0.7424182249900713, + "grad_norm": 0.11645695567131042, + "learning_rate": 7.921849921151337e-06, + "loss": 0.0014, + "step": 43930 + }, + { + "epoch": 0.7425872252689217, + "grad_norm": 0.056003108620643616, + "learning_rate": 7.920652988826868e-06, + "loss": 0.0019, + "step": 43940 + }, + { + "epoch": 0.7427562255477722, + "grad_norm": 0.02698294259607792, + "learning_rate": 7.919455802391439e-06, + "loss": 0.002, + "step": 43950 + }, 
+ { + "epoch": 0.7429252258266226, + "grad_norm": 0.042158737778663635, + "learning_rate": 7.918258361949211e-06, + "loss": 0.0027, + "step": 43960 + }, + { + "epoch": 0.743094226105473, + "grad_norm": 0.002662374172359705, + "learning_rate": 7.917060667604365e-06, + "loss": 0.0015, + "step": 43970 + }, + { + "epoch": 0.7432632263843235, + "grad_norm": 0.04010889679193497, + "learning_rate": 7.91586271946111e-06, + "loss": 0.0015, + "step": 43980 + }, + { + "epoch": 0.743432226663174, + "grad_norm": 0.019668444991111755, + "learning_rate": 7.914664517623668e-06, + "loss": 0.0023, + "step": 43990 + }, + { + "epoch": 0.7436012269420245, + "grad_norm": 0.07019349187612534, + "learning_rate": 7.913466062196291e-06, + "loss": 0.0026, + "step": 44000 + }, + { + "epoch": 0.7437702272208749, + "grad_norm": 0.05831541866064072, + "learning_rate": 7.912267353283251e-06, + "loss": 0.0016, + "step": 44010 + }, + { + "epoch": 0.7439392274997254, + "grad_norm": 0.015604469925165176, + "learning_rate": 7.91106839098884e-06, + "loss": 0.0017, + "step": 44020 + }, + { + "epoch": 0.7441082277785759, + "grad_norm": 0.15227645635604858, + "learning_rate": 7.909869175417378e-06, + "loss": 0.0025, + "step": 44030 + }, + { + "epoch": 0.7442772280574262, + "grad_norm": 0.12750785052776337, + "learning_rate": 7.908669706673197e-06, + "loss": 0.0039, + "step": 44040 + }, + { + "epoch": 0.7444462283362767, + "grad_norm": 0.07931360602378845, + "learning_rate": 7.907469984860658e-06, + "loss": 0.0019, + "step": 44050 + }, + { + "epoch": 0.7446152286151272, + "grad_norm": 0.07363370805978775, + "learning_rate": 7.906270010084145e-06, + "loss": 0.0033, + "step": 44060 + }, + { + "epoch": 0.7447842288939777, + "grad_norm": 0.01361178420484066, + "learning_rate": 7.905069782448059e-06, + "loss": 0.0017, + "step": 44070 + }, + { + "epoch": 0.7449532291728281, + "grad_norm": 0.1368517130613327, + "learning_rate": 7.903869302056825e-06, + "loss": 0.0021, + "step": 44080 + }, + { + "epoch": 0.7451222294516786, + "grad_norm": 0.1019308865070343, + "learning_rate": 7.902668569014895e-06, + "loss": 0.0013, + "step": 44090 + }, + { + "epoch": 0.7452912297305291, + "grad_norm": 0.10239925980567932, + "learning_rate": 7.901467583426735e-06, + "loss": 0.0024, + "step": 44100 + }, + { + "epoch": 0.7454602300093796, + "grad_norm": 0.0578392818570137, + "learning_rate": 7.900266345396837e-06, + "loss": 0.002, + "step": 44110 + }, + { + "epoch": 0.74562923028823, + "grad_norm": 0.016515590250492096, + "learning_rate": 7.899064855029713e-06, + "loss": 0.0008, + "step": 44120 + }, + { + "epoch": 0.7457982305670804, + "grad_norm": 0.04481057822704315, + "learning_rate": 7.897863112429902e-06, + "loss": 0.0023, + "step": 44130 + }, + { + "epoch": 0.7459672308459309, + "grad_norm": 0.01487799547612667, + "learning_rate": 7.89666111770196e-06, + "loss": 0.0008, + "step": 44140 + }, + { + "epoch": 0.7461362311247813, + "grad_norm": 0.043098077178001404, + "learning_rate": 7.895458870950465e-06, + "loss": 0.0021, + "step": 44150 + }, + { + "epoch": 0.7463052314036318, + "grad_norm": 0.07052203267812729, + "learning_rate": 7.89425637228002e-06, + "loss": 0.002, + "step": 44160 + }, + { + "epoch": 0.7464742316824823, + "grad_norm": 0.08470898866653442, + "learning_rate": 7.893053621795249e-06, + "loss": 0.0019, + "step": 44170 + }, + { + "epoch": 0.7466432319613328, + "grad_norm": 0.09704260528087616, + "learning_rate": 7.891850619600794e-06, + "loss": 0.0032, + "step": 44180 + }, + { + "epoch": 0.7468122322401832, + "grad_norm": 
0.07925360649824142, + "learning_rate": 7.890647365801325e-06, + "loss": 0.0015, + "step": 44190 + }, + { + "epoch": 0.7469812325190337, + "grad_norm": 0.01802569441497326, + "learning_rate": 7.889443860501529e-06, + "loss": 0.002, + "step": 44200 + }, + { + "epoch": 0.7471502327978841, + "grad_norm": 0.032090552151203156, + "learning_rate": 7.888240103806117e-06, + "loss": 0.0013, + "step": 44210 + }, + { + "epoch": 0.7473192330767345, + "grad_norm": 0.2693747282028198, + "learning_rate": 7.887036095819822e-06, + "loss": 0.002, + "step": 44220 + }, + { + "epoch": 0.747488233355585, + "grad_norm": 0.31655386090278625, + "learning_rate": 7.8858318366474e-06, + "loss": 0.0022, + "step": 44230 + }, + { + "epoch": 0.7476572336344355, + "grad_norm": 0.05500679463148117, + "learning_rate": 7.884627326393624e-06, + "loss": 0.0023, + "step": 44240 + }, + { + "epoch": 0.747826233913286, + "grad_norm": 0.03424282744526863, + "learning_rate": 7.883422565163296e-06, + "loss": 0.0014, + "step": 44250 + }, + { + "epoch": 0.7479952341921364, + "grad_norm": 0.08011564612388611, + "learning_rate": 7.882217553061234e-06, + "loss": 0.0016, + "step": 44260 + }, + { + "epoch": 0.7481642344709869, + "grad_norm": 0.08712951093912125, + "learning_rate": 7.881012290192279e-06, + "loss": 0.0026, + "step": 44270 + }, + { + "epoch": 0.7483332347498374, + "grad_norm": 0.09755299985408783, + "learning_rate": 7.879806776661298e-06, + "loss": 0.0023, + "step": 44280 + }, + { + "epoch": 0.7485022350286878, + "grad_norm": 0.1405857801437378, + "learning_rate": 7.878601012573173e-06, + "loss": 0.003, + "step": 44290 + }, + { + "epoch": 0.7486712353075382, + "grad_norm": 0.007532436866313219, + "learning_rate": 7.877394998032812e-06, + "loss": 0.002, + "step": 44300 + }, + { + "epoch": 0.7488402355863887, + "grad_norm": 0.08370806276798248, + "learning_rate": 7.876188733145147e-06, + "loss": 0.0021, + "step": 44310 + }, + { + "epoch": 0.7490092358652392, + "grad_norm": 0.001437056460417807, + "learning_rate": 7.874982218015126e-06, + "loss": 0.0012, + "step": 44320 + }, + { + "epoch": 0.7491782361440896, + "grad_norm": 0.04407551512122154, + "learning_rate": 7.87377545274772e-06, + "loss": 0.0021, + "step": 44330 + }, + { + "epoch": 0.7493472364229401, + "grad_norm": 0.010266945697367191, + "learning_rate": 7.87256843744793e-06, + "loss": 0.0031, + "step": 44340 + }, + { + "epoch": 0.7495162367017906, + "grad_norm": 0.17960795760154724, + "learning_rate": 7.871361172220765e-06, + "loss": 0.0016, + "step": 44350 + }, + { + "epoch": 0.749685236980641, + "grad_norm": 0.038601137697696686, + "learning_rate": 7.870153657171267e-06, + "loss": 0.0031, + "step": 44360 + }, + { + "epoch": 0.7498542372594915, + "grad_norm": 0.00812953058630228, + "learning_rate": 7.868945892404496e-06, + "loss": 0.0017, + "step": 44370 + }, + { + "epoch": 0.750023237538342, + "grad_norm": 0.036118436604738235, + "learning_rate": 7.86773787802553e-06, + "loss": 0.0011, + "step": 44380 + }, + { + "epoch": 0.7501922378171924, + "grad_norm": 0.03733135759830475, + "learning_rate": 7.866529614139476e-06, + "loss": 0.0012, + "step": 44390 + }, + { + "epoch": 0.7503612380960428, + "grad_norm": 0.058701254427433014, + "learning_rate": 7.865321100851456e-06, + "loss": 0.0027, + "step": 44400 + }, + { + "epoch": 0.7505302383748933, + "grad_norm": 0.05390680581331253, + "learning_rate": 7.864112338266617e-06, + "loss": 0.0093, + "step": 44410 + }, + { + "epoch": 0.7506992386537438, + "grad_norm": 0.1075315922498703, + "learning_rate": 7.862903326490126e-06, + 
"loss": 0.0019, + "step": 44420 + }, + { + "epoch": 0.7508682389325942, + "grad_norm": 0.12688098847866058, + "learning_rate": 7.861694065627178e-06, + "loss": 0.0012, + "step": 44430 + }, + { + "epoch": 0.7510372392114447, + "grad_norm": 0.030170833691954613, + "learning_rate": 7.860484555782977e-06, + "loss": 0.0011, + "step": 44440 + }, + { + "epoch": 0.7512062394902952, + "grad_norm": 0.05321137607097626, + "learning_rate": 7.859274797062764e-06, + "loss": 0.002, + "step": 44450 + }, + { + "epoch": 0.7513752397691457, + "grad_norm": 0.052268922328948975, + "learning_rate": 7.858064789571787e-06, + "loss": 0.0017, + "step": 44460 + }, + { + "epoch": 0.751544240047996, + "grad_norm": 0.004613368771970272, + "learning_rate": 7.856854533415328e-06, + "loss": 0.0018, + "step": 44470 + }, + { + "epoch": 0.7517132403268465, + "grad_norm": 0.029189545661211014, + "learning_rate": 7.855644028698683e-06, + "loss": 0.002, + "step": 44480 + }, + { + "epoch": 0.751882240605697, + "grad_norm": 0.09404749423265457, + "learning_rate": 7.85443327552717e-06, + "loss": 0.0028, + "step": 44490 + }, + { + "epoch": 0.7520512408845474, + "grad_norm": 0.05537321791052818, + "learning_rate": 7.853222274006134e-06, + "loss": 0.0017, + "step": 44500 + }, + { + "epoch": 0.7522202411633979, + "grad_norm": 0.11430644989013672, + "learning_rate": 7.852011024240933e-06, + "loss": 0.0033, + "step": 44510 + }, + { + "epoch": 0.7523892414422484, + "grad_norm": 0.059698112308979034, + "learning_rate": 7.850799526336956e-06, + "loss": 0.0015, + "step": 44520 + }, + { + "epoch": 0.7525582417210989, + "grad_norm": 0.03818683326244354, + "learning_rate": 7.849587780399608e-06, + "loss": 0.001, + "step": 44530 + }, + { + "epoch": 0.7527272419999493, + "grad_norm": 0.12590816617012024, + "learning_rate": 7.848375786534316e-06, + "loss": 0.0022, + "step": 44540 + }, + { + "epoch": 0.7528962422787998, + "grad_norm": 0.07889236509799957, + "learning_rate": 7.847163544846532e-06, + "loss": 0.0021, + "step": 44550 + }, + { + "epoch": 0.7530652425576502, + "grad_norm": 0.013532519340515137, + "learning_rate": 7.845951055441723e-06, + "loss": 0.0017, + "step": 44560 + }, + { + "epoch": 0.7532342428365006, + "grad_norm": 0.08118347078561783, + "learning_rate": 7.844738318425384e-06, + "loss": 0.0022, + "step": 44570 + }, + { + "epoch": 0.7534032431153511, + "grad_norm": 0.04712570086121559, + "learning_rate": 7.843525333903028e-06, + "loss": 0.0017, + "step": 44580 + }, + { + "epoch": 0.7535722433942016, + "grad_norm": 0.056935735046863556, + "learning_rate": 7.842312101980189e-06, + "loss": 0.0019, + "step": 44590 + }, + { + "epoch": 0.7537412436730521, + "grad_norm": 0.35753145813941956, + "learning_rate": 7.84109862276243e-06, + "loss": 0.0033, + "step": 44600 + }, + { + "epoch": 0.7539102439519025, + "grad_norm": 0.06886877864599228, + "learning_rate": 7.83988489635532e-06, + "loss": 0.0021, + "step": 44610 + }, + { + "epoch": 0.754079244230753, + "grad_norm": 0.05907822027802467, + "learning_rate": 7.838670922864468e-06, + "loss": 0.0022, + "step": 44620 + }, + { + "epoch": 0.7542482445096035, + "grad_norm": 0.04054168239235878, + "learning_rate": 7.83745670239549e-06, + "loss": 0.0041, + "step": 44630 + }, + { + "epoch": 0.754417244788454, + "grad_norm": 0.027725081890821457, + "learning_rate": 7.836242235054033e-06, + "loss": 0.0016, + "step": 44640 + }, + { + "epoch": 0.7545862450673043, + "grad_norm": 0.23544612526893616, + "learning_rate": 7.83502752094576e-06, + "loss": 0.0014, + "step": 44650 + }, + { + "epoch": 
0.7547552453461548, + "grad_norm": 0.41741564869880676, + "learning_rate": 7.833812560176355e-06, + "loss": 0.0026, + "step": 44660 + }, + { + "epoch": 0.7549242456250053, + "grad_norm": 0.08145532011985779, + "learning_rate": 7.832597352851527e-06, + "loss": 0.0021, + "step": 44670 + }, + { + "epoch": 0.7550932459038557, + "grad_norm": 0.08561309427022934, + "learning_rate": 7.831381899077007e-06, + "loss": 0.003, + "step": 44680 + }, + { + "epoch": 0.7552622461827062, + "grad_norm": 0.09209153056144714, + "learning_rate": 7.830166198958543e-06, + "loss": 0.0019, + "step": 44690 + }, + { + "epoch": 0.7554312464615567, + "grad_norm": 0.07907470315694809, + "learning_rate": 7.828950252601905e-06, + "loss": 0.0027, + "step": 44700 + }, + { + "epoch": 0.7556002467404072, + "grad_norm": 0.06275120377540588, + "learning_rate": 7.827734060112889e-06, + "loss": 0.0015, + "step": 44710 + }, + { + "epoch": 0.7557692470192576, + "grad_norm": 0.07097408175468445, + "learning_rate": 7.826517621597309e-06, + "loss": 0.0029, + "step": 44720 + }, + { + "epoch": 0.755938247298108, + "grad_norm": 0.044329412281513214, + "learning_rate": 7.825300937161003e-06, + "loss": 0.0014, + "step": 44730 + }, + { + "epoch": 0.7561072475769585, + "grad_norm": 0.026017211377620697, + "learning_rate": 7.824084006909827e-06, + "loss": 0.0012, + "step": 44740 + }, + { + "epoch": 0.7562762478558089, + "grad_norm": 0.08053651452064514, + "learning_rate": 7.822866830949656e-06, + "loss": 0.0026, + "step": 44750 + }, + { + "epoch": 0.7564452481346594, + "grad_norm": 0.026170331984758377, + "learning_rate": 7.821649409386396e-06, + "loss": 0.0027, + "step": 44760 + }, + { + "epoch": 0.7566142484135099, + "grad_norm": 0.034089915454387665, + "learning_rate": 7.820431742325967e-06, + "loss": 0.0103, + "step": 44770 + }, + { + "epoch": 0.7567832486923604, + "grad_norm": 0.10103444755077362, + "learning_rate": 7.819213829874308e-06, + "loss": 0.0023, + "step": 44780 + }, + { + "epoch": 0.7569522489712108, + "grad_norm": 0.042457886040210724, + "learning_rate": 7.817995672137388e-06, + "loss": 0.0017, + "step": 44790 + }, + { + "epoch": 0.7571212492500613, + "grad_norm": 0.07881304621696472, + "learning_rate": 7.816777269221192e-06, + "loss": 0.0014, + "step": 44800 + }, + { + "epoch": 0.7572902495289118, + "grad_norm": 0.04411272704601288, + "learning_rate": 7.815558621231724e-06, + "loss": 0.0042, + "step": 44810 + }, + { + "epoch": 0.7574592498077621, + "grad_norm": 0.17249134182929993, + "learning_rate": 7.814339728275017e-06, + "loss": 0.0026, + "step": 44820 + }, + { + "epoch": 0.7576282500866126, + "grad_norm": 0.2123340666294098, + "learning_rate": 7.813120590457116e-06, + "loss": 0.0019, + "step": 44830 + }, + { + "epoch": 0.7577972503654631, + "grad_norm": 0.09830141812562943, + "learning_rate": 7.811901207884094e-06, + "loss": 0.0017, + "step": 44840 + }, + { + "epoch": 0.7579662506443136, + "grad_norm": 0.14184986054897308, + "learning_rate": 7.810681580662044e-06, + "loss": 0.0033, + "step": 44850 + }, + { + "epoch": 0.758135250923164, + "grad_norm": 0.03080633282661438, + "learning_rate": 7.809461708897076e-06, + "loss": 0.0011, + "step": 44860 + }, + { + "epoch": 0.7583042512020145, + "grad_norm": 0.0676361694931984, + "learning_rate": 7.80824159269533e-06, + "loss": 0.0017, + "step": 44870 + }, + { + "epoch": 0.758473251480865, + "grad_norm": 0.010642868466675282, + "learning_rate": 7.807021232162956e-06, + "loss": 0.0008, + "step": 44880 + }, + { + "epoch": 0.7586422517597154, + "grad_norm": 0.07253513485193253, + 
"learning_rate": 7.805800627406138e-06, + "loss": 0.001, + "step": 44890 + }, + { + "epoch": 0.7588112520385658, + "grad_norm": 0.07711581885814667, + "learning_rate": 7.80457977853107e-06, + "loss": 0.0021, + "step": 44900 + }, + { + "epoch": 0.7589802523174163, + "grad_norm": 0.08204604685306549, + "learning_rate": 7.80335868564397e-06, + "loss": 0.0038, + "step": 44910 + }, + { + "epoch": 0.7591492525962668, + "grad_norm": 0.02592233009636402, + "learning_rate": 7.802137348851084e-06, + "loss": 0.0026, + "step": 44920 + }, + { + "epoch": 0.7593182528751172, + "grad_norm": 0.0777740627527237, + "learning_rate": 7.800915768258673e-06, + "loss": 0.0028, + "step": 44930 + }, + { + "epoch": 0.7594872531539677, + "grad_norm": 0.01917082443833351, + "learning_rate": 7.799693943973019e-06, + "loss": 0.0019, + "step": 44940 + }, + { + "epoch": 0.7596562534328182, + "grad_norm": 0.08356926590204239, + "learning_rate": 7.798471876100423e-06, + "loss": 0.0043, + "step": 44950 + }, + { + "epoch": 0.7598252537116686, + "grad_norm": 0.07758168131113052, + "learning_rate": 7.79724956474722e-06, + "loss": 0.0024, + "step": 44960 + }, + { + "epoch": 0.7599942539905191, + "grad_norm": 0.014164702966809273, + "learning_rate": 7.796027010019751e-06, + "loss": 0.0007, + "step": 44970 + }, + { + "epoch": 0.7601632542693696, + "grad_norm": 0.13659994304180145, + "learning_rate": 7.794804212024383e-06, + "loss": 0.0012, + "step": 44980 + }, + { + "epoch": 0.76033225454822, + "grad_norm": 0.23086796700954437, + "learning_rate": 7.793581170867509e-06, + "loss": 0.003, + "step": 44990 + }, + { + "epoch": 0.7605012548270704, + "grad_norm": 0.014240220189094543, + "learning_rate": 7.792357886655537e-06, + "loss": 0.0017, + "step": 45000 + }, + { + "epoch": 0.7606702551059209, + "grad_norm": 0.01912778429687023, + "learning_rate": 7.791134359494902e-06, + "loss": 0.0015, + "step": 45010 + }, + { + "epoch": 0.7608392553847714, + "grad_norm": 0.05160927772521973, + "learning_rate": 7.789910589492052e-06, + "loss": 0.0029, + "step": 45020 + }, + { + "epoch": 0.7610082556636218, + "grad_norm": 0.0658368393778801, + "learning_rate": 7.788686576753462e-06, + "loss": 0.0014, + "step": 45030 + }, + { + "epoch": 0.7611772559424723, + "grad_norm": 0.1321316808462143, + "learning_rate": 7.78746232138563e-06, + "loss": 0.0022, + "step": 45040 + }, + { + "epoch": 0.7613462562213228, + "grad_norm": 0.03437534719705582, + "learning_rate": 7.78623782349507e-06, + "loss": 0.0014, + "step": 45050 + }, + { + "epoch": 0.7615152565001733, + "grad_norm": 0.06349821388721466, + "learning_rate": 7.78501308318832e-06, + "loss": 0.0023, + "step": 45060 + }, + { + "epoch": 0.7616842567790237, + "grad_norm": 0.07751069217920303, + "learning_rate": 7.783788100571939e-06, + "loss": 0.0021, + "step": 45070 + }, + { + "epoch": 0.7618532570578741, + "grad_norm": 0.02845473401248455, + "learning_rate": 7.782562875752504e-06, + "loss": 0.0018, + "step": 45080 + }, + { + "epoch": 0.7620222573367246, + "grad_norm": 0.07686923444271088, + "learning_rate": 7.781337408836618e-06, + "loss": 0.0017, + "step": 45090 + }, + { + "epoch": 0.762191257615575, + "grad_norm": 0.04277089238166809, + "learning_rate": 7.7801116999309e-06, + "loss": 0.0024, + "step": 45100 + }, + { + "epoch": 0.7623602578944255, + "grad_norm": 0.04295666515827179, + "learning_rate": 7.778885749141997e-06, + "loss": 0.0014, + "step": 45110 + }, + { + "epoch": 0.762529258173276, + "grad_norm": 0.04123243689537048, + "learning_rate": 7.777659556576567e-06, + "loss": 0.0014, + "step": 45120 
+ }, + { + "epoch": 0.7626982584521265, + "grad_norm": 0.14625336229801178, + "learning_rate": 7.7764331223413e-06, + "loss": 0.0022, + "step": 45130 + }, + { + "epoch": 0.7628672587309769, + "grad_norm": 0.09272245317697525, + "learning_rate": 7.775206446542898e-06, + "loss": 0.0015, + "step": 45140 + }, + { + "epoch": 0.7630362590098274, + "grad_norm": 0.03592293709516525, + "learning_rate": 7.77397952928809e-06, + "loss": 0.0008, + "step": 45150 + }, + { + "epoch": 0.7632052592886778, + "grad_norm": 0.0790335014462471, + "learning_rate": 7.77275237068362e-06, + "loss": 0.0014, + "step": 45160 + }, + { + "epoch": 0.7633742595675282, + "grad_norm": 0.037326548248529434, + "learning_rate": 7.771524970836261e-06, + "loss": 0.0025, + "step": 45170 + }, + { + "epoch": 0.7635432598463787, + "grad_norm": 0.08837980777025223, + "learning_rate": 7.770297329852801e-06, + "loss": 0.0018, + "step": 45180 + }, + { + "epoch": 0.7637122601252292, + "grad_norm": 0.10220792889595032, + "learning_rate": 7.769069447840051e-06, + "loss": 0.0025, + "step": 45190 + }, + { + "epoch": 0.7638812604040797, + "grad_norm": 0.04530085623264313, + "learning_rate": 7.767841324904843e-06, + "loss": 0.0024, + "step": 45200 + }, + { + "epoch": 0.7640502606829301, + "grad_norm": 0.1346471905708313, + "learning_rate": 7.766612961154029e-06, + "loss": 0.003, + "step": 45210 + }, + { + "epoch": 0.7642192609617806, + "grad_norm": 0.0872393548488617, + "learning_rate": 7.765384356694483e-06, + "loss": 0.002, + "step": 45220 + }, + { + "epoch": 0.7643882612406311, + "grad_norm": 0.019787881523370743, + "learning_rate": 7.7641555116331e-06, + "loss": 0.0019, + "step": 45230 + }, + { + "epoch": 0.7645572615194816, + "grad_norm": 0.12332823127508163, + "learning_rate": 7.762926426076793e-06, + "loss": 0.0015, + "step": 45240 + }, + { + "epoch": 0.7647262617983319, + "grad_norm": 0.02287132292985916, + "learning_rate": 7.761697100132502e-06, + "loss": 0.002, + "step": 45250 + }, + { + "epoch": 0.7648952620771824, + "grad_norm": 0.038460344076156616, + "learning_rate": 7.760467533907182e-06, + "loss": 0.0015, + "step": 45260 + }, + { + "epoch": 0.7650642623560329, + "grad_norm": 0.07872225344181061, + "learning_rate": 7.759237727507811e-06, + "loss": 0.0019, + "step": 45270 + }, + { + "epoch": 0.7652332626348833, + "grad_norm": 0.06950631737709045, + "learning_rate": 7.758007681041391e-06, + "loss": 0.0018, + "step": 45280 + }, + { + "epoch": 0.7654022629137338, + "grad_norm": 0.14089111983776093, + "learning_rate": 7.756777394614937e-06, + "loss": 0.0015, + "step": 45290 + }, + { + "epoch": 0.7655712631925843, + "grad_norm": 0.02855812944471836, + "learning_rate": 7.755546868335494e-06, + "loss": 0.0011, + "step": 45300 + }, + { + "epoch": 0.7657402634714348, + "grad_norm": 0.05885821208357811, + "learning_rate": 7.754316102310121e-06, + "loss": 0.0012, + "step": 45310 + }, + { + "epoch": 0.7659092637502852, + "grad_norm": 0.04385798051953316, + "learning_rate": 7.753085096645906e-06, + "loss": 0.0013, + "step": 45320 + }, + { + "epoch": 0.7660782640291357, + "grad_norm": 0.04075145721435547, + "learning_rate": 7.751853851449947e-06, + "loss": 0.0013, + "step": 45330 + }, + { + "epoch": 0.7662472643079861, + "grad_norm": 0.10517627000808716, + "learning_rate": 7.750622366829368e-06, + "loss": 0.0022, + "step": 45340 + }, + { + "epoch": 0.7664162645868365, + "grad_norm": 0.050586577504873276, + "learning_rate": 7.749390642891317e-06, + "loss": 0.0019, + "step": 45350 + }, + { + "epoch": 0.766585264865687, + "grad_norm": 
0.09203847497701645, + "learning_rate": 7.748158679742961e-06, + "loss": 0.004, + "step": 45360 + }, + { + "epoch": 0.7667542651445375, + "grad_norm": 0.09800178557634354, + "learning_rate": 7.746926477491481e-06, + "loss": 0.0024, + "step": 45370 + }, + { + "epoch": 0.766923265423388, + "grad_norm": 0.042162712663412094, + "learning_rate": 7.74569403624409e-06, + "loss": 0.0017, + "step": 45380 + }, + { + "epoch": 0.7670922657022384, + "grad_norm": 0.06394949555397034, + "learning_rate": 7.744461356108016e-06, + "loss": 0.0018, + "step": 45390 + }, + { + "epoch": 0.7672612659810889, + "grad_norm": 0.21785315871238708, + "learning_rate": 7.743228437190507e-06, + "loss": 0.0023, + "step": 45400 + }, + { + "epoch": 0.7674302662599394, + "grad_norm": 0.1498011201620102, + "learning_rate": 7.741995279598831e-06, + "loss": 0.0017, + "step": 45410 + }, + { + "epoch": 0.7675992665387897, + "grad_norm": 0.03804841265082359, + "learning_rate": 7.74076188344028e-06, + "loss": 0.0031, + "step": 45420 + }, + { + "epoch": 0.7677682668176402, + "grad_norm": 0.03907634690403938, + "learning_rate": 7.739528248822168e-06, + "loss": 0.0018, + "step": 45430 + }, + { + "epoch": 0.7679372670964907, + "grad_norm": 0.03149436414241791, + "learning_rate": 7.738294375851823e-06, + "loss": 0.0016, + "step": 45440 + }, + { + "epoch": 0.7681062673753412, + "grad_norm": 0.05602965131402016, + "learning_rate": 7.737060264636601e-06, + "loss": 0.0011, + "step": 45450 + }, + { + "epoch": 0.7682752676541916, + "grad_norm": 0.045581117272377014, + "learning_rate": 7.735825915283874e-06, + "loss": 0.0024, + "step": 45460 + }, + { + "epoch": 0.7684442679330421, + "grad_norm": 0.17535948753356934, + "learning_rate": 7.734591327901037e-06, + "loss": 0.0015, + "step": 45470 + }, + { + "epoch": 0.7686132682118926, + "grad_norm": 0.06803618371486664, + "learning_rate": 7.733356502595506e-06, + "loss": 0.002, + "step": 45480 + }, + { + "epoch": 0.768782268490743, + "grad_norm": 0.13037042319774628, + "learning_rate": 7.732121439474716e-06, + "loss": 0.0021, + "step": 45490 + }, + { + "epoch": 0.7689512687695935, + "grad_norm": 0.048493217676877975, + "learning_rate": 7.730886138646121e-06, + "loss": 0.0013, + "step": 45500 + }, + { + "epoch": 0.7691202690484439, + "grad_norm": 0.07940898835659027, + "learning_rate": 7.729650600217204e-06, + "loss": 0.0019, + "step": 45510 + }, + { + "epoch": 0.7692892693272944, + "grad_norm": 0.16000744700431824, + "learning_rate": 7.728414824295456e-06, + "loss": 0.0046, + "step": 45520 + }, + { + "epoch": 0.7694582696061448, + "grad_norm": 0.10460644215345383, + "learning_rate": 7.7271788109884e-06, + "loss": 0.0064, + "step": 45530 + }, + { + "epoch": 0.7696272698849953, + "grad_norm": 0.16053324937820435, + "learning_rate": 7.725942560403574e-06, + "loss": 0.0022, + "step": 45540 + }, + { + "epoch": 0.7697962701638458, + "grad_norm": 0.024782394990324974, + "learning_rate": 7.724706072648537e-06, + "loss": 0.0029, + "step": 45550 + }, + { + "epoch": 0.7699652704426962, + "grad_norm": 0.03775293380022049, + "learning_rate": 7.723469347830871e-06, + "loss": 0.0029, + "step": 45560 + }, + { + "epoch": 0.7701342707215467, + "grad_norm": 0.05658729374408722, + "learning_rate": 7.722232386058178e-06, + "loss": 0.0029, + "step": 45570 + }, + { + "epoch": 0.7703032710003972, + "grad_norm": 0.3801966905593872, + "learning_rate": 7.720995187438077e-06, + "loss": 0.0057, + "step": 45580 + }, + { + "epoch": 0.7704722712792476, + "grad_norm": 0.055622778832912445, + "learning_rate": 7.71975775207821e-06, + 
"loss": 0.004, + "step": 45590 + }, + { + "epoch": 0.770641271558098, + "grad_norm": 0.06273094564676285, + "learning_rate": 7.71852008008624e-06, + "loss": 0.0015, + "step": 45600 + }, + { + "epoch": 0.7708102718369485, + "grad_norm": 0.048402734100818634, + "learning_rate": 7.717282171569853e-06, + "loss": 0.0027, + "step": 45610 + }, + { + "epoch": 0.770979272115799, + "grad_norm": 0.16744104027748108, + "learning_rate": 7.716044026636753e-06, + "loss": 0.0022, + "step": 45620 + }, + { + "epoch": 0.7711482723946494, + "grad_norm": 0.11811065673828125, + "learning_rate": 7.71480564539466e-06, + "loss": 0.0011, + "step": 45630 + }, + { + "epoch": 0.7713172726734999, + "grad_norm": 0.04153404012322426, + "learning_rate": 7.713567027951325e-06, + "loss": 0.0019, + "step": 45640 + }, + { + "epoch": 0.7714862729523504, + "grad_norm": 0.1193832978606224, + "learning_rate": 7.71232817441451e-06, + "loss": 0.0013, + "step": 45650 + }, + { + "epoch": 0.7716552732312009, + "grad_norm": 0.06552287191152573, + "learning_rate": 7.711089084892001e-06, + "loss": 0.0013, + "step": 45660 + }, + { + "epoch": 0.7718242735100513, + "grad_norm": 0.0960426852107048, + "learning_rate": 7.709849759491607e-06, + "loss": 0.0034, + "step": 45670 + }, + { + "epoch": 0.7719932737889017, + "grad_norm": 0.0046477243304252625, + "learning_rate": 7.708610198321155e-06, + "loss": 0.0012, + "step": 45680 + }, + { + "epoch": 0.7721622740677522, + "grad_norm": 0.08189505338668823, + "learning_rate": 7.707370401488494e-06, + "loss": 0.0017, + "step": 45690 + }, + { + "epoch": 0.7723312743466026, + "grad_norm": 0.14401870965957642, + "learning_rate": 7.706130369101487e-06, + "loss": 0.0036, + "step": 45700 + }, + { + "epoch": 0.7725002746254531, + "grad_norm": 0.09131434559822083, + "learning_rate": 7.704890101268028e-06, + "loss": 0.0031, + "step": 45710 + }, + { + "epoch": 0.7726692749043036, + "grad_norm": 0.06472761929035187, + "learning_rate": 7.703649598096025e-06, + "loss": 0.0014, + "step": 45720 + }, + { + "epoch": 0.7728382751831541, + "grad_norm": 0.0034629059955477715, + "learning_rate": 7.702408859693406e-06, + "loss": 0.0018, + "step": 45730 + }, + { + "epoch": 0.7730072754620045, + "grad_norm": 0.03553701192140579, + "learning_rate": 7.701167886168124e-06, + "loss": 0.0013, + "step": 45740 + }, + { + "epoch": 0.773176275740855, + "grad_norm": 0.06160648912191391, + "learning_rate": 7.699926677628148e-06, + "loss": 0.0024, + "step": 45750 + }, + { + "epoch": 0.7733452760197055, + "grad_norm": 0.037900350987911224, + "learning_rate": 7.698685234181471e-06, + "loss": 0.0022, + "step": 45760 + }, + { + "epoch": 0.7735142762985558, + "grad_norm": 0.10797583311796188, + "learning_rate": 7.697443555936102e-06, + "loss": 0.0018, + "step": 45770 + }, + { + "epoch": 0.7736832765774063, + "grad_norm": 0.3845529854297638, + "learning_rate": 7.696201643000075e-06, + "loss": 0.0023, + "step": 45780 + }, + { + "epoch": 0.7738522768562568, + "grad_norm": 0.043602894991636276, + "learning_rate": 7.69495949548144e-06, + "loss": 0.0013, + "step": 45790 + }, + { + "epoch": 0.7740212771351073, + "grad_norm": 0.05094465613365173, + "learning_rate": 7.693717113488273e-06, + "loss": 0.0016, + "step": 45800 + }, + { + "epoch": 0.7741902774139577, + "grad_norm": 0.03464687988162041, + "learning_rate": 7.692474497128665e-06, + "loss": 0.002, + "step": 45810 + }, + { + "epoch": 0.7743592776928082, + "grad_norm": 0.032336119562387466, + "learning_rate": 7.691231646510731e-06, + "loss": 0.0009, + "step": 45820 + }, + { + "epoch": 
0.7745282779716587, + "grad_norm": 0.12466021627187729, + "learning_rate": 7.689988561742603e-06, + "loss": 0.0013, + "step": 45830 + }, + { + "epoch": 0.7746972782505092, + "grad_norm": 0.010928106494247913, + "learning_rate": 7.688745242932439e-06, + "loss": 0.002, + "step": 45840 + }, + { + "epoch": 0.7748662785293595, + "grad_norm": 0.049774374812841415, + "learning_rate": 7.687501690188409e-06, + "loss": 0.0016, + "step": 45850 + }, + { + "epoch": 0.77503527880821, + "grad_norm": 0.14102478325366974, + "learning_rate": 7.686257903618713e-06, + "loss": 0.0036, + "step": 45860 + }, + { + "epoch": 0.7752042790870605, + "grad_norm": 0.06147325783967972, + "learning_rate": 7.685013883331562e-06, + "loss": 0.002, + "step": 45870 + }, + { + "epoch": 0.7753732793659109, + "grad_norm": 0.0031127941329032183, + "learning_rate": 7.683769629435195e-06, + "loss": 0.0016, + "step": 45880 + }, + { + "epoch": 0.7755422796447614, + "grad_norm": 0.1135169044137001, + "learning_rate": 7.682525142037869e-06, + "loss": 0.0025, + "step": 45890 + }, + { + "epoch": 0.7757112799236119, + "grad_norm": 0.07466382533311844, + "learning_rate": 7.681280421247856e-06, + "loss": 0.0015, + "step": 45900 + }, + { + "epoch": 0.7758802802024624, + "grad_norm": 0.3025516867637634, + "learning_rate": 7.680035467173456e-06, + "loss": 0.0017, + "step": 45910 + }, + { + "epoch": 0.7760492804813128, + "grad_norm": 0.07543689012527466, + "learning_rate": 7.678790279922987e-06, + "loss": 0.003, + "step": 45920 + }, + { + "epoch": 0.7762182807601633, + "grad_norm": 0.060232553631067276, + "learning_rate": 7.677544859604782e-06, + "loss": 0.0027, + "step": 45930 + }, + { + "epoch": 0.7763872810390137, + "grad_norm": 0.09006506949663162, + "learning_rate": 7.676299206327202e-06, + "loss": 0.0014, + "step": 45940 + }, + { + "epoch": 0.7765562813178641, + "grad_norm": 0.21766416728496552, + "learning_rate": 7.675053320198624e-06, + "loss": 0.0011, + "step": 45950 + }, + { + "epoch": 0.7767252815967146, + "grad_norm": 0.07666429877281189, + "learning_rate": 7.673807201327448e-06, + "loss": 0.0026, + "step": 45960 + }, + { + "epoch": 0.7768942818755651, + "grad_norm": 0.07516634464263916, + "learning_rate": 7.672560849822088e-06, + "loss": 0.0011, + "step": 45970 + }, + { + "epoch": 0.7770632821544156, + "grad_norm": 0.0709579735994339, + "learning_rate": 7.671314265790987e-06, + "loss": 0.0008, + "step": 45980 + }, + { + "epoch": 0.777232282433266, + "grad_norm": 0.06900666654109955, + "learning_rate": 7.670067449342602e-06, + "loss": 0.0011, + "step": 45990 + }, + { + "epoch": 0.7774012827121165, + "grad_norm": 0.04497666656970978, + "learning_rate": 7.668820400585411e-06, + "loss": 0.0018, + "step": 46000 + }, + { + "epoch": 0.777570282990967, + "grad_norm": 0.03760146722197533, + "learning_rate": 7.667573119627915e-06, + "loss": 0.0047, + "step": 46010 + }, + { + "epoch": 0.7777392832698174, + "grad_norm": 0.038645707070827484, + "learning_rate": 7.666325606578633e-06, + "loss": 0.0023, + "step": 46020 + }, + { + "epoch": 0.7779082835486678, + "grad_norm": 0.014442669227719307, + "learning_rate": 7.665077861546104e-06, + "loss": 0.0021, + "step": 46030 + }, + { + "epoch": 0.7780772838275183, + "grad_norm": 0.05108046531677246, + "learning_rate": 7.66382988463889e-06, + "loss": 0.0013, + "step": 46040 + }, + { + "epoch": 0.7782462841063688, + "grad_norm": 0.07995917648077011, + "learning_rate": 7.662581675965568e-06, + "loss": 0.0015, + "step": 46050 + }, + { + "epoch": 0.7784152843852192, + "grad_norm": 0.09550514817237854, + 
"learning_rate": 7.66133323563474e-06, + "loss": 0.0023, + "step": 46060 + }, + { + "epoch": 0.7785842846640697, + "grad_norm": 0.08962764590978622, + "learning_rate": 7.660084563755025e-06, + "loss": 0.0022, + "step": 46070 + }, + { + "epoch": 0.7787532849429202, + "grad_norm": 0.008625727146863937, + "learning_rate": 7.658835660435066e-06, + "loss": 0.0012, + "step": 46080 + }, + { + "epoch": 0.7789222852217706, + "grad_norm": 0.023746177554130554, + "learning_rate": 7.657586525783523e-06, + "loss": 0.0009, + "step": 46090 + }, + { + "epoch": 0.7790912855006211, + "grad_norm": 0.14935356378555298, + "learning_rate": 7.656337159909074e-06, + "loss": 0.0025, + "step": 46100 + }, + { + "epoch": 0.7792602857794715, + "grad_norm": 0.10728771239519119, + "learning_rate": 7.655087562920423e-06, + "loss": 0.0022, + "step": 46110 + }, + { + "epoch": 0.779429286058322, + "grad_norm": 0.06259474903345108, + "learning_rate": 7.65383773492629e-06, + "loss": 0.0021, + "step": 46120 + }, + { + "epoch": 0.7795982863371724, + "grad_norm": 0.0671854317188263, + "learning_rate": 7.652587676035414e-06, + "loss": 0.0019, + "step": 46130 + }, + { + "epoch": 0.7797672866160229, + "grad_norm": 0.21874253451824188, + "learning_rate": 7.65133738635656e-06, + "loss": 0.0025, + "step": 46140 + }, + { + "epoch": 0.7799362868948734, + "grad_norm": 0.0705648735165596, + "learning_rate": 7.650086865998508e-06, + "loss": 0.0022, + "step": 46150 + }, + { + "epoch": 0.7801052871737238, + "grad_norm": 0.10782632231712341, + "learning_rate": 7.648836115070057e-06, + "loss": 0.0015, + "step": 46160 + }, + { + "epoch": 0.7802742874525743, + "grad_norm": 0.002489664824679494, + "learning_rate": 7.64758513368003e-06, + "loss": 0.0009, + "step": 46170 + }, + { + "epoch": 0.7804432877314248, + "grad_norm": 0.08178409188985825, + "learning_rate": 7.646333921937269e-06, + "loss": 0.0018, + "step": 46180 + }, + { + "epoch": 0.7806122880102753, + "grad_norm": 0.11821407824754715, + "learning_rate": 7.645082479950635e-06, + "loss": 0.0033, + "step": 46190 + }, + { + "epoch": 0.7807812882891256, + "grad_norm": 0.019284335896372795, + "learning_rate": 7.64383080782901e-06, + "loss": 0.0016, + "step": 46200 + }, + { + "epoch": 0.7809502885679761, + "grad_norm": 0.32529857754707336, + "learning_rate": 7.642578905681295e-06, + "loss": 0.0022, + "step": 46210 + }, + { + "epoch": 0.7811192888468266, + "grad_norm": 0.0022389700170606375, + "learning_rate": 7.641326773616411e-06, + "loss": 0.0017, + "step": 46220 + }, + { + "epoch": 0.781288289125677, + "grad_norm": 0.08486919850111008, + "learning_rate": 7.6400744117433e-06, + "loss": 0.0019, + "step": 46230 + }, + { + "epoch": 0.7814572894045275, + "grad_norm": 0.16006088256835938, + "learning_rate": 7.638821820170925e-06, + "loss": 0.0026, + "step": 46240 + }, + { + "epoch": 0.781626289683378, + "grad_norm": 0.060759540647268295, + "learning_rate": 7.637568999008265e-06, + "loss": 0.0017, + "step": 46250 + }, + { + "epoch": 0.7817952899622285, + "grad_norm": 0.07992040365934372, + "learning_rate": 7.636315948364323e-06, + "loss": 0.0009, + "step": 46260 + }, + { + "epoch": 0.7819642902410789, + "grad_norm": 0.03660142421722412, + "learning_rate": 7.635062668348122e-06, + "loss": 0.001, + "step": 46270 + }, + { + "epoch": 0.7821332905199293, + "grad_norm": 0.03165186569094658, + "learning_rate": 7.633809159068699e-06, + "loss": 0.0013, + "step": 46280 + }, + { + "epoch": 0.7823022907987798, + "grad_norm": 0.07846418023109436, + "learning_rate": 7.63255542063512e-06, + "loss": 0.0019, + 
"step": 46290 + }, + { + "epoch": 0.7824712910776302, + "grad_norm": 0.23081886768341064, + "learning_rate": 7.631301453156464e-06, + "loss": 0.0024, + "step": 46300 + }, + { + "epoch": 0.7826402913564807, + "grad_norm": 0.09403436630964279, + "learning_rate": 7.63004725674183e-06, + "loss": 0.0013, + "step": 46310 + }, + { + "epoch": 0.7828092916353312, + "grad_norm": 0.06292977929115295, + "learning_rate": 7.628792831500345e-06, + "loss": 0.0013, + "step": 46320 + }, + { + "epoch": 0.7829782919141817, + "grad_norm": 0.0667533427476883, + "learning_rate": 7.627538177541145e-06, + "loss": 0.0016, + "step": 46330 + }, + { + "epoch": 0.7831472921930321, + "grad_norm": 0.04467438533902168, + "learning_rate": 7.626283294973394e-06, + "loss": 0.0008, + "step": 46340 + }, + { + "epoch": 0.7833162924718826, + "grad_norm": 0.045477494597435, + "learning_rate": 7.625028183906272e-06, + "loss": 0.0017, + "step": 46350 + }, + { + "epoch": 0.7834852927507331, + "grad_norm": 0.08229431509971619, + "learning_rate": 7.62377284444898e-06, + "loss": 0.002, + "step": 46360 + }, + { + "epoch": 0.7836542930295834, + "grad_norm": 0.16802437603473663, + "learning_rate": 7.622517276710737e-06, + "loss": 0.0022, + "step": 46370 + }, + { + "epoch": 0.7838232933084339, + "grad_norm": 0.05041522905230522, + "learning_rate": 7.6212614808007835e-06, + "loss": 0.0025, + "step": 46380 + }, + { + "epoch": 0.7839922935872844, + "grad_norm": 0.03219315782189369, + "learning_rate": 7.620005456828383e-06, + "loss": 0.0038, + "step": 46390 + }, + { + "epoch": 0.7841612938661349, + "grad_norm": 0.05192848667502403, + "learning_rate": 7.618749204902812e-06, + "loss": 0.0015, + "step": 46400 + }, + { + "epoch": 0.7843302941449853, + "grad_norm": 0.04018478840589523, + "learning_rate": 7.617492725133372e-06, + "loss": 0.0015, + "step": 46410 + }, + { + "epoch": 0.7844992944238358, + "grad_norm": 0.016311295330524445, + "learning_rate": 7.616236017629383e-06, + "loss": 0.0022, + "step": 46420 + }, + { + "epoch": 0.7846682947026863, + "grad_norm": 0.04850300773978233, + "learning_rate": 7.614979082500185e-06, + "loss": 0.0021, + "step": 46430 + }, + { + "epoch": 0.7848372949815368, + "grad_norm": 0.013393869623541832, + "learning_rate": 7.613721919855137e-06, + "loss": 0.0023, + "step": 46440 + }, + { + "epoch": 0.7850062952603872, + "grad_norm": 0.10269208252429962, + "learning_rate": 7.612464529803618e-06, + "loss": 0.0013, + "step": 46450 + }, + { + "epoch": 0.7851752955392376, + "grad_norm": 0.3491341173648834, + "learning_rate": 7.611206912455027e-06, + "loss": 0.0014, + "step": 46460 + }, + { + "epoch": 0.7853442958180881, + "grad_norm": 0.03559425100684166, + "learning_rate": 7.609949067918785e-06, + "loss": 0.0015, + "step": 46470 + }, + { + "epoch": 0.7855132960969385, + "grad_norm": 0.07604513317346573, + "learning_rate": 7.608690996304327e-06, + "loss": 0.0024, + "step": 46480 + }, + { + "epoch": 0.785682296375789, + "grad_norm": 0.1284206509590149, + "learning_rate": 7.607432697721112e-06, + "loss": 0.003, + "step": 46490 + }, + { + "epoch": 0.7858512966546395, + "grad_norm": 0.06238522008061409, + "learning_rate": 7.60617417227862e-06, + "loss": 0.001, + "step": 46500 + }, + { + "epoch": 0.78602029693349, + "grad_norm": 0.045297060161828995, + "learning_rate": 7.604915420086348e-06, + "loss": 0.0012, + "step": 46510 + }, + { + "epoch": 0.7861892972123404, + "grad_norm": 0.020996225997805595, + "learning_rate": 7.603656441253811e-06, + "loss": 0.0012, + "step": 46520 + }, + { + "epoch": 0.7863582974911909, + 
"grad_norm": 0.01850290037691593, + "learning_rate": 7.602397235890551e-06, + "loss": 0.0014, + "step": 46530 + }, + { + "epoch": 0.7865272977700413, + "grad_norm": 0.06593858450651169, + "learning_rate": 7.6011378041061225e-06, + "loss": 0.0017, + "step": 46540 + }, + { + "epoch": 0.7866962980488917, + "grad_norm": 0.08187979459762573, + "learning_rate": 7.5998781460101026e-06, + "loss": 0.0026, + "step": 46550 + }, + { + "epoch": 0.7868652983277422, + "grad_norm": 0.12238030880689621, + "learning_rate": 7.598618261712087e-06, + "loss": 0.0015, + "step": 46560 + }, + { + "epoch": 0.7870342986065927, + "grad_norm": 0.05987643823027611, + "learning_rate": 7.59735815132169e-06, + "loss": 0.0028, + "step": 46570 + }, + { + "epoch": 0.7872032988854432, + "grad_norm": 0.09325841069221497, + "learning_rate": 7.596097814948551e-06, + "loss": 0.0019, + "step": 46580 + }, + { + "epoch": 0.7873722991642936, + "grad_norm": 0.05478665232658386, + "learning_rate": 7.594837252702324e-06, + "loss": 0.0014, + "step": 46590 + }, + { + "epoch": 0.7875412994431441, + "grad_norm": 0.07214100658893585, + "learning_rate": 7.593576464692684e-06, + "loss": 0.0015, + "step": 46600 + }, + { + "epoch": 0.7877102997219946, + "grad_norm": 0.15464851260185242, + "learning_rate": 7.592315451029324e-06, + "loss": 0.0026, + "step": 46610 + }, + { + "epoch": 0.787879300000845, + "grad_norm": 0.0955459251999855, + "learning_rate": 7.59105421182196e-06, + "loss": 0.0024, + "step": 46620 + }, + { + "epoch": 0.7880483002796954, + "grad_norm": 0.0869637057185173, + "learning_rate": 7.589792747180327e-06, + "loss": 0.0017, + "step": 46630 + }, + { + "epoch": 0.7882173005585459, + "grad_norm": 0.04810710251331329, + "learning_rate": 7.588531057214175e-06, + "loss": 0.0014, + "step": 46640 + }, + { + "epoch": 0.7883863008373964, + "grad_norm": 0.02471080981194973, + "learning_rate": 7.587269142033281e-06, + "loss": 0.0025, + "step": 46650 + }, + { + "epoch": 0.7885553011162468, + "grad_norm": 0.02822282165288925, + "learning_rate": 7.586007001747436e-06, + "loss": 0.0021, + "step": 46660 + }, + { + "epoch": 0.7887243013950973, + "grad_norm": 0.060560278594493866, + "learning_rate": 7.584744636466453e-06, + "loss": 0.0019, + "step": 46670 + }, + { + "epoch": 0.7888933016739478, + "grad_norm": 0.04017733037471771, + "learning_rate": 7.583482046300161e-06, + "loss": 0.0013, + "step": 46680 + }, + { + "epoch": 0.7890623019527983, + "grad_norm": 0.0022847475484013557, + "learning_rate": 7.582219231358415e-06, + "loss": 0.0014, + "step": 46690 + }, + { + "epoch": 0.7892313022316487, + "grad_norm": 0.006776855327188969, + "learning_rate": 7.580956191751084e-06, + "loss": 0.0014, + "step": 46700 + }, + { + "epoch": 0.7894003025104992, + "grad_norm": 0.06477966159582138, + "learning_rate": 7.57969292758806e-06, + "loss": 0.0013, + "step": 46710 + }, + { + "epoch": 0.7895693027893496, + "grad_norm": 0.14177727699279785, + "learning_rate": 7.5784294389792535e-06, + "loss": 0.001, + "step": 46720 + }, + { + "epoch": 0.7897383030682, + "grad_norm": 0.06468930095434189, + "learning_rate": 7.5771657260345924e-06, + "loss": 0.0015, + "step": 46730 + }, + { + "epoch": 0.7899073033470505, + "grad_norm": 0.08887003362178802, + "learning_rate": 7.575901788864025e-06, + "loss": 0.0019, + "step": 46740 + }, + { + "epoch": 0.790076303625901, + "grad_norm": 0.07502151280641556, + "learning_rate": 7.574637627577524e-06, + "loss": 0.0016, + "step": 46750 + }, + { + "epoch": 0.7902453039047515, + "grad_norm": 0.08747242391109467, + "learning_rate": 
7.573373242285073e-06, + "loss": 0.0006, + "step": 46760 + }, + { + "epoch": 0.7904143041836019, + "grad_norm": 0.016262156888842583, + "learning_rate": 7.572108633096682e-06, + "loss": 0.0033, + "step": 46770 + }, + { + "epoch": 0.7905833044624524, + "grad_norm": 0.02630412019789219, + "learning_rate": 7.570843800122377e-06, + "loss": 0.0014, + "step": 46780 + }, + { + "epoch": 0.7907523047413029, + "grad_norm": 0.04608182981610298, + "learning_rate": 7.569578743472206e-06, + "loss": 0.0019, + "step": 46790 + }, + { + "epoch": 0.7909213050201532, + "grad_norm": 0.16504621505737305, + "learning_rate": 7.568313463256234e-06, + "loss": 0.0017, + "step": 46800 + }, + { + "epoch": 0.7910903052990037, + "grad_norm": 0.050347063690423965, + "learning_rate": 7.567047959584548e-06, + "loss": 0.0014, + "step": 46810 + }, + { + "epoch": 0.7912593055778542, + "grad_norm": 0.09296340495347977, + "learning_rate": 7.565782232567252e-06, + "loss": 0.0012, + "step": 46820 + }, + { + "epoch": 0.7914283058567047, + "grad_norm": 0.059468045830726624, + "learning_rate": 7.564516282314469e-06, + "loss": 0.0016, + "step": 46830 + }, + { + "epoch": 0.7915973061355551, + "grad_norm": 0.06091529503464699, + "learning_rate": 7.563250108936344e-06, + "loss": 0.0016, + "step": 46840 + }, + { + "epoch": 0.7917663064144056, + "grad_norm": 0.023602819070219994, + "learning_rate": 7.561983712543042e-06, + "loss": 0.0012, + "step": 46850 + }, + { + "epoch": 0.7919353066932561, + "grad_norm": 0.07649361342191696, + "learning_rate": 7.560717093244743e-06, + "loss": 0.0014, + "step": 46860 + }, + { + "epoch": 0.7921043069721065, + "grad_norm": 0.37941059470176697, + "learning_rate": 7.55945025115165e-06, + "loss": 0.0029, + "step": 46870 + }, + { + "epoch": 0.792273307250957, + "grad_norm": 0.046618007123470306, + "learning_rate": 7.558183186373984e-06, + "loss": 0.0019, + "step": 46880 + }, + { + "epoch": 0.7924423075298074, + "grad_norm": 0.17688718438148499, + "learning_rate": 7.556915899021986e-06, + "loss": 0.0049, + "step": 46890 + }, + { + "epoch": 0.7926113078086579, + "grad_norm": 0.02750130370259285, + "learning_rate": 7.5556483892059165e-06, + "loss": 0.0022, + "step": 46900 + }, + { + "epoch": 0.7927803080875083, + "grad_norm": 0.14209090173244476, + "learning_rate": 7.5543806570360545e-06, + "loss": 0.0025, + "step": 46910 + }, + { + "epoch": 0.7929493083663588, + "grad_norm": 0.06824956834316254, + "learning_rate": 7.5531127026227e-06, + "loss": 0.0023, + "step": 46920 + }, + { + "epoch": 0.7931183086452093, + "grad_norm": 0.14599743485450745, + "learning_rate": 7.551844526076169e-06, + "loss": 0.0023, + "step": 46930 + }, + { + "epoch": 0.7932873089240597, + "grad_norm": 0.06734983623027802, + "learning_rate": 7.5505761275068015e-06, + "loss": 0.0014, + "step": 46940 + }, + { + "epoch": 0.7934563092029102, + "grad_norm": 0.028400149196386337, + "learning_rate": 7.549307507024952e-06, + "loss": 0.0029, + "step": 46950 + }, + { + "epoch": 0.7936253094817607, + "grad_norm": 0.15007208287715912, + "learning_rate": 7.548038664740999e-06, + "loss": 0.0053, + "step": 46960 + }, + { + "epoch": 0.793794309760611, + "grad_norm": 0.04033247381448746, + "learning_rate": 7.546769600765336e-06, + "loss": 0.0016, + "step": 46970 + }, + { + "epoch": 0.7939633100394615, + "grad_norm": 0.030600406229496002, + "learning_rate": 7.545500315208377e-06, + "loss": 0.0015, + "step": 46980 + }, + { + "epoch": 0.794132310318312, + "grad_norm": 0.11937287449836731, + "learning_rate": 7.544230808180559e-06, + "loss": 0.002, + "step": 
46990 + }, + { + "epoch": 0.7943013105971625, + "grad_norm": 0.014048217795789242, + "learning_rate": 7.542961079792333e-06, + "loss": 0.002, + "step": 47000 + }, + { + "epoch": 0.7944703108760129, + "grad_norm": 0.05937180668115616, + "learning_rate": 7.541691130154172e-06, + "loss": 0.0016, + "step": 47010 + }, + { + "epoch": 0.7946393111548634, + "grad_norm": 0.04418695718050003, + "learning_rate": 7.540420959376569e-06, + "loss": 0.0006, + "step": 47020 + }, + { + "epoch": 0.7948083114337139, + "grad_norm": 0.021853217855095863, + "learning_rate": 7.5391505675700325e-06, + "loss": 0.0025, + "step": 47030 + }, + { + "epoch": 0.7949773117125644, + "grad_norm": 0.08879373222589493, + "learning_rate": 7.537879954845095e-06, + "loss": 0.0018, + "step": 47040 + }, + { + "epoch": 0.7951463119914148, + "grad_norm": 0.0371592752635479, + "learning_rate": 7.536609121312305e-06, + "loss": 0.001, + "step": 47050 + }, + { + "epoch": 0.7953153122702652, + "grad_norm": 0.02704356424510479, + "learning_rate": 7.5353380670822314e-06, + "loss": 0.0009, + "step": 47060 + }, + { + "epoch": 0.7954843125491157, + "grad_norm": 0.16344471275806427, + "learning_rate": 7.534066792265461e-06, + "loss": 0.0016, + "step": 47070 + }, + { + "epoch": 0.7956533128279661, + "grad_norm": 0.0721210241317749, + "learning_rate": 7.532795296972602e-06, + "loss": 0.0023, + "step": 47080 + }, + { + "epoch": 0.7958223131068166, + "grad_norm": 0.12913252413272858, + "learning_rate": 7.53152358131428e-06, + "loss": 0.0012, + "step": 47090 + }, + { + "epoch": 0.7959913133856671, + "grad_norm": 0.036034420132637024, + "learning_rate": 7.530251645401143e-06, + "loss": 0.0021, + "step": 47100 + }, + { + "epoch": 0.7961603136645176, + "grad_norm": 0.22235152125358582, + "learning_rate": 7.528979489343853e-06, + "loss": 0.0028, + "step": 47110 + }, + { + "epoch": 0.796329313943368, + "grad_norm": 0.06673062592744827, + "learning_rate": 7.527707113253093e-06, + "loss": 0.0019, + "step": 47120 + }, + { + "epoch": 0.7964983142222185, + "grad_norm": 0.13184094429016113, + "learning_rate": 7.526434517239568e-06, + "loss": 0.0031, + "step": 47130 + }, + { + "epoch": 0.796667314501069, + "grad_norm": 0.05706809088587761, + "learning_rate": 7.525161701413999e-06, + "loss": 0.0017, + "step": 47140 + }, + { + "epoch": 0.7968363147799193, + "grad_norm": 0.16983768343925476, + "learning_rate": 7.523888665887127e-06, + "loss": 0.0017, + "step": 47150 + }, + { + "epoch": 0.7970053150587698, + "grad_norm": 0.03151121735572815, + "learning_rate": 7.522615410769714e-06, + "loss": 0.0015, + "step": 47160 + }, + { + "epoch": 0.7971743153376203, + "grad_norm": 0.11783250421285629, + "learning_rate": 7.521341936172536e-06, + "loss": 0.0034, + "step": 47170 + }, + { + "epoch": 0.7973433156164708, + "grad_norm": 0.016078723594546318, + "learning_rate": 7.520068242206393e-06, + "loss": 0.0012, + "step": 47180 + }, + { + "epoch": 0.7975123158953212, + "grad_norm": 0.0879022553563118, + "learning_rate": 7.518794328982104e-06, + "loss": 0.0017, + "step": 47190 + }, + { + "epoch": 0.7976813161741717, + "grad_norm": 0.05336923897266388, + "learning_rate": 7.5175201966105045e-06, + "loss": 0.0009, + "step": 47200 + }, + { + "epoch": 0.7978503164530222, + "grad_norm": 0.06102690473198891, + "learning_rate": 7.516245845202451e-06, + "loss": 0.0013, + "step": 47210 + }, + { + "epoch": 0.7980193167318727, + "grad_norm": 0.0765201672911644, + "learning_rate": 7.514971274868817e-06, + "loss": 0.0016, + "step": 47220 + }, + { + "epoch": 0.798188317010723, + "grad_norm": 
0.023189343512058258, + "learning_rate": 7.513696485720496e-06, + "loss": 0.0015, + "step": 47230 + }, + { + "epoch": 0.7983573172895735, + "grad_norm": 0.081252820789814, + "learning_rate": 7.512421477868402e-06, + "loss": 0.003, + "step": 47240 + }, + { + "epoch": 0.798526317568424, + "grad_norm": 0.027922332286834717, + "learning_rate": 7.511146251423467e-06, + "loss": 0.0015, + "step": 47250 + }, + { + "epoch": 0.7986953178472744, + "grad_norm": 0.05592355132102966, + "learning_rate": 7.509870806496642e-06, + "loss": 0.0024, + "step": 47260 + }, + { + "epoch": 0.7988643181261249, + "grad_norm": 0.03578708693385124, + "learning_rate": 7.508595143198894e-06, + "loss": 0.0012, + "step": 47270 + }, + { + "epoch": 0.7990333184049754, + "grad_norm": 0.04387123882770538, + "learning_rate": 7.507319261641215e-06, + "loss": 0.0038, + "step": 47280 + }, + { + "epoch": 0.7992023186838259, + "grad_norm": 0.0392196886241436, + "learning_rate": 7.506043161934613e-06, + "loss": 0.001, + "step": 47290 + }, + { + "epoch": 0.7993713189626763, + "grad_norm": 0.09163472056388855, + "learning_rate": 7.5047668441901124e-06, + "loss": 0.001, + "step": 47300 + }, + { + "epoch": 0.7995403192415268, + "grad_norm": 0.059991274029016495, + "learning_rate": 7.503490308518761e-06, + "loss": 0.0011, + "step": 47310 + }, + { + "epoch": 0.7997093195203772, + "grad_norm": 0.045976802706718445, + "learning_rate": 7.502213555031623e-06, + "loss": 0.0046, + "step": 47320 + }, + { + "epoch": 0.7998783197992276, + "grad_norm": 0.03616219386458397, + "learning_rate": 7.500936583839782e-06, + "loss": 0.0027, + "step": 47330 + }, + { + "epoch": 0.8000473200780781, + "grad_norm": 0.03759624436497688, + "learning_rate": 7.499659395054342e-06, + "loss": 0.0025, + "step": 47340 + }, + { + "epoch": 0.8002163203569286, + "grad_norm": 0.07205487787723541, + "learning_rate": 7.498381988786423e-06, + "loss": 0.0014, + "step": 47350 + }, + { + "epoch": 0.800385320635779, + "grad_norm": 0.02757079340517521, + "learning_rate": 7.4971043651471654e-06, + "loss": 0.0017, + "step": 47360 + }, + { + "epoch": 0.8005543209146295, + "grad_norm": 0.06361888349056244, + "learning_rate": 7.49582652424773e-06, + "loss": 0.0022, + "step": 47370 + }, + { + "epoch": 0.80072332119348, + "grad_norm": 0.054886069148778915, + "learning_rate": 7.494548466199294e-06, + "loss": 0.0013, + "step": 47380 + }, + { + "epoch": 0.8008923214723305, + "grad_norm": 0.08569411933422089, + "learning_rate": 7.493270191113054e-06, + "loss": 0.0017, + "step": 47390 + }, + { + "epoch": 0.8010613217511809, + "grad_norm": 0.02172980271279812, + "learning_rate": 7.4919916991002295e-06, + "loss": 0.0011, + "step": 47400 + }, + { + "epoch": 0.8012303220300313, + "grad_norm": 0.048847027122974396, + "learning_rate": 7.490712990272052e-06, + "loss": 0.0014, + "step": 47410 + }, + { + "epoch": 0.8013993223088818, + "grad_norm": 0.21808849275112152, + "learning_rate": 7.489434064739777e-06, + "loss": 0.0034, + "step": 47420 + }, + { + "epoch": 0.8015683225877323, + "grad_norm": 0.025194497779011726, + "learning_rate": 7.488154922614677e-06, + "loss": 0.0017, + "step": 47430 + }, + { + "epoch": 0.8017373228665827, + "grad_norm": 0.008024209178984165, + "learning_rate": 7.486875564008043e-06, + "loss": 0.0015, + "step": 47440 + }, + { + "epoch": 0.8019063231454332, + "grad_norm": 0.009351897984743118, + "learning_rate": 7.485595989031186e-06, + "loss": 0.0009, + "step": 47450 + }, + { + "epoch": 0.8020753234242837, + "grad_norm": 0.06636117398738861, + "learning_rate": 
7.484316197795434e-06, + "loss": 0.0008, + "step": 47460 + }, + { + "epoch": 0.8022443237031341, + "grad_norm": 0.029118984937667847, + "learning_rate": 7.483036190412136e-06, + "loss": 0.0014, + "step": 47470 + }, + { + "epoch": 0.8024133239819846, + "grad_norm": 0.14868061244487762, + "learning_rate": 7.48175596699266e-06, + "loss": 0.0023, + "step": 47480 + }, + { + "epoch": 0.802582324260835, + "grad_norm": 0.011304926127195358, + "learning_rate": 7.48047552764839e-06, + "loss": 0.0026, + "step": 47490 + }, + { + "epoch": 0.8027513245396855, + "grad_norm": 0.10953537374734879, + "learning_rate": 7.479194872490731e-06, + "loss": 0.0039, + "step": 47500 + }, + { + "epoch": 0.8029203248185359, + "grad_norm": 0.0011343724327161908, + "learning_rate": 7.477914001631106e-06, + "loss": 0.0026, + "step": 47510 + }, + { + "epoch": 0.8030893250973864, + "grad_norm": 0.05069960653781891, + "learning_rate": 7.4766329151809565e-06, + "loss": 0.0011, + "step": 47520 + }, + { + "epoch": 0.8032583253762369, + "grad_norm": 0.14260950684547424, + "learning_rate": 7.475351613251744e-06, + "loss": 0.002, + "step": 47530 + }, + { + "epoch": 0.8034273256550873, + "grad_norm": 0.014285523444414139, + "learning_rate": 7.474070095954948e-06, + "loss": 0.0022, + "step": 47540 + }, + { + "epoch": 0.8035963259339378, + "grad_norm": 0.04909459874033928, + "learning_rate": 7.472788363402068e-06, + "loss": 0.0015, + "step": 47550 + }, + { + "epoch": 0.8037653262127883, + "grad_norm": 0.12003076076507568, + "learning_rate": 7.471506415704617e-06, + "loss": 0.0026, + "step": 47560 + }, + { + "epoch": 0.8039343264916388, + "grad_norm": 0.07390342652797699, + "learning_rate": 7.470224252974134e-06, + "loss": 0.0014, + "step": 47570 + }, + { + "epoch": 0.8041033267704891, + "grad_norm": 0.08915874361991882, + "learning_rate": 7.468941875322173e-06, + "loss": 0.0019, + "step": 47580 + }, + { + "epoch": 0.8042723270493396, + "grad_norm": 0.05357299745082855, + "learning_rate": 7.467659282860306e-06, + "loss": 0.0016, + "step": 47590 + }, + { + "epoch": 0.8044413273281901, + "grad_norm": 0.03945795074105263, + "learning_rate": 7.466376475700126e-06, + "loss": 0.0018, + "step": 47600 + }, + { + "epoch": 0.8046103276070405, + "grad_norm": 0.04925532266497612, + "learning_rate": 7.465093453953241e-06, + "loss": 0.0013, + "step": 47610 + }, + { + "epoch": 0.804779327885891, + "grad_norm": 0.04004979878664017, + "learning_rate": 7.463810217731283e-06, + "loss": 0.0017, + "step": 47620 + }, + { + "epoch": 0.8049483281647415, + "grad_norm": 0.06461845338344574, + "learning_rate": 7.462526767145899e-06, + "loss": 0.0019, + "step": 47630 + }, + { + "epoch": 0.805117328443592, + "grad_norm": 0.061418574303388596, + "learning_rate": 7.461243102308755e-06, + "loss": 0.001, + "step": 47640 + }, + { + "epoch": 0.8052863287224424, + "grad_norm": 0.04609359800815582, + "learning_rate": 7.459959223331537e-06, + "loss": 0.0008, + "step": 47650 + }, + { + "epoch": 0.8054553290012928, + "grad_norm": 0.0352080874145031, + "learning_rate": 7.4586751303259455e-06, + "loss": 0.0018, + "step": 47660 + }, + { + "epoch": 0.8056243292801433, + "grad_norm": 0.07530752569437027, + "learning_rate": 7.457390823403706e-06, + "loss": 0.0011, + "step": 47670 + }, + { + "epoch": 0.8057933295589937, + "grad_norm": 0.010745921172201633, + "learning_rate": 7.456106302676559e-06, + "loss": 0.0025, + "step": 47680 + }, + { + "epoch": 0.8059623298378442, + "grad_norm": 0.052886590361595154, + "learning_rate": 7.454821568256263e-06, + "loss": 0.0024, + "step": 47690 
+ }, + { + "epoch": 0.8061313301166947, + "grad_norm": 0.0723862424492836, + "learning_rate": 7.453536620254598e-06, + "loss": 0.0017, + "step": 47700 + }, + { + "epoch": 0.8063003303955452, + "grad_norm": 0.006251264829188585, + "learning_rate": 7.452251458783359e-06, + "loss": 0.0017, + "step": 47710 + }, + { + "epoch": 0.8064693306743956, + "grad_norm": 0.08329357951879501, + "learning_rate": 7.450966083954361e-06, + "loss": 0.0014, + "step": 47720 + }, + { + "epoch": 0.8066383309532461, + "grad_norm": 0.15640972554683685, + "learning_rate": 7.449680495879439e-06, + "loss": 0.0018, + "step": 47730 + }, + { + "epoch": 0.8068073312320966, + "grad_norm": 0.11291138082742691, + "learning_rate": 7.4483946946704445e-06, + "loss": 0.0015, + "step": 47740 + }, + { + "epoch": 0.8069763315109469, + "grad_norm": 0.0313311368227005, + "learning_rate": 7.447108680439248e-06, + "loss": 0.0025, + "step": 47750 + }, + { + "epoch": 0.8071453317897974, + "grad_norm": 0.06498327106237411, + "learning_rate": 7.44582245329774e-06, + "loss": 0.0014, + "step": 47760 + }, + { + "epoch": 0.8073143320686479, + "grad_norm": 0.012859559617936611, + "learning_rate": 7.44453601335783e-06, + "loss": 0.0025, + "step": 47770 + }, + { + "epoch": 0.8074833323474984, + "grad_norm": 0.07249462604522705, + "learning_rate": 7.4432493607314405e-06, + "loss": 0.0014, + "step": 47780 + }, + { + "epoch": 0.8076523326263488, + "grad_norm": 0.09710532426834106, + "learning_rate": 7.4419624955305205e-06, + "loss": 0.0016, + "step": 47790 + }, + { + "epoch": 0.8078213329051993, + "grad_norm": 0.03200296685099602, + "learning_rate": 7.440675417867031e-06, + "loss": 0.001, + "step": 47800 + }, + { + "epoch": 0.8079903331840498, + "grad_norm": 0.15108339488506317, + "learning_rate": 7.4393881278529555e-06, + "loss": 0.0027, + "step": 47810 + }, + { + "epoch": 0.8081593334629003, + "grad_norm": 0.2669786214828491, + "learning_rate": 7.438100625600293e-06, + "loss": 0.0021, + "step": 47820 + }, + { + "epoch": 0.8083283337417507, + "grad_norm": 0.1186618059873581, + "learning_rate": 7.436812911221064e-06, + "loss": 0.0022, + "step": 47830 + }, + { + "epoch": 0.8084973340206011, + "grad_norm": 0.08381953835487366, + "learning_rate": 7.435524984827304e-06, + "loss": 0.0015, + "step": 47840 + }, + { + "epoch": 0.8086663342994516, + "grad_norm": 0.18541555106639862, + "learning_rate": 7.434236846531071e-06, + "loss": 0.0018, + "step": 47850 + }, + { + "epoch": 0.808835334578302, + "grad_norm": 0.09643784165382385, + "learning_rate": 7.432948496444437e-06, + "loss": 0.0019, + "step": 47860 + }, + { + "epoch": 0.8090043348571525, + "grad_norm": 0.03466213867068291, + "learning_rate": 7.431659934679496e-06, + "loss": 0.0021, + "step": 47870 + }, + { + "epoch": 0.809173335136003, + "grad_norm": 0.059613388031721115, + "learning_rate": 7.43037116134836e-06, + "loss": 0.0025, + "step": 47880 + }, + { + "epoch": 0.8093423354148535, + "grad_norm": 0.006972366478294134, + "learning_rate": 7.429082176563157e-06, + "loss": 0.0012, + "step": 47890 + }, + { + "epoch": 0.8095113356937039, + "grad_norm": 0.02702466771006584, + "learning_rate": 7.427792980436036e-06, + "loss": 0.0024, + "step": 47900 + }, + { + "epoch": 0.8096803359725544, + "grad_norm": 0.11114290356636047, + "learning_rate": 7.426503573079162e-06, + "loss": 0.0014, + "step": 47910 + }, + { + "epoch": 0.8098493362514048, + "grad_norm": 0.05626865476369858, + "learning_rate": 7.425213954604722e-06, + "loss": 0.0016, + "step": 47920 + }, + { + "epoch": 0.8100183365302552, + "grad_norm": 
0.09901049733161926, + "learning_rate": 7.4239241251249165e-06, + "loss": 0.0019, + "step": 47930 + }, + { + "epoch": 0.8101873368091057, + "grad_norm": 0.04712087661027908, + "learning_rate": 7.422634084751967e-06, + "loss": 0.0021, + "step": 47940 + }, + { + "epoch": 0.8103563370879562, + "grad_norm": 0.0625760555267334, + "learning_rate": 7.421343833598115e-06, + "loss": 0.0026, + "step": 47950 + }, + { + "epoch": 0.8105253373668067, + "grad_norm": 0.02616805210709572, + "learning_rate": 7.420053371775618e-06, + "loss": 0.002, + "step": 47960 + }, + { + "epoch": 0.8106943376456571, + "grad_norm": 0.007708234712481499, + "learning_rate": 7.418762699396752e-06, + "loss": 0.0017, + "step": 47970 + }, + { + "epoch": 0.8108633379245076, + "grad_norm": 0.07194136083126068, + "learning_rate": 7.417471816573812e-06, + "loss": 0.0014, + "step": 47980 + }, + { + "epoch": 0.8110323382033581, + "grad_norm": 0.03533967584371567, + "learning_rate": 7.416180723419112e-06, + "loss": 0.0008, + "step": 47990 + }, + { + "epoch": 0.8112013384822085, + "grad_norm": 0.06678535044193268, + "learning_rate": 7.414889420044982e-06, + "loss": 0.0013, + "step": 48000 + }, + { + "epoch": 0.8113703387610589, + "grad_norm": 0.05515426769852638, + "learning_rate": 7.413597906563771e-06, + "loss": 0.0015, + "step": 48010 + }, + { + "epoch": 0.8115393390399094, + "grad_norm": 0.025319591164588928, + "learning_rate": 7.412306183087849e-06, + "loss": 0.0014, + "step": 48020 + }, + { + "epoch": 0.8117083393187599, + "grad_norm": 0.06620465219020844, + "learning_rate": 7.4110142497296e-06, + "loss": 0.0023, + "step": 48030 + }, + { + "epoch": 0.8118773395976103, + "grad_norm": 0.0314614363014698, + "learning_rate": 7.4097221066014306e-06, + "loss": 0.0014, + "step": 48040 + }, + { + "epoch": 0.8120463398764608, + "grad_norm": 0.10447190701961517, + "learning_rate": 7.408429753815762e-06, + "loss": 0.0021, + "step": 48050 + }, + { + "epoch": 0.8122153401553113, + "grad_norm": 0.12224993854761124, + "learning_rate": 7.407137191485036e-06, + "loss": 0.0026, + "step": 48060 + }, + { + "epoch": 0.8123843404341617, + "grad_norm": 0.05101926624774933, + "learning_rate": 7.405844419721712e-06, + "loss": 0.0014, + "step": 48070 + }, + { + "epoch": 0.8125533407130122, + "grad_norm": 0.1120670735836029, + "learning_rate": 7.404551438638265e-06, + "loss": 0.0013, + "step": 48080 + }, + { + "epoch": 0.8127223409918627, + "grad_norm": 0.03729407116770744, + "learning_rate": 7.403258248347195e-06, + "loss": 0.001, + "step": 48090 + }, + { + "epoch": 0.812891341270713, + "grad_norm": 0.05864240974187851, + "learning_rate": 7.401964848961012e-06, + "loss": 0.0022, + "step": 48100 + }, + { + "epoch": 0.8130603415495635, + "grad_norm": 0.008091941475868225, + "learning_rate": 7.40067124059225e-06, + "loss": 0.0017, + "step": 48110 + }, + { + "epoch": 0.813229341828414, + "grad_norm": 0.10884171724319458, + "learning_rate": 7.399377423353457e-06, + "loss": 0.0016, + "step": 48120 + }, + { + "epoch": 0.8133983421072645, + "grad_norm": 0.03182748705148697, + "learning_rate": 7.398083397357205e-06, + "loss": 0.0024, + "step": 48130 + }, + { + "epoch": 0.8135673423861149, + "grad_norm": 0.10057803243398666, + "learning_rate": 7.396789162716076e-06, + "loss": 0.0018, + "step": 48140 + }, + { + "epoch": 0.8137363426649654, + "grad_norm": 0.01269405148923397, + "learning_rate": 7.395494719542679e-06, + "loss": 0.0029, + "step": 48150 + }, + { + "epoch": 0.8139053429438159, + "grad_norm": 0.07138130068778992, + "learning_rate": 7.394200067949635e-06, 
+ "loss": 0.0021, + "step": 48160 + }, + { + "epoch": 0.8140743432226664, + "grad_norm": 0.05810423940420151, + "learning_rate": 7.392905208049585e-06, + "loss": 0.0056, + "step": 48170 + }, + { + "epoch": 0.8142433435015167, + "grad_norm": 0.05451294779777527, + "learning_rate": 7.391610139955187e-06, + "loss": 0.0024, + "step": 48180 + }, + { + "epoch": 0.8144123437803672, + "grad_norm": 0.050688549876213074, + "learning_rate": 7.39031486377912e-06, + "loss": 0.0017, + "step": 48190 + }, + { + "epoch": 0.8145813440592177, + "grad_norm": 0.274347186088562, + "learning_rate": 7.389019379634078e-06, + "loss": 0.0017, + "step": 48200 + }, + { + "epoch": 0.8147503443380681, + "grad_norm": 0.11318135261535645, + "learning_rate": 7.387723687632775e-06, + "loss": 0.0025, + "step": 48210 + }, + { + "epoch": 0.8149193446169186, + "grad_norm": 0.219595804810524, + "learning_rate": 7.386427787887943e-06, + "loss": 0.0018, + "step": 48220 + }, + { + "epoch": 0.8150883448957691, + "grad_norm": 0.04114341735839844, + "learning_rate": 7.38513168051233e-06, + "loss": 0.002, + "step": 48230 + }, + { + "epoch": 0.8152573451746196, + "grad_norm": 0.058271802961826324, + "learning_rate": 7.383835365618706e-06, + "loss": 0.0017, + "step": 48240 + }, + { + "epoch": 0.81542634545347, + "grad_norm": 0.16201192140579224, + "learning_rate": 7.382538843319853e-06, + "loss": 0.0026, + "step": 48250 + }, + { + "epoch": 0.8155953457323205, + "grad_norm": 0.06540218740701675, + "learning_rate": 7.381242113728579e-06, + "loss": 0.0025, + "step": 48260 + }, + { + "epoch": 0.8157643460111709, + "grad_norm": 0.003137104446068406, + "learning_rate": 7.3799451769577036e-06, + "loss": 0.0015, + "step": 48270 + }, + { + "epoch": 0.8159333462900213, + "grad_norm": 0.1325099766254425, + "learning_rate": 7.378648033120066e-06, + "loss": 0.0033, + "step": 48280 + }, + { + "epoch": 0.8161023465688718, + "grad_norm": 0.04073556885123253, + "learning_rate": 7.377350682328525e-06, + "loss": 0.0028, + "step": 48290 + }, + { + "epoch": 0.8162713468477223, + "grad_norm": 0.0781053751707077, + "learning_rate": 7.3760531246959555e-06, + "loss": 0.0024, + "step": 48300 + }, + { + "epoch": 0.8164403471265728, + "grad_norm": 0.04935900494456291, + "learning_rate": 7.374755360335253e-06, + "loss": 0.0028, + "step": 48310 + }, + { + "epoch": 0.8166093474054232, + "grad_norm": 0.044646281749010086, + "learning_rate": 7.373457389359327e-06, + "loss": 0.002, + "step": 48320 + }, + { + "epoch": 0.8167783476842737, + "grad_norm": 0.008936772122979164, + "learning_rate": 7.372159211881109e-06, + "loss": 0.0018, + "step": 48330 + }, + { + "epoch": 0.8169473479631242, + "grad_norm": 0.1457815021276474, + "learning_rate": 7.370860828013546e-06, + "loss": 0.0024, + "step": 48340 + }, + { + "epoch": 0.8171163482419745, + "grad_norm": 0.09478127211332321, + "learning_rate": 7.3695622378696045e-06, + "loss": 0.0027, + "step": 48350 + }, + { + "epoch": 0.817285348520825, + "grad_norm": 0.11928408592939377, + "learning_rate": 7.368263441562266e-06, + "loss": 0.0021, + "step": 48360 + }, + { + "epoch": 0.8174543487996755, + "grad_norm": 0.20206162333488464, + "learning_rate": 7.366964439204535e-06, + "loss": 0.0015, + "step": 48370 + }, + { + "epoch": 0.817623349078526, + "grad_norm": 0.014312900602817535, + "learning_rate": 7.365665230909429e-06, + "loss": 0.0012, + "step": 48380 + }, + { + "epoch": 0.8177923493573764, + "grad_norm": 0.023647421970963478, + "learning_rate": 7.364365816789987e-06, + "loss": 0.0018, + "step": 48390 + }, + { + "epoch": 
0.8179613496362269, + "grad_norm": 0.08986024558544159, + "learning_rate": 7.363066196959262e-06, + "loss": 0.0024, + "step": 48400 + }, + { + "epoch": 0.8181303499150774, + "grad_norm": 0.11756408214569092, + "learning_rate": 7.361766371530329e-06, + "loss": 0.0015, + "step": 48410 + }, + { + "epoch": 0.8182993501939279, + "grad_norm": 0.07477547973394394, + "learning_rate": 7.360466340616279e-06, + "loss": 0.0024, + "step": 48420 + }, + { + "epoch": 0.8184683504727783, + "grad_norm": 0.06278149038553238, + "learning_rate": 7.35916610433022e-06, + "loss": 0.0029, + "step": 48430 + }, + { + "epoch": 0.8186373507516287, + "grad_norm": 0.07817571610212326, + "learning_rate": 7.35786566278528e-06, + "loss": 0.0016, + "step": 48440 + }, + { + "epoch": 0.8188063510304792, + "grad_norm": 0.10295785218477249, + "learning_rate": 7.3565650160946036e-06, + "loss": 0.0012, + "step": 48450 + }, + { + "epoch": 0.8189753513093296, + "grad_norm": 0.07114120572805405, + "learning_rate": 7.355264164371352e-06, + "loss": 0.0023, + "step": 48460 + }, + { + "epoch": 0.8191443515881801, + "grad_norm": 0.039024025201797485, + "learning_rate": 7.353963107728708e-06, + "loss": 0.0009, + "step": 48470 + }, + { + "epoch": 0.8193133518670306, + "grad_norm": 0.10922796279191971, + "learning_rate": 7.352661846279867e-06, + "loss": 0.0026, + "step": 48480 + }, + { + "epoch": 0.819482352145881, + "grad_norm": 0.003510313807055354, + "learning_rate": 7.351360380138046e-06, + "loss": 0.0014, + "step": 48490 + }, + { + "epoch": 0.8196513524247315, + "grad_norm": 0.06770562380552292, + "learning_rate": 7.350058709416481e-06, + "loss": 0.002, + "step": 48500 + }, + { + "epoch": 0.819820352703582, + "grad_norm": 0.14002270996570587, + "learning_rate": 7.348756834228421e-06, + "loss": 0.0013, + "step": 48510 + }, + { + "epoch": 0.8199893529824325, + "grad_norm": 0.30503541231155396, + "learning_rate": 7.347454754687136e-06, + "loss": 0.0035, + "step": 48520 + }, + { + "epoch": 0.8201583532612828, + "grad_norm": 0.043744396418333054, + "learning_rate": 7.346152470905915e-06, + "loss": 0.001, + "step": 48530 + }, + { + "epoch": 0.8203273535401333, + "grad_norm": 0.04082144796848297, + "learning_rate": 7.344849982998061e-06, + "loss": 0.0012, + "step": 48540 + }, + { + "epoch": 0.8204963538189838, + "grad_norm": 0.036161936819553375, + "learning_rate": 7.343547291076898e-06, + "loss": 0.0021, + "step": 48550 + }, + { + "epoch": 0.8206653540978343, + "grad_norm": 0.008398571982979774, + "learning_rate": 7.342244395255765e-06, + "loss": 0.0023, + "step": 48560 + }, + { + "epoch": 0.8208343543766847, + "grad_norm": 0.0453002005815506, + "learning_rate": 7.3409412956480216e-06, + "loss": 0.0016, + "step": 48570 + }, + { + "epoch": 0.8210033546555352, + "grad_norm": 0.3084333837032318, + "learning_rate": 7.339637992367044e-06, + "loss": 0.0027, + "step": 48580 + }, + { + "epoch": 0.8211723549343857, + "grad_norm": 0.14241471886634827, + "learning_rate": 7.338334485526224e-06, + "loss": 0.0024, + "step": 48590 + }, + { + "epoch": 0.8213413552132361, + "grad_norm": 0.07546312361955643, + "learning_rate": 7.337030775238976e-06, + "loss": 0.0017, + "step": 48600 + }, + { + "epoch": 0.8215103554920865, + "grad_norm": 0.04120171442627907, + "learning_rate": 7.3357268616187235e-06, + "loss": 0.0023, + "step": 48610 + }, + { + "epoch": 0.821679355770937, + "grad_norm": 0.05609271302819252, + "learning_rate": 7.3344227447789204e-06, + "loss": 0.0036, + "step": 48620 + }, + { + "epoch": 0.8218483560497875, + "grad_norm": 0.18657422065734863, + 
"learning_rate": 7.333118424833028e-06, + "loss": 0.0015, + "step": 48630 + }, + { + "epoch": 0.8220173563286379, + "grad_norm": 0.018935708329081535, + "learning_rate": 7.331813901894526e-06, + "loss": 0.0015, + "step": 48640 + }, + { + "epoch": 0.8221863566074884, + "grad_norm": 0.016682909801602364, + "learning_rate": 7.33050917607692e-06, + "loss": 0.0015, + "step": 48650 + }, + { + "epoch": 0.8223553568863389, + "grad_norm": 0.07267876714468002, + "learning_rate": 7.329204247493722e-06, + "loss": 0.0016, + "step": 48660 + }, + { + "epoch": 0.8225243571651893, + "grad_norm": 0.10636164247989655, + "learning_rate": 7.32789911625847e-06, + "loss": 0.0012, + "step": 48670 + }, + { + "epoch": 0.8226933574440398, + "grad_norm": 0.08027669787406921, + "learning_rate": 7.326593782484716e-06, + "loss": 0.0013, + "step": 48680 + }, + { + "epoch": 0.8228623577228903, + "grad_norm": 0.06711503863334656, + "learning_rate": 7.325288246286031e-06, + "loss": 0.0015, + "step": 48690 + }, + { + "epoch": 0.8230313580017407, + "grad_norm": 0.062487997114658356, + "learning_rate": 7.3239825077760015e-06, + "loss": 0.0018, + "step": 48700 + }, + { + "epoch": 0.8232003582805911, + "grad_norm": 0.05867687985301018, + "learning_rate": 7.322676567068234e-06, + "loss": 0.0019, + "step": 48710 + }, + { + "epoch": 0.8233693585594416, + "grad_norm": 0.13448190689086914, + "learning_rate": 7.321370424276351e-06, + "loss": 0.0022, + "step": 48720 + }, + { + "epoch": 0.8235383588382921, + "grad_norm": 0.02572086825966835, + "learning_rate": 7.320064079513993e-06, + "loss": 0.0022, + "step": 48730 + }, + { + "epoch": 0.8237073591171425, + "grad_norm": 0.11511944234371185, + "learning_rate": 7.31875753289482e-06, + "loss": 0.003, + "step": 48740 + }, + { + "epoch": 0.823876359395993, + "grad_norm": 0.05535471439361572, + "learning_rate": 7.3174507845325085e-06, + "loss": 0.0008, + "step": 48750 + }, + { + "epoch": 0.8240453596748435, + "grad_norm": 0.19832754135131836, + "learning_rate": 7.316143834540749e-06, + "loss": 0.002, + "step": 48760 + }, + { + "epoch": 0.824214359953694, + "grad_norm": 0.09600799530744553, + "learning_rate": 7.314836683033254e-06, + "loss": 0.0025, + "step": 48770 + }, + { + "epoch": 0.8243833602325444, + "grad_norm": 0.026862656697630882, + "learning_rate": 7.313529330123752e-06, + "loss": 0.002, + "step": 48780 + }, + { + "epoch": 0.8245523605113948, + "grad_norm": 0.12975527346134186, + "learning_rate": 7.312221775925989e-06, + "loss": 0.002, + "step": 48790 + }, + { + "epoch": 0.8247213607902453, + "grad_norm": 0.07842448353767395, + "learning_rate": 7.310914020553728e-06, + "loss": 0.0011, + "step": 48800 + }, + { + "epoch": 0.8248903610690957, + "grad_norm": 0.016468260437250137, + "learning_rate": 7.309606064120751e-06, + "loss": 0.0005, + "step": 48810 + }, + { + "epoch": 0.8250593613479462, + "grad_norm": 0.001380032510496676, + "learning_rate": 7.308297906740856e-06, + "loss": 0.0016, + "step": 48820 + }, + { + "epoch": 0.8252283616267967, + "grad_norm": 0.09836116433143616, + "learning_rate": 7.306989548527859e-06, + "loss": 0.0016, + "step": 48830 + }, + { + "epoch": 0.8253973619056472, + "grad_norm": 0.03906187042593956, + "learning_rate": 7.305680989595595e-06, + "loss": 0.0016, + "step": 48840 + }, + { + "epoch": 0.8255663621844976, + "grad_norm": 0.024572791531682014, + "learning_rate": 7.304372230057913e-06, + "loss": 0.0019, + "step": 48850 + }, + { + "epoch": 0.8257353624633481, + "grad_norm": 0.06552372127771378, + "learning_rate": 7.303063270028681e-06, + "loss": 0.0009, 
+ "step": 48860 + }, + { + "epoch": 0.8259043627421985, + "grad_norm": 0.1286754161119461, + "learning_rate": 7.301754109621786e-06, + "loss": 0.0024, + "step": 48870 + }, + { + "epoch": 0.826073363021049, + "grad_norm": 0.009336556307971478, + "learning_rate": 7.300444748951132e-06, + "loss": 0.0014, + "step": 48880 + }, + { + "epoch": 0.8262423632998994, + "grad_norm": 0.12424356490373611, + "learning_rate": 7.299135188130639e-06, + "loss": 0.0019, + "step": 48890 + }, + { + "epoch": 0.8264113635787499, + "grad_norm": 0.028109529986977577, + "learning_rate": 7.2978254272742445e-06, + "loss": 0.0013, + "step": 48900 + }, + { + "epoch": 0.8265803638576004, + "grad_norm": 0.10972272604703903, + "learning_rate": 7.296515466495903e-06, + "loss": 0.002, + "step": 48910 + }, + { + "epoch": 0.8267493641364508, + "grad_norm": 0.1797754168510437, + "learning_rate": 7.2952053059095915e-06, + "loss": 0.0014, + "step": 48920 + }, + { + "epoch": 0.8269183644153013, + "grad_norm": 0.59943687915802, + "learning_rate": 7.293894945629298e-06, + "loss": 0.003, + "step": 48930 + }, + { + "epoch": 0.8270873646941518, + "grad_norm": 0.0860147625207901, + "learning_rate": 7.2925843857690295e-06, + "loss": 0.0023, + "step": 48940 + }, + { + "epoch": 0.8272563649730023, + "grad_norm": 0.07308562099933624, + "learning_rate": 7.291273626442812e-06, + "loss": 0.0014, + "step": 48950 + }, + { + "epoch": 0.8274253652518526, + "grad_norm": 0.05884837359189987, + "learning_rate": 7.289962667764688e-06, + "loss": 0.002, + "step": 48960 + }, + { + "epoch": 0.8275943655307031, + "grad_norm": 0.024381542578339577, + "learning_rate": 7.288651509848715e-06, + "loss": 0.0019, + "step": 48970 + }, + { + "epoch": 0.8277633658095536, + "grad_norm": 0.06704352796077728, + "learning_rate": 7.287340152808973e-06, + "loss": 0.0013, + "step": 48980 + }, + { + "epoch": 0.827932366088404, + "grad_norm": 0.0669189840555191, + "learning_rate": 7.286028596759555e-06, + "loss": 0.0013, + "step": 48990 + }, + { + "epoch": 0.8281013663672545, + "grad_norm": 0.024829840287566185, + "learning_rate": 7.2847168418145716e-06, + "loss": 0.0017, + "step": 49000 + }, + { + "epoch": 0.828270366646105, + "grad_norm": 0.07830045372247696, + "learning_rate": 7.2834048880881545e-06, + "loss": 0.0027, + "step": 49010 + }, + { + "epoch": 0.8284393669249555, + "grad_norm": 0.08251339942216873, + "learning_rate": 7.282092735694449e-06, + "loss": 0.0032, + "step": 49020 + }, + { + "epoch": 0.8286083672038059, + "grad_norm": 0.018376484513282776, + "learning_rate": 7.280780384747619e-06, + "loss": 0.0013, + "step": 49030 + }, + { + "epoch": 0.8287773674826564, + "grad_norm": 0.17820164561271667, + "learning_rate": 7.279467835361844e-06, + "loss": 0.0015, + "step": 49040 + }, + { + "epoch": 0.8289463677615068, + "grad_norm": 0.13392888009548187, + "learning_rate": 7.278155087651324e-06, + "loss": 0.0024, + "step": 49050 + }, + { + "epoch": 0.8291153680403572, + "grad_norm": 0.09335264563560486, + "learning_rate": 7.276842141730273e-06, + "loss": 0.001, + "step": 49060 + }, + { + "epoch": 0.8292843683192077, + "grad_norm": 0.06904011964797974, + "learning_rate": 7.275528997712924e-06, + "loss": 0.0008, + "step": 49070 + }, + { + "epoch": 0.8294533685980582, + "grad_norm": 0.04905041307210922, + "learning_rate": 7.2742156557135265e-06, + "loss": 0.0013, + "step": 49080 + }, + { + "epoch": 0.8296223688769087, + "grad_norm": 0.1327400803565979, + "learning_rate": 7.272902115846347e-06, + "loss": 0.003, + "step": 49090 + }, + { + "epoch": 0.8297913691557591, + 
"grad_norm": 0.08582969009876251, + "learning_rate": 7.271588378225673e-06, + "loss": 0.0015, + "step": 49100 + }, + { + "epoch": 0.8299603694346096, + "grad_norm": 0.023333383724093437, + "learning_rate": 7.270274442965804e-06, + "loss": 0.0011, + "step": 49110 + }, + { + "epoch": 0.8301293697134601, + "grad_norm": 0.07024768739938736, + "learning_rate": 7.268960310181057e-06, + "loss": 0.0013, + "step": 49120 + }, + { + "epoch": 0.8302983699923104, + "grad_norm": 0.09488508850336075, + "learning_rate": 7.2676459799857716e-06, + "loss": 0.0011, + "step": 49130 + }, + { + "epoch": 0.8304673702711609, + "grad_norm": 0.057504843920469284, + "learning_rate": 7.2663314524942975e-06, + "loss": 0.0012, + "step": 49140 + }, + { + "epoch": 0.8306363705500114, + "grad_norm": 0.06765588372945786, + "learning_rate": 7.265016727821008e-06, + "loss": 0.0021, + "step": 49150 + }, + { + "epoch": 0.8308053708288619, + "grad_norm": 0.060480304062366486, + "learning_rate": 7.263701806080288e-06, + "loss": 0.0028, + "step": 49160 + }, + { + "epoch": 0.8309743711077123, + "grad_norm": 0.15077215433120728, + "learning_rate": 7.2623866873865424e-06, + "loss": 0.0013, + "step": 49170 + }, + { + "epoch": 0.8311433713865628, + "grad_norm": 0.05140575021505356, + "learning_rate": 7.261071371854195e-06, + "loss": 0.0014, + "step": 49180 + }, + { + "epoch": 0.8313123716654133, + "grad_norm": 0.06735235452651978, + "learning_rate": 7.259755859597681e-06, + "loss": 0.0011, + "step": 49190 + }, + { + "epoch": 0.8314813719442637, + "grad_norm": 0.017180589959025383, + "learning_rate": 7.2584401507314595e-06, + "loss": 0.0014, + "step": 49200 + }, + { + "epoch": 0.8316503722231142, + "grad_norm": 0.11614830791950226, + "learning_rate": 7.257124245370003e-06, + "loss": 0.0014, + "step": 49210 + }, + { + "epoch": 0.8318193725019646, + "grad_norm": 0.01634841412305832, + "learning_rate": 7.2558081436278e-06, + "loss": 0.0026, + "step": 49220 + }, + { + "epoch": 0.831988372780815, + "grad_norm": 0.04172159731388092, + "learning_rate": 7.25449184561936e-06, + "loss": 0.0015, + "step": 49230 + }, + { + "epoch": 0.8321573730596655, + "grad_norm": 0.041454069316387177, + "learning_rate": 7.2531753514592065e-06, + "loss": 0.0018, + "step": 49240 + }, + { + "epoch": 0.832326373338516, + "grad_norm": 0.016928566619753838, + "learning_rate": 7.25185866126188e-06, + "loss": 0.002, + "step": 49250 + }, + { + "epoch": 0.8324953736173665, + "grad_norm": 0.11260402947664261, + "learning_rate": 7.25054177514194e-06, + "loss": 0.0013, + "step": 49260 + }, + { + "epoch": 0.832664373896217, + "grad_norm": 0.013846118934452534, + "learning_rate": 7.249224693213961e-06, + "loss": 0.0014, + "step": 49270 + }, + { + "epoch": 0.8328333741750674, + "grad_norm": 0.07115150988101959, + "learning_rate": 7.247907415592534e-06, + "loss": 0.0012, + "step": 49280 + }, + { + "epoch": 0.8330023744539179, + "grad_norm": 0.04459391161799431, + "learning_rate": 7.246589942392272e-06, + "loss": 0.0033, + "step": 49290 + }, + { + "epoch": 0.8331713747327683, + "grad_norm": 0.05890648439526558, + "learning_rate": 7.2452722737278e-06, + "loss": 0.0011, + "step": 49300 + }, + { + "epoch": 0.8333403750116187, + "grad_norm": 0.04924681782722473, + "learning_rate": 7.243954409713763e-06, + "loss": 0.0011, + "step": 49310 + }, + { + "epoch": 0.8335093752904692, + "grad_norm": 0.044344622641801834, + "learning_rate": 7.242636350464819e-06, + "loss": 0.0014, + "step": 49320 + }, + { + "epoch": 0.8336783755693197, + "grad_norm": 0.06724030524492264, + "learning_rate": 
7.241318096095646e-06, + "loss": 0.0017, + "step": 49330 + }, + { + "epoch": 0.8338473758481701, + "grad_norm": 0.007544257678091526, + "learning_rate": 7.23999964672094e-06, + "loss": 0.0015, + "step": 49340 + }, + { + "epoch": 0.8340163761270206, + "grad_norm": 0.02665545605123043, + "learning_rate": 7.2386810024554125e-06, + "loss": 0.0033, + "step": 49350 + }, + { + "epoch": 0.8341853764058711, + "grad_norm": 0.0006780088879168034, + "learning_rate": 7.2373621634137904e-06, + "loss": 0.0011, + "step": 49360 + }, + { + "epoch": 0.8343543766847216, + "grad_norm": 0.021561365574598312, + "learning_rate": 7.236043129710818e-06, + "loss": 0.0024, + "step": 49370 + }, + { + "epoch": 0.834523376963572, + "grad_norm": 0.08685091137886047, + "learning_rate": 7.234723901461262e-06, + "loss": 0.0014, + "step": 49380 + }, + { + "epoch": 0.8346923772424224, + "grad_norm": 0.08030354976654053, + "learning_rate": 7.233404478779896e-06, + "loss": 0.0023, + "step": 49390 + }, + { + "epoch": 0.8348613775212729, + "grad_norm": 0.5395932197570801, + "learning_rate": 7.232084861781522e-06, + "loss": 0.0024, + "step": 49400 + }, + { + "epoch": 0.8350303778001233, + "grad_norm": 0.011918571777641773, + "learning_rate": 7.23076505058095e-06, + "loss": 0.0026, + "step": 49410 + }, + { + "epoch": 0.8351993780789738, + "grad_norm": 0.027819110080599785, + "learning_rate": 7.2294450452930095e-06, + "loss": 0.0022, + "step": 49420 + }, + { + "epoch": 0.8353683783578243, + "grad_norm": 0.02106357552111149, + "learning_rate": 7.228124846032549e-06, + "loss": 0.001, + "step": 49430 + }, + { + "epoch": 0.8355373786366748, + "grad_norm": 0.058657340705394745, + "learning_rate": 7.226804452914431e-06, + "loss": 0.0019, + "step": 49440 + }, + { + "epoch": 0.8357063789155252, + "grad_norm": 0.08190031349658966, + "learning_rate": 7.225483866053536e-06, + "loss": 0.0013, + "step": 49450 + }, + { + "epoch": 0.8358753791943757, + "grad_norm": 0.0971725806593895, + "learning_rate": 7.224163085564763e-06, + "loss": 0.0014, + "step": 49460 + }, + { + "epoch": 0.8360443794732262, + "grad_norm": 0.020731769502162933, + "learning_rate": 7.222842111563024e-06, + "loss": 0.0011, + "step": 49470 + }, + { + "epoch": 0.8362133797520765, + "grad_norm": 0.06420711427927017, + "learning_rate": 7.221520944163252e-06, + "loss": 0.0016, + "step": 49480 + }, + { + "epoch": 0.836382380030927, + "grad_norm": 0.03257574141025543, + "learning_rate": 7.220199583480394e-06, + "loss": 0.0022, + "step": 49490 + }, + { + "epoch": 0.8365513803097775, + "grad_norm": 0.013980922289192677, + "learning_rate": 7.218878029629416e-06, + "loss": 0.0012, + "step": 49500 + }, + { + "epoch": 0.836720380588628, + "grad_norm": 0.04765333607792854, + "learning_rate": 7.217556282725298e-06, + "loss": 0.002, + "step": 49510 + }, + { + "epoch": 0.8368893808674784, + "grad_norm": 0.02529972605407238, + "learning_rate": 7.216234342883039e-06, + "loss": 0.0014, + "step": 49520 + }, + { + "epoch": 0.8370583811463289, + "grad_norm": 0.06483682245016098, + "learning_rate": 7.214912210217655e-06, + "loss": 0.0018, + "step": 49530 + }, + { + "epoch": 0.8372273814251794, + "grad_norm": 0.07930611073970795, + "learning_rate": 7.213589884844177e-06, + "loss": 0.002, + "step": 49540 + }, + { + "epoch": 0.8373963817040299, + "grad_norm": 0.029101185500621796, + "learning_rate": 7.212267366877654e-06, + "loss": 0.0008, + "step": 49550 + }, + { + "epoch": 0.8375653819828802, + "grad_norm": 0.10520884394645691, + "learning_rate": 7.210944656433151e-06, + "loss": 0.0024, + "step": 
49560 + }, + { + "epoch": 0.8377343822617307, + "grad_norm": 0.035545896738767624, + "learning_rate": 7.209621753625751e-06, + "loss": 0.0023, + "step": 49570 + }, + { + "epoch": 0.8379033825405812, + "grad_norm": 0.14516295492649078, + "learning_rate": 7.208298658570551e-06, + "loss": 0.0023, + "step": 49580 + }, + { + "epoch": 0.8380723828194316, + "grad_norm": 0.04255577176809311, + "learning_rate": 7.2069753713826695e-06, + "loss": 0.0031, + "step": 49590 + }, + { + "epoch": 0.8382413830982821, + "grad_norm": 0.04397115111351013, + "learning_rate": 7.205651892177239e-06, + "loss": 0.0014, + "step": 49600 + }, + { + "epoch": 0.8384103833771326, + "grad_norm": 0.07351876050233841, + "learning_rate": 7.2043282210694054e-06, + "loss": 0.0016, + "step": 49610 + }, + { + "epoch": 0.8385793836559831, + "grad_norm": 0.005482340231537819, + "learning_rate": 7.2030043581743366e-06, + "loss": 0.0009, + "step": 49620 + }, + { + "epoch": 0.8387483839348335, + "grad_norm": 0.06633653491735458, + "learning_rate": 7.2016803036072144e-06, + "loss": 0.0018, + "step": 49630 + }, + { + "epoch": 0.838917384213684, + "grad_norm": 0.03800305351614952, + "learning_rate": 7.20035605748324e-06, + "loss": 0.0021, + "step": 49640 + }, + { + "epoch": 0.8390863844925344, + "grad_norm": 0.04843270033597946, + "learning_rate": 7.199031619917627e-06, + "loss": 0.0017, + "step": 49650 + }, + { + "epoch": 0.8392553847713848, + "grad_norm": 0.01804737001657486, + "learning_rate": 7.197706991025608e-06, + "loss": 0.0013, + "step": 49660 + }, + { + "epoch": 0.8394243850502353, + "grad_norm": 0.12135551124811172, + "learning_rate": 7.196382170922432e-06, + "loss": 0.0027, + "step": 49670 + }, + { + "epoch": 0.8395933853290858, + "grad_norm": 0.041261736303567886, + "learning_rate": 7.195057159723366e-06, + "loss": 0.0023, + "step": 49680 + }, + { + "epoch": 0.8397623856079363, + "grad_norm": 0.03335694223642349, + "learning_rate": 7.193731957543693e-06, + "loss": 0.002, + "step": 49690 + }, + { + "epoch": 0.8399313858867867, + "grad_norm": 0.05219758301973343, + "learning_rate": 7.19240656449871e-06, + "loss": 0.0016, + "step": 49700 + }, + { + "epoch": 0.8401003861656372, + "grad_norm": 0.10179810225963593, + "learning_rate": 7.191080980703733e-06, + "loss": 0.0011, + "step": 49710 + }, + { + "epoch": 0.8402693864444877, + "grad_norm": 0.03386679291725159, + "learning_rate": 7.189755206274095e-06, + "loss": 0.001, + "step": 49720 + }, + { + "epoch": 0.8404383867233381, + "grad_norm": 0.043032724410295486, + "learning_rate": 7.188429241325145e-06, + "loss": 0.0016, + "step": 49730 + }, + { + "epoch": 0.8406073870021885, + "grad_norm": 0.06790139526128769, + "learning_rate": 7.187103085972247e-06, + "loss": 0.0016, + "step": 49740 + }, + { + "epoch": 0.840776387281039, + "grad_norm": 0.09641426056623459, + "learning_rate": 7.185776740330784e-06, + "loss": 0.0017, + "step": 49750 + }, + { + "epoch": 0.8409453875598895, + "grad_norm": 0.07377991080284119, + "learning_rate": 7.1844502045161545e-06, + "loss": 0.0017, + "step": 49760 + }, + { + "epoch": 0.8411143878387399, + "grad_norm": 0.0946054458618164, + "learning_rate": 7.183123478643772e-06, + "loss": 0.0023, + "step": 49770 + }, + { + "epoch": 0.8412833881175904, + "grad_norm": 0.016826393082737923, + "learning_rate": 7.181796562829071e-06, + "loss": 0.0018, + "step": 49780 + }, + { + "epoch": 0.8414523883964409, + "grad_norm": 0.07942421734333038, + "learning_rate": 7.180469457187498e-06, + "loss": 0.0021, + "step": 49790 + }, + { + "epoch": 0.8416213886752913, + 
"grad_norm": 0.0243277158588171, + "learning_rate": 7.179142161834517e-06, + "loss": 0.0015, + "step": 49800 + }, + { + "epoch": 0.8417903889541418, + "grad_norm": 0.041775017976760864, + "learning_rate": 7.17781467688561e-06, + "loss": 0.002, + "step": 49810 + }, + { + "epoch": 0.8419593892329922, + "grad_norm": 0.10232304781675339, + "learning_rate": 7.176487002456274e-06, + "loss": 0.0019, + "step": 49820 + }, + { + "epoch": 0.8421283895118427, + "grad_norm": 0.06999991834163666, + "learning_rate": 7.175159138662024e-06, + "loss": 0.0022, + "step": 49830 + }, + { + "epoch": 0.8422973897906931, + "grad_norm": 0.007234103046357632, + "learning_rate": 7.17383108561839e-06, + "loss": 0.0007, + "step": 49840 + }, + { + "epoch": 0.8424663900695436, + "grad_norm": 0.009289576672017574, + "learning_rate": 7.1725028434409185e-06, + "loss": 0.0019, + "step": 49850 + }, + { + "epoch": 0.8426353903483941, + "grad_norm": 0.020704420283436775, + "learning_rate": 7.171174412245173e-06, + "loss": 0.0018, + "step": 49860 + }, + { + "epoch": 0.8428043906272445, + "grad_norm": 0.027486352249979973, + "learning_rate": 7.1698457921467345e-06, + "loss": 0.0013, + "step": 49870 + }, + { + "epoch": 0.842973390906095, + "grad_norm": 0.058970868587493896, + "learning_rate": 7.1685169832612e-06, + "loss": 0.0019, + "step": 49880 + }, + { + "epoch": 0.8431423911849455, + "grad_norm": 0.02297344245016575, + "learning_rate": 7.1671879857041805e-06, + "loss": 0.0012, + "step": 49890 + }, + { + "epoch": 0.843311391463796, + "grad_norm": 0.06462989002466202, + "learning_rate": 7.165858799591306e-06, + "loss": 0.0015, + "step": 49900 + }, + { + "epoch": 0.8434803917426463, + "grad_norm": 0.06035640090703964, + "learning_rate": 7.1645294250382225e-06, + "loss": 0.0019, + "step": 49910 + }, + { + "epoch": 0.8436493920214968, + "grad_norm": 0.06446599960327148, + "learning_rate": 7.163199862160591e-06, + "loss": 0.0019, + "step": 49920 + }, + { + "epoch": 0.8438183923003473, + "grad_norm": 0.03763274848461151, + "learning_rate": 7.1618701110740905e-06, + "loss": 0.0017, + "step": 49930 + }, + { + "epoch": 0.8439873925791977, + "grad_norm": 0.05461042746901512, + "learning_rate": 7.160540171894416e-06, + "loss": 0.0016, + "step": 49940 + }, + { + "epoch": 0.8441563928580482, + "grad_norm": 0.04370475932955742, + "learning_rate": 7.159210044737279e-06, + "loss": 0.0023, + "step": 49950 + }, + { + "epoch": 0.8443253931368987, + "grad_norm": 0.015897465869784355, + "learning_rate": 7.157879729718407e-06, + "loss": 0.0018, + "step": 49960 + }, + { + "epoch": 0.8444943934157492, + "grad_norm": 0.07231079041957855, + "learning_rate": 7.156549226953542e-06, + "loss": 0.0088, + "step": 49970 + }, + { + "epoch": 0.8446633936945996, + "grad_norm": 0.14473244547843933, + "learning_rate": 7.155218536558446e-06, + "loss": 0.0027, + "step": 49980 + }, + { + "epoch": 0.84483239397345, + "grad_norm": 0.08568382263183594, + "learning_rate": 7.153887658648895e-06, + "loss": 0.0022, + "step": 49990 + }, + { + "epoch": 0.8450013942523005, + "grad_norm": 0.022534925490617752, + "learning_rate": 7.152556593340683e-06, + "loss": 0.0019, + "step": 50000 + }, + { + "epoch": 0.845170394531151, + "grad_norm": 0.05036391317844391, + "learning_rate": 7.151225340749617e-06, + "loss": 0.0013, + "step": 50010 + }, + { + "epoch": 0.8453393948100014, + "grad_norm": 0.04636518657207489, + "learning_rate": 7.149893900991523e-06, + "loss": 0.0015, + "step": 50020 + }, + { + "epoch": 0.8455083950888519, + "grad_norm": 0.026759391650557518, + "learning_rate": 
7.148562274182243e-06, + "loss": 0.0014, + "step": 50030 + }, + { + "epoch": 0.8456773953677024, + "grad_norm": 0.16903258860111237, + "learning_rate": 7.147230460437636e-06, + "loss": 0.0025, + "step": 50040 + }, + { + "epoch": 0.8458463956465528, + "grad_norm": 0.0035917949862778187, + "learning_rate": 7.145898459873575e-06, + "loss": 0.0013, + "step": 50050 + }, + { + "epoch": 0.8460153959254033, + "grad_norm": 0.030653197318315506, + "learning_rate": 7.14456627260595e-06, + "loss": 0.0026, + "step": 50060 + }, + { + "epoch": 0.8461843962042538, + "grad_norm": 0.09031669050455093, + "learning_rate": 7.14323389875067e-06, + "loss": 0.0025, + "step": 50070 + }, + { + "epoch": 0.8463533964831041, + "grad_norm": 0.0398169681429863, + "learning_rate": 7.141901338423657e-06, + "loss": 0.0008, + "step": 50080 + }, + { + "epoch": 0.8465223967619546, + "grad_norm": 0.023374401032924652, + "learning_rate": 7.140568591740849e-06, + "loss": 0.0017, + "step": 50090 + }, + { + "epoch": 0.8466913970408051, + "grad_norm": 0.062085092067718506, + "learning_rate": 7.139235658818202e-06, + "loss": 0.0096, + "step": 50100 + }, + { + "epoch": 0.8468603973196556, + "grad_norm": 0.04841598868370056, + "learning_rate": 7.137902539771688e-06, + "loss": 0.0015, + "step": 50110 + }, + { + "epoch": 0.847029397598506, + "grad_norm": 0.06624577194452286, + "learning_rate": 7.136569234717296e-06, + "loss": 0.0013, + "step": 50120 + }, + { + "epoch": 0.8471983978773565, + "grad_norm": 0.12280794978141785, + "learning_rate": 7.135235743771027e-06, + "loss": 0.001, + "step": 50130 + }, + { + "epoch": 0.847367398156207, + "grad_norm": 0.1125815361738205, + "learning_rate": 7.133902067048902e-06, + "loss": 0.0014, + "step": 50140 + }, + { + "epoch": 0.8475363984350575, + "grad_norm": 0.0400351881980896, + "learning_rate": 7.13256820466696e-06, + "loss": 0.0042, + "step": 50150 + }, + { + "epoch": 0.8477053987139079, + "grad_norm": 0.17242294549942017, + "learning_rate": 7.131234156741249e-06, + "loss": 0.0019, + "step": 50160 + }, + { + "epoch": 0.8478743989927583, + "grad_norm": 0.008832902647554874, + "learning_rate": 7.129899923387843e-06, + "loss": 0.0016, + "step": 50170 + }, + { + "epoch": 0.8480433992716088, + "grad_norm": 0.05237210914492607, + "learning_rate": 7.128565504722824e-06, + "loss": 0.001, + "step": 50180 + }, + { + "epoch": 0.8482123995504592, + "grad_norm": 0.11998438835144043, + "learning_rate": 7.127230900862292e-06, + "loss": 0.0024, + "step": 50190 + }, + { + "epoch": 0.8483813998293097, + "grad_norm": 0.026236379519104958, + "learning_rate": 7.125896111922366e-06, + "loss": 0.0011, + "step": 50200 + }, + { + "epoch": 0.8485504001081602, + "grad_norm": 0.052572187036275864, + "learning_rate": 7.1245611380191775e-06, + "loss": 0.0012, + "step": 50210 + }, + { + "epoch": 0.8487194003870107, + "grad_norm": 0.04595402255654335, + "learning_rate": 7.1232259792688755e-06, + "loss": 0.0016, + "step": 50220 + }, + { + "epoch": 0.8488884006658611, + "grad_norm": 0.07488039135932922, + "learning_rate": 7.1218906357876275e-06, + "loss": 0.002, + "step": 50230 + }, + { + "epoch": 0.8490574009447116, + "grad_norm": 0.05835746228694916, + "learning_rate": 7.1205551076916125e-06, + "loss": 0.0013, + "step": 50240 + }, + { + "epoch": 0.849226401223562, + "grad_norm": 0.07503136247396469, + "learning_rate": 7.119219395097028e-06, + "loss": 0.0018, + "step": 50250 + }, + { + "epoch": 0.8493954015024124, + "grad_norm": 0.10128705948591232, + "learning_rate": 7.117883498120088e-06, + "loss": 0.0016, + "step": 50260 + 
}, + { + "epoch": 0.8495644017812629, + "grad_norm": 0.06169435754418373, + "learning_rate": 7.116547416877024e-06, + "loss": 0.0014, + "step": 50270 + }, + { + "epoch": 0.8497334020601134, + "grad_norm": 0.03982456028461456, + "learning_rate": 7.115211151484081e-06, + "loss": 0.0013, + "step": 50280 + }, + { + "epoch": 0.8499024023389639, + "grad_norm": 0.14415329694747925, + "learning_rate": 7.1138747020575175e-06, + "loss": 0.0017, + "step": 50290 + }, + { + "epoch": 0.8500714026178143, + "grad_norm": 0.04282059520483017, + "learning_rate": 7.112538068713612e-06, + "loss": 0.0014, + "step": 50300 + }, + { + "epoch": 0.8502404028966648, + "grad_norm": 0.020503170788288116, + "learning_rate": 7.11120125156866e-06, + "loss": 0.0008, + "step": 50310 + }, + { + "epoch": 0.8504094031755153, + "grad_norm": 0.17789845168590546, + "learning_rate": 7.109864250738971e-06, + "loss": 0.0018, + "step": 50320 + }, + { + "epoch": 0.8505784034543658, + "grad_norm": 0.04640667513012886, + "learning_rate": 7.108527066340869e-06, + "loss": 0.001, + "step": 50330 + }, + { + "epoch": 0.8507474037332161, + "grad_norm": 0.16030360758304596, + "learning_rate": 7.1071896984906955e-06, + "loss": 0.0031, + "step": 50340 + }, + { + "epoch": 0.8509164040120666, + "grad_norm": 0.051341064274311066, + "learning_rate": 7.105852147304809e-06, + "loss": 0.0016, + "step": 50350 + }, + { + "epoch": 0.8510854042909171, + "grad_norm": 0.08269865065813065, + "learning_rate": 7.104514412899583e-06, + "loss": 0.0015, + "step": 50360 + }, + { + "epoch": 0.8512544045697675, + "grad_norm": 0.13866493105888367, + "learning_rate": 7.103176495391406e-06, + "loss": 0.0036, + "step": 50370 + }, + { + "epoch": 0.851423404848618, + "grad_norm": 0.08190497010946274, + "learning_rate": 7.101838394896685e-06, + "loss": 0.0021, + "step": 50380 + }, + { + "epoch": 0.8515924051274685, + "grad_norm": 0.14248859882354736, + "learning_rate": 7.1005001115318386e-06, + "loss": 0.0049, + "step": 50390 + }, + { + "epoch": 0.851761405406319, + "grad_norm": 0.004143700003623962, + "learning_rate": 7.099161645413305e-06, + "loss": 0.0024, + "step": 50400 + }, + { + "epoch": 0.8519304056851694, + "grad_norm": 0.04483238607645035, + "learning_rate": 7.097822996657538e-06, + "loss": 0.0037, + "step": 50410 + }, + { + "epoch": 0.8520994059640199, + "grad_norm": 0.09904813021421432, + "learning_rate": 7.096484165381007e-06, + "loss": 0.0027, + "step": 50420 + }, + { + "epoch": 0.8522684062428703, + "grad_norm": 0.10735070705413818, + "learning_rate": 7.095145151700196e-06, + "loss": 0.0018, + "step": 50430 + }, + { + "epoch": 0.8524374065217207, + "grad_norm": 0.3052399456501007, + "learning_rate": 7.0938059557316055e-06, + "loss": 0.001, + "step": 50440 + }, + { + "epoch": 0.8526064068005712, + "grad_norm": 0.06089219078421593, + "learning_rate": 7.09246657759175e-06, + "loss": 0.0016, + "step": 50450 + }, + { + "epoch": 0.8527754070794217, + "grad_norm": 0.05923866108059883, + "learning_rate": 7.091127017397166e-06, + "loss": 0.0021, + "step": 50460 + }, + { + "epoch": 0.8529444073582721, + "grad_norm": 0.0695035457611084, + "learning_rate": 7.0897872752644e-06, + "loss": 0.0013, + "step": 50470 + }, + { + "epoch": 0.8531134076371226, + "grad_norm": 0.06044061854481697, + "learning_rate": 7.088447351310015e-06, + "loss": 0.001, + "step": 50480 + }, + { + "epoch": 0.8532824079159731, + "grad_norm": 0.03624828904867172, + "learning_rate": 7.087107245650592e-06, + "loss": 0.0017, + "step": 50490 + }, + { + "epoch": 0.8534514081948236, + "grad_norm": 
0.035229530185461044, + "learning_rate": 7.085766958402727e-06, + "loss": 0.0012, + "step": 50500 + }, + { + "epoch": 0.8536204084736739, + "grad_norm": 0.0526130348443985, + "learning_rate": 7.08442648968303e-06, + "loss": 0.002, + "step": 50510 + }, + { + "epoch": 0.8537894087525244, + "grad_norm": 0.05172059312462807, + "learning_rate": 7.083085839608128e-06, + "loss": 0.0014, + "step": 50520 + }, + { + "epoch": 0.8539584090313749, + "grad_norm": 0.04551641643047333, + "learning_rate": 7.081745008294667e-06, + "loss": 0.0013, + "step": 50530 + }, + { + "epoch": 0.8541274093102253, + "grad_norm": 0.07173636555671692, + "learning_rate": 7.080403995859302e-06, + "loss": 0.0016, + "step": 50540 + }, + { + "epoch": 0.8542964095890758, + "grad_norm": 0.028299428522586823, + "learning_rate": 7.079062802418711e-06, + "loss": 0.0021, + "step": 50550 + }, + { + "epoch": 0.8544654098679263, + "grad_norm": 0.2714872360229492, + "learning_rate": 7.077721428089583e-06, + "loss": 0.0013, + "step": 50560 + }, + { + "epoch": 0.8546344101467768, + "grad_norm": 0.11973876506090164, + "learning_rate": 7.076379872988624e-06, + "loss": 0.0017, + "step": 50570 + }, + { + "epoch": 0.8548034104256272, + "grad_norm": 0.19866228103637695, + "learning_rate": 7.075038137232556e-06, + "loss": 0.0025, + "step": 50580 + }, + { + "epoch": 0.8549724107044777, + "grad_norm": 0.044867537915706635, + "learning_rate": 7.073696220938115e-06, + "loss": 0.0026, + "step": 50590 + }, + { + "epoch": 0.8551414109833281, + "grad_norm": 0.028597289696335793, + "learning_rate": 7.072354124222058e-06, + "loss": 0.001, + "step": 50600 + }, + { + "epoch": 0.8553104112621785, + "grad_norm": 0.0332944430410862, + "learning_rate": 7.071011847201149e-06, + "loss": 0.002, + "step": 50610 + }, + { + "epoch": 0.855479411541029, + "grad_norm": 0.08548944443464279, + "learning_rate": 7.0696693899921755e-06, + "loss": 0.0009, + "step": 50620 + }, + { + "epoch": 0.8556484118198795, + "grad_norm": 0.1740231066942215, + "learning_rate": 7.068326752711937e-06, + "loss": 0.0022, + "step": 50630 + }, + { + "epoch": 0.85581741209873, + "grad_norm": 0.15070150792598724, + "learning_rate": 7.066983935477251e-06, + "loss": 0.002, + "step": 50640 + }, + { + "epoch": 0.8559864123775804, + "grad_norm": 0.1309231072664261, + "learning_rate": 7.065640938404945e-06, + "loss": 0.0023, + "step": 50650 + }, + { + "epoch": 0.8561554126564309, + "grad_norm": 0.00871087796986103, + "learning_rate": 7.064297761611872e-06, + "loss": 0.0026, + "step": 50660 + }, + { + "epoch": 0.8563244129352814, + "grad_norm": 0.09649201482534409, + "learning_rate": 7.062954405214891e-06, + "loss": 0.0016, + "step": 50670 + }, + { + "epoch": 0.8564934132141317, + "grad_norm": 0.09451442211866379, + "learning_rate": 7.061610869330881e-06, + "loss": 0.0026, + "step": 50680 + }, + { + "epoch": 0.8566624134929822, + "grad_norm": 0.09087509661912918, + "learning_rate": 7.060267154076739e-06, + "loss": 0.0012, + "step": 50690 + }, + { + "epoch": 0.8568314137718327, + "grad_norm": 0.18903668224811554, + "learning_rate": 7.0589232595693705e-06, + "loss": 0.0013, + "step": 50700 + }, + { + "epoch": 0.8570004140506832, + "grad_norm": 0.10971001535654068, + "learning_rate": 7.057579185925702e-06, + "loss": 0.0016, + "step": 50710 + }, + { + "epoch": 0.8571694143295336, + "grad_norm": 0.051436763256788254, + "learning_rate": 7.0562349332626775e-06, + "loss": 0.0023, + "step": 50720 + }, + { + "epoch": 0.8573384146083841, + "grad_norm": 0.23147019743919373, + "learning_rate": 7.054890501697249e-06, 
+ "loss": 0.0013, + "step": 50730 + }, + { + "epoch": 0.8575074148872346, + "grad_norm": 0.07230903208255768, + "learning_rate": 7.053545891346391e-06, + "loss": 0.0018, + "step": 50740 + }, + { + "epoch": 0.8576764151660851, + "grad_norm": 0.06627970933914185, + "learning_rate": 7.052201102327091e-06, + "loss": 0.0035, + "step": 50750 + }, + { + "epoch": 0.8578454154449355, + "grad_norm": 0.10356321185827255, + "learning_rate": 7.050856134756352e-06, + "loss": 0.002, + "step": 50760 + }, + { + "epoch": 0.8580144157237859, + "grad_norm": 0.1255701780319214, + "learning_rate": 7.049510988751193e-06, + "loss": 0.0017, + "step": 50770 + }, + { + "epoch": 0.8581834160026364, + "grad_norm": 0.07557336986064911, + "learning_rate": 7.0481656644286475e-06, + "loss": 0.0015, + "step": 50780 + }, + { + "epoch": 0.8583524162814868, + "grad_norm": 0.0351254902780056, + "learning_rate": 7.046820161905766e-06, + "loss": 0.0022, + "step": 50790 + }, + { + "epoch": 0.8585214165603373, + "grad_norm": 0.1283976435661316, + "learning_rate": 7.045474481299613e-06, + "loss": 0.0011, + "step": 50800 + }, + { + "epoch": 0.8586904168391878, + "grad_norm": 0.018550874665379524, + "learning_rate": 7.04412862272727e-06, + "loss": 0.0014, + "step": 50810 + }, + { + "epoch": 0.8588594171180383, + "grad_norm": 0.14388088881969452, + "learning_rate": 7.042782586305832e-06, + "loss": 0.0022, + "step": 50820 + }, + { + "epoch": 0.8590284173968887, + "grad_norm": 0.07648961246013641, + "learning_rate": 7.041436372152411e-06, + "loss": 0.0018, + "step": 50830 + }, + { + "epoch": 0.8591974176757392, + "grad_norm": 0.02715301141142845, + "learning_rate": 7.0400899803841364e-06, + "loss": 0.0027, + "step": 50840 + }, + { + "epoch": 0.8593664179545897, + "grad_norm": 0.03417889028787613, + "learning_rate": 7.038743411118148e-06, + "loss": 0.0018, + "step": 50850 + }, + { + "epoch": 0.85953541823344, + "grad_norm": 0.20179900527000427, + "learning_rate": 7.037396664471605e-06, + "loss": 0.0013, + "step": 50860 + }, + { + "epoch": 0.8597044185122905, + "grad_norm": 0.06350982934236526, + "learning_rate": 7.036049740561682e-06, + "loss": 0.0019, + "step": 50870 + }, + { + "epoch": 0.859873418791141, + "grad_norm": 0.003680954221636057, + "learning_rate": 7.034702639505565e-06, + "loss": 0.0013, + "step": 50880 + }, + { + "epoch": 0.8600424190699915, + "grad_norm": 0.12450505048036575, + "learning_rate": 7.033355361420461e-06, + "loss": 0.0014, + "step": 50890 + }, + { + "epoch": 0.8602114193488419, + "grad_norm": 0.04708400368690491, + "learning_rate": 7.032007906423588e-06, + "loss": 0.0011, + "step": 50900 + }, + { + "epoch": 0.8603804196276924, + "grad_norm": 0.08234564960002899, + "learning_rate": 7.0306602746321805e-06, + "loss": 0.001, + "step": 50910 + }, + { + "epoch": 0.8605494199065429, + "grad_norm": 0.02741340734064579, + "learning_rate": 7.0293124661634925e-06, + "loss": 0.0011, + "step": 50920 + }, + { + "epoch": 0.8607184201853934, + "grad_norm": 0.08544056117534637, + "learning_rate": 7.0279644811347855e-06, + "loss": 0.002, + "step": 50930 + }, + { + "epoch": 0.8608874204642437, + "grad_norm": 0.0741511806845665, + "learning_rate": 7.026616319663346e-06, + "loss": 0.001, + "step": 50940 + }, + { + "epoch": 0.8610564207430942, + "grad_norm": 0.0694316178560257, + "learning_rate": 7.025267981866466e-06, + "loss": 0.0018, + "step": 50950 + }, + { + "epoch": 0.8612254210219447, + "grad_norm": 0.17879147827625275, + "learning_rate": 7.023919467861459e-06, + "loss": 0.0018, + "step": 50960 + }, + { + "epoch": 
0.8613944213007951, + "grad_norm": 0.09562186896800995, + "learning_rate": 7.022570777765651e-06, + "loss": 0.0011, + "step": 50970 + }, + { + "epoch": 0.8615634215796456, + "grad_norm": 0.055727630853652954, + "learning_rate": 7.0212219116963875e-06, + "loss": 0.0017, + "step": 50980 + }, + { + "epoch": 0.8617324218584961, + "grad_norm": 0.12096261233091354, + "learning_rate": 7.019872869771025e-06, + "loss": 0.0028, + "step": 50990 + }, + { + "epoch": 0.8619014221373466, + "grad_norm": 0.12893104553222656, + "learning_rate": 7.018523652106934e-06, + "loss": 0.0023, + "step": 51000 + }, + { + "epoch": 0.862070422416197, + "grad_norm": 0.13929513096809387, + "learning_rate": 7.0171742588215075e-06, + "loss": 0.0015, + "step": 51010 + }, + { + "epoch": 0.8622394226950475, + "grad_norm": 0.0476958341896534, + "learning_rate": 7.015824690032146e-06, + "loss": 0.0019, + "step": 51020 + }, + { + "epoch": 0.8624084229738979, + "grad_norm": 0.11641071736812592, + "learning_rate": 7.0144749458562686e-06, + "loss": 0.0027, + "step": 51030 + }, + { + "epoch": 0.8625774232527483, + "grad_norm": 0.0025708950124680996, + "learning_rate": 7.013125026411313e-06, + "loss": 0.0018, + "step": 51040 + }, + { + "epoch": 0.8627464235315988, + "grad_norm": 0.09292472898960114, + "learning_rate": 7.0117749318147256e-06, + "loss": 0.0011, + "step": 51050 + }, + { + "epoch": 0.8629154238104493, + "grad_norm": 0.062427133321762085, + "learning_rate": 7.010424662183971e-06, + "loss": 0.0009, + "step": 51060 + }, + { + "epoch": 0.8630844240892998, + "grad_norm": 0.21843813359737396, + "learning_rate": 7.00907421763653e-06, + "loss": 0.0024, + "step": 51070 + }, + { + "epoch": 0.8632534243681502, + "grad_norm": 0.013522603549063206, + "learning_rate": 7.007723598289898e-06, + "loss": 0.0012, + "step": 51080 + }, + { + "epoch": 0.8634224246470007, + "grad_norm": 0.0820799246430397, + "learning_rate": 7.006372804261586e-06, + "loss": 0.0016, + "step": 51090 + }, + { + "epoch": 0.8635914249258512, + "grad_norm": 0.026028936728835106, + "learning_rate": 7.005021835669119e-06, + "loss": 0.0024, + "step": 51100 + }, + { + "epoch": 0.8637604252047016, + "grad_norm": 0.05370361730456352, + "learning_rate": 7.003670692630035e-06, + "loss": 0.0031, + "step": 51110 + }, + { + "epoch": 0.863929425483552, + "grad_norm": 0.025905350223183632, + "learning_rate": 7.002319375261895e-06, + "loss": 0.0013, + "step": 51120 + }, + { + "epoch": 0.8640984257624025, + "grad_norm": 0.02977590821683407, + "learning_rate": 7.000967883682269e-06, + "loss": 0.0026, + "step": 51130 + }, + { + "epoch": 0.864267426041253, + "grad_norm": 0.09860771149396896, + "learning_rate": 6.999616218008741e-06, + "loss": 0.0029, + "step": 51140 + }, + { + "epoch": 0.8644364263201034, + "grad_norm": 0.006294582970440388, + "learning_rate": 6.998264378358915e-06, + "loss": 0.001, + "step": 51150 + }, + { + "epoch": 0.8646054265989539, + "grad_norm": 0.06445837020874023, + "learning_rate": 6.9969123648504035e-06, + "loss": 0.0018, + "step": 51160 + }, + { + "epoch": 0.8647744268778044, + "grad_norm": 0.17422986030578613, + "learning_rate": 6.995560177600842e-06, + "loss": 0.0022, + "step": 51170 + }, + { + "epoch": 0.8649434271566548, + "grad_norm": 0.050527047365903854, + "learning_rate": 6.994207816727877e-06, + "loss": 0.0011, + "step": 51180 + }, + { + "epoch": 0.8651124274355053, + "grad_norm": 0.020956074818968773, + "learning_rate": 6.992855282349169e-06, + "loss": 0.0016, + "step": 51190 + }, + { + "epoch": 0.8652814277143557, + "grad_norm": 
0.0970766693353653, + "learning_rate": 6.991502574582397e-06, + "loss": 0.0016, + "step": 51200 + }, + { + "epoch": 0.8654504279932062, + "grad_norm": 0.04854821041226387, + "learning_rate": 6.99014969354525e-06, + "loss": 0.0022, + "step": 51210 + }, + { + "epoch": 0.8656194282720566, + "grad_norm": 0.08087870478630066, + "learning_rate": 6.98879663935544e-06, + "loss": 0.0014, + "step": 51220 + }, + { + "epoch": 0.8657884285509071, + "grad_norm": 0.012758402153849602, + "learning_rate": 6.987443412130684e-06, + "loss": 0.0019, + "step": 51230 + }, + { + "epoch": 0.8659574288297576, + "grad_norm": 0.03489887714385986, + "learning_rate": 6.986090011988723e-06, + "loss": 0.0008, + "step": 51240 + }, + { + "epoch": 0.866126429108608, + "grad_norm": 0.03194853290915489, + "learning_rate": 6.9847364390473084e-06, + "loss": 0.0013, + "step": 51250 + }, + { + "epoch": 0.8662954293874585, + "grad_norm": 0.024313364177942276, + "learning_rate": 6.9833826934242065e-06, + "loss": 0.0021, + "step": 51260 + }, + { + "epoch": 0.866464429666309, + "grad_norm": 0.05284968391060829, + "learning_rate": 6.9820287752372015e-06, + "loss": 0.0011, + "step": 51270 + }, + { + "epoch": 0.8666334299451595, + "grad_norm": 0.007368212100118399, + "learning_rate": 6.9806746846040895e-06, + "loss": 0.0016, + "step": 51280 + }, + { + "epoch": 0.8668024302240098, + "grad_norm": 0.059824682772159576, + "learning_rate": 6.9793204216426825e-06, + "loss": 0.0039, + "step": 51290 + }, + { + "epoch": 0.8669714305028603, + "grad_norm": 0.06650307029485703, + "learning_rate": 6.97796598647081e-06, + "loss": 0.0011, + "step": 51300 + }, + { + "epoch": 0.8671404307817108, + "grad_norm": 0.3455488681793213, + "learning_rate": 6.976611379206312e-06, + "loss": 0.0016, + "step": 51310 + }, + { + "epoch": 0.8673094310605612, + "grad_norm": 0.051520440727472305, + "learning_rate": 6.975256599967047e-06, + "loss": 0.0016, + "step": 51320 + }, + { + "epoch": 0.8674784313394117, + "grad_norm": 0.07747151702642441, + "learning_rate": 6.973901648870889e-06, + "loss": 0.0071, + "step": 51330 + }, + { + "epoch": 0.8676474316182622, + "grad_norm": 0.0553559847176075, + "learning_rate": 6.972546526035723e-06, + "loss": 0.0013, + "step": 51340 + }, + { + "epoch": 0.8678164318971127, + "grad_norm": 0.25140562653541565, + "learning_rate": 6.971191231579451e-06, + "loss": 0.0024, + "step": 51350 + }, + { + "epoch": 0.8679854321759631, + "grad_norm": 0.14226362109184265, + "learning_rate": 6.969835765619993e-06, + "loss": 0.0017, + "step": 51360 + }, + { + "epoch": 0.8681544324548135, + "grad_norm": 0.024550093337893486, + "learning_rate": 6.968480128275279e-06, + "loss": 0.0009, + "step": 51370 + }, + { + "epoch": 0.868323432733664, + "grad_norm": 0.15737617015838623, + "learning_rate": 6.967124319663255e-06, + "loss": 0.0013, + "step": 51380 + }, + { + "epoch": 0.8684924330125144, + "grad_norm": 0.052487265318632126, + "learning_rate": 6.965768339901885e-06, + "loss": 0.0025, + "step": 51390 + }, + { + "epoch": 0.8686614332913649, + "grad_norm": 0.051464054733514786, + "learning_rate": 6.964412189109145e-06, + "loss": 0.0017, + "step": 51400 + }, + { + "epoch": 0.8688304335702154, + "grad_norm": 0.06501564383506775, + "learning_rate": 6.963055867403027e-06, + "loss": 0.0008, + "step": 51410 + }, + { + "epoch": 0.8689994338490659, + "grad_norm": 0.008760693483054638, + "learning_rate": 6.961699374901536e-06, + "loss": 0.0016, + "step": 51420 + }, + { + "epoch": 0.8691684341279163, + "grad_norm": 0.009418006055057049, + "learning_rate": 
6.960342711722697e-06, + "loss": 0.0006, + "step": 51430 + }, + { + "epoch": 0.8693374344067668, + "grad_norm": 0.0037960095796734095, + "learning_rate": 6.958985877984543e-06, + "loss": 0.0008, + "step": 51440 + }, + { + "epoch": 0.8695064346856173, + "grad_norm": 0.03941208869218826, + "learning_rate": 6.957628873805125e-06, + "loss": 0.0013, + "step": 51450 + }, + { + "epoch": 0.8696754349644676, + "grad_norm": 0.06688125431537628, + "learning_rate": 6.9562716993025125e-06, + "loss": 0.0026, + "step": 51460 + }, + { + "epoch": 0.8698444352433181, + "grad_norm": 0.02947884425520897, + "learning_rate": 6.954914354594782e-06, + "loss": 0.0012, + "step": 51470 + }, + { + "epoch": 0.8700134355221686, + "grad_norm": 0.06458774209022522, + "learning_rate": 6.953556839800031e-06, + "loss": 0.001, + "step": 51480 + }, + { + "epoch": 0.8701824358010191, + "grad_norm": 0.02007310651242733, + "learning_rate": 6.952199155036371e-06, + "loss": 0.0012, + "step": 51490 + }, + { + "epoch": 0.8703514360798695, + "grad_norm": 0.13680298626422882, + "learning_rate": 6.950841300421923e-06, + "loss": 0.0025, + "step": 51500 + }, + { + "epoch": 0.87052043635872, + "grad_norm": 0.05821897089481354, + "learning_rate": 6.949483276074832e-06, + "loss": 0.0013, + "step": 51510 + }, + { + "epoch": 0.8706894366375705, + "grad_norm": 0.028216827660799026, + "learning_rate": 6.948125082113251e-06, + "loss": 0.0032, + "step": 51520 + }, + { + "epoch": 0.870858436916421, + "grad_norm": 0.23589423298835754, + "learning_rate": 6.946766718655348e-06, + "loss": 0.0028, + "step": 51530 + }, + { + "epoch": 0.8710274371952714, + "grad_norm": 0.009308498352766037, + "learning_rate": 6.945408185819309e-06, + "loss": 0.0007, + "step": 51540 + }, + { + "epoch": 0.8711964374741218, + "grad_norm": 0.11392482370138168, + "learning_rate": 6.944049483723332e-06, + "loss": 0.0026, + "step": 51550 + }, + { + "epoch": 0.8713654377529723, + "grad_norm": 0.013795859180390835, + "learning_rate": 6.94269061248563e-06, + "loss": 0.0018, + "step": 51560 + }, + { + "epoch": 0.8715344380318227, + "grad_norm": 0.06885958462953568, + "learning_rate": 6.941331572224432e-06, + "loss": 0.0024, + "step": 51570 + }, + { + "epoch": 0.8717034383106732, + "grad_norm": 0.06273774057626724, + "learning_rate": 6.939972363057982e-06, + "loss": 0.0013, + "step": 51580 + }, + { + "epoch": 0.8718724385895237, + "grad_norm": 0.042911358177661896, + "learning_rate": 6.938612985104536e-06, + "loss": 0.0007, + "step": 51590 + }, + { + "epoch": 0.8720414388683742, + "grad_norm": 0.19388608634471893, + "learning_rate": 6.937253438482369e-06, + "loss": 0.0016, + "step": 51600 + }, + { + "epoch": 0.8722104391472246, + "grad_norm": 0.07669465988874435, + "learning_rate": 6.935893723309766e-06, + "loss": 0.0014, + "step": 51610 + }, + { + "epoch": 0.8723794394260751, + "grad_norm": 0.009165574796497822, + "learning_rate": 6.934533839705029e-06, + "loss": 0.0034, + "step": 51620 + }, + { + "epoch": 0.8725484397049255, + "grad_norm": 0.02931053563952446, + "learning_rate": 6.933173787786476e-06, + "loss": 0.0022, + "step": 51630 + }, + { + "epoch": 0.8727174399837759, + "grad_norm": 0.05150367692112923, + "learning_rate": 6.931813567672435e-06, + "loss": 0.0021, + "step": 51640 + }, + { + "epoch": 0.8728864402626264, + "grad_norm": 0.03767949715256691, + "learning_rate": 6.930453179481256e-06, + "loss": 0.0026, + "step": 51650 + }, + { + "epoch": 0.8730554405414769, + "grad_norm": 0.08445336669683456, + "learning_rate": 6.929092623331296e-06, + "loss": 0.0022, + "step": 
51660 + }, + { + "epoch": 0.8732244408203274, + "grad_norm": 0.03677793964743614, + "learning_rate": 6.92773189934093e-06, + "loss": 0.0011, + "step": 51670 + }, + { + "epoch": 0.8733934410991778, + "grad_norm": 0.05863290652632713, + "learning_rate": 6.926371007628551e-06, + "loss": 0.0007, + "step": 51680 + }, + { + "epoch": 0.8735624413780283, + "grad_norm": 0.01422297116369009, + "learning_rate": 6.925009948312558e-06, + "loss": 0.0013, + "step": 51690 + }, + { + "epoch": 0.8737314416568788, + "grad_norm": 0.05631661415100098, + "learning_rate": 6.923648721511374e-06, + "loss": 0.0017, + "step": 51700 + }, + { + "epoch": 0.8739004419357292, + "grad_norm": 0.03909344598650932, + "learning_rate": 6.922287327343432e-06, + "loss": 0.0014, + "step": 51710 + }, + { + "epoch": 0.8740694422145796, + "grad_norm": 0.14640794694423676, + "learning_rate": 6.920925765927178e-06, + "loss": 0.0013, + "step": 51720 + }, + { + "epoch": 0.8742384424934301, + "grad_norm": 0.09230657666921616, + "learning_rate": 6.9195640373810756e-06, + "loss": 0.0017, + "step": 51730 + }, + { + "epoch": 0.8744074427722806, + "grad_norm": 0.11621160060167313, + "learning_rate": 6.9182021418236025e-06, + "loss": 0.002, + "step": 51740 + }, + { + "epoch": 0.874576443051131, + "grad_norm": 0.011284736916422844, + "learning_rate": 6.916840079373247e-06, + "loss": 0.0012, + "step": 51750 + }, + { + "epoch": 0.8747454433299815, + "grad_norm": 0.11019916832447052, + "learning_rate": 6.915477850148519e-06, + "loss": 0.0019, + "step": 51760 + }, + { + "epoch": 0.874914443608832, + "grad_norm": 0.06869740039110184, + "learning_rate": 6.914115454267936e-06, + "loss": 0.0011, + "step": 51770 + }, + { + "epoch": 0.8750834438876824, + "grad_norm": 0.09072849154472351, + "learning_rate": 6.9127528918500344e-06, + "loss": 0.0018, + "step": 51780 + }, + { + "epoch": 0.8752524441665329, + "grad_norm": 0.013464599847793579, + "learning_rate": 6.911390163013364e-06, + "loss": 0.0016, + "step": 51790 + }, + { + "epoch": 0.8754214444453834, + "grad_norm": 0.04589114338159561, + "learning_rate": 6.910027267876489e-06, + "loss": 0.0013, + "step": 51800 + }, + { + "epoch": 0.8755904447242338, + "grad_norm": 0.05709948390722275, + "learning_rate": 6.908664206557987e-06, + "loss": 0.002, + "step": 51810 + }, + { + "epoch": 0.8757594450030842, + "grad_norm": 0.05664777010679245, + "learning_rate": 6.907300979176452e-06, + "loss": 0.0019, + "step": 51820 + }, + { + "epoch": 0.8759284452819347, + "grad_norm": 0.030458100140094757, + "learning_rate": 6.905937585850491e-06, + "loss": 0.0013, + "step": 51830 + }, + { + "epoch": 0.8760974455607852, + "grad_norm": 0.02953479439020157, + "learning_rate": 6.904574026698724e-06, + "loss": 0.0019, + "step": 51840 + }, + { + "epoch": 0.8762664458396356, + "grad_norm": 0.0162524227052927, + "learning_rate": 6.90321030183979e-06, + "loss": 0.001, + "step": 51850 + }, + { + "epoch": 0.8764354461184861, + "grad_norm": 0.025975948199629784, + "learning_rate": 6.901846411392338e-06, + "loss": 0.0014, + "step": 51860 + }, + { + "epoch": 0.8766044463973366, + "grad_norm": 0.018599003553390503, + "learning_rate": 6.900482355475033e-06, + "loss": 0.0023, + "step": 51870 + }, + { + "epoch": 0.8767734466761871, + "grad_norm": 0.022095203399658203, + "learning_rate": 6.8991181342065546e-06, + "loss": 0.0019, + "step": 51880 + }, + { + "epoch": 0.8769424469550374, + "grad_norm": 0.05831044912338257, + "learning_rate": 6.897753747705599e-06, + "loss": 0.0015, + "step": 51890 + }, + { + "epoch": 0.8771114472338879, + 
"grad_norm": 0.2666162848472595, + "learning_rate": 6.896389196090871e-06, + "loss": 0.002, + "step": 51900 + }, + { + "epoch": 0.8772804475127384, + "grad_norm": 0.08272144943475723, + "learning_rate": 6.8950244794810965e-06, + "loss": 0.0026, + "step": 51910 + }, + { + "epoch": 0.8774494477915888, + "grad_norm": 0.03429991379380226, + "learning_rate": 6.893659597995009e-06, + "loss": 0.0056, + "step": 51920 + }, + { + "epoch": 0.8776184480704393, + "grad_norm": 0.05217054858803749, + "learning_rate": 6.892294551751362e-06, + "loss": 0.0017, + "step": 51930 + }, + { + "epoch": 0.8777874483492898, + "grad_norm": 0.26239824295043945, + "learning_rate": 6.890929340868921e-06, + "loss": 0.0016, + "step": 51940 + }, + { + "epoch": 0.8779564486281403, + "grad_norm": 0.08103934675455093, + "learning_rate": 6.889563965466465e-06, + "loss": 0.0034, + "step": 51950 + }, + { + "epoch": 0.8781254489069907, + "grad_norm": 0.0029180962592363358, + "learning_rate": 6.888198425662789e-06, + "loss": 0.0007, + "step": 51960 + }, + { + "epoch": 0.8782944491858412, + "grad_norm": 0.1408311277627945, + "learning_rate": 6.886832721576702e-06, + "loss": 0.0023, + "step": 51970 + }, + { + "epoch": 0.8784634494646916, + "grad_norm": 0.12084182351827621, + "learning_rate": 6.8854668533270245e-06, + "loss": 0.0016, + "step": 51980 + }, + { + "epoch": 0.878632449743542, + "grad_norm": 0.06310968846082687, + "learning_rate": 6.8841008210325966e-06, + "loss": 0.0009, + "step": 51990 + }, + { + "epoch": 0.8788014500223925, + "grad_norm": 0.06121402978897095, + "learning_rate": 6.882734624812269e-06, + "loss": 0.0013, + "step": 52000 + }, + { + "epoch": 0.878970450301243, + "grad_norm": 0.077993243932724, + "learning_rate": 6.881368264784906e-06, + "loss": 0.0022, + "step": 52010 + }, + { + "epoch": 0.8791394505800935, + "grad_norm": 0.06399333477020264, + "learning_rate": 6.880001741069391e-06, + "loss": 0.0015, + "step": 52020 + }, + { + "epoch": 0.8793084508589439, + "grad_norm": 0.11310141533613205, + "learning_rate": 6.878635053784614e-06, + "loss": 0.0016, + "step": 52030 + }, + { + "epoch": 0.8794774511377944, + "grad_norm": 0.051656268537044525, + "learning_rate": 6.877268203049484e-06, + "loss": 0.002, + "step": 52040 + }, + { + "epoch": 0.8796464514166449, + "grad_norm": 0.12784205377101898, + "learning_rate": 6.875901188982926e-06, + "loss": 0.0016, + "step": 52050 + }, + { + "epoch": 0.8798154516954952, + "grad_norm": 0.03359860181808472, + "learning_rate": 6.874534011703876e-06, + "loss": 0.0013, + "step": 52060 + }, + { + "epoch": 0.8799844519743457, + "grad_norm": 0.06326126307249069, + "learning_rate": 6.8731666713312835e-06, + "loss": 0.0015, + "step": 52070 + }, + { + "epoch": 0.8801534522531962, + "grad_norm": 0.0809256061911583, + "learning_rate": 6.871799167984116e-06, + "loss": 0.0009, + "step": 52080 + }, + { + "epoch": 0.8803224525320467, + "grad_norm": 0.06105821952223778, + "learning_rate": 6.870431501781352e-06, + "loss": 0.0016, + "step": 52090 + }, + { + "epoch": 0.8804914528108971, + "grad_norm": 0.09603806585073471, + "learning_rate": 6.869063672841983e-06, + "loss": 0.0016, + "step": 52100 + }, + { + "epoch": 0.8806604530897476, + "grad_norm": 0.026588289067149162, + "learning_rate": 6.86769568128502e-06, + "loss": 0.0012, + "step": 52110 + }, + { + "epoch": 0.8808294533685981, + "grad_norm": 0.1563897281885147, + "learning_rate": 6.8663275272294835e-06, + "loss": 0.0015, + "step": 52120 + }, + { + "epoch": 0.8809984536474486, + "grad_norm": 0.02206975221633911, + "learning_rate": 
6.86495921079441e-06, + "loss": 0.0022, + "step": 52130 + }, + { + "epoch": 0.881167453926299, + "grad_norm": 0.00509232422336936, + "learning_rate": 6.863590732098848e-06, + "loss": 0.0015, + "step": 52140 + }, + { + "epoch": 0.8813364542051494, + "grad_norm": 0.1817808747291565, + "learning_rate": 6.862222091261864e-06, + "loss": 0.0017, + "step": 52150 + }, + { + "epoch": 0.8815054544839999, + "grad_norm": 0.09490422159433365, + "learning_rate": 6.860853288402534e-06, + "loss": 0.0015, + "step": 52160 + }, + { + "epoch": 0.8816744547628503, + "grad_norm": 0.021016210317611694, + "learning_rate": 6.859484323639953e-06, + "loss": 0.0021, + "step": 52170 + }, + { + "epoch": 0.8818434550417008, + "grad_norm": 0.022308198735117912, + "learning_rate": 6.8581151970932266e-06, + "loss": 0.0023, + "step": 52180 + }, + { + "epoch": 0.8820124553205513, + "grad_norm": 0.09990277886390686, + "learning_rate": 6.856745908881475e-06, + "loss": 0.0016, + "step": 52190 + }, + { + "epoch": 0.8821814555994018, + "grad_norm": 0.039097677916288376, + "learning_rate": 6.855376459123833e-06, + "loss": 0.0018, + "step": 52200 + }, + { + "epoch": 0.8823504558782522, + "grad_norm": 0.1134280264377594, + "learning_rate": 6.854006847939449e-06, + "loss": 0.0011, + "step": 52210 + }, + { + "epoch": 0.8825194561571027, + "grad_norm": 0.06151890754699707, + "learning_rate": 6.852637075447488e-06, + "loss": 0.0012, + "step": 52220 + }, + { + "epoch": 0.8826884564359532, + "grad_norm": 0.07523173838853836, + "learning_rate": 6.851267141767125e-06, + "loss": 0.0009, + "step": 52230 + }, + { + "epoch": 0.8828574567148035, + "grad_norm": 0.091850645840168, + "learning_rate": 6.849897047017551e-06, + "loss": 0.0014, + "step": 52240 + }, + { + "epoch": 0.883026456993654, + "grad_norm": 0.039377931505441666, + "learning_rate": 6.8485267913179694e-06, + "loss": 0.0012, + "step": 52250 + }, + { + "epoch": 0.8831954572725045, + "grad_norm": 0.04614482447504997, + "learning_rate": 6.847156374787602e-06, + "loss": 0.0015, + "step": 52260 + }, + { + "epoch": 0.883364457551355, + "grad_norm": 0.025399111211299896, + "learning_rate": 6.845785797545679e-06, + "loss": 0.0015, + "step": 52270 + }, + { + "epoch": 0.8835334578302054, + "grad_norm": 0.048147816210985184, + "learning_rate": 6.84441505971145e-06, + "loss": 0.001, + "step": 52280 + }, + { + "epoch": 0.8837024581090559, + "grad_norm": 0.054078638553619385, + "learning_rate": 6.8430441614041744e-06, + "loss": 0.002, + "step": 52290 + }, + { + "epoch": 0.8838714583879064, + "grad_norm": 0.020648863166570663, + "learning_rate": 6.841673102743126e-06, + "loss": 0.002, + "step": 52300 + }, + { + "epoch": 0.8840404586667568, + "grad_norm": 0.07791152596473694, + "learning_rate": 6.840301883847595e-06, + "loss": 0.0019, + "step": 52310 + }, + { + "epoch": 0.8842094589456072, + "grad_norm": 0.037497781217098236, + "learning_rate": 6.838930504836885e-06, + "loss": 0.0015, + "step": 52320 + }, + { + "epoch": 0.8843784592244577, + "grad_norm": 0.0458468534052372, + "learning_rate": 6.837558965830309e-06, + "loss": 0.0012, + "step": 52330 + }, + { + "epoch": 0.8845474595033082, + "grad_norm": 0.034784458577632904, + "learning_rate": 6.8361872669472006e-06, + "loss": 0.0007, + "step": 52340 + }, + { + "epoch": 0.8847164597821586, + "grad_norm": 0.05131611227989197, + "learning_rate": 6.834815408306903e-06, + "loss": 0.0021, + "step": 52350 + }, + { + "epoch": 0.8848854600610091, + "grad_norm": 0.16373878717422485, + "learning_rate": 6.833443390028775e-06, + "loss": 0.0028, + "step": 52360 
+ }, + { + "epoch": 0.8850544603398596, + "grad_norm": 0.08664316684007645, + "learning_rate": 6.832071212232191e-06, + "loss": 0.0022, + "step": 52370 + }, + { + "epoch": 0.88522346061871, + "grad_norm": 0.06289789080619812, + "learning_rate": 6.830698875036533e-06, + "loss": 0.0019, + "step": 52380 + }, + { + "epoch": 0.8853924608975605, + "grad_norm": 0.05547764152288437, + "learning_rate": 6.829326378561203e-06, + "loss": 0.0022, + "step": 52390 + }, + { + "epoch": 0.885561461176411, + "grad_norm": 0.07169349491596222, + "learning_rate": 6.827953722925616e-06, + "loss": 0.0007, + "step": 52400 + }, + { + "epoch": 0.8857304614552614, + "grad_norm": 0.013705150224268436, + "learning_rate": 6.826580908249198e-06, + "loss": 0.0017, + "step": 52410 + }, + { + "epoch": 0.8858994617341118, + "grad_norm": 0.003144758054986596, + "learning_rate": 6.82520793465139e-06, + "loss": 0.002, + "step": 52420 + }, + { + "epoch": 0.8860684620129623, + "grad_norm": 0.10687538981437683, + "learning_rate": 6.823834802251649e-06, + "loss": 0.0018, + "step": 52430 + }, + { + "epoch": 0.8862374622918128, + "grad_norm": 0.05764946714043617, + "learning_rate": 6.822461511169442e-06, + "loss": 0.0017, + "step": 52440 + }, + { + "epoch": 0.8864064625706632, + "grad_norm": 0.0790930986404419, + "learning_rate": 6.821088061524256e-06, + "loss": 0.0014, + "step": 52450 + }, + { + "epoch": 0.8865754628495137, + "grad_norm": 0.03424690291285515, + "learning_rate": 6.819714453435583e-06, + "loss": 0.0011, + "step": 52460 + }, + { + "epoch": 0.8867444631283642, + "grad_norm": 0.03840995952486992, + "learning_rate": 6.818340687022937e-06, + "loss": 0.0015, + "step": 52470 + }, + { + "epoch": 0.8869134634072147, + "grad_norm": 0.017015980556607246, + "learning_rate": 6.816966762405841e-06, + "loss": 0.0089, + "step": 52480 + }, + { + "epoch": 0.8870824636860651, + "grad_norm": 0.11019181460142136, + "learning_rate": 6.815592679703834e-06, + "loss": 0.0024, + "step": 52490 + }, + { + "epoch": 0.8872514639649155, + "grad_norm": 0.13899827003479004, + "learning_rate": 6.814218439036466e-06, + "loss": 0.0039, + "step": 52500 + }, + { + "epoch": 0.887420464243766, + "grad_norm": 0.07236754894256592, + "learning_rate": 6.812844040523305e-06, + "loss": 0.0014, + "step": 52510 + }, + { + "epoch": 0.8875894645226164, + "grad_norm": 0.039463385939598083, + "learning_rate": 6.811469484283928e-06, + "loss": 0.0014, + "step": 52520 + }, + { + "epoch": 0.8877584648014669, + "grad_norm": 0.05553396791219711, + "learning_rate": 6.810094770437929e-06, + "loss": 0.0019, + "step": 52530 + }, + { + "epoch": 0.8879274650803174, + "grad_norm": 0.2061227262020111, + "learning_rate": 6.808719899104916e-06, + "loss": 0.0025, + "step": 52540 + }, + { + "epoch": 0.8880964653591679, + "grad_norm": 0.057045165449380875, + "learning_rate": 6.807344870404506e-06, + "loss": 0.0019, + "step": 52550 + }, + { + "epoch": 0.8882654656380183, + "grad_norm": 0.03864588961005211, + "learning_rate": 6.8059696844563384e-06, + "loss": 0.0038, + "step": 52560 + }, + { + "epoch": 0.8884344659168688, + "grad_norm": 0.046701423823833466, + "learning_rate": 6.804594341380057e-06, + "loss": 0.0013, + "step": 52570 + }, + { + "epoch": 0.8886034661957192, + "grad_norm": 0.05355743691325188, + "learning_rate": 6.8032188412953235e-06, + "loss": 0.0016, + "step": 52580 + }, + { + "epoch": 0.8887724664745696, + "grad_norm": 0.04035189375281334, + "learning_rate": 6.8018431843218155e-06, + "loss": 0.0023, + "step": 52590 + }, + { + "epoch": 0.8889414667534201, + "grad_norm": 
0.07668393105268478, + "learning_rate": 6.80046737057922e-06, + "loss": 0.0015, + "step": 52600 + }, + { + "epoch": 0.8891104670322706, + "grad_norm": 0.04548002406954765, + "learning_rate": 6.799091400187239e-06, + "loss": 0.0022, + "step": 52610 + }, + { + "epoch": 0.8892794673111211, + "grad_norm": 0.07837411761283875, + "learning_rate": 6.7977152732655905e-06, + "loss": 0.0007, + "step": 52620 + }, + { + "epoch": 0.8894484675899715, + "grad_norm": 0.059095341712236404, + "learning_rate": 6.7963389899340016e-06, + "loss": 0.0014, + "step": 52630 + }, + { + "epoch": 0.889617467868822, + "grad_norm": 0.09972900152206421, + "learning_rate": 6.794962550312217e-06, + "loss": 0.0017, + "step": 52640 + }, + { + "epoch": 0.8897864681476725, + "grad_norm": 0.026061296463012695, + "learning_rate": 6.793585954519995e-06, + "loss": 0.0009, + "step": 52650 + }, + { + "epoch": 0.889955468426523, + "grad_norm": 0.01142977736890316, + "learning_rate": 6.792209202677105e-06, + "loss": 0.0017, + "step": 52660 + }, + { + "epoch": 0.8901244687053733, + "grad_norm": 0.04268473759293556, + "learning_rate": 6.79083229490333e-06, + "loss": 0.0012, + "step": 52670 + }, + { + "epoch": 0.8902934689842238, + "grad_norm": 0.08778329193592072, + "learning_rate": 6.789455231318469e-06, + "loss": 0.0011, + "step": 52680 + }, + { + "epoch": 0.8904624692630743, + "grad_norm": 0.08509475737810135, + "learning_rate": 6.788078012042333e-06, + "loss": 0.0012, + "step": 52690 + }, + { + "epoch": 0.8906314695419247, + "grad_norm": 0.07777417451143265, + "learning_rate": 6.786700637194745e-06, + "loss": 0.0022, + "step": 52700 + }, + { + "epoch": 0.8908004698207752, + "grad_norm": 0.13288511335849762, + "learning_rate": 6.7853231068955474e-06, + "loss": 0.0022, + "step": 52710 + }, + { + "epoch": 0.8909694700996257, + "grad_norm": 0.008743119426071644, + "learning_rate": 6.7839454212645876e-06, + "loss": 0.0011, + "step": 52720 + }, + { + "epoch": 0.8911384703784762, + "grad_norm": 0.11511184275150299, + "learning_rate": 6.782567580421732e-06, + "loss": 0.002, + "step": 52730 + }, + { + "epoch": 0.8913074706573266, + "grad_norm": 0.03016517497599125, + "learning_rate": 6.7811895844868626e-06, + "loss": 0.0012, + "step": 52740 + }, + { + "epoch": 0.891476470936177, + "grad_norm": 0.02707226388156414, + "learning_rate": 6.779811433579867e-06, + "loss": 0.0013, + "step": 52750 + }, + { + "epoch": 0.8916454712150275, + "grad_norm": 0.013867163099348545, + "learning_rate": 6.7784331278206536e-06, + "loss": 0.0044, + "step": 52760 + }, + { + "epoch": 0.8918144714938779, + "grad_norm": 0.15949314832687378, + "learning_rate": 6.7770546673291425e-06, + "loss": 0.0014, + "step": 52770 + }, + { + "epoch": 0.8919834717727284, + "grad_norm": 0.02797427959740162, + "learning_rate": 6.775676052225265e-06, + "loss": 0.0014, + "step": 52780 + }, + { + "epoch": 0.8921524720515789, + "grad_norm": 0.03147095441818237, + "learning_rate": 6.7742972826289675e-06, + "loss": 0.0021, + "step": 52790 + }, + { + "epoch": 0.8923214723304294, + "grad_norm": 0.045642051845788956, + "learning_rate": 6.7729183586602095e-06, + "loss": 0.0024, + "step": 52800 + }, + { + "epoch": 0.8924904726092798, + "grad_norm": 0.0411669984459877, + "learning_rate": 6.771539280438966e-06, + "loss": 0.0011, + "step": 52810 + }, + { + "epoch": 0.8926594728881303, + "grad_norm": 0.02917470782995224, + "learning_rate": 6.77016004808522e-06, + "loss": 0.0015, + "step": 52820 + }, + { + "epoch": 0.8928284731669808, + "grad_norm": 0.04881921410560608, + "learning_rate": 
6.768780661718973e-06, + "loss": 0.0014, + "step": 52830 + }, + { + "epoch": 0.8929974734458311, + "grad_norm": 0.03261767700314522, + "learning_rate": 6.767401121460239e-06, + "loss": 0.002, + "step": 52840 + }, + { + "epoch": 0.8931664737246816, + "grad_norm": 0.039354369044303894, + "learning_rate": 6.766021427429043e-06, + "loss": 0.0052, + "step": 52850 + }, + { + "epoch": 0.8933354740035321, + "grad_norm": 0.039264146238565445, + "learning_rate": 6.764641579745429e-06, + "loss": 0.0009, + "step": 52860 + }, + { + "epoch": 0.8935044742823826, + "grad_norm": 0.04962320253252983, + "learning_rate": 6.763261578529445e-06, + "loss": 0.0012, + "step": 52870 + }, + { + "epoch": 0.893673474561233, + "grad_norm": 0.05316542834043503, + "learning_rate": 6.761881423901162e-06, + "loss": 0.0025, + "step": 52880 + }, + { + "epoch": 0.8938424748400835, + "grad_norm": 0.019389688968658447, + "learning_rate": 6.760501115980659e-06, + "loss": 0.0011, + "step": 52890 + }, + { + "epoch": 0.894011475118934, + "grad_norm": 0.02297196164727211, + "learning_rate": 6.7591206548880285e-06, + "loss": 0.0019, + "step": 52900 + }, + { + "epoch": 0.8941804753977844, + "grad_norm": 0.07117078453302383, + "learning_rate": 6.757740040743378e-06, + "loss": 0.0012, + "step": 52910 + }, + { + "epoch": 0.8943494756766349, + "grad_norm": 0.06571634858846664, + "learning_rate": 6.756359273666827e-06, + "loss": 0.0009, + "step": 52920 + }, + { + "epoch": 0.8945184759554853, + "grad_norm": 0.07537243515253067, + "learning_rate": 6.754978353778508e-06, + "loss": 0.0066, + "step": 52930 + }, + { + "epoch": 0.8946874762343358, + "grad_norm": 0.02859083004295826, + "learning_rate": 6.753597281198571e-06, + "loss": 0.0018, + "step": 52940 + }, + { + "epoch": 0.8948564765131862, + "grad_norm": 0.07475516945123672, + "learning_rate": 6.752216056047174e-06, + "loss": 0.0015, + "step": 52950 + }, + { + "epoch": 0.8950254767920367, + "grad_norm": 0.0927102118730545, + "learning_rate": 6.75083467844449e-06, + "loss": 0.0012, + "step": 52960 + }, + { + "epoch": 0.8951944770708872, + "grad_norm": 0.16480670869350433, + "learning_rate": 6.749453148510706e-06, + "loss": 0.0009, + "step": 52970 + }, + { + "epoch": 0.8953634773497376, + "grad_norm": 0.04149400070309639, + "learning_rate": 6.748071466366023e-06, + "loss": 0.0012, + "step": 52980 + }, + { + "epoch": 0.8955324776285881, + "grad_norm": 0.07089632749557495, + "learning_rate": 6.746689632130652e-06, + "loss": 0.0023, + "step": 52990 + }, + { + "epoch": 0.8957014779074386, + "grad_norm": 0.05136013776063919, + "learning_rate": 6.74530764592482e-06, + "loss": 0.0011, + "step": 53000 + }, + { + "epoch": 0.895870478186289, + "grad_norm": 0.011685706675052643, + "learning_rate": 6.743925507868767e-06, + "loss": 0.0022, + "step": 53010 + }, + { + "epoch": 0.8960394784651394, + "grad_norm": 0.03491692245006561, + "learning_rate": 6.742543218082744e-06, + "loss": 0.001, + "step": 53020 + }, + { + "epoch": 0.8962084787439899, + "grad_norm": 0.0011314982548356056, + "learning_rate": 6.741160776687019e-06, + "loss": 0.0019, + "step": 53030 + }, + { + "epoch": 0.8963774790228404, + "grad_norm": 0.05087868496775627, + "learning_rate": 6.73977818380187e-06, + "loss": 0.0011, + "step": 53040 + }, + { + "epoch": 0.8965464793016908, + "grad_norm": 0.08818584680557251, + "learning_rate": 6.738395439547591e-06, + "loss": 0.0019, + "step": 53050 + }, + { + "epoch": 0.8967154795805413, + "grad_norm": 0.14296086132526398, + "learning_rate": 6.737012544044486e-06, + "loss": 0.0013, + "step": 53060 + 
}, + { + "epoch": 0.8968844798593918, + "grad_norm": 0.0492781437933445, + "learning_rate": 6.735629497412872e-06, + "loss": 0.001, + "step": 53070 + }, + { + "epoch": 0.8970534801382423, + "grad_norm": 0.025535326451063156, + "learning_rate": 6.734246299773084e-06, + "loss": 0.0021, + "step": 53080 + }, + { + "epoch": 0.8972224804170927, + "grad_norm": 0.06880789995193481, + "learning_rate": 6.7328629512454646e-06, + "loss": 0.0013, + "step": 53090 + }, + { + "epoch": 0.8973914806959431, + "grad_norm": 0.3037954866886139, + "learning_rate": 6.731479451950373e-06, + "loss": 0.0016, + "step": 53100 + }, + { + "epoch": 0.8975604809747936, + "grad_norm": 0.07403312623500824, + "learning_rate": 6.730095802008179e-06, + "loss": 0.0015, + "step": 53110 + }, + { + "epoch": 0.897729481253644, + "grad_norm": 0.08519771695137024, + "learning_rate": 6.728712001539266e-06, + "loss": 0.0017, + "step": 53120 + }, + { + "epoch": 0.8978984815324945, + "grad_norm": 0.024969011545181274, + "learning_rate": 6.7273280506640356e-06, + "loss": 0.0013, + "step": 53130 + }, + { + "epoch": 0.898067481811345, + "grad_norm": 0.06154831871390343, + "learning_rate": 6.725943949502896e-06, + "loss": 0.0017, + "step": 53140 + }, + { + "epoch": 0.8982364820901955, + "grad_norm": 0.045953039079904556, + "learning_rate": 6.724559698176269e-06, + "loss": 0.0013, + "step": 53150 + }, + { + "epoch": 0.8984054823690459, + "grad_norm": 0.03636516258120537, + "learning_rate": 6.723175296804594e-06, + "loss": 0.0012, + "step": 53160 + }, + { + "epoch": 0.8985744826478964, + "grad_norm": 0.06074857339262962, + "learning_rate": 6.7217907455083176e-06, + "loss": 0.0016, + "step": 53170 + }, + { + "epoch": 0.8987434829267469, + "grad_norm": 0.028408514335751534, + "learning_rate": 6.720406044407905e-06, + "loss": 0.0023, + "step": 53180 + }, + { + "epoch": 0.8989124832055972, + "grad_norm": 0.04262072965502739, + "learning_rate": 6.719021193623832e-06, + "loss": 0.0014, + "step": 53190 + }, + { + "epoch": 0.8990814834844477, + "grad_norm": 0.004805940669029951, + "learning_rate": 6.717636193276584e-06, + "loss": 0.0009, + "step": 53200 + }, + { + "epoch": 0.8992504837632982, + "grad_norm": 0.13305015861988068, + "learning_rate": 6.716251043486665e-06, + "loss": 0.0021, + "step": 53210 + }, + { + "epoch": 0.8994194840421487, + "grad_norm": 0.056643761694431305, + "learning_rate": 6.714865744374591e-06, + "loss": 0.0018, + "step": 53220 + }, + { + "epoch": 0.8995884843209991, + "grad_norm": 0.021547023206949234, + "learning_rate": 6.713480296060888e-06, + "loss": 0.0026, + "step": 53230 + }, + { + "epoch": 0.8997574845998496, + "grad_norm": 0.20311525464057922, + "learning_rate": 6.712094698666099e-06, + "loss": 0.001, + "step": 53240 + }, + { + "epoch": 0.8999264848787001, + "grad_norm": 0.051384858787059784, + "learning_rate": 6.710708952310774e-06, + "loss": 0.0015, + "step": 53250 + }, + { + "epoch": 0.9000954851575506, + "grad_norm": 0.004437590949237347, + "learning_rate": 6.709323057115482e-06, + "loss": 0.0021, + "step": 53260 + }, + { + "epoch": 0.9002644854364009, + "grad_norm": 0.04546438902616501, + "learning_rate": 6.707937013200803e-06, + "loss": 0.0013, + "step": 53270 + }, + { + "epoch": 0.9004334857152514, + "grad_norm": 0.05520818009972572, + "learning_rate": 6.706550820687328e-06, + "loss": 0.0013, + "step": 53280 + }, + { + "epoch": 0.9006024859941019, + "grad_norm": 0.10690100491046906, + "learning_rate": 6.7051644796956624e-06, + "loss": 0.0014, + "step": 53290 + }, + { + "epoch": 0.9007714862729523, + 
"grad_norm": 0.029496680945158005, + "learning_rate": 6.703777990346427e-06, + "loss": 0.0014, + "step": 53300 + }, + { + "epoch": 0.9009404865518028, + "grad_norm": 0.10258401930332184, + "learning_rate": 6.7023913527602506e-06, + "loss": 0.0017, + "step": 53310 + }, + { + "epoch": 0.9011094868306533, + "grad_norm": 0.06807773560285568, + "learning_rate": 6.701004567057777e-06, + "loss": 0.0023, + "step": 53320 + }, + { + "epoch": 0.9012784871095038, + "grad_norm": 0.10241279006004333, + "learning_rate": 6.699617633359666e-06, + "loss": 0.0013, + "step": 53330 + }, + { + "epoch": 0.9014474873883542, + "grad_norm": 0.07315660268068314, + "learning_rate": 6.698230551786586e-06, + "loss": 0.0026, + "step": 53340 + }, + { + "epoch": 0.9016164876672047, + "grad_norm": 0.01719767041504383, + "learning_rate": 6.69684332245922e-06, + "loss": 0.0012, + "step": 53350 + }, + { + "epoch": 0.9017854879460551, + "grad_norm": 0.010834837332367897, + "learning_rate": 6.695455945498264e-06, + "loss": 0.002, + "step": 53360 + }, + { + "epoch": 0.9019544882249055, + "grad_norm": 0.06381508708000183, + "learning_rate": 6.694068421024425e-06, + "loss": 0.0039, + "step": 53370 + }, + { + "epoch": 0.902123488503756, + "grad_norm": 0.03186887130141258, + "learning_rate": 6.6926807491584255e-06, + "loss": 0.002, + "step": 53380 + }, + { + "epoch": 0.9022924887826065, + "grad_norm": 0.04062811657786369, + "learning_rate": 6.691292930021001e-06, + "loss": 0.0009, + "step": 53390 + }, + { + "epoch": 0.902461489061457, + "grad_norm": 0.018574338406324387, + "learning_rate": 6.689904963732895e-06, + "loss": 0.0015, + "step": 53400 + }, + { + "epoch": 0.9026304893403074, + "grad_norm": 0.03585190325975418, + "learning_rate": 6.68851685041487e-06, + "loss": 0.0016, + "step": 53410 + }, + { + "epoch": 0.9027994896191579, + "grad_norm": 0.015502367168664932, + "learning_rate": 6.687128590187698e-06, + "loss": 0.0018, + "step": 53420 + }, + { + "epoch": 0.9029684898980084, + "grad_norm": 0.05451056733727455, + "learning_rate": 6.6857401831721645e-06, + "loss": 0.0013, + "step": 53430 + }, + { + "epoch": 0.9031374901768587, + "grad_norm": 0.05533871054649353, + "learning_rate": 6.684351629489067e-06, + "loss": 0.002, + "step": 53440 + }, + { + "epoch": 0.9033064904557092, + "grad_norm": 0.04295478016138077, + "learning_rate": 6.682962929259218e-06, + "loss": 0.0021, + "step": 53450 + }, + { + "epoch": 0.9034754907345597, + "grad_norm": 0.003592574968934059, + "learning_rate": 6.681574082603439e-06, + "loss": 0.0018, + "step": 53460 + }, + { + "epoch": 0.9036444910134102, + "grad_norm": 0.07409350574016571, + "learning_rate": 6.680185089642568e-06, + "loss": 0.0018, + "step": 53470 + }, + { + "epoch": 0.9038134912922606, + "grad_norm": 0.09028099477291107, + "learning_rate": 6.678795950497453e-06, + "loss": 0.0018, + "step": 53480 + }, + { + "epoch": 0.9039824915711111, + "grad_norm": 0.016657106578350067, + "learning_rate": 6.6774066652889565e-06, + "loss": 0.004, + "step": 53490 + }, + { + "epoch": 0.9041514918499616, + "grad_norm": 0.12282165139913559, + "learning_rate": 6.6760172341379535e-06, + "loss": 0.0018, + "step": 53500 + }, + { + "epoch": 0.904320492128812, + "grad_norm": 0.05469250679016113, + "learning_rate": 6.67462765716533e-06, + "loss": 0.0013, + "step": 53510 + }, + { + "epoch": 0.9044894924076625, + "grad_norm": 0.11904332041740417, + "learning_rate": 6.673237934491988e-06, + "loss": 0.0013, + "step": 53520 + }, + { + "epoch": 0.9046584926865129, + "grad_norm": 0.08364378660917282, + "learning_rate": 
6.671848066238836e-06, + "loss": 0.0008, + "step": 53530 + }, + { + "epoch": 0.9048274929653634, + "grad_norm": 0.05299696698784828, + "learning_rate": 6.6704580525268035e-06, + "loss": 0.0011, + "step": 53540 + }, + { + "epoch": 0.9049964932442138, + "grad_norm": 0.08057032525539398, + "learning_rate": 6.669067893476827e-06, + "loss": 0.0027, + "step": 53550 + }, + { + "epoch": 0.9051654935230643, + "grad_norm": 0.10579432547092438, + "learning_rate": 6.667677589209858e-06, + "loss": 0.0016, + "step": 53560 + }, + { + "epoch": 0.9053344938019148, + "grad_norm": 0.05485056713223457, + "learning_rate": 6.666287139846857e-06, + "loss": 0.0017, + "step": 53570 + }, + { + "epoch": 0.9055034940807652, + "grad_norm": 0.04897540062665939, + "learning_rate": 6.6648965455088025e-06, + "loss": 0.002, + "step": 53580 + }, + { + "epoch": 0.9056724943596157, + "grad_norm": 0.07316815853118896, + "learning_rate": 6.663505806316681e-06, + "loss": 0.0007, + "step": 53590 + }, + { + "epoch": 0.9058414946384662, + "grad_norm": 0.02678026631474495, + "learning_rate": 6.662114922391494e-06, + "loss": 0.0012, + "step": 53600 + }, + { + "epoch": 0.9060104949173167, + "grad_norm": 0.08421721309423447, + "learning_rate": 6.660723893854256e-06, + "loss": 0.0014, + "step": 53610 + }, + { + "epoch": 0.906179495196167, + "grad_norm": 0.061888616532087326, + "learning_rate": 6.6593327208259935e-06, + "loss": 0.0029, + "step": 53620 + }, + { + "epoch": 0.9063484954750175, + "grad_norm": 0.07698092609643936, + "learning_rate": 6.657941403427745e-06, + "loss": 0.0015, + "step": 53630 + }, + { + "epoch": 0.906517495753868, + "grad_norm": 0.019941125065088272, + "learning_rate": 6.6565499417805615e-06, + "loss": 0.0027, + "step": 53640 + }, + { + "epoch": 0.9066864960327184, + "grad_norm": 0.04463280364871025, + "learning_rate": 6.655158336005505e-06, + "loss": 0.002, + "step": 53650 + }, + { + "epoch": 0.9068554963115689, + "grad_norm": 0.03760061785578728, + "learning_rate": 6.653766586223656e-06, + "loss": 0.0012, + "step": 53660 + }, + { + "epoch": 0.9070244965904194, + "grad_norm": 0.05356159806251526, + "learning_rate": 6.652374692556101e-06, + "loss": 0.0014, + "step": 53670 + }, + { + "epoch": 0.9071934968692699, + "grad_norm": 0.0786006897687912, + "learning_rate": 6.650982655123941e-06, + "loss": 0.0018, + "step": 53680 + }, + { + "epoch": 0.9073624971481203, + "grad_norm": 0.041036710143089294, + "learning_rate": 6.64959047404829e-06, + "loss": 0.0026, + "step": 53690 + }, + { + "epoch": 0.9075314974269707, + "grad_norm": 0.09620804339647293, + "learning_rate": 6.648198149450277e-06, + "loss": 0.0018, + "step": 53700 + }, + { + "epoch": 0.9077004977058212, + "grad_norm": 0.01466481015086174, + "learning_rate": 6.6468056814510385e-06, + "loss": 0.002, + "step": 53710 + }, + { + "epoch": 0.9078694979846716, + "grad_norm": 0.14561620354652405, + "learning_rate": 6.645413070171726e-06, + "loss": 0.0014, + "step": 53720 + }, + { + "epoch": 0.9080384982635221, + "grad_norm": 0.01577634923160076, + "learning_rate": 6.644020315733505e-06, + "loss": 0.0018, + "step": 53730 + }, + { + "epoch": 0.9082074985423726, + "grad_norm": 0.0007173779886215925, + "learning_rate": 6.642627418257551e-06, + "loss": 0.0015, + "step": 53740 + }, + { + "epoch": 0.9083764988212231, + "grad_norm": 0.04145677387714386, + "learning_rate": 6.641234377865053e-06, + "loss": 0.0024, + "step": 53750 + }, + { + "epoch": 0.9085454991000735, + "grad_norm": 0.003429944859817624, + "learning_rate": 6.639841194677213e-06, + "loss": 0.0016, + "step": 
53760 + }, + { + "epoch": 0.908714499378924, + "grad_norm": 0.0353064239025116, + "learning_rate": 6.638447868815243e-06, + "loss": 0.0016, + "step": 53770 + }, + { + "epoch": 0.9088834996577745, + "grad_norm": 0.00795475672930479, + "learning_rate": 6.63705440040037e-06, + "loss": 0.0015, + "step": 53780 + }, + { + "epoch": 0.9090524999366248, + "grad_norm": 0.0757342278957367, + "learning_rate": 6.635660789553833e-06, + "loss": 0.0022, + "step": 53790 + }, + { + "epoch": 0.9092215002154753, + "grad_norm": 0.018039211630821228, + "learning_rate": 6.634267036396881e-06, + "loss": 0.0068, + "step": 53800 + }, + { + "epoch": 0.9093905004943258, + "grad_norm": 0.09006370604038239, + "learning_rate": 6.63287314105078e-06, + "loss": 0.0014, + "step": 53810 + }, + { + "epoch": 0.9095595007731763, + "grad_norm": 0.04026485234498978, + "learning_rate": 6.631479103636803e-06, + "loss": 0.0014, + "step": 53820 + }, + { + "epoch": 0.9097285010520267, + "grad_norm": 0.05421081930398941, + "learning_rate": 6.630084924276241e-06, + "loss": 0.0024, + "step": 53830 + }, + { + "epoch": 0.9098975013308772, + "grad_norm": 0.0850011482834816, + "learning_rate": 6.628690603090391e-06, + "loss": 0.002, + "step": 53840 + }, + { + "epoch": 0.9100665016097277, + "grad_norm": 0.006448943633586168, + "learning_rate": 6.627296140200569e-06, + "loss": 0.0011, + "step": 53850 + }, + { + "epoch": 0.9102355018885782, + "grad_norm": 0.006143988575786352, + "learning_rate": 6.6259015357280965e-06, + "loss": 0.0024, + "step": 53860 + }, + { + "epoch": 0.9104045021674286, + "grad_norm": 0.029627706855535507, + "learning_rate": 6.624506789794313e-06, + "loss": 0.0014, + "step": 53870 + }, + { + "epoch": 0.910573502446279, + "grad_norm": 0.05208719149231911, + "learning_rate": 6.623111902520569e-06, + "loss": 0.0012, + "step": 53880 + }, + { + "epoch": 0.9107425027251295, + "grad_norm": 0.04385032504796982, + "learning_rate": 6.6217168740282245e-06, + "loss": 0.0011, + "step": 53890 + }, + { + "epoch": 0.9109115030039799, + "grad_norm": 0.12211822718381882, + "learning_rate": 6.6203217044386546e-06, + "loss": 0.0016, + "step": 53900 + }, + { + "epoch": 0.9110805032828304, + "grad_norm": 0.11783907562494278, + "learning_rate": 6.618926393873246e-06, + "loss": 0.0013, + "step": 53910 + }, + { + "epoch": 0.9112495035616809, + "grad_norm": 0.03287163004279137, + "learning_rate": 6.6175309424533985e-06, + "loss": 0.0016, + "step": 53920 + }, + { + "epoch": 0.9114185038405314, + "grad_norm": 0.022966263815760612, + "learning_rate": 6.616135350300521e-06, + "loss": 0.002, + "step": 53930 + }, + { + "epoch": 0.9115875041193818, + "grad_norm": 0.4584880769252777, + "learning_rate": 6.614739617536037e-06, + "loss": 0.0014, + "step": 53940 + }, + { + "epoch": 0.9117565043982323, + "grad_norm": 0.05042840167880058, + "learning_rate": 6.613343744281383e-06, + "loss": 0.0031, + "step": 53950 + }, + { + "epoch": 0.9119255046770827, + "grad_norm": 0.05508103221654892, + "learning_rate": 6.611947730658006e-06, + "loss": 0.0011, + "step": 53960 + }, + { + "epoch": 0.9120945049559331, + "grad_norm": 0.16197894513607025, + "learning_rate": 6.610551576787367e-06, + "loss": 0.0019, + "step": 53970 + }, + { + "epoch": 0.9122635052347836, + "grad_norm": 0.059720925986766815, + "learning_rate": 6.609155282790937e-06, + "loss": 0.0016, + "step": 53980 + }, + { + "epoch": 0.9124325055136341, + "grad_norm": 0.02155834622681141, + "learning_rate": 6.607758848790201e-06, + "loss": 0.0013, + "step": 53990 + }, + { + "epoch": 0.9126015057924846, + 
"grad_norm": 0.03507447615265846, + "learning_rate": 6.606362274906655e-06, + "loss": 0.0031, + "step": 54000 + }, + { + "epoch": 0.912770506071335, + "grad_norm": 0.053267836570739746, + "learning_rate": 6.604965561261809e-06, + "loss": 0.0017, + "step": 54010 + }, + { + "epoch": 0.9129395063501855, + "grad_norm": 0.1057472974061966, + "learning_rate": 6.603568707977183e-06, + "loss": 0.0014, + "step": 54020 + }, + { + "epoch": 0.913108506629036, + "grad_norm": 0.003757775528356433, + "learning_rate": 6.602171715174309e-06, + "loss": 0.0019, + "step": 54030 + }, + { + "epoch": 0.9132775069078864, + "grad_norm": 0.0017160142306238413, + "learning_rate": 6.600774582974734e-06, + "loss": 0.0011, + "step": 54040 + }, + { + "epoch": 0.9134465071867368, + "grad_norm": 0.07644976675510406, + "learning_rate": 6.599377311500014e-06, + "loss": 0.0023, + "step": 54050 + }, + { + "epoch": 0.9136155074655873, + "grad_norm": 0.1244252622127533, + "learning_rate": 6.5979799008717186e-06, + "loss": 0.0017, + "step": 54060 + }, + { + "epoch": 0.9137845077444378, + "grad_norm": 0.03451064974069595, + "learning_rate": 6.596582351211429e-06, + "loss": 0.0048, + "step": 54070 + }, + { + "epoch": 0.9139535080232882, + "grad_norm": 0.04706354811787605, + "learning_rate": 6.595184662640741e-06, + "loss": 0.0023, + "step": 54080 + }, + { + "epoch": 0.9141225083021387, + "grad_norm": 0.04288541525602341, + "learning_rate": 6.5937868352812565e-06, + "loss": 0.0023, + "step": 54090 + }, + { + "epoch": 0.9142915085809892, + "grad_norm": 0.03782866150140762, + "learning_rate": 6.592388869254596e-06, + "loss": 0.0008, + "step": 54100 + }, + { + "epoch": 0.9144605088598396, + "grad_norm": 0.025055140256881714, + "learning_rate": 6.5909907646823876e-06, + "loss": 0.0017, + "step": 54110 + }, + { + "epoch": 0.9146295091386901, + "grad_norm": 0.03553859144449234, + "learning_rate": 6.589592521686277e-06, + "loss": 0.0027, + "step": 54120 + }, + { + "epoch": 0.9147985094175405, + "grad_norm": 0.06720779091119766, + "learning_rate": 6.5881941403879125e-06, + "loss": 0.0017, + "step": 54130 + }, + { + "epoch": 0.914967509696391, + "grad_norm": 0.045898016542196274, + "learning_rate": 6.586795620908964e-06, + "loss": 0.0019, + "step": 54140 + }, + { + "epoch": 0.9151365099752414, + "grad_norm": 0.11866870522499084, + "learning_rate": 6.585396963371108e-06, + "loss": 0.0017, + "step": 54150 + }, + { + "epoch": 0.9153055102540919, + "grad_norm": 0.057932619005441666, + "learning_rate": 6.583998167896035e-06, + "loss": 0.0015, + "step": 54160 + }, + { + "epoch": 0.9154745105329424, + "grad_norm": 0.03443612530827522, + "learning_rate": 6.5825992346054454e-06, + "loss": 0.0007, + "step": 54170 + }, + { + "epoch": 0.9156435108117928, + "grad_norm": 0.11433347314596176, + "learning_rate": 6.581200163621055e-06, + "loss": 0.0019, + "step": 54180 + }, + { + "epoch": 0.9158125110906433, + "grad_norm": 0.04202871024608612, + "learning_rate": 6.57980095506459e-06, + "loss": 0.0013, + "step": 54190 + }, + { + "epoch": 0.9159815113694938, + "grad_norm": 0.030800852924585342, + "learning_rate": 6.578401609057789e-06, + "loss": 0.002, + "step": 54200 + }, + { + "epoch": 0.9161505116483443, + "grad_norm": 0.02836521342396736, + "learning_rate": 6.577002125722398e-06, + "loss": 0.0015, + "step": 54210 + }, + { + "epoch": 0.9163195119271946, + "grad_norm": 0.11328999698162079, + "learning_rate": 6.575602505180183e-06, + "loss": 0.0021, + "step": 54220 + }, + { + "epoch": 0.9164885122060451, + "grad_norm": 0.015508916229009628, + 
"learning_rate": 6.574202747552914e-06, + "loss": 0.0011, + "step": 54230 + }, + { + "epoch": 0.9166575124848956, + "grad_norm": 0.040631003677845, + "learning_rate": 6.572802852962381e-06, + "loss": 0.0007, + "step": 54240 + }, + { + "epoch": 0.916826512763746, + "grad_norm": 0.0665983110666275, + "learning_rate": 6.571402821530378e-06, + "loss": 0.0011, + "step": 54250 + }, + { + "epoch": 0.9169955130425965, + "grad_norm": 0.053036030381917953, + "learning_rate": 6.570002653378717e-06, + "loss": 0.0016, + "step": 54260 + }, + { + "epoch": 0.917164513321447, + "grad_norm": 0.09500153362751007, + "learning_rate": 6.568602348629217e-06, + "loss": 0.0012, + "step": 54270 + }, + { + "epoch": 0.9173335136002975, + "grad_norm": 0.02812996692955494, + "learning_rate": 6.567201907403713e-06, + "loss": 0.001, + "step": 54280 + }, + { + "epoch": 0.9175025138791479, + "grad_norm": 0.02416916936635971, + "learning_rate": 6.565801329824051e-06, + "loss": 0.0019, + "step": 54290 + }, + { + "epoch": 0.9176715141579984, + "grad_norm": 0.09245016425848007, + "learning_rate": 6.564400616012085e-06, + "loss": 0.0009, + "step": 54300 + }, + { + "epoch": 0.9178405144368488, + "grad_norm": 0.09982512891292572, + "learning_rate": 6.562999766089687e-06, + "loss": 0.0014, + "step": 54310 + }, + { + "epoch": 0.9180095147156992, + "grad_norm": 0.06721735000610352, + "learning_rate": 6.561598780178736e-06, + "loss": 0.0016, + "step": 54320 + }, + { + "epoch": 0.9181785149945497, + "grad_norm": 0.08400999009609222, + "learning_rate": 6.560197658401126e-06, + "loss": 0.0011, + "step": 54330 + }, + { + "epoch": 0.9183475152734002, + "grad_norm": 0.14300887286663055, + "learning_rate": 6.55879640087876e-06, + "loss": 0.0019, + "step": 54340 + }, + { + "epoch": 0.9185165155522507, + "grad_norm": 0.028444314375519753, + "learning_rate": 6.557395007733554e-06, + "loss": 0.0015, + "step": 54350 + }, + { + "epoch": 0.9186855158311011, + "grad_norm": 0.040723320096731186, + "learning_rate": 6.555993479087436e-06, + "loss": 0.0017, + "step": 54360 + }, + { + "epoch": 0.9188545161099516, + "grad_norm": 0.04981546476483345, + "learning_rate": 6.554591815062346e-06, + "loss": 0.0016, + "step": 54370 + }, + { + "epoch": 0.9190235163888021, + "grad_norm": 0.06033708155155182, + "learning_rate": 6.553190015780238e-06, + "loss": 0.001, + "step": 54380 + }, + { + "epoch": 0.9191925166676524, + "grad_norm": 0.036511003971099854, + "learning_rate": 6.551788081363072e-06, + "loss": 0.0016, + "step": 54390 + }, + { + "epoch": 0.9193615169465029, + "grad_norm": 0.050383757799863815, + "learning_rate": 6.550386011932824e-06, + "loss": 0.0016, + "step": 54400 + }, + { + "epoch": 0.9195305172253534, + "grad_norm": 0.06114116311073303, + "learning_rate": 6.548983807611482e-06, + "loss": 0.0016, + "step": 54410 + }, + { + "epoch": 0.9196995175042039, + "grad_norm": 0.0021533877588808537, + "learning_rate": 6.547581468521044e-06, + "loss": 0.0009, + "step": 54420 + }, + { + "epoch": 0.9198685177830543, + "grad_norm": 0.03272373229265213, + "learning_rate": 6.546178994783519e-06, + "loss": 0.0013, + "step": 54430 + }, + { + "epoch": 0.9200375180619048, + "grad_norm": 0.00193277548532933, + "learning_rate": 6.544776386520931e-06, + "loss": 0.0012, + "step": 54440 + }, + { + "epoch": 0.9202065183407553, + "grad_norm": 0.03134723752737045, + "learning_rate": 6.543373643855312e-06, + "loss": 0.0013, + "step": 54450 + }, + { + "epoch": 0.9203755186196058, + "grad_norm": 0.03538252040743828, + "learning_rate": 6.541970766908707e-06, + "loss": 0.0011, + 
"step": 54460 + }, + { + "epoch": 0.9205445188984562, + "grad_norm": 0.06776958703994751, + "learning_rate": 6.540567755803177e-06, + "loss": 0.0013, + "step": 54470 + }, + { + "epoch": 0.9207135191773066, + "grad_norm": 0.10667550563812256, + "learning_rate": 6.539164610660785e-06, + "loss": 0.0012, + "step": 54480 + }, + { + "epoch": 0.9208825194561571, + "grad_norm": 0.06932281702756882, + "learning_rate": 6.537761331603617e-06, + "loss": 0.0013, + "step": 54490 + }, + { + "epoch": 0.9210515197350075, + "grad_norm": 0.058006856590509415, + "learning_rate": 6.536357918753762e-06, + "loss": 0.0018, + "step": 54500 + }, + { + "epoch": 0.921220520013858, + "grad_norm": 0.19771824777126312, + "learning_rate": 6.534954372233324e-06, + "loss": 0.0015, + "step": 54510 + }, + { + "epoch": 0.9213895202927085, + "grad_norm": 0.00955281127244234, + "learning_rate": 6.533550692164419e-06, + "loss": 0.0014, + "step": 54520 + }, + { + "epoch": 0.921558520571559, + "grad_norm": 0.03951649367809296, + "learning_rate": 6.532146878669172e-06, + "loss": 0.0014, + "step": 54530 + }, + { + "epoch": 0.9217275208504094, + "grad_norm": 0.11181564629077911, + "learning_rate": 6.530742931869725e-06, + "loss": 0.0014, + "step": 54540 + }, + { + "epoch": 0.9218965211292599, + "grad_norm": 0.03836284950375557, + "learning_rate": 6.529338851888225e-06, + "loss": 0.0018, + "step": 54550 + }, + { + "epoch": 0.9220655214081104, + "grad_norm": 0.10049938410520554, + "learning_rate": 6.527934638846836e-06, + "loss": 0.0019, + "step": 54560 + }, + { + "epoch": 0.9222345216869607, + "grad_norm": 0.04472474008798599, + "learning_rate": 6.526530292867729e-06, + "loss": 0.0018, + "step": 54570 + }, + { + "epoch": 0.9224035219658112, + "grad_norm": 0.10268321633338928, + "learning_rate": 6.5251258140730924e-06, + "loss": 0.0036, + "step": 54580 + }, + { + "epoch": 0.9225725222446617, + "grad_norm": 0.030455652624368668, + "learning_rate": 6.523721202585118e-06, + "loss": 0.0017, + "step": 54590 + }, + { + "epoch": 0.9227415225235122, + "grad_norm": 0.07699055969715118, + "learning_rate": 6.522316458526019e-06, + "loss": 0.0019, + "step": 54600 + }, + { + "epoch": 0.9229105228023626, + "grad_norm": 0.08997844159603119, + "learning_rate": 6.520911582018012e-06, + "loss": 0.0014, + "step": 54610 + }, + { + "epoch": 0.9230795230812131, + "grad_norm": 0.1037558987736702, + "learning_rate": 6.519506573183328e-06, + "loss": 0.0018, + "step": 54620 + }, + { + "epoch": 0.9232485233600636, + "grad_norm": 0.031466227024793625, + "learning_rate": 6.518101432144208e-06, + "loss": 0.0023, + "step": 54630 + }, + { + "epoch": 0.923417523638914, + "grad_norm": 0.15260553359985352, + "learning_rate": 6.5166961590229105e-06, + "loss": 0.003, + "step": 54640 + }, + { + "epoch": 0.9235865239177644, + "grad_norm": 0.07779121398925781, + "learning_rate": 6.515290753941697e-06, + "loss": 0.0028, + "step": 54650 + }, + { + "epoch": 0.9237555241966149, + "grad_norm": 0.04281062260270119, + "learning_rate": 6.513885217022846e-06, + "loss": 0.001, + "step": 54660 + }, + { + "epoch": 0.9239245244754654, + "grad_norm": 0.04976315423846245, + "learning_rate": 6.512479548388647e-06, + "loss": 0.001, + "step": 54670 + }, + { + "epoch": 0.9240935247543158, + "grad_norm": 0.05488727241754532, + "learning_rate": 6.5110737481614e-06, + "loss": 0.0014, + "step": 54680 + }, + { + "epoch": 0.9242625250331663, + "grad_norm": 0.054966460913419724, + "learning_rate": 6.509667816463414e-06, + "loss": 0.001, + "step": 54690 + }, + { + "epoch": 0.9244315253120168, + 
"grad_norm": 0.08770200610160828, + "learning_rate": 6.508261753417014e-06, + "loss": 0.0011, + "step": 54700 + }, + { + "epoch": 0.9246005255908673, + "grad_norm": 0.027067530900239944, + "learning_rate": 6.506855559144535e-06, + "loss": 0.0017, + "step": 54710 + }, + { + "epoch": 0.9247695258697177, + "grad_norm": 0.050520192831754684, + "learning_rate": 6.5054492337683205e-06, + "loss": 0.0015, + "step": 54720 + }, + { + "epoch": 0.9249385261485682, + "grad_norm": 0.09086226671934128, + "learning_rate": 6.504042777410728e-06, + "loss": 0.0016, + "step": 54730 + }, + { + "epoch": 0.9251075264274186, + "grad_norm": 0.026549015194177628, + "learning_rate": 6.502636190194127e-06, + "loss": 0.004, + "step": 54740 + }, + { + "epoch": 0.925276526706269, + "grad_norm": 0.009608623571693897, + "learning_rate": 6.501229472240896e-06, + "loss": 0.0024, + "step": 54750 + }, + { + "epoch": 0.9254455269851195, + "grad_norm": 0.03781052678823471, + "learning_rate": 6.499822623673429e-06, + "loss": 0.0011, + "step": 54760 + }, + { + "epoch": 0.92561452726397, + "grad_norm": 0.03538934886455536, + "learning_rate": 6.498415644614126e-06, + "loss": 0.0017, + "step": 54770 + }, + { + "epoch": 0.9257835275428205, + "grad_norm": 0.058362703770399094, + "learning_rate": 6.497008535185402e-06, + "loss": 0.0014, + "step": 54780 + }, + { + "epoch": 0.9259525278216709, + "grad_norm": 0.01351145002990961, + "learning_rate": 6.495601295509683e-06, + "loss": 0.0011, + "step": 54790 + }, + { + "epoch": 0.9261215281005214, + "grad_norm": 0.05451357737183571, + "learning_rate": 6.494193925709405e-06, + "loss": 0.0022, + "step": 54800 + }, + { + "epoch": 0.9262905283793719, + "grad_norm": 0.1450265794992447, + "learning_rate": 6.492786425907015e-06, + "loss": 0.0015, + "step": 54810 + }, + { + "epoch": 0.9264595286582222, + "grad_norm": 0.12919071316719055, + "learning_rate": 6.4913787962249745e-06, + "loss": 0.0017, + "step": 54820 + }, + { + "epoch": 0.9266285289370727, + "grad_norm": 0.006917062681168318, + "learning_rate": 6.489971036785752e-06, + "loss": 0.001, + "step": 54830 + }, + { + "epoch": 0.9267975292159232, + "grad_norm": 0.008178263902664185, + "learning_rate": 6.488563147711829e-06, + "loss": 0.0019, + "step": 54840 + }, + { + "epoch": 0.9269665294947737, + "grad_norm": 0.04077032953500748, + "learning_rate": 6.487155129125701e-06, + "loss": 0.0013, + "step": 54850 + }, + { + "epoch": 0.9271355297736241, + "grad_norm": 0.06073884665966034, + "learning_rate": 6.485746981149872e-06, + "loss": 0.0018, + "step": 54860 + }, + { + "epoch": 0.9273045300524746, + "grad_norm": 0.028377428650856018, + "learning_rate": 6.4843387039068566e-06, + "loss": 0.0016, + "step": 54870 + }, + { + "epoch": 0.9274735303313251, + "grad_norm": 0.3094031810760498, + "learning_rate": 6.482930297519181e-06, + "loss": 0.0017, + "step": 54880 + }, + { + "epoch": 0.9276425306101755, + "grad_norm": 0.04300961270928383, + "learning_rate": 6.481521762109386e-06, + "loss": 0.0015, + "step": 54890 + }, + { + "epoch": 0.927811530889026, + "grad_norm": 0.13757209479808807, + "learning_rate": 6.48011309780002e-06, + "loss": 0.0017, + "step": 54900 + }, + { + "epoch": 0.9279805311678764, + "grad_norm": 0.020510252565145493, + "learning_rate": 6.478704304713641e-06, + "loss": 0.0032, + "step": 54910 + }, + { + "epoch": 0.9281495314467268, + "grad_norm": 0.209051713347435, + "learning_rate": 6.477295382972826e-06, + "loss": 0.0015, + "step": 54920 + }, + { + "epoch": 0.9283185317255773, + "grad_norm": 0.35918501019477844, + "learning_rate": 
6.475886332700152e-06, + "loss": 0.0019, + "step": 54930 + }, + { + "epoch": 0.9284875320044278, + "grad_norm": 0.03130156919360161, + "learning_rate": 6.4744771540182175e-06, + "loss": 0.002, + "step": 54940 + }, + { + "epoch": 0.9286565322832783, + "grad_norm": 0.17833785712718964, + "learning_rate": 6.473067847049627e-06, + "loss": 0.0014, + "step": 54950 + }, + { + "epoch": 0.9288255325621287, + "grad_norm": 0.02514052763581276, + "learning_rate": 6.4716584119169956e-06, + "loss": 0.0017, + "step": 54960 + }, + { + "epoch": 0.9289945328409792, + "grad_norm": 0.030019115656614304, + "learning_rate": 6.4702488487429526e-06, + "loss": 0.0016, + "step": 54970 + }, + { + "epoch": 0.9291635331198297, + "grad_norm": 0.03886539489030838, + "learning_rate": 6.468839157650138e-06, + "loss": 0.0013, + "step": 54980 + }, + { + "epoch": 0.9293325333986802, + "grad_norm": 0.006110138725489378, + "learning_rate": 6.467429338761197e-06, + "loss": 0.002, + "step": 54990 + }, + { + "epoch": 0.9295015336775305, + "grad_norm": 0.06936800479888916, + "learning_rate": 6.466019392198795e-06, + "loss": 0.0022, + "step": 55000 + }, + { + "epoch": 0.929670533956381, + "grad_norm": 0.04609772190451622, + "learning_rate": 6.464609318085602e-06, + "loss": 0.0013, + "step": 55010 + }, + { + "epoch": 0.9298395342352315, + "grad_norm": 0.04684607684612274, + "learning_rate": 6.463199116544303e-06, + "loss": 0.0012, + "step": 55020 + }, + { + "epoch": 0.9300085345140819, + "grad_norm": 0.03586406260728836, + "learning_rate": 6.4617887876975916e-06, + "loss": 0.0011, + "step": 55030 + }, + { + "epoch": 0.9301775347929324, + "grad_norm": 0.13385647535324097, + "learning_rate": 6.460378331668174e-06, + "loss": 0.0023, + "step": 55040 + }, + { + "epoch": 0.9303465350717829, + "grad_norm": 0.047625139355659485, + "learning_rate": 6.458967748578764e-06, + "loss": 0.0017, + "step": 55050 + }, + { + "epoch": 0.9305155353506334, + "grad_norm": 0.1226748377084732, + "learning_rate": 6.457557038552091e-06, + "loss": 0.001, + "step": 55060 + }, + { + "epoch": 0.9306845356294838, + "grad_norm": 0.2520878314971924, + "learning_rate": 6.456146201710895e-06, + "loss": 0.0028, + "step": 55070 + }, + { + "epoch": 0.9308535359083342, + "grad_norm": 0.043165381997823715, + "learning_rate": 6.454735238177924e-06, + "loss": 0.0015, + "step": 55080 + }, + { + "epoch": 0.9310225361871847, + "grad_norm": 0.06458351761102676, + "learning_rate": 6.453324148075939e-06, + "loss": 0.002, + "step": 55090 + }, + { + "epoch": 0.9311915364660351, + "grad_norm": 0.16306425631046295, + "learning_rate": 6.4519129315277104e-06, + "loss": 0.0016, + "step": 55100 + }, + { + "epoch": 0.9313605367448856, + "grad_norm": 0.05315855145454407, + "learning_rate": 6.450501588656024e-06, + "loss": 0.0008, + "step": 55110 + }, + { + "epoch": 0.9315295370237361, + "grad_norm": 0.09375635534524918, + "learning_rate": 6.449090119583671e-06, + "loss": 0.0013, + "step": 55120 + }, + { + "epoch": 0.9316985373025866, + "grad_norm": 0.05113506317138672, + "learning_rate": 6.447678524433456e-06, + "loss": 0.0018, + "step": 55130 + }, + { + "epoch": 0.931867537581437, + "grad_norm": 0.01696619763970375, + "learning_rate": 6.4462668033281935e-06, + "loss": 0.001, + "step": 55140 + }, + { + "epoch": 0.9320365378602875, + "grad_norm": 0.000842265144456178, + "learning_rate": 6.444854956390715e-06, + "loss": 0.0013, + "step": 55150 + }, + { + "epoch": 0.932205538139138, + "grad_norm": 0.022497303783893585, + "learning_rate": 6.443442983743853e-06, + "loss": 0.0011, + "step": 55160 
+ }, + { + "epoch": 0.9323745384179883, + "grad_norm": 0.03009202890098095, + "learning_rate": 6.442030885510459e-06, + "loss": 0.0024, + "step": 55170 + }, + { + "epoch": 0.9325435386968388, + "grad_norm": 0.027150332927703857, + "learning_rate": 6.440618661813389e-06, + "loss": 0.0014, + "step": 55180 + }, + { + "epoch": 0.9327125389756893, + "grad_norm": 0.03145081549882889, + "learning_rate": 6.439206312775518e-06, + "loss": 0.001, + "step": 55190 + }, + { + "epoch": 0.9328815392545398, + "grad_norm": 0.06422532349824905, + "learning_rate": 6.437793838519724e-06, + "loss": 0.0018, + "step": 55200 + }, + { + "epoch": 0.9330505395333902, + "grad_norm": 0.08543737232685089, + "learning_rate": 6.4363812391688985e-06, + "loss": 0.0029, + "step": 55210 + }, + { + "epoch": 0.9332195398122407, + "grad_norm": 0.12925246357917786, + "learning_rate": 6.434968514845947e-06, + "loss": 0.0018, + "step": 55220 + }, + { + "epoch": 0.9333885400910912, + "grad_norm": 0.040887799113988876, + "learning_rate": 6.433555665673781e-06, + "loss": 0.0013, + "step": 55230 + }, + { + "epoch": 0.9335575403699417, + "grad_norm": 0.1471778154373169, + "learning_rate": 6.432142691775327e-06, + "loss": 0.002, + "step": 55240 + }, + { + "epoch": 0.9337265406487921, + "grad_norm": 0.07183373719453812, + "learning_rate": 6.430729593273518e-06, + "loss": 0.0016, + "step": 55250 + }, + { + "epoch": 0.9338955409276425, + "grad_norm": 0.04649088531732559, + "learning_rate": 6.429316370291305e-06, + "loss": 0.0013, + "step": 55260 + }, + { + "epoch": 0.934064541206493, + "grad_norm": 0.2931188642978668, + "learning_rate": 6.427903022951642e-06, + "loss": 0.0032, + "step": 55270 + }, + { + "epoch": 0.9342335414853434, + "grad_norm": 0.031886644661426544, + "learning_rate": 6.426489551377497e-06, + "loss": 0.0014, + "step": 55280 + }, + { + "epoch": 0.9344025417641939, + "grad_norm": 0.21854813396930695, + "learning_rate": 6.42507595569185e-06, + "loss": 0.0027, + "step": 55290 + }, + { + "epoch": 0.9345715420430444, + "grad_norm": 0.1134476289153099, + "learning_rate": 6.423662236017692e-06, + "loss": 0.0012, + "step": 55300 + }, + { + "epoch": 0.9347405423218949, + "grad_norm": 0.07070956379175186, + "learning_rate": 6.422248392478019e-06, + "loss": 0.0026, + "step": 55310 + }, + { + "epoch": 0.9349095426007453, + "grad_norm": 0.01285579614341259, + "learning_rate": 6.420834425195845e-06, + "loss": 0.0012, + "step": 55320 + }, + { + "epoch": 0.9350785428795958, + "grad_norm": 0.8135231733322144, + "learning_rate": 6.419420334294193e-06, + "loss": 0.004, + "step": 55330 + }, + { + "epoch": 0.9352475431584462, + "grad_norm": 0.2671804428100586, + "learning_rate": 6.418006119896094e-06, + "loss": 0.0021, + "step": 55340 + }, + { + "epoch": 0.9354165434372966, + "grad_norm": 0.03381161391735077, + "learning_rate": 6.416591782124592e-06, + "loss": 0.0015, + "step": 55350 + }, + { + "epoch": 0.9355855437161471, + "grad_norm": 0.020434875041246414, + "learning_rate": 6.415177321102744e-06, + "loss": 0.0031, + "step": 55360 + }, + { + "epoch": 0.9357545439949976, + "grad_norm": 0.003813839051872492, + "learning_rate": 6.413762736953609e-06, + "loss": 0.0014, + "step": 55370 + }, + { + "epoch": 0.935923544273848, + "grad_norm": 0.1512255221605301, + "learning_rate": 6.412348029800268e-06, + "loss": 0.0021, + "step": 55380 + }, + { + "epoch": 0.9360925445526985, + "grad_norm": 0.01463087648153305, + "learning_rate": 6.410933199765806e-06, + "loss": 0.001, + "step": 55390 + }, + { + "epoch": 0.936261544831549, + "grad_norm": 
0.046215448528528214, + "learning_rate": 6.409518246973318e-06, + "loss": 0.0016, + "step": 55400 + }, + { + "epoch": 0.9364305451103995, + "grad_norm": 0.05667540803551674, + "learning_rate": 6.408103171545913e-06, + "loss": 0.0025, + "step": 55410 + }, + { + "epoch": 0.9365995453892499, + "grad_norm": 0.2856285870075226, + "learning_rate": 6.406687973606709e-06, + "loss": 0.004, + "step": 55420 + }, + { + "epoch": 0.9367685456681003, + "grad_norm": 0.11543390899896622, + "learning_rate": 6.405272653278837e-06, + "loss": 0.0011, + "step": 55430 + }, + { + "epoch": 0.9369375459469508, + "grad_norm": 0.04262326657772064, + "learning_rate": 6.403857210685435e-06, + "loss": 0.0025, + "step": 55440 + }, + { + "epoch": 0.9371065462258013, + "grad_norm": 0.018638089299201965, + "learning_rate": 6.402441645949655e-06, + "loss": 0.0023, + "step": 55450 + }, + { + "epoch": 0.9372755465046517, + "grad_norm": 0.025145070627331734, + "learning_rate": 6.401025959194656e-06, + "loss": 0.0031, + "step": 55460 + }, + { + "epoch": 0.9374445467835022, + "grad_norm": 0.075159952044487, + "learning_rate": 6.39961015054361e-06, + "loss": 0.0016, + "step": 55470 + }, + { + "epoch": 0.9376135470623527, + "grad_norm": 0.18348009884357452, + "learning_rate": 6.398194220119701e-06, + "loss": 0.0011, + "step": 55480 + }, + { + "epoch": 0.9377825473412031, + "grad_norm": 0.06098527833819389, + "learning_rate": 6.396778168046119e-06, + "loss": 0.0011, + "step": 55490 + }, + { + "epoch": 0.9379515476200536, + "grad_norm": 0.013626277446746826, + "learning_rate": 6.39536199444607e-06, + "loss": 0.0017, + "step": 55500 + }, + { + "epoch": 0.938120547898904, + "grad_norm": 0.05409810692071915, + "learning_rate": 6.393945699442765e-06, + "loss": 0.0011, + "step": 55510 + }, + { + "epoch": 0.9382895481777545, + "grad_norm": 0.06189171224832535, + "learning_rate": 6.392529283159432e-06, + "loss": 0.0017, + "step": 55520 + }, + { + "epoch": 0.9384585484566049, + "grad_norm": 0.11078029125928879, + "learning_rate": 6.391112745719303e-06, + "loss": 0.0026, + "step": 55530 + }, + { + "epoch": 0.9386275487354554, + "grad_norm": 0.005103504750877619, + "learning_rate": 6.389696087245626e-06, + "loss": 0.0014, + "step": 55540 + }, + { + "epoch": 0.9387965490143059, + "grad_norm": 0.005580130498856306, + "learning_rate": 6.388279307861656e-06, + "loss": 0.001, + "step": 55550 + }, + { + "epoch": 0.9389655492931563, + "grad_norm": 0.07432939857244492, + "learning_rate": 6.386862407690661e-06, + "loss": 0.0012, + "step": 55560 + }, + { + "epoch": 0.9391345495720068, + "grad_norm": 0.0997578427195549, + "learning_rate": 6.385445386855915e-06, + "loss": 0.0028, + "step": 55570 + }, + { + "epoch": 0.9393035498508573, + "grad_norm": 0.046545471996068954, + "learning_rate": 6.384028245480709e-06, + "loss": 0.0054, + "step": 55580 + }, + { + "epoch": 0.9394725501297078, + "grad_norm": 0.06160053238272667, + "learning_rate": 6.38261098368834e-06, + "loss": 0.0008, + "step": 55590 + }, + { + "epoch": 0.9396415504085581, + "grad_norm": 0.03182736784219742, + "learning_rate": 6.381193601602116e-06, + "loss": 0.0011, + "step": 55600 + }, + { + "epoch": 0.9398105506874086, + "grad_norm": 0.05927756801247597, + "learning_rate": 6.379776099345356e-06, + "loss": 0.0022, + "step": 55610 + }, + { + "epoch": 0.9399795509662591, + "grad_norm": 0.02774706669151783, + "learning_rate": 6.378358477041391e-06, + "loss": 0.001, + "step": 55620 + }, + { + "epoch": 0.9401485512451095, + "grad_norm": 0.059262096881866455, + "learning_rate": 
6.3769407348135595e-06, + "loss": 0.0023, + "step": 55630 + }, + { + "epoch": 0.94031755152396, + "grad_norm": 0.04048911854624748, + "learning_rate": 6.375522872785213e-06, + "loss": 0.0017, + "step": 55640 + }, + { + "epoch": 0.9404865518028105, + "grad_norm": 0.03914349153637886, + "learning_rate": 6.374104891079713e-06, + "loss": 0.0027, + "step": 55650 + }, + { + "epoch": 0.940655552081661, + "grad_norm": 2.4938673973083496, + "learning_rate": 6.37268678982043e-06, + "loss": 0.002, + "step": 55660 + }, + { + "epoch": 0.9408245523605114, + "grad_norm": 0.09228339791297913, + "learning_rate": 6.371268569130744e-06, + "loss": 0.0013, + "step": 55670 + }, + { + "epoch": 0.9409935526393619, + "grad_norm": 0.08421079069375992, + "learning_rate": 6.369850229134049e-06, + "loss": 0.0018, + "step": 55680 + }, + { + "epoch": 0.9411625529182123, + "grad_norm": 0.013725800439715385, + "learning_rate": 6.368431769953747e-06, + "loss": 0.0016, + "step": 55690 + }, + { + "epoch": 0.9413315531970627, + "grad_norm": 0.04755226522684097, + "learning_rate": 6.36701319171325e-06, + "loss": 0.0014, + "step": 55700 + }, + { + "epoch": 0.9415005534759132, + "grad_norm": 0.03607148677110672, + "learning_rate": 6.365594494535982e-06, + "loss": 0.0021, + "step": 55710 + }, + { + "epoch": 0.9416695537547637, + "grad_norm": 0.2749858498573303, + "learning_rate": 6.3641756785453775e-06, + "loss": 0.0012, + "step": 55720 + }, + { + "epoch": 0.9418385540336142, + "grad_norm": 0.09154827147722244, + "learning_rate": 6.36275674386488e-06, + "loss": 0.0012, + "step": 55730 + }, + { + "epoch": 0.9420075543124646, + "grad_norm": 0.03720062971115112, + "learning_rate": 6.361337690617942e-06, + "loss": 0.0013, + "step": 55740 + }, + { + "epoch": 0.9421765545913151, + "grad_norm": 0.040803078562021255, + "learning_rate": 6.35991851892803e-06, + "loss": 0.0015, + "step": 55750 + }, + { + "epoch": 0.9423455548701656, + "grad_norm": 0.15711310505867004, + "learning_rate": 6.358499228918617e-06, + "loss": 0.0015, + "step": 55760 + }, + { + "epoch": 0.9425145551490159, + "grad_norm": 0.011676276102662086, + "learning_rate": 6.357079820713188e-06, + "loss": 0.0014, + "step": 55770 + }, + { + "epoch": 0.9426835554278664, + "grad_norm": 0.144158735871315, + "learning_rate": 6.355660294435242e-06, + "loss": 0.0017, + "step": 55780 + }, + { + "epoch": 0.9428525557067169, + "grad_norm": 0.020729806274175644, + "learning_rate": 6.35424065020828e-06, + "loss": 0.0009, + "step": 55790 + }, + { + "epoch": 0.9430215559855674, + "grad_norm": 0.04956257343292236, + "learning_rate": 6.352820888155821e-06, + "loss": 0.0013, + "step": 55800 + }, + { + "epoch": 0.9431905562644178, + "grad_norm": 0.13145987689495087, + "learning_rate": 6.3514010084013896e-06, + "loss": 0.0024, + "step": 55810 + }, + { + "epoch": 0.9433595565432683, + "grad_norm": 0.09588636457920074, + "learning_rate": 6.3499810110685224e-06, + "loss": 0.0012, + "step": 55820 + }, + { + "epoch": 0.9435285568221188, + "grad_norm": 0.09883897006511688, + "learning_rate": 6.348560896280767e-06, + "loss": 0.001, + "step": 55830 + }, + { + "epoch": 0.9436975571009693, + "grad_norm": 0.027389192953705788, + "learning_rate": 6.34714066416168e-06, + "loss": 0.001, + "step": 55840 + }, + { + "epoch": 0.9438665573798197, + "grad_norm": 0.058483511209487915, + "learning_rate": 6.345720314834828e-06, + "loss": 0.0008, + "step": 55850 + }, + { + "epoch": 0.9440355576586701, + "grad_norm": 0.025423582643270493, + "learning_rate": 6.344299848423788e-06, + "loss": 0.0013, + "step": 55860 + }, + 
{ + "epoch": 0.9442045579375206, + "grad_norm": 0.1596396118402481, + "learning_rate": 6.342879265052149e-06, + "loss": 0.0023, + "step": 55870 + }, + { + "epoch": 0.944373558216371, + "grad_norm": 0.10541984438896179, + "learning_rate": 6.341458564843507e-06, + "loss": 0.0009, + "step": 55880 + }, + { + "epoch": 0.9445425584952215, + "grad_norm": 0.060642823576927185, + "learning_rate": 6.34003774792147e-06, + "loss": 0.0012, + "step": 55890 + }, + { + "epoch": 0.944711558774072, + "grad_norm": 0.08629707247018814, + "learning_rate": 6.3386168144096564e-06, + "loss": 0.0019, + "step": 55900 + }, + { + "epoch": 0.9448805590529225, + "grad_norm": 0.039498019963502884, + "learning_rate": 6.337195764431694e-06, + "loss": 0.0011, + "step": 55910 + }, + { + "epoch": 0.9450495593317729, + "grad_norm": 0.06169212982058525, + "learning_rate": 6.335774598111222e-06, + "loss": 0.0013, + "step": 55920 + }, + { + "epoch": 0.9452185596106234, + "grad_norm": 0.14251552522182465, + "learning_rate": 6.334353315571887e-06, + "loss": 0.0023, + "step": 55930 + }, + { + "epoch": 0.9453875598894739, + "grad_norm": 0.04333026334643364, + "learning_rate": 6.33293191693735e-06, + "loss": 0.0014, + "step": 55940 + }, + { + "epoch": 0.9455565601683242, + "grad_norm": 0.032190270721912384, + "learning_rate": 6.331510402331276e-06, + "loss": 0.001, + "step": 55950 + }, + { + "epoch": 0.9457255604471747, + "grad_norm": 0.08754512667655945, + "learning_rate": 6.330088771877347e-06, + "loss": 0.0015, + "step": 55960 + }, + { + "epoch": 0.9458945607260252, + "grad_norm": 0.0429929755628109, + "learning_rate": 6.32866702569925e-06, + "loss": 0.0013, + "step": 55970 + }, + { + "epoch": 0.9460635610048757, + "grad_norm": 0.11732529103755951, + "learning_rate": 6.327245163920685e-06, + "loss": 0.0012, + "step": 55980 + }, + { + "epoch": 0.9462325612837261, + "grad_norm": 0.03252869099378586, + "learning_rate": 6.325823186665358e-06, + "loss": 0.0013, + "step": 55990 + }, + { + "epoch": 0.9464015615625766, + "grad_norm": 0.025627510622143745, + "learning_rate": 6.324401094056991e-06, + "loss": 0.0013, + "step": 56000 + }, + { + "epoch": 0.9465705618414271, + "grad_norm": 0.10628566890954971, + "learning_rate": 6.322978886219313e-06, + "loss": 0.0016, + "step": 56010 + }, + { + "epoch": 0.9467395621202775, + "grad_norm": 0.07275257259607315, + "learning_rate": 6.32155656327606e-06, + "loss": 0.0009, + "step": 56020 + }, + { + "epoch": 0.9469085623991279, + "grad_norm": 0.047055941075086594, + "learning_rate": 6.320134125350984e-06, + "loss": 0.001, + "step": 56030 + }, + { + "epoch": 0.9470775626779784, + "grad_norm": 0.04651493579149246, + "learning_rate": 6.3187115725678435e-06, + "loss": 0.001, + "step": 56040 + }, + { + "epoch": 0.9472465629568289, + "grad_norm": 0.10704516619443893, + "learning_rate": 6.3172889050504065e-06, + "loss": 0.0018, + "step": 56050 + }, + { + "epoch": 0.9474155632356793, + "grad_norm": 0.04490116983652115, + "learning_rate": 6.3158661229224524e-06, + "loss": 0.0021, + "step": 56060 + }, + { + "epoch": 0.9475845635145298, + "grad_norm": 0.07242177426815033, + "learning_rate": 6.31444322630777e-06, + "loss": 0.0017, + "step": 56070 + }, + { + "epoch": 0.9477535637933803, + "grad_norm": 0.03556281700730324, + "learning_rate": 6.313020215330159e-06, + "loss": 0.0014, + "step": 56080 + }, + { + "epoch": 0.9479225640722307, + "grad_norm": 0.047583408653736115, + "learning_rate": 6.311597090113426e-06, + "loss": 0.0012, + "step": 56090 + }, + { + "epoch": 0.9480915643510812, + "grad_norm": 
0.02261732518672943, + "learning_rate": 6.310173850781391e-06, + "loss": 0.0016, + "step": 56100 + }, + { + "epoch": 0.9482605646299317, + "grad_norm": 0.0035633507650345564, + "learning_rate": 6.308750497457885e-06, + "loss": 0.0018, + "step": 56110 + }, + { + "epoch": 0.948429564908782, + "grad_norm": 0.028753597289323807, + "learning_rate": 6.307327030266743e-06, + "loss": 0.0013, + "step": 56120 + }, + { + "epoch": 0.9485985651876325, + "grad_norm": 0.014429560862481594, + "learning_rate": 6.305903449331817e-06, + "loss": 0.0017, + "step": 56130 + }, + { + "epoch": 0.948767565466483, + "grad_norm": 0.05367155745625496, + "learning_rate": 6.304479754776962e-06, + "loss": 0.001, + "step": 56140 + }, + { + "epoch": 0.9489365657453335, + "grad_norm": 0.0019822013564407825, + "learning_rate": 6.303055946726049e-06, + "loss": 0.0014, + "step": 56150 + }, + { + "epoch": 0.9491055660241839, + "grad_norm": 0.14954140782356262, + "learning_rate": 6.301632025302955e-06, + "loss": 0.0026, + "step": 56160 + }, + { + "epoch": 0.9492745663030344, + "grad_norm": 0.06319954991340637, + "learning_rate": 6.300207990631568e-06, + "loss": 0.0014, + "step": 56170 + }, + { + "epoch": 0.9494435665818849, + "grad_norm": 0.016481533646583557, + "learning_rate": 6.298783842835787e-06, + "loss": 0.0016, + "step": 56180 + }, + { + "epoch": 0.9496125668607354, + "grad_norm": 0.06972762942314148, + "learning_rate": 6.297359582039518e-06, + "loss": 0.0015, + "step": 56190 + }, + { + "epoch": 0.9497815671395857, + "grad_norm": 0.03541379049420357, + "learning_rate": 6.295935208366679e-06, + "loss": 0.0019, + "step": 56200 + }, + { + "epoch": 0.9499505674184362, + "grad_norm": 0.08550960570573807, + "learning_rate": 6.294510721941198e-06, + "loss": 0.001, + "step": 56210 + }, + { + "epoch": 0.9501195676972867, + "grad_norm": 0.08662448078393936, + "learning_rate": 6.293086122887013e-06, + "loss": 0.0031, + "step": 56220 + }, + { + "epoch": 0.9502885679761371, + "grad_norm": 0.041542794555425644, + "learning_rate": 6.29166141132807e-06, + "loss": 0.0018, + "step": 56230 + }, + { + "epoch": 0.9504575682549876, + "grad_norm": 0.06773274391889572, + "learning_rate": 6.2902365873883275e-06, + "loss": 0.0017, + "step": 56240 + }, + { + "epoch": 0.9506265685338381, + "grad_norm": 0.08712329715490341, + "learning_rate": 6.288811651191749e-06, + "loss": 0.0013, + "step": 56250 + }, + { + "epoch": 0.9507955688126886, + "grad_norm": 0.10475753247737885, + "learning_rate": 6.287386602862312e-06, + "loss": 0.0014, + "step": 56260 + }, + { + "epoch": 0.950964569091539, + "grad_norm": 0.007063519209623337, + "learning_rate": 6.285961442524003e-06, + "loss": 0.0011, + "step": 56270 + }, + { + "epoch": 0.9511335693703895, + "grad_norm": 0.06975963711738586, + "learning_rate": 6.284536170300818e-06, + "loss": 0.0013, + "step": 56280 + }, + { + "epoch": 0.9513025696492399, + "grad_norm": 0.02428462915122509, + "learning_rate": 6.283110786316763e-06, + "loss": 0.0013, + "step": 56290 + }, + { + "epoch": 0.9514715699280903, + "grad_norm": 0.020005803555250168, + "learning_rate": 6.281685290695851e-06, + "loss": 0.0018, + "step": 56300 + }, + { + "epoch": 0.9516405702069408, + "grad_norm": 0.0080246077850461, + "learning_rate": 6.280259683562111e-06, + "loss": 0.0015, + "step": 56310 + }, + { + "epoch": 0.9518095704857913, + "grad_norm": 0.19228099286556244, + "learning_rate": 6.2788339650395744e-06, + "loss": 0.0015, + "step": 56320 + }, + { + "epoch": 0.9519785707646418, + "grad_norm": 0.06089368835091591, + "learning_rate": 
6.277408135252288e-06, + "loss": 0.0014, + "step": 56330 + }, + { + "epoch": 0.9521475710434922, + "grad_norm": 0.10971179604530334, + "learning_rate": 6.275982194324304e-06, + "loss": 0.001, + "step": 56340 + }, + { + "epoch": 0.9523165713223427, + "grad_norm": 0.11247263848781586, + "learning_rate": 6.274556142379686e-06, + "loss": 0.0015, + "step": 56350 + }, + { + "epoch": 0.9524855716011932, + "grad_norm": 0.01730550080537796, + "learning_rate": 6.273129979542509e-06, + "loss": 0.0008, + "step": 56360 + }, + { + "epoch": 0.9526545718800437, + "grad_norm": 0.016524197533726692, + "learning_rate": 6.2717037059368555e-06, + "loss": 0.001, + "step": 56370 + }, + { + "epoch": 0.952823572158894, + "grad_norm": 0.10556582361459732, + "learning_rate": 6.2702773216868185e-06, + "loss": 0.0022, + "step": 56380 + }, + { + "epoch": 0.9529925724377445, + "grad_norm": 0.019711600616574287, + "learning_rate": 6.268850826916497e-06, + "loss": 0.0021, + "step": 56390 + }, + { + "epoch": 0.953161572716595, + "grad_norm": 0.009352779015898705, + "learning_rate": 6.2674242217500096e-06, + "loss": 0.0007, + "step": 56400 + }, + { + "epoch": 0.9533305729954454, + "grad_norm": 0.07730622589588165, + "learning_rate": 6.265997506311472e-06, + "loss": 0.0014, + "step": 56410 + }, + { + "epoch": 0.9534995732742959, + "grad_norm": 0.027490902692079544, + "learning_rate": 6.2645706807250175e-06, + "loss": 0.0012, + "step": 56420 + }, + { + "epoch": 0.9536685735531464, + "grad_norm": 0.09496179968118668, + "learning_rate": 6.263143745114788e-06, + "loss": 0.003, + "step": 56430 + }, + { + "epoch": 0.9538375738319969, + "grad_norm": 0.1781720072031021, + "learning_rate": 6.261716699604932e-06, + "loss": 0.0042, + "step": 56440 + }, + { + "epoch": 0.9540065741108473, + "grad_norm": 0.039281900972127914, + "learning_rate": 6.26028954431961e-06, + "loss": 0.0012, + "step": 56450 + }, + { + "epoch": 0.9541755743896977, + "grad_norm": 0.030199745669960976, + "learning_rate": 6.2588622793829914e-06, + "loss": 0.0016, + "step": 56460 + }, + { + "epoch": 0.9543445746685482, + "grad_norm": 0.07956171780824661, + "learning_rate": 6.257434904919255e-06, + "loss": 0.0011, + "step": 56470 + }, + { + "epoch": 0.9545135749473986, + "grad_norm": 0.05297559127211571, + "learning_rate": 6.256007421052588e-06, + "loss": 0.0011, + "step": 56480 + }, + { + "epoch": 0.9546825752262491, + "grad_norm": 0.024796657264232635, + "learning_rate": 6.254579827907191e-06, + "loss": 0.0009, + "step": 56490 + }, + { + "epoch": 0.9548515755050996, + "grad_norm": 0.08131872117519379, + "learning_rate": 6.253152125607271e-06, + "loss": 0.0026, + "step": 56500 + }, + { + "epoch": 0.95502057578395, + "grad_norm": 0.058359820395708084, + "learning_rate": 6.251724314277045e-06, + "loss": 0.0013, + "step": 56510 + }, + { + "epoch": 0.9551895760628005, + "grad_norm": 0.028362829238176346, + "learning_rate": 6.2502963940407376e-06, + "loss": 0.0016, + "step": 56520 + }, + { + "epoch": 0.955358576341651, + "grad_norm": 0.010488352738320827, + "learning_rate": 6.248868365022586e-06, + "loss": 0.0014, + "step": 56530 + }, + { + "epoch": 0.9555275766205015, + "grad_norm": 0.07383910566568375, + "learning_rate": 6.247440227346836e-06, + "loss": 0.0018, + "step": 56540 + }, + { + "epoch": 0.9556965768993518, + "grad_norm": 0.0049832952208817005, + "learning_rate": 6.246011981137743e-06, + "loss": 0.0021, + "step": 56550 + }, + { + "epoch": 0.9558655771782023, + "grad_norm": 0.14020919799804688, + "learning_rate": 6.244583626519569e-06, + "loss": 0.0026, + "step": 
56560 + }, + { + "epoch": 0.9560345774570528, + "grad_norm": 0.17698492109775543, + "learning_rate": 6.243155163616591e-06, + "loss": 0.0035, + "step": 56570 + }, + { + "epoch": 0.9562035777359033, + "grad_norm": 0.09493689239025116, + "learning_rate": 6.241726592553089e-06, + "loss": 0.0017, + "step": 56580 + }, + { + "epoch": 0.9563725780147537, + "grad_norm": 0.052152473479509354, + "learning_rate": 6.240297913453358e-06, + "loss": 0.0014, + "step": 56590 + }, + { + "epoch": 0.9565415782936042, + "grad_norm": 0.13550731539726257, + "learning_rate": 6.2388691264417e-06, + "loss": 0.0009, + "step": 56600 + }, + { + "epoch": 0.9567105785724547, + "grad_norm": 0.15419816970825195, + "learning_rate": 6.237440231642424e-06, + "loss": 0.0012, + "step": 56610 + }, + { + "epoch": 0.9568795788513051, + "grad_norm": 0.02464517019689083, + "learning_rate": 6.236011229179854e-06, + "loss": 0.0016, + "step": 56620 + }, + { + "epoch": 0.9570485791301556, + "grad_norm": 0.010589303448796272, + "learning_rate": 6.234582119178317e-06, + "loss": 0.0011, + "step": 56630 + }, + { + "epoch": 0.957217579409006, + "grad_norm": 0.05937943980097771, + "learning_rate": 6.233152901762155e-06, + "loss": 0.0024, + "step": 56640 + }, + { + "epoch": 0.9573865796878565, + "grad_norm": 0.12208323180675507, + "learning_rate": 6.231723577055715e-06, + "loss": 0.0011, + "step": 56650 + }, + { + "epoch": 0.9575555799667069, + "grad_norm": 0.03957764059305191, + "learning_rate": 6.230294145183357e-06, + "loss": 0.0019, + "step": 56660 + }, + { + "epoch": 0.9577245802455574, + "grad_norm": 0.07161909341812134, + "learning_rate": 6.228864606269446e-06, + "loss": 0.0032, + "step": 56670 + }, + { + "epoch": 0.9578935805244079, + "grad_norm": 0.08066277951002121, + "learning_rate": 6.227434960438361e-06, + "loss": 0.0021, + "step": 56680 + }, + { + "epoch": 0.9580625808032583, + "grad_norm": 0.01578165404498577, + "learning_rate": 6.2260052078144875e-06, + "loss": 0.0014, + "step": 56690 + }, + { + "epoch": 0.9582315810821088, + "grad_norm": 0.05495981127023697, + "learning_rate": 6.224575348522221e-06, + "loss": 0.0014, + "step": 56700 + }, + { + "epoch": 0.9584005813609593, + "grad_norm": 0.21792486310005188, + "learning_rate": 6.223145382685965e-06, + "loss": 0.0007, + "step": 56710 + }, + { + "epoch": 0.9585695816398097, + "grad_norm": 0.017716677859425545, + "learning_rate": 6.221715310430135e-06, + "loss": 0.0009, + "step": 56720 + }, + { + "epoch": 0.9587385819186601, + "grad_norm": 0.042220745235681534, + "learning_rate": 6.220285131879153e-06, + "loss": 0.0006, + "step": 56730 + }, + { + "epoch": 0.9589075821975106, + "grad_norm": 0.06167328357696533, + "learning_rate": 6.218854847157454e-06, + "loss": 0.0011, + "step": 56740 + }, + { + "epoch": 0.9590765824763611, + "grad_norm": 0.04163149371743202, + "learning_rate": 6.217424456389477e-06, + "loss": 0.0012, + "step": 56750 + }, + { + "epoch": 0.9592455827552115, + "grad_norm": 0.027633186429739, + "learning_rate": 6.215993959699672e-06, + "loss": 0.0007, + "step": 56760 + }, + { + "epoch": 0.959414583034062, + "grad_norm": 0.04331083595752716, + "learning_rate": 6.214563357212502e-06, + "loss": 0.0011, + "step": 56770 + }, + { + "epoch": 0.9595835833129125, + "grad_norm": 0.06631213426589966, + "learning_rate": 6.213132649052435e-06, + "loss": 0.0013, + "step": 56780 + }, + { + "epoch": 0.959752583591763, + "grad_norm": 0.31079480051994324, + "learning_rate": 6.21170183534395e-06, + "loss": 0.0012, + "step": 56790 + }, + { + "epoch": 0.9599215838706134, + "grad_norm": 
0.2310328185558319, + "learning_rate": 6.210270916211535e-06, + "loss": 0.0038, + "step": 56800 + }, + { + "epoch": 0.9600905841494638, + "grad_norm": 0.26346540451049805, + "learning_rate": 6.208839891779685e-06, + "loss": 0.0045, + "step": 56810 + }, + { + "epoch": 0.9602595844283143, + "grad_norm": 0.06398440897464752, + "learning_rate": 6.207408762172909e-06, + "loss": 0.0063, + "step": 56820 + }, + { + "epoch": 0.9604285847071647, + "grad_norm": 0.07587594538927078, + "learning_rate": 6.205977527515721e-06, + "loss": 0.0026, + "step": 56830 + }, + { + "epoch": 0.9605975849860152, + "grad_norm": 0.003844884689897299, + "learning_rate": 6.204546187932644e-06, + "loss": 0.0014, + "step": 56840 + }, + { + "epoch": 0.9607665852648657, + "grad_norm": 0.0372488796710968, + "learning_rate": 6.203114743548213e-06, + "loss": 0.0009, + "step": 56850 + }, + { + "epoch": 0.9609355855437162, + "grad_norm": 0.00627544242888689, + "learning_rate": 6.20168319448697e-06, + "loss": 0.001, + "step": 56860 + }, + { + "epoch": 0.9611045858225666, + "grad_norm": 0.05668949335813522, + "learning_rate": 6.200251540873465e-06, + "loss": 0.0014, + "step": 56870 + }, + { + "epoch": 0.9612735861014171, + "grad_norm": 0.008376243524253368, + "learning_rate": 6.198819782832263e-06, + "loss": 0.0024, + "step": 56880 + }, + { + "epoch": 0.9614425863802675, + "grad_norm": 0.013443087227642536, + "learning_rate": 6.19738792048793e-06, + "loss": 0.0013, + "step": 56890 + }, + { + "epoch": 0.961611586659118, + "grad_norm": 0.034180834889411926, + "learning_rate": 6.195955953965049e-06, + "loss": 0.0018, + "step": 56900 + }, + { + "epoch": 0.9617805869379684, + "grad_norm": 0.05446441471576691, + "learning_rate": 6.194523883388203e-06, + "loss": 0.0012, + "step": 56910 + }, + { + "epoch": 0.9619495872168189, + "grad_norm": 0.11061554402112961, + "learning_rate": 6.193091708881992e-06, + "loss": 0.0006, + "step": 56920 + }, + { + "epoch": 0.9621185874956694, + "grad_norm": 0.0899341031908989, + "learning_rate": 6.191659430571022e-06, + "loss": 0.0008, + "step": 56930 + }, + { + "epoch": 0.9622875877745198, + "grad_norm": 0.011873092502355576, + "learning_rate": 6.190227048579908e-06, + "loss": 0.0006, + "step": 56940 + }, + { + "epoch": 0.9624565880533703, + "grad_norm": 0.028710903599858284, + "learning_rate": 6.188794563033274e-06, + "loss": 0.001, + "step": 56950 + }, + { + "epoch": 0.9626255883322208, + "grad_norm": 0.008559263311326504, + "learning_rate": 6.1873619740557515e-06, + "loss": 0.0014, + "step": 56960 + }, + { + "epoch": 0.9627945886110713, + "grad_norm": 0.07611192762851715, + "learning_rate": 6.185929281771985e-06, + "loss": 0.001, + "step": 56970 + }, + { + "epoch": 0.9629635888899216, + "grad_norm": 0.07905812561511993, + "learning_rate": 6.184496486306626e-06, + "loss": 0.0015, + "step": 56980 + }, + { + "epoch": 0.9631325891687721, + "grad_norm": 0.03562645614147186, + "learning_rate": 6.1830635877843325e-06, + "loss": 0.0008, + "step": 56990 + }, + { + "epoch": 0.9633015894476226, + "grad_norm": 0.03875849395990372, + "learning_rate": 6.181630586329776e-06, + "loss": 0.0015, + "step": 57000 + }, + { + "epoch": 0.963470589726473, + "grad_norm": 0.009532700292766094, + "learning_rate": 6.1801974820676336e-06, + "loss": 0.0018, + "step": 57010 + }, + { + "epoch": 0.9636395900053235, + "grad_norm": 0.11676210910081863, + "learning_rate": 6.178764275122592e-06, + "loss": 0.0017, + "step": 57020 + }, + { + "epoch": 0.963808590284174, + "grad_norm": 0.07378076761960983, + "learning_rate": 
6.177330965619346e-06, + "loss": 0.0022, + "step": 57030 + }, + { + "epoch": 0.9639775905630245, + "grad_norm": 0.049584876745939255, + "learning_rate": 6.175897553682601e-06, + "loss": 0.0033, + "step": 57040 + }, + { + "epoch": 0.9641465908418749, + "grad_norm": 0.07187934964895248, + "learning_rate": 6.174464039437074e-06, + "loss": 0.0009, + "step": 57050 + }, + { + "epoch": 0.9643155911207254, + "grad_norm": 0.01915927045047283, + "learning_rate": 6.1730304230074835e-06, + "loss": 0.0016, + "step": 57060 + }, + { + "epoch": 0.9644845913995758, + "grad_norm": 0.07050523906946182, + "learning_rate": 6.171596704518562e-06, + "loss": 0.0016, + "step": 57070 + }, + { + "epoch": 0.9646535916784262, + "grad_norm": 0.02501530572772026, + "learning_rate": 6.170162884095053e-06, + "loss": 0.0018, + "step": 57080 + }, + { + "epoch": 0.9648225919572767, + "grad_norm": 0.1584603190422058, + "learning_rate": 6.168728961861702e-06, + "loss": 0.0013, + "step": 57090 + }, + { + "epoch": 0.9649915922361272, + "grad_norm": 0.011680858209729195, + "learning_rate": 6.1672949379432686e-06, + "loss": 0.0017, + "step": 57100 + }, + { + "epoch": 0.9651605925149777, + "grad_norm": 0.13482710719108582, + "learning_rate": 6.165860812464522e-06, + "loss": 0.0028, + "step": 57110 + }, + { + "epoch": 0.9653295927938281, + "grad_norm": 0.013348933309316635, + "learning_rate": 6.164426585550234e-06, + "loss": 0.0016, + "step": 57120 + }, + { + "epoch": 0.9654985930726786, + "grad_norm": 0.06926348060369492, + "learning_rate": 6.162992257325191e-06, + "loss": 0.0018, + "step": 57130 + }, + { + "epoch": 0.9656675933515291, + "grad_norm": 0.017068268731236458, + "learning_rate": 6.1615578279141874e-06, + "loss": 0.0016, + "step": 57140 + }, + { + "epoch": 0.9658365936303794, + "grad_norm": 0.04602353647351265, + "learning_rate": 6.160123297442025e-06, + "loss": 0.0016, + "step": 57150 + }, + { + "epoch": 0.9660055939092299, + "grad_norm": 0.0930069163441658, + "learning_rate": 6.158688666033515e-06, + "loss": 0.0019, + "step": 57160 + }, + { + "epoch": 0.9661745941880804, + "grad_norm": 0.11917515099048615, + "learning_rate": 6.157253933813476e-06, + "loss": 0.0021, + "step": 57170 + }, + { + "epoch": 0.9663435944669309, + "grad_norm": 0.007319287862628698, + "learning_rate": 6.155819100906739e-06, + "loss": 0.0007, + "step": 57180 + }, + { + "epoch": 0.9665125947457813, + "grad_norm": 0.08814607560634613, + "learning_rate": 6.15438416743814e-06, + "loss": 0.001, + "step": 57190 + }, + { + "epoch": 0.9666815950246318, + "grad_norm": 0.09839361160993576, + "learning_rate": 6.152949133532526e-06, + "loss": 0.0015, + "step": 57200 + }, + { + "epoch": 0.9668505953034823, + "grad_norm": 0.03802715986967087, + "learning_rate": 6.151513999314749e-06, + "loss": 0.0011, + "step": 57210 + }, + { + "epoch": 0.9670195955823327, + "grad_norm": 0.129659041762352, + "learning_rate": 6.150078764909676e-06, + "loss": 0.0011, + "step": 57220 + }, + { + "epoch": 0.9671885958611832, + "grad_norm": 0.04857420548796654, + "learning_rate": 6.148643430442179e-06, + "loss": 0.0015, + "step": 57230 + }, + { + "epoch": 0.9673575961400336, + "grad_norm": 0.028127888217568398, + "learning_rate": 6.1472079960371364e-06, + "loss": 0.0011, + "step": 57240 + }, + { + "epoch": 0.967526596418884, + "grad_norm": 0.08872781693935394, + "learning_rate": 6.145772461819441e-06, + "loss": 0.0028, + "step": 57250 + }, + { + "epoch": 0.9676955966977345, + "grad_norm": 0.02356112003326416, + "learning_rate": 6.1443368279139905e-06, + "loss": 0.0014, + "step": 
57260 + }, + { + "epoch": 0.967864596976585, + "grad_norm": 0.05805081129074097, + "learning_rate": 6.142901094445691e-06, + "loss": 0.0009, + "step": 57270 + }, + { + "epoch": 0.9680335972554355, + "grad_norm": 0.06666218489408493, + "learning_rate": 6.141465261539459e-06, + "loss": 0.0022, + "step": 57280 + }, + { + "epoch": 0.968202597534286, + "grad_norm": 0.09788259863853455, + "learning_rate": 6.140029329320217e-06, + "loss": 0.0014, + "step": 57290 + }, + { + "epoch": 0.9683715978131364, + "grad_norm": 0.05695125833153725, + "learning_rate": 6.138593297912901e-06, + "loss": 0.0008, + "step": 57300 + }, + { + "epoch": 0.9685405980919869, + "grad_norm": 0.03995615988969803, + "learning_rate": 6.137157167442452e-06, + "loss": 0.0014, + "step": 57310 + }, + { + "epoch": 0.9687095983708374, + "grad_norm": 0.09380437433719635, + "learning_rate": 6.13572093803382e-06, + "loss": 0.001, + "step": 57320 + }, + { + "epoch": 0.9688785986496877, + "grad_norm": 0.1397034078836441, + "learning_rate": 6.1342846098119635e-06, + "loss": 0.0013, + "step": 57330 + }, + { + "epoch": 0.9690475989285382, + "grad_norm": 0.10551384091377258, + "learning_rate": 6.132848182901851e-06, + "loss": 0.0013, + "step": 57340 + }, + { + "epoch": 0.9692165992073887, + "grad_norm": 0.07721326500177383, + "learning_rate": 6.131411657428454e-06, + "loss": 0.0026, + "step": 57350 + }, + { + "epoch": 0.9693855994862391, + "grad_norm": 0.0641603097319603, + "learning_rate": 6.129975033516765e-06, + "loss": 0.0014, + "step": 57360 + }, + { + "epoch": 0.9695545997650896, + "grad_norm": 0.0963938981294632, + "learning_rate": 6.128538311291772e-06, + "loss": 0.0015, + "step": 57370 + }, + { + "epoch": 0.9697236000439401, + "grad_norm": 0.009676202200353146, + "learning_rate": 6.127101490878478e-06, + "loss": 0.0019, + "step": 57380 + }, + { + "epoch": 0.9698926003227906, + "grad_norm": 0.07981002330780029, + "learning_rate": 6.125664572401894e-06, + "loss": 0.0014, + "step": 57390 + }, + { + "epoch": 0.970061600601641, + "grad_norm": 0.2350415140390396, + "learning_rate": 6.124227555987037e-06, + "loss": 0.0027, + "step": 57400 + }, + { + "epoch": 0.9702306008804914, + "grad_norm": 0.040543004870414734, + "learning_rate": 6.122790441758937e-06, + "loss": 0.0009, + "step": 57410 + }, + { + "epoch": 0.9703996011593419, + "grad_norm": 0.03330293297767639, + "learning_rate": 6.121353229842627e-06, + "loss": 0.0012, + "step": 57420 + }, + { + "epoch": 0.9705686014381923, + "grad_norm": 0.07130574434995651, + "learning_rate": 6.119915920363154e-06, + "loss": 0.0015, + "step": 57430 + }, + { + "epoch": 0.9707376017170428, + "grad_norm": 0.03864027187228203, + "learning_rate": 6.118478513445568e-06, + "loss": 0.0016, + "step": 57440 + }, + { + "epoch": 0.9709066019958933, + "grad_norm": 0.09162461757659912, + "learning_rate": 6.117041009214935e-06, + "loss": 0.0025, + "step": 57450 + }, + { + "epoch": 0.9710756022747438, + "grad_norm": 0.0017001412343233824, + "learning_rate": 6.11560340779632e-06, + "loss": 0.0015, + "step": 57460 + }, + { + "epoch": 0.9712446025535942, + "grad_norm": 0.19290043413639069, + "learning_rate": 6.114165709314805e-06, + "loss": 0.0014, + "step": 57470 + }, + { + "epoch": 0.9714136028324447, + "grad_norm": 0.02218267321586609, + "learning_rate": 6.112727913895473e-06, + "loss": 0.0015, + "step": 57480 + }, + { + "epoch": 0.9715826031112952, + "grad_norm": 0.27840378880500793, + "learning_rate": 6.111290021663423e-06, + "loss": 0.0024, + "step": 57490 + }, + { + "epoch": 0.9717516033901455, + "grad_norm": 
0.11960061639547348, + "learning_rate": 6.109852032743756e-06, + "loss": 0.0017, + "step": 57500 + }, + { + "epoch": 0.971920603668996, + "grad_norm": 0.22218191623687744, + "learning_rate": 6.108413947261585e-06, + "loss": 0.0023, + "step": 57510 + }, + { + "epoch": 0.9720896039478465, + "grad_norm": 0.062080252915620804, + "learning_rate": 6.10697576534203e-06, + "loss": 0.002, + "step": 57520 + }, + { + "epoch": 0.972258604226697, + "grad_norm": 0.03579581528902054, + "learning_rate": 6.105537487110219e-06, + "loss": 0.0019, + "step": 57530 + }, + { + "epoch": 0.9724276045055474, + "grad_norm": 0.05509597808122635, + "learning_rate": 6.1040991126912906e-06, + "loss": 0.0017, + "step": 57540 + }, + { + "epoch": 0.9725966047843979, + "grad_norm": 0.08803009986877441, + "learning_rate": 6.10266064221039e-06, + "loss": 0.002, + "step": 57550 + }, + { + "epoch": 0.9727656050632484, + "grad_norm": 0.05634976923465729, + "learning_rate": 6.101222075792671e-06, + "loss": 0.0011, + "step": 57560 + }, + { + "epoch": 0.9729346053420989, + "grad_norm": 0.04451654478907585, + "learning_rate": 6.0997834135632976e-06, + "loss": 0.0017, + "step": 57570 + }, + { + "epoch": 0.9731036056209492, + "grad_norm": 0.2588036358356476, + "learning_rate": 6.098344655647437e-06, + "loss": 0.0029, + "step": 57580 + }, + { + "epoch": 0.9732726058997997, + "grad_norm": 0.035599395632743835, + "learning_rate": 6.09690580217027e-06, + "loss": 0.001, + "step": 57590 + }, + { + "epoch": 0.9734416061786502, + "grad_norm": 0.015586217865347862, + "learning_rate": 6.095466853256984e-06, + "loss": 0.001, + "step": 57600 + }, + { + "epoch": 0.9736106064575006, + "grad_norm": 0.05883041396737099, + "learning_rate": 6.094027809032774e-06, + "loss": 0.0016, + "step": 57610 + }, + { + "epoch": 0.9737796067363511, + "grad_norm": 0.10953272879123688, + "learning_rate": 6.092588669622843e-06, + "loss": 0.0023, + "step": 57620 + }, + { + "epoch": 0.9739486070152016, + "grad_norm": 0.4857546389102936, + "learning_rate": 6.091149435152406e-06, + "loss": 0.0084, + "step": 57630 + }, + { + "epoch": 0.9741176072940521, + "grad_norm": 0.018154164776206017, + "learning_rate": 6.089710105746679e-06, + "loss": 0.0015, + "step": 57640 + }, + { + "epoch": 0.9742866075729025, + "grad_norm": 0.09440393000841141, + "learning_rate": 6.0882706815308945e-06, + "loss": 0.0011, + "step": 57650 + }, + { + "epoch": 0.974455607851753, + "grad_norm": 0.032431937754154205, + "learning_rate": 6.086831162630287e-06, + "loss": 0.001, + "step": 57660 + }, + { + "epoch": 0.9746246081306034, + "grad_norm": 0.08695471286773682, + "learning_rate": 6.085391549170103e-06, + "loss": 0.0028, + "step": 57670 + }, + { + "epoch": 0.9747936084094538, + "grad_norm": 0.0596468411386013, + "learning_rate": 6.083951841275596e-06, + "loss": 0.0015, + "step": 57680 + }, + { + "epoch": 0.9749626086883043, + "grad_norm": 0.017689548432826996, + "learning_rate": 6.082512039072027e-06, + "loss": 0.0013, + "step": 57690 + }, + { + "epoch": 0.9751316089671548, + "grad_norm": 0.054335687309503555, + "learning_rate": 6.081072142684665e-06, + "loss": 0.0012, + "step": 57700 + }, + { + "epoch": 0.9753006092460053, + "grad_norm": 0.10406859964132309, + "learning_rate": 6.079632152238789e-06, + "loss": 0.0022, + "step": 57710 + }, + { + "epoch": 0.9754696095248557, + "grad_norm": 0.12383397668600082, + "learning_rate": 6.078192067859684e-06, + "loss": 0.0008, + "step": 57720 + }, + { + "epoch": 0.9756386098037062, + "grad_norm": 0.04708423092961311, + "learning_rate": 6.076751889672645e-06, 
+ "loss": 0.0034, + "step": 57730 + }, + { + "epoch": 0.9758076100825567, + "grad_norm": 0.06755197048187256, + "learning_rate": 6.0753116178029756e-06, + "loss": 0.002, + "step": 57740 + }, + { + "epoch": 0.9759766103614071, + "grad_norm": 0.08884324133396149, + "learning_rate": 6.073871252375986e-06, + "loss": 0.0007, + "step": 57750 + }, + { + "epoch": 0.9761456106402575, + "grad_norm": 0.001903891796246171, + "learning_rate": 6.072430793516993e-06, + "loss": 0.0016, + "step": 57760 + }, + { + "epoch": 0.976314610919108, + "grad_norm": 0.09241598099470139, + "learning_rate": 6.070990241351327e-06, + "loss": 0.0014, + "step": 57770 + }, + { + "epoch": 0.9764836111979585, + "grad_norm": 0.09973130375146866, + "learning_rate": 6.069549596004319e-06, + "loss": 0.0018, + "step": 57780 + }, + { + "epoch": 0.9766526114768089, + "grad_norm": 0.02484247088432312, + "learning_rate": 6.0681088576013144e-06, + "loss": 0.002, + "step": 57790 + }, + { + "epoch": 0.9768216117556594, + "grad_norm": 0.031424183398485184, + "learning_rate": 6.066668026267664e-06, + "loss": 0.0014, + "step": 57800 + }, + { + "epoch": 0.9769906120345099, + "grad_norm": 0.0393252931535244, + "learning_rate": 6.065227102128728e-06, + "loss": 0.0008, + "step": 57810 + }, + { + "epoch": 0.9771596123133603, + "grad_norm": 0.0469253845512867, + "learning_rate": 6.0637860853098714e-06, + "loss": 0.0009, + "step": 57820 + }, + { + "epoch": 0.9773286125922108, + "grad_norm": 0.10950836539268494, + "learning_rate": 6.062344975936472e-06, + "loss": 0.0042, + "step": 57830 + }, + { + "epoch": 0.9774976128710612, + "grad_norm": 0.042595747858285904, + "learning_rate": 6.060903774133913e-06, + "loss": 0.0011, + "step": 57840 + }, + { + "epoch": 0.9776666131499117, + "grad_norm": 0.038873203098773956, + "learning_rate": 6.059462480027585e-06, + "loss": 0.0017, + "step": 57850 + }, + { + "epoch": 0.9778356134287621, + "grad_norm": 0.02171887271106243, + "learning_rate": 6.058021093742888e-06, + "loss": 0.0016, + "step": 57860 + }, + { + "epoch": 0.9780046137076126, + "grad_norm": 0.05608280748128891, + "learning_rate": 6.05657961540523e-06, + "loss": 0.0014, + "step": 57870 + }, + { + "epoch": 0.9781736139864631, + "grad_norm": 0.03701461851596832, + "learning_rate": 6.055138045140027e-06, + "loss": 0.0008, + "step": 57880 + }, + { + "epoch": 0.9783426142653135, + "grad_norm": 0.012678167782723904, + "learning_rate": 6.0536963830727e-06, + "loss": 0.0048, + "step": 57890 + }, + { + "epoch": 0.978511614544164, + "grad_norm": 0.006651062052696943, + "learning_rate": 6.0522546293286845e-06, + "loss": 0.0016, + "step": 57900 + }, + { + "epoch": 0.9786806148230145, + "grad_norm": 0.08401114493608475, + "learning_rate": 6.050812784033417e-06, + "loss": 0.001, + "step": 57910 + }, + { + "epoch": 0.978849615101865, + "grad_norm": 0.02398318611085415, + "learning_rate": 6.049370847312345e-06, + "loss": 0.0015, + "step": 57920 + }, + { + "epoch": 0.9790186153807153, + "grad_norm": 0.05877359211444855, + "learning_rate": 6.047928819290925e-06, + "loss": 0.0019, + "step": 57930 + }, + { + "epoch": 0.9791876156595658, + "grad_norm": 0.037036243826150894, + "learning_rate": 6.046486700094621e-06, + "loss": 0.0019, + "step": 57940 + }, + { + "epoch": 0.9793566159384163, + "grad_norm": 0.1200292557477951, + "learning_rate": 6.045044489848904e-06, + "loss": 0.0013, + "step": 57950 + }, + { + "epoch": 0.9795256162172667, + "grad_norm": 0.004075548145920038, + "learning_rate": 6.043602188679253e-06, + "loss": 0.0011, + "step": 57960 + }, + { + "epoch": 
0.9796946164961172, + "grad_norm": 0.00037315511144697666, + "learning_rate": 6.042159796711156e-06, + "loss": 0.0014, + "step": 57970 + }, + { + "epoch": 0.9798636167749677, + "grad_norm": 0.05303901433944702, + "learning_rate": 6.040717314070106e-06, + "loss": 0.0027, + "step": 57980 + }, + { + "epoch": 0.9800326170538182, + "grad_norm": 0.19602589309215546, + "learning_rate": 6.039274740881607e-06, + "loss": 0.0019, + "step": 57990 + }, + { + "epoch": 0.9802016173326686, + "grad_norm": 0.028596512973308563, + "learning_rate": 6.037832077271172e-06, + "loss": 0.0013, + "step": 58000 + }, + { + "epoch": 0.9803706176115191, + "grad_norm": 0.20253531634807587, + "learning_rate": 6.0363893233643155e-06, + "loss": 0.0016, + "step": 58010 + }, + { + "epoch": 0.9805396178903695, + "grad_norm": 0.061884328722953796, + "learning_rate": 6.0349464792865675e-06, + "loss": 0.0009, + "step": 58020 + }, + { + "epoch": 0.98070861816922, + "grad_norm": 0.05780744552612305, + "learning_rate": 6.033503545163462e-06, + "loss": 0.002, + "step": 58030 + }, + { + "epoch": 0.9808776184480704, + "grad_norm": 0.028961317613720894, + "learning_rate": 6.03206052112054e-06, + "loss": 0.0019, + "step": 58040 + }, + { + "epoch": 0.9810466187269209, + "grad_norm": 0.07479706406593323, + "learning_rate": 6.0306174072833525e-06, + "loss": 0.0013, + "step": 58050 + }, + { + "epoch": 0.9812156190057714, + "grad_norm": 0.004724375903606415, + "learning_rate": 6.0291742037774555e-06, + "loss": 0.0022, + "step": 58060 + }, + { + "epoch": 0.9813846192846218, + "grad_norm": 0.022922640666365623, + "learning_rate": 6.0277309107284174e-06, + "loss": 0.0011, + "step": 58070 + }, + { + "epoch": 0.9815536195634723, + "grad_norm": 0.03456398844718933, + "learning_rate": 6.026287528261812e-06, + "loss": 0.0016, + "step": 58080 + }, + { + "epoch": 0.9817226198423228, + "grad_norm": 0.07966052740812302, + "learning_rate": 6.024844056503218e-06, + "loss": 0.0016, + "step": 58090 + }, + { + "epoch": 0.9818916201211731, + "grad_norm": 0.06790140271186829, + "learning_rate": 6.023400495578226e-06, + "loss": 0.0014, + "step": 58100 + }, + { + "epoch": 0.9820606204000236, + "grad_norm": 0.06890040636062622, + "learning_rate": 6.021956845612432e-06, + "loss": 0.0008, + "step": 58110 + }, + { + "epoch": 0.9822296206788741, + "grad_norm": 0.008626767434179783, + "learning_rate": 6.02051310673144e-06, + "loss": 0.0021, + "step": 58120 + }, + { + "epoch": 0.9823986209577246, + "grad_norm": 0.07097417861223221, + "learning_rate": 6.019069279060864e-06, + "loss": 0.0015, + "step": 58130 + }, + { + "epoch": 0.982567621236575, + "grad_norm": 0.03134439140558243, + "learning_rate": 6.017625362726324e-06, + "loss": 0.0018, + "step": 58140 + }, + { + "epoch": 0.9827366215154255, + "grad_norm": 0.0705198273062706, + "learning_rate": 6.016181357853447e-06, + "loss": 0.0021, + "step": 58150 + }, + { + "epoch": 0.982905621794276, + "grad_norm": 0.046758249402046204, + "learning_rate": 6.014737264567867e-06, + "loss": 0.002, + "step": 58160 + }, + { + "epoch": 0.9830746220731265, + "grad_norm": 0.05306238308548927, + "learning_rate": 6.01329308299523e-06, + "loss": 0.0014, + "step": 58170 + }, + { + "epoch": 0.9832436223519769, + "grad_norm": 0.009773504920303822, + "learning_rate": 6.011848813261184e-06, + "loss": 0.0021, + "step": 58180 + }, + { + "epoch": 0.9834126226308273, + "grad_norm": 0.07215312123298645, + "learning_rate": 6.01040445549139e-06, + "loss": 0.0019, + "step": 58190 + }, + { + "epoch": 0.9835816229096778, + "grad_norm": 0.107475645840168, 
+ "learning_rate": 6.008960009811512e-06, + "loss": 0.0019, + "step": 58200 + }, + { + "epoch": 0.9837506231885282, + "grad_norm": 0.009603841230273247, + "learning_rate": 6.007515476347223e-06, + "loss": 0.0018, + "step": 58210 + }, + { + "epoch": 0.9839196234673787, + "grad_norm": 0.03349253535270691, + "learning_rate": 6.006070855224209e-06, + "loss": 0.0011, + "step": 58220 + }, + { + "epoch": 0.9840886237462292, + "grad_norm": 0.17738290131092072, + "learning_rate": 6.004626146568154e-06, + "loss": 0.0022, + "step": 58230 + }, + { + "epoch": 0.9842576240250797, + "grad_norm": 0.039686206728219986, + "learning_rate": 6.003181350504758e-06, + "loss": 0.0014, + "step": 58240 + }, + { + "epoch": 0.9844266243039301, + "grad_norm": 0.03419140726327896, + "learning_rate": 6.001736467159723e-06, + "loss": 0.0016, + "step": 58250 + }, + { + "epoch": 0.9845956245827806, + "grad_norm": 0.07928406447172165, + "learning_rate": 6.000291496658763e-06, + "loss": 0.0025, + "step": 58260 + }, + { + "epoch": 0.9847646248616311, + "grad_norm": 0.22952072322368622, + "learning_rate": 5.998846439127596e-06, + "loss": 0.0023, + "step": 58270 + }, + { + "epoch": 0.9849336251404814, + "grad_norm": 0.07492335885763168, + "learning_rate": 5.997401294691949e-06, + "loss": 0.0021, + "step": 58280 + }, + { + "epoch": 0.9851026254193319, + "grad_norm": 0.09141848236322403, + "learning_rate": 5.995956063477555e-06, + "loss": 0.0062, + "step": 58290 + }, + { + "epoch": 0.9852716256981824, + "grad_norm": 0.022302517667412758, + "learning_rate": 5.9945107456101605e-06, + "loss": 0.0012, + "step": 58300 + }, + { + "epoch": 0.9854406259770329, + "grad_norm": 0.06388254463672638, + "learning_rate": 5.99306534121551e-06, + "loss": 0.0014, + "step": 58310 + }, + { + "epoch": 0.9856096262558833, + "grad_norm": 0.02491978369653225, + "learning_rate": 5.991619850419365e-06, + "loss": 0.002, + "step": 58320 + }, + { + "epoch": 0.9857786265347338, + "grad_norm": 0.12101317942142487, + "learning_rate": 5.99017427334749e-06, + "loss": 0.0016, + "step": 58330 + }, + { + "epoch": 0.9859476268135843, + "grad_norm": 0.03233025595545769, + "learning_rate": 5.988728610125653e-06, + "loss": 0.001, + "step": 58340 + }, + { + "epoch": 0.9861166270924347, + "grad_norm": 0.09366673231124878, + "learning_rate": 5.987282860879638e-06, + "loss": 0.0012, + "step": 58350 + }, + { + "epoch": 0.9862856273712851, + "grad_norm": 0.02704479545354843, + "learning_rate": 5.985837025735232e-06, + "loss": 0.0018, + "step": 58360 + }, + { + "epoch": 0.9864546276501356, + "grad_norm": 0.05152517184615135, + "learning_rate": 5.984391104818226e-06, + "loss": 0.0009, + "step": 58370 + }, + { + "epoch": 0.9866236279289861, + "grad_norm": 0.0815383568406105, + "learning_rate": 5.982945098254425e-06, + "loss": 0.0025, + "step": 58380 + }, + { + "epoch": 0.9867926282078365, + "grad_norm": 0.030310895293951035, + "learning_rate": 5.981499006169637e-06, + "loss": 0.0008, + "step": 58390 + }, + { + "epoch": 0.986961628486687, + "grad_norm": 0.054158393293619156, + "learning_rate": 5.980052828689681e-06, + "loss": 0.0013, + "step": 58400 + }, + { + "epoch": 0.9871306287655375, + "grad_norm": 0.10451913625001907, + "learning_rate": 5.97860656594038e-06, + "loss": 0.0034, + "step": 58410 + }, + { + "epoch": 0.987299629044388, + "grad_norm": 0.062465324997901917, + "learning_rate": 5.977160218047567e-06, + "loss": 0.0016, + "step": 58420 + }, + { + "epoch": 0.9874686293232384, + "grad_norm": 0.01912902668118477, + "learning_rate": 5.97571378513708e-06, + "loss": 0.0011, 
+ "step": 58430 + }, + { + "epoch": 0.9876376296020889, + "grad_norm": 0.05353707820177078, + "learning_rate": 5.974267267334768e-06, + "loss": 0.0022, + "step": 58440 + }, + { + "epoch": 0.9878066298809393, + "grad_norm": 0.16070276498794556, + "learning_rate": 5.972820664766481e-06, + "loss": 0.0017, + "step": 58450 + }, + { + "epoch": 0.9879756301597897, + "grad_norm": 0.021296484395861626, + "learning_rate": 5.971373977558084e-06, + "loss": 0.0016, + "step": 58460 + }, + { + "epoch": 0.9881446304386402, + "grad_norm": 0.1948113888502121, + "learning_rate": 5.969927205835444e-06, + "loss": 0.0023, + "step": 58470 + }, + { + "epoch": 0.9883136307174907, + "grad_norm": 0.16366679966449738, + "learning_rate": 5.968480349724438e-06, + "loss": 0.0012, + "step": 58480 + }, + { + "epoch": 0.9884826309963411, + "grad_norm": 0.022841233760118484, + "learning_rate": 5.967033409350949e-06, + "loss": 0.0013, + "step": 58490 + }, + { + "epoch": 0.9886516312751916, + "grad_norm": 0.06846968084573746, + "learning_rate": 5.965586384840868e-06, + "loss": 0.0014, + "step": 58500 + }, + { + "epoch": 0.9888206315540421, + "grad_norm": 0.10312211513519287, + "learning_rate": 5.964139276320092e-06, + "loss": 0.0031, + "step": 58510 + }, + { + "epoch": 0.9889896318328926, + "grad_norm": 0.14728088676929474, + "learning_rate": 5.9626920839145295e-06, + "loss": 0.0025, + "step": 58520 + }, + { + "epoch": 0.9891586321117429, + "grad_norm": 0.2092452347278595, + "learning_rate": 5.96124480775009e-06, + "loss": 0.0091, + "step": 58530 + }, + { + "epoch": 0.9893276323905934, + "grad_norm": 0.0748872309923172, + "learning_rate": 5.959797447952697e-06, + "loss": 0.001, + "step": 58540 + }, + { + "epoch": 0.9894966326694439, + "grad_norm": 0.015515661798417568, + "learning_rate": 5.958350004648273e-06, + "loss": 0.002, + "step": 58550 + }, + { + "epoch": 0.9896656329482943, + "grad_norm": 0.042304836213588715, + "learning_rate": 5.956902477962757e-06, + "loss": 0.001, + "step": 58560 + }, + { + "epoch": 0.9898346332271448, + "grad_norm": 0.04571213945746422, + "learning_rate": 5.955454868022088e-06, + "loss": 0.0015, + "step": 58570 + }, + { + "epoch": 0.9900036335059953, + "grad_norm": 0.1851183921098709, + "learning_rate": 5.954007174952217e-06, + "loss": 0.0014, + "step": 58580 + }, + { + "epoch": 0.9901726337848458, + "grad_norm": 0.04956304654479027, + "learning_rate": 5.952559398879099e-06, + "loss": 0.0014, + "step": 58590 + }, + { + "epoch": 0.9903416340636962, + "grad_norm": 0.05614320561289787, + "learning_rate": 5.951111539928698e-06, + "loss": 0.0019, + "step": 58600 + }, + { + "epoch": 0.9905106343425467, + "grad_norm": 0.019860200583934784, + "learning_rate": 5.949663598226985e-06, + "loss": 0.001, + "step": 58610 + }, + { + "epoch": 0.9906796346213971, + "grad_norm": 0.044026993215084076, + "learning_rate": 5.948215573899938e-06, + "loss": 0.001, + "step": 58620 + }, + { + "epoch": 0.9908486349002475, + "grad_norm": 0.01446566917002201, + "learning_rate": 5.946767467073542e-06, + "loss": 0.002, + "step": 58630 + }, + { + "epoch": 0.991017635179098, + "grad_norm": 0.04005853831768036, + "learning_rate": 5.945319277873789e-06, + "loss": 0.0016, + "step": 58640 + }, + { + "epoch": 0.9911866354579485, + "grad_norm": 0.08893624693155289, + "learning_rate": 5.943871006426678e-06, + "loss": 0.0013, + "step": 58650 + }, + { + "epoch": 0.991355635736799, + "grad_norm": 0.04670179262757301, + "learning_rate": 5.942422652858218e-06, + "loss": 0.0011, + "step": 58660 + }, + { + "epoch": 0.9915246360156494, + 
"grad_norm": 0.035787198692560196, + "learning_rate": 5.940974217294419e-06, + "loss": 0.002, + "step": 58670 + }, + { + "epoch": 0.9916936362944999, + "grad_norm": 0.07763170450925827, + "learning_rate": 5.939525699861305e-06, + "loss": 0.0012, + "step": 58680 + }, + { + "epoch": 0.9918626365733504, + "grad_norm": 0.05746553838253021, + "learning_rate": 5.938077100684901e-06, + "loss": 0.0018, + "step": 58690 + }, + { + "epoch": 0.9920316368522009, + "grad_norm": 0.010973265394568443, + "learning_rate": 5.936628419891247e-06, + "loss": 0.0019, + "step": 58700 + }, + { + "epoch": 0.9922006371310512, + "grad_norm": 0.015557970851659775, + "learning_rate": 5.935179657606381e-06, + "loss": 0.0012, + "step": 58710 + }, + { + "epoch": 0.9923696374099017, + "grad_norm": 0.2120949923992157, + "learning_rate": 5.933730813956354e-06, + "loss": 0.0018, + "step": 58720 + }, + { + "epoch": 0.9925386376887522, + "grad_norm": 0.03492291271686554, + "learning_rate": 5.932281889067223e-06, + "loss": 0.0013, + "step": 58730 + }, + { + "epoch": 0.9927076379676026, + "grad_norm": 0.16607755422592163, + "learning_rate": 5.93083288306505e-06, + "loss": 0.0044, + "step": 58740 + }, + { + "epoch": 0.9928766382464531, + "grad_norm": 0.073784738779068, + "learning_rate": 5.929383796075906e-06, + "loss": 0.0008, + "step": 58750 + }, + { + "epoch": 0.9930456385253036, + "grad_norm": 0.010416547767817974, + "learning_rate": 5.927934628225868e-06, + "loss": 0.0012, + "step": 58760 + }, + { + "epoch": 0.9932146388041541, + "grad_norm": 0.023475607857108116, + "learning_rate": 5.926485379641024e-06, + "loss": 0.0015, + "step": 58770 + }, + { + "epoch": 0.9933836390830045, + "grad_norm": 0.23363591730594635, + "learning_rate": 5.9250360504474605e-06, + "loss": 0.0022, + "step": 58780 + }, + { + "epoch": 0.9935526393618549, + "grad_norm": 0.3121536374092102, + "learning_rate": 5.9235866407712796e-06, + "loss": 0.0104, + "step": 58790 + }, + { + "epoch": 0.9937216396407054, + "grad_norm": 0.09558659046888351, + "learning_rate": 5.922137150738585e-06, + "loss": 0.0016, + "step": 58800 + }, + { + "epoch": 0.9938906399195558, + "grad_norm": 0.08474873006343842, + "learning_rate": 5.920687580475492e-06, + "loss": 0.0017, + "step": 58810 + }, + { + "epoch": 0.9940596401984063, + "grad_norm": 0.08459119498729706, + "learning_rate": 5.919237930108117e-06, + "loss": 0.0013, + "step": 58820 + }, + { + "epoch": 0.9942286404772568, + "grad_norm": 0.06942421197891235, + "learning_rate": 5.917788199762589e-06, + "loss": 0.0013, + "step": 58830 + }, + { + "epoch": 0.9943976407561073, + "grad_norm": 0.056626077741384506, + "learning_rate": 5.916338389565042e-06, + "loss": 0.0019, + "step": 58840 + }, + { + "epoch": 0.9945666410349577, + "grad_norm": 0.07657656818628311, + "learning_rate": 5.9148884996416145e-06, + "loss": 0.0009, + "step": 58850 + }, + { + "epoch": 0.9947356413138082, + "grad_norm": 0.11495871841907501, + "learning_rate": 5.913438530118455e-06, + "loss": 0.0017, + "step": 58860 + }, + { + "epoch": 0.9949046415926587, + "grad_norm": 0.08522608876228333, + "learning_rate": 5.911988481121717e-06, + "loss": 0.0008, + "step": 58870 + }, + { + "epoch": 0.995073641871509, + "grad_norm": 0.09870191663503647, + "learning_rate": 5.9105383527775625e-06, + "loss": 0.0022, + "step": 58880 + }, + { + "epoch": 0.9952426421503595, + "grad_norm": 0.035707537084817886, + "learning_rate": 5.909088145212161e-06, + "loss": 0.0026, + "step": 58890 + }, + { + "epoch": 0.99541164242921, + "grad_norm": 0.02863745577633381, + "learning_rate": 
5.907637858551684e-06, + "loss": 0.0013, + "step": 58900 + }, + { + "epoch": 0.9955806427080605, + "grad_norm": 0.014273157343268394, + "learning_rate": 5.906187492922318e-06, + "loss": 0.0009, + "step": 58910 + }, + { + "epoch": 0.9957496429869109, + "grad_norm": 0.04612638056278229, + "learning_rate": 5.904737048450249e-06, + "loss": 0.0011, + "step": 58920 + }, + { + "epoch": 0.9959186432657614, + "grad_norm": 0.05339609086513519, + "learning_rate": 5.903286525261673e-06, + "loss": 0.0011, + "step": 58930 + }, + { + "epoch": 0.9960876435446119, + "grad_norm": 0.05890113487839699, + "learning_rate": 5.901835923482793e-06, + "loss": 0.0014, + "step": 58940 + }, + { + "epoch": 0.9962566438234624, + "grad_norm": 0.03513943403959274, + "learning_rate": 5.9003852432398164e-06, + "loss": 0.0019, + "step": 58950 + }, + { + "epoch": 0.9964256441023128, + "grad_norm": 0.009775166399776936, + "learning_rate": 5.898934484658963e-06, + "loss": 0.0017, + "step": 58960 + }, + { + "epoch": 0.9965946443811632, + "grad_norm": 0.03837180137634277, + "learning_rate": 5.897483647866453e-06, + "loss": 0.0018, + "step": 58970 + }, + { + "epoch": 0.9967636446600137, + "grad_norm": 0.06849581003189087, + "learning_rate": 5.8960327329885185e-06, + "loss": 0.0018, + "step": 58980 + }, + { + "epoch": 0.9969326449388641, + "grad_norm": 0.06228544935584068, + "learning_rate": 5.894581740151394e-06, + "loss": 0.002, + "step": 58990 + }, + { + "epoch": 0.9971016452177146, + "grad_norm": 0.07643171399831772, + "learning_rate": 5.893130669481324e-06, + "loss": 0.0012, + "step": 59000 + }, + { + "epoch": 0.9972706454965651, + "grad_norm": 0.015796547755599022, + "learning_rate": 5.891679521104558e-06, + "loss": 0.0012, + "step": 59010 + }, + { + "epoch": 0.9974396457754156, + "grad_norm": 0.03740460425615311, + "learning_rate": 5.890228295147353e-06, + "loss": 0.0015, + "step": 59020 + }, + { + "epoch": 0.997608646054266, + "grad_norm": 0.03803800791501999, + "learning_rate": 5.8887769917359746e-06, + "loss": 0.0013, + "step": 59030 + }, + { + "epoch": 0.9977776463331165, + "grad_norm": 0.04857852682471275, + "learning_rate": 5.887325610996692e-06, + "loss": 0.0009, + "step": 59040 + }, + { + "epoch": 0.9979466466119669, + "grad_norm": 0.009476653300225735, + "learning_rate": 5.885874153055779e-06, + "loss": 0.0016, + "step": 59050 + }, + { + "epoch": 0.9981156468908173, + "grad_norm": 0.10072492808103561, + "learning_rate": 5.884422618039525e-06, + "loss": 0.0021, + "step": 59060 + }, + { + "epoch": 0.9982846471696678, + "grad_norm": 0.05873139202594757, + "learning_rate": 5.882971006074217e-06, + "loss": 0.0009, + "step": 59070 + }, + { + "epoch": 0.9984536474485183, + "grad_norm": 0.1507243812084198, + "learning_rate": 5.881519317286153e-06, + "loss": 0.0018, + "step": 59080 + }, + { + "epoch": 0.9986226477273688, + "grad_norm": 0.15657368302345276, + "learning_rate": 5.880067551801638e-06, + "loss": 0.0015, + "step": 59090 + }, + { + "epoch": 0.9987916480062192, + "grad_norm": 0.05158833786845207, + "learning_rate": 5.878615709746983e-06, + "loss": 0.0015, + "step": 59100 + }, + { + "epoch": 0.9989606482850697, + "grad_norm": 0.1752629280090332, + "learning_rate": 5.877163791248503e-06, + "loss": 0.0021, + "step": 59110 + }, + { + "epoch": 0.9991296485639202, + "grad_norm": 0.02186552993953228, + "learning_rate": 5.875711796432524e-06, + "loss": 0.0008, + "step": 59120 + }, + { + "epoch": 0.9992986488427706, + "grad_norm": 0.05802800878882408, + "learning_rate": 5.874259725425375e-06, + "loss": 0.0023, + "step": 
59130 + }, + { + "epoch": 0.999467649121621, + "grad_norm": 0.06508190184831619, + "learning_rate": 5.872807578353396e-06, + "loss": 0.0011, + "step": 59140 + }, + { + "epoch": 0.9996366494004715, + "grad_norm": 0.03184705972671509, + "learning_rate": 5.871355355342927e-06, + "loss": 0.0017, + "step": 59150 + }, + { + "epoch": 0.999805649679322, + "grad_norm": 0.026573611423373222, + "learning_rate": 5.869903056520321e-06, + "loss": 0.0013, + "step": 59160 + }, + { + "epoch": 0.9999746499581724, + "grad_norm": 0.14917759597301483, + "learning_rate": 5.868450682011932e-06, + "loss": 0.0007, + "step": 59170 + }, + { + "epoch": 1.000143650237023, + "grad_norm": 0.03130548819899559, + "learning_rate": 5.8669982319441274e-06, + "loss": 0.0005, + "step": 59180 + }, + { + "epoch": 1.0003126505158733, + "grad_norm": 0.01425376906991005, + "learning_rate": 5.865545706443277e-06, + "loss": 0.0014, + "step": 59190 + }, + { + "epoch": 1.0004816507947238, + "grad_norm": 0.04437197372317314, + "learning_rate": 5.864093105635756e-06, + "loss": 0.0007, + "step": 59200 + }, + { + "epoch": 1.0006506510735742, + "grad_norm": 0.17034755647182465, + "learning_rate": 5.862640429647948e-06, + "loss": 0.0015, + "step": 59210 + }, + { + "epoch": 1.0008196513524248, + "grad_norm": 0.30543792247772217, + "learning_rate": 5.861187678606243e-06, + "loss": 0.0018, + "step": 59220 + }, + { + "epoch": 1.0009886516312752, + "grad_norm": 0.028305659070611, + "learning_rate": 5.859734852637038e-06, + "loss": 0.001, + "step": 59230 + }, + { + "epoch": 1.0011576519101257, + "grad_norm": 0.08342143148183823, + "learning_rate": 5.858281951866735e-06, + "loss": 0.0013, + "step": 59240 + }, + { + "epoch": 1.001326652188976, + "grad_norm": 0.0451873242855072, + "learning_rate": 5.856828976421743e-06, + "loss": 0.001, + "step": 59250 + }, + { + "epoch": 1.0014956524678267, + "grad_norm": 0.0032537258230149746, + "learning_rate": 5.855375926428478e-06, + "loss": 0.001, + "step": 59260 + }, + { + "epoch": 1.001664652746677, + "grad_norm": 0.10370821505784988, + "learning_rate": 5.853922802013364e-06, + "loss": 0.0042, + "step": 59270 + }, + { + "epoch": 1.0018336530255274, + "grad_norm": 0.04501200094819069, + "learning_rate": 5.85246960330283e-06, + "loss": 0.0011, + "step": 59280 + }, + { + "epoch": 1.002002653304378, + "grad_norm": 0.057593826204538345, + "learning_rate": 5.851016330423309e-06, + "loss": 0.002, + "step": 59290 + }, + { + "epoch": 1.0021716535832284, + "grad_norm": 0.048292193561792374, + "learning_rate": 5.8495629835012436e-06, + "loss": 0.0025, + "step": 59300 + }, + { + "epoch": 1.002340653862079, + "grad_norm": 0.04649476706981659, + "learning_rate": 5.848109562663083e-06, + "loss": 0.0008, + "step": 59310 + }, + { + "epoch": 1.0025096541409293, + "grad_norm": 0.024468235671520233, + "learning_rate": 5.846656068035281e-06, + "loss": 0.0018, + "step": 59320 + }, + { + "epoch": 1.0026786544197799, + "grad_norm": 0.03331344947218895, + "learning_rate": 5.8452024997443e-06, + "loss": 0.0017, + "step": 59330 + }, + { + "epoch": 1.0028476546986302, + "grad_norm": 0.12571880221366882, + "learning_rate": 5.843748857916605e-06, + "loss": 0.0008, + "step": 59340 + }, + { + "epoch": 1.0030166549774806, + "grad_norm": 0.12865477800369263, + "learning_rate": 5.842295142678671e-06, + "loss": 0.0018, + "step": 59350 + }, + { + "epoch": 1.0031856552563312, + "grad_norm": 0.03438074141740799, + "learning_rate": 5.84084135415698e-06, + "loss": 0.0009, + "step": 59360 + }, + { + "epoch": 1.0033546555351815, + "grad_norm": 
0.09085816144943237, + "learning_rate": 5.839387492478016e-06, + "loss": 0.0023, + "step": 59370 + }, + { + "epoch": 1.0035236558140321, + "grad_norm": 0.00895149540156126, + "learning_rate": 5.837933557768274e-06, + "loss": 0.0013, + "step": 59380 + }, + { + "epoch": 1.0036926560928825, + "grad_norm": 0.22068193554878235, + "learning_rate": 5.836479550154253e-06, + "loss": 0.0026, + "step": 59390 + }, + { + "epoch": 1.003861656371733, + "grad_norm": 0.0855739638209343, + "learning_rate": 5.8350254697624575e-06, + "loss": 0.0032, + "step": 59400 + }, + { + "epoch": 1.0040306566505834, + "grad_norm": 0.06858029216527939, + "learning_rate": 5.8335713167194e-06, + "loss": 0.0011, + "step": 59410 + }, + { + "epoch": 1.004199656929434, + "grad_norm": 0.05948581174015999, + "learning_rate": 5.8321170911516e-06, + "loss": 0.0013, + "step": 59420 + }, + { + "epoch": 1.0043686572082844, + "grad_norm": 0.058058999478816986, + "learning_rate": 5.830662793185582e-06, + "loss": 0.0015, + "step": 59430 + }, + { + "epoch": 1.0045376574871347, + "grad_norm": 0.08761637657880783, + "learning_rate": 5.829208422947875e-06, + "loss": 0.0011, + "step": 59440 + }, + { + "epoch": 1.0047066577659853, + "grad_norm": 0.02783399261534214, + "learning_rate": 5.827753980565018e-06, + "loss": 0.0009, + "step": 59450 + }, + { + "epoch": 1.0048756580448357, + "grad_norm": 0.0013959665084257722, + "learning_rate": 5.8262994661635544e-06, + "loss": 0.0012, + "step": 59460 + }, + { + "epoch": 1.0050446583236863, + "grad_norm": 0.06014212965965271, + "learning_rate": 5.824844879870033e-06, + "loss": 0.0009, + "step": 59470 + }, + { + "epoch": 1.0052136586025366, + "grad_norm": 0.2674904465675354, + "learning_rate": 5.823390221811012e-06, + "loss": 0.0013, + "step": 59480 + }, + { + "epoch": 1.0053826588813872, + "grad_norm": 0.010724053718149662, + "learning_rate": 5.8219354921130515e-06, + "loss": 0.0009, + "step": 59490 + }, + { + "epoch": 1.0055516591602376, + "grad_norm": 0.14621977508068085, + "learning_rate": 5.82048069090272e-06, + "loss": 0.0016, + "step": 59500 + }, + { + "epoch": 1.0057206594390882, + "grad_norm": 0.008767823688685894, + "learning_rate": 5.819025818306594e-06, + "loss": 0.0012, + "step": 59510 + }, + { + "epoch": 1.0058896597179385, + "grad_norm": 0.015448343008756638, + "learning_rate": 5.8175708744512534e-06, + "loss": 0.0016, + "step": 59520 + }, + { + "epoch": 1.006058659996789, + "grad_norm": 0.03502323105931282, + "learning_rate": 5.816115859463286e-06, + "loss": 0.0007, + "step": 59530 + }, + { + "epoch": 1.0062276602756395, + "grad_norm": 0.0019112990703433752, + "learning_rate": 5.814660773469283e-06, + "loss": 0.0013, + "step": 59540 + }, + { + "epoch": 1.0063966605544898, + "grad_norm": 0.008734236471354961, + "learning_rate": 5.813205616595848e-06, + "loss": 0.001, + "step": 59550 + }, + { + "epoch": 1.0065656608333404, + "grad_norm": 0.03247779235243797, + "learning_rate": 5.811750388969582e-06, + "loss": 0.001, + "step": 59560 + }, + { + "epoch": 1.0067346611121908, + "grad_norm": 0.030376799404621124, + "learning_rate": 5.8102950907171e-06, + "loss": 0.0015, + "step": 59570 + }, + { + "epoch": 1.0069036613910414, + "grad_norm": 0.16987168788909912, + "learning_rate": 5.808839721965019e-06, + "loss": 0.0019, + "step": 59580 + }, + { + "epoch": 1.0070726616698917, + "grad_norm": 0.023650676012039185, + "learning_rate": 5.807384282839963e-06, + "loss": 0.0013, + "step": 59590 + }, + { + "epoch": 1.0072416619487423, + "grad_norm": 0.0017415835754945874, + "learning_rate": 
5.805928773468563e-06, + "loss": 0.0016, + "step": 59600 + }, + { + "epoch": 1.0074106622275927, + "grad_norm": 0.0149134062230587, + "learning_rate": 5.804473193977455e-06, + "loss": 0.0009, + "step": 59610 + }, + { + "epoch": 1.007579662506443, + "grad_norm": 0.13160866498947144, + "learning_rate": 5.803017544493281e-06, + "loss": 0.0028, + "step": 59620 + }, + { + "epoch": 1.0077486627852936, + "grad_norm": 0.03560006618499756, + "learning_rate": 5.801561825142691e-06, + "loss": 0.0014, + "step": 59630 + }, + { + "epoch": 1.007917663064144, + "grad_norm": 0.046323128044605255, + "learning_rate": 5.800106036052337e-06, + "loss": 0.0012, + "step": 59640 + }, + { + "epoch": 1.0080866633429946, + "grad_norm": 0.2378297746181488, + "learning_rate": 5.798650177348883e-06, + "loss": 0.0016, + "step": 59650 + }, + { + "epoch": 1.008255663621845, + "grad_norm": 0.09480898827314377, + "learning_rate": 5.797194249158993e-06, + "loss": 0.001, + "step": 59660 + }, + { + "epoch": 1.0084246639006955, + "grad_norm": 0.05673360824584961, + "learning_rate": 5.7957382516093405e-06, + "loss": 0.0011, + "step": 59670 + }, + { + "epoch": 1.0085936641795459, + "grad_norm": 0.03825223073363304, + "learning_rate": 5.794282184826605e-06, + "loss": 0.0021, + "step": 59680 + }, + { + "epoch": 1.0087626644583965, + "grad_norm": 0.013609139248728752, + "learning_rate": 5.792826048937471e-06, + "loss": 0.0009, + "step": 59690 + }, + { + "epoch": 1.0089316647372468, + "grad_norm": 0.08244311064481735, + "learning_rate": 5.79136984406863e-06, + "loss": 0.0008, + "step": 59700 + }, + { + "epoch": 1.0091006650160972, + "grad_norm": 0.06593206524848938, + "learning_rate": 5.789913570346778e-06, + "loss": 0.0012, + "step": 59710 + }, + { + "epoch": 1.0092696652949478, + "grad_norm": 0.060266364365816116, + "learning_rate": 5.788457227898619e-06, + "loss": 0.0017, + "step": 59720 + }, + { + "epoch": 1.0094386655737981, + "grad_norm": 0.054617322981357574, + "learning_rate": 5.787000816850858e-06, + "loss": 0.0011, + "step": 59730 + }, + { + "epoch": 1.0096076658526487, + "grad_norm": 0.04017211124300957, + "learning_rate": 5.785544337330214e-06, + "loss": 0.0009, + "step": 59740 + }, + { + "epoch": 1.009776666131499, + "grad_norm": 0.006230284925550222, + "learning_rate": 5.784087789463408e-06, + "loss": 0.0009, + "step": 59750 + }, + { + "epoch": 1.0099456664103497, + "grad_norm": 0.050391849130392075, + "learning_rate": 5.782631173377166e-06, + "loss": 0.0006, + "step": 59760 + }, + { + "epoch": 1.0101146666892, + "grad_norm": 0.020542792975902557, + "learning_rate": 5.781174489198218e-06, + "loss": 0.0037, + "step": 59770 + }, + { + "epoch": 1.0102836669680504, + "grad_norm": 0.030491294339299202, + "learning_rate": 5.779717737053306e-06, + "loss": 0.001, + "step": 59780 + }, + { + "epoch": 1.010452667246901, + "grad_norm": 0.027004361152648926, + "learning_rate": 5.778260917069172e-06, + "loss": 0.001, + "step": 59790 + }, + { + "epoch": 1.0106216675257513, + "grad_norm": 0.06397947669029236, + "learning_rate": 5.776804029372568e-06, + "loss": 0.0014, + "step": 59800 + }, + { + "epoch": 1.010790667804602, + "grad_norm": 0.014559914357960224, + "learning_rate": 5.7753470740902495e-06, + "loss": 0.0011, + "step": 59810 + }, + { + "epoch": 1.0109596680834523, + "grad_norm": 0.09837585687637329, + "learning_rate": 5.77389005134898e-06, + "loss": 0.0012, + "step": 59820 + }, + { + "epoch": 1.0111286683623029, + "grad_norm": 0.08793274313211441, + "learning_rate": 5.772432961275523e-06, + "loss": 0.0008, + "step": 59830 + }, 
+ { + "epoch": 1.0112976686411532, + "grad_norm": 0.10017421096563339, + "learning_rate": 5.770975803996659e-06, + "loss": 0.0011, + "step": 59840 + }, + { + "epoch": 1.0114666689200038, + "grad_norm": 0.06319086253643036, + "learning_rate": 5.769518579639163e-06, + "loss": 0.001, + "step": 59850 + }, + { + "epoch": 1.0116356691988542, + "grad_norm": 0.004494407679885626, + "learning_rate": 5.768061288329823e-06, + "loss": 0.0016, + "step": 59860 + }, + { + "epoch": 1.0118046694777045, + "grad_norm": 0.04238826781511307, + "learning_rate": 5.766603930195429e-06, + "loss": 0.0013, + "step": 59870 + }, + { + "epoch": 1.0119736697565551, + "grad_norm": 0.06290490180253983, + "learning_rate": 5.76514650536278e-06, + "loss": 0.0011, + "step": 59880 + }, + { + "epoch": 1.0121426700354055, + "grad_norm": 0.06323585659265518, + "learning_rate": 5.763689013958677e-06, + "loss": 0.0009, + "step": 59890 + }, + { + "epoch": 1.012311670314256, + "grad_norm": 0.06292340159416199, + "learning_rate": 5.76223145610993e-06, + "loss": 0.004, + "step": 59900 + }, + { + "epoch": 1.0124806705931064, + "grad_norm": 0.05352671816945076, + "learning_rate": 5.760773831943354e-06, + "loss": 0.0013, + "step": 59910 + }, + { + "epoch": 1.012649670871957, + "grad_norm": 0.049937695264816284, + "learning_rate": 5.7593161415857665e-06, + "loss": 0.0018, + "step": 59920 + }, + { + "epoch": 1.0128186711508074, + "grad_norm": 0.06884843111038208, + "learning_rate": 5.757858385163997e-06, + "loss": 0.0016, + "step": 59930 + }, + { + "epoch": 1.012987671429658, + "grad_norm": 0.06434638798236847, + "learning_rate": 5.756400562804876e-06, + "loss": 0.0008, + "step": 59940 + }, + { + "epoch": 1.0131566717085083, + "grad_norm": 0.1740601807832718, + "learning_rate": 5.754942674635241e-06, + "loss": 0.0012, + "step": 59950 + }, + { + "epoch": 1.0133256719873587, + "grad_norm": 0.02064857818186283, + "learning_rate": 5.753484720781936e-06, + "loss": 0.0007, + "step": 59960 + }, + { + "epoch": 1.0134946722662093, + "grad_norm": 0.05028160661458969, + "learning_rate": 5.752026701371809e-06, + "loss": 0.0008, + "step": 59970 + }, + { + "epoch": 1.0136636725450596, + "grad_norm": 0.05740112438797951, + "learning_rate": 5.750568616531716e-06, + "loss": 0.0012, + "step": 59980 + }, + { + "epoch": 1.0138326728239102, + "grad_norm": 0.09930772334337234, + "learning_rate": 5.749110466388516e-06, + "loss": 0.0017, + "step": 59990 + }, + { + "epoch": 1.0140016731027606, + "grad_norm": 0.027258582413196564, + "learning_rate": 5.747652251069076e-06, + "loss": 0.0019, + "step": 60000 + }, + { + "epoch": 1.0141706733816112, + "grad_norm": 0.020125612616539, + "learning_rate": 5.746193970700268e-06, + "loss": 0.0011, + "step": 60010 + }, + { + "epoch": 1.0143396736604615, + "grad_norm": 0.05175361782312393, + "learning_rate": 5.744735625408969e-06, + "loss": 0.0006, + "step": 60020 + }, + { + "epoch": 1.014508673939312, + "grad_norm": 0.04280708357691765, + "learning_rate": 5.743277215322062e-06, + "loss": 0.0008, + "step": 60030 + }, + { + "epoch": 1.0146776742181625, + "grad_norm": 0.00904067326337099, + "learning_rate": 5.741818740566436e-06, + "loss": 0.0009, + "step": 60040 + }, + { + "epoch": 1.0148466744970128, + "grad_norm": 0.021013254299759865, + "learning_rate": 5.740360201268986e-06, + "loss": 0.0018, + "step": 60050 + }, + { + "epoch": 1.0150156747758634, + "grad_norm": 0.10922824591398239, + "learning_rate": 5.738901597556611e-06, + "loss": 0.001, + "step": 60060 + }, + { + "epoch": 1.0151846750547138, + "grad_norm": 
0.09556003659963608, + "learning_rate": 5.737442929556217e-06, + "loss": 0.0008, + "step": 60070 + }, + { + "epoch": 1.0153536753335644, + "grad_norm": 0.06434619426727295, + "learning_rate": 5.735984197394715e-06, + "loss": 0.001, + "step": 60080 + }, + { + "epoch": 1.0155226756124147, + "grad_norm": 0.350649356842041, + "learning_rate": 5.734525401199022e-06, + "loss": 0.0015, + "step": 60090 + }, + { + "epoch": 1.0156916758912653, + "grad_norm": 0.012149405665695667, + "learning_rate": 5.733066541096059e-06, + "loss": 0.0014, + "step": 60100 + }, + { + "epoch": 1.0158606761701157, + "grad_norm": 0.04299250245094299, + "learning_rate": 5.731607617212758e-06, + "loss": 0.0009, + "step": 60110 + }, + { + "epoch": 1.0160296764489662, + "grad_norm": 0.012249082326889038, + "learning_rate": 5.730148629676048e-06, + "loss": 0.001, + "step": 60120 + }, + { + "epoch": 1.0161986767278166, + "grad_norm": 0.052636779844760895, + "learning_rate": 5.728689578612868e-06, + "loss": 0.0015, + "step": 60130 + }, + { + "epoch": 1.016367677006667, + "grad_norm": 0.0249900221824646, + "learning_rate": 5.727230464150167e-06, + "loss": 0.0051, + "step": 60140 + }, + { + "epoch": 1.0165366772855176, + "grad_norm": 0.10780275613069534, + "learning_rate": 5.725771286414889e-06, + "loss": 0.0013, + "step": 60150 + }, + { + "epoch": 1.016705677564368, + "grad_norm": 0.02290751226246357, + "learning_rate": 5.724312045533995e-06, + "loss": 0.0013, + "step": 60160 + }, + { + "epoch": 1.0168746778432185, + "grad_norm": 0.11151617765426636, + "learning_rate": 5.722852741634444e-06, + "loss": 0.001, + "step": 60170 + }, + { + "epoch": 1.0170436781220689, + "grad_norm": 0.04342846944928169, + "learning_rate": 5.721393374843201e-06, + "loss": 0.0009, + "step": 60180 + }, + { + "epoch": 1.0172126784009194, + "grad_norm": 0.042505376040935516, + "learning_rate": 5.71993394528724e-06, + "loss": 0.0012, + "step": 60190 + }, + { + "epoch": 1.0173816786797698, + "grad_norm": 0.029010934755206108, + "learning_rate": 5.718474453093537e-06, + "loss": 0.0014, + "step": 60200 + }, + { + "epoch": 1.0175506789586204, + "grad_norm": 0.04606638476252556, + "learning_rate": 5.717014898389075e-06, + "loss": 0.0011, + "step": 60210 + }, + { + "epoch": 1.0177196792374708, + "grad_norm": 0.07484077662229538, + "learning_rate": 5.715555281300842e-06, + "loss": 0.0014, + "step": 60220 + }, + { + "epoch": 1.0178886795163211, + "grad_norm": 0.023599427193403244, + "learning_rate": 5.714095601955833e-06, + "loss": 0.0011, + "step": 60230 + }, + { + "epoch": 1.0180576797951717, + "grad_norm": 0.0500069223344326, + "learning_rate": 5.712635860481048e-06, + "loss": 0.0016, + "step": 60240 + }, + { + "epoch": 1.018226680074022, + "grad_norm": 0.03526817262172699, + "learning_rate": 5.711176057003491e-06, + "loss": 0.0018, + "step": 60250 + }, + { + "epoch": 1.0183956803528726, + "grad_norm": 0.030725175514817238, + "learning_rate": 5.709716191650171e-06, + "loss": 0.0004, + "step": 60260 + }, + { + "epoch": 1.018564680631723, + "grad_norm": 0.0388350747525692, + "learning_rate": 5.708256264548102e-06, + "loss": 0.0022, + "step": 60270 + }, + { + "epoch": 1.0187336809105736, + "grad_norm": 0.0803573951125145, + "learning_rate": 5.706796275824308e-06, + "loss": 0.0024, + "step": 60280 + }, + { + "epoch": 1.018902681189424, + "grad_norm": 0.021474266424775124, + "learning_rate": 5.705336225605813e-06, + "loss": 0.0011, + "step": 60290 + }, + { + "epoch": 1.0190716814682743, + "grad_norm": 0.5377752184867859, + "learning_rate": 5.703876114019649e-06, + 
"loss": 0.0018, + "step": 60300 + }, + { + "epoch": 1.019240681747125, + "grad_norm": 0.006219279952347279, + "learning_rate": 5.7024159411928516e-06, + "loss": 0.0023, + "step": 60310 + }, + { + "epoch": 1.0194096820259753, + "grad_norm": 0.07081045210361481, + "learning_rate": 5.700955707252465e-06, + "loss": 0.0016, + "step": 60320 + }, + { + "epoch": 1.0195786823048258, + "grad_norm": 0.029531359672546387, + "learning_rate": 5.699495412325535e-06, + "loss": 0.0013, + "step": 60330 + }, + { + "epoch": 1.0197476825836762, + "grad_norm": 0.01691374182701111, + "learning_rate": 5.698035056539117e-06, + "loss": 0.0009, + "step": 60340 + }, + { + "epoch": 1.0199166828625268, + "grad_norm": 0.09221971035003662, + "learning_rate": 5.6965746400202646e-06, + "loss": 0.0009, + "step": 60350 + }, + { + "epoch": 1.0200856831413772, + "grad_norm": 0.03064112365245819, + "learning_rate": 5.695114162896044e-06, + "loss": 0.0015, + "step": 60360 + }, + { + "epoch": 1.0202546834202277, + "grad_norm": 0.1137605831027031, + "learning_rate": 5.693653625293524e-06, + "loss": 0.0012, + "step": 60370 + }, + { + "epoch": 1.020423683699078, + "grad_norm": 0.04199506342411041, + "learning_rate": 5.6921930273397785e-06, + "loss": 0.0011, + "step": 60380 + }, + { + "epoch": 1.0205926839779285, + "grad_norm": 0.10365763306617737, + "learning_rate": 5.6907323691618845e-06, + "loss": 0.0018, + "step": 60390 + }, + { + "epoch": 1.020761684256779, + "grad_norm": 0.15501956641674042, + "learning_rate": 5.689271650886928e-06, + "loss": 0.0013, + "step": 60400 + }, + { + "epoch": 1.0209306845356294, + "grad_norm": 0.051700349897146225, + "learning_rate": 5.687810872641999e-06, + "loss": 0.001, + "step": 60410 + }, + { + "epoch": 1.02109968481448, + "grad_norm": 0.05651358515024185, + "learning_rate": 5.68635003455419e-06, + "loss": 0.0008, + "step": 60420 + }, + { + "epoch": 1.0212686850933304, + "grad_norm": 0.04288886487483978, + "learning_rate": 5.684889136750604e-06, + "loss": 0.001, + "step": 60430 + }, + { + "epoch": 1.021437685372181, + "grad_norm": 0.043509211391210556, + "learning_rate": 5.683428179358344e-06, + "loss": 0.0008, + "step": 60440 + }, + { + "epoch": 1.0216066856510313, + "grad_norm": 0.029147949069738388, + "learning_rate": 5.6819671625045225e-06, + "loss": 0.0009, + "step": 60450 + }, + { + "epoch": 1.0217756859298819, + "grad_norm": 0.003975256811827421, + "learning_rate": 5.680506086316252e-06, + "loss": 0.0067, + "step": 60460 + }, + { + "epoch": 1.0219446862087322, + "grad_norm": 0.08576459437608719, + "learning_rate": 5.679044950920656e-06, + "loss": 0.0019, + "step": 60470 + }, + { + "epoch": 1.0221136864875826, + "grad_norm": 0.11685976386070251, + "learning_rate": 5.677583756444859e-06, + "loss": 0.0011, + "step": 60480 + }, + { + "epoch": 1.0222826867664332, + "grad_norm": 0.026770997792482376, + "learning_rate": 5.676122503015992e-06, + "loss": 0.0011, + "step": 60490 + }, + { + "epoch": 1.0224516870452836, + "grad_norm": 0.07476416230201721, + "learning_rate": 5.674661190761191e-06, + "loss": 0.0018, + "step": 60500 + }, + { + "epoch": 1.0226206873241341, + "grad_norm": 0.21177829802036285, + "learning_rate": 5.673199819807598e-06, + "loss": 0.001, + "step": 60510 + }, + { + "epoch": 1.0227896876029845, + "grad_norm": 0.004483386874198914, + "learning_rate": 5.6717383902823576e-06, + "loss": 0.0011, + "step": 60520 + }, + { + "epoch": 1.022958687881835, + "grad_norm": 0.01776953786611557, + "learning_rate": 5.670276902312625e-06, + "loss": 0.0009, + "step": 60530 + }, + { + "epoch": 
1.0231276881606854, + "grad_norm": 0.046271588653326035, + "learning_rate": 5.6688153560255525e-06, + "loss": 0.0007, + "step": 60540 + }, + { + "epoch": 1.023296688439536, + "grad_norm": 0.021583637222647667, + "learning_rate": 5.6673537515483045e-06, + "loss": 0.0013, + "step": 60550 + }, + { + "epoch": 1.0234656887183864, + "grad_norm": 0.040574200451374054, + "learning_rate": 5.6658920890080475e-06, + "loss": 0.001, + "step": 60560 + }, + { + "epoch": 1.0236346889972368, + "grad_norm": 0.04284026473760605, + "learning_rate": 5.66443036853195e-06, + "loss": 0.0013, + "step": 60570 + }, + { + "epoch": 1.0238036892760873, + "grad_norm": 0.1469365656375885, + "learning_rate": 5.6629685902471935e-06, + "loss": 0.0011, + "step": 60580 + }, + { + "epoch": 1.0239726895549377, + "grad_norm": 0.026097025722265244, + "learning_rate": 5.661506754280956e-06, + "loss": 0.0016, + "step": 60590 + }, + { + "epoch": 1.0241416898337883, + "grad_norm": 0.06931482255458832, + "learning_rate": 5.660044860760425e-06, + "loss": 0.0019, + "step": 60600 + }, + { + "epoch": 1.0243106901126386, + "grad_norm": 0.029825277626514435, + "learning_rate": 5.658582909812795e-06, + "loss": 0.0009, + "step": 60610 + }, + { + "epoch": 1.0244796903914892, + "grad_norm": 0.04135201498866081, + "learning_rate": 5.65712090156526e-06, + "loss": 0.0005, + "step": 60620 + }, + { + "epoch": 1.0246486906703396, + "grad_norm": 0.0761294886469841, + "learning_rate": 5.655658836145022e-06, + "loss": 0.0011, + "step": 60630 + }, + { + "epoch": 1.0248176909491902, + "grad_norm": 0.12756778299808502, + "learning_rate": 5.654196713679291e-06, + "loss": 0.0015, + "step": 60640 + }, + { + "epoch": 1.0249866912280405, + "grad_norm": 0.1018855944275856, + "learning_rate": 5.652734534295274e-06, + "loss": 0.001, + "step": 60650 + }, + { + "epoch": 1.025155691506891, + "grad_norm": 0.07063782960176468, + "learning_rate": 5.651272298120192e-06, + "loss": 0.0015, + "step": 60660 + }, + { + "epoch": 1.0253246917857415, + "grad_norm": 0.03172563761472702, + "learning_rate": 5.6498100052812635e-06, + "loss": 0.0013, + "step": 60670 + }, + { + "epoch": 1.0254936920645918, + "grad_norm": 0.06417392939329147, + "learning_rate": 5.648347655905716e-06, + "loss": 0.0037, + "step": 60680 + }, + { + "epoch": 1.0256626923434424, + "grad_norm": 0.02979256398975849, + "learning_rate": 5.6468852501207816e-06, + "loss": 0.0003, + "step": 60690 + }, + { + "epoch": 1.0258316926222928, + "grad_norm": 0.1255546510219574, + "learning_rate": 5.6454227880536945e-06, + "loss": 0.0012, + "step": 60700 + }, + { + "epoch": 1.0260006929011434, + "grad_norm": 0.017972994595766068, + "learning_rate": 5.6439602698316985e-06, + "loss": 0.0012, + "step": 60710 + }, + { + "epoch": 1.0261696931799937, + "grad_norm": 0.004161244723945856, + "learning_rate": 5.64249769558204e-06, + "loss": 0.0011, + "step": 60720 + }, + { + "epoch": 1.026338693458844, + "grad_norm": 0.0404852032661438, + "learning_rate": 5.641035065431969e-06, + "loss": 0.0013, + "step": 60730 + }, + { + "epoch": 1.0265076937376947, + "grad_norm": 0.11190905421972275, + "learning_rate": 5.639572379508741e-06, + "loss": 0.0012, + "step": 60740 + }, + { + "epoch": 1.026676694016545, + "grad_norm": 0.06214538961648941, + "learning_rate": 5.6381096379396174e-06, + "loss": 0.0013, + "step": 60750 + }, + { + "epoch": 1.0268456942953956, + "grad_norm": 0.09106887876987457, + "learning_rate": 5.636646840851863e-06, + "loss": 0.0008, + "step": 60760 + }, + { + "epoch": 1.027014694574246, + "grad_norm": 0.027696475386619568, 
+ "learning_rate": 5.6351839883727485e-06, + "loss": 0.0009, + "step": 60770 + }, + { + "epoch": 1.0271836948530966, + "grad_norm": 0.0765504464507103, + "learning_rate": 5.633721080629551e-06, + "loss": 0.0034, + "step": 60780 + }, + { + "epoch": 1.027352695131947, + "grad_norm": 0.06260436028242111, + "learning_rate": 5.632258117749547e-06, + "loss": 0.0008, + "step": 60790 + }, + { + "epoch": 1.0275216954107975, + "grad_norm": 0.0022702463902533054, + "learning_rate": 5.630795099860024e-06, + "loss": 0.0008, + "step": 60800 + }, + { + "epoch": 1.0276906956896479, + "grad_norm": 0.04073888063430786, + "learning_rate": 5.6293320270882726e-06, + "loss": 0.0016, + "step": 60810 + }, + { + "epoch": 1.0278596959684982, + "grad_norm": 0.022465115413069725, + "learning_rate": 5.6278688995615836e-06, + "loss": 0.0026, + "step": 60820 + }, + { + "epoch": 1.0280286962473488, + "grad_norm": 0.06191631779074669, + "learning_rate": 5.62640571740726e-06, + "loss": 0.0031, + "step": 60830 + }, + { + "epoch": 1.0281976965261992, + "grad_norm": 0.07303152233362198, + "learning_rate": 5.624942480752603e-06, + "loss": 0.0016, + "step": 60840 + }, + { + "epoch": 1.0283666968050498, + "grad_norm": 0.07268793880939484, + "learning_rate": 5.623479189724923e-06, + "loss": 0.0011, + "step": 60850 + }, + { + "epoch": 1.0285356970839001, + "grad_norm": 0.015413266606628895, + "learning_rate": 5.622015844451533e-06, + "loss": 0.0011, + "step": 60860 + }, + { + "epoch": 1.0287046973627507, + "grad_norm": 0.030530164018273354, + "learning_rate": 5.620552445059748e-06, + "loss": 0.0006, + "step": 60870 + }, + { + "epoch": 1.028873697641601, + "grad_norm": 0.1251511573791504, + "learning_rate": 5.619088991676895e-06, + "loss": 0.0014, + "step": 60880 + }, + { + "epoch": 1.0290426979204517, + "grad_norm": 0.011009990237653255, + "learning_rate": 5.617625484430301e-06, + "loss": 0.001, + "step": 60890 + }, + { + "epoch": 1.029211698199302, + "grad_norm": 0.04360377788543701, + "learning_rate": 5.616161923447297e-06, + "loss": 0.0014, + "step": 60900 + }, + { + "epoch": 1.0293806984781524, + "grad_norm": 0.027645142748951912, + "learning_rate": 5.614698308855221e-06, + "loss": 0.001, + "step": 60910 + }, + { + "epoch": 1.029549698757003, + "grad_norm": 0.017806977033615112, + "learning_rate": 5.6132346407814135e-06, + "loss": 0.0007, + "step": 60920 + }, + { + "epoch": 1.0297186990358533, + "grad_norm": 0.011086252517998219, + "learning_rate": 5.61177091935322e-06, + "loss": 0.001, + "step": 60930 + }, + { + "epoch": 1.029887699314704, + "grad_norm": 0.026597237214446068, + "learning_rate": 5.610307144697994e-06, + "loss": 0.0007, + "step": 60940 + }, + { + "epoch": 1.0300566995935543, + "grad_norm": 0.04197683557868004, + "learning_rate": 5.608843316943089e-06, + "loss": 0.0011, + "step": 60950 + }, + { + "epoch": 1.0302256998724049, + "grad_norm": 0.11756736040115356, + "learning_rate": 5.607379436215865e-06, + "loss": 0.0009, + "step": 60960 + }, + { + "epoch": 1.0303947001512552, + "grad_norm": 0.03770684078335762, + "learning_rate": 5.605915502643687e-06, + "loss": 0.001, + "step": 60970 + }, + { + "epoch": 1.0305637004301058, + "grad_norm": 0.01879987120628357, + "learning_rate": 5.6044515163539244e-06, + "loss": 0.001, + "step": 60980 + }, + { + "epoch": 1.0307327007089562, + "grad_norm": 0.050969868898391724, + "learning_rate": 5.602987477473951e-06, + "loss": 0.0008, + "step": 60990 + }, + { + "epoch": 1.0309017009878065, + "grad_norm": 0.1651037186384201, + "learning_rate": 5.6015233861311465e-06, + "loss": 
0.0009, + "step": 61000 + }, + { + "epoch": 1.0309017009878065, + "eval_loss": 0.001259764190763235, + "eval_runtime": 5.7929, + "eval_samples_per_second": 34.525, + "eval_steps_per_second": 8.631, + "step": 61000 + }, + { + "epoch": 1.0310707012666571, + "grad_norm": 0.05381014943122864, + "learning_rate": 5.600059242452893e-06, + "loss": 0.0008, + "step": 61010 + }, + { + "epoch": 1.0312397015455075, + "grad_norm": 0.17811964452266693, + "learning_rate": 5.598595046566579e-06, + "loss": 0.0014, + "step": 61020 + }, + { + "epoch": 1.031408701824358, + "grad_norm": 0.16397219896316528, + "learning_rate": 5.597130798599594e-06, + "loss": 0.0015, + "step": 61030 + }, + { + "epoch": 1.0315777021032084, + "grad_norm": 0.1764872968196869, + "learning_rate": 5.595666498679337e-06, + "loss": 0.0028, + "step": 61040 + }, + { + "epoch": 1.031746702382059, + "grad_norm": 0.15535208582878113, + "learning_rate": 5.594202146933209e-06, + "loss": 0.0015, + "step": 61050 + }, + { + "epoch": 1.0319157026609094, + "grad_norm": 0.046183567494153976, + "learning_rate": 5.592737743488614e-06, + "loss": 0.0008, + "step": 61060 + }, + { + "epoch": 1.03208470293976, + "grad_norm": 0.00033106733462773263, + "learning_rate": 5.591273288472964e-06, + "loss": 0.001, + "step": 61070 + }, + { + "epoch": 1.0322537032186103, + "grad_norm": 0.11447389423847198, + "learning_rate": 5.589808782013673e-06, + "loss": 0.0009, + "step": 61080 + }, + { + "epoch": 1.0324227034974607, + "grad_norm": 0.043528001755476, + "learning_rate": 5.58834422423816e-06, + "loss": 0.0006, + "step": 61090 + }, + { + "epoch": 1.0325917037763113, + "grad_norm": 0.04317929968237877, + "learning_rate": 5.586879615273849e-06, + "loss": 0.0009, + "step": 61100 + }, + { + "epoch": 1.0327607040551616, + "grad_norm": 0.02791532315313816, + "learning_rate": 5.58541495524817e-06, + "loss": 0.0007, + "step": 61110 + }, + { + "epoch": 1.0329297043340122, + "grad_norm": 0.06267747282981873, + "learning_rate": 5.58395024428855e-06, + "loss": 0.0033, + "step": 61120 + }, + { + "epoch": 1.0330987046128626, + "grad_norm": 0.08951187878847122, + "learning_rate": 5.582485482522432e-06, + "loss": 0.0008, + "step": 61130 + }, + { + "epoch": 1.0332677048917132, + "grad_norm": 0.05569801107048988, + "learning_rate": 5.581020670077253e-06, + "loss": 0.0017, + "step": 61140 + }, + { + "epoch": 1.0334367051705635, + "grad_norm": 0.02896769903600216, + "learning_rate": 5.579555807080462e-06, + "loss": 0.0014, + "step": 61150 + }, + { + "epoch": 1.033605705449414, + "grad_norm": 0.01657586172223091, + "learning_rate": 5.578090893659508e-06, + "loss": 0.0012, + "step": 61160 + }, + { + "epoch": 1.0337747057282645, + "grad_norm": 0.017979320138692856, + "learning_rate": 5.576625929941844e-06, + "loss": 0.0011, + "step": 61170 + }, + { + "epoch": 1.0339437060071148, + "grad_norm": 0.1074618473649025, + "learning_rate": 5.5751609160549315e-06, + "loss": 0.0012, + "step": 61180 + }, + { + "epoch": 1.0341127062859654, + "grad_norm": 0.061468396335840225, + "learning_rate": 5.573695852126232e-06, + "loss": 0.0009, + "step": 61190 + }, + { + "epoch": 1.0342817065648158, + "grad_norm": 0.29444196820259094, + "learning_rate": 5.572230738283213e-06, + "loss": 0.0009, + "step": 61200 + }, + { + "epoch": 1.0344507068436664, + "grad_norm": 0.06462724506855011, + "learning_rate": 5.570765574653349e-06, + "loss": 0.0013, + "step": 61210 + }, + { + "epoch": 1.0346197071225167, + "grad_norm": 0.12618573009967804, + "learning_rate": 5.569300361364114e-06, + "loss": 0.0012, + "step": 61220 + 
}, + { + "epoch": 1.0347887074013673, + "grad_norm": 0.07626745849847794, + "learning_rate": 5.567835098542988e-06, + "loss": 0.0011, + "step": 61230 + }, + { + "epoch": 1.0349577076802177, + "grad_norm": 0.016155019402503967, + "learning_rate": 5.5663697863174595e-06, + "loss": 0.0013, + "step": 61240 + }, + { + "epoch": 1.035126707959068, + "grad_norm": 0.016351575031876564, + "learning_rate": 5.564904424815014e-06, + "loss": 0.0015, + "step": 61250 + }, + { + "epoch": 1.0352957082379186, + "grad_norm": 0.025634871795773506, + "learning_rate": 5.563439014163146e-06, + "loss": 0.001, + "step": 61260 + }, + { + "epoch": 1.035464708516769, + "grad_norm": 0.02750859037041664, + "learning_rate": 5.561973554489354e-06, + "loss": 0.002, + "step": 61270 + }, + { + "epoch": 1.0356337087956196, + "grad_norm": 0.06129152700304985, + "learning_rate": 5.5605080459211405e-06, + "loss": 0.0011, + "step": 61280 + }, + { + "epoch": 1.03580270907447, + "grad_norm": 0.05943496152758598, + "learning_rate": 5.559042488586012e-06, + "loss": 0.0008, + "step": 61290 + }, + { + "epoch": 1.0359717093533205, + "grad_norm": 0.04022928327322006, + "learning_rate": 5.557576882611477e-06, + "loss": 0.0008, + "step": 61300 + }, + { + "epoch": 1.0361407096321709, + "grad_norm": 0.04034409672021866, + "learning_rate": 5.556111228125053e-06, + "loss": 0.0016, + "step": 61310 + }, + { + "epoch": 1.0363097099110214, + "grad_norm": 0.05896139144897461, + "learning_rate": 5.5546455252542564e-06, + "loss": 0.0012, + "step": 61320 + }, + { + "epoch": 1.0364787101898718, + "grad_norm": 0.019437892362475395, + "learning_rate": 5.553179774126612e-06, + "loss": 0.0004, + "step": 61330 + }, + { + "epoch": 1.0366477104687222, + "grad_norm": 0.07897966355085373, + "learning_rate": 5.551713974869648e-06, + "loss": 0.0005, + "step": 61340 + }, + { + "epoch": 1.0368167107475728, + "grad_norm": 0.022268425673246384, + "learning_rate": 5.550248127610894e-06, + "loss": 0.001, + "step": 61350 + }, + { + "epoch": 1.0369857110264231, + "grad_norm": 0.24211256206035614, + "learning_rate": 5.5487822324778876e-06, + "loss": 0.0016, + "step": 61360 + }, + { + "epoch": 1.0371547113052737, + "grad_norm": 0.07900585234165192, + "learning_rate": 5.547316289598168e-06, + "loss": 0.0011, + "step": 61370 + }, + { + "epoch": 1.037323711584124, + "grad_norm": 0.029902538284659386, + "learning_rate": 5.545850299099278e-06, + "loss": 0.0009, + "step": 61380 + }, + { + "epoch": 1.0374927118629746, + "grad_norm": 0.05169609189033508, + "learning_rate": 5.5443842611087686e-06, + "loss": 0.0012, + "step": 61390 + }, + { + "epoch": 1.037661712141825, + "grad_norm": 0.05702834203839302, + "learning_rate": 5.542918175754191e-06, + "loss": 0.0014, + "step": 61400 + }, + { + "epoch": 1.0378307124206756, + "grad_norm": 0.0235629603266716, + "learning_rate": 5.541452043163101e-06, + "loss": 0.0008, + "step": 61410 + }, + { + "epoch": 1.037999712699526, + "grad_norm": 0.011054320260882378, + "learning_rate": 5.539985863463061e-06, + "loss": 0.0007, + "step": 61420 + }, + { + "epoch": 1.0381687129783763, + "grad_norm": 0.0698440670967102, + "learning_rate": 5.5385196367816345e-06, + "loss": 0.0008, + "step": 61430 + }, + { + "epoch": 1.038337713257227, + "grad_norm": 0.011756319552659988, + "learning_rate": 5.5370533632463886e-06, + "loss": 0.0008, + "step": 61440 + }, + { + "epoch": 1.0385067135360773, + "grad_norm": 0.08873260021209717, + "learning_rate": 5.5355870429849005e-06, + "loss": 0.001, + "step": 61450 + }, + { + "epoch": 1.0386757138149278, + "grad_norm": 
0.040042344480752945, + "learning_rate": 5.534120676124743e-06, + "loss": 0.0009, + "step": 61460 + }, + { + "epoch": 1.0388447140937782, + "grad_norm": 0.09399939328432083, + "learning_rate": 5.532654262793498e-06, + "loss": 0.0016, + "step": 61470 + }, + { + "epoch": 1.0390137143726288, + "grad_norm": 0.012367842718958855, + "learning_rate": 5.531187803118753e-06, + "loss": 0.0008, + "step": 61480 + }, + { + "epoch": 1.0391827146514792, + "grad_norm": 0.023874038830399513, + "learning_rate": 5.529721297228094e-06, + "loss": 0.0013, + "step": 61490 + }, + { + "epoch": 1.0393517149303297, + "grad_norm": 0.06434915214776993, + "learning_rate": 5.528254745249117e-06, + "loss": 0.0017, + "step": 61500 + }, + { + "epoch": 1.03952071520918, + "grad_norm": 0.0253615640103817, + "learning_rate": 5.526788147309417e-06, + "loss": 0.0014, + "step": 61510 + }, + { + "epoch": 1.0396897154880305, + "grad_norm": 0.0545208640396595, + "learning_rate": 5.525321503536597e-06, + "loss": 0.0016, + "step": 61520 + }, + { + "epoch": 1.039858715766881, + "grad_norm": 0.19076873362064362, + "learning_rate": 5.52385481405826e-06, + "loss": 0.0007, + "step": 61530 + }, + { + "epoch": 1.0400277160457314, + "grad_norm": 0.006800387986004353, + "learning_rate": 5.522388079002015e-06, + "loss": 0.0007, + "step": 61540 + }, + { + "epoch": 1.040196716324582, + "grad_norm": 0.003130823839455843, + "learning_rate": 5.520921298495479e-06, + "loss": 0.0008, + "step": 61550 + }, + { + "epoch": 1.0403657166034324, + "grad_norm": 0.035810377448797226, + "learning_rate": 5.519454472666263e-06, + "loss": 0.002, + "step": 61560 + }, + { + "epoch": 1.040534716882283, + "grad_norm": 0.22395989298820496, + "learning_rate": 5.517987601641992e-06, + "loss": 0.001, + "step": 61570 + }, + { + "epoch": 1.0407037171611333, + "grad_norm": 0.01395457424223423, + "learning_rate": 5.516520685550291e-06, + "loss": 0.002, + "step": 61580 + }, + { + "epoch": 1.0408727174399837, + "grad_norm": 0.009725483134388924, + "learning_rate": 5.515053724518787e-06, + "loss": 0.0007, + "step": 61590 + }, + { + "epoch": 1.0410417177188342, + "grad_norm": 0.008647358976304531, + "learning_rate": 5.5135867186751136e-06, + "loss": 0.0008, + "step": 61600 + }, + { + "epoch": 1.0412107179976846, + "grad_norm": 0.09219738841056824, + "learning_rate": 5.512119668146907e-06, + "loss": 0.0016, + "step": 61610 + }, + { + "epoch": 1.0413797182765352, + "grad_norm": 0.039802417159080505, + "learning_rate": 5.510652573061809e-06, + "loss": 0.0023, + "step": 61620 + }, + { + "epoch": 1.0415487185553856, + "grad_norm": 0.03601793944835663, + "learning_rate": 5.509185433547461e-06, + "loss": 0.0012, + "step": 61630 + }, + { + "epoch": 1.0417177188342361, + "grad_norm": 0.05075589194893837, + "learning_rate": 5.507718249731514e-06, + "loss": 0.0008, + "step": 61640 + }, + { + "epoch": 1.0418867191130865, + "grad_norm": 0.014225496910512447, + "learning_rate": 5.50625102174162e-06, + "loss": 0.001, + "step": 61650 + }, + { + "epoch": 1.042055719391937, + "grad_norm": 0.0011709729442372918, + "learning_rate": 5.504783749705435e-06, + "loss": 0.0008, + "step": 61660 + }, + { + "epoch": 1.0422247196707874, + "grad_norm": 0.05055329203605652, + "learning_rate": 5.503316433750615e-06, + "loss": 0.0011, + "step": 61670 + }, + { + "epoch": 1.0423937199496378, + "grad_norm": 0.10448620468378067, + "learning_rate": 5.501849074004829e-06, + "loss": 0.0009, + "step": 61680 + }, + { + "epoch": 1.0425627202284884, + "grad_norm": 0.08875548839569092, + "learning_rate": 
5.5003816705957425e-06, + "loss": 0.0013, + "step": 61690 + }, + { + "epoch": 1.0427317205073388, + "grad_norm": 0.022355513647198677, + "learning_rate": 5.498914223651025e-06, + "loss": 0.001, + "step": 61700 + }, + { + "epoch": 1.0429007207861893, + "grad_norm": 0.024993648752570152, + "learning_rate": 5.497446733298354e-06, + "loss": 0.0022, + "step": 61710 + }, + { + "epoch": 1.0430697210650397, + "grad_norm": 0.03373303636908531, + "learning_rate": 5.495979199665405e-06, + "loss": 0.0015, + "step": 61720 + }, + { + "epoch": 1.0432387213438903, + "grad_norm": 0.18401862680912018, + "learning_rate": 5.4945116228798645e-06, + "loss": 0.0017, + "step": 61730 + }, + { + "epoch": 1.0434077216227406, + "grad_norm": 0.037265434861183167, + "learning_rate": 5.493044003069416e-06, + "loss": 0.0007, + "step": 61740 + }, + { + "epoch": 1.0435767219015912, + "grad_norm": 0.06706123054027557, + "learning_rate": 5.491576340361752e-06, + "loss": 0.001, + "step": 61750 + }, + { + "epoch": 1.0437457221804416, + "grad_norm": 0.04308038577437401, + "learning_rate": 5.4901086348845615e-06, + "loss": 0.0005, + "step": 61760 + }, + { + "epoch": 1.043914722459292, + "grad_norm": 0.0024284175597131252, + "learning_rate": 5.488640886765547e-06, + "loss": 0.001, + "step": 61770 + }, + { + "epoch": 1.0440837227381425, + "grad_norm": 0.035065487027168274, + "learning_rate": 5.487173096132408e-06, + "loss": 0.0009, + "step": 61780 + }, + { + "epoch": 1.044252723016993, + "grad_norm": 0.14422401785850525, + "learning_rate": 5.4857052631128485e-06, + "loss": 0.0012, + "step": 61790 + }, + { + "epoch": 1.0444217232958435, + "grad_norm": 0.09435462206602097, + "learning_rate": 5.484237387834579e-06, + "loss": 0.0015, + "step": 61800 + }, + { + "epoch": 1.0445907235746938, + "grad_norm": 0.0944463238120079, + "learning_rate": 5.4827694704253095e-06, + "loss": 0.0014, + "step": 61810 + }, + { + "epoch": 1.0447597238535444, + "grad_norm": 0.028856197372078896, + "learning_rate": 5.481301511012758e-06, + "loss": 0.0007, + "step": 61820 + }, + { + "epoch": 1.0449287241323948, + "grad_norm": 0.10152793675661087, + "learning_rate": 5.479833509724642e-06, + "loss": 0.0023, + "step": 61830 + }, + { + "epoch": 1.0450977244112454, + "grad_norm": 0.023587454110383987, + "learning_rate": 5.478365466688687e-06, + "loss": 0.0007, + "step": 61840 + }, + { + "epoch": 1.0452667246900957, + "grad_norm": 0.0464615672826767, + "learning_rate": 5.476897382032615e-06, + "loss": 0.0008, + "step": 61850 + }, + { + "epoch": 1.045435724968946, + "grad_norm": 0.015667250379920006, + "learning_rate": 5.4754292558841635e-06, + "loss": 0.0013, + "step": 61860 + }, + { + "epoch": 1.0456047252477967, + "grad_norm": 0.04416114091873169, + "learning_rate": 5.473961088371064e-06, + "loss": 0.0011, + "step": 61870 + }, + { + "epoch": 1.045773725526647, + "grad_norm": 0.05349062383174896, + "learning_rate": 5.472492879621052e-06, + "loss": 0.0044, + "step": 61880 + }, + { + "epoch": 1.0459427258054976, + "grad_norm": 0.03349825739860535, + "learning_rate": 5.471024629761869e-06, + "loss": 0.0008, + "step": 61890 + }, + { + "epoch": 1.046111726084348, + "grad_norm": 0.02115381322801113, + "learning_rate": 5.469556338921263e-06, + "loss": 0.0005, + "step": 61900 + }, + { + "epoch": 1.0462807263631986, + "grad_norm": 0.06258860230445862, + "learning_rate": 5.468088007226979e-06, + "loss": 0.0005, + "step": 61910 + }, + { + "epoch": 1.046449726642049, + "grad_norm": 0.03726901859045029, + "learning_rate": 5.466619634806771e-06, + "loss": 0.001, + "step": 
61920 + }, + { + "epoch": 1.0466187269208995, + "grad_norm": 0.01326848566532135, + "learning_rate": 5.465151221788395e-06, + "loss": 0.0014, + "step": 61930 + }, + { + "epoch": 1.0467877271997499, + "grad_norm": 0.04024356231093407, + "learning_rate": 5.463682768299608e-06, + "loss": 0.001, + "step": 61940 + }, + { + "epoch": 1.0469567274786002, + "grad_norm": 0.10139014571905136, + "learning_rate": 5.462214274468173e-06, + "loss": 0.0016, + "step": 61950 + }, + { + "epoch": 1.0471257277574508, + "grad_norm": 0.18097710609436035, + "learning_rate": 5.4607457404218575e-06, + "loss": 0.0031, + "step": 61960 + }, + { + "epoch": 1.0472947280363012, + "grad_norm": 0.05461236461997032, + "learning_rate": 5.45927716628843e-06, + "loss": 0.0008, + "step": 61970 + }, + { + "epoch": 1.0474637283151518, + "grad_norm": 0.03354823216795921, + "learning_rate": 5.457808552195664e-06, + "loss": 0.0006, + "step": 61980 + }, + { + "epoch": 1.0476327285940021, + "grad_norm": 0.07403817772865295, + "learning_rate": 5.456339898271335e-06, + "loss": 0.0019, + "step": 61990 + }, + { + "epoch": 1.0478017288728527, + "grad_norm": 0.044637531042099, + "learning_rate": 5.454871204643226e-06, + "loss": 0.001, + "step": 62000 + }, + { + "epoch": 1.047970729151703, + "grad_norm": 0.029047558084130287, + "learning_rate": 5.453402471439117e-06, + "loss": 0.0014, + "step": 62010 + }, + { + "epoch": 1.0481397294305537, + "grad_norm": 0.14039036631584167, + "learning_rate": 5.451933698786796e-06, + "loss": 0.0011, + "step": 62020 + }, + { + "epoch": 1.048308729709404, + "grad_norm": 0.0889308899641037, + "learning_rate": 5.450464886814053e-06, + "loss": 0.0011, + "step": 62030 + }, + { + "epoch": 1.0484777299882544, + "grad_norm": 0.061940066516399384, + "learning_rate": 5.448996035648682e-06, + "loss": 0.0009, + "step": 62040 + }, + { + "epoch": 1.048646730267105, + "grad_norm": 0.053737200796604156, + "learning_rate": 5.447527145418482e-06, + "loss": 0.0016, + "step": 62050 + }, + { + "epoch": 1.0488157305459553, + "grad_norm": 0.04960788041353226, + "learning_rate": 5.446058216251251e-06, + "loss": 0.0009, + "step": 62060 + }, + { + "epoch": 1.048984730824806, + "grad_norm": 0.030090223997831345, + "learning_rate": 5.444589248274794e-06, + "loss": 0.0011, + "step": 62070 + }, + { + "epoch": 1.0491537311036563, + "grad_norm": 0.01693059131503105, + "learning_rate": 5.443120241616919e-06, + "loss": 0.0011, + "step": 62080 + }, + { + "epoch": 1.0493227313825069, + "grad_norm": 0.24588625133037567, + "learning_rate": 5.441651196405436e-06, + "loss": 0.0019, + "step": 62090 + }, + { + "epoch": 1.0494917316613572, + "grad_norm": 0.07555793970823288, + "learning_rate": 5.4401821127681584e-06, + "loss": 0.001, + "step": 62100 + }, + { + "epoch": 1.0496607319402078, + "grad_norm": 0.03147921338677406, + "learning_rate": 5.438712990832905e-06, + "loss": 0.0017, + "step": 62110 + }, + { + "epoch": 1.0498297322190582, + "grad_norm": 0.03343828022480011, + "learning_rate": 5.437243830727496e-06, + "loss": 0.0014, + "step": 62120 + }, + { + "epoch": 1.0499987324979085, + "grad_norm": 0.08114682883024216, + "learning_rate": 5.435774632579753e-06, + "loss": 0.0015, + "step": 62130 + }, + { + "epoch": 1.0501677327767591, + "grad_norm": 0.08246869593858719, + "learning_rate": 5.434305396517508e-06, + "loss": 0.0012, + "step": 62140 + }, + { + "epoch": 1.0503367330556095, + "grad_norm": 0.011487971059978008, + "learning_rate": 5.432836122668588e-06, + "loss": 0.0007, + "step": 62150 + }, + { + "epoch": 1.05050573333446, + "grad_norm": 
0.019197674468159676, + "learning_rate": 5.431366811160829e-06, + "loss": 0.0012, + "step": 62160 + }, + { + "epoch": 1.0506747336133104, + "grad_norm": 0.14968179166316986, + "learning_rate": 5.4298974621220665e-06, + "loss": 0.0015, + "step": 62170 + }, + { + "epoch": 1.050843733892161, + "grad_norm": 0.04203634336590767, + "learning_rate": 5.428428075680142e-06, + "loss": 0.0011, + "step": 62180 + }, + { + "epoch": 1.0510127341710114, + "grad_norm": 0.041146960109472275, + "learning_rate": 5.4269586519629e-06, + "loss": 0.001, + "step": 62190 + }, + { + "epoch": 1.0511817344498617, + "grad_norm": 0.039256054908037186, + "learning_rate": 5.425489191098187e-06, + "loss": 0.0014, + "step": 62200 + }, + { + "epoch": 1.0513507347287123, + "grad_norm": 0.0404156930744648, + "learning_rate": 5.4240196932138515e-06, + "loss": 0.0016, + "step": 62210 + }, + { + "epoch": 1.0515197350075627, + "grad_norm": 0.03777492046356201, + "learning_rate": 5.422550158437749e-06, + "loss": 0.0008, + "step": 62220 + }, + { + "epoch": 1.0516887352864133, + "grad_norm": 0.015118823386728764, + "learning_rate": 5.421080586897736e-06, + "loss": 0.0005, + "step": 62230 + }, + { + "epoch": 1.0518577355652636, + "grad_norm": 0.10300882160663605, + "learning_rate": 5.419610978721669e-06, + "loss": 0.0006, + "step": 62240 + }, + { + "epoch": 1.0520267358441142, + "grad_norm": 0.08029638230800629, + "learning_rate": 5.418141334037415e-06, + "loss": 0.0004, + "step": 62250 + }, + { + "epoch": 1.0521957361229646, + "grad_norm": 0.165598064661026, + "learning_rate": 5.416671652972841e-06, + "loss": 0.001, + "step": 62260 + }, + { + "epoch": 1.0523647364018152, + "grad_norm": 0.0262772124260664, + "learning_rate": 5.415201935655813e-06, + "loss": 0.0019, + "step": 62270 + }, + { + "epoch": 1.0525337366806655, + "grad_norm": 0.05175478383898735, + "learning_rate": 5.413732182214205e-06, + "loss": 0.0006, + "step": 62280 + }, + { + "epoch": 1.0527027369595159, + "grad_norm": 0.2423478364944458, + "learning_rate": 5.412262392775893e-06, + "loss": 0.0011, + "step": 62290 + }, + { + "epoch": 1.0528717372383665, + "grad_norm": 0.04322667792439461, + "learning_rate": 5.410792567468755e-06, + "loss": 0.0015, + "step": 62300 + }, + { + "epoch": 1.0530407375172168, + "grad_norm": 0.11982213705778122, + "learning_rate": 5.409322706420673e-06, + "loss": 0.0009, + "step": 62310 + }, + { + "epoch": 1.0532097377960674, + "grad_norm": 0.054979804903268814, + "learning_rate": 5.4078528097595325e-06, + "loss": 0.0014, + "step": 62320 + }, + { + "epoch": 1.0533787380749178, + "grad_norm": 0.0444863960146904, + "learning_rate": 5.406382877613221e-06, + "loss": 0.0011, + "step": 62330 + }, + { + "epoch": 1.0535477383537684, + "grad_norm": 0.07523641735315323, + "learning_rate": 5.404912910109631e-06, + "loss": 0.0018, + "step": 62340 + }, + { + "epoch": 1.0537167386326187, + "grad_norm": 0.08459766954183578, + "learning_rate": 5.403442907376656e-06, + "loss": 0.0013, + "step": 62350 + }, + { + "epoch": 1.0538857389114693, + "grad_norm": 0.11866951733827591, + "learning_rate": 5.401972869542193e-06, + "loss": 0.0013, + "step": 62360 + }, + { + "epoch": 1.0540547391903197, + "grad_norm": 0.028530221432447433, + "learning_rate": 5.400502796734143e-06, + "loss": 0.0021, + "step": 62370 + }, + { + "epoch": 1.05422373946917, + "grad_norm": 0.0645567923784256, + "learning_rate": 5.399032689080409e-06, + "loss": 0.0012, + "step": 62380 + }, + { + "epoch": 1.0543927397480206, + "grad_norm": 0.05980142951011658, + "learning_rate": 5.397562546708898e-06, 
+ "loss": 0.0014, + "step": 62390 + }, + { + "epoch": 1.054561740026871, + "grad_norm": 0.05293196439743042, + "learning_rate": 5.396092369747517e-06, + "loss": 0.0019, + "step": 62400 + }, + { + "epoch": 1.0547307403057216, + "grad_norm": 0.07668273895978928, + "learning_rate": 5.394622158324183e-06, + "loss": 0.0014, + "step": 62410 + }, + { + "epoch": 1.054899740584572, + "grad_norm": 0.04001685231924057, + "learning_rate": 5.393151912566809e-06, + "loss": 0.0028, + "step": 62420 + }, + { + "epoch": 1.0550687408634225, + "grad_norm": 0.0355171374976635, + "learning_rate": 5.391681632603313e-06, + "loss": 0.0009, + "step": 62430 + }, + { + "epoch": 1.0552377411422729, + "grad_norm": 0.023094194009900093, + "learning_rate": 5.390211318561618e-06, + "loss": 0.0013, + "step": 62440 + }, + { + "epoch": 1.0554067414211235, + "grad_norm": 0.021117648109793663, + "learning_rate": 5.388740970569647e-06, + "loss": 0.0013, + "step": 62450 + }, + { + "epoch": 1.0555757416999738, + "grad_norm": 0.0840069055557251, + "learning_rate": 5.387270588755329e-06, + "loss": 0.0013, + "step": 62460 + }, + { + "epoch": 1.0557447419788242, + "grad_norm": 0.056771233677864075, + "learning_rate": 5.385800173246592e-06, + "loss": 0.0013, + "step": 62470 + }, + { + "epoch": 1.0559137422576748, + "grad_norm": 0.06988224387168884, + "learning_rate": 5.3843297241713735e-06, + "loss": 0.0013, + "step": 62480 + }, + { + "epoch": 1.0560827425365251, + "grad_norm": 0.07018313556909561, + "learning_rate": 5.382859241657605e-06, + "loss": 0.0009, + "step": 62490 + }, + { + "epoch": 1.0562517428153757, + "grad_norm": 0.00574153708294034, + "learning_rate": 5.381388725833227e-06, + "loss": 0.0016, + "step": 62500 + }, + { + "epoch": 1.056420743094226, + "grad_norm": 0.06866547465324402, + "learning_rate": 5.379918176826182e-06, + "loss": 0.0014, + "step": 62510 + }, + { + "epoch": 1.0565897433730767, + "grad_norm": 0.039436422288417816, + "learning_rate": 5.3784475947644156e-06, + "loss": 0.0012, + "step": 62520 + }, + { + "epoch": 1.056758743651927, + "grad_norm": 0.022123584523797035, + "learning_rate": 5.3769769797758745e-06, + "loss": 0.0013, + "step": 62530 + }, + { + "epoch": 1.0569277439307774, + "grad_norm": 0.047539424151182175, + "learning_rate": 5.375506331988509e-06, + "loss": 0.0017, + "step": 62540 + }, + { + "epoch": 1.057096744209628, + "grad_norm": 0.11156386882066727, + "learning_rate": 5.3740356515302735e-06, + "loss": 0.002, + "step": 62550 + }, + { + "epoch": 1.0572657444884783, + "grad_norm": 0.06007852777838707, + "learning_rate": 5.372564938529126e-06, + "loss": 0.002, + "step": 62560 + }, + { + "epoch": 1.057434744767329, + "grad_norm": 0.02162274345755577, + "learning_rate": 5.371094193113022e-06, + "loss": 0.0015, + "step": 62570 + }, + { + "epoch": 1.0576037450461793, + "grad_norm": 0.099835604429245, + "learning_rate": 5.369623415409926e-06, + "loss": 0.0025, + "step": 62580 + }, + { + "epoch": 1.0577727453250299, + "grad_norm": 0.016269249841570854, + "learning_rate": 5.3681526055478e-06, + "loss": 0.0012, + "step": 62590 + }, + { + "epoch": 1.0579417456038802, + "grad_norm": 0.041425686329603195, + "learning_rate": 5.3666817636546165e-06, + "loss": 0.0006, + "step": 62600 + }, + { + "epoch": 1.0581107458827308, + "grad_norm": 0.11686749756336212, + "learning_rate": 5.36521088985834e-06, + "loss": 0.0017, + "step": 62610 + }, + { + "epoch": 1.0582797461615812, + "grad_norm": 0.02760067768394947, + "learning_rate": 5.363739984286949e-06, + "loss": 0.0007, + "step": 62620 + }, + { + "epoch": 
1.0584487464404315, + "grad_norm": 0.060067202895879745, + "learning_rate": 5.362269047068416e-06, + "loss": 0.0014, + "step": 62630 + }, + { + "epoch": 1.058617746719282, + "grad_norm": 0.06818683445453644, + "learning_rate": 5.36079807833072e-06, + "loss": 0.0009, + "step": 62640 + }, + { + "epoch": 1.0587867469981325, + "grad_norm": 0.028272630646824837, + "learning_rate": 5.3593270782018436e-06, + "loss": 0.0011, + "step": 62650 + }, + { + "epoch": 1.058955747276983, + "grad_norm": 0.38542062044143677, + "learning_rate": 5.35785604680977e-06, + "loss": 0.0033, + "step": 62660 + }, + { + "epoch": 1.0591247475558334, + "grad_norm": 0.0479482002556324, + "learning_rate": 5.356384984282488e-06, + "loss": 0.0007, + "step": 62670 + }, + { + "epoch": 1.059293747834684, + "grad_norm": 0.031087413430213928, + "learning_rate": 5.354913890747985e-06, + "loss": 0.0009, + "step": 62680 + }, + { + "epoch": 1.0594627481135344, + "grad_norm": 0.26890724897384644, + "learning_rate": 5.353442766334253e-06, + "loss": 0.0019, + "step": 62690 + }, + { + "epoch": 1.059631748392385, + "grad_norm": 0.05777981877326965, + "learning_rate": 5.351971611169289e-06, + "loss": 0.0011, + "step": 62700 + }, + { + "epoch": 1.0598007486712353, + "grad_norm": 0.06634936481714249, + "learning_rate": 5.350500425381089e-06, + "loss": 0.0012, + "step": 62710 + }, + { + "epoch": 1.0599697489500857, + "grad_norm": 0.07184673845767975, + "learning_rate": 5.349029209097654e-06, + "loss": 0.0013, + "step": 62720 + }, + { + "epoch": 1.0601387492289363, + "grad_norm": 0.0021499255672097206, + "learning_rate": 5.347557962446985e-06, + "loss": 0.0007, + "step": 62730 + }, + { + "epoch": 1.0603077495077866, + "grad_norm": 0.14570766687393188, + "learning_rate": 5.346086685557091e-06, + "loss": 0.0016, + "step": 62740 + }, + { + "epoch": 1.0604767497866372, + "grad_norm": 0.13053706288337708, + "learning_rate": 5.34461537855598e-06, + "loss": 0.0013, + "step": 62750 + }, + { + "epoch": 1.0606457500654876, + "grad_norm": 0.033001068979501724, + "learning_rate": 5.34314404157166e-06, + "loss": 0.0019, + "step": 62760 + }, + { + "epoch": 1.0608147503443381, + "grad_norm": 0.09939400106668472, + "learning_rate": 5.341672674732145e-06, + "loss": 0.0009, + "step": 62770 + }, + { + "epoch": 1.0609837506231885, + "grad_norm": 0.025106241926550865, + "learning_rate": 5.3402012781654525e-06, + "loss": 0.0007, + "step": 62780 + }, + { + "epoch": 1.061152750902039, + "grad_norm": 0.030804786831140518, + "learning_rate": 5.3387298519996e-06, + "loss": 0.0018, + "step": 62790 + }, + { + "epoch": 1.0613217511808894, + "grad_norm": 0.06594132632017136, + "learning_rate": 5.33725839636261e-06, + "loss": 0.0013, + "step": 62800 + }, + { + "epoch": 1.0614907514597398, + "grad_norm": 0.054694630205631256, + "learning_rate": 5.335786911382504e-06, + "loss": 0.001, + "step": 62810 + }, + { + "epoch": 1.0616597517385904, + "grad_norm": 0.03607044741511345, + "learning_rate": 5.334315397187311e-06, + "loss": 0.0016, + "step": 62820 + }, + { + "epoch": 1.0618287520174408, + "grad_norm": 0.1117258295416832, + "learning_rate": 5.332843853905059e-06, + "loss": 0.0014, + "step": 62830 + }, + { + "epoch": 1.0619977522962913, + "grad_norm": 0.03165534511208534, + "learning_rate": 5.331372281663778e-06, + "loss": 0.0017, + "step": 62840 + }, + { + "epoch": 1.0621667525751417, + "grad_norm": 0.04908168315887451, + "learning_rate": 5.329900680591505e-06, + "loss": 0.0016, + "step": 62850 + }, + { + "epoch": 1.0623357528539923, + "grad_norm": 0.07058288902044296, + 
"learning_rate": 5.328429050816272e-06, + "loss": 0.0011, + "step": 62860 + }, + { + "epoch": 1.0625047531328426, + "grad_norm": 0.04213688522577286, + "learning_rate": 5.326957392466121e-06, + "loss": 0.0023, + "step": 62870 + }, + { + "epoch": 1.0626737534116932, + "grad_norm": 0.06736316531896591, + "learning_rate": 5.3254857056690936e-06, + "loss": 0.001, + "step": 62880 + }, + { + "epoch": 1.0628427536905436, + "grad_norm": 0.12017694860696793, + "learning_rate": 5.3240139905532314e-06, + "loss": 0.0015, + "step": 62890 + }, + { + "epoch": 1.063011753969394, + "grad_norm": 0.06744863092899323, + "learning_rate": 5.322542247246583e-06, + "loss": 0.0013, + "step": 62900 + }, + { + "epoch": 1.0631807542482445, + "grad_norm": 0.045404065400362015, + "learning_rate": 5.321070475877196e-06, + "loss": 0.0018, + "step": 62910 + }, + { + "epoch": 1.063349754527095, + "grad_norm": 0.06868870556354523, + "learning_rate": 5.319598676573121e-06, + "loss": 0.0014, + "step": 62920 + }, + { + "epoch": 1.0635187548059455, + "grad_norm": 0.04988713935017586, + "learning_rate": 5.318126849462414e-06, + "loss": 0.0009, + "step": 62930 + }, + { + "epoch": 1.0636877550847958, + "grad_norm": 0.05224590376019478, + "learning_rate": 5.31665499467313e-06, + "loss": 0.0023, + "step": 62940 + }, + { + "epoch": 1.0638567553636464, + "grad_norm": 0.05000672861933708, + "learning_rate": 5.315183112333326e-06, + "loss": 0.0014, + "step": 62950 + }, + { + "epoch": 1.0640257556424968, + "grad_norm": 0.011630984954535961, + "learning_rate": 5.313711202571065e-06, + "loss": 0.001, + "step": 62960 + }, + { + "epoch": 1.0641947559213474, + "grad_norm": 0.07782811671495438, + "learning_rate": 5.312239265514409e-06, + "loss": 0.0012, + "step": 62970 + }, + { + "epoch": 1.0643637562001977, + "grad_norm": 0.16299135982990265, + "learning_rate": 5.310767301291425e-06, + "loss": 0.0027, + "step": 62980 + }, + { + "epoch": 1.064532756479048, + "grad_norm": 0.05368863046169281, + "learning_rate": 5.309295310030179e-06, + "loss": 0.0017, + "step": 62990 + }, + { + "epoch": 1.0647017567578987, + "grad_norm": 0.06027548760175705, + "learning_rate": 5.307823291858743e-06, + "loss": 0.0012, + "step": 63000 + }, + { + "epoch": 1.064870757036749, + "grad_norm": 0.022448888048529625, + "learning_rate": 5.306351246905188e-06, + "loss": 0.0012, + "step": 63010 + }, + { + "epoch": 1.0650397573155996, + "grad_norm": 0.06651446968317032, + "learning_rate": 5.304879175297592e-06, + "loss": 0.002, + "step": 63020 + }, + { + "epoch": 1.06520875759445, + "grad_norm": 0.07847457379102707, + "learning_rate": 5.30340707716403e-06, + "loss": 0.0009, + "step": 63030 + }, + { + "epoch": 1.0653777578733006, + "grad_norm": 0.02626158483326435, + "learning_rate": 5.301934952632583e-06, + "loss": 0.0014, + "step": 63040 + }, + { + "epoch": 1.065546758152151, + "grad_norm": 0.0006106089567765594, + "learning_rate": 5.300462801831331e-06, + "loss": 0.0011, + "step": 63050 + }, + { + "epoch": 1.0657157584310015, + "grad_norm": 0.022541480138897896, + "learning_rate": 5.298990624888359e-06, + "loss": 0.0013, + "step": 63060 + }, + { + "epoch": 1.0658847587098519, + "grad_norm": 0.012469821609556675, + "learning_rate": 5.297518421931755e-06, + "loss": 0.0007, + "step": 63070 + }, + { + "epoch": 1.0660537589887022, + "grad_norm": 0.025536231696605682, + "learning_rate": 5.296046193089607e-06, + "loss": 0.0007, + "step": 63080 + }, + { + "epoch": 1.0662227592675528, + "grad_norm": 0.014045816846191883, + "learning_rate": 5.2945739384900045e-06, + "loss": 0.0006, 
+ "step": 63090 + }, + { + "epoch": 1.0663917595464032, + "grad_norm": 0.024714739993214607, + "learning_rate": 5.293101658261043e-06, + "loss": 0.0009, + "step": 63100 + }, + { + "epoch": 1.0665607598252538, + "grad_norm": 0.05849656090140343, + "learning_rate": 5.291629352530817e-06, + "loss": 0.0009, + "step": 63110 + }, + { + "epoch": 1.0667297601041041, + "grad_norm": 0.03162253275513649, + "learning_rate": 5.290157021427424e-06, + "loss": 0.0008, + "step": 63120 + }, + { + "epoch": 1.0668987603829547, + "grad_norm": 0.046496231108903885, + "learning_rate": 5.288684665078966e-06, + "loss": 0.0011, + "step": 63130 + }, + { + "epoch": 1.067067760661805, + "grad_norm": 0.09088483452796936, + "learning_rate": 5.287212283613542e-06, + "loss": 0.0008, + "step": 63140 + }, + { + "epoch": 1.0672367609406554, + "grad_norm": 0.028946420177817345, + "learning_rate": 5.285739877159258e-06, + "loss": 0.0011, + "step": 63150 + }, + { + "epoch": 1.067405761219506, + "grad_norm": 0.07098438590765, + "learning_rate": 5.28426744584422e-06, + "loss": 0.0016, + "step": 63160 + }, + { + "epoch": 1.0675747614983564, + "grad_norm": 0.007547794375568628, + "learning_rate": 5.282794989796536e-06, + "loss": 0.0006, + "step": 63170 + }, + { + "epoch": 1.067743761777207, + "grad_norm": 0.013502542860805988, + "learning_rate": 5.281322509144319e-06, + "loss": 0.0008, + "step": 63180 + }, + { + "epoch": 1.0679127620560573, + "grad_norm": 0.02497120201587677, + "learning_rate": 5.279850004015681e-06, + "loss": 0.0009, + "step": 63190 + }, + { + "epoch": 1.068081762334908, + "grad_norm": 0.14901821315288544, + "learning_rate": 5.278377474538735e-06, + "loss": 0.0035, + "step": 63200 + }, + { + "epoch": 1.0682507626137583, + "grad_norm": 0.02553505264222622, + "learning_rate": 5.276904920841601e-06, + "loss": 0.001, + "step": 63210 + }, + { + "epoch": 1.0684197628926089, + "grad_norm": 0.014704728499054909, + "learning_rate": 5.275432343052398e-06, + "loss": 0.0006, + "step": 63220 + }, + { + "epoch": 1.0685887631714592, + "grad_norm": 0.026760470122098923, + "learning_rate": 5.273959741299246e-06, + "loss": 0.0011, + "step": 63230 + }, + { + "epoch": 1.0687577634503096, + "grad_norm": 0.14907385408878326, + "learning_rate": 5.27248711571027e-06, + "loss": 0.0008, + "step": 63240 + }, + { + "epoch": 1.0689267637291602, + "grad_norm": 0.03087923489511013, + "learning_rate": 5.271014466413593e-06, + "loss": 0.002, + "step": 63250 + }, + { + "epoch": 1.0690957640080105, + "grad_norm": 0.019032321870326996, + "learning_rate": 5.269541793537346e-06, + "loss": 0.0005, + "step": 63260 + }, + { + "epoch": 1.0692647642868611, + "grad_norm": 0.017722396180033684, + "learning_rate": 5.268069097209656e-06, + "loss": 0.0014, + "step": 63270 + }, + { + "epoch": 1.0694337645657115, + "grad_norm": 0.06067734956741333, + "learning_rate": 5.266596377558656e-06, + "loss": 0.001, + "step": 63280 + }, + { + "epoch": 1.069602764844562, + "grad_norm": 0.03219889476895332, + "learning_rate": 5.265123634712478e-06, + "loss": 0.0011, + "step": 63290 + }, + { + "epoch": 1.0697717651234124, + "grad_norm": 0.04848824813961983, + "learning_rate": 5.263650868799261e-06, + "loss": 0.0012, + "step": 63300 + }, + { + "epoch": 1.069940765402263, + "grad_norm": 0.09955284744501114, + "learning_rate": 5.262178079947141e-06, + "loss": 0.0008, + "step": 63310 + }, + { + "epoch": 1.0701097656811134, + "grad_norm": 0.1374005228281021, + "learning_rate": 5.260705268284258e-06, + "loss": 0.0013, + "step": 63320 + }, + { + "epoch": 1.0702787659599637, + 
"grad_norm": 0.03295893967151642, + "learning_rate": 5.259232433938752e-06, + "loss": 0.0006, + "step": 63330 + }, + { + "epoch": 1.0704477662388143, + "grad_norm": 0.012826895341277122, + "learning_rate": 5.2577595770387705e-06, + "loss": 0.001, + "step": 63340 + }, + { + "epoch": 1.0706167665176647, + "grad_norm": 0.01251635979861021, + "learning_rate": 5.256286697712455e-06, + "loss": 0.0014, + "step": 63350 + }, + { + "epoch": 1.0707857667965153, + "grad_norm": 0.07876032590866089, + "learning_rate": 5.254813796087956e-06, + "loss": 0.0027, + "step": 63360 + }, + { + "epoch": 1.0709547670753656, + "grad_norm": 0.04843059927225113, + "learning_rate": 5.253340872293421e-06, + "loss": 0.0034, + "step": 63370 + }, + { + "epoch": 1.0711237673542162, + "grad_norm": 0.0048154802061617374, + "learning_rate": 5.251867926457003e-06, + "loss": 0.0009, + "step": 63380 + }, + { + "epoch": 1.0712927676330666, + "grad_norm": 0.02693239226937294, + "learning_rate": 5.250394958706856e-06, + "loss": 0.0008, + "step": 63390 + }, + { + "epoch": 1.071461767911917, + "grad_norm": 0.17818357050418854, + "learning_rate": 5.248921969171134e-06, + "loss": 0.0038, + "step": 63400 + }, + { + "epoch": 1.0716307681907675, + "grad_norm": 0.06279647350311279, + "learning_rate": 5.247448957977994e-06, + "loss": 0.0013, + "step": 63410 + }, + { + "epoch": 1.0717997684696179, + "grad_norm": 0.00845323409885168, + "learning_rate": 5.245975925255596e-06, + "loss": 0.0005, + "step": 63420 + }, + { + "epoch": 1.0719687687484685, + "grad_norm": 0.11213696748018265, + "learning_rate": 5.2445028711321e-06, + "loss": 0.0014, + "step": 63430 + }, + { + "epoch": 1.0721377690273188, + "grad_norm": 0.005753799341619015, + "learning_rate": 5.243029795735671e-06, + "loss": 0.0004, + "step": 63440 + }, + { + "epoch": 1.0723067693061694, + "grad_norm": 0.03215279430150986, + "learning_rate": 5.2415566991944725e-06, + "loss": 0.001, + "step": 63450 + }, + { + "epoch": 1.0724757695850198, + "grad_norm": 0.11824256181716919, + "learning_rate": 5.24008358163667e-06, + "loss": 0.0016, + "step": 63460 + }, + { + "epoch": 1.0726447698638704, + "grad_norm": 0.023332377895712852, + "learning_rate": 5.2386104431904325e-06, + "loss": 0.0009, + "step": 63470 + }, + { + "epoch": 1.0728137701427207, + "grad_norm": 0.026816241443157196, + "learning_rate": 5.2371372839839305e-06, + "loss": 0.0016, + "step": 63480 + }, + { + "epoch": 1.072982770421571, + "grad_norm": 0.07007268816232681, + "learning_rate": 5.235664104145335e-06, + "loss": 0.0008, + "step": 63490 + }, + { + "epoch": 1.0731517707004217, + "grad_norm": 0.09333419799804688, + "learning_rate": 5.2341909038028216e-06, + "loss": 0.0019, + "step": 63500 + }, + { + "epoch": 1.073320770979272, + "grad_norm": 0.028545813634991646, + "learning_rate": 5.232717683084565e-06, + "loss": 0.0011, + "step": 63510 + }, + { + "epoch": 1.0734897712581226, + "grad_norm": 0.030302291736006737, + "learning_rate": 5.231244442118742e-06, + "loss": 0.001, + "step": 63520 + }, + { + "epoch": 1.073658771536973, + "grad_norm": 0.04001889377832413, + "learning_rate": 5.229771181033534e-06, + "loss": 0.0015, + "step": 63530 + }, + { + "epoch": 1.0738277718158236, + "grad_norm": 0.02755117230117321, + "learning_rate": 5.228297899957117e-06, + "loss": 0.001, + "step": 63540 + }, + { + "epoch": 1.073996772094674, + "grad_norm": 0.009755623526871204, + "learning_rate": 5.226824599017679e-06, + "loss": 0.0008, + "step": 63550 + }, + { + "epoch": 1.0741657723735245, + "grad_norm": 0.01657235063612461, + "learning_rate": 
5.225351278343401e-06, + "loss": 0.0012, + "step": 63560 + }, + { + "epoch": 1.0743347726523749, + "grad_norm": 0.027861325070261955, + "learning_rate": 5.223877938062471e-06, + "loss": 0.001, + "step": 63570 + }, + { + "epoch": 1.0745037729312252, + "grad_norm": 0.021976448595523834, + "learning_rate": 5.222404578303072e-06, + "loss": 0.0012, + "step": 63580 + }, + { + "epoch": 1.0746727732100758, + "grad_norm": 0.058850470930337906, + "learning_rate": 5.2209311991934e-06, + "loss": 0.0014, + "step": 63590 + }, + { + "epoch": 1.0748417734889262, + "grad_norm": 0.013691349886357784, + "learning_rate": 5.2194578008616426e-06, + "loss": 0.001, + "step": 63600 + }, + { + "epoch": 1.0750107737677768, + "grad_norm": 0.03913870081305504, + "learning_rate": 5.217984383435995e-06, + "loss": 0.0013, + "step": 63610 + }, + { + "epoch": 1.0751797740466271, + "grad_norm": 0.019784534350037575, + "learning_rate": 5.216510947044647e-06, + "loss": 0.0009, + "step": 63620 + }, + { + "epoch": 1.0753487743254777, + "grad_norm": 0.027006670832633972, + "learning_rate": 5.2150374918158e-06, + "loss": 0.001, + "step": 63630 + }, + { + "epoch": 1.075517774604328, + "grad_norm": 0.0347067154943943, + "learning_rate": 5.213564017877648e-06, + "loss": 0.0017, + "step": 63640 + }, + { + "epoch": 1.0756867748831787, + "grad_norm": 0.08511506021022797, + "learning_rate": 5.212090525358392e-06, + "loss": 0.001, + "step": 63650 + }, + { + "epoch": 1.075855775162029, + "grad_norm": 0.050413474440574646, + "learning_rate": 5.2106170143862324e-06, + "loss": 0.001, + "step": 63660 + }, + { + "epoch": 1.0760247754408794, + "grad_norm": 0.020677484571933746, + "learning_rate": 5.209143485089372e-06, + "loss": 0.0013, + "step": 63670 + }, + { + "epoch": 1.07619377571973, + "grad_norm": 0.03208847716450691, + "learning_rate": 5.207669937596014e-06, + "loss": 0.0011, + "step": 63680 + }, + { + "epoch": 1.0763627759985803, + "grad_norm": 0.08395759016275406, + "learning_rate": 5.206196372034367e-06, + "loss": 0.0012, + "step": 63690 + }, + { + "epoch": 1.076531776277431, + "grad_norm": 0.028258448466658592, + "learning_rate": 5.204722788532637e-06, + "loss": 0.0013, + "step": 63700 + }, + { + "epoch": 1.0767007765562813, + "grad_norm": 0.16747726500034332, + "learning_rate": 5.203249187219032e-06, + "loss": 0.0014, + "step": 63710 + }, + { + "epoch": 1.0768697768351319, + "grad_norm": 0.016199585050344467, + "learning_rate": 5.201775568221762e-06, + "loss": 0.0015, + "step": 63720 + }, + { + "epoch": 1.0770387771139822, + "grad_norm": 0.1536611169576645, + "learning_rate": 5.20030193166904e-06, + "loss": 0.0014, + "step": 63730 + }, + { + "epoch": 1.0772077773928328, + "grad_norm": 0.10717665404081345, + "learning_rate": 5.198828277689081e-06, + "loss": 0.0015, + "step": 63740 + }, + { + "epoch": 1.0773767776716832, + "grad_norm": 0.018150193616747856, + "learning_rate": 5.197354606410098e-06, + "loss": 0.0007, + "step": 63750 + }, + { + "epoch": 1.0775457779505335, + "grad_norm": 0.017907075583934784, + "learning_rate": 5.195880917960307e-06, + "loss": 0.0011, + "step": 63760 + }, + { + "epoch": 1.077714778229384, + "grad_norm": 0.06702237576246262, + "learning_rate": 5.1944072124679265e-06, + "loss": 0.0011, + "step": 63770 + }, + { + "epoch": 1.0778837785082345, + "grad_norm": 0.006694977171719074, + "learning_rate": 5.192933490061179e-06, + "loss": 0.0014, + "step": 63780 + }, + { + "epoch": 1.078052778787085, + "grad_norm": 0.033626001328229904, + "learning_rate": 5.191459750868282e-06, + "loss": 0.001, + "step": 63790 + }, 
+ { + "epoch": 1.0782217790659354, + "grad_norm": 0.059294719249010086, + "learning_rate": 5.189985995017458e-06, + "loss": 0.0019, + "step": 63800 + }, + { + "epoch": 1.078390779344786, + "grad_norm": 0.05930497124791145, + "learning_rate": 5.188512222636933e-06, + "loss": 0.001, + "step": 63810 + }, + { + "epoch": 1.0785597796236364, + "grad_norm": 0.0449826642870903, + "learning_rate": 5.187038433854932e-06, + "loss": 0.0009, + "step": 63820 + }, + { + "epoch": 1.078728779902487, + "grad_norm": 0.05867033451795578, + "learning_rate": 5.1855646287996795e-06, + "loss": 0.0011, + "step": 63830 + }, + { + "epoch": 1.0788977801813373, + "grad_norm": 0.030678899958729744, + "learning_rate": 5.1840908075994065e-06, + "loss": 0.0012, + "step": 63840 + }, + { + "epoch": 1.0790667804601877, + "grad_norm": 0.0650521069765091, + "learning_rate": 5.18261697038234e-06, + "loss": 0.0015, + "step": 63850 + }, + { + "epoch": 1.0792357807390383, + "grad_norm": 0.07857781648635864, + "learning_rate": 5.181143117276712e-06, + "loss": 0.002, + "step": 63860 + }, + { + "epoch": 1.0794047810178886, + "grad_norm": 0.07133198529481888, + "learning_rate": 5.179669248410757e-06, + "loss": 0.0014, + "step": 63870 + }, + { + "epoch": 1.0795737812967392, + "grad_norm": 0.08174729347229004, + "learning_rate": 5.178195363912706e-06, + "loss": 0.0006, + "step": 63880 + }, + { + "epoch": 1.0797427815755896, + "grad_norm": 0.015073689632117748, + "learning_rate": 5.176721463910795e-06, + "loss": 0.0015, + "step": 63890 + }, + { + "epoch": 1.0799117818544401, + "grad_norm": 0.02266828902065754, + "learning_rate": 5.17524754853326e-06, + "loss": 0.0003, + "step": 63900 + }, + { + "epoch": 1.0800807821332905, + "grad_norm": 0.02230769768357277, + "learning_rate": 5.17377361790834e-06, + "loss": 0.0008, + "step": 63910 + }, + { + "epoch": 1.080249782412141, + "grad_norm": 0.05522468313574791, + "learning_rate": 5.172299672164273e-06, + "loss": 0.001, + "step": 63920 + }, + { + "epoch": 1.0804187826909915, + "grad_norm": 0.023726046085357666, + "learning_rate": 5.170825711429298e-06, + "loss": 0.0015, + "step": 63930 + }, + { + "epoch": 1.0805877829698418, + "grad_norm": 0.03070794604718685, + "learning_rate": 5.16935173583166e-06, + "loss": 0.0003, + "step": 63940 + }, + { + "epoch": 1.0807567832486924, + "grad_norm": 0.002701481804251671, + "learning_rate": 5.167877745499601e-06, + "loss": 0.0009, + "step": 63950 + }, + { + "epoch": 1.0809257835275428, + "grad_norm": 0.02209090068936348, + "learning_rate": 5.166403740561363e-06, + "loss": 0.0007, + "step": 63960 + }, + { + "epoch": 1.0810947838063933, + "grad_norm": 0.06571850180625916, + "learning_rate": 5.164929721145194e-06, + "loss": 0.0017, + "step": 63970 + }, + { + "epoch": 1.0812637840852437, + "grad_norm": 0.09760637581348419, + "learning_rate": 5.163455687379341e-06, + "loss": 0.0015, + "step": 63980 + }, + { + "epoch": 1.0814327843640943, + "grad_norm": 0.04042598232626915, + "learning_rate": 5.161981639392051e-06, + "loss": 0.0009, + "step": 63990 + }, + { + "epoch": 1.0816017846429447, + "grad_norm": 0.016159027814865112, + "learning_rate": 5.160507577311573e-06, + "loss": 0.0012, + "step": 64000 + }, + { + "epoch": 1.0817707849217952, + "grad_norm": 0.012882671318948269, + "learning_rate": 5.1590335012661584e-06, + "loss": 0.0013, + "step": 64010 + }, + { + "epoch": 1.0819397852006456, + "grad_norm": 0.025966521352529526, + "learning_rate": 5.15755941138406e-06, + "loss": 0.0005, + "step": 64020 + }, + { + "epoch": 1.082108785479496, + "grad_norm": 
0.0443761870265007, + "learning_rate": 5.156085307793528e-06, + "loss": 0.0013, + "step": 64030 + }, + { + "epoch": 1.0822777857583465, + "grad_norm": 0.05958549305796623, + "learning_rate": 5.154611190622818e-06, + "loss": 0.0018, + "step": 64040 + }, + { + "epoch": 1.082446786037197, + "grad_norm": 0.1546487808227539, + "learning_rate": 5.1531370600001855e-06, + "loss": 0.0012, + "step": 64050 + }, + { + "epoch": 1.0826157863160475, + "grad_norm": 0.04316802695393562, + "learning_rate": 5.151662916053886e-06, + "loss": 0.0006, + "step": 64060 + }, + { + "epoch": 1.0827847865948979, + "grad_norm": 0.07794240862131119, + "learning_rate": 5.1501887589121794e-06, + "loss": 0.0008, + "step": 64070 + }, + { + "epoch": 1.0829537868737484, + "grad_norm": 0.1040305644273758, + "learning_rate": 5.1487145887033214e-06, + "loss": 0.0021, + "step": 64080 + }, + { + "epoch": 1.0831227871525988, + "grad_norm": 0.07008879631757736, + "learning_rate": 5.147240405555574e-06, + "loss": 0.0009, + "step": 64090 + }, + { + "epoch": 1.0832917874314492, + "grad_norm": 0.019183754920959473, + "learning_rate": 5.145766209597199e-06, + "loss": 0.0008, + "step": 64100 + }, + { + "epoch": 1.0834607877102997, + "grad_norm": 0.00969164352864027, + "learning_rate": 5.1442920009564576e-06, + "loss": 0.0009, + "step": 64110 + }, + { + "epoch": 1.08362978798915, + "grad_norm": 0.023686842992901802, + "learning_rate": 5.142817779761613e-06, + "loss": 0.0011, + "step": 64120 + }, + { + "epoch": 1.0837987882680007, + "grad_norm": 0.12426068633794785, + "learning_rate": 5.141343546140929e-06, + "loss": 0.0018, + "step": 64130 + }, + { + "epoch": 1.083967788546851, + "grad_norm": 0.02402712032198906, + "learning_rate": 5.139869300222672e-06, + "loss": 0.0007, + "step": 64140 + }, + { + "epoch": 1.0841367888257016, + "grad_norm": 0.04177405312657356, + "learning_rate": 5.138395042135107e-06, + "loss": 0.0006, + "step": 64150 + }, + { + "epoch": 1.084305789104552, + "grad_norm": 0.021412570029497147, + "learning_rate": 5.136920772006504e-06, + "loss": 0.0009, + "step": 64160 + }, + { + "epoch": 1.0844747893834026, + "grad_norm": 0.061743564903736115, + "learning_rate": 5.13544648996513e-06, + "loss": 0.0005, + "step": 64170 + }, + { + "epoch": 1.084643789662253, + "grad_norm": 0.050587743520736694, + "learning_rate": 5.133972196139256e-06, + "loss": 0.0023, + "step": 64180 + }, + { + "epoch": 1.0848127899411033, + "grad_norm": 0.0479382649064064, + "learning_rate": 5.1324978906571495e-06, + "loss": 0.0015, + "step": 64190 + }, + { + "epoch": 1.084981790219954, + "grad_norm": 0.06690307706594467, + "learning_rate": 5.131023573647087e-06, + "loss": 0.0012, + "step": 64200 + }, + { + "epoch": 1.0851507904988043, + "grad_norm": 0.0020674539264291525, + "learning_rate": 5.129549245237337e-06, + "loss": 0.0014, + "step": 64210 + }, + { + "epoch": 1.0853197907776548, + "grad_norm": 0.031590402126312256, + "learning_rate": 5.128074905556177e-06, + "loss": 0.0008, + "step": 64220 + }, + { + "epoch": 1.0854887910565052, + "grad_norm": 0.18099823594093323, + "learning_rate": 5.126600554731878e-06, + "loss": 0.0014, + "step": 64230 + }, + { + "epoch": 1.0856577913353558, + "grad_norm": 0.018791290000081062, + "learning_rate": 5.125126192892719e-06, + "loss": 0.0011, + "step": 64240 + }, + { + "epoch": 1.0858267916142061, + "grad_norm": 0.0774259865283966, + "learning_rate": 5.123651820166973e-06, + "loss": 0.0011, + "step": 64250 + }, + { + "epoch": 1.0859957918930565, + "grad_norm": 0.03627191111445427, + "learning_rate": 
5.12217743668292e-06, + "loss": 0.001, + "step": 64260 + }, + { + "epoch": 1.086164792171907, + "grad_norm": 0.05594772845506668, + "learning_rate": 5.120703042568838e-06, + "loss": 0.0026, + "step": 64270 + }, + { + "epoch": 1.0863337924507575, + "grad_norm": 0.06517887860536575, + "learning_rate": 5.119228637953007e-06, + "loss": 0.0008, + "step": 64280 + }, + { + "epoch": 1.086502792729608, + "grad_norm": 0.012375900521874428, + "learning_rate": 5.117754222963708e-06, + "loss": 0.0021, + "step": 64290 + }, + { + "epoch": 1.0866717930084584, + "grad_norm": 0.051953885704278946, + "learning_rate": 5.11627979772922e-06, + "loss": 0.0009, + "step": 64300 + }, + { + "epoch": 1.086840793287309, + "grad_norm": 0.019279394298791885, + "learning_rate": 5.114805362377826e-06, + "loss": 0.0014, + "step": 64310 + }, + { + "epoch": 1.0870097935661593, + "grad_norm": 0.04088306799530983, + "learning_rate": 5.11333091703781e-06, + "loss": 0.0036, + "step": 64320 + }, + { + "epoch": 1.08717879384501, + "grad_norm": 0.10127560049295425, + "learning_rate": 5.111856461837454e-06, + "loss": 0.0009, + "step": 64330 + }, + { + "epoch": 1.0873477941238603, + "grad_norm": 0.019220391288399696, + "learning_rate": 5.110381996905044e-06, + "loss": 0.0008, + "step": 64340 + }, + { + "epoch": 1.0875167944027107, + "grad_norm": 0.03603460267186165, + "learning_rate": 5.108907522368865e-06, + "loss": 0.0015, + "step": 64350 + }, + { + "epoch": 1.0876857946815612, + "grad_norm": 0.0060151806101202965, + "learning_rate": 5.107433038357205e-06, + "loss": 0.0008, + "step": 64360 + }, + { + "epoch": 1.0878547949604116, + "grad_norm": 0.04247978329658508, + "learning_rate": 5.10595854499835e-06, + "loss": 0.0016, + "step": 64370 + }, + { + "epoch": 1.0880237952392622, + "grad_norm": 0.002622088650241494, + "learning_rate": 5.104484042420588e-06, + "loss": 0.0004, + "step": 64380 + }, + { + "epoch": 1.0881927955181125, + "grad_norm": 0.05791708454489708, + "learning_rate": 5.103009530752209e-06, + "loss": 0.0031, + "step": 64390 + }, + { + "epoch": 1.0883617957969631, + "grad_norm": 0.0468166321516037, + "learning_rate": 5.1015350101215e-06, + "loss": 0.0017, + "step": 64400 + }, + { + "epoch": 1.0885307960758135, + "grad_norm": 0.056557219475507736, + "learning_rate": 5.100060480656754e-06, + "loss": 0.0013, + "step": 64410 + }, + { + "epoch": 1.088699796354664, + "grad_norm": 0.05838380381464958, + "learning_rate": 5.098585942486262e-06, + "loss": 0.0013, + "step": 64420 + }, + { + "epoch": 1.0888687966335144, + "grad_norm": 0.04393266513943672, + "learning_rate": 5.0971113957383135e-06, + "loss": 0.003, + "step": 64430 + }, + { + "epoch": 1.0890377969123648, + "grad_norm": 0.05707855895161629, + "learning_rate": 5.095636840541204e-06, + "loss": 0.0011, + "step": 64440 + }, + { + "epoch": 1.0892067971912154, + "grad_norm": 0.13437679409980774, + "learning_rate": 5.094162277023225e-06, + "loss": 0.002, + "step": 64450 + }, + { + "epoch": 1.0893757974700657, + "grad_norm": 0.12905579805374146, + "learning_rate": 5.092687705312673e-06, + "loss": 0.0016, + "step": 64460 + }, + { + "epoch": 1.0895447977489163, + "grad_norm": 0.0845353752374649, + "learning_rate": 5.091213125537842e-06, + "loss": 0.0015, + "step": 64470 + }, + { + "epoch": 1.0897137980277667, + "grad_norm": 0.017485635355114937, + "learning_rate": 5.089738537827027e-06, + "loss": 0.0013, + "step": 64480 + }, + { + "epoch": 1.0898827983066173, + "grad_norm": 0.04234213009476662, + "learning_rate": 5.088263942308523e-06, + "loss": 0.0026, + "step": 64490 + }, + { 
+ "epoch": 1.0900517985854676, + "grad_norm": 0.02672339603304863, + "learning_rate": 5.086789339110631e-06, + "loss": 0.0013, + "step": 64500 + }, + { + "epoch": 1.0902207988643182, + "grad_norm": 0.22146789729595184, + "learning_rate": 5.085314728361644e-06, + "loss": 0.0026, + "step": 64510 + }, + { + "epoch": 1.0903897991431686, + "grad_norm": 0.008131207898259163, + "learning_rate": 5.083840110189863e-06, + "loss": 0.0007, + "step": 64520 + }, + { + "epoch": 1.090558799422019, + "grad_norm": 0.25171422958374023, + "learning_rate": 5.082365484723586e-06, + "loss": 0.0013, + "step": 64530 + }, + { + "epoch": 1.0907277997008695, + "grad_norm": 0.06383270770311356, + "learning_rate": 5.080890852091111e-06, + "loss": 0.001, + "step": 64540 + }, + { + "epoch": 1.0908967999797199, + "grad_norm": 0.03389836847782135, + "learning_rate": 5.079416212420742e-06, + "loss": 0.0009, + "step": 64550 + }, + { + "epoch": 1.0910658002585705, + "grad_norm": 0.030062347650527954, + "learning_rate": 5.077941565840777e-06, + "loss": 0.0013, + "step": 64560 + }, + { + "epoch": 1.0912348005374208, + "grad_norm": 0.02097107470035553, + "learning_rate": 5.076466912479519e-06, + "loss": 0.002, + "step": 64570 + }, + { + "epoch": 1.0914038008162714, + "grad_norm": 0.0992652028799057, + "learning_rate": 5.074992252465268e-06, + "loss": 0.0014, + "step": 64580 + }, + { + "epoch": 1.0915728010951218, + "grad_norm": 0.04203925281763077, + "learning_rate": 5.073517585926328e-06, + "loss": 0.0013, + "step": 64590 + }, + { + "epoch": 1.0917418013739724, + "grad_norm": 0.03911573812365532, + "learning_rate": 5.072042912991003e-06, + "loss": 0.001, + "step": 64600 + }, + { + "epoch": 1.0919108016528227, + "grad_norm": 0.028423482552170753, + "learning_rate": 5.070568233787595e-06, + "loss": 0.0016, + "step": 64610 + }, + { + "epoch": 1.092079801931673, + "grad_norm": 0.03550324961543083, + "learning_rate": 5.069093548444408e-06, + "loss": 0.0012, + "step": 64620 + }, + { + "epoch": 1.0922488022105237, + "grad_norm": 0.09978547692298889, + "learning_rate": 5.067618857089747e-06, + "loss": 0.001, + "step": 64630 + }, + { + "epoch": 1.092417802489374, + "grad_norm": 0.060302022844552994, + "learning_rate": 5.066144159851919e-06, + "loss": 0.0012, + "step": 64640 + }, + { + "epoch": 1.0925868027682246, + "grad_norm": 0.08676223456859589, + "learning_rate": 5.064669456859228e-06, + "loss": 0.0009, + "step": 64650 + }, + { + "epoch": 1.092755803047075, + "grad_norm": 0.04188084974884987, + "learning_rate": 5.0631947482399815e-06, + "loss": 0.0008, + "step": 64660 + }, + { + "epoch": 1.0929248033259256, + "grad_norm": 0.15830457210540771, + "learning_rate": 5.061720034122486e-06, + "loss": 0.002, + "step": 64670 + }, + { + "epoch": 1.093093803604776, + "grad_norm": 0.03596467152237892, + "learning_rate": 5.060245314635049e-06, + "loss": 0.0013, + "step": 64680 + }, + { + "epoch": 1.0932628038836265, + "grad_norm": 0.019881989806890488, + "learning_rate": 5.058770589905976e-06, + "loss": 0.0007, + "step": 64690 + }, + { + "epoch": 1.0934318041624769, + "grad_norm": 0.07606848329305649, + "learning_rate": 5.057295860063575e-06, + "loss": 0.0008, + "step": 64700 + }, + { + "epoch": 1.0936008044413272, + "grad_norm": 0.11718254536390305, + "learning_rate": 5.05582112523616e-06, + "loss": 0.0012, + "step": 64710 + }, + { + "epoch": 1.0937698047201778, + "grad_norm": 0.04764671251177788, + "learning_rate": 5.054346385552036e-06, + "loss": 0.0014, + "step": 64720 + }, + { + "epoch": 1.0939388049990282, + "grad_norm": 
0.10151364654302597, + "learning_rate": 5.0528716411395126e-06, + "loss": 0.0014, + "step": 64730 + }, + { + "epoch": 1.0941078052778788, + "grad_norm": 0.02287687174975872, + "learning_rate": 5.0513968921269006e-06, + "loss": 0.0007, + "step": 64740 + }, + { + "epoch": 1.0942768055567291, + "grad_norm": 0.11843504756689072, + "learning_rate": 5.04992213864251e-06, + "loss": 0.0007, + "step": 64750 + }, + { + "epoch": 1.0944458058355797, + "grad_norm": 0.000851675751619041, + "learning_rate": 5.048447380814652e-06, + "loss": 0.0009, + "step": 64760 + }, + { + "epoch": 1.09461480611443, + "grad_norm": 0.04916158318519592, + "learning_rate": 5.0469726187716365e-06, + "loss": 0.0019, + "step": 64770 + }, + { + "epoch": 1.0947838063932807, + "grad_norm": 0.03001948446035385, + "learning_rate": 5.045497852641775e-06, + "loss": 0.0008, + "step": 64780 + }, + { + "epoch": 1.094952806672131, + "grad_norm": 0.021211637184023857, + "learning_rate": 5.044023082553381e-06, + "loss": 0.0012, + "step": 64790 + }, + { + "epoch": 1.0951218069509814, + "grad_norm": 0.05235179513692856, + "learning_rate": 5.042548308634765e-06, + "loss": 0.0007, + "step": 64800 + }, + { + "epoch": 1.095290807229832, + "grad_norm": 0.1343381553888321, + "learning_rate": 5.04107353101424e-06, + "loss": 0.0012, + "step": 64810 + }, + { + "epoch": 1.0954598075086823, + "grad_norm": 0.009196394123136997, + "learning_rate": 5.039598749820119e-06, + "loss": 0.0007, + "step": 64820 + }, + { + "epoch": 1.095628807787533, + "grad_norm": 0.03460807725787163, + "learning_rate": 5.038123965180713e-06, + "loss": 0.0008, + "step": 64830 + }, + { + "epoch": 1.0957978080663833, + "grad_norm": 0.4813380837440491, + "learning_rate": 5.036649177224339e-06, + "loss": 0.0008, + "step": 64840 + }, + { + "epoch": 1.0959668083452339, + "grad_norm": 0.04702454060316086, + "learning_rate": 5.03517438607931e-06, + "loss": 0.0018, + "step": 64850 + }, + { + "epoch": 1.0961358086240842, + "grad_norm": 0.12171593308448792, + "learning_rate": 5.033699591873939e-06, + "loss": 0.0012, + "step": 64860 + }, + { + "epoch": 1.0963048089029348, + "grad_norm": 0.13131020963191986, + "learning_rate": 5.032224794736539e-06, + "loss": 0.0011, + "step": 64870 + }, + { + "epoch": 1.0964738091817852, + "grad_norm": 0.0072104958817362785, + "learning_rate": 5.030749994795426e-06, + "loss": 0.001, + "step": 64880 + }, + { + "epoch": 1.0966428094606355, + "grad_norm": 0.028363553807139397, + "learning_rate": 5.029275192178914e-06, + "loss": 0.0013, + "step": 64890 + }, + { + "epoch": 1.096811809739486, + "grad_norm": 0.16457876563072205, + "learning_rate": 5.027800387015319e-06, + "loss": 0.0009, + "step": 64900 + }, + { + "epoch": 1.0969808100183365, + "grad_norm": 0.07283145189285278, + "learning_rate": 5.026325579432954e-06, + "loss": 0.0019, + "step": 64910 + }, + { + "epoch": 1.097149810297187, + "grad_norm": 0.03155135735869408, + "learning_rate": 5.024850769560139e-06, + "loss": 0.0006, + "step": 64920 + }, + { + "epoch": 1.0973188105760374, + "grad_norm": 0.031150473281741142, + "learning_rate": 5.023375957525185e-06, + "loss": 0.0009, + "step": 64930 + }, + { + "epoch": 1.097487810854888, + "grad_norm": 0.024125665426254272, + "learning_rate": 5.021901143456409e-06, + "loss": 0.0014, + "step": 64940 + }, + { + "epoch": 1.0976568111337384, + "grad_norm": 0.01180550642311573, + "learning_rate": 5.0204263274821275e-06, + "loss": 0.0008, + "step": 64950 + }, + { + "epoch": 1.097825811412589, + "grad_norm": 0.06741633266210556, + "learning_rate": 
5.018951509730657e-06, + "loss": 0.0009, + "step": 64960 + }, + { + "epoch": 1.0979948116914393, + "grad_norm": 0.013154249638319016, + "learning_rate": 5.017476690330314e-06, + "loss": 0.0006, + "step": 64970 + }, + { + "epoch": 1.0981638119702897, + "grad_norm": 0.005113786086440086, + "learning_rate": 5.016001869409414e-06, + "loss": 0.0012, + "step": 64980 + }, + { + "epoch": 1.0983328122491403, + "grad_norm": 0.038728177547454834, + "learning_rate": 5.014527047096273e-06, + "loss": 0.0014, + "step": 64990 + }, + { + "epoch": 1.0985018125279906, + "grad_norm": 0.0678655281662941, + "learning_rate": 5.0130522235192095e-06, + "loss": 0.0012, + "step": 65000 + }, + { + "epoch": 1.0986708128068412, + "grad_norm": 0.0647139623761177, + "learning_rate": 5.011577398806539e-06, + "loss": 0.002, + "step": 65010 + }, + { + "epoch": 1.0988398130856916, + "grad_norm": 0.04998895153403282, + "learning_rate": 5.010102573086577e-06, + "loss": 0.0012, + "step": 65020 + }, + { + "epoch": 1.0990088133645421, + "grad_norm": 0.08676137030124664, + "learning_rate": 5.008627746487644e-06, + "loss": 0.0007, + "step": 65030 + }, + { + "epoch": 1.0991778136433925, + "grad_norm": 0.05025715008378029, + "learning_rate": 5.0071529191380555e-06, + "loss": 0.0007, + "step": 65040 + }, + { + "epoch": 1.0993468139222429, + "grad_norm": 0.03282903879880905, + "learning_rate": 5.005678091166128e-06, + "loss": 0.001, + "step": 65050 + }, + { + "epoch": 1.0995158142010935, + "grad_norm": 0.04838985204696655, + "learning_rate": 5.004203262700179e-06, + "loss": 0.0014, + "step": 65060 + }, + { + "epoch": 1.0996848144799438, + "grad_norm": 0.050552740693092346, + "learning_rate": 5.002728433868525e-06, + "loss": 0.001, + "step": 65070 + }, + { + "epoch": 1.0998538147587944, + "grad_norm": 0.08461810648441315, + "learning_rate": 5.001253604799485e-06, + "loss": 0.0014, + "step": 65080 + }, + { + "epoch": 1.1000228150376448, + "grad_norm": 0.06626548618078232, + "learning_rate": 4.999778775621375e-06, + "loss": 0.0021, + "step": 65090 + }, + { + "epoch": 1.1001918153164953, + "grad_norm": 0.09405693411827087, + "learning_rate": 4.998303946462514e-06, + "loss": 0.0015, + "step": 65100 + }, + { + "epoch": 1.1003608155953457, + "grad_norm": 0.013804643414914608, + "learning_rate": 4.996829117451215e-06, + "loss": 0.001, + "step": 65110 + }, + { + "epoch": 1.1005298158741963, + "grad_norm": 0.0818895548582077, + "learning_rate": 4.9953542887158e-06, + "loss": 0.0011, + "step": 65120 + }, + { + "epoch": 1.1006988161530467, + "grad_norm": 0.04563478007912636, + "learning_rate": 4.993879460384583e-06, + "loss": 0.0008, + "step": 65130 + }, + { + "epoch": 1.100867816431897, + "grad_norm": 0.08573456853628159, + "learning_rate": 4.992404632585885e-06, + "loss": 0.0013, + "step": 65140 + }, + { + "epoch": 1.1010368167107476, + "grad_norm": 0.09419383853673935, + "learning_rate": 4.990929805448018e-06, + "loss": 0.0028, + "step": 65150 + }, + { + "epoch": 1.101205816989598, + "grad_norm": 0.014462495222687721, + "learning_rate": 4.989454979099305e-06, + "loss": 0.0009, + "step": 65160 + }, + { + "epoch": 1.1013748172684485, + "grad_norm": 0.057236574590206146, + "learning_rate": 4.987980153668057e-06, + "loss": 0.0028, + "step": 65170 + }, + { + "epoch": 1.101543817547299, + "grad_norm": 0.05355300381779671, + "learning_rate": 4.986505329282596e-06, + "loss": 0.001, + "step": 65180 + }, + { + "epoch": 1.1017128178261495, + "grad_norm": 0.03428352624177933, + "learning_rate": 4.985030506071235e-06, + "loss": 0.002, + "step": 65190 + }, + 
{ + "epoch": 1.1018818181049999, + "grad_norm": 0.001353323576040566, + "learning_rate": 4.983555684162294e-06, + "loss": 0.0024, + "step": 65200 + }, + { + "epoch": 1.1020508183838502, + "grad_norm": 0.017991015687584877, + "learning_rate": 4.982080863684087e-06, + "loss": 0.0014, + "step": 65210 + }, + { + "epoch": 1.1022198186627008, + "grad_norm": 0.057966869324445724, + "learning_rate": 4.980606044764932e-06, + "loss": 0.0007, + "step": 65220 + }, + { + "epoch": 1.1023888189415512, + "grad_norm": 0.02501198649406433, + "learning_rate": 4.979131227533145e-06, + "loss": 0.0007, + "step": 65230 + }, + { + "epoch": 1.1025578192204017, + "grad_norm": 0.07594097405672073, + "learning_rate": 4.9776564121170435e-06, + "loss": 0.0019, + "step": 65240 + }, + { + "epoch": 1.102726819499252, + "grad_norm": 0.025860225781798363, + "learning_rate": 4.9761815986449405e-06, + "loss": 0.0008, + "step": 65250 + }, + { + "epoch": 1.1028958197781027, + "grad_norm": 0.03838937729597092, + "learning_rate": 4.974706787245156e-06, + "loss": 0.0014, + "step": 65260 + }, + { + "epoch": 1.103064820056953, + "grad_norm": 0.03614159673452377, + "learning_rate": 4.973231978046001e-06, + "loss": 0.0008, + "step": 65270 + }, + { + "epoch": 1.1032338203358036, + "grad_norm": 0.015224494971334934, + "learning_rate": 4.971757171175797e-06, + "loss": 0.0011, + "step": 65280 + }, + { + "epoch": 1.103402820614654, + "grad_norm": 0.059320464730262756, + "learning_rate": 4.9702823667628526e-06, + "loss": 0.0012, + "step": 65290 + }, + { + "epoch": 1.1035718208935044, + "grad_norm": 0.06934010982513428, + "learning_rate": 4.968807564935487e-06, + "loss": 0.0011, + "step": 65300 + }, + { + "epoch": 1.103740821172355, + "grad_norm": 0.1251693069934845, + "learning_rate": 4.967332765822014e-06, + "loss": 0.0016, + "step": 65310 + }, + { + "epoch": 1.1039098214512053, + "grad_norm": 0.08632376044988632, + "learning_rate": 4.9658579695507515e-06, + "loss": 0.0011, + "step": 65320 + }, + { + "epoch": 1.104078821730056, + "grad_norm": 0.07911311835050583, + "learning_rate": 4.964383176250008e-06, + "loss": 0.0011, + "step": 65330 + }, + { + "epoch": 1.1042478220089063, + "grad_norm": 0.02994832582771778, + "learning_rate": 4.9629083860481005e-06, + "loss": 0.0012, + "step": 65340 + }, + { + "epoch": 1.1044168222877568, + "grad_norm": 0.0312307458370924, + "learning_rate": 4.9614335990733455e-06, + "loss": 0.0012, + "step": 65350 + }, + { + "epoch": 1.1045858225666072, + "grad_norm": 0.06177021190524101, + "learning_rate": 4.959958815454053e-06, + "loss": 0.0011, + "step": 65360 + }, + { + "epoch": 1.1047548228454578, + "grad_norm": 0.036960311233997345, + "learning_rate": 4.9584840353185384e-06, + "loss": 0.001, + "step": 65370 + }, + { + "epoch": 1.1049238231243081, + "grad_norm": 0.019146258011460304, + "learning_rate": 4.957009258795113e-06, + "loss": 0.0014, + "step": 65380 + }, + { + "epoch": 1.1050928234031585, + "grad_norm": 0.05155621096491814, + "learning_rate": 4.955534486012092e-06, + "loss": 0.0014, + "step": 65390 + }, + { + "epoch": 1.105261823682009, + "grad_norm": 0.11039842665195465, + "learning_rate": 4.954059717097783e-06, + "loss": 0.0017, + "step": 65400 + }, + { + "epoch": 1.1054308239608595, + "grad_norm": 0.08442549407482147, + "learning_rate": 4.952584952180504e-06, + "loss": 0.0011, + "step": 65410 + }, + { + "epoch": 1.10559982423971, + "grad_norm": 0.02455849200487137, + "learning_rate": 4.951110191388562e-06, + "loss": 0.0004, + "step": 65420 + }, + { + "epoch": 1.1057688245185604, + "grad_norm": 
0.06026019901037216, + "learning_rate": 4.949635434850272e-06, + "loss": 0.0007, + "step": 65430 + }, + { + "epoch": 1.105937824797411, + "grad_norm": 0.0009374105138704181, + "learning_rate": 4.948160682693941e-06, + "loss": 0.0007, + "step": 65440 + }, + { + "epoch": 1.1061068250762613, + "grad_norm": 0.026579247787594795, + "learning_rate": 4.946685935047884e-06, + "loss": 0.0007, + "step": 65450 + }, + { + "epoch": 1.106275825355112, + "grad_norm": 0.04251579940319061, + "learning_rate": 4.945211192040408e-06, + "loss": 0.0006, + "step": 65460 + }, + { + "epoch": 1.1064448256339623, + "grad_norm": 0.02869006060063839, + "learning_rate": 4.943736453799824e-06, + "loss": 0.0006, + "step": 65470 + }, + { + "epoch": 1.1066138259128127, + "grad_norm": 0.1354653239250183, + "learning_rate": 4.94226172045444e-06, + "loss": 0.0011, + "step": 65480 + }, + { + "epoch": 1.1067828261916632, + "grad_norm": 0.06362824887037277, + "learning_rate": 4.940786992132568e-06, + "loss": 0.0017, + "step": 65490 + }, + { + "epoch": 1.1069518264705136, + "grad_norm": 0.02064809948205948, + "learning_rate": 4.939312268962513e-06, + "loss": 0.0011, + "step": 65500 + }, + { + "epoch": 1.1071208267493642, + "grad_norm": 0.01066114567220211, + "learning_rate": 4.9378375510725856e-06, + "loss": 0.0015, + "step": 65510 + }, + { + "epoch": 1.1072898270282145, + "grad_norm": 0.01520493533462286, + "learning_rate": 4.936362838591091e-06, + "loss": 0.0014, + "step": 65520 + }, + { + "epoch": 1.1074588273070651, + "grad_norm": 0.0660543218255043, + "learning_rate": 4.9348881316463406e-06, + "loss": 0.0009, + "step": 65530 + }, + { + "epoch": 1.1076278275859155, + "grad_norm": 0.02784154936671257, + "learning_rate": 4.9334134303666355e-06, + "loss": 0.0011, + "step": 65540 + }, + { + "epoch": 1.107796827864766, + "grad_norm": 0.06158173829317093, + "learning_rate": 4.931938734880287e-06, + "loss": 0.0009, + "step": 65550 + }, + { + "epoch": 1.1079658281436164, + "grad_norm": 0.02641584351658821, + "learning_rate": 4.9304640453155956e-06, + "loss": 0.001, + "step": 65560 + }, + { + "epoch": 1.1081348284224668, + "grad_norm": 0.0512726865708828, + "learning_rate": 4.928989361800871e-06, + "loss": 0.0009, + "step": 65570 + }, + { + "epoch": 1.1083038287013174, + "grad_norm": 0.04487888887524605, + "learning_rate": 4.927514684464415e-06, + "loss": 0.0016, + "step": 65580 + }, + { + "epoch": 1.1084728289801677, + "grad_norm": 0.007292418275028467, + "learning_rate": 4.926040013434532e-06, + "loss": 0.0005, + "step": 65590 + }, + { + "epoch": 1.1086418292590183, + "grad_norm": 0.14908470213413239, + "learning_rate": 4.924565348839528e-06, + "loss": 0.002, + "step": 65600 + }, + { + "epoch": 1.1088108295378687, + "grad_norm": 0.12314429134130478, + "learning_rate": 4.923090690807701e-06, + "loss": 0.0017, + "step": 65610 + }, + { + "epoch": 1.1089798298167193, + "grad_norm": 0.028657900169491768, + "learning_rate": 4.9216160394673605e-06, + "loss": 0.0008, + "step": 65620 + }, + { + "epoch": 1.1091488300955696, + "grad_norm": 0.011674246750772, + "learning_rate": 4.920141394946802e-06, + "loss": 0.0013, + "step": 65630 + }, + { + "epoch": 1.1093178303744202, + "grad_norm": 0.05232135206460953, + "learning_rate": 4.918666757374331e-06, + "loss": 0.001, + "step": 65640 + }, + { + "epoch": 1.1094868306532706, + "grad_norm": 0.059035900980234146, + "learning_rate": 4.917192126878244e-06, + "loss": 0.0015, + "step": 65650 + }, + { + "epoch": 1.109655830932121, + "grad_norm": 0.03378590941429138, + "learning_rate": 
4.9157175035868455e-06, + "loss": 0.0022, + "step": 65660 + }, + { + "epoch": 1.1098248312109715, + "grad_norm": 0.10433820635080338, + "learning_rate": 4.91424288762843e-06, + "loss": 0.0019, + "step": 65670 + }, + { + "epoch": 1.109993831489822, + "grad_norm": 0.05964815244078636, + "learning_rate": 4.9127682791313e-06, + "loss": 0.0034, + "step": 65680 + }, + { + "epoch": 1.1101628317686725, + "grad_norm": 0.03746423125267029, + "learning_rate": 4.911293678223753e-06, + "loss": 0.0016, + "step": 65690 + }, + { + "epoch": 1.1103318320475228, + "grad_norm": 0.043141648173332214, + "learning_rate": 4.909819085034085e-06, + "loss": 0.001, + "step": 65700 + }, + { + "epoch": 1.1105008323263734, + "grad_norm": 0.025546276941895485, + "learning_rate": 4.9083444996905926e-06, + "loss": 0.0006, + "step": 65710 + }, + { + "epoch": 1.1106698326052238, + "grad_norm": 0.0015741289826110005, + "learning_rate": 4.9068699223215756e-06, + "loss": 0.0012, + "step": 65720 + }, + { + "epoch": 1.1108388328840744, + "grad_norm": 0.05023641139268875, + "learning_rate": 4.905395353055323e-06, + "loss": 0.0013, + "step": 65730 + }, + { + "epoch": 1.1110078331629247, + "grad_norm": 0.06063641980290413, + "learning_rate": 4.903920792020136e-06, + "loss": 0.0005, + "step": 65740 + }, + { + "epoch": 1.111176833441775, + "grad_norm": 0.06451669335365295, + "learning_rate": 4.902446239344305e-06, + "loss": 0.0011, + "step": 65750 + }, + { + "epoch": 1.1113458337206257, + "grad_norm": 0.03882955014705658, + "learning_rate": 4.900971695156124e-06, + "loss": 0.0014, + "step": 65760 + }, + { + "epoch": 1.111514833999476, + "grad_norm": 0.048457760363817215, + "learning_rate": 4.899497159583883e-06, + "loss": 0.0015, + "step": 65770 + }, + { + "epoch": 1.1116838342783266, + "grad_norm": 0.02654680795967579, + "learning_rate": 4.898022632755878e-06, + "loss": 0.0016, + "step": 65780 + }, + { + "epoch": 1.111852834557177, + "grad_norm": 0.051069073379039764, + "learning_rate": 4.896548114800397e-06, + "loss": 0.0016, + "step": 65790 + }, + { + "epoch": 1.1120218348360276, + "grad_norm": 0.013700217008590698, + "learning_rate": 4.895073605845733e-06, + "loss": 0.0009, + "step": 65800 + }, + { + "epoch": 1.112190835114878, + "grad_norm": 0.03329027071595192, + "learning_rate": 4.893599106020172e-06, + "loss": 0.0013, + "step": 65810 + }, + { + "epoch": 1.1123598353937285, + "grad_norm": 0.07942863553762436, + "learning_rate": 4.892124615452007e-06, + "loss": 0.0009, + "step": 65820 + }, + { + "epoch": 1.1125288356725789, + "grad_norm": 0.07885333895683289, + "learning_rate": 4.890650134269519e-06, + "loss": 0.0008, + "step": 65830 + }, + { + "epoch": 1.1126978359514292, + "grad_norm": 0.055787790566682816, + "learning_rate": 4.8891756626010035e-06, + "loss": 0.0012, + "step": 65840 + }, + { + "epoch": 1.1128668362302798, + "grad_norm": 0.006853477098047733, + "learning_rate": 4.887701200574739e-06, + "loss": 0.0009, + "step": 65850 + }, + { + "epoch": 1.1130358365091302, + "grad_norm": 0.016508853062987328, + "learning_rate": 4.886226748319014e-06, + "loss": 0.0008, + "step": 65860 + }, + { + "epoch": 1.1132048367879808, + "grad_norm": 0.0349777527153492, + "learning_rate": 4.884752305962115e-06, + "loss": 0.0019, + "step": 65870 + }, + { + "epoch": 1.1133738370668311, + "grad_norm": 0.0494348369538784, + "learning_rate": 4.883277873632323e-06, + "loss": 0.0012, + "step": 65880 + }, + { + "epoch": 1.1135428373456817, + "grad_norm": 0.02532506175339222, + "learning_rate": 4.881803451457922e-06, + "loss": 0.0008, + "step": 
65890 + }, + { + "epoch": 1.113711837624532, + "grad_norm": 0.01580948196351528, + "learning_rate": 4.8803290395671916e-06, + "loss": 0.0008, + "step": 65900 + }, + { + "epoch": 1.1138808379033824, + "grad_norm": 0.06238386780023575, + "learning_rate": 4.8788546380884175e-06, + "loss": 0.0018, + "step": 65910 + }, + { + "epoch": 1.114049838182233, + "grad_norm": 0.04577694088220596, + "learning_rate": 4.877380247149874e-06, + "loss": 0.001, + "step": 65920 + }, + { + "epoch": 1.1142188384610834, + "grad_norm": 0.03396439179778099, + "learning_rate": 4.875905866879846e-06, + "loss": 0.0009, + "step": 65930 + }, + { + "epoch": 1.114387838739934, + "grad_norm": 0.03604747727513313, + "learning_rate": 4.874431497406607e-06, + "loss": 0.0013, + "step": 65940 + }, + { + "epoch": 1.1145568390187843, + "grad_norm": 0.09017330408096313, + "learning_rate": 4.8729571388584365e-06, + "loss": 0.0011, + "step": 65950 + }, + { + "epoch": 1.114725839297635, + "grad_norm": 0.48425760865211487, + "learning_rate": 4.87148279136361e-06, + "loss": 0.001, + "step": 65960 + }, + { + "epoch": 1.1148948395764853, + "grad_norm": 0.06308889389038086, + "learning_rate": 4.870008455050404e-06, + "loss": 0.0013, + "step": 65970 + }, + { + "epoch": 1.1150638398553359, + "grad_norm": 0.00627787783741951, + "learning_rate": 4.868534130047092e-06, + "loss": 0.0009, + "step": 65980 + }, + { + "epoch": 1.1152328401341862, + "grad_norm": 0.042335394769907, + "learning_rate": 4.867059816481948e-06, + "loss": 0.0012, + "step": 65990 + }, + { + "epoch": 1.1154018404130366, + "grad_norm": 0.033770300447940826, + "learning_rate": 4.865585514483243e-06, + "loss": 0.0014, + "step": 66000 + }, + { + "epoch": 1.1155708406918872, + "grad_norm": 0.016967343166470528, + "learning_rate": 4.864111224179251e-06, + "loss": 0.0006, + "step": 66010 + }, + { + "epoch": 1.1157398409707375, + "grad_norm": 0.014699154533445835, + "learning_rate": 4.862636945698239e-06, + "loss": 0.0012, + "step": 66020 + }, + { + "epoch": 1.1159088412495881, + "grad_norm": 0.019063862040638924, + "learning_rate": 4.861162679168481e-06, + "loss": 0.0022, + "step": 66030 + }, + { + "epoch": 1.1160778415284385, + "grad_norm": 0.04024651646614075, + "learning_rate": 4.85968842471824e-06, + "loss": 0.0005, + "step": 66040 + }, + { + "epoch": 1.116246841807289, + "grad_norm": 0.03585030138492584, + "learning_rate": 4.858214182475786e-06, + "loss": 0.0015, + "step": 66050 + }, + { + "epoch": 1.1164158420861394, + "grad_norm": 0.07168002426624298, + "learning_rate": 4.856739952569386e-06, + "loss": 0.0007, + "step": 66060 + }, + { + "epoch": 1.11658484236499, + "grad_norm": 0.09482478350400925, + "learning_rate": 4.855265735127305e-06, + "loss": 0.0014, + "step": 66070 + }, + { + "epoch": 1.1167538426438404, + "grad_norm": 0.03974776715040207, + "learning_rate": 4.853791530277804e-06, + "loss": 0.0006, + "step": 66080 + }, + { + "epoch": 1.1169228429226907, + "grad_norm": 0.037464916706085205, + "learning_rate": 4.852317338149151e-06, + "loss": 0.0008, + "step": 66090 + }, + { + "epoch": 1.1170918432015413, + "grad_norm": 0.14354050159454346, + "learning_rate": 4.850843158869603e-06, + "loss": 0.0013, + "step": 66100 + }, + { + "epoch": 1.1172608434803917, + "grad_norm": 0.04343586042523384, + "learning_rate": 4.849368992567422e-06, + "loss": 0.0007, + "step": 66110 + }, + { + "epoch": 1.1174298437592423, + "grad_norm": 0.05415260046720505, + "learning_rate": 4.847894839370872e-06, + "loss": 0.0016, + "step": 66120 + }, + { + "epoch": 1.1175988440380926, + "grad_norm": 
0.0037539496552199125, + "learning_rate": 4.846420699408205e-06, + "loss": 0.0007, + "step": 66130 + }, + { + "epoch": 1.1177678443169432, + "grad_norm": 0.12358132749795914, + "learning_rate": 4.844946572807684e-06, + "loss": 0.0013, + "step": 66140 + }, + { + "epoch": 1.1179368445957936, + "grad_norm": 0.024757402017712593, + "learning_rate": 4.84347245969756e-06, + "loss": 0.0013, + "step": 66150 + }, + { + "epoch": 1.118105844874644, + "grad_norm": 0.13422614336013794, + "learning_rate": 4.841998360206091e-06, + "loss": 0.0008, + "step": 66160 + }, + { + "epoch": 1.1182748451534945, + "grad_norm": 0.011023541912436485, + "learning_rate": 4.840524274461531e-06, + "loss": 0.0011, + "step": 66170 + }, + { + "epoch": 1.1184438454323449, + "grad_norm": 0.025453826412558556, + "learning_rate": 4.839050202592131e-06, + "loss": 0.0016, + "step": 66180 + }, + { + "epoch": 1.1186128457111955, + "grad_norm": 0.09494690597057343, + "learning_rate": 4.837576144726142e-06, + "loss": 0.001, + "step": 66190 + }, + { + "epoch": 1.1187818459900458, + "grad_norm": 0.16201642155647278, + "learning_rate": 4.836102100991818e-06, + "loss": 0.0014, + "step": 66200 + }, + { + "epoch": 1.1189508462688964, + "grad_norm": 0.026667365804314613, + "learning_rate": 4.8346280715174034e-06, + "loss": 0.0012, + "step": 66210 + }, + { + "epoch": 1.1191198465477468, + "grad_norm": 0.06574511528015137, + "learning_rate": 4.83315405643115e-06, + "loss": 0.0015, + "step": 66220 + }, + { + "epoch": 1.1192888468265974, + "grad_norm": 0.05048028379678726, + "learning_rate": 4.8316800558612995e-06, + "loss": 0.0021, + "step": 66230 + }, + { + "epoch": 1.1194578471054477, + "grad_norm": 0.03519992530345917, + "learning_rate": 4.830206069936102e-06, + "loss": 0.0007, + "step": 66240 + }, + { + "epoch": 1.119626847384298, + "grad_norm": 0.025307748466730118, + "learning_rate": 4.828732098783796e-06, + "loss": 0.0021, + "step": 66250 + }, + { + "epoch": 1.1197958476631487, + "grad_norm": 0.03679641708731651, + "learning_rate": 4.827258142532629e-06, + "loss": 0.0006, + "step": 66260 + }, + { + "epoch": 1.119964847941999, + "grad_norm": 0.12949617207050323, + "learning_rate": 4.82578420131084e-06, + "loss": 0.0009, + "step": 66270 + }, + { + "epoch": 1.1201338482208496, + "grad_norm": 0.03277931734919548, + "learning_rate": 4.824310275246671e-06, + "loss": 0.0012, + "step": 66280 + }, + { + "epoch": 1.1203028484997, + "grad_norm": 0.05668530985713005, + "learning_rate": 4.8228363644683575e-06, + "loss": 0.0009, + "step": 66290 + }, + { + "epoch": 1.1204718487785505, + "grad_norm": 0.024053629487752914, + "learning_rate": 4.821362469104141e-06, + "loss": 0.0012, + "step": 66300 + }, + { + "epoch": 1.120640849057401, + "grad_norm": 0.07685207575559616, + "learning_rate": 4.819888589282254e-06, + "loss": 0.0012, + "step": 66310 + }, + { + "epoch": 1.1208098493362515, + "grad_norm": 0.18044798076152802, + "learning_rate": 4.818414725130933e-06, + "loss": 0.0008, + "step": 66320 + }, + { + "epoch": 1.1209788496151019, + "grad_norm": 0.0692492127418518, + "learning_rate": 4.816940876778409e-06, + "loss": 0.001, + "step": 66330 + }, + { + "epoch": 1.1211478498939522, + "grad_norm": 0.038930077105760574, + "learning_rate": 4.8154670443529165e-06, + "loss": 0.0011, + "step": 66340 + }, + { + "epoch": 1.1213168501728028, + "grad_norm": 0.0401238352060318, + "learning_rate": 4.813993227982685e-06, + "loss": 0.0004, + "step": 66350 + }, + { + "epoch": 1.1214858504516532, + "grad_norm": 0.034223757684230804, + "learning_rate": 
4.812519427795944e-06, + "loss": 0.0016, + "step": 66360 + }, + { + "epoch": 1.1216548507305037, + "grad_norm": 0.05561797693371773, + "learning_rate": 4.811045643920921e-06, + "loss": 0.001, + "step": 66370 + }, + { + "epoch": 1.1218238510093541, + "grad_norm": 0.06368566304445267, + "learning_rate": 4.809571876485842e-06, + "loss": 0.0012, + "step": 66380 + }, + { + "epoch": 1.1219928512882047, + "grad_norm": 0.07481633871793747, + "learning_rate": 4.808098125618934e-06, + "loss": 0.0016, + "step": 66390 + }, + { + "epoch": 1.122161851567055, + "grad_norm": 0.033868514001369476, + "learning_rate": 4.8066243914484175e-06, + "loss": 0.0005, + "step": 66400 + }, + { + "epoch": 1.1223308518459056, + "grad_norm": 1.8786673545837402, + "learning_rate": 4.805150674102518e-06, + "loss": 0.0009, + "step": 66410 + }, + { + "epoch": 1.122499852124756, + "grad_norm": 0.06060846894979477, + "learning_rate": 4.803676973709451e-06, + "loss": 0.0006, + "step": 66420 + }, + { + "epoch": 1.1226688524036064, + "grad_norm": 0.037026919424533844, + "learning_rate": 4.802203290397441e-06, + "loss": 0.0014, + "step": 66430 + }, + { + "epoch": 1.122837852682457, + "grad_norm": 0.03700173646211624, + "learning_rate": 4.800729624294701e-06, + "loss": 0.001, + "step": 66440 + }, + { + "epoch": 1.1230068529613073, + "grad_norm": 0.014215175062417984, + "learning_rate": 4.799255975529451e-06, + "loss": 0.0011, + "step": 66450 + }, + { + "epoch": 1.123175853240158, + "grad_norm": 0.18786773085594177, + "learning_rate": 4.797782344229902e-06, + "loss": 0.0011, + "step": 66460 + }, + { + "epoch": 1.1233448535190083, + "grad_norm": 0.014328324235975742, + "learning_rate": 4.7963087305242705e-06, + "loss": 0.0008, + "step": 66470 + }, + { + "epoch": 1.1235138537978588, + "grad_norm": 0.07028224319219589, + "learning_rate": 4.794835134540764e-06, + "loss": 0.0007, + "step": 66480 + }, + { + "epoch": 1.1236828540767092, + "grad_norm": 0.031667813658714294, + "learning_rate": 4.793361556407598e-06, + "loss": 0.001, + "step": 66490 + }, + { + "epoch": 1.1238518543555598, + "grad_norm": 0.10028263181447983, + "learning_rate": 4.791887996252976e-06, + "loss": 0.0016, + "step": 66500 + }, + { + "epoch": 1.1240208546344101, + "grad_norm": 0.01621779054403305, + "learning_rate": 4.7904144542051065e-06, + "loss": 0.001, + "step": 66510 + }, + { + "epoch": 1.1241898549132605, + "grad_norm": 0.06411411613225937, + "learning_rate": 4.788940930392195e-06, + "loss": 0.0012, + "step": 66520 + }, + { + "epoch": 1.124358855192111, + "grad_norm": 0.08739247918128967, + "learning_rate": 4.787467424942446e-06, + "loss": 0.002, + "step": 66530 + }, + { + "epoch": 1.1245278554709615, + "grad_norm": 0.13845498859882355, + "learning_rate": 4.78599393798406e-06, + "loss": 0.0014, + "step": 66540 + }, + { + "epoch": 1.124696855749812, + "grad_norm": 0.029364168643951416, + "learning_rate": 4.7845204696452385e-06, + "loss": 0.0018, + "step": 66550 + }, + { + "epoch": 1.1248658560286624, + "grad_norm": 0.14498049020767212, + "learning_rate": 4.783047020054179e-06, + "loss": 0.0009, + "step": 66560 + }, + { + "epoch": 1.125034856307513, + "grad_norm": 0.013374504633247852, + "learning_rate": 4.7815735893390824e-06, + "loss": 0.0012, + "step": 66570 + }, + { + "epoch": 1.1252038565863633, + "grad_norm": 0.021575110033154488, + "learning_rate": 4.78010017762814e-06, + "loss": 0.0012, + "step": 66580 + }, + { + "epoch": 1.125372856865214, + "grad_norm": 0.0907462015748024, + "learning_rate": 4.778626785049548e-06, + "loss": 0.0016, + "step": 66590 + }, 
+ { + "epoch": 1.1255418571440643, + "grad_norm": 0.010888309217989445, + "learning_rate": 4.777153411731498e-06, + "loss": 0.0011, + "step": 66600 + }, + { + "epoch": 1.1257108574229147, + "grad_norm": 0.018221450969576836, + "learning_rate": 4.775680057802181e-06, + "loss": 0.001, + "step": 66610 + }, + { + "epoch": 1.1258798577017652, + "grad_norm": 0.0347580760717392, + "learning_rate": 4.774206723389787e-06, + "loss": 0.0016, + "step": 66620 + }, + { + "epoch": 1.1260488579806156, + "grad_norm": 0.005065588746219873, + "learning_rate": 4.7727334086225e-06, + "loss": 0.0017, + "step": 66630 + }, + { + "epoch": 1.1262178582594662, + "grad_norm": 0.0021246259566396475, + "learning_rate": 4.771260113628509e-06, + "loss": 0.0006, + "step": 66640 + }, + { + "epoch": 1.1263868585383165, + "grad_norm": 0.07427023351192474, + "learning_rate": 4.769786838535996e-06, + "loss": 0.0006, + "step": 66650 + }, + { + "epoch": 1.1265558588171671, + "grad_norm": 0.10658226162195206, + "learning_rate": 4.768313583473144e-06, + "loss": 0.0015, + "step": 66660 + }, + { + "epoch": 1.1267248590960175, + "grad_norm": 0.022666780278086662, + "learning_rate": 4.7668403485681305e-06, + "loss": 0.0014, + "step": 66670 + }, + { + "epoch": 1.126893859374868, + "grad_norm": 0.051752593368291855, + "learning_rate": 4.76536713394914e-06, + "loss": 0.0014, + "step": 66680 + }, + { + "epoch": 1.1270628596537184, + "grad_norm": 0.08772103488445282, + "learning_rate": 4.763893939744343e-06, + "loss": 0.0022, + "step": 66690 + }, + { + "epoch": 1.1272318599325688, + "grad_norm": 0.04196293652057648, + "learning_rate": 4.76242076608192e-06, + "loss": 0.0016, + "step": 66700 + }, + { + "epoch": 1.1274008602114194, + "grad_norm": 0.008633043617010117, + "learning_rate": 4.760947613090038e-06, + "loss": 0.0005, + "step": 66710 + }, + { + "epoch": 1.1275698604902697, + "grad_norm": 0.04248087853193283, + "learning_rate": 4.7594744808968746e-06, + "loss": 0.0013, + "step": 66720 + }, + { + "epoch": 1.1277388607691203, + "grad_norm": 0.050820015370845795, + "learning_rate": 4.758001369630594e-06, + "loss": 0.0011, + "step": 66730 + }, + { + "epoch": 1.1279078610479707, + "grad_norm": 0.16319343447685242, + "learning_rate": 4.756528279419369e-06, + "loss": 0.0014, + "step": 66740 + }, + { + "epoch": 1.1280768613268213, + "grad_norm": 0.09688375890254974, + "learning_rate": 4.755055210391362e-06, + "loss": 0.0014, + "step": 66750 + }, + { + "epoch": 1.1282458616056716, + "grad_norm": 0.025112776085734367, + "learning_rate": 4.75358216267474e-06, + "loss": 0.0009, + "step": 66760 + }, + { + "epoch": 1.1284148618845222, + "grad_norm": 0.045800063759088516, + "learning_rate": 4.7521091363976615e-06, + "loss": 0.0009, + "step": 66770 + }, + { + "epoch": 1.1285838621633726, + "grad_norm": 0.0996677353978157, + "learning_rate": 4.750636131688292e-06, + "loss": 0.0007, + "step": 66780 + }, + { + "epoch": 1.128752862442223, + "grad_norm": 0.06506549566984177, + "learning_rate": 4.7491631486747845e-06, + "loss": 0.0012, + "step": 66790 + }, + { + "epoch": 1.1289218627210735, + "grad_norm": 0.03400241583585739, + "learning_rate": 4.747690187485301e-06, + "loss": 0.0009, + "step": 66800 + }, + { + "epoch": 1.129090862999924, + "grad_norm": 0.013674917630851269, + "learning_rate": 4.746217248247992e-06, + "loss": 0.0011, + "step": 66810 + }, + { + "epoch": 1.1292598632787745, + "grad_norm": 0.026519564911723137, + "learning_rate": 4.7447443310910125e-06, + "loss": 0.0008, + "step": 66820 + }, + { + "epoch": 1.1294288635576248, + "grad_norm": 
0.03270822390913963, + "learning_rate": 4.7432714361425126e-06, + "loss": 0.0007, + "step": 66830 + }, + { + "epoch": 1.1295978638364754, + "grad_norm": 0.0035512656904757023, + "learning_rate": 4.7417985635306425e-06, + "loss": 0.0025, + "step": 66840 + }, + { + "epoch": 1.1297668641153258, + "grad_norm": 0.26910850405693054, + "learning_rate": 4.740325713383546e-06, + "loss": 0.0023, + "step": 66850 + }, + { + "epoch": 1.1299358643941764, + "grad_norm": 0.028381630778312683, + "learning_rate": 4.7388528858293746e-06, + "loss": 0.0011, + "step": 66860 + }, + { + "epoch": 1.1301048646730267, + "grad_norm": 0.052497945725917816, + "learning_rate": 4.7373800809962635e-06, + "loss": 0.0006, + "step": 66870 + }, + { + "epoch": 1.130273864951877, + "grad_norm": 0.003387107513844967, + "learning_rate": 4.735907299012358e-06, + "loss": 0.0012, + "step": 66880 + }, + { + "epoch": 1.1304428652307277, + "grad_norm": 0.04416866600513458, + "learning_rate": 4.7344345400058e-06, + "loss": 0.0013, + "step": 66890 + }, + { + "epoch": 1.130611865509578, + "grad_norm": 0.08238162845373154, + "learning_rate": 4.732961804104721e-06, + "loss": 0.0004, + "step": 66900 + }, + { + "epoch": 1.1307808657884286, + "grad_norm": 0.10110866278409958, + "learning_rate": 4.731489091437262e-06, + "loss": 0.0018, + "step": 66910 + }, + { + "epoch": 1.130949866067279, + "grad_norm": 0.030607158318161964, + "learning_rate": 4.73001640213155e-06, + "loss": 0.0022, + "step": 66920 + }, + { + "epoch": 1.1311188663461293, + "grad_norm": 0.045541878789663315, + "learning_rate": 4.7285437363157205e-06, + "loss": 0.0011, + "step": 66930 + }, + { + "epoch": 1.13128786662498, + "grad_norm": 0.12421415001153946, + "learning_rate": 4.727071094117901e-06, + "loss": 0.0018, + "step": 66940 + }, + { + "epoch": 1.1314568669038305, + "grad_norm": 0.14488165080547333, + "learning_rate": 4.725598475666218e-06, + "loss": 0.0013, + "step": 66950 + }, + { + "epoch": 1.1316258671826809, + "grad_norm": 0.05202499404549599, + "learning_rate": 4.7241258810887966e-06, + "loss": 0.0013, + "step": 66960 + }, + { + "epoch": 1.1317948674615312, + "grad_norm": 0.011785942129790783, + "learning_rate": 4.722653310513763e-06, + "loss": 0.001, + "step": 66970 + }, + { + "epoch": 1.1319638677403818, + "grad_norm": 0.10218577831983566, + "learning_rate": 4.721180764069232e-06, + "loss": 0.0015, + "step": 66980 + }, + { + "epoch": 1.1321328680192322, + "grad_norm": 0.0605245977640152, + "learning_rate": 4.719708241883329e-06, + "loss": 0.0011, + "step": 66990 + }, + { + "epoch": 1.1323018682980828, + "grad_norm": 0.03460155054926872, + "learning_rate": 4.718235744084164e-06, + "loss": 0.0008, + "step": 67000 + }, + { + "epoch": 1.1324708685769331, + "grad_norm": 0.023951146751642227, + "learning_rate": 4.716763270799856e-06, + "loss": 0.0014, + "step": 67010 + }, + { + "epoch": 1.1326398688557835, + "grad_norm": 0.04681273549795151, + "learning_rate": 4.715290822158514e-06, + "loss": 0.0012, + "step": 67020 + }, + { + "epoch": 1.132808869134634, + "grad_norm": 0.03391709923744202, + "learning_rate": 4.713818398288251e-06, + "loss": 0.001, + "step": 67030 + }, + { + "epoch": 1.1329778694134844, + "grad_norm": 0.0008612428791821003, + "learning_rate": 4.7123459993171735e-06, + "loss": 0.0013, + "step": 67040 + }, + { + "epoch": 1.133146869692335, + "grad_norm": 0.04772978276014328, + "learning_rate": 4.710873625373389e-06, + "loss": 0.0012, + "step": 67050 + }, + { + "epoch": 1.1333158699711854, + "grad_norm": 0.06175963208079338, + "learning_rate": 
4.709401276584998e-06, + "loss": 0.0012, + "step": 67060 + }, + { + "epoch": 1.133484870250036, + "grad_norm": 0.09109710901975632, + "learning_rate": 4.707928953080106e-06, + "loss": 0.0017, + "step": 67070 + }, + { + "epoch": 1.1336538705288863, + "grad_norm": 0.021391067653894424, + "learning_rate": 4.706456654986809e-06, + "loss": 0.0008, + "step": 67080 + }, + { + "epoch": 1.133822870807737, + "grad_norm": 0.04566289111971855, + "learning_rate": 4.704984382433207e-06, + "loss": 0.0011, + "step": 67090 + }, + { + "epoch": 1.1339918710865873, + "grad_norm": 0.08351574093103409, + "learning_rate": 4.70351213554739e-06, + "loss": 0.0011, + "step": 67100 + }, + { + "epoch": 1.1341608713654376, + "grad_norm": 0.05140083283185959, + "learning_rate": 4.702039914457456e-06, + "loss": 0.0007, + "step": 67110 + }, + { + "epoch": 1.1343298716442882, + "grad_norm": 0.016718655824661255, + "learning_rate": 4.700567719291493e-06, + "loss": 0.0013, + "step": 67120 + }, + { + "epoch": 1.1344988719231386, + "grad_norm": 0.14465704560279846, + "learning_rate": 4.699095550177587e-06, + "loss": 0.0007, + "step": 67130 + }, + { + "epoch": 1.1346678722019892, + "grad_norm": 0.04567522555589676, + "learning_rate": 4.697623407243827e-06, + "loss": 0.0013, + "step": 67140 + }, + { + "epoch": 1.1348368724808395, + "grad_norm": 0.012041511945426464, + "learning_rate": 4.696151290618296e-06, + "loss": 0.0011, + "step": 67150 + }, + { + "epoch": 1.1350058727596901, + "grad_norm": 0.012735763564705849, + "learning_rate": 4.6946792004290765e-06, + "loss": 0.0008, + "step": 67160 + }, + { + "epoch": 1.1351748730385405, + "grad_norm": 0.05037974193692207, + "learning_rate": 4.693207136804244e-06, + "loss": 0.0009, + "step": 67170 + }, + { + "epoch": 1.135343873317391, + "grad_norm": 0.022585468366742134, + "learning_rate": 4.691735099871878e-06, + "loss": 0.0008, + "step": 67180 + }, + { + "epoch": 1.1355128735962414, + "grad_norm": 0.06173543259501457, + "learning_rate": 4.690263089760051e-06, + "loss": 0.0011, + "step": 67190 + }, + { + "epoch": 1.1356818738750918, + "grad_norm": 0.020044632256031036, + "learning_rate": 4.688791106596837e-06, + "loss": 0.0007, + "step": 67200 + }, + { + "epoch": 1.1358508741539424, + "grad_norm": 0.15761317312717438, + "learning_rate": 4.687319150510304e-06, + "loss": 0.0036, + "step": 67210 + }, + { + "epoch": 1.1360198744327927, + "grad_norm": 0.006815786939114332, + "learning_rate": 4.68584722162852e-06, + "loss": 0.0006, + "step": 67220 + }, + { + "epoch": 1.1361888747116433, + "grad_norm": 0.04175468906760216, + "learning_rate": 4.684375320079548e-06, + "loss": 0.0016, + "step": 67230 + }, + { + "epoch": 1.1363578749904937, + "grad_norm": 0.24287596344947815, + "learning_rate": 4.682903445991456e-06, + "loss": 0.0011, + "step": 67240 + }, + { + "epoch": 1.1365268752693443, + "grad_norm": 0.05699858069419861, + "learning_rate": 4.681431599492297e-06, + "loss": 0.0008, + "step": 67250 + }, + { + "epoch": 1.1366958755481946, + "grad_norm": 0.06530001759529114, + "learning_rate": 4.679959780710136e-06, + "loss": 0.0012, + "step": 67260 + }, + { + "epoch": 1.1368648758270452, + "grad_norm": 0.08941524475812912, + "learning_rate": 4.6784879897730215e-06, + "loss": 0.0018, + "step": 67270 + }, + { + "epoch": 1.1370338761058956, + "grad_norm": 0.04054303839802742, + "learning_rate": 4.677016226809012e-06, + "loss": 0.0015, + "step": 67280 + }, + { + "epoch": 1.137202876384746, + "grad_norm": 0.061838019639253616, + "learning_rate": 4.675544491946154e-06, + "loss": 0.0008, + "step": 
67290 + }, + { + "epoch": 1.1373718766635965, + "grad_norm": 0.1300504505634308, + "learning_rate": 4.674072785312497e-06, + "loss": 0.0033, + "step": 67300 + }, + { + "epoch": 1.1375408769424469, + "grad_norm": 0.010322902351617813, + "learning_rate": 4.672601107036088e-06, + "loss": 0.0005, + "step": 67310 + }, + { + "epoch": 1.1377098772212975, + "grad_norm": 0.04041688144207001, + "learning_rate": 4.671129457244968e-06, + "loss": 0.0007, + "step": 67320 + }, + { + "epoch": 1.1378788775001478, + "grad_norm": 0.09284889698028564, + "learning_rate": 4.6696578360671785e-06, + "loss": 0.0006, + "step": 67330 + }, + { + "epoch": 1.1380478777789984, + "grad_norm": 0.0779462456703186, + "learning_rate": 4.66818624363076e-06, + "loss": 0.0008, + "step": 67340 + }, + { + "epoch": 1.1382168780578488, + "grad_norm": 0.06471201032400131, + "learning_rate": 4.666714680063743e-06, + "loss": 0.0013, + "step": 67350 + }, + { + "epoch": 1.1383858783366994, + "grad_norm": 0.0035908652935177088, + "learning_rate": 4.665243145494167e-06, + "loss": 0.0012, + "step": 67360 + }, + { + "epoch": 1.1385548786155497, + "grad_norm": 0.1354219913482666, + "learning_rate": 4.663771640050056e-06, + "loss": 0.0016, + "step": 67370 + }, + { + "epoch": 1.1387238788944, + "grad_norm": 0.08462052792310715, + "learning_rate": 4.662300163859442e-06, + "loss": 0.0006, + "step": 67380 + }, + { + "epoch": 1.1388928791732507, + "grad_norm": 0.00776013545691967, + "learning_rate": 4.660828717050352e-06, + "loss": 0.0005, + "step": 67390 + }, + { + "epoch": 1.139061879452101, + "grad_norm": 0.03530941531062126, + "learning_rate": 4.659357299750804e-06, + "loss": 0.0005, + "step": 67400 + }, + { + "epoch": 1.1392308797309516, + "grad_norm": 0.022199373692274094, + "learning_rate": 4.657885912088824e-06, + "loss": 0.0013, + "step": 67410 + }, + { + "epoch": 1.139399880009802, + "grad_norm": 0.005407856311649084, + "learning_rate": 4.656414554192426e-06, + "loss": 0.0012, + "step": 67420 + }, + { + "epoch": 1.1395688802886526, + "grad_norm": 0.04724668711423874, + "learning_rate": 4.654943226189627e-06, + "loss": 0.0027, + "step": 67430 + }, + { + "epoch": 1.139737880567503, + "grad_norm": 0.05699315667152405, + "learning_rate": 4.653471928208437e-06, + "loss": 0.0013, + "step": 67440 + }, + { + "epoch": 1.1399068808463535, + "grad_norm": 0.0339827723801136, + "learning_rate": 4.652000660376872e-06, + "loss": 0.0011, + "step": 67450 + }, + { + "epoch": 1.1400758811252039, + "grad_norm": 0.02883787266910076, + "learning_rate": 4.650529422822932e-06, + "loss": 0.001, + "step": 67460 + }, + { + "epoch": 1.1402448814040542, + "grad_norm": 0.009684630669653416, + "learning_rate": 4.6490582156746285e-06, + "loss": 0.0011, + "step": 67470 + }, + { + "epoch": 1.1404138816829048, + "grad_norm": 0.023157119750976562, + "learning_rate": 4.647587039059958e-06, + "loss": 0.0016, + "step": 67480 + }, + { + "epoch": 1.1405828819617552, + "grad_norm": 0.07178868353366852, + "learning_rate": 4.646115893106926e-06, + "loss": 0.0015, + "step": 67490 + }, + { + "epoch": 1.1407518822406058, + "grad_norm": 0.16174933314323425, + "learning_rate": 4.644644777943522e-06, + "loss": 0.0017, + "step": 67500 + }, + { + "epoch": 1.1409208825194561, + "grad_norm": 0.06819438189268112, + "learning_rate": 4.643173693697747e-06, + "loss": 0.0012, + "step": 67510 + }, + { + "epoch": 1.1410898827983067, + "grad_norm": 0.12232200801372528, + "learning_rate": 4.641702640497587e-06, + "loss": 0.0012, + "step": 67520 + }, + { + "epoch": 1.141258883077157, + "grad_norm": 
0.00984139647334814, + "learning_rate": 4.640231618471036e-06, + "loss": 0.001, + "step": 67530 + }, + { + "epoch": 1.1414278833560076, + "grad_norm": 0.05852857604622841, + "learning_rate": 4.638760627746075e-06, + "loss": 0.0015, + "step": 67540 + }, + { + "epoch": 1.141596883634858, + "grad_norm": 0.03056730516254902, + "learning_rate": 4.637289668450692e-06, + "loss": 0.0007, + "step": 67550 + }, + { + "epoch": 1.1417658839137084, + "grad_norm": 0.062109462916851044, + "learning_rate": 4.6358187407128625e-06, + "loss": 0.0011, + "step": 67560 + }, + { + "epoch": 1.141934884192559, + "grad_norm": 0.006442485377192497, + "learning_rate": 4.63434784466057e-06, + "loss": 0.0018, + "step": 67570 + }, + { + "epoch": 1.1421038844714093, + "grad_norm": 0.06605667620897293, + "learning_rate": 4.6328769804217835e-06, + "loss": 0.0004, + "step": 67580 + }, + { + "epoch": 1.14227288475026, + "grad_norm": 0.06362783908843994, + "learning_rate": 4.63140614812448e-06, + "loss": 0.001, + "step": 67590 + }, + { + "epoch": 1.1424418850291103, + "grad_norm": 0.040291350334882736, + "learning_rate": 4.6299353478966275e-06, + "loss": 0.001, + "step": 67600 + }, + { + "epoch": 1.1426108853079608, + "grad_norm": 0.15025590360164642, + "learning_rate": 4.628464579866192e-06, + "loss": 0.0021, + "step": 67610 + }, + { + "epoch": 1.1427798855868112, + "grad_norm": 0.07118073850870132, + "learning_rate": 4.626993844161139e-06, + "loss": 0.0008, + "step": 67620 + }, + { + "epoch": 1.1429488858656618, + "grad_norm": 0.29182419180870056, + "learning_rate": 4.625523140909427e-06, + "loss": 0.001, + "step": 67630 + }, + { + "epoch": 1.1431178861445122, + "grad_norm": 0.013745423406362534, + "learning_rate": 4.624052470239019e-06, + "loss": 0.0014, + "step": 67640 + }, + { + "epoch": 1.1432868864233625, + "grad_norm": 0.08487940579652786, + "learning_rate": 4.6225818322778655e-06, + "loss": 0.0009, + "step": 67650 + }, + { + "epoch": 1.143455886702213, + "grad_norm": 0.04990014806389809, + "learning_rate": 4.6211112271539235e-06, + "loss": 0.0008, + "step": 67660 + }, + { + "epoch": 1.1436248869810635, + "grad_norm": 0.011304205283522606, + "learning_rate": 4.619640654995138e-06, + "loss": 0.0009, + "step": 67670 + }, + { + "epoch": 1.143793887259914, + "grad_norm": 0.09348660707473755, + "learning_rate": 4.6181701159294605e-06, + "loss": 0.0016, + "step": 67680 + }, + { + "epoch": 1.1439628875387644, + "grad_norm": 0.016546789556741714, + "learning_rate": 4.616699610084831e-06, + "loss": 0.0009, + "step": 67690 + }, + { + "epoch": 1.144131887817615, + "grad_norm": 0.03539736196398735, + "learning_rate": 4.615229137589193e-06, + "loss": 0.0017, + "step": 67700 + }, + { + "epoch": 1.1443008880964654, + "grad_norm": 0.09660420566797256, + "learning_rate": 4.613758698570485e-06, + "loss": 0.0025, + "step": 67710 + }, + { + "epoch": 1.144469888375316, + "grad_norm": 0.01964586041867733, + "learning_rate": 4.612288293156642e-06, + "loss": 0.0008, + "step": 67720 + }, + { + "epoch": 1.1446388886541663, + "grad_norm": 0.10755769908428192, + "learning_rate": 4.610817921475595e-06, + "loss": 0.0008, + "step": 67730 + }, + { + "epoch": 1.1448078889330167, + "grad_norm": 0.040303055197000504, + "learning_rate": 4.609347583655275e-06, + "loss": 0.0009, + "step": 67740 + }, + { + "epoch": 1.1449768892118672, + "grad_norm": 0.04203404486179352, + "learning_rate": 4.607877279823607e-06, + "loss": 0.0009, + "step": 67750 + }, + { + "epoch": 1.1451458894907176, + "grad_norm": 0.044235777109861374, + "learning_rate": 
4.606407010108518e-06, + "loss": 0.001, + "step": 67760 + }, + { + "epoch": 1.1453148897695682, + "grad_norm": 0.061796385794878006, + "learning_rate": 4.604936774637923e-06, + "loss": 0.0009, + "step": 67770 + }, + { + "epoch": 1.1454838900484186, + "grad_norm": 0.03359169140458107, + "learning_rate": 4.603466573539745e-06, + "loss": 0.0007, + "step": 67780 + }, + { + "epoch": 1.1456528903272691, + "grad_norm": 0.012582000344991684, + "learning_rate": 4.601996406941895e-06, + "loss": 0.0012, + "step": 67790 + }, + { + "epoch": 1.1458218906061195, + "grad_norm": 0.07218701392412186, + "learning_rate": 4.600526274972287e-06, + "loss": 0.0011, + "step": 67800 + }, + { + "epoch": 1.14599089088497, + "grad_norm": 0.033153899013996124, + "learning_rate": 4.599056177758827e-06, + "loss": 0.0009, + "step": 67810 + }, + { + "epoch": 1.1461598911638204, + "grad_norm": 0.08208203315734863, + "learning_rate": 4.597586115429424e-06, + "loss": 0.0015, + "step": 67820 + }, + { + "epoch": 1.1463288914426708, + "grad_norm": 0.05478335916996002, + "learning_rate": 4.596116088111977e-06, + "loss": 0.0006, + "step": 67830 + }, + { + "epoch": 1.1464978917215214, + "grad_norm": 0.021915815770626068, + "learning_rate": 4.594646095934389e-06, + "loss": 0.0017, + "step": 67840 + }, + { + "epoch": 1.1466668920003718, + "grad_norm": 0.023021148517727852, + "learning_rate": 4.593176139024553e-06, + "loss": 0.0032, + "step": 67850 + }, + { + "epoch": 1.1468358922792223, + "grad_norm": 0.0049620214849710464, + "learning_rate": 4.591706217510366e-06, + "loss": 0.0011, + "step": 67860 + }, + { + "epoch": 1.1470048925580727, + "grad_norm": 0.05627528950572014, + "learning_rate": 4.590236331519714e-06, + "loss": 0.0007, + "step": 67870 + }, + { + "epoch": 1.147173892836923, + "grad_norm": 0.061663877218961716, + "learning_rate": 4.588766481180487e-06, + "loss": 0.0011, + "step": 67880 + }, + { + "epoch": 1.1473428931157736, + "grad_norm": 0.08341017365455627, + "learning_rate": 4.587296666620569e-06, + "loss": 0.0021, + "step": 67890 + }, + { + "epoch": 1.147511893394624, + "grad_norm": 0.012174475006759167, + "learning_rate": 4.585826887967841e-06, + "loss": 0.0006, + "step": 67900 + }, + { + "epoch": 1.1476808936734746, + "grad_norm": 0.04059426859021187, + "learning_rate": 4.584357145350181e-06, + "loss": 0.0018, + "step": 67910 + }, + { + "epoch": 1.147849893952325, + "grad_norm": 0.05350024253129959, + "learning_rate": 4.5828874388954605e-06, + "loss": 0.0009, + "step": 67920 + }, + { + "epoch": 1.1480188942311755, + "grad_norm": 0.04600991681218147, + "learning_rate": 4.581417768731558e-06, + "loss": 0.0011, + "step": 67930 + }, + { + "epoch": 1.148187894510026, + "grad_norm": 0.014942926354706287, + "learning_rate": 4.579948134986334e-06, + "loss": 0.0008, + "step": 67940 + }, + { + "epoch": 1.1483568947888765, + "grad_norm": 0.02997707575559616, + "learning_rate": 4.578478537787659e-06, + "loss": 0.0012, + "step": 67950 + }, + { + "epoch": 1.1485258950677268, + "grad_norm": 0.0712277963757515, + "learning_rate": 4.577008977263393e-06, + "loss": 0.0006, + "step": 67960 + }, + { + "epoch": 1.1486948953465772, + "grad_norm": 0.020164404064416885, + "learning_rate": 4.5755394535413976e-06, + "loss": 0.0009, + "step": 67970 + }, + { + "epoch": 1.1488638956254278, + "grad_norm": 0.06729643791913986, + "learning_rate": 4.574069966749523e-06, + "loss": 0.0012, + "step": 67980 + }, + { + "epoch": 1.1490328959042782, + "grad_norm": 0.00353659363463521, + "learning_rate": 4.572600517015627e-06, + "loss": 0.0006, + "step": 
67990 + }, + { + "epoch": 1.1492018961831287, + "grad_norm": 0.09982866048812866, + "learning_rate": 4.571131104467555e-06, + "loss": 0.0013, + "step": 68000 + }, + { + "epoch": 1.149370896461979, + "grad_norm": 0.016557859256863594, + "learning_rate": 4.569661729233158e-06, + "loss": 0.0009, + "step": 68010 + }, + { + "epoch": 1.1495398967408297, + "grad_norm": 0.028532059863209724, + "learning_rate": 4.568192391440272e-06, + "loss": 0.0007, + "step": 68020 + }, + { + "epoch": 1.14970889701968, + "grad_norm": 0.027837703004479408, + "learning_rate": 4.566723091216743e-06, + "loss": 0.0011, + "step": 68030 + }, + { + "epoch": 1.1498778972985306, + "grad_norm": 0.049436330795288086, + "learning_rate": 4.565253828690402e-06, + "loss": 0.0011, + "step": 68040 + }, + { + "epoch": 1.150046897577381, + "grad_norm": 0.039981693029403687, + "learning_rate": 4.563784603989087e-06, + "loss": 0.0009, + "step": 68050 + }, + { + "epoch": 1.1502158978562314, + "grad_norm": 0.007820505648851395, + "learning_rate": 4.562315417240622e-06, + "loss": 0.0006, + "step": 68060 + }, + { + "epoch": 1.150384898135082, + "grad_norm": 0.0816367045044899, + "learning_rate": 4.560846268572838e-06, + "loss": 0.001, + "step": 68070 + }, + { + "epoch": 1.1505538984139323, + "grad_norm": 0.0554218627512455, + "learning_rate": 4.559377158113557e-06, + "loss": 0.0007, + "step": 68080 + }, + { + "epoch": 1.1507228986927829, + "grad_norm": 0.002819223329424858, + "learning_rate": 4.557908085990597e-06, + "loss": 0.005, + "step": 68090 + }, + { + "epoch": 1.1508918989716332, + "grad_norm": 0.04873186722397804, + "learning_rate": 4.556439052331775e-06, + "loss": 0.0014, + "step": 68100 + }, + { + "epoch": 1.1510608992504838, + "grad_norm": 0.07898057997226715, + "learning_rate": 4.554970057264907e-06, + "loss": 0.0011, + "step": 68110 + }, + { + "epoch": 1.1512298995293342, + "grad_norm": 0.04096459597349167, + "learning_rate": 4.5535011009177965e-06, + "loss": 0.0011, + "step": 68120 + }, + { + "epoch": 1.1513988998081848, + "grad_norm": 0.050151173025369644, + "learning_rate": 4.552032183418257e-06, + "loss": 0.001, + "step": 68130 + }, + { + "epoch": 1.1515679000870351, + "grad_norm": 0.06796563416719437, + "learning_rate": 4.550563304894086e-06, + "loss": 0.0009, + "step": 68140 + }, + { + "epoch": 1.1517369003658855, + "grad_norm": 0.02465634234249592, + "learning_rate": 4.549094465473085e-06, + "loss": 0.0011, + "step": 68150 + }, + { + "epoch": 1.151905900644736, + "grad_norm": 0.033169254660606384, + "learning_rate": 4.547625665283051e-06, + "loss": 0.0004, + "step": 68160 + }, + { + "epoch": 1.1520749009235864, + "grad_norm": 0.018418651074171066, + "learning_rate": 4.546156904451775e-06, + "loss": 0.0018, + "step": 68170 + }, + { + "epoch": 1.152243901202437, + "grad_norm": 0.0853998214006424, + "learning_rate": 4.544688183107048e-06, + "loss": 0.0009, + "step": 68180 + }, + { + "epoch": 1.1524129014812874, + "grad_norm": 0.01995234563946724, + "learning_rate": 4.5432195013766555e-06, + "loss": 0.0019, + "step": 68190 + }, + { + "epoch": 1.152581901760138, + "grad_norm": 0.0021944534964859486, + "learning_rate": 4.541750859388379e-06, + "loss": 0.0009, + "step": 68200 + }, + { + "epoch": 1.1527509020389883, + "grad_norm": 0.06678059697151184, + "learning_rate": 4.5402822572699976e-06, + "loss": 0.0013, + "step": 68210 + }, + { + "epoch": 1.152919902317839, + "grad_norm": 0.03664885088801384, + "learning_rate": 4.538813695149289e-06, + "loss": 0.0009, + "step": 68220 + }, + { + "epoch": 1.1530889025966893, + 
"grad_norm": 0.05517364665865898, + "learning_rate": 4.537345173154021e-06, + "loss": 0.0006, + "step": 68230 + }, + { + "epoch": 1.1532579028755396, + "grad_norm": 0.06198059394955635, + "learning_rate": 4.535876691411967e-06, + "loss": 0.0019, + "step": 68240 + }, + { + "epoch": 1.1534269031543902, + "grad_norm": 0.012155015021562576, + "learning_rate": 4.5344082500508874e-06, + "loss": 0.0007, + "step": 68250 + }, + { + "epoch": 1.1535959034332406, + "grad_norm": 0.04442523047327995, + "learning_rate": 4.532939849198547e-06, + "loss": 0.0009, + "step": 68260 + }, + { + "epoch": 1.1537649037120912, + "grad_norm": 0.0031372327357530594, + "learning_rate": 4.531471488982702e-06, + "loss": 0.0013, + "step": 68270 + }, + { + "epoch": 1.1539339039909415, + "grad_norm": 0.037043992429971695, + "learning_rate": 4.530003169531108e-06, + "loss": 0.0006, + "step": 68280 + }, + { + "epoch": 1.1541029042697921, + "grad_norm": 0.018792105838656425, + "learning_rate": 4.528534890971515e-06, + "loss": 0.001, + "step": 68290 + }, + { + "epoch": 1.1542719045486425, + "grad_norm": 0.053301285952329636, + "learning_rate": 4.527066653431672e-06, + "loss": 0.0012, + "step": 68300 + }, + { + "epoch": 1.154440904827493, + "grad_norm": 0.18253421783447266, + "learning_rate": 4.525598457039319e-06, + "loss": 0.0008, + "step": 68310 + }, + { + "epoch": 1.1546099051063434, + "grad_norm": 0.04951026290655136, + "learning_rate": 4.524130301922203e-06, + "loss": 0.0018, + "step": 68320 + }, + { + "epoch": 1.1547789053851938, + "grad_norm": 0.06628241389989853, + "learning_rate": 4.522662188208052e-06, + "loss": 0.0008, + "step": 68330 + }, + { + "epoch": 1.1549479056640444, + "grad_norm": 0.018393343314528465, + "learning_rate": 4.521194116024607e-06, + "loss": 0.0006, + "step": 68340 + }, + { + "epoch": 1.1551169059428947, + "grad_norm": 0.10031246393918991, + "learning_rate": 4.519726085499591e-06, + "loss": 0.0017, + "step": 68350 + }, + { + "epoch": 1.1552859062217453, + "grad_norm": 0.012092884629964828, + "learning_rate": 4.518258096760734e-06, + "loss": 0.001, + "step": 68360 + }, + { + "epoch": 1.1554549065005957, + "grad_norm": 0.08911342918872833, + "learning_rate": 4.5167901499357565e-06, + "loss": 0.0015, + "step": 68370 + }, + { + "epoch": 1.1556239067794463, + "grad_norm": 0.07730668783187866, + "learning_rate": 4.515322245152377e-06, + "loss": 0.001, + "step": 68380 + }, + { + "epoch": 1.1557929070582966, + "grad_norm": 0.05149351805448532, + "learning_rate": 4.513854382538309e-06, + "loss": 0.0011, + "step": 68390 + }, + { + "epoch": 1.1559619073371472, + "grad_norm": 0.05476520210504532, + "learning_rate": 4.512386562221266e-06, + "loss": 0.0018, + "step": 68400 + }, + { + "epoch": 1.1561309076159976, + "grad_norm": 1.653435230255127, + "learning_rate": 4.510918784328956e-06, + "loss": 0.0016, + "step": 68410 + }, + { + "epoch": 1.156299907894848, + "grad_norm": 0.015039338730275631, + "learning_rate": 4.50945104898908e-06, + "loss": 0.0009, + "step": 68420 + }, + { + "epoch": 1.1564689081736985, + "grad_norm": 0.03901722654700279, + "learning_rate": 4.507983356329341e-06, + "loss": 0.0008, + "step": 68430 + }, + { + "epoch": 1.1566379084525489, + "grad_norm": 0.07246369868516922, + "learning_rate": 4.506515706477433e-06, + "loss": 0.0007, + "step": 68440 + }, + { + "epoch": 1.1568069087313995, + "grad_norm": 0.252845823764801, + "learning_rate": 4.50504809956105e-06, + "loss": 0.0006, + "step": 68450 + }, + { + "epoch": 1.1569759090102498, + "grad_norm": 0.028666259720921516, + "learning_rate": 
4.503580535707879e-06, + "loss": 0.0013, + "step": 68460 + }, + { + "epoch": 1.1571449092891004, + "grad_norm": 0.02905077487230301, + "learning_rate": 4.502113015045608e-06, + "loss": 0.0007, + "step": 68470 + }, + { + "epoch": 1.1573139095679508, + "grad_norm": 0.037265345454216, + "learning_rate": 4.500645537701915e-06, + "loss": 0.0006, + "step": 68480 + }, + { + "epoch": 1.1574829098468014, + "grad_norm": 0.03981657326221466, + "learning_rate": 4.499178103804483e-06, + "loss": 0.0011, + "step": 68490 + }, + { + "epoch": 1.1576519101256517, + "grad_norm": 0.0667591542005539, + "learning_rate": 4.4977107134809796e-06, + "loss": 0.0009, + "step": 68500 + }, + { + "epoch": 1.157820910404502, + "grad_norm": 0.025540033355355263, + "learning_rate": 4.49624336685908e-06, + "loss": 0.0008, + "step": 68510 + }, + { + "epoch": 1.1579899106833527, + "grad_norm": 0.03754876181483269, + "learning_rate": 4.4947760640664465e-06, + "loss": 0.0007, + "step": 68520 + }, + { + "epoch": 1.158158910962203, + "grad_norm": 0.07566549628973007, + "learning_rate": 4.493308805230745e-06, + "loss": 0.0014, + "step": 68530 + }, + { + "epoch": 1.1583279112410536, + "grad_norm": 0.043285906314849854, + "learning_rate": 4.49184159047963e-06, + "loss": 0.0004, + "step": 68540 + }, + { + "epoch": 1.158496911519904, + "grad_norm": 0.008623643778264523, + "learning_rate": 4.49037441994076e-06, + "loss": 0.0007, + "step": 68550 + }, + { + "epoch": 1.1586659117987546, + "grad_norm": 0.09939569234848022, + "learning_rate": 4.488907293741785e-06, + "loss": 0.0007, + "step": 68560 + }, + { + "epoch": 1.158834912077605, + "grad_norm": 0.05292747914791107, + "learning_rate": 4.487440212010352e-06, + "loss": 0.0016, + "step": 68570 + }, + { + "epoch": 1.1590039123564555, + "grad_norm": 0.03995686396956444, + "learning_rate": 4.485973174874102e-06, + "loss": 0.0006, + "step": 68580 + }, + { + "epoch": 1.1591729126353059, + "grad_norm": 0.02431139536201954, + "learning_rate": 4.484506182460679e-06, + "loss": 0.0013, + "step": 68590 + }, + { + "epoch": 1.1593419129141562, + "grad_norm": 0.07645460963249207, + "learning_rate": 4.483039234897713e-06, + "loss": 0.002, + "step": 68600 + }, + { + "epoch": 1.1595109131930068, + "grad_norm": 0.012741954997181892, + "learning_rate": 4.481572332312842e-06, + "loss": 0.0016, + "step": 68610 + }, + { + "epoch": 1.1596799134718572, + "grad_norm": 0.02713189460337162, + "learning_rate": 4.480105474833686e-06, + "loss": 0.0015, + "step": 68620 + }, + { + "epoch": 1.1598489137507078, + "grad_norm": 0.07144239544868469, + "learning_rate": 4.478638662587876e-06, + "loss": 0.0028, + "step": 68630 + }, + { + "epoch": 1.1600179140295581, + "grad_norm": 0.018587511032819748, + "learning_rate": 4.477171895703026e-06, + "loss": 0.0016, + "step": 68640 + }, + { + "epoch": 1.1601869143084087, + "grad_norm": 0.13099528849124908, + "learning_rate": 4.475705174306754e-06, + "loss": 0.0011, + "step": 68650 + }, + { + "epoch": 1.160355914587259, + "grad_norm": 0.0970952957868576, + "learning_rate": 4.474238498526673e-06, + "loss": 0.0012, + "step": 68660 + }, + { + "epoch": 1.1605249148661096, + "grad_norm": 0.014326502569019794, + "learning_rate": 4.47277186849039e-06, + "loss": 0.0008, + "step": 68670 + }, + { + "epoch": 1.16069391514496, + "grad_norm": 0.02737031690776348, + "learning_rate": 4.47130528432551e-06, + "loss": 0.0009, + "step": 68680 + }, + { + "epoch": 1.1608629154238104, + "grad_norm": 0.04608790948987007, + "learning_rate": 4.469838746159629e-06, + "loss": 0.0017, + "step": 68690 + }, + { + 
"epoch": 1.161031915702661, + "grad_norm": 0.0980270653963089, + "learning_rate": 4.468372254120349e-06, + "loss": 0.0017, + "step": 68700 + }, + { + "epoch": 1.1612009159815113, + "grad_norm": 0.012178586795926094, + "learning_rate": 4.466905808335256e-06, + "loss": 0.0017, + "step": 68710 + }, + { + "epoch": 1.161369916260362, + "grad_norm": 0.04473332688212395, + "learning_rate": 4.465439408931943e-06, + "loss": 0.0008, + "step": 68720 + }, + { + "epoch": 1.1615389165392123, + "grad_norm": 0.006338580511510372, + "learning_rate": 4.46397305603799e-06, + "loss": 0.0008, + "step": 68730 + }, + { + "epoch": 1.1617079168180628, + "grad_norm": 0.04377632215619087, + "learning_rate": 4.4625067497809795e-06, + "loss": 0.0012, + "step": 68740 + }, + { + "epoch": 1.1618769170969132, + "grad_norm": 0.017771463841199875, + "learning_rate": 4.4610404902884845e-06, + "loss": 0.0008, + "step": 68750 + }, + { + "epoch": 1.1620459173757638, + "grad_norm": 0.013027802109718323, + "learning_rate": 4.459574277688078e-06, + "loss": 0.001, + "step": 68760 + }, + { + "epoch": 1.1622149176546142, + "grad_norm": 0.01781844161450863, + "learning_rate": 4.458108112107328e-06, + "loss": 0.0008, + "step": 68770 + }, + { + "epoch": 1.1623839179334645, + "grad_norm": 0.03677886351943016, + "learning_rate": 4.4566419936737995e-06, + "loss": 0.0006, + "step": 68780 + }, + { + "epoch": 1.162552918212315, + "grad_norm": 0.03237110748887062, + "learning_rate": 4.455175922515048e-06, + "loss": 0.0019, + "step": 68790 + }, + { + "epoch": 1.1627219184911655, + "grad_norm": 0.1242651417851448, + "learning_rate": 4.453709898758633e-06, + "loss": 0.0006, + "step": 68800 + }, + { + "epoch": 1.162890918770016, + "grad_norm": 0.18245325982570648, + "learning_rate": 4.452243922532101e-06, + "loss": 0.0018, + "step": 68810 + }, + { + "epoch": 1.1630599190488664, + "grad_norm": 0.018882328644394875, + "learning_rate": 4.450777993963004e-06, + "loss": 0.0007, + "step": 68820 + }, + { + "epoch": 1.1632289193277168, + "grad_norm": 0.03818240389227867, + "learning_rate": 4.449312113178882e-06, + "loss": 0.001, + "step": 68830 + }, + { + "epoch": 1.1633979196065674, + "grad_norm": 0.06863964349031448, + "learning_rate": 4.447846280307274e-06, + "loss": 0.0014, + "step": 68840 + }, + { + "epoch": 1.1635669198854177, + "grad_norm": 0.03688272461295128, + "learning_rate": 4.446380495475715e-06, + "loss": 0.0013, + "step": 68850 + }, + { + "epoch": 1.1637359201642683, + "grad_norm": 0.2076423168182373, + "learning_rate": 4.444914758811735e-06, + "loss": 0.0015, + "step": 68860 + }, + { + "epoch": 1.1639049204431187, + "grad_norm": 0.0038024107925593853, + "learning_rate": 4.44344907044286e-06, + "loss": 0.001, + "step": 68870 + }, + { + "epoch": 1.1640739207219692, + "grad_norm": 0.021800760179758072, + "learning_rate": 4.441983430496614e-06, + "loss": 0.001, + "step": 68880 + }, + { + "epoch": 1.1642429210008196, + "grad_norm": 0.007418881636112928, + "learning_rate": 4.440517839100512e-06, + "loss": 0.0013, + "step": 68890 + }, + { + "epoch": 1.1644119212796702, + "grad_norm": 0.05220571160316467, + "learning_rate": 4.43905229638207e-06, + "loss": 0.0012, + "step": 68900 + }, + { + "epoch": 1.1645809215585206, + "grad_norm": 0.010897345840930939, + "learning_rate": 4.437586802468794e-06, + "loss": 0.0006, + "step": 68910 + }, + { + "epoch": 1.164749921837371, + "grad_norm": 0.14263969659805298, + "learning_rate": 4.436121357488191e-06, + "loss": 0.0031, + "step": 68920 + }, + { + "epoch": 1.1649189221162215, + "grad_norm": 
0.01010989025235176, + "learning_rate": 4.434655961567764e-06, + "loss": 0.0013, + "step": 68930 + }, + { + "epoch": 1.1650879223950719, + "grad_norm": 0.027407299727201462, + "learning_rate": 4.433190614835006e-06, + "loss": 0.0011, + "step": 68940 + }, + { + "epoch": 1.1652569226739224, + "grad_norm": 0.07728515565395355, + "learning_rate": 4.431725317417412e-06, + "loss": 0.0015, + "step": 68950 + }, + { + "epoch": 1.1654259229527728, + "grad_norm": 0.03701232746243477, + "learning_rate": 4.430260069442467e-06, + "loss": 0.0006, + "step": 68960 + }, + { + "epoch": 1.1655949232316234, + "grad_norm": 0.029807789251208305, + "learning_rate": 4.428794871037659e-06, + "loss": 0.0007, + "step": 68970 + }, + { + "epoch": 1.1657639235104738, + "grad_norm": 0.10100337117910385, + "learning_rate": 4.427329722330462e-06, + "loss": 0.0011, + "step": 68980 + }, + { + "epoch": 1.1659329237893243, + "grad_norm": 0.041717350482940674, + "learning_rate": 4.425864623448357e-06, + "loss": 0.0012, + "step": 68990 + }, + { + "epoch": 1.1661019240681747, + "grad_norm": 0.3187407851219177, + "learning_rate": 4.4243995745188076e-06, + "loss": 0.0034, + "step": 69000 + }, + { + "epoch": 1.166270924347025, + "grad_norm": 0.05901302397251129, + "learning_rate": 4.4229345756692875e-06, + "loss": 0.0008, + "step": 69010 + }, + { + "epoch": 1.1664399246258756, + "grad_norm": 0.04284448176622391, + "learning_rate": 4.421469627027253e-06, + "loss": 0.0008, + "step": 69020 + }, + { + "epoch": 1.166608924904726, + "grad_norm": 0.019676562398672104, + "learning_rate": 4.4200047287201654e-06, + "loss": 0.0022, + "step": 69030 + }, + { + "epoch": 1.1667779251835766, + "grad_norm": 0.0075326645746827126, + "learning_rate": 4.418539880875476e-06, + "loss": 0.001, + "step": 69040 + }, + { + "epoch": 1.166946925462427, + "grad_norm": 0.06856024265289307, + "learning_rate": 4.4170750836206345e-06, + "loss": 0.0014, + "step": 69050 + }, + { + "epoch": 1.1671159257412775, + "grad_norm": 0.09046686440706253, + "learning_rate": 4.415610337083084e-06, + "loss": 0.0014, + "step": 69060 + }, + { + "epoch": 1.167284926020128, + "grad_norm": 0.013177769258618355, + "learning_rate": 4.4141456413902676e-06, + "loss": 0.0014, + "step": 69070 + }, + { + "epoch": 1.1674539262989785, + "grad_norm": 0.11559558659791946, + "learning_rate": 4.412680996669616e-06, + "loss": 0.0012, + "step": 69080 + }, + { + "epoch": 1.1676229265778288, + "grad_norm": 0.08094480633735657, + "learning_rate": 4.411216403048567e-06, + "loss": 0.0007, + "step": 69090 + }, + { + "epoch": 1.1677919268566792, + "grad_norm": 0.05502336844801903, + "learning_rate": 4.4097518606545404e-06, + "loss": 0.0011, + "step": 69100 + }, + { + "epoch": 1.1679609271355298, + "grad_norm": 0.049272000789642334, + "learning_rate": 4.408287369614965e-06, + "loss": 0.0011, + "step": 69110 + }, + { + "epoch": 1.1681299274143802, + "grad_norm": 0.10433492809534073, + "learning_rate": 4.406822930057252e-06, + "loss": 0.001, + "step": 69120 + }, + { + "epoch": 1.1682989276932307, + "grad_norm": 0.009764991700649261, + "learning_rate": 4.405358542108819e-06, + "loss": 0.0008, + "step": 69130 + }, + { + "epoch": 1.168467927972081, + "grad_norm": 0.016498325392603874, + "learning_rate": 4.4038942058970735e-06, + "loss": 0.0006, + "step": 69140 + }, + { + "epoch": 1.1686369282509317, + "grad_norm": 0.06452282518148422, + "learning_rate": 4.402429921549423e-06, + "loss": 0.0013, + "step": 69150 + }, + { + "epoch": 1.168805928529782, + "grad_norm": 0.0777597650885582, + "learning_rate": 
4.400965689193262e-06, + "loss": 0.0008, + "step": 69160 + }, + { + "epoch": 1.1689749288086326, + "grad_norm": 0.04923969507217407, + "learning_rate": 4.399501508955988e-06, + "loss": 0.0018, + "step": 69170 + }, + { + "epoch": 1.169143929087483, + "grad_norm": 0.06249144300818443, + "learning_rate": 4.398037380964995e-06, + "loss": 0.0011, + "step": 69180 + }, + { + "epoch": 1.1693129293663334, + "grad_norm": 0.015166563913226128, + "learning_rate": 4.396573305347663e-06, + "loss": 0.0012, + "step": 69190 + }, + { + "epoch": 1.169481929645184, + "grad_norm": 0.03178955987095833, + "learning_rate": 4.395109282231381e-06, + "loss": 0.0006, + "step": 69200 + }, + { + "epoch": 1.1696509299240343, + "grad_norm": 0.04976994916796684, + "learning_rate": 4.393645311743519e-06, + "loss": 0.0008, + "step": 69210 + }, + { + "epoch": 1.1698199302028849, + "grad_norm": 0.02770996280014515, + "learning_rate": 4.3921813940114545e-06, + "loss": 0.001, + "step": 69220 + }, + { + "epoch": 1.1699889304817352, + "grad_norm": 0.07976898550987244, + "learning_rate": 4.390717529162553e-06, + "loss": 0.0009, + "step": 69230 + }, + { + "epoch": 1.1701579307605858, + "grad_norm": 0.012683488428592682, + "learning_rate": 4.389253717324178e-06, + "loss": 0.0006, + "step": 69240 + }, + { + "epoch": 1.1703269310394362, + "grad_norm": 0.006705684121698141, + "learning_rate": 4.387789958623689e-06, + "loss": 0.001, + "step": 69250 + }, + { + "epoch": 1.1704959313182868, + "grad_norm": 0.03242988511919975, + "learning_rate": 4.386326253188441e-06, + "loss": 0.0003, + "step": 69260 + }, + { + "epoch": 1.1706649315971371, + "grad_norm": 0.04196963831782341, + "learning_rate": 4.384862601145781e-06, + "loss": 0.0004, + "step": 69270 + }, + { + "epoch": 1.1708339318759875, + "grad_norm": 0.07474924623966217, + "learning_rate": 4.383399002623057e-06, + "loss": 0.0011, + "step": 69280 + }, + { + "epoch": 1.171002932154838, + "grad_norm": 0.01058933325111866, + "learning_rate": 4.3819354577476045e-06, + "loss": 0.0003, + "step": 69290 + }, + { + "epoch": 1.1711719324336884, + "grad_norm": 0.08922499418258667, + "learning_rate": 4.380471966646765e-06, + "loss": 0.0016, + "step": 69300 + }, + { + "epoch": 1.171340932712539, + "grad_norm": 0.0907360389828682, + "learning_rate": 4.3790085294478626e-06, + "loss": 0.0014, + "step": 69310 + }, + { + "epoch": 1.1715099329913894, + "grad_norm": 0.08829468488693237, + "learning_rate": 4.377545146278228e-06, + "loss": 0.0012, + "step": 69320 + }, + { + "epoch": 1.17167893327024, + "grad_norm": 0.02312985248863697, + "learning_rate": 4.376081817265182e-06, + "loss": 0.0012, + "step": 69330 + }, + { + "epoch": 1.1718479335490903, + "grad_norm": 0.010803410783410072, + "learning_rate": 4.3746185425360416e-06, + "loss": 0.0007, + "step": 69340 + }, + { + "epoch": 1.172016933827941, + "grad_norm": 0.23178300261497498, + "learning_rate": 4.373155322218116e-06, + "loss": 0.0015, + "step": 69350 + }, + { + "epoch": 1.1721859341067913, + "grad_norm": 0.01983078569173813, + "learning_rate": 4.371692156438717e-06, + "loss": 0.0005, + "step": 69360 + }, + { + "epoch": 1.1723549343856416, + "grad_norm": 0.014242745004594326, + "learning_rate": 4.370229045325142e-06, + "loss": 0.0009, + "step": 69370 + }, + { + "epoch": 1.1725239346644922, + "grad_norm": 0.03428000211715698, + "learning_rate": 4.368765989004695e-06, + "loss": 0.0006, + "step": 69380 + }, + { + "epoch": 1.1726929349433426, + "grad_norm": 0.015881333500146866, + "learning_rate": 4.3673029876046625e-06, + "loss": 0.0012, + "step": 69390 
+ }, + { + "epoch": 1.1728619352221932, + "grad_norm": 0.022723102942109108, + "learning_rate": 4.3658400412523375e-06, + "loss": 0.0011, + "step": 69400 + }, + { + "epoch": 1.1730309355010435, + "grad_norm": 0.024130703881382942, + "learning_rate": 4.364377150074998e-06, + "loss": 0.0006, + "step": 69410 + }, + { + "epoch": 1.1731999357798941, + "grad_norm": 0.06383432447910309, + "learning_rate": 4.362914314199928e-06, + "loss": 0.0039, + "step": 69420 + }, + { + "epoch": 1.1733689360587445, + "grad_norm": 0.016523024067282677, + "learning_rate": 4.3614515337544e-06, + "loss": 0.001, + "step": 69430 + }, + { + "epoch": 1.173537936337595, + "grad_norm": 0.016466936096549034, + "learning_rate": 4.359988808865682e-06, + "loss": 0.0011, + "step": 69440 + }, + { + "epoch": 1.1737069366164454, + "grad_norm": 0.023523833602666855, + "learning_rate": 4.358526139661039e-06, + "loss": 0.0011, + "step": 69450 + }, + { + "epoch": 1.1738759368952958, + "grad_norm": 0.052278563380241394, + "learning_rate": 4.357063526267729e-06, + "loss": 0.0009, + "step": 69460 + }, + { + "epoch": 1.1740449371741464, + "grad_norm": 0.11861348897218704, + "learning_rate": 4.355600968813009e-06, + "loss": 0.0014, + "step": 69470 + }, + { + "epoch": 1.1742139374529967, + "grad_norm": 0.028317276388406754, + "learning_rate": 4.354138467424125e-06, + "loss": 0.0013, + "step": 69480 + }, + { + "epoch": 1.1743829377318473, + "grad_norm": 0.04897003620862961, + "learning_rate": 4.352676022228326e-06, + "loss": 0.0012, + "step": 69490 + }, + { + "epoch": 1.1745519380106977, + "grad_norm": 0.029942458495497704, + "learning_rate": 4.351213633352846e-06, + "loss": 0.0017, + "step": 69500 + }, + { + "epoch": 1.1747209382895483, + "grad_norm": 0.026986481621861458, + "learning_rate": 4.349751300924926e-06, + "loss": 0.001, + "step": 69510 + }, + { + "epoch": 1.1748899385683986, + "grad_norm": 0.03264378383755684, + "learning_rate": 4.348289025071792e-06, + "loss": 0.0009, + "step": 69520 + }, + { + "epoch": 1.1750589388472492, + "grad_norm": 0.038534559309482574, + "learning_rate": 4.346826805920671e-06, + "loss": 0.0012, + "step": 69530 + }, + { + "epoch": 1.1752279391260996, + "grad_norm": 0.05850238725543022, + "learning_rate": 4.345364643598782e-06, + "loss": 0.0012, + "step": 69540 + }, + { + "epoch": 1.17539693940495, + "grad_norm": 0.04611220955848694, + "learning_rate": 4.343902538233342e-06, + "loss": 0.0009, + "step": 69550 + }, + { + "epoch": 1.1755659396838005, + "grad_norm": 0.03227304667234421, + "learning_rate": 4.3424404899515584e-06, + "loss": 0.0008, + "step": 69560 + }, + { + "epoch": 1.1757349399626509, + "grad_norm": 0.051997821778059006, + "learning_rate": 4.34097849888064e-06, + "loss": 0.001, + "step": 69570 + }, + { + "epoch": 1.1759039402415015, + "grad_norm": 0.055068809539079666, + "learning_rate": 4.339516565147783e-06, + "loss": 0.0008, + "step": 69580 + }, + { + "epoch": 1.1760729405203518, + "grad_norm": 0.01576688513159752, + "learning_rate": 4.3380546888801875e-06, + "loss": 0.0017, + "step": 69590 + }, + { + "epoch": 1.1762419407992024, + "grad_norm": 0.024856723845005035, + "learning_rate": 4.3365928702050395e-06, + "loss": 0.0008, + "step": 69600 + }, + { + "epoch": 1.1764109410780528, + "grad_norm": 0.020589660853147507, + "learning_rate": 4.335131109249527e-06, + "loss": 0.0011, + "step": 69610 + }, + { + "epoch": 1.1765799413569034, + "grad_norm": 0.02228492684662342, + "learning_rate": 4.333669406140828e-06, + "loss": 0.0008, + "step": 69620 + }, + { + "epoch": 1.1767489416357537, + 
"grad_norm": 0.033601656556129456, + "learning_rate": 4.332207761006121e-06, + "loss": 0.0013, + "step": 69630 + }, + { + "epoch": 1.176917941914604, + "grad_norm": 0.03049769066274166, + "learning_rate": 4.330746173972573e-06, + "loss": 0.0007, + "step": 69640 + }, + { + "epoch": 1.1770869421934547, + "grad_norm": 0.07383257150650024, + "learning_rate": 4.329284645167351e-06, + "loss": 0.0002, + "step": 69650 + }, + { + "epoch": 1.177255942472305, + "grad_norm": 0.0007665101438760757, + "learning_rate": 4.327823174717614e-06, + "loss": 0.0011, + "step": 69660 + }, + { + "epoch": 1.1774249427511556, + "grad_norm": 0.03920375555753708, + "learning_rate": 4.326361762750519e-06, + "loss": 0.0006, + "step": 69670 + }, + { + "epoch": 1.177593943030006, + "grad_norm": 0.018308712169528008, + "learning_rate": 4.324900409393212e-06, + "loss": 0.0007, + "step": 69680 + }, + { + "epoch": 1.1777629433088563, + "grad_norm": 0.030627572908997536, + "learning_rate": 4.3234391147728415e-06, + "loss": 0.0018, + "step": 69690 + }, + { + "epoch": 1.177931943587707, + "grad_norm": 0.042753156274557114, + "learning_rate": 4.321977879016547e-06, + "loss": 0.0007, + "step": 69700 + }, + { + "epoch": 1.1781009438665575, + "grad_norm": 0.0701015368103981, + "learning_rate": 4.320516702251461e-06, + "loss": 0.001, + "step": 69710 + }, + { + "epoch": 1.1782699441454079, + "grad_norm": 0.031348250806331635, + "learning_rate": 4.319055584604714e-06, + "loss": 0.0013, + "step": 69720 + }, + { + "epoch": 1.1784389444242582, + "grad_norm": 0.01220119558274746, + "learning_rate": 4.31759452620343e-06, + "loss": 0.0008, + "step": 69730 + }, + { + "epoch": 1.1786079447031088, + "grad_norm": 0.05721985921263695, + "learning_rate": 4.316133527174731e-06, + "loss": 0.0011, + "step": 69740 + }, + { + "epoch": 1.1787769449819592, + "grad_norm": 0.04866541922092438, + "learning_rate": 4.314672587645726e-06, + "loss": 0.0012, + "step": 69750 + }, + { + "epoch": 1.1789459452608098, + "grad_norm": 0.11219864338636398, + "learning_rate": 4.313211707743529e-06, + "loss": 0.0018, + "step": 69760 + }, + { + "epoch": 1.1791149455396601, + "grad_norm": 0.010870439931750298, + "learning_rate": 4.311750887595238e-06, + "loss": 0.0007, + "step": 69770 + }, + { + "epoch": 1.1792839458185105, + "grad_norm": 0.032270606607198715, + "learning_rate": 4.310290127327957e-06, + "loss": 0.0009, + "step": 69780 + }, + { + "epoch": 1.179452946097361, + "grad_norm": 0.11621737480163574, + "learning_rate": 4.308829427068775e-06, + "loss": 0.001, + "step": 69790 + }, + { + "epoch": 1.1796219463762114, + "grad_norm": 0.027848485857248306, + "learning_rate": 4.307368786944782e-06, + "loss": 0.0013, + "step": 69800 + }, + { + "epoch": 1.179790946655062, + "grad_norm": 0.015552806667983532, + "learning_rate": 4.3059082070830604e-06, + "loss": 0.0008, + "step": 69810 + }, + { + "epoch": 1.1799599469339124, + "grad_norm": 0.09121126681566238, + "learning_rate": 4.3044476876106876e-06, + "loss": 0.0009, + "step": 69820 + }, + { + "epoch": 1.180128947212763, + "grad_norm": 0.037971653044223785, + "learning_rate": 4.302987228654735e-06, + "loss": 0.0008, + "step": 69830 + }, + { + "epoch": 1.1802979474916133, + "grad_norm": 0.09327095001935959, + "learning_rate": 4.301526830342274e-06, + "loss": 0.001, + "step": 69840 + }, + { + "epoch": 1.180466947770464, + "grad_norm": 0.09236549586057663, + "learning_rate": 4.3000664928003594e-06, + "loss": 0.0009, + "step": 69850 + }, + { + "epoch": 1.1806359480493143, + "grad_norm": 0.027757450938224792, + "learning_rate": 
4.2986062161560534e-06, + "loss": 0.0012, + "step": 69860 + }, + { + "epoch": 1.1808049483281646, + "grad_norm": 0.3103329837322235, + "learning_rate": 4.297146000536403e-06, + "loss": 0.001, + "step": 69870 + }, + { + "epoch": 1.1809739486070152, + "grad_norm": 0.035489924252033234, + "learning_rate": 4.295685846068459e-06, + "loss": 0.0005, + "step": 69880 + }, + { + "epoch": 1.1811429488858656, + "grad_norm": 0.029097232967615128, + "learning_rate": 4.2942257528792555e-06, + "loss": 0.0014, + "step": 69890 + }, + { + "epoch": 1.1813119491647162, + "grad_norm": 0.24144315719604492, + "learning_rate": 4.292765721095833e-06, + "loss": 0.0019, + "step": 69900 + }, + { + "epoch": 1.1814809494435665, + "grad_norm": 0.022012850269675255, + "learning_rate": 4.2913057508452175e-06, + "loss": 0.0015, + "step": 69910 + }, + { + "epoch": 1.181649949722417, + "grad_norm": 0.03142053633928299, + "learning_rate": 4.289845842254438e-06, + "loss": 0.0017, + "step": 69920 + }, + { + "epoch": 1.1818189500012675, + "grad_norm": 0.040227316319942474, + "learning_rate": 4.2883859954505085e-06, + "loss": 0.0012, + "step": 69930 + }, + { + "epoch": 1.181987950280118, + "grad_norm": 0.055836841464042664, + "learning_rate": 4.286926210560446e-06, + "loss": 0.0013, + "step": 69940 + }, + { + "epoch": 1.1821569505589684, + "grad_norm": 0.04443049803376198, + "learning_rate": 4.2854664877112595e-06, + "loss": 0.0013, + "step": 69950 + }, + { + "epoch": 1.1823259508378188, + "grad_norm": 0.013825136236846447, + "learning_rate": 4.284006827029949e-06, + "loss": 0.0016, + "step": 69960 + }, + { + "epoch": 1.1824949511166694, + "grad_norm": 0.05497096851468086, + "learning_rate": 4.2825472286435145e-06, + "loss": 0.0007, + "step": 69970 + }, + { + "epoch": 1.1826639513955197, + "grad_norm": 0.024187199771404266, + "learning_rate": 4.281087692678946e-06, + "loss": 0.0005, + "step": 69980 + }, + { + "epoch": 1.1828329516743703, + "grad_norm": 0.05082495138049126, + "learning_rate": 4.279628219263235e-06, + "loss": 0.001, + "step": 69990 + }, + { + "epoch": 1.1830019519532207, + "grad_norm": 0.13151904940605164, + "learning_rate": 4.278168808523355e-06, + "loss": 0.0024, + "step": 70000 + }, + { + "epoch": 1.1831709522320712, + "grad_norm": 0.004281432367861271, + "learning_rate": 4.2767094605862875e-06, + "loss": 0.0015, + "step": 70010 + }, + { + "epoch": 1.1833399525109216, + "grad_norm": 0.01116862054914236, + "learning_rate": 4.275250175579e-06, + "loss": 0.0016, + "step": 70020 + }, + { + "epoch": 1.1835089527897722, + "grad_norm": 0.12776902318000793, + "learning_rate": 4.273790953628462e-06, + "loss": 0.0021, + "step": 70030 + }, + { + "epoch": 1.1836779530686226, + "grad_norm": 0.002477040281519294, + "learning_rate": 4.272331794861627e-06, + "loss": 0.0007, + "step": 70040 + }, + { + "epoch": 1.183846953347473, + "grad_norm": 0.02622845210134983, + "learning_rate": 4.270872699405454e-06, + "loss": 0.0009, + "step": 70050 + }, + { + "epoch": 1.1840159536263235, + "grad_norm": 0.04474570229649544, + "learning_rate": 4.2694136673868855e-06, + "loss": 0.0011, + "step": 70060 + }, + { + "epoch": 1.1841849539051739, + "grad_norm": 0.037882525473833084, + "learning_rate": 4.267954698932871e-06, + "loss": 0.001, + "step": 70070 + }, + { + "epoch": 1.1843539541840244, + "grad_norm": 0.12966550886631012, + "learning_rate": 4.266495794170342e-06, + "loss": 0.0012, + "step": 70080 + }, + { + "epoch": 1.1845229544628748, + "grad_norm": 0.023467622697353363, + "learning_rate": 4.2650369532262335e-06, + "loss": 0.0008, + 
"step": 70090 + }, + { + "epoch": 1.1846919547417254, + "grad_norm": 0.07388714700937271, + "learning_rate": 4.263578176227471e-06, + "loss": 0.002, + "step": 70100 + }, + { + "epoch": 1.1848609550205758, + "grad_norm": 0.07243242114782333, + "learning_rate": 4.2621194633009764e-06, + "loss": 0.0008, + "step": 70110 + }, + { + "epoch": 1.1850299552994263, + "grad_norm": 0.016735875979065895, + "learning_rate": 4.260660814573662e-06, + "loss": 0.0006, + "step": 70120 + }, + { + "epoch": 1.1851989555782767, + "grad_norm": 0.05063145235180855, + "learning_rate": 4.25920223017244e-06, + "loss": 0.0009, + "step": 70130 + }, + { + "epoch": 1.185367955857127, + "grad_norm": 0.05517589673399925, + "learning_rate": 4.257743710224212e-06, + "loss": 0.0011, + "step": 70140 + }, + { + "epoch": 1.1855369561359776, + "grad_norm": 0.004348905757069588, + "learning_rate": 4.25628525485588e-06, + "loss": 0.0011, + "step": 70150 + }, + { + "epoch": 1.185705956414828, + "grad_norm": 0.17707771062850952, + "learning_rate": 4.254826864194332e-06, + "loss": 0.0015, + "step": 70160 + }, + { + "epoch": 1.1858749566936786, + "grad_norm": 0.0887473076581955, + "learning_rate": 4.253368538366458e-06, + "loss": 0.0013, + "step": 70170 + }, + { + "epoch": 1.186043956972529, + "grad_norm": 0.021793309599161148, + "learning_rate": 4.251910277499138e-06, + "loss": 0.0012, + "step": 70180 + }, + { + "epoch": 1.1862129572513795, + "grad_norm": 0.031324952840805054, + "learning_rate": 4.250452081719248e-06, + "loss": 0.0014, + "step": 70190 + }, + { + "epoch": 1.18638195753023, + "grad_norm": 0.028805961832404137, + "learning_rate": 4.2489939511536595e-06, + "loss": 0.0009, + "step": 70200 + }, + { + "epoch": 1.1865509578090805, + "grad_norm": 0.009609505534172058, + "learning_rate": 4.247535885929235e-06, + "loss": 0.0006, + "step": 70210 + }, + { + "epoch": 1.1867199580879308, + "grad_norm": 0.023490216583013535, + "learning_rate": 4.2460778861728366e-06, + "loss": 0.0008, + "step": 70220 + }, + { + "epoch": 1.1868889583667812, + "grad_norm": 0.0962447077035904, + "learning_rate": 4.244619952011312e-06, + "loss": 0.0008, + "step": 70230 + }, + { + "epoch": 1.1870579586456318, + "grad_norm": 0.04374274984002113, + "learning_rate": 4.243162083571514e-06, + "loss": 0.0004, + "step": 70240 + }, + { + "epoch": 1.1872269589244822, + "grad_norm": 0.05494653433561325, + "learning_rate": 4.24170428098028e-06, + "loss": 0.0009, + "step": 70250 + }, + { + "epoch": 1.1873959592033327, + "grad_norm": 0.017951268702745438, + "learning_rate": 4.240246544364449e-06, + "loss": 0.0025, + "step": 70260 + }, + { + "epoch": 1.187564959482183, + "grad_norm": 0.03345295041799545, + "learning_rate": 4.238788873850848e-06, + "loss": 0.001, + "step": 70270 + }, + { + "epoch": 1.1877339597610337, + "grad_norm": 0.06794202327728271, + "learning_rate": 4.237331269566304e-06, + "loss": 0.0011, + "step": 70280 + }, + { + "epoch": 1.187902960039884, + "grad_norm": 0.2515491247177124, + "learning_rate": 4.2358737316376355e-06, + "loss": 0.0021, + "step": 70290 + }, + { + "epoch": 1.1880719603187346, + "grad_norm": 0.03184161335229874, + "learning_rate": 4.234416260191654e-06, + "loss": 0.0013, + "step": 70300 + }, + { + "epoch": 1.188240960597585, + "grad_norm": 0.11049087345600128, + "learning_rate": 4.232958855355166e-06, + "loss": 0.001, + "step": 70310 + }, + { + "epoch": 1.1884099608764354, + "grad_norm": 0.06749023497104645, + "learning_rate": 4.231501517254977e-06, + "loss": 0.0006, + "step": 70320 + }, + { + "epoch": 1.188578961155286, + 
"grad_norm": 0.014731463976204395, + "learning_rate": 4.2300442460178766e-06, + "loss": 0.0012, + "step": 70330 + }, + { + "epoch": 1.1887479614341363, + "grad_norm": 0.02106318809092045, + "learning_rate": 4.228587041770659e-06, + "loss": 0.0011, + "step": 70340 + }, + { + "epoch": 1.1889169617129869, + "grad_norm": 0.06880449503660202, + "learning_rate": 4.227129904640105e-06, + "loss": 0.001, + "step": 70350 + }, + { + "epoch": 1.1890859619918372, + "grad_norm": 0.04685996472835541, + "learning_rate": 4.225672834752996e-06, + "loss": 0.001, + "step": 70360 + }, + { + "epoch": 1.1892549622706878, + "grad_norm": 0.07952843606472015, + "learning_rate": 4.224215832236099e-06, + "loss": 0.0011, + "step": 70370 + }, + { + "epoch": 1.1894239625495382, + "grad_norm": 0.1698327362537384, + "learning_rate": 4.222758897216186e-06, + "loss": 0.0014, + "step": 70380 + }, + { + "epoch": 1.1895929628283888, + "grad_norm": 0.09234167635440826, + "learning_rate": 4.221302029820013e-06, + "loss": 0.0011, + "step": 70390 + }, + { + "epoch": 1.1897619631072391, + "grad_norm": 0.05689745023846626, + "learning_rate": 4.219845230174338e-06, + "loss": 0.0011, + "step": 70400 + }, + { + "epoch": 1.1899309633860895, + "grad_norm": 0.08529359847307205, + "learning_rate": 4.2183884984059055e-06, + "loss": 0.0009, + "step": 70410 + }, + { + "epoch": 1.19009996366494, + "grad_norm": 0.009383260272443295, + "learning_rate": 4.2169318346414634e-06, + "loss": 0.0009, + "step": 70420 + }, + { + "epoch": 1.1902689639437904, + "grad_norm": 0.07850052416324615, + "learning_rate": 4.215475239007744e-06, + "loss": 0.0012, + "step": 70430 + }, + { + "epoch": 1.190437964222641, + "grad_norm": 0.06158560886979103, + "learning_rate": 4.214018711631479e-06, + "loss": 0.0011, + "step": 70440 + }, + { + "epoch": 1.1906069645014914, + "grad_norm": 0.04493800550699234, + "learning_rate": 4.212562252639397e-06, + "loss": 0.0012, + "step": 70450 + }, + { + "epoch": 1.190775964780342, + "grad_norm": 0.023765722289681435, + "learning_rate": 4.211105862158212e-06, + "loss": 0.0013, + "step": 70460 + }, + { + "epoch": 1.1909449650591923, + "grad_norm": 0.022813871502876282, + "learning_rate": 4.2096495403146414e-06, + "loss": 0.0005, + "step": 70470 + }, + { + "epoch": 1.191113965338043, + "grad_norm": 0.14413927495479584, + "learning_rate": 4.2081932872353884e-06, + "loss": 0.0012, + "step": 70480 + }, + { + "epoch": 1.1912829656168933, + "grad_norm": 0.5556153059005737, + "learning_rate": 4.206737103047156e-06, + "loss": 0.0008, + "step": 70490 + }, + { + "epoch": 1.1914519658957436, + "grad_norm": 0.057843245565891266, + "learning_rate": 4.205280987876638e-06, + "loss": 0.0014, + "step": 70500 + }, + { + "epoch": 1.1916209661745942, + "grad_norm": 0.01688377559185028, + "learning_rate": 4.203824941850527e-06, + "loss": 0.0013, + "step": 70510 + }, + { + "epoch": 1.1917899664534446, + "grad_norm": 0.04101600497961044, + "learning_rate": 4.202368965095502e-06, + "loss": 0.0013, + "step": 70520 + }, + { + "epoch": 1.1919589667322952, + "grad_norm": 0.10525278747081757, + "learning_rate": 4.2009130577382435e-06, + "loss": 0.0009, + "step": 70530 + }, + { + "epoch": 1.1921279670111455, + "grad_norm": 0.006513623986393213, + "learning_rate": 4.199457219905418e-06, + "loss": 0.0009, + "step": 70540 + }, + { + "epoch": 1.1922969672899961, + "grad_norm": 0.03790920600295067, + "learning_rate": 4.198001451723696e-06, + "loss": 0.0005, + "step": 70550 + }, + { + "epoch": 1.1924659675688465, + "grad_norm": 0.00907865073531866, + "learning_rate": 
4.196545753319731e-06, + "loss": 0.0004, + "step": 70560 + }, + { + "epoch": 1.192634967847697, + "grad_norm": 0.0359298475086689, + "learning_rate": 4.1950901248201795e-06, + "loss": 0.0006, + "step": 70570 + }, + { + "epoch": 1.1928039681265474, + "grad_norm": 0.008871739730238914, + "learning_rate": 4.193634566351687e-06, + "loss": 0.0004, + "step": 70580 + }, + { + "epoch": 1.1929729684053978, + "grad_norm": 0.06354521214962006, + "learning_rate": 4.192179078040893e-06, + "loss": 0.0009, + "step": 70590 + }, + { + "epoch": 1.1931419686842484, + "grad_norm": 0.047149330377578735, + "learning_rate": 4.190723660014434e-06, + "loss": 0.0008, + "step": 70600 + }, + { + "epoch": 1.1933109689630987, + "grad_norm": 0.08436896651983261, + "learning_rate": 4.189268312398938e-06, + "loss": 0.0017, + "step": 70610 + }, + { + "epoch": 1.1934799692419493, + "grad_norm": 0.050835512578487396, + "learning_rate": 4.187813035321026e-06, + "loss": 0.0009, + "step": 70620 + }, + { + "epoch": 1.1936489695207997, + "grad_norm": 0.07753974944353104, + "learning_rate": 4.186357828907317e-06, + "loss": 0.0009, + "step": 70630 + }, + { + "epoch": 1.19381796979965, + "grad_norm": 0.05515003204345703, + "learning_rate": 4.184902693284417e-06, + "loss": 0.0009, + "step": 70640 + }, + { + "epoch": 1.1939869700785006, + "grad_norm": 0.10628211498260498, + "learning_rate": 4.1834476285789335e-06, + "loss": 0.0011, + "step": 70650 + }, + { + "epoch": 1.194155970357351, + "grad_norm": 0.02622995153069496, + "learning_rate": 4.1819926349174605e-06, + "loss": 0.001, + "step": 70660 + }, + { + "epoch": 1.1943249706362016, + "grad_norm": 0.06788217276334763, + "learning_rate": 4.180537712426593e-06, + "loss": 0.0011, + "step": 70670 + }, + { + "epoch": 1.194493970915052, + "grad_norm": 0.0016145723639056087, + "learning_rate": 4.179082861232914e-06, + "loss": 0.0008, + "step": 70680 + }, + { + "epoch": 1.1946629711939025, + "grad_norm": 0.012697342783212662, + "learning_rate": 4.177628081463005e-06, + "loss": 0.001, + "step": 70690 + }, + { + "epoch": 1.1948319714727529, + "grad_norm": 0.010623389855027199, + "learning_rate": 4.176173373243436e-06, + "loss": 0.0012, + "step": 70700 + }, + { + "epoch": 1.1950009717516035, + "grad_norm": 0.04370303824543953, + "learning_rate": 4.174718736700774e-06, + "loss": 0.002, + "step": 70710 + }, + { + "epoch": 1.1951699720304538, + "grad_norm": 0.04976026713848114, + "learning_rate": 4.173264171961584e-06, + "loss": 0.0008, + "step": 70720 + }, + { + "epoch": 1.1953389723093042, + "grad_norm": 0.04589367285370827, + "learning_rate": 4.171809679152414e-06, + "loss": 0.0027, + "step": 70730 + }, + { + "epoch": 1.1955079725881548, + "grad_norm": 0.014074057340621948, + "learning_rate": 4.1703552583998165e-06, + "loss": 0.0019, + "step": 70740 + }, + { + "epoch": 1.1956769728670051, + "grad_norm": 0.10194515436887741, + "learning_rate": 4.168900909830329e-06, + "loss": 0.0012, + "step": 70750 + }, + { + "epoch": 1.1958459731458557, + "grad_norm": 0.0017360273050144315, + "learning_rate": 4.167446633570492e-06, + "loss": 0.0009, + "step": 70760 + }, + { + "epoch": 1.196014973424706, + "grad_norm": 0.05988433584570885, + "learning_rate": 4.165992429746829e-06, + "loss": 0.0015, + "step": 70770 + }, + { + "epoch": 1.1961839737035567, + "grad_norm": 0.010143890045583248, + "learning_rate": 4.164538298485866e-06, + "loss": 0.0015, + "step": 70780 + }, + { + "epoch": 1.196352973982407, + "grad_norm": 0.023697197437286377, + "learning_rate": 4.163084239914119e-06, + "loss": 0.001, + "step": 
70790 + }, + { + "epoch": 1.1965219742612576, + "grad_norm": 0.06591352820396423, + "learning_rate": 4.1616302541581e-06, + "loss": 0.0012, + "step": 70800 + }, + { + "epoch": 1.196690974540108, + "grad_norm": 0.11869916319847107, + "learning_rate": 4.160176341344308e-06, + "loss": 0.0018, + "step": 70810 + }, + { + "epoch": 1.1968599748189583, + "grad_norm": 0.02505866438150406, + "learning_rate": 4.158722501599246e-06, + "loss": 0.0007, + "step": 70820 + }, + { + "epoch": 1.197028975097809, + "grad_norm": 0.04185836389660835, + "learning_rate": 4.1572687350494e-06, + "loss": 0.0011, + "step": 70830 + }, + { + "epoch": 1.1971979753766593, + "grad_norm": 0.05058329924941063, + "learning_rate": 4.155815041821259e-06, + "loss": 0.0006, + "step": 70840 + }, + { + "epoch": 1.1973669756555099, + "grad_norm": 0.013063745573163033, + "learning_rate": 4.154361422041298e-06, + "loss": 0.0009, + "step": 70850 + }, + { + "epoch": 1.1975359759343602, + "grad_norm": 0.06879516690969467, + "learning_rate": 4.152907875835992e-06, + "loss": 0.0014, + "step": 70860 + }, + { + "epoch": 1.1977049762132108, + "grad_norm": 0.04310673847794533, + "learning_rate": 4.151454403331803e-06, + "loss": 0.0006, + "step": 70870 + }, + { + "epoch": 1.1978739764920612, + "grad_norm": 0.03932936117053032, + "learning_rate": 4.150001004655195e-06, + "loss": 0.001, + "step": 70880 + }, + { + "epoch": 1.1980429767709118, + "grad_norm": 0.08106826990842819, + "learning_rate": 4.1485476799326155e-06, + "loss": 0.001, + "step": 70890 + }, + { + "epoch": 1.1982119770497621, + "grad_norm": 0.03531781956553459, + "learning_rate": 4.147094429290516e-06, + "loss": 0.0012, + "step": 70900 + }, + { + "epoch": 1.1983809773286125, + "grad_norm": 0.03120686672627926, + "learning_rate": 4.145641252855331e-06, + "loss": 0.0009, + "step": 70910 + }, + { + "epoch": 1.198549977607463, + "grad_norm": 0.010923722758889198, + "learning_rate": 4.144188150753498e-06, + "loss": 0.0018, + "step": 70920 + }, + { + "epoch": 1.1987189778863134, + "grad_norm": 0.06399686634540558, + "learning_rate": 4.142735123111441e-06, + "loss": 0.0008, + "step": 70930 + }, + { + "epoch": 1.198887978165164, + "grad_norm": 0.024457722902297974, + "learning_rate": 4.141282170055583e-06, + "loss": 0.0016, + "step": 70940 + }, + { + "epoch": 1.1990569784440144, + "grad_norm": 0.11252973973751068, + "learning_rate": 4.139829291712336e-06, + "loss": 0.0013, + "step": 70950 + }, + { + "epoch": 1.199225978722865, + "grad_norm": 0.061953283846378326, + "learning_rate": 4.138376488208108e-06, + "loss": 0.0011, + "step": 70960 + }, + { + "epoch": 1.1993949790017153, + "grad_norm": 0.04622013121843338, + "learning_rate": 4.1369237596693005e-06, + "loss": 0.0011, + "step": 70970 + }, + { + "epoch": 1.199563979280566, + "grad_norm": 0.03799670934677124, + "learning_rate": 4.135471106222307e-06, + "loss": 0.001, + "step": 70980 + }, + { + "epoch": 1.1997329795594163, + "grad_norm": 0.04052281007170677, + "learning_rate": 4.134018527993518e-06, + "loss": 0.001, + "step": 70990 + }, + { + "epoch": 1.1999019798382666, + "grad_norm": 0.1576308012008667, + "learning_rate": 4.132566025109311e-06, + "loss": 0.0013, + "step": 71000 + }, + { + "epoch": 1.2000709801171172, + "grad_norm": 0.03853791952133179, + "learning_rate": 4.131113597696064e-06, + "loss": 0.0011, + "step": 71010 + }, + { + "epoch": 1.2002399803959676, + "grad_norm": 0.04681782424449921, + "learning_rate": 4.129661245880143e-06, + "loss": 0.001, + "step": 71020 + }, + { + "epoch": 1.2004089806748182, + "grad_norm": 
0.05324620380997658, + "learning_rate": 4.128208969787911e-06, + "loss": 0.0007, + "step": 71030 + }, + { + "epoch": 1.2005779809536685, + "grad_norm": 0.023558132350444794, + "learning_rate": 4.1267567695457215e-06, + "loss": 0.0015, + "step": 71040 + }, + { + "epoch": 1.200746981232519, + "grad_norm": 0.04994186758995056, + "learning_rate": 4.125304645279925e-06, + "loss": 0.0009, + "step": 71050 + }, + { + "epoch": 1.2009159815113695, + "grad_norm": 0.014957442879676819, + "learning_rate": 4.123852597116862e-06, + "loss": 0.0022, + "step": 71060 + }, + { + "epoch": 1.20108498179022, + "grad_norm": 0.019813815131783485, + "learning_rate": 4.122400625182868e-06, + "loss": 0.0008, + "step": 71070 + }, + { + "epoch": 1.2012539820690704, + "grad_norm": 0.08933929353952408, + "learning_rate": 4.1209487296042715e-06, + "loss": 0.0015, + "step": 71080 + }, + { + "epoch": 1.2014229823479208, + "grad_norm": 0.15247413516044617, + "learning_rate": 4.119496910507397e-06, + "loss": 0.0028, + "step": 71090 + }, + { + "epoch": 1.2015919826267714, + "grad_norm": 0.017764583230018616, + "learning_rate": 4.118045168018554e-06, + "loss": 0.0009, + "step": 71100 + }, + { + "epoch": 1.2017609829056217, + "grad_norm": 0.017530683428049088, + "learning_rate": 4.116593502264057e-06, + "loss": 0.0005, + "step": 71110 + }, + { + "epoch": 1.2019299831844723, + "grad_norm": 0.016762100160121918, + "learning_rate": 4.115141913370203e-06, + "loss": 0.0007, + "step": 71120 + }, + { + "epoch": 1.2020989834633227, + "grad_norm": 0.07180909067392349, + "learning_rate": 4.113690401463293e-06, + "loss": 0.0015, + "step": 71130 + }, + { + "epoch": 1.2022679837421733, + "grad_norm": 0.005118554458022118, + "learning_rate": 4.1122389666696095e-06, + "loss": 0.0012, + "step": 71140 + }, + { + "epoch": 1.2024369840210236, + "grad_norm": 0.03537483513355255, + "learning_rate": 4.110787609115439e-06, + "loss": 0.0006, + "step": 71150 + }, + { + "epoch": 1.2026059842998742, + "grad_norm": 0.031238961964845657, + "learning_rate": 4.109336328927052e-06, + "loss": 0.0012, + "step": 71160 + }, + { + "epoch": 1.2027749845787246, + "grad_norm": 0.00018730736337602139, + "learning_rate": 4.107885126230724e-06, + "loss": 0.001, + "step": 71170 + }, + { + "epoch": 1.202943984857575, + "grad_norm": 0.030509380623698235, + "learning_rate": 4.106434001152708e-06, + "loss": 0.0014, + "step": 71180 + }, + { + "epoch": 1.2031129851364255, + "grad_norm": 0.029428528621792793, + "learning_rate": 4.104982953819267e-06, + "loss": 0.0005, + "step": 71190 + }, + { + "epoch": 1.2032819854152759, + "grad_norm": 0.07416586577892303, + "learning_rate": 4.103531984356644e-06, + "loss": 0.0008, + "step": 71200 + }, + { + "epoch": 1.2034509856941265, + "grad_norm": 0.021068410947918892, + "learning_rate": 4.102081092891081e-06, + "loss": 0.0009, + "step": 71210 + }, + { + "epoch": 1.2036199859729768, + "grad_norm": 0.01348723191767931, + "learning_rate": 4.100630279548815e-06, + "loss": 0.0007, + "step": 71220 + }, + { + "epoch": 1.2037889862518274, + "grad_norm": 0.037767812609672546, + "learning_rate": 4.099179544456071e-06, + "loss": 0.001, + "step": 71230 + }, + { + "epoch": 1.2039579865306778, + "grad_norm": 0.0031448148656636477, + "learning_rate": 4.097728887739073e-06, + "loss": 0.0006, + "step": 71240 + }, + { + "epoch": 1.2041269868095283, + "grad_norm": 0.023646043613553047, + "learning_rate": 4.096278309524031e-06, + "loss": 0.0008, + "step": 71250 + }, + { + "epoch": 1.2042959870883787, + "grad_norm": 0.08618577569723129, + "learning_rate": 
4.094827809937156e-06, + "loss": 0.0009, + "step": 71260 + }, + { + "epoch": 1.204464987367229, + "grad_norm": 0.2516619861125946, + "learning_rate": 4.093377389104646e-06, + "loss": 0.0033, + "step": 71270 + }, + { + "epoch": 1.2046339876460797, + "grad_norm": 0.028300069272518158, + "learning_rate": 4.091927047152698e-06, + "loss": 0.0005, + "step": 71280 + }, + { + "epoch": 1.20480298792493, + "grad_norm": 0.0396931953728199, + "learning_rate": 4.090476784207495e-06, + "loss": 0.0011, + "step": 71290 + }, + { + "epoch": 1.2049719882037806, + "grad_norm": 0.009809630922973156, + "learning_rate": 4.0890266003952206e-06, + "loss": 0.0013, + "step": 71300 + }, + { + "epoch": 1.205140988482631, + "grad_norm": 0.06519563496112823, + "learning_rate": 4.087576495842043e-06, + "loss": 0.0012, + "step": 71310 + }, + { + "epoch": 1.2053099887614815, + "grad_norm": 0.010368295945227146, + "learning_rate": 4.0861264706741334e-06, + "loss": 0.0012, + "step": 71320 + }, + { + "epoch": 1.205478989040332, + "grad_norm": 0.0036968374624848366, + "learning_rate": 4.0846765250176466e-06, + "loss": 0.0034, + "step": 71330 + }, + { + "epoch": 1.2056479893191825, + "grad_norm": 0.0545491985976696, + "learning_rate": 4.083226658998738e-06, + "loss": 0.001, + "step": 71340 + }, + { + "epoch": 1.2058169895980329, + "grad_norm": 0.023208390921354294, + "learning_rate": 4.081776872743552e-06, + "loss": 0.0009, + "step": 71350 + }, + { + "epoch": 1.2059859898768832, + "grad_norm": 0.012889409437775612, + "learning_rate": 4.080327166378227e-06, + "loss": 0.0006, + "step": 71360 + }, + { + "epoch": 1.2061549901557338, + "grad_norm": 0.26329225301742554, + "learning_rate": 4.078877540028893e-06, + "loss": 0.0008, + "step": 71370 + }, + { + "epoch": 1.2063239904345842, + "grad_norm": 0.0013760413276031613, + "learning_rate": 4.077427993821678e-06, + "loss": 0.0012, + "step": 71380 + }, + { + "epoch": 1.2064929907134347, + "grad_norm": 0.01563761942088604, + "learning_rate": 4.075978527882696e-06, + "loss": 0.0008, + "step": 71390 + }, + { + "epoch": 1.206661990992285, + "grad_norm": 0.16220055520534515, + "learning_rate": 4.074529142338061e-06, + "loss": 0.0015, + "step": 71400 + }, + { + "epoch": 1.2068309912711357, + "grad_norm": 0.016545753926038742, + "learning_rate": 4.073079837313873e-06, + "loss": 0.001, + "step": 71410 + }, + { + "epoch": 1.206999991549986, + "grad_norm": 0.1064077764749527, + "learning_rate": 4.0716306129362295e-06, + "loss": 0.0011, + "step": 71420 + }, + { + "epoch": 1.2071689918288366, + "grad_norm": 0.03617030382156372, + "learning_rate": 4.070181469331222e-06, + "loss": 0.0007, + "step": 71430 + }, + { + "epoch": 1.207337992107687, + "grad_norm": 0.046915020793676376, + "learning_rate": 4.068732406624932e-06, + "loss": 0.0009, + "step": 71440 + }, + { + "epoch": 1.2075069923865374, + "grad_norm": 0.02355279214680195, + "learning_rate": 4.067283424943434e-06, + "loss": 0.0006, + "step": 71450 + }, + { + "epoch": 1.207675992665388, + "grad_norm": 0.0377669483423233, + "learning_rate": 4.065834524412796e-06, + "loss": 0.0006, + "step": 71460 + }, + { + "epoch": 1.2078449929442383, + "grad_norm": 0.00027997931465506554, + "learning_rate": 4.064385705159083e-06, + "loss": 0.0006, + "step": 71470 + }, + { + "epoch": 1.2080139932230889, + "grad_norm": 0.05463608354330063, + "learning_rate": 4.062936967308345e-06, + "loss": 0.0009, + "step": 71480 + }, + { + "epoch": 1.2081829935019393, + "grad_norm": 0.045695651322603226, + "learning_rate": 4.061488310986633e-06, + "loss": 0.0011, + "step": 
71490 + }, + { + "epoch": 1.2083519937807898, + "grad_norm": 0.053160663694143295, + "learning_rate": 4.060039736319982e-06, + "loss": 0.0011, + "step": 71500 + }, + { + "epoch": 1.2085209940596402, + "grad_norm": 0.04242270439863205, + "learning_rate": 4.0585912434344314e-06, + "loss": 0.0018, + "step": 71510 + }, + { + "epoch": 1.2086899943384908, + "grad_norm": 0.005821699742227793, + "learning_rate": 4.057142832456001e-06, + "loss": 0.0007, + "step": 71520 + }, + { + "epoch": 1.2088589946173411, + "grad_norm": 0.04350055754184723, + "learning_rate": 4.055694503510715e-06, + "loss": 0.001, + "step": 71530 + }, + { + "epoch": 1.2090279948961915, + "grad_norm": 0.03617621213197708, + "learning_rate": 4.054246256724581e-06, + "loss": 0.0012, + "step": 71540 + }, + { + "epoch": 1.209196995175042, + "grad_norm": 0.14375846087932587, + "learning_rate": 4.0527980922236055e-06, + "loss": 0.0007, + "step": 71550 + }, + { + "epoch": 1.2093659954538925, + "grad_norm": 0.08785324543714523, + "learning_rate": 4.051350010133784e-06, + "loss": 0.0011, + "step": 71560 + }, + { + "epoch": 1.209534995732743, + "grad_norm": 0.10095148533582687, + "learning_rate": 4.049902010581111e-06, + "loss": 0.0008, + "step": 71570 + }, + { + "epoch": 1.2097039960115934, + "grad_norm": 0.08452757447957993, + "learning_rate": 4.048454093691563e-06, + "loss": 0.0013, + "step": 71580 + }, + { + "epoch": 1.2098729962904438, + "grad_norm": 0.051274657249450684, + "learning_rate": 4.047006259591122e-06, + "loss": 0.0014, + "step": 71590 + }, + { + "epoch": 1.2100419965692943, + "grad_norm": 0.015805142000317574, + "learning_rate": 4.0455585084057505e-06, + "loss": 0.0009, + "step": 71600 + }, + { + "epoch": 1.2102109968481447, + "grad_norm": 0.033467043191194534, + "learning_rate": 4.044110840261417e-06, + "loss": 0.0008, + "step": 71610 + }, + { + "epoch": 1.2103799971269953, + "grad_norm": 0.09230538457632065, + "learning_rate": 4.042663255284068e-06, + "loss": 0.0011, + "step": 71620 + }, + { + "epoch": 1.2105489974058457, + "grad_norm": 0.039076339453458786, + "learning_rate": 4.041215753599656e-06, + "loss": 0.0005, + "step": 71630 + }, + { + "epoch": 1.2107179976846962, + "grad_norm": 0.05938393250107765, + "learning_rate": 4.039768335334119e-06, + "loss": 0.0008, + "step": 71640 + }, + { + "epoch": 1.2108869979635466, + "grad_norm": 0.11329591274261475, + "learning_rate": 4.03832100061339e-06, + "loss": 0.0016, + "step": 71650 + }, + { + "epoch": 1.2110559982423972, + "grad_norm": 0.05184169113636017, + "learning_rate": 4.036873749563391e-06, + "loss": 0.0015, + "step": 71660 + }, + { + "epoch": 1.2112249985212475, + "grad_norm": 0.016765085980296135, + "learning_rate": 4.035426582310045e-06, + "loss": 0.0004, + "step": 71670 + }, + { + "epoch": 1.211393998800098, + "grad_norm": 0.3198559582233429, + "learning_rate": 4.033979498979258e-06, + "loss": 0.0009, + "step": 71680 + }, + { + "epoch": 1.2115629990789485, + "grad_norm": 0.012108954600989819, + "learning_rate": 4.032532499696936e-06, + "loss": 0.0009, + "step": 71690 + }, + { + "epoch": 1.2117319993577989, + "grad_norm": 0.10403885692358017, + "learning_rate": 4.031085584588974e-06, + "loss": 0.0008, + "step": 71700 + }, + { + "epoch": 1.2119009996366494, + "grad_norm": 0.10198567807674408, + "learning_rate": 4.02963875378126e-06, + "loss": 0.0006, + "step": 71710 + }, + { + "epoch": 1.2120699999154998, + "grad_norm": 0.03292039781808853, + "learning_rate": 4.028192007399676e-06, + "loss": 0.0012, + "step": 71720 + }, + { + "epoch": 1.2122390001943504, + 
"grad_norm": 0.05963369458913803, + "learning_rate": 4.026745345570096e-06, + "loss": 0.0015, + "step": 71730 + }, + { + "epoch": 1.2124080004732007, + "grad_norm": 0.004073624033480883, + "learning_rate": 4.025298768418386e-06, + "loss": 0.0011, + "step": 71740 + }, + { + "epoch": 1.2125770007520513, + "grad_norm": 0.05115113779902458, + "learning_rate": 4.023852276070405e-06, + "loss": 0.0007, + "step": 71750 + }, + { + "epoch": 1.2127460010309017, + "grad_norm": 0.09283419698476791, + "learning_rate": 4.022405868652008e-06, + "loss": 0.0009, + "step": 71760 + }, + { + "epoch": 1.212915001309752, + "grad_norm": 0.07475849241018295, + "learning_rate": 4.020959546289035e-06, + "loss": 0.0004, + "step": 71770 + }, + { + "epoch": 1.2130840015886026, + "grad_norm": 0.021336553618311882, + "learning_rate": 4.019513309107327e-06, + "loss": 0.0012, + "step": 71780 + }, + { + "epoch": 1.213253001867453, + "grad_norm": 0.07587479054927826, + "learning_rate": 4.018067157232709e-06, + "loss": 0.0009, + "step": 71790 + }, + { + "epoch": 1.2134220021463036, + "grad_norm": 0.056975387036800385, + "learning_rate": 4.016621090791008e-06, + "loss": 0.001, + "step": 71800 + }, + { + "epoch": 1.213591002425154, + "grad_norm": 0.0031521848868578672, + "learning_rate": 4.015175109908036e-06, + "loss": 0.0014, + "step": 71810 + }, + { + "epoch": 1.2137600027040045, + "grad_norm": 0.04272615164518356, + "learning_rate": 4.013729214709601e-06, + "loss": 0.0011, + "step": 71820 + }, + { + "epoch": 1.2139290029828549, + "grad_norm": 0.015306469984352589, + "learning_rate": 4.012283405321504e-06, + "loss": 0.0014, + "step": 71830 + }, + { + "epoch": 1.2140980032617055, + "grad_norm": 0.1312740296125412, + "learning_rate": 4.010837681869535e-06, + "loss": 0.001, + "step": 71840 + }, + { + "epoch": 1.2142670035405558, + "grad_norm": 0.05064448341727257, + "learning_rate": 4.0093920444794796e-06, + "loss": 0.0011, + "step": 71850 + }, + { + "epoch": 1.2144360038194062, + "grad_norm": 0.018574142828583717, + "learning_rate": 4.007946493277118e-06, + "loss": 0.0006, + "step": 71860 + }, + { + "epoch": 1.2146050040982568, + "grad_norm": 0.04546702653169632, + "learning_rate": 4.006501028388215e-06, + "loss": 0.0014, + "step": 71870 + }, + { + "epoch": 1.2147740043771071, + "grad_norm": 0.053701866418123245, + "learning_rate": 4.00505564993854e-06, + "loss": 0.0011, + "step": 71880 + }, + { + "epoch": 1.2149430046559577, + "grad_norm": 0.05961509048938751, + "learning_rate": 4.003610358053841e-06, + "loss": 0.0006, + "step": 71890 + }, + { + "epoch": 1.215112004934808, + "grad_norm": 0.044699832797050476, + "learning_rate": 4.002165152859871e-06, + "loss": 0.0009, + "step": 71900 + }, + { + "epoch": 1.2152810052136587, + "grad_norm": 0.06642137467861176, + "learning_rate": 4.000720034482365e-06, + "loss": 0.0005, + "step": 71910 + }, + { + "epoch": 1.215450005492509, + "grad_norm": 0.0566544309258461, + "learning_rate": 3.999275003047059e-06, + "loss": 0.0011, + "step": 71920 + }, + { + "epoch": 1.2156190057713596, + "grad_norm": 0.0494023896753788, + "learning_rate": 3.997830058679675e-06, + "loss": 0.001, + "step": 71930 + }, + { + "epoch": 1.21578800605021, + "grad_norm": 0.04000876098871231, + "learning_rate": 3.996385201505933e-06, + "loss": 0.0009, + "step": 71940 + }, + { + "epoch": 1.2159570063290603, + "grad_norm": 0.008254399523139, + "learning_rate": 3.994940431651541e-06, + "loss": 0.001, + "step": 71950 + }, + { + "epoch": 1.216126006607911, + "grad_norm": 0.09354092180728912, + "learning_rate": 
3.993495749242201e-06, + "loss": 0.0007, + "step": 71960 + }, + { + "epoch": 1.2162950068867613, + "grad_norm": 0.032391320914030075, + "learning_rate": 3.992051154403606e-06, + "loss": 0.001, + "step": 71970 + }, + { + "epoch": 1.2164640071656119, + "grad_norm": 0.00970939826220274, + "learning_rate": 3.990606647261445e-06, + "loss": 0.0009, + "step": 71980 + }, + { + "epoch": 1.2166330074444622, + "grad_norm": 0.011265194043517113, + "learning_rate": 3.989162227941397e-06, + "loss": 0.0004, + "step": 71990 + }, + { + "epoch": 1.2168020077233128, + "grad_norm": 0.12789775431156158, + "learning_rate": 3.987717896569132e-06, + "loss": 0.0011, + "step": 72000 + }, + { + "epoch": 1.2169710080021632, + "grad_norm": 0.016483772546052933, + "learning_rate": 3.986273653270315e-06, + "loss": 0.0018, + "step": 72010 + }, + { + "epoch": 1.2171400082810138, + "grad_norm": 0.04038587212562561, + "learning_rate": 3.984829498170602e-06, + "loss": 0.0007, + "step": 72020 + }, + { + "epoch": 1.2173090085598641, + "grad_norm": 0.01905478537082672, + "learning_rate": 3.983385431395641e-06, + "loss": 0.0005, + "step": 72030 + }, + { + "epoch": 1.2174780088387145, + "grad_norm": 0.01647147536277771, + "learning_rate": 3.981941453071072e-06, + "loss": 0.0004, + "step": 72040 + }, + { + "epoch": 1.217647009117565, + "grad_norm": 0.06504178792238235, + "learning_rate": 3.9804975633225315e-06, + "loss": 0.001, + "step": 72050 + }, + { + "epoch": 1.2178160093964154, + "grad_norm": 0.0017739549512043595, + "learning_rate": 3.979053762275641e-06, + "loss": 0.0011, + "step": 72060 + }, + { + "epoch": 1.217985009675266, + "grad_norm": 0.05531751736998558, + "learning_rate": 3.977610050056021e-06, + "loss": 0.002, + "step": 72070 + }, + { + "epoch": 1.2181540099541164, + "grad_norm": 0.090264230966568, + "learning_rate": 3.976166426789279e-06, + "loss": 0.0012, + "step": 72080 + }, + { + "epoch": 1.218323010232967, + "grad_norm": 0.02547847293317318, + "learning_rate": 3.97472289260102e-06, + "loss": 0.0017, + "step": 72090 + }, + { + "epoch": 1.2184920105118173, + "grad_norm": 0.018822619691491127, + "learning_rate": 3.973279447616834e-06, + "loss": 0.0013, + "step": 72100 + }, + { + "epoch": 1.218661010790668, + "grad_norm": 0.08224533498287201, + "learning_rate": 3.971836091962314e-06, + "loss": 0.001, + "step": 72110 + }, + { + "epoch": 1.2188300110695183, + "grad_norm": 0.07459520548582077, + "learning_rate": 3.9703928257630325e-06, + "loss": 0.001, + "step": 72120 + }, + { + "epoch": 1.2189990113483686, + "grad_norm": 0.024879854172468185, + "learning_rate": 3.968949649144566e-06, + "loss": 0.001, + "step": 72130 + }, + { + "epoch": 1.2191680116272192, + "grad_norm": 0.041853051632642746, + "learning_rate": 3.9675065622324746e-06, + "loss": 0.0007, + "step": 72140 + }, + { + "epoch": 1.2193370119060696, + "grad_norm": 0.00012978816812392324, + "learning_rate": 3.966063565152316e-06, + "loss": 0.0012, + "step": 72150 + }, + { + "epoch": 1.2195060121849202, + "grad_norm": 0.19226211309432983, + "learning_rate": 3.964620658029635e-06, + "loss": 0.0015, + "step": 72160 + }, + { + "epoch": 1.2196750124637705, + "grad_norm": 0.0453561507165432, + "learning_rate": 3.963177840989975e-06, + "loss": 0.0009, + "step": 72170 + }, + { + "epoch": 1.219844012742621, + "grad_norm": 0.049020733684301376, + "learning_rate": 3.961735114158864e-06, + "loss": 0.0011, + "step": 72180 + }, + { + "epoch": 1.2200130130214715, + "grad_norm": 0.07267977297306061, + "learning_rate": 3.960292477661831e-06, + "loss": 0.0011, + "step": 72190 + 
}, + { + "epoch": 1.220182013300322, + "grad_norm": 0.00011391971202101558, + "learning_rate": 3.958849931624389e-06, + "loss": 0.0023, + "step": 72200 + }, + { + "epoch": 1.2203510135791724, + "grad_norm": 0.006536595989018679, + "learning_rate": 3.957407476172047e-06, + "loss": 0.001, + "step": 72210 + }, + { + "epoch": 1.2205200138580228, + "grad_norm": 0.12899255752563477, + "learning_rate": 3.955965111430306e-06, + "loss": 0.0025, + "step": 72220 + }, + { + "epoch": 1.2206890141368734, + "grad_norm": 0.02313540130853653, + "learning_rate": 3.954522837524658e-06, + "loss": 0.0009, + "step": 72230 + }, + { + "epoch": 1.2208580144157237, + "grad_norm": 0.020590925589203835, + "learning_rate": 3.953080654580591e-06, + "loss": 0.0012, + "step": 72240 + }, + { + "epoch": 1.2210270146945743, + "grad_norm": 0.08852645754814148, + "learning_rate": 3.951638562723577e-06, + "loss": 0.0011, + "step": 72250 + }, + { + "epoch": 1.2211960149734247, + "grad_norm": 0.15627345442771912, + "learning_rate": 3.950196562079091e-06, + "loss": 0.001, + "step": 72260 + }, + { + "epoch": 1.2213650152522753, + "grad_norm": 0.0558914989233017, + "learning_rate": 3.948754652772587e-06, + "loss": 0.0012, + "step": 72270 + }, + { + "epoch": 1.2215340155311256, + "grad_norm": 0.1008479967713356, + "learning_rate": 3.947312834929524e-06, + "loss": 0.001, + "step": 72280 + }, + { + "epoch": 1.2217030158099762, + "grad_norm": 0.04521102085709572, + "learning_rate": 3.945871108675342e-06, + "loss": 0.002, + "step": 72290 + }, + { + "epoch": 1.2218720160888266, + "grad_norm": 0.09771153330802917, + "learning_rate": 3.944429474135484e-06, + "loss": 0.0013, + "step": 72300 + }, + { + "epoch": 1.222041016367677, + "grad_norm": 0.03358219936490059, + "learning_rate": 3.942987931435374e-06, + "loss": 0.0017, + "step": 72310 + }, + { + "epoch": 1.2222100166465275, + "grad_norm": 0.06623164564371109, + "learning_rate": 3.9415464807004364e-06, + "loss": 0.002, + "step": 72320 + }, + { + "epoch": 1.2223790169253779, + "grad_norm": 0.01089906133711338, + "learning_rate": 3.940105122056082e-06, + "loss": 0.0009, + "step": 72330 + }, + { + "epoch": 1.2225480172042285, + "grad_norm": 0.0035987806040793657, + "learning_rate": 3.938663855627719e-06, + "loss": 0.0006, + "step": 72340 + }, + { + "epoch": 1.2227170174830788, + "grad_norm": 0.08150385320186615, + "learning_rate": 3.937222681540741e-06, + "loss": 0.0014, + "step": 72350 + }, + { + "epoch": 1.2228860177619294, + "grad_norm": 0.10174530744552612, + "learning_rate": 3.935781599920541e-06, + "loss": 0.0007, + "step": 72360 + }, + { + "epoch": 1.2230550180407798, + "grad_norm": 0.09403951466083527, + "learning_rate": 3.934340610892497e-06, + "loss": 0.0015, + "step": 72370 + }, + { + "epoch": 1.2232240183196303, + "grad_norm": 0.018554072827100754, + "learning_rate": 3.932899714581984e-06, + "loss": 0.0009, + "step": 72380 + }, + { + "epoch": 1.2233930185984807, + "grad_norm": 0.025134406983852386, + "learning_rate": 3.931458911114364e-06, + "loss": 0.0013, + "step": 72390 + }, + { + "epoch": 1.223562018877331, + "grad_norm": 0.01571774110198021, + "learning_rate": 3.930018200614998e-06, + "loss": 0.001, + "step": 72400 + }, + { + "epoch": 1.2237310191561817, + "grad_norm": 0.062236469238996506, + "learning_rate": 3.928577583209231e-06, + "loss": 0.0005, + "step": 72410 + }, + { + "epoch": 1.223900019435032, + "grad_norm": 0.0811903327703476, + "learning_rate": 3.927137059022407e-06, + "loss": 0.0008, + "step": 72420 + }, + { + "epoch": 1.2240690197138826, + "grad_norm": 
0.05504392087459564, + "learning_rate": 3.925696628179856e-06, + "loss": 0.0014, + "step": 72430 + }, + { + "epoch": 1.224238019992733, + "grad_norm": 0.08194303512573242, + "learning_rate": 3.924256290806905e-06, + "loss": 0.0016, + "step": 72440 + }, + { + "epoch": 1.2244070202715833, + "grad_norm": 0.03738373890519142, + "learning_rate": 3.922816047028866e-06, + "loss": 0.0009, + "step": 72450 + }, + { + "epoch": 1.224576020550434, + "grad_norm": 0.03901316598057747, + "learning_rate": 3.921375896971053e-06, + "loss": 0.005, + "step": 72460 + }, + { + "epoch": 1.2247450208292845, + "grad_norm": 0.15741872787475586, + "learning_rate": 3.919935840758761e-06, + "loss": 0.0006, + "step": 72470 + }, + { + "epoch": 1.2249140211081349, + "grad_norm": 0.025328345596790314, + "learning_rate": 3.918495878517284e-06, + "loss": 0.0011, + "step": 72480 + }, + { + "epoch": 1.2250830213869852, + "grad_norm": 0.03609572350978851, + "learning_rate": 3.917056010371906e-06, + "loss": 0.0013, + "step": 72490 + }, + { + "epoch": 1.2252520216658358, + "grad_norm": 0.04493815079331398, + "learning_rate": 3.915616236447902e-06, + "loss": 0.0007, + "step": 72500 + }, + { + "epoch": 1.2254210219446862, + "grad_norm": 0.052706584334373474, + "learning_rate": 3.91417655687054e-06, + "loss": 0.0006, + "step": 72510 + }, + { + "epoch": 1.2255900222235367, + "grad_norm": 0.02652081288397312, + "learning_rate": 3.912736971765077e-06, + "loss": 0.0003, + "step": 72520 + }, + { + "epoch": 1.225759022502387, + "grad_norm": 0.14209231734275818, + "learning_rate": 3.911297481256767e-06, + "loss": 0.0015, + "step": 72530 + }, + { + "epoch": 1.2259280227812375, + "grad_norm": 0.1366054266691208, + "learning_rate": 3.909858085470849e-06, + "loss": 0.0012, + "step": 72540 + }, + { + "epoch": 1.226097023060088, + "grad_norm": 0.17087505757808685, + "learning_rate": 3.908418784532562e-06, + "loss": 0.0015, + "step": 72550 + }, + { + "epoch": 1.2262660233389384, + "grad_norm": 0.041053686290979385, + "learning_rate": 3.906979578567128e-06, + "loss": 0.0014, + "step": 72560 + }, + { + "epoch": 1.226435023617789, + "grad_norm": 0.034434977918863297, + "learning_rate": 3.9055404676997674e-06, + "loss": 0.0008, + "step": 72570 + }, + { + "epoch": 1.2266040238966394, + "grad_norm": 0.06338217854499817, + "learning_rate": 3.9041014520556875e-06, + "loss": 0.001, + "step": 72580 + }, + { + "epoch": 1.22677302417549, + "grad_norm": 0.11102625727653503, + "learning_rate": 3.902662531760092e-06, + "loss": 0.0014, + "step": 72590 + }, + { + "epoch": 1.2269420244543403, + "grad_norm": 0.036118436604738235, + "learning_rate": 3.901223706938172e-06, + "loss": 0.0015, + "step": 72600 + }, + { + "epoch": 1.227111024733191, + "grad_norm": 0.15463878214359283, + "learning_rate": 3.8997849777151156e-06, + "loss": 0.0013, + "step": 72610 + }, + { + "epoch": 1.2272800250120413, + "grad_norm": 0.06801795214414597, + "learning_rate": 3.898346344216094e-06, + "loss": 0.0006, + "step": 72620 + }, + { + "epoch": 1.2274490252908916, + "grad_norm": 0.029515203088521957, + "learning_rate": 3.896907806566281e-06, + "loss": 0.0012, + "step": 72630 + }, + { + "epoch": 1.2276180255697422, + "grad_norm": 0.14273680746555328, + "learning_rate": 3.89546936489083e-06, + "loss": 0.0009, + "step": 72640 + }, + { + "epoch": 1.2277870258485926, + "grad_norm": 0.07829339057207108, + "learning_rate": 3.894031019314899e-06, + "loss": 0.0007, + "step": 72650 + }, + { + "epoch": 1.2279560261274431, + "grad_norm": 0.07639341056346893, + "learning_rate": 
3.8925927699636255e-06, + "loss": 0.0015, + "step": 72660 + }, + { + "epoch": 1.2281250264062935, + "grad_norm": 0.012791531160473824, + "learning_rate": 3.891154616962148e-06, + "loss": 0.001, + "step": 72670 + }, + { + "epoch": 1.228294026685144, + "grad_norm": 0.03249354660511017, + "learning_rate": 3.889716560435591e-06, + "loss": 0.0008, + "step": 72680 + }, + { + "epoch": 1.2284630269639945, + "grad_norm": 0.009360888972878456, + "learning_rate": 3.888278600509072e-06, + "loss": 0.0024, + "step": 72690 + }, + { + "epoch": 1.228632027242845, + "grad_norm": 0.11578324437141418, + "learning_rate": 3.886840737307701e-06, + "loss": 0.0009, + "step": 72700 + }, + { + "epoch": 1.2288010275216954, + "grad_norm": 0.014583390206098557, + "learning_rate": 3.885402970956581e-06, + "loss": 0.0005, + "step": 72710 + }, + { + "epoch": 1.2289700278005458, + "grad_norm": 0.029843565076589584, + "learning_rate": 3.883965301580801e-06, + "loss": 0.0028, + "step": 72720 + }, + { + "epoch": 1.2291390280793963, + "grad_norm": 0.028233207762241364, + "learning_rate": 3.882527729305448e-06, + "loss": 0.001, + "step": 72730 + }, + { + "epoch": 1.2293080283582467, + "grad_norm": 0.12675841152668, + "learning_rate": 3.881090254255596e-06, + "loss": 0.0013, + "step": 72740 + }, + { + "epoch": 1.2294770286370973, + "grad_norm": 0.01370705384761095, + "learning_rate": 3.8796528765563135e-06, + "loss": 0.0012, + "step": 72750 + }, + { + "epoch": 1.2296460289159477, + "grad_norm": 0.01915006898343563, + "learning_rate": 3.87821559633266e-06, + "loss": 0.001, + "step": 72760 + }, + { + "epoch": 1.2298150291947982, + "grad_norm": 0.046408962458372116, + "learning_rate": 3.876778413709683e-06, + "loss": 0.002, + "step": 72770 + }, + { + "epoch": 1.2299840294736486, + "grad_norm": 0.030629215762019157, + "learning_rate": 3.875341328812427e-06, + "loss": 0.0009, + "step": 72780 + }, + { + "epoch": 1.2301530297524992, + "grad_norm": 0.023360850289463997, + "learning_rate": 3.873904341765925e-06, + "loss": 0.0025, + "step": 72790 + }, + { + "epoch": 1.2303220300313495, + "grad_norm": 0.008983196690678596, + "learning_rate": 3.872467452695201e-06, + "loss": 0.0009, + "step": 72800 + }, + { + "epoch": 1.2304910303102, + "grad_norm": 0.03672792389988899, + "learning_rate": 3.871030661725271e-06, + "loss": 0.0014, + "step": 72810 + }, + { + "epoch": 1.2306600305890505, + "grad_norm": 0.06108082830905914, + "learning_rate": 3.869593968981145e-06, + "loss": 0.0012, + "step": 72820 + }, + { + "epoch": 1.2308290308679009, + "grad_norm": 0.15927709639072418, + "learning_rate": 3.868157374587819e-06, + "loss": 0.0015, + "step": 72830 + }, + { + "epoch": 1.2309980311467514, + "grad_norm": 0.05433737114071846, + "learning_rate": 3.866720878670287e-06, + "loss": 0.0009, + "step": 72840 + }, + { + "epoch": 1.2311670314256018, + "grad_norm": 0.01710580289363861, + "learning_rate": 3.865284481353527e-06, + "loss": 0.0012, + "step": 72850 + }, + { + "epoch": 1.2313360317044524, + "grad_norm": 0.08657790720462799, + "learning_rate": 3.8638481827625175e-06, + "loss": 0.001, + "step": 72860 + }, + { + "epoch": 1.2315050319833027, + "grad_norm": 0.042510438710451126, + "learning_rate": 3.862411983022219e-06, + "loss": 0.0012, + "step": 72870 + }, + { + "epoch": 1.2316740322621533, + "grad_norm": 0.06462328881025314, + "learning_rate": 3.860975882257591e-06, + "loss": 0.0012, + "step": 72880 + }, + { + "epoch": 1.2318430325410037, + "grad_norm": 0.022037969902157784, + "learning_rate": 3.859539880593578e-06, + "loss": 0.0007, + "step": 72890 + 
}, + { + "epoch": 1.232012032819854, + "grad_norm": 0.04847249761223793, + "learning_rate": 3.858103978155124e-06, + "loss": 0.0017, + "step": 72900 + }, + { + "epoch": 1.2321810330987046, + "grad_norm": 0.0007479141931980848, + "learning_rate": 3.856668175067154e-06, + "loss": 0.0011, + "step": 72910 + }, + { + "epoch": 1.232350033377555, + "grad_norm": 0.0014906972646713257, + "learning_rate": 3.855232471454594e-06, + "loss": 0.0012, + "step": 72920 + }, + { + "epoch": 1.2325190336564056, + "grad_norm": 0.0506197065114975, + "learning_rate": 3.8537968674423545e-06, + "loss": 0.001, + "step": 72930 + }, + { + "epoch": 1.232688033935256, + "grad_norm": 0.03149702027440071, + "learning_rate": 3.852361363155342e-06, + "loss": 0.0006, + "step": 72940 + }, + { + "epoch": 1.2328570342141065, + "grad_norm": 0.002010585507377982, + "learning_rate": 3.85092595871845e-06, + "loss": 0.0011, + "step": 72950 + }, + { + "epoch": 1.233026034492957, + "grad_norm": 0.35226213932037354, + "learning_rate": 3.8494906542565675e-06, + "loss": 0.002, + "step": 72960 + }, + { + "epoch": 1.2331950347718075, + "grad_norm": 0.13743427395820618, + "learning_rate": 3.848055449894573e-06, + "loss": 0.0006, + "step": 72970 + }, + { + "epoch": 1.2333640350506578, + "grad_norm": 0.01349994819611311, + "learning_rate": 3.846620345757335e-06, + "loss": 0.002, + "step": 72980 + }, + { + "epoch": 1.2335330353295082, + "grad_norm": 0.14405393600463867, + "learning_rate": 3.845185341969715e-06, + "loss": 0.0017, + "step": 72990 + }, + { + "epoch": 1.2337020356083588, + "grad_norm": 0.12188602983951569, + "learning_rate": 3.8437504386565636e-06, + "loss": 0.0015, + "step": 73000 + }, + { + "epoch": 1.2338710358872091, + "grad_norm": 0.04898308590054512, + "learning_rate": 3.84231563594273e-06, + "loss": 0.0008, + "step": 73010 + }, + { + "epoch": 1.2340400361660597, + "grad_norm": 0.017351139336824417, + "learning_rate": 3.840880933953043e-06, + "loss": 0.001, + "step": 73020 + }, + { + "epoch": 1.23420903644491, + "grad_norm": 0.03371371328830719, + "learning_rate": 3.8394463328123325e-06, + "loss": 0.0009, + "step": 73030 + }, + { + "epoch": 1.2343780367237607, + "grad_norm": 0.09450079500675201, + "learning_rate": 3.838011832645412e-06, + "loss": 0.001, + "step": 73040 + }, + { + "epoch": 1.234547037002611, + "grad_norm": 0.1007283478975296, + "learning_rate": 3.836577433577094e-06, + "loss": 0.0009, + "step": 73050 + }, + { + "epoch": 1.2347160372814616, + "grad_norm": 0.03049352392554283, + "learning_rate": 3.835143135732175e-06, + "loss": 0.001, + "step": 73060 + }, + { + "epoch": 1.234885037560312, + "grad_norm": 0.06046221777796745, + "learning_rate": 3.833708939235446e-06, + "loss": 0.0013, + "step": 73070 + }, + { + "epoch": 1.2350540378391623, + "grad_norm": 0.03119039349257946, + "learning_rate": 3.832274844211692e-06, + "loss": 0.0011, + "step": 73080 + }, + { + "epoch": 1.235223038118013, + "grad_norm": 0.05875023454427719, + "learning_rate": 3.8308408507856844e-06, + "loss": 0.0024, + "step": 73090 + }, + { + "epoch": 1.2353920383968633, + "grad_norm": 0.053951092064380646, + "learning_rate": 3.8294069590821856e-06, + "loss": 0.0005, + "step": 73100 + }, + { + "epoch": 1.2355610386757139, + "grad_norm": 0.09349772334098816, + "learning_rate": 3.8279731692259545e-06, + "loss": 0.0008, + "step": 73110 + }, + { + "epoch": 1.2357300389545642, + "grad_norm": 0.01611015386879444, + "learning_rate": 3.8265394813417355e-06, + "loss": 0.001, + "step": 73120 + }, + { + "epoch": 1.2358990392334148, + "grad_norm": 
0.012593476101756096, + "learning_rate": 3.825105895554269e-06, + "loss": 0.0007, + "step": 73130 + }, + { + "epoch": 1.2360680395122652, + "grad_norm": 0.1291211098432541, + "learning_rate": 3.823672411988279e-06, + "loss": 0.0011, + "step": 73140 + }, + { + "epoch": 1.2362370397911158, + "grad_norm": 0.0016636535292491317, + "learning_rate": 3.822239030768492e-06, + "loss": 0.0012, + "step": 73150 + }, + { + "epoch": 1.2364060400699661, + "grad_norm": 0.044153131544589996, + "learning_rate": 3.820805752019613e-06, + "loss": 0.0007, + "step": 73160 + }, + { + "epoch": 1.2365750403488165, + "grad_norm": 0.04015633836388588, + "learning_rate": 3.819372575866348e-06, + "loss": 0.0012, + "step": 73170 + }, + { + "epoch": 1.236744040627667, + "grad_norm": 0.0036272837314754725, + "learning_rate": 3.8179395024333885e-06, + "loss": 0.0009, + "step": 73180 + }, + { + "epoch": 1.2369130409065174, + "grad_norm": 0.15639354288578033, + "learning_rate": 3.816506531845421e-06, + "loss": 0.0025, + "step": 73190 + }, + { + "epoch": 1.237082041185368, + "grad_norm": 0.08149423450231552, + "learning_rate": 3.815073664227118e-06, + "loss": 0.0009, + "step": 73200 + }, + { + "epoch": 1.2372510414642184, + "grad_norm": 0.15775452554225922, + "learning_rate": 3.8136408997031483e-06, + "loss": 0.0019, + "step": 73210 + }, + { + "epoch": 1.237420041743069, + "grad_norm": 0.36730340123176575, + "learning_rate": 3.8122082383981666e-06, + "loss": 0.0021, + "step": 73220 + }, + { + "epoch": 1.2375890420219193, + "grad_norm": 0.06837258487939835, + "learning_rate": 3.8107756804368258e-06, + "loss": 0.0013, + "step": 73230 + }, + { + "epoch": 1.23775804230077, + "grad_norm": 0.030986608937382698, + "learning_rate": 3.80934322594376e-06, + "loss": 0.0008, + "step": 73240 + }, + { + "epoch": 1.2379270425796203, + "grad_norm": 0.0400238111615181, + "learning_rate": 3.8079108750436033e-06, + "loss": 0.0009, + "step": 73250 + }, + { + "epoch": 1.2380960428584706, + "grad_norm": 0.030002465471625328, + "learning_rate": 3.8064786278609768e-06, + "loss": 0.001, + "step": 73260 + }, + { + "epoch": 1.2382650431373212, + "grad_norm": 0.1184472143650055, + "learning_rate": 3.805046484520492e-06, + "loss": 0.0008, + "step": 73270 + }, + { + "epoch": 1.2384340434161716, + "grad_norm": 0.03609028086066246, + "learning_rate": 3.803614445146753e-06, + "loss": 0.0007, + "step": 73280 + }, + { + "epoch": 1.2386030436950222, + "grad_norm": 0.02869199775159359, + "learning_rate": 3.8021825098643533e-06, + "loss": 0.0009, + "step": 73290 + }, + { + "epoch": 1.2387720439738725, + "grad_norm": 0.0353069044649601, + "learning_rate": 3.800750678797881e-06, + "loss": 0.0004, + "step": 73300 + }, + { + "epoch": 1.2389410442527231, + "grad_norm": 0.10248451679944992, + "learning_rate": 3.799318952071907e-06, + "loss": 0.0011, + "step": 73310 + }, + { + "epoch": 1.2391100445315735, + "grad_norm": 0.010547821410000324, + "learning_rate": 3.7978873298110046e-06, + "loss": 0.0007, + "step": 73320 + }, + { + "epoch": 1.239279044810424, + "grad_norm": 0.014905786141753197, + "learning_rate": 3.7964558121397268e-06, + "loss": 0.0016, + "step": 73330 + }, + { + "epoch": 1.2394480450892744, + "grad_norm": 0.026339823380112648, + "learning_rate": 3.7950243991826264e-06, + "loss": 0.0013, + "step": 73340 + }, + { + "epoch": 1.2396170453681248, + "grad_norm": 0.06282757967710495, + "learning_rate": 3.7935930910642394e-06, + "loss": 0.0013, + "step": 73350 + }, + { + "epoch": 1.2397860456469754, + "grad_norm": 0.01360777486115694, + "learning_rate": 
3.7921618879090995e-06, + "loss": 0.0003, + "step": 73360 + }, + { + "epoch": 1.2399550459258257, + "grad_norm": 0.0012074375990778208, + "learning_rate": 3.790730789841727e-06, + "loss": 0.0006, + "step": 73370 + }, + { + "epoch": 1.2401240462046763, + "grad_norm": 0.0037419984582811594, + "learning_rate": 3.7892997969866362e-06, + "loss": 0.0011, + "step": 73380 + }, + { + "epoch": 1.2402930464835267, + "grad_norm": 0.015408365055918694, + "learning_rate": 3.7878689094683274e-06, + "loss": 0.0006, + "step": 73390 + }, + { + "epoch": 1.240462046762377, + "grad_norm": 0.02896169200539589, + "learning_rate": 3.7864381274112982e-06, + "loss": 0.0011, + "step": 73400 + }, + { + "epoch": 1.2406310470412276, + "grad_norm": 0.16518239676952362, + "learning_rate": 3.7850074509400303e-06, + "loss": 0.0011, + "step": 73410 + }, + { + "epoch": 1.240800047320078, + "grad_norm": 0.11798775941133499, + "learning_rate": 3.783576880179003e-06, + "loss": 0.001, + "step": 73420 + }, + { + "epoch": 1.2409690475989286, + "grad_norm": 0.04104173928499222, + "learning_rate": 3.7821464152526776e-06, + "loss": 0.0014, + "step": 73430 + }, + { + "epoch": 1.241138047877779, + "grad_norm": 0.05344299599528313, + "learning_rate": 3.7807160562855173e-06, + "loss": 0.0018, + "step": 73440 + }, + { + "epoch": 1.2413070481566295, + "grad_norm": 0.09682608395814896, + "learning_rate": 3.7792858034019665e-06, + "loss": 0.0011, + "step": 73450 + }, + { + "epoch": 1.2414760484354799, + "grad_norm": 0.002755597699433565, + "learning_rate": 3.777855656726466e-06, + "loss": 0.0005, + "step": 73460 + }, + { + "epoch": 1.2416450487143305, + "grad_norm": 0.07560652494430542, + "learning_rate": 3.7764256163834435e-06, + "loss": 0.0009, + "step": 73470 + }, + { + "epoch": 1.2418140489931808, + "grad_norm": 0.011091392487287521, + "learning_rate": 3.7749956824973223e-06, + "loss": 0.0016, + "step": 73480 + }, + { + "epoch": 1.2419830492720312, + "grad_norm": 0.02337898127734661, + "learning_rate": 3.7735658551925103e-06, + "loss": 0.0015, + "step": 73490 + }, + { + "epoch": 1.2421520495508818, + "grad_norm": 0.029667360708117485, + "learning_rate": 3.7721361345934138e-06, + "loss": 0.0004, + "step": 73500 + }, + { + "epoch": 1.2423210498297321, + "grad_norm": 0.064625084400177, + "learning_rate": 3.77070652082442e-06, + "loss": 0.0015, + "step": 73510 + }, + { + "epoch": 1.2424900501085827, + "grad_norm": 0.010021107271313667, + "learning_rate": 3.769277014009915e-06, + "loss": 0.0012, + "step": 73520 + }, + { + "epoch": 1.242659050387433, + "grad_norm": 0.1499299556016922, + "learning_rate": 3.7678476142742746e-06, + "loss": 0.0026, + "step": 73530 + }, + { + "epoch": 1.2428280506662837, + "grad_norm": 0.19507773220539093, + "learning_rate": 3.7664183217418597e-06, + "loss": 0.001, + "step": 73540 + }, + { + "epoch": 1.242997050945134, + "grad_norm": 0.015503432601690292, + "learning_rate": 3.7649891365370294e-06, + "loss": 0.001, + "step": 73550 + }, + { + "epoch": 1.2431660512239846, + "grad_norm": 0.03430059552192688, + "learning_rate": 3.763560058784127e-06, + "loss": 0.0005, + "step": 73560 + }, + { + "epoch": 1.243335051502835, + "grad_norm": 0.07987324893474579, + "learning_rate": 3.7621310886074912e-06, + "loss": 0.0012, + "step": 73570 + }, + { + "epoch": 1.2435040517816853, + "grad_norm": 0.16322799026966095, + "learning_rate": 3.7607022261314465e-06, + "loss": 0.0015, + "step": 73580 + }, + { + "epoch": 1.243673052060536, + "grad_norm": 0.09445358067750931, + "learning_rate": 3.759273471480315e-06, + "loss": 0.0017, + 
"step": 73590 + }, + { + "epoch": 1.2438420523393863, + "grad_norm": 0.03201274573802948, + "learning_rate": 3.7578448247784006e-06, + "loss": 0.0011, + "step": 73600 + }, + { + "epoch": 1.2440110526182369, + "grad_norm": 0.059998683631420135, + "learning_rate": 3.7564162861500076e-06, + "loss": 0.0007, + "step": 73610 + }, + { + "epoch": 1.2441800528970872, + "grad_norm": 0.0846458300948143, + "learning_rate": 3.7549878557194204e-06, + "loss": 0.001, + "step": 73620 + }, + { + "epoch": 1.2443490531759378, + "grad_norm": 0.007185360882431269, + "learning_rate": 3.753559533610924e-06, + "loss": 0.0009, + "step": 73630 + }, + { + "epoch": 1.2445180534547882, + "grad_norm": 0.043200962245464325, + "learning_rate": 3.752131319948785e-06, + "loss": 0.0006, + "step": 73640 + }, + { + "epoch": 1.2446870537336387, + "grad_norm": 0.026572002097964287, + "learning_rate": 3.7507032148572684e-06, + "loss": 0.0003, + "step": 73650 + }, + { + "epoch": 1.244856054012489, + "grad_norm": 0.04520958662033081, + "learning_rate": 3.7492752184606253e-06, + "loss": 0.0003, + "step": 73660 + }, + { + "epoch": 1.2450250542913395, + "grad_norm": 0.025674309581518173, + "learning_rate": 3.7478473308830986e-06, + "loss": 0.0006, + "step": 73670 + }, + { + "epoch": 1.24519405457019, + "grad_norm": 0.020547570660710335, + "learning_rate": 3.7464195522489193e-06, + "loss": 0.0024, + "step": 73680 + }, + { + "epoch": 1.2453630548490404, + "grad_norm": 0.01594940386712551, + "learning_rate": 3.7449918826823152e-06, + "loss": 0.0013, + "step": 73690 + }, + { + "epoch": 1.245532055127891, + "grad_norm": 0.005270745139569044, + "learning_rate": 3.7435643223074954e-06, + "loss": 0.0014, + "step": 73700 + }, + { + "epoch": 1.2457010554067414, + "grad_norm": 0.10284716635942459, + "learning_rate": 3.7421368712486696e-06, + "loss": 0.0007, + "step": 73710 + }, + { + "epoch": 1.245870055685592, + "grad_norm": 0.010431215167045593, + "learning_rate": 3.740709529630028e-06, + "loss": 0.0009, + "step": 73720 + }, + { + "epoch": 1.2460390559644423, + "grad_norm": 0.045114561915397644, + "learning_rate": 3.7392822975757596e-06, + "loss": 0.0016, + "step": 73730 + }, + { + "epoch": 1.246208056243293, + "grad_norm": 0.06875897198915482, + "learning_rate": 3.73785517521004e-06, + "loss": 0.0014, + "step": 73740 + }, + { + "epoch": 1.2463770565221433, + "grad_norm": 0.008149543777108192, + "learning_rate": 3.736428162657035e-06, + "loss": 0.0012, + "step": 73750 + }, + { + "epoch": 1.2465460568009936, + "grad_norm": 0.04132377728819847, + "learning_rate": 3.735001260040902e-06, + "loss": 0.0006, + "step": 73760 + }, + { + "epoch": 1.2467150570798442, + "grad_norm": 0.048792049288749695, + "learning_rate": 3.7335744674857877e-06, + "loss": 0.0007, + "step": 73770 + }, + { + "epoch": 1.2468840573586946, + "grad_norm": 0.02528366446495056, + "learning_rate": 3.732147785115833e-06, + "loss": 0.0012, + "step": 73780 + }, + { + "epoch": 1.2470530576375451, + "grad_norm": 0.08471834659576416, + "learning_rate": 3.7307212130551612e-06, + "loss": 0.0007, + "step": 73790 + }, + { + "epoch": 1.2472220579163955, + "grad_norm": 0.00016127926937770098, + "learning_rate": 3.7292947514278966e-06, + "loss": 0.0006, + "step": 73800 + }, + { + "epoch": 1.247391058195246, + "grad_norm": 0.0032390491105616093, + "learning_rate": 3.727868400358142e-06, + "loss": 0.0004, + "step": 73810 + }, + { + "epoch": 1.2475600584740965, + "grad_norm": 0.034975565969944, + "learning_rate": 3.726442159970004e-06, + "loss": 0.0012, + "step": 73820 + }, + { + "epoch": 
1.247729058752947, + "grad_norm": 0.07279594242572784, + "learning_rate": 3.725016030387565e-06, + "loss": 0.001, + "step": 73830 + }, + { + "epoch": 1.2478980590317974, + "grad_norm": 0.04227467626333237, + "learning_rate": 3.723590011734911e-06, + "loss": 0.0011, + "step": 73840 + }, + { + "epoch": 1.2480670593106478, + "grad_norm": 0.10138219594955444, + "learning_rate": 3.7221641041361084e-06, + "loss": 0.0013, + "step": 73850 + }, + { + "epoch": 1.2482360595894983, + "grad_norm": 0.14254963397979736, + "learning_rate": 3.7207383077152227e-06, + "loss": 0.0011, + "step": 73860 + }, + { + "epoch": 1.2484050598683487, + "grad_norm": 0.04934743046760559, + "learning_rate": 3.7193126225962993e-06, + "loss": 0.0009, + "step": 73870 + }, + { + "epoch": 1.2485740601471993, + "grad_norm": 0.11442594975233078, + "learning_rate": 3.7178870489033857e-06, + "loss": 0.0011, + "step": 73880 + }, + { + "epoch": 1.2487430604260497, + "grad_norm": 0.0025327573530375957, + "learning_rate": 3.716461586760509e-06, + "loss": 0.0004, + "step": 73890 + }, + { + "epoch": 1.2489120607049002, + "grad_norm": 0.07135838270187378, + "learning_rate": 3.7150362362916946e-06, + "loss": 0.0014, + "step": 73900 + }, + { + "epoch": 1.2490810609837506, + "grad_norm": 0.1056516021490097, + "learning_rate": 3.713610997620952e-06, + "loss": 0.0009, + "step": 73910 + }, + { + "epoch": 1.2492500612626012, + "grad_norm": 0.026004845276474953, + "learning_rate": 3.712185870872286e-06, + "loss": 0.001, + "step": 73920 + }, + { + "epoch": 1.2494190615414515, + "grad_norm": 0.00045875273644924164, + "learning_rate": 3.710760856169689e-06, + "loss": 0.0003, + "step": 73930 + }, + { + "epoch": 1.249588061820302, + "grad_norm": 0.055750828236341476, + "learning_rate": 3.7093359536371438e-06, + "loss": 0.0009, + "step": 73940 + }, + { + "epoch": 1.2497570620991525, + "grad_norm": 0.019487323239445686, + "learning_rate": 3.707911163398623e-06, + "loss": 0.0009, + "step": 73950 + }, + { + "epoch": 1.2499260623780029, + "grad_norm": 0.05430120974779129, + "learning_rate": 3.7064864855780936e-06, + "loss": 0.001, + "step": 73960 + }, + { + "epoch": 1.2500950626568534, + "grad_norm": 0.08097860962152481, + "learning_rate": 3.7050619202995048e-06, + "loss": 0.0009, + "step": 73970 + }, + { + "epoch": 1.2502640629357038, + "grad_norm": 0.07676412165164948, + "learning_rate": 3.7036374676868053e-06, + "loss": 0.0011, + "step": 73980 + }, + { + "epoch": 1.2504330632145544, + "grad_norm": 0.04987164959311485, + "learning_rate": 3.702213127863925e-06, + "loss": 0.001, + "step": 73990 + }, + { + "epoch": 1.2506020634934047, + "grad_norm": 0.1494833081960678, + "learning_rate": 3.7007889009547927e-06, + "loss": 0.0007, + "step": 74000 + }, + { + "epoch": 1.2507710637722553, + "grad_norm": 0.04136582463979721, + "learning_rate": 3.6993647870833184e-06, + "loss": 0.0005, + "step": 74010 + }, + { + "epoch": 1.2509400640511057, + "grad_norm": 0.03510259464383125, + "learning_rate": 3.6979407863734095e-06, + "loss": 0.0004, + "step": 74020 + }, + { + "epoch": 1.251109064329956, + "grad_norm": 0.09876205772161484, + "learning_rate": 3.696516898948962e-06, + "loss": 0.001, + "step": 74030 + }, + { + "epoch": 1.2512780646088066, + "grad_norm": 0.10713042318820953, + "learning_rate": 3.6950931249338594e-06, + "loss": 0.0013, + "step": 74040 + }, + { + "epoch": 1.251447064887657, + "grad_norm": 0.02865634858608246, + "learning_rate": 3.6936694644519777e-06, + "loss": 0.0004, + "step": 74050 + }, + { + "epoch": 1.2516160651665076, + "grad_norm": 
0.005249988753348589, + "learning_rate": 3.6922459176271813e-06, + "loss": 0.0023, + "step": 74060 + }, + { + "epoch": 1.251785065445358, + "grad_norm": 0.07062037289142609, + "learning_rate": 3.690822484583328e-06, + "loss": 0.0006, + "step": 74070 + }, + { + "epoch": 1.2519540657242083, + "grad_norm": 0.056166257709264755, + "learning_rate": 3.6893991654442595e-06, + "loss": 0.0007, + "step": 74080 + }, + { + "epoch": 1.252123066003059, + "grad_norm": 0.11034413427114487, + "learning_rate": 3.6879759603338154e-06, + "loss": 0.0009, + "step": 74090 + }, + { + "epoch": 1.2522920662819095, + "grad_norm": 0.027039768174290657, + "learning_rate": 3.686552869375818e-06, + "loss": 0.001, + "step": 74100 + }, + { + "epoch": 1.2524610665607598, + "grad_norm": 0.03380032628774643, + "learning_rate": 3.685129892694087e-06, + "loss": 0.0013, + "step": 74110 + }, + { + "epoch": 1.2526300668396102, + "grad_norm": 0.02615695632994175, + "learning_rate": 3.6837070304124236e-06, + "loss": 0.0013, + "step": 74120 + }, + { + "epoch": 1.2527990671184608, + "grad_norm": 0.023271802812814713, + "learning_rate": 3.6822842826546267e-06, + "loss": 0.0012, + "step": 74130 + }, + { + "epoch": 1.2529680673973111, + "grad_norm": 0.06563130021095276, + "learning_rate": 3.6808616495444816e-06, + "loss": 0.0007, + "step": 74140 + }, + { + "epoch": 1.2531370676761617, + "grad_norm": 0.05916164442896843, + "learning_rate": 3.6794391312057664e-06, + "loss": 0.0007, + "step": 74150 + }, + { + "epoch": 1.253306067955012, + "grad_norm": 0.006687621120363474, + "learning_rate": 3.6780167277622424e-06, + "loss": 0.0023, + "step": 74160 + }, + { + "epoch": 1.2534750682338625, + "grad_norm": 0.027524176985025406, + "learning_rate": 3.676594439337671e-06, + "loss": 0.0009, + "step": 74170 + }, + { + "epoch": 1.253644068512713, + "grad_norm": 0.055483873933553696, + "learning_rate": 3.675172266055792e-06, + "loss": 0.0012, + "step": 74180 + }, + { + "epoch": 1.2538130687915636, + "grad_norm": 0.07914508134126663, + "learning_rate": 3.673750208040348e-06, + "loss": 0.0013, + "step": 74190 + }, + { + "epoch": 1.253982069070414, + "grad_norm": 0.02009778842329979, + "learning_rate": 3.672328265415059e-06, + "loss": 0.0004, + "step": 74200 + }, + { + "epoch": 1.2541510693492643, + "grad_norm": 0.011674889363348484, + "learning_rate": 3.6709064383036454e-06, + "loss": 0.0006, + "step": 74210 + }, + { + "epoch": 1.254320069628115, + "grad_norm": 0.008638451807200909, + "learning_rate": 3.6694847268298107e-06, + "loss": 0.001, + "step": 74220 + }, + { + "epoch": 1.2544890699069653, + "grad_norm": 0.08116303384304047, + "learning_rate": 3.668063131117252e-06, + "loss": 0.0009, + "step": 74230 + }, + { + "epoch": 1.2546580701858159, + "grad_norm": 0.014926801435649395, + "learning_rate": 3.6666416512896524e-06, + "loss": 0.001, + "step": 74240 + }, + { + "epoch": 1.2548270704646662, + "grad_norm": 0.03558624908328056, + "learning_rate": 3.665220287470692e-06, + "loss": 0.0012, + "step": 74250 + }, + { + "epoch": 1.2549960707435166, + "grad_norm": 0.13499194383621216, + "learning_rate": 3.663799039784032e-06, + "loss": 0.0014, + "step": 74260 + }, + { + "epoch": 1.2551650710223672, + "grad_norm": 0.04808972403407097, + "learning_rate": 3.6623779083533286e-06, + "loss": 0.0011, + "step": 74270 + }, + { + "epoch": 1.2553340713012178, + "grad_norm": 0.04176962003111839, + "learning_rate": 3.6609568933022307e-06, + "loss": 0.0005, + "step": 74280 + }, + { + "epoch": 1.2555030715800681, + "grad_norm": 0.07730311900377274, + "learning_rate": 
3.6595359947543686e-06, + "loss": 0.0007, + "step": 74290 + }, + { + "epoch": 1.2556720718589185, + "grad_norm": 0.07153651118278503, + "learning_rate": 3.658115212833372e-06, + "loss": 0.0012, + "step": 74300 + }, + { + "epoch": 1.255841072137769, + "grad_norm": 0.041518520563840866, + "learning_rate": 3.6566945476628514e-06, + "loss": 0.0013, + "step": 74310 + }, + { + "epoch": 1.2560100724166194, + "grad_norm": 0.037388209253549576, + "learning_rate": 3.6552739993664144e-06, + "loss": 0.0014, + "step": 74320 + }, + { + "epoch": 1.25617907269547, + "grad_norm": 0.0725250393152237, + "learning_rate": 3.6538535680676534e-06, + "loss": 0.001, + "step": 74330 + }, + { + "epoch": 1.2563480729743204, + "grad_norm": 0.006527617108076811, + "learning_rate": 3.6524332538901573e-06, + "loss": 0.001, + "step": 74340 + }, + { + "epoch": 1.2565170732531707, + "grad_norm": 0.0347895473241806, + "learning_rate": 3.6510130569574943e-06, + "loss": 0.0007, + "step": 74350 + }, + { + "epoch": 1.2566860735320213, + "grad_norm": 0.06007776036858559, + "learning_rate": 3.6495929773932338e-06, + "loss": 0.0011, + "step": 74360 + }, + { + "epoch": 1.256855073810872, + "grad_norm": 0.09146324545145035, + "learning_rate": 3.648173015320925e-06, + "loss": 0.0005, + "step": 74370 + }, + { + "epoch": 1.2570240740897223, + "grad_norm": 0.043120142072439194, + "learning_rate": 3.6467531708641156e-06, + "loss": 0.0005, + "step": 74380 + }, + { + "epoch": 1.2571930743685726, + "grad_norm": 0.05117538943886757, + "learning_rate": 3.645333444146335e-06, + "loss": 0.0011, + "step": 74390 + }, + { + "epoch": 1.2573620746474232, + "grad_norm": 0.09959695488214493, + "learning_rate": 3.64391383529111e-06, + "loss": 0.0009, + "step": 74400 + }, + { + "epoch": 1.2575310749262736, + "grad_norm": 0.05299863964319229, + "learning_rate": 3.642494344421951e-06, + "loss": 0.001, + "step": 74410 + }, + { + "epoch": 1.2577000752051242, + "grad_norm": 0.044101472944021225, + "learning_rate": 3.6410749716623604e-06, + "loss": 0.0006, + "step": 74420 + }, + { + "epoch": 1.2578690754839745, + "grad_norm": 0.05830482393503189, + "learning_rate": 3.6396557171358317e-06, + "loss": 0.0005, + "step": 74430 + }, + { + "epoch": 1.258038075762825, + "grad_norm": 0.014905164949595928, + "learning_rate": 3.6382365809658483e-06, + "loss": 0.0005, + "step": 74440 + }, + { + "epoch": 1.2582070760416755, + "grad_norm": 0.009240568615496159, + "learning_rate": 3.6368175632758786e-06, + "loss": 0.0019, + "step": 74450 + }, + { + "epoch": 1.258376076320526, + "grad_norm": 0.0460660383105278, + "learning_rate": 3.6353986641893866e-06, + "loss": 0.0009, + "step": 74460 + }, + { + "epoch": 1.2585450765993764, + "grad_norm": 0.07785634696483612, + "learning_rate": 3.633979883829821e-06, + "loss": 0.0026, + "step": 74470 + }, + { + "epoch": 1.2587140768782268, + "grad_norm": 0.00043464999180287123, + "learning_rate": 3.6325612223206264e-06, + "loss": 0.001, + "step": 74480 + }, + { + "epoch": 1.2588830771570774, + "grad_norm": 0.024840181693434715, + "learning_rate": 3.631142679785229e-06, + "loss": 0.0001, + "step": 74490 + }, + { + "epoch": 1.2590520774359277, + "grad_norm": 0.07785341143608093, + "learning_rate": 3.6297242563470515e-06, + "loss": 0.0005, + "step": 74500 + }, + { + "epoch": 1.2592210777147783, + "grad_norm": 0.07119804620742798, + "learning_rate": 3.6283059521295016e-06, + "loss": 0.001, + "step": 74510 + }, + { + "epoch": 1.2593900779936287, + "grad_norm": 0.0005774807068519294, + "learning_rate": 3.6268877672559816e-06, + "loss": 0.0007, + 
"step": 74520 + }, + { + "epoch": 1.259559078272479, + "grad_norm": 0.049248334020376205, + "learning_rate": 3.6254697018498777e-06, + "loss": 0.0017, + "step": 74530 + }, + { + "epoch": 1.2597280785513296, + "grad_norm": 0.02326933853328228, + "learning_rate": 3.624051756034568e-06, + "loss": 0.0005, + "step": 74540 + }, + { + "epoch": 1.2598970788301802, + "grad_norm": 0.05022501200437546, + "learning_rate": 3.6226339299334256e-06, + "loss": 0.0008, + "step": 74550 + }, + { + "epoch": 1.2600660791090306, + "grad_norm": 0.02887713722884655, + "learning_rate": 3.6212162236698017e-06, + "loss": 0.0014, + "step": 74560 + }, + { + "epoch": 1.260235079387881, + "grad_norm": 0.020068364217877388, + "learning_rate": 3.619798637367049e-06, + "loss": 0.0009, + "step": 74570 + }, + { + "epoch": 1.2604040796667315, + "grad_norm": 0.04366261512041092, + "learning_rate": 3.6183811711485005e-06, + "loss": 0.0005, + "step": 74580 + }, + { + "epoch": 1.2605730799455819, + "grad_norm": 0.054522644728422165, + "learning_rate": 3.616963825137486e-06, + "loss": 0.0009, + "step": 74590 + }, + { + "epoch": 1.2607420802244325, + "grad_norm": 0.006772140506654978, + "learning_rate": 3.615546599457318e-06, + "loss": 0.0004, + "step": 74600 + }, + { + "epoch": 1.2609110805032828, + "grad_norm": 0.07526401430368423, + "learning_rate": 3.6141294942313043e-06, + "loss": 0.0013, + "step": 74610 + }, + { + "epoch": 1.2610800807821332, + "grad_norm": 0.002872828394174576, + "learning_rate": 3.612712509582738e-06, + "loss": 0.0006, + "step": 74620 + }, + { + "epoch": 1.2612490810609838, + "grad_norm": 0.026724405586719513, + "learning_rate": 3.6112956456349073e-06, + "loss": 0.0008, + "step": 74630 + }, + { + "epoch": 1.2614180813398341, + "grad_norm": 0.019419973716139793, + "learning_rate": 3.609878902511082e-06, + "loss": 0.0009, + "step": 74640 + }, + { + "epoch": 1.2615870816186847, + "grad_norm": 0.012238175608217716, + "learning_rate": 3.608462280334529e-06, + "loss": 0.0018, + "step": 74650 + }, + { + "epoch": 1.261756081897535, + "grad_norm": 0.1558408886194229, + "learning_rate": 3.6070457792284987e-06, + "loss": 0.0008, + "step": 74660 + }, + { + "epoch": 1.2619250821763857, + "grad_norm": 0.06568319350481033, + "learning_rate": 3.6056293993162362e-06, + "loss": 0.0008, + "step": 74670 + }, + { + "epoch": 1.262094082455236, + "grad_norm": 0.025579141452908516, + "learning_rate": 3.604213140720969e-06, + "loss": 0.0006, + "step": 74680 + }, + { + "epoch": 1.2622630827340866, + "grad_norm": 0.09482182562351227, + "learning_rate": 3.6027970035659233e-06, + "loss": 0.0013, + "step": 74690 + }, + { + "epoch": 1.262432083012937, + "grad_norm": 0.0515873022377491, + "learning_rate": 3.6013809879743074e-06, + "loss": 0.0008, + "step": 74700 + }, + { + "epoch": 1.2626010832917873, + "grad_norm": 0.03804394602775574, + "learning_rate": 3.599965094069322e-06, + "loss": 0.0011, + "step": 74710 + }, + { + "epoch": 1.262770083570638, + "grad_norm": 0.05061568692326546, + "learning_rate": 3.598549321974156e-06, + "loss": 0.0013, + "step": 74720 + }, + { + "epoch": 1.2629390838494883, + "grad_norm": 0.06825969368219376, + "learning_rate": 3.597133671811991e-06, + "loss": 0.0015, + "step": 74730 + }, + { + "epoch": 1.2631080841283389, + "grad_norm": 0.009210345335304737, + "learning_rate": 3.595718143705992e-06, + "loss": 0.0006, + "step": 74740 + }, + { + "epoch": 1.2632770844071892, + "grad_norm": 0.08739422261714935, + "learning_rate": 3.59430273777932e-06, + "loss": 0.0008, + "step": 74750 + }, + { + "epoch": 
1.2634460846860398, + "grad_norm": 0.011182409711182117, + "learning_rate": 3.5928874541551184e-06, + "loss": 0.0009, + "step": 74760 + }, + { + "epoch": 1.2636150849648902, + "grad_norm": 0.0437384769320488, + "learning_rate": 3.591472292956528e-06, + "loss": 0.0014, + "step": 74770 + }, + { + "epoch": 1.2637840852437408, + "grad_norm": 0.08981582522392273, + "learning_rate": 3.5900572543066707e-06, + "loss": 0.0009, + "step": 74780 + }, + { + "epoch": 1.2639530855225911, + "grad_norm": 0.02562858909368515, + "learning_rate": 3.588642338328664e-06, + "loss": 0.0007, + "step": 74790 + }, + { + "epoch": 1.2641220858014415, + "grad_norm": 0.008729532361030579, + "learning_rate": 3.587227545145612e-06, + "loss": 0.0008, + "step": 74800 + }, + { + "epoch": 1.264291086080292, + "grad_norm": 0.03705538436770439, + "learning_rate": 3.5858128748806097e-06, + "loss": 0.0009, + "step": 74810 + }, + { + "epoch": 1.2644600863591424, + "grad_norm": 0.017782073467969894, + "learning_rate": 3.5843983276567384e-06, + "loss": 0.0007, + "step": 74820 + }, + { + "epoch": 1.264629086637993, + "grad_norm": 0.011385198682546616, + "learning_rate": 3.582983903597071e-06, + "loss": 0.0006, + "step": 74830 + }, + { + "epoch": 1.2647980869168434, + "grad_norm": 0.05284975469112396, + "learning_rate": 3.5815696028246715e-06, + "loss": 0.0005, + "step": 74840 + }, + { + "epoch": 1.264967087195694, + "grad_norm": 0.018762778490781784, + "learning_rate": 3.580155425462587e-06, + "loss": 0.0009, + "step": 74850 + }, + { + "epoch": 1.2651360874745443, + "grad_norm": 0.09084206819534302, + "learning_rate": 3.5787413716338614e-06, + "loss": 0.0016, + "step": 74860 + }, + { + "epoch": 1.265305087753395, + "grad_norm": 0.019888978451490402, + "learning_rate": 3.5773274414615207e-06, + "loss": 0.0008, + "step": 74870 + }, + { + "epoch": 1.2654740880322453, + "grad_norm": 0.036904774606227875, + "learning_rate": 3.5759136350685876e-06, + "loss": 0.0015, + "step": 74880 + }, + { + "epoch": 1.2656430883110956, + "grad_norm": 0.10370167344808578, + "learning_rate": 3.5744999525780666e-06, + "loss": 0.0013, + "step": 74890 + }, + { + "epoch": 1.2658120885899462, + "grad_norm": 0.09771838784217834, + "learning_rate": 3.5730863941129566e-06, + "loss": 0.0016, + "step": 74900 + }, + { + "epoch": 1.2659810888687966, + "grad_norm": 0.032277822494506836, + "learning_rate": 3.5716729597962435e-06, + "loss": 0.001, + "step": 74910 + }, + { + "epoch": 1.2661500891476472, + "grad_norm": 0.03454013541340828, + "learning_rate": 3.5702596497509053e-06, + "loss": 0.0018, + "step": 74920 + }, + { + "epoch": 1.2663190894264975, + "grad_norm": 0.0273564625531435, + "learning_rate": 3.568846464099902e-06, + "loss": 0.0008, + "step": 74930 + }, + { + "epoch": 1.266488089705348, + "grad_norm": 0.04307074472308159, + "learning_rate": 3.5674334029661937e-06, + "loss": 0.0012, + "step": 74940 + }, + { + "epoch": 1.2666570899841985, + "grad_norm": 0.05055421218276024, + "learning_rate": 3.566020466472717e-06, + "loss": 0.0008, + "step": 74950 + }, + { + "epoch": 1.266826090263049, + "grad_norm": 0.0491347461938858, + "learning_rate": 3.5646076547424106e-06, + "loss": 0.0008, + "step": 74960 + }, + { + "epoch": 1.2669950905418994, + "grad_norm": 0.02737373672425747, + "learning_rate": 3.5631949678981904e-06, + "loss": 0.0011, + "step": 74970 + }, + { + "epoch": 1.2671640908207498, + "grad_norm": 0.05359240621328354, + "learning_rate": 3.561782406062971e-06, + "loss": 0.0007, + "step": 74980 + }, + { + "epoch": 1.2673330910996004, + "grad_norm": 
0.040238119661808014, + "learning_rate": 3.560369969359649e-06, + "loss": 0.0009, + "step": 74990 + }, + { + "epoch": 1.2675020913784507, + "grad_norm": 0.05049556493759155, + "learning_rate": 3.5589576579111167e-06, + "loss": 0.0016, + "step": 75000 + }, + { + "epoch": 1.2676710916573013, + "grad_norm": 0.023869765922427177, + "learning_rate": 3.557545471840248e-06, + "loss": 0.0008, + "step": 75010 + }, + { + "epoch": 1.2678400919361517, + "grad_norm": 0.020744100213050842, + "learning_rate": 3.5561334112699154e-06, + "loss": 0.001, + "step": 75020 + }, + { + "epoch": 1.268009092215002, + "grad_norm": 0.07367073744535446, + "learning_rate": 3.5547214763229686e-06, + "loss": 0.0012, + "step": 75030 + }, + { + "epoch": 1.2681780924938526, + "grad_norm": 0.06526787579059601, + "learning_rate": 3.5533096671222556e-06, + "loss": 0.0018, + "step": 75040 + }, + { + "epoch": 1.2683470927727032, + "grad_norm": 0.11561532318592072, + "learning_rate": 3.5518979837906136e-06, + "loss": 0.001, + "step": 75050 + }, + { + "epoch": 1.2685160930515536, + "grad_norm": 0.06659328937530518, + "learning_rate": 3.550486426450861e-06, + "loss": 0.003, + "step": 75060 + }, + { + "epoch": 1.268685093330404, + "grad_norm": 0.019424647092819214, + "learning_rate": 3.549074995225815e-06, + "loss": 0.002, + "step": 75070 + }, + { + "epoch": 1.2688540936092545, + "grad_norm": 0.009543772786855698, + "learning_rate": 3.547663690238271e-06, + "loss": 0.0004, + "step": 75080 + }, + { + "epoch": 1.2690230938881049, + "grad_norm": 0.03657336160540581, + "learning_rate": 3.5462525116110246e-06, + "loss": 0.0013, + "step": 75090 + }, + { + "epoch": 1.2691920941669554, + "grad_norm": 0.17695069313049316, + "learning_rate": 3.5448414594668524e-06, + "loss": 0.0014, + "step": 75100 + }, + { + "epoch": 1.2693610944458058, + "grad_norm": 0.0062867943197488785, + "learning_rate": 3.543430533928525e-06, + "loss": 0.0018, + "step": 75110 + }, + { + "epoch": 1.2695300947246562, + "grad_norm": 0.04585151746869087, + "learning_rate": 3.5420197351187966e-06, + "loss": 0.0008, + "step": 75120 + }, + { + "epoch": 1.2696990950035068, + "grad_norm": 0.10385863482952118, + "learning_rate": 3.540609063160418e-06, + "loss": 0.0009, + "step": 75130 + }, + { + "epoch": 1.2698680952823573, + "grad_norm": 0.03469540923833847, + "learning_rate": 3.539198518176119e-06, + "loss": 0.0008, + "step": 75140 + }, + { + "epoch": 1.2700370955612077, + "grad_norm": 0.06419689953327179, + "learning_rate": 3.5377881002886293e-06, + "loss": 0.0005, + "step": 75150 + }, + { + "epoch": 1.270206095840058, + "grad_norm": 0.05026857554912567, + "learning_rate": 3.536377809620657e-06, + "loss": 0.0007, + "step": 75160 + }, + { + "epoch": 1.2703750961189086, + "grad_norm": 0.015963979065418243, + "learning_rate": 3.534967646294908e-06, + "loss": 0.0007, + "step": 75170 + }, + { + "epoch": 1.270544096397759, + "grad_norm": 0.08267915993928909, + "learning_rate": 3.5335576104340715e-06, + "loss": 0.0006, + "step": 75180 + }, + { + "epoch": 1.2707130966766096, + "grad_norm": 0.2071213573217392, + "learning_rate": 3.532147702160828e-06, + "loss": 0.0013, + "step": 75190 + }, + { + "epoch": 1.27088209695546, + "grad_norm": 0.054513413459062576, + "learning_rate": 3.5307379215978453e-06, + "loss": 0.0006, + "step": 75200 + }, + { + "epoch": 1.2710510972343103, + "grad_norm": 0.06152530387043953, + "learning_rate": 3.5293282688677843e-06, + "loss": 0.0006, + "step": 75210 + }, + { + "epoch": 1.271220097513161, + "grad_norm": 0.05172324180603027, + "learning_rate": 
3.5279187440932883e-06, + "loss": 0.0011, + "step": 75220 + }, + { + "epoch": 1.2713890977920115, + "grad_norm": 0.020962441340088844, + "learning_rate": 3.5265093473969948e-06, + "loss": 0.0007, + "step": 75230 + }, + { + "epoch": 1.2715580980708618, + "grad_norm": 0.04996019974350929, + "learning_rate": 3.5251000789015255e-06, + "loss": 0.0009, + "step": 75240 + }, + { + "epoch": 1.2717270983497122, + "grad_norm": 0.03674384206533432, + "learning_rate": 3.523690938729498e-06, + "loss": 0.0007, + "step": 75250 + }, + { + "epoch": 1.2718960986285628, + "grad_norm": 0.08864463865756989, + "learning_rate": 3.522281927003509e-06, + "loss": 0.0007, + "step": 75260 + }, + { + "epoch": 1.2720650989074131, + "grad_norm": 0.03137887269258499, + "learning_rate": 3.5208730438461535e-06, + "loss": 0.0004, + "step": 75270 + }, + { + "epoch": 1.2722340991862637, + "grad_norm": 0.004854999948292971, + "learning_rate": 3.519464289380009e-06, + "loss": 0.0012, + "step": 75280 + }, + { + "epoch": 1.272403099465114, + "grad_norm": 0.01660696417093277, + "learning_rate": 3.5180556637276454e-06, + "loss": 0.0007, + "step": 75290 + }, + { + "epoch": 1.2725720997439645, + "grad_norm": 0.1040768101811409, + "learning_rate": 3.5166471670116188e-06, + "loss": 0.0007, + "step": 75300 + }, + { + "epoch": 1.272741100022815, + "grad_norm": 0.06876754760742188, + "learning_rate": 3.5152387993544753e-06, + "loss": 0.0011, + "step": 75310 + }, + { + "epoch": 1.2729101003016656, + "grad_norm": 0.004151761531829834, + "learning_rate": 3.5138305608787514e-06, + "loss": 0.0008, + "step": 75320 + }, + { + "epoch": 1.273079100580516, + "grad_norm": 0.003927671350538731, + "learning_rate": 3.5124224517069683e-06, + "loss": 0.0015, + "step": 75330 + }, + { + "epoch": 1.2732481008593663, + "grad_norm": 0.05192035809159279, + "learning_rate": 3.5110144719616408e-06, + "loss": 0.0007, + "step": 75340 + }, + { + "epoch": 1.273417101138217, + "grad_norm": 0.011413088999688625, + "learning_rate": 3.5096066217652668e-06, + "loss": 0.0005, + "step": 75350 + }, + { + "epoch": 1.2735861014170673, + "grad_norm": 0.020405396819114685, + "learning_rate": 3.5081989012403395e-06, + "loss": 0.0011, + "step": 75360 + }, + { + "epoch": 1.2737551016959179, + "grad_norm": 0.048676881939172745, + "learning_rate": 3.5067913105093337e-06, + "loss": 0.0005, + "step": 75370 + }, + { + "epoch": 1.2739241019747682, + "grad_norm": 0.09745845198631287, + "learning_rate": 3.50538384969472e-06, + "loss": 0.0014, + "step": 75380 + }, + { + "epoch": 1.2740931022536186, + "grad_norm": 0.0010723049053922296, + "learning_rate": 3.5039765189189515e-06, + "loss": 0.0011, + "step": 75390 + }, + { + "epoch": 1.2742621025324692, + "grad_norm": 0.12608641386032104, + "learning_rate": 3.5025693183044766e-06, + "loss": 0.0009, + "step": 75400 + }, + { + "epoch": 1.2744311028113198, + "grad_norm": 0.04926927015185356, + "learning_rate": 3.501162247973724e-06, + "loss": 0.0006, + "step": 75410 + }, + { + "epoch": 1.2746001030901701, + "grad_norm": 0.07403943687677383, + "learning_rate": 3.4997553080491203e-06, + "loss": 0.002, + "step": 75420 + }, + { + "epoch": 1.2747691033690205, + "grad_norm": 0.05200944095849991, + "learning_rate": 3.4983484986530713e-06, + "loss": 0.0011, + "step": 75430 + }, + { + "epoch": 1.274938103647871, + "grad_norm": 0.02016104944050312, + "learning_rate": 3.496941819907981e-06, + "loss": 0.0005, + "step": 75440 + }, + { + "epoch": 1.2751071039267214, + "grad_norm": 0.0014036950888112187, + "learning_rate": 3.4955352719362323e-06, + "loss": 
0.0014, + "step": 75450 + }, + { + "epoch": 1.275276104205572, + "grad_norm": 0.04418787732720375, + "learning_rate": 3.4941288548602056e-06, + "loss": 0.0007, + "step": 75460 + }, + { + "epoch": 1.2754451044844224, + "grad_norm": 0.023052405565977097, + "learning_rate": 3.492722568802265e-06, + "loss": 0.0015, + "step": 75470 + }, + { + "epoch": 1.2756141047632727, + "grad_norm": 0.02741224505007267, + "learning_rate": 3.491316413884763e-06, + "loss": 0.0012, + "step": 75480 + }, + { + "epoch": 1.2757831050421233, + "grad_norm": 0.00827124435454607, + "learning_rate": 3.489910390230042e-06, + "loss": 0.0012, + "step": 75490 + }, + { + "epoch": 1.275952105320974, + "grad_norm": 0.008974979631602764, + "learning_rate": 3.4885044979604352e-06, + "loss": 0.0009, + "step": 75500 + }, + { + "epoch": 1.2761211055998243, + "grad_norm": 0.021223215386271477, + "learning_rate": 3.487098737198259e-06, + "loss": 0.0008, + "step": 75510 + }, + { + "epoch": 1.2762901058786746, + "grad_norm": 0.01636037603020668, + "learning_rate": 3.485693108065825e-06, + "loss": 0.0015, + "step": 75520 + }, + { + "epoch": 1.2764591061575252, + "grad_norm": 0.07580610364675522, + "learning_rate": 3.484287610685425e-06, + "loss": 0.001, + "step": 75530 + }, + { + "epoch": 1.2766281064363756, + "grad_norm": 0.04372125118970871, + "learning_rate": 3.482882245179349e-06, + "loss": 0.0011, + "step": 75540 + }, + { + "epoch": 1.2767971067152262, + "grad_norm": 0.041074804961681366, + "learning_rate": 3.4814770116698665e-06, + "loss": 0.0008, + "step": 75550 + }, + { + "epoch": 1.2769661069940765, + "grad_norm": 0.04173261299729347, + "learning_rate": 3.4800719102792412e-06, + "loss": 0.0006, + "step": 75560 + }, + { + "epoch": 1.277135107272927, + "grad_norm": 0.02142047882080078, + "learning_rate": 3.478666941129725e-06, + "loss": 0.0009, + "step": 75570 + }, + { + "epoch": 1.2773041075517775, + "grad_norm": 0.10476000607013702, + "learning_rate": 3.477262104343555e-06, + "loss": 0.0009, + "step": 75580 + }, + { + "epoch": 1.2774731078306278, + "grad_norm": 0.04013489559292793, + "learning_rate": 3.475857400042961e-06, + "loss": 0.0006, + "step": 75590 + }, + { + "epoch": 1.2776421081094784, + "grad_norm": 0.03097931295633316, + "learning_rate": 3.4744528283501566e-06, + "loss": 0.0005, + "step": 75600 + }, + { + "epoch": 1.2778111083883288, + "grad_norm": 0.01865074224770069, + "learning_rate": 3.4730483893873496e-06, + "loss": 0.0005, + "step": 75610 + }, + { + "epoch": 1.2779801086671794, + "grad_norm": 0.0692799761891365, + "learning_rate": 3.471644083276729e-06, + "loss": 0.0008, + "step": 75620 + }, + { + "epoch": 1.2781491089460297, + "grad_norm": 0.020755227655172348, + "learning_rate": 3.47023991014048e-06, + "loss": 0.0008, + "step": 75630 + }, + { + "epoch": 1.2783181092248803, + "grad_norm": 0.10251430422067642, + "learning_rate": 3.4688358701007686e-06, + "loss": 0.0006, + "step": 75640 + }, + { + "epoch": 1.2784871095037307, + "grad_norm": 0.07248004525899887, + "learning_rate": 3.467431963279756e-06, + "loss": 0.0004, + "step": 75650 + }, + { + "epoch": 1.278656109782581, + "grad_norm": 0.05969158187508583, + "learning_rate": 3.4660281897995885e-06, + "loss": 0.0004, + "step": 75660 + }, + { + "epoch": 1.2788251100614316, + "grad_norm": 0.00034028541995212436, + "learning_rate": 3.4646245497824004e-06, + "loss": 0.0004, + "step": 75670 + }, + { + "epoch": 1.278994110340282, + "grad_norm": 0.10661555081605911, + "learning_rate": 3.463221043350315e-06, + "loss": 0.0012, + "step": 75680 + }, + { + "epoch": 
1.2791631106191326, + "grad_norm": 0.02095131203532219, + "learning_rate": 3.4618176706254466e-06, + "loss": 0.0019, + "step": 75690 + }, + { + "epoch": 1.279332110897983, + "grad_norm": 0.06987152993679047, + "learning_rate": 3.460414431729891e-06, + "loss": 0.0011, + "step": 75700 + }, + { + "epoch": 1.2795011111768335, + "grad_norm": 0.060856495052576065, + "learning_rate": 3.4590113267857417e-06, + "loss": 0.0021, + "step": 75710 + }, + { + "epoch": 1.2796701114556839, + "grad_norm": 0.06990408152341843, + "learning_rate": 3.457608355915071e-06, + "loss": 0.0009, + "step": 75720 + }, + { + "epoch": 1.2798391117345345, + "grad_norm": 0.06350047141313553, + "learning_rate": 3.4562055192399486e-06, + "loss": 0.0011, + "step": 75730 + }, + { + "epoch": 1.2800081120133848, + "grad_norm": 0.005618101917207241, + "learning_rate": 3.4548028168824237e-06, + "loss": 0.0012, + "step": 75740 + }, + { + "epoch": 1.2801771122922352, + "grad_norm": 0.04957776144146919, + "learning_rate": 3.4534002489645413e-06, + "loss": 0.0013, + "step": 75750 + }, + { + "epoch": 1.2803461125710858, + "grad_norm": 0.06811799854040146, + "learning_rate": 3.45199781560833e-06, + "loss": 0.0006, + "step": 75760 + }, + { + "epoch": 1.2805151128499361, + "grad_norm": 0.024274202063679695, + "learning_rate": 3.4505955169358108e-06, + "loss": 0.001, + "step": 75770 + }, + { + "epoch": 1.2806841131287867, + "grad_norm": 0.07120922207832336, + "learning_rate": 3.449193353068987e-06, + "loss": 0.0007, + "step": 75780 + }, + { + "epoch": 1.280853113407637, + "grad_norm": 0.06161806359887123, + "learning_rate": 3.447791324129857e-06, + "loss": 0.0015, + "step": 75790 + }, + { + "epoch": 1.2810221136864877, + "grad_norm": 0.03518872708082199, + "learning_rate": 3.4463894302404004e-06, + "loss": 0.0013, + "step": 75800 + }, + { + "epoch": 1.281191113965338, + "grad_norm": 0.016944030299782753, + "learning_rate": 3.444987671522591e-06, + "loss": 0.0006, + "step": 75810 + }, + { + "epoch": 1.2813601142441886, + "grad_norm": 0.04125964269042015, + "learning_rate": 3.4435860480983907e-06, + "loss": 0.0009, + "step": 75820 + }, + { + "epoch": 1.281529114523039, + "grad_norm": 0.035951171070337296, + "learning_rate": 3.4421845600897425e-06, + "loss": 0.0011, + "step": 75830 + }, + { + "epoch": 1.2816981148018893, + "grad_norm": 0.05894112214446068, + "learning_rate": 3.4407832076185876e-06, + "loss": 0.0017, + "step": 75840 + }, + { + "epoch": 1.28186711508074, + "grad_norm": 0.012359118089079857, + "learning_rate": 3.439381990806846e-06, + "loss": 0.0006, + "step": 75850 + }, + { + "epoch": 1.2820361153595903, + "grad_norm": 0.030763106420636177, + "learning_rate": 3.4379809097764336e-06, + "loss": 0.0007, + "step": 75860 + }, + { + "epoch": 1.2822051156384409, + "grad_norm": 0.034162361174821854, + "learning_rate": 3.43657996464925e-06, + "loss": 0.0014, + "step": 75870 + }, + { + "epoch": 1.2823741159172912, + "grad_norm": 0.09156583249568939, + "learning_rate": 3.435179155547186e-06, + "loss": 0.0009, + "step": 75880 + }, + { + "epoch": 1.2825431161961416, + "grad_norm": 0.03484535962343216, + "learning_rate": 3.433778482592115e-06, + "loss": 0.0007, + "step": 75890 + }, + { + "epoch": 1.2827121164749922, + "grad_norm": 0.033580485731363297, + "learning_rate": 3.432377945905906e-06, + "loss": 0.0003, + "step": 75900 + }, + { + "epoch": 1.2828811167538428, + "grad_norm": 0.07795017957687378, + "learning_rate": 3.43097754561041e-06, + "loss": 0.0008, + "step": 75910 + }, + { + "epoch": 1.2830501170326931, + "grad_norm": 
0.03439640253782272, + "learning_rate": 3.429577281827471e-06, + "loss": 0.0008, + "step": 75920 + }, + { + "epoch": 1.2832191173115435, + "grad_norm": 0.10929620265960693, + "learning_rate": 3.4281771546789155e-06, + "loss": 0.001, + "step": 75930 + }, + { + "epoch": 1.283388117590394, + "grad_norm": 0.018059978261590004, + "learning_rate": 3.4267771642865645e-06, + "loss": 0.0011, + "step": 75940 + }, + { + "epoch": 1.2835571178692444, + "grad_norm": 0.0037063744384795427, + "learning_rate": 3.425377310772222e-06, + "loss": 0.0004, + "step": 75950 + }, + { + "epoch": 1.283726118148095, + "grad_norm": 0.01876419596374035, + "learning_rate": 3.4239775942576835e-06, + "loss": 0.0013, + "step": 75960 + }, + { + "epoch": 1.2838951184269454, + "grad_norm": 0.06629638373851776, + "learning_rate": 3.4225780148647285e-06, + "loss": 0.0012, + "step": 75970 + }, + { + "epoch": 1.2840641187057957, + "grad_norm": 0.0014231489039957523, + "learning_rate": 3.4211785727151314e-06, + "loss": 0.0007, + "step": 75980 + }, + { + "epoch": 1.2842331189846463, + "grad_norm": 0.025891413912177086, + "learning_rate": 3.4197792679306462e-06, + "loss": 0.0004, + "step": 75990 + }, + { + "epoch": 1.284402119263497, + "grad_norm": 0.13072508573532104, + "learning_rate": 3.418380100633023e-06, + "loss": 0.0016, + "step": 76000 + }, + { + "epoch": 1.2845711195423473, + "grad_norm": 0.0022091337013989687, + "learning_rate": 3.416981070943992e-06, + "loss": 0.0011, + "step": 76010 + }, + { + "epoch": 1.2847401198211976, + "grad_norm": 0.060467787086963654, + "learning_rate": 3.4155821789852796e-06, + "loss": 0.0011, + "step": 76020 + }, + { + "epoch": 1.2849091201000482, + "grad_norm": 0.061618030071258545, + "learning_rate": 3.414183424878592e-06, + "loss": 0.0011, + "step": 76030 + }, + { + "epoch": 1.2850781203788986, + "grad_norm": 0.022405657917261124, + "learning_rate": 3.412784808745632e-06, + "loss": 0.0011, + "step": 76040 + }, + { + "epoch": 1.2852471206577492, + "grad_norm": 0.007900248281657696, + "learning_rate": 3.411386330708082e-06, + "loss": 0.0009, + "step": 76050 + }, + { + "epoch": 1.2854161209365995, + "grad_norm": 0.03440115228295326, + "learning_rate": 3.40998799088762e-06, + "loss": 0.0005, + "step": 76060 + }, + { + "epoch": 1.2855851212154499, + "grad_norm": 0.004934143740683794, + "learning_rate": 3.4085897894059054e-06, + "loss": 0.0007, + "step": 76070 + }, + { + "epoch": 1.2857541214943005, + "grad_norm": 0.07494954019784927, + "learning_rate": 3.4071917263845894e-06, + "loss": 0.0018, + "step": 76080 + }, + { + "epoch": 1.285923121773151, + "grad_norm": 0.03333232179284096, + "learning_rate": 3.405793801945313e-06, + "loss": 0.0009, + "step": 76090 + }, + { + "epoch": 1.2860921220520014, + "grad_norm": 0.07441503554582596, + "learning_rate": 3.404396016209697e-06, + "loss": 0.0005, + "step": 76100 + }, + { + "epoch": 1.2862611223308518, + "grad_norm": 0.03767404705286026, + "learning_rate": 3.4029983692993607e-06, + "loss": 0.0008, + "step": 76110 + }, + { + "epoch": 1.2864301226097024, + "grad_norm": 0.08625879883766174, + "learning_rate": 3.401600861335902e-06, + "loss": 0.0008, + "step": 76120 + }, + { + "epoch": 1.2865991228885527, + "grad_norm": 0.03797721117734909, + "learning_rate": 3.4002034924409154e-06, + "loss": 0.0007, + "step": 76130 + }, + { + "epoch": 1.2867681231674033, + "grad_norm": 0.009482428431510925, + "learning_rate": 3.398806262735973e-06, + "loss": 0.001, + "step": 76140 + }, + { + "epoch": 1.2869371234462537, + "grad_norm": 0.395962119102478, + "learning_rate": 
3.397409172342646e-06, + "loss": 0.0014, + "step": 76150 + }, + { + "epoch": 1.287106123725104, + "grad_norm": 0.007825309410691261, + "learning_rate": 3.3960122213824836e-06, + "loss": 0.0005, + "step": 76160 + }, + { + "epoch": 1.2872751240039546, + "grad_norm": 0.02285987325012684, + "learning_rate": 3.394615409977032e-06, + "loss": 0.0017, + "step": 76170 + }, + { + "epoch": 1.2874441242828052, + "grad_norm": 0.06611663848161697, + "learning_rate": 3.393218738247816e-06, + "loss": 0.0015, + "step": 76180 + }, + { + "epoch": 1.2876131245616556, + "grad_norm": 0.03683452680706978, + "learning_rate": 3.391822206316357e-06, + "loss": 0.0012, + "step": 76190 + }, + { + "epoch": 1.287782124840506, + "grad_norm": 0.0007462403154931962, + "learning_rate": 3.3904258143041556e-06, + "loss": 0.0018, + "step": 76200 + }, + { + "epoch": 1.2879511251193565, + "grad_norm": 0.0829184278845787, + "learning_rate": 3.3890295623327086e-06, + "loss": 0.0008, + "step": 76210 + }, + { + "epoch": 1.2881201253982069, + "grad_norm": 0.19146932661533356, + "learning_rate": 3.387633450523493e-06, + "loss": 0.001, + "step": 76220 + }, + { + "epoch": 1.2882891256770574, + "grad_norm": 0.00407722732052207, + "learning_rate": 3.386237478997981e-06, + "loss": 0.0021, + "step": 76230 + }, + { + "epoch": 1.2884581259559078, + "grad_norm": 0.002176871057599783, + "learning_rate": 3.384841647877626e-06, + "loss": 0.0005, + "step": 76240 + }, + { + "epoch": 1.2886271262347582, + "grad_norm": 0.07185068726539612, + "learning_rate": 3.3834459572838753e-06, + "loss": 0.0011, + "step": 76250 + }, + { + "epoch": 1.2887961265136088, + "grad_norm": 0.09929092973470688, + "learning_rate": 3.382050407338156e-06, + "loss": 0.001, + "step": 76260 + }, + { + "epoch": 1.2889651267924593, + "grad_norm": 0.06979778409004211, + "learning_rate": 3.380654998161893e-06, + "loss": 0.0011, + "step": 76270 + }, + { + "epoch": 1.2891341270713097, + "grad_norm": 0.05053688585758209, + "learning_rate": 3.3792597298764884e-06, + "loss": 0.0014, + "step": 76280 + }, + { + "epoch": 1.28930312735016, + "grad_norm": 0.022292081266641617, + "learning_rate": 3.377864602603343e-06, + "loss": 0.0006, + "step": 76290 + }, + { + "epoch": 1.2894721276290106, + "grad_norm": 0.04626830667257309, + "learning_rate": 3.376469616463834e-06, + "loss": 0.001, + "step": 76300 + }, + { + "epoch": 1.289641127907861, + "grad_norm": 0.023113220930099487, + "learning_rate": 3.375074771579335e-06, + "loss": 0.0006, + "step": 76310 + }, + { + "epoch": 1.2898101281867116, + "grad_norm": 0.07600270211696625, + "learning_rate": 3.373680068071205e-06, + "loss": 0.0011, + "step": 76320 + }, + { + "epoch": 1.289979128465562, + "grad_norm": 0.18534424901008606, + "learning_rate": 3.3722855060607874e-06, + "loss": 0.0025, + "step": 76330 + }, + { + "epoch": 1.2901481287444123, + "grad_norm": 0.03411588445305824, + "learning_rate": 3.3708910856694177e-06, + "loss": 0.0009, + "step": 76340 + }, + { + "epoch": 1.290317129023263, + "grad_norm": 0.04233044013381004, + "learning_rate": 3.3694968070184162e-06, + "loss": 0.0006, + "step": 76350 + }, + { + "epoch": 1.2904861293021135, + "grad_norm": 0.028893014416098595, + "learning_rate": 3.368102670229094e-06, + "loss": 0.001, + "step": 76360 + }, + { + "epoch": 1.2906551295809638, + "grad_norm": 0.05490986257791519, + "learning_rate": 3.366708675422745e-06, + "loss": 0.0006, + "step": 76370 + }, + { + "epoch": 1.2908241298598142, + "grad_norm": 0.009264852851629257, + "learning_rate": 3.365314822720657e-06, + "loss": 0.0009, + "step": 
76380 + }, + { + "epoch": 1.2909931301386648, + "grad_norm": 0.15807749330997467, + "learning_rate": 3.3639211122440963e-06, + "loss": 0.001, + "step": 76390 + }, + { + "epoch": 1.2911621304175152, + "grad_norm": 0.06726587563753128, + "learning_rate": 3.362527544114329e-06, + "loss": 0.0009, + "step": 76400 + }, + { + "epoch": 1.2913311306963657, + "grad_norm": 0.009346742182970047, + "learning_rate": 3.3611341184525957e-06, + "loss": 0.0014, + "step": 76410 + }, + { + "epoch": 1.291500130975216, + "grad_norm": 0.037689968943595886, + "learning_rate": 3.359740835380137e-06, + "loss": 0.0008, + "step": 76420 + }, + { + "epoch": 1.2916691312540665, + "grad_norm": 0.013817720115184784, + "learning_rate": 3.3583476950181727e-06, + "loss": 0.001, + "step": 76430 + }, + { + "epoch": 1.291838131532917, + "grad_norm": 0.043633535504341125, + "learning_rate": 3.356954697487912e-06, + "loss": 0.0012, + "step": 76440 + }, + { + "epoch": 1.2920071318117674, + "grad_norm": 0.08570516854524612, + "learning_rate": 3.355561842910553e-06, + "loss": 0.0009, + "step": 76450 + }, + { + "epoch": 1.292176132090618, + "grad_norm": 0.12532709538936615, + "learning_rate": 3.3541691314072835e-06, + "loss": 0.0011, + "step": 76460 + }, + { + "epoch": 1.2923451323694684, + "grad_norm": 0.035797711461782455, + "learning_rate": 3.3527765630992715e-06, + "loss": 0.0007, + "step": 76470 + }, + { + "epoch": 1.292514132648319, + "grad_norm": 0.06022035330533981, + "learning_rate": 3.3513841381076812e-06, + "loss": 0.0013, + "step": 76480 + }, + { + "epoch": 1.2926831329271693, + "grad_norm": 0.06282058358192444, + "learning_rate": 3.3499918565536565e-06, + "loss": 0.001, + "step": 76490 + }, + { + "epoch": 1.2928521332060199, + "grad_norm": 0.022315459325909615, + "learning_rate": 3.3485997185583375e-06, + "loss": 0.001, + "step": 76500 + }, + { + "epoch": 1.2930211334848702, + "grad_norm": 0.02336900681257248, + "learning_rate": 3.3472077242428414e-06, + "loss": 0.001, + "step": 76510 + }, + { + "epoch": 1.2931901337637206, + "grad_norm": 0.042133525013923645, + "learning_rate": 3.3458158737282824e-06, + "loss": 0.0008, + "step": 76520 + }, + { + "epoch": 1.2933591340425712, + "grad_norm": 0.02696537785232067, + "learning_rate": 3.3444241671357563e-06, + "loss": 0.0011, + "step": 76530 + }, + { + "epoch": 1.2935281343214216, + "grad_norm": 0.05946613848209381, + "learning_rate": 3.3430326045863515e-06, + "loss": 0.0006, + "step": 76540 + }, + { + "epoch": 1.2936971346002721, + "grad_norm": 0.07703051716089249, + "learning_rate": 3.3416411862011356e-06, + "loss": 0.001, + "step": 76550 + }, + { + "epoch": 1.2938661348791225, + "grad_norm": 0.05500364303588867, + "learning_rate": 3.3402499121011734e-06, + "loss": 0.0015, + "step": 76560 + }, + { + "epoch": 1.294035135157973, + "grad_norm": 0.041644442826509476, + "learning_rate": 3.3388587824075094e-06, + "loss": 0.0008, + "step": 76570 + }, + { + "epoch": 1.2942041354368234, + "grad_norm": 0.12322068214416504, + "learning_rate": 3.337467797241179e-06, + "loss": 0.0006, + "step": 76580 + }, + { + "epoch": 1.294373135715674, + "grad_norm": 0.02212311513721943, + "learning_rate": 3.3360769567232077e-06, + "loss": 0.0009, + "step": 76590 + }, + { + "epoch": 1.2945421359945244, + "grad_norm": 0.09738665819168091, + "learning_rate": 3.3346862609746005e-06, + "loss": 0.0012, + "step": 76600 + }, + { + "epoch": 1.2947111362733748, + "grad_norm": 0.11395315825939178, + "learning_rate": 3.3332957101163597e-06, + "loss": 0.0028, + "step": 76610 + }, + { + "epoch": 
1.2948801365522253, + "grad_norm": 0.04635939002037048, + "learning_rate": 3.3319053042694653e-06, + "loss": 0.001, + "step": 76620 + }, + { + "epoch": 1.2950491368310757, + "grad_norm": 0.020105794072151184, + "learning_rate": 3.3305150435548927e-06, + "loss": 0.0008, + "step": 76630 + }, + { + "epoch": 1.2952181371099263, + "grad_norm": 0.01157683227211237, + "learning_rate": 3.3291249280935995e-06, + "loss": 0.0004, + "step": 76640 + }, + { + "epoch": 1.2953871373887766, + "grad_norm": 0.0161611158400774, + "learning_rate": 3.3277349580065343e-06, + "loss": 0.0009, + "step": 76650 + }, + { + "epoch": 1.2955561376676272, + "grad_norm": 0.03194257989525795, + "learning_rate": 3.326345133414629e-06, + "loss": 0.0005, + "step": 76660 + }, + { + "epoch": 1.2957251379464776, + "grad_norm": 0.04428780823945999, + "learning_rate": 3.3249554544388074e-06, + "loss": 0.0008, + "step": 76670 + }, + { + "epoch": 1.2958941382253282, + "grad_norm": 0.02459871396422386, + "learning_rate": 3.3235659211999753e-06, + "loss": 0.0008, + "step": 76680 + }, + { + "epoch": 1.2960631385041785, + "grad_norm": 0.12832576036453247, + "learning_rate": 3.3221765338190326e-06, + "loss": 0.0013, + "step": 76690 + }, + { + "epoch": 1.296232138783029, + "grad_norm": 0.04543474689126015, + "learning_rate": 3.320787292416859e-06, + "loss": 0.001, + "step": 76700 + }, + { + "epoch": 1.2964011390618795, + "grad_norm": 0.044971760362386703, + "learning_rate": 3.3193981971143275e-06, + "loss": 0.0009, + "step": 76710 + }, + { + "epoch": 1.2965701393407298, + "grad_norm": 0.0427575409412384, + "learning_rate": 3.3180092480322943e-06, + "loss": 0.0006, + "step": 76720 + }, + { + "epoch": 1.2967391396195804, + "grad_norm": 0.06516630202531815, + "learning_rate": 3.3166204452916085e-06, + "loss": 0.0011, + "step": 76730 + }, + { + "epoch": 1.2969081398984308, + "grad_norm": 0.12783502042293549, + "learning_rate": 3.315231789013098e-06, + "loss": 0.001, + "step": 76740 + }, + { + "epoch": 1.2970771401772814, + "grad_norm": 0.05215373635292053, + "learning_rate": 3.313843279317587e-06, + "loss": 0.0013, + "step": 76750 + }, + { + "epoch": 1.2972461404561317, + "grad_norm": 0.0056024095974862576, + "learning_rate": 3.3124549163258777e-06, + "loss": 0.0013, + "step": 76760 + }, + { + "epoch": 1.2974151407349823, + "grad_norm": 0.05509374663233757, + "learning_rate": 3.3110667001587694e-06, + "loss": 0.0003, + "step": 76770 + }, + { + "epoch": 1.2975841410138327, + "grad_norm": 0.02442927099764347, + "learning_rate": 3.3096786309370387e-06, + "loss": 0.0008, + "step": 76780 + }, + { + "epoch": 1.297753141292683, + "grad_norm": 0.04514923319220543, + "learning_rate": 3.3082907087814585e-06, + "loss": 0.001, + "step": 76790 + }, + { + "epoch": 1.2979221415715336, + "grad_norm": 0.005436763167381287, + "learning_rate": 3.306902933812783e-06, + "loss": 0.001, + "step": 76800 + }, + { + "epoch": 1.298091141850384, + "grad_norm": 0.023903531953692436, + "learning_rate": 3.305515306151756e-06, + "loss": 0.0014, + "step": 76810 + }, + { + "epoch": 1.2982601421292346, + "grad_norm": 0.06894169002771378, + "learning_rate": 3.3041278259191056e-06, + "loss": 0.0011, + "step": 76820 + }, + { + "epoch": 1.298429142408085, + "grad_norm": 0.05309809744358063, + "learning_rate": 3.302740493235551e-06, + "loss": 0.0015, + "step": 76830 + }, + { + "epoch": 1.2985981426869353, + "grad_norm": 0.05424584448337555, + "learning_rate": 3.3013533082217997e-06, + "loss": 0.0014, + "step": 76840 + }, + { + "epoch": 1.2987671429657859, + "grad_norm": 
0.0013488342519849539, + "learning_rate": 3.299966270998538e-06, + "loss": 0.0013, + "step": 76850 + }, + { + "epoch": 1.2989361432446365, + "grad_norm": 0.01820383593440056, + "learning_rate": 3.2985793816864496e-06, + "loss": 0.0009, + "step": 76860 + }, + { + "epoch": 1.2991051435234868, + "grad_norm": 0.017999805510044098, + "learning_rate": 3.297192640406197e-06, + "loss": 0.0006, + "step": 76870 + }, + { + "epoch": 1.2992741438023372, + "grad_norm": 0.07281816750764847, + "learning_rate": 3.295806047278437e-06, + "loss": 0.0007, + "step": 76880 + }, + { + "epoch": 1.2994431440811878, + "grad_norm": 0.12261531502008438, + "learning_rate": 3.2944196024238052e-06, + "loss": 0.0013, + "step": 76890 + }, + { + "epoch": 1.2996121443600381, + "grad_norm": 0.0003672442398965359, + "learning_rate": 3.293033305962934e-06, + "loss": 0.0005, + "step": 76900 + }, + { + "epoch": 1.2997811446388887, + "grad_norm": 0.010020782239735126, + "learning_rate": 3.2916471580164343e-06, + "loss": 0.0017, + "step": 76910 + }, + { + "epoch": 1.299950144917739, + "grad_norm": 0.031483955681324005, + "learning_rate": 3.2902611587049093e-06, + "loss": 0.0012, + "step": 76920 + }, + { + "epoch": 1.3001191451965894, + "grad_norm": 0.06877174973487854, + "learning_rate": 3.2888753081489467e-06, + "loss": 0.0011, + "step": 76930 + }, + { + "epoch": 1.30028814547544, + "grad_norm": 0.06118880957365036, + "learning_rate": 3.2874896064691246e-06, + "loss": 0.0007, + "step": 76940 + }, + { + "epoch": 1.3004571457542906, + "grad_norm": 0.06608627736568451, + "learning_rate": 3.2861040537860025e-06, + "loss": 0.0004, + "step": 76950 + }, + { + "epoch": 1.300626146033141, + "grad_norm": 0.15344126522541046, + "learning_rate": 3.2847186502201335e-06, + "loss": 0.0019, + "step": 76960 + }, + { + "epoch": 1.3007951463119913, + "grad_norm": 0.03516198322176933, + "learning_rate": 3.28333339589205e-06, + "loss": 0.0009, + "step": 76970 + }, + { + "epoch": 1.300964146590842, + "grad_norm": 0.002182116499170661, + "learning_rate": 3.281948290922281e-06, + "loss": 0.0007, + "step": 76980 + }, + { + "epoch": 1.3011331468696923, + "grad_norm": 0.023147817701101303, + "learning_rate": 3.2805633354313334e-06, + "loss": 0.0015, + "step": 76990 + }, + { + "epoch": 1.3013021471485429, + "grad_norm": 0.129550501704216, + "learning_rate": 3.279178529539707e-06, + "loss": 0.0006, + "step": 77000 + }, + { + "epoch": 1.3014711474273932, + "grad_norm": 0.018386520445346832, + "learning_rate": 3.277793873367885e-06, + "loss": 0.0008, + "step": 77010 + }, + { + "epoch": 1.3016401477062436, + "grad_norm": 0.04771873354911804, + "learning_rate": 3.2764093670363422e-06, + "loss": 0.0009, + "step": 77020 + }, + { + "epoch": 1.3018091479850942, + "grad_norm": 0.07971066981554031, + "learning_rate": 3.2750250106655336e-06, + "loss": 0.001, + "step": 77030 + }, + { + "epoch": 1.3019781482639448, + "grad_norm": 0.04912357032299042, + "learning_rate": 3.2736408043759095e-06, + "loss": 0.0006, + "step": 77040 + }, + { + "epoch": 1.3021471485427951, + "grad_norm": 0.02283174730837345, + "learning_rate": 3.2722567482878966e-06, + "loss": 0.0007, + "step": 77050 + }, + { + "epoch": 1.3023161488216455, + "grad_norm": 0.052379060536623, + "learning_rate": 3.2708728425219204e-06, + "loss": 0.0005, + "step": 77060 + }, + { + "epoch": 1.302485149100496, + "grad_norm": 0.000811600242741406, + "learning_rate": 3.2694890871983824e-06, + "loss": 0.0007, + "step": 77070 + }, + { + "epoch": 1.3026541493793464, + "grad_norm": 0.002285461872816086, + "learning_rate": 
3.268105482437679e-06, + "loss": 0.0014, + "step": 77080 + }, + { + "epoch": 1.302823149658197, + "grad_norm": 0.031205566599965096, + "learning_rate": 3.266722028360191e-06, + "loss": 0.0008, + "step": 77090 + }, + { + "epoch": 1.3029921499370474, + "grad_norm": 0.008281203918159008, + "learning_rate": 3.2653387250862827e-06, + "loss": 0.0012, + "step": 77100 + }, + { + "epoch": 1.3031611502158977, + "grad_norm": 0.08469940721988678, + "learning_rate": 3.2639555727363115e-06, + "loss": 0.0009, + "step": 77110 + }, + { + "epoch": 1.3033301504947483, + "grad_norm": 0.10907159000635147, + "learning_rate": 3.262572571430615e-06, + "loss": 0.0009, + "step": 77120 + }, + { + "epoch": 1.303499150773599, + "grad_norm": 0.004431854467839003, + "learning_rate": 3.261189721289525e-06, + "loss": 0.0019, + "step": 77130 + }, + { + "epoch": 1.3036681510524493, + "grad_norm": 0.04053468629717827, + "learning_rate": 3.259807022433352e-06, + "loss": 0.0016, + "step": 77140 + }, + { + "epoch": 1.3038371513312996, + "grad_norm": 0.1110965833067894, + "learning_rate": 3.2584244749824024e-06, + "loss": 0.0011, + "step": 77150 + }, + { + "epoch": 1.3040061516101502, + "grad_norm": 0.06742440909147263, + "learning_rate": 3.2570420790569585e-06, + "loss": 0.0008, + "step": 77160 + }, + { + "epoch": 1.3041751518890006, + "grad_norm": 0.01611647941172123, + "learning_rate": 3.2556598347773003e-06, + "loss": 0.0014, + "step": 77170 + }, + { + "epoch": 1.3043441521678512, + "grad_norm": 0.04131495580077171, + "learning_rate": 3.254277742263687e-06, + "loss": 0.0008, + "step": 77180 + }, + { + "epoch": 1.3045131524467015, + "grad_norm": 0.04060171917080879, + "learning_rate": 3.252895801636369e-06, + "loss": 0.0006, + "step": 77190 + }, + { + "epoch": 1.3046821527255519, + "grad_norm": 0.0038544689305126667, + "learning_rate": 3.2515140130155808e-06, + "loss": 0.0007, + "step": 77200 + }, + { + "epoch": 1.3048511530044025, + "grad_norm": 0.027534153312444687, + "learning_rate": 3.2501323765215454e-06, + "loss": 0.0008, + "step": 77210 + }, + { + "epoch": 1.305020153283253, + "grad_norm": 0.054691337049007416, + "learning_rate": 3.2487508922744703e-06, + "loss": 0.0012, + "step": 77220 + }, + { + "epoch": 1.3051891535621034, + "grad_norm": 0.14498290419578552, + "learning_rate": 3.2473695603945553e-06, + "loss": 0.0015, + "step": 77230 + }, + { + "epoch": 1.3053581538409538, + "grad_norm": 0.028799815103411674, + "learning_rate": 3.245988381001978e-06, + "loss": 0.0012, + "step": 77240 + }, + { + "epoch": 1.3055271541198044, + "grad_norm": 0.0017579590203240514, + "learning_rate": 3.244607354216911e-06, + "loss": 0.0007, + "step": 77250 + }, + { + "epoch": 1.3056961543986547, + "grad_norm": 0.06550420075654984, + "learning_rate": 3.2432264801595082e-06, + "loss": 0.0011, + "step": 77260 + }, + { + "epoch": 1.3058651546775053, + "grad_norm": 0.030126405879855156, + "learning_rate": 3.2418457589499155e-06, + "loss": 0.0009, + "step": 77270 + }, + { + "epoch": 1.3060341549563557, + "grad_norm": 0.008543896488845348, + "learning_rate": 3.2404651907082575e-06, + "loss": 0.0021, + "step": 77280 + }, + { + "epoch": 1.306203155235206, + "grad_norm": 0.09629712253808975, + "learning_rate": 3.239084775554654e-06, + "loss": 0.0006, + "step": 77290 + }, + { + "epoch": 1.3063721555140566, + "grad_norm": 0.003114005085080862, + "learning_rate": 3.2377045136092065e-06, + "loss": 0.0012, + "step": 77300 + }, + { + "epoch": 1.3065411557929072, + "grad_norm": 0.5669236779212952, + "learning_rate": 3.2363244049920063e-06, + "loss": 
0.0009, + "step": 77310 + }, + { + "epoch": 1.3067101560717576, + "grad_norm": 0.014506954699754715, + "learning_rate": 3.234944449823126e-06, + "loss": 0.0006, + "step": 77320 + }, + { + "epoch": 1.306879156350608, + "grad_norm": 0.07578347623348236, + "learning_rate": 3.2335646482226336e-06, + "loss": 0.0006, + "step": 77330 + }, + { + "epoch": 1.3070481566294585, + "grad_norm": 0.005560994613915682, + "learning_rate": 3.2321850003105726e-06, + "loss": 0.0007, + "step": 77340 + }, + { + "epoch": 1.3072171569083089, + "grad_norm": 0.044672898948192596, + "learning_rate": 3.2308055062069817e-06, + "loss": 0.0013, + "step": 77350 + }, + { + "epoch": 1.3073861571871594, + "grad_norm": 0.11879739165306091, + "learning_rate": 3.229426166031886e-06, + "loss": 0.0009, + "step": 77360 + }, + { + "epoch": 1.3075551574660098, + "grad_norm": 0.015471025370061398, + "learning_rate": 3.22804697990529e-06, + "loss": 0.0006, + "step": 77370 + }, + { + "epoch": 1.3077241577448602, + "grad_norm": 0.02590048499405384, + "learning_rate": 3.226667947947193e-06, + "loss": 0.001, + "step": 77380 + }, + { + "epoch": 1.3078931580237108, + "grad_norm": 0.019585004076361656, + "learning_rate": 3.2252890702775776e-06, + "loss": 0.0012, + "step": 77390 + }, + { + "epoch": 1.3080621583025611, + "grad_norm": 0.02094372734427452, + "learning_rate": 3.22391034701641e-06, + "loss": 0.001, + "step": 77400 + }, + { + "epoch": 1.3082311585814117, + "grad_norm": 0.02087017148733139, + "learning_rate": 3.222531778283648e-06, + "loss": 0.0006, + "step": 77410 + }, + { + "epoch": 1.308400158860262, + "grad_norm": 0.09364385157823563, + "learning_rate": 3.2211533641992353e-06, + "loss": 0.0009, + "step": 77420 + }, + { + "epoch": 1.3085691591391126, + "grad_norm": 0.008463174104690552, + "learning_rate": 3.219775104883096e-06, + "loss": 0.0006, + "step": 77430 + }, + { + "epoch": 1.308738159417963, + "grad_norm": 0.017918633297085762, + "learning_rate": 3.2183970004551503e-06, + "loss": 0.001, + "step": 77440 + }, + { + "epoch": 1.3089071596968136, + "grad_norm": 0.02036973461508751, + "learning_rate": 3.217019051035295e-06, + "loss": 0.0013, + "step": 77450 + }, + { + "epoch": 1.309076159975664, + "grad_norm": 0.0491136871278286, + "learning_rate": 3.215641256743424e-06, + "loss": 0.0008, + "step": 77460 + }, + { + "epoch": 1.3092451602545143, + "grad_norm": 0.005848989821970463, + "learning_rate": 3.214263617699407e-06, + "loss": 0.0012, + "step": 77470 + }, + { + "epoch": 1.309414160533365, + "grad_norm": 0.06523486971855164, + "learning_rate": 3.2128861340231076e-06, + "loss": 0.0005, + "step": 77480 + }, + { + "epoch": 1.3095831608122153, + "grad_norm": 0.0042354641482234, + "learning_rate": 3.2115088058343725e-06, + "loss": 0.0014, + "step": 77490 + }, + { + "epoch": 1.3097521610910658, + "grad_norm": 0.02519715204834938, + "learning_rate": 3.2101316332530387e-06, + "loss": 0.0007, + "step": 77500 + }, + { + "epoch": 1.3099211613699162, + "grad_norm": 0.06626711040735245, + "learning_rate": 3.2087546163989235e-06, + "loss": 0.0008, + "step": 77510 + }, + { + "epoch": 1.3100901616487668, + "grad_norm": 0.13485555350780487, + "learning_rate": 3.2073777553918373e-06, + "loss": 0.0012, + "step": 77520 + }, + { + "epoch": 1.3102591619276172, + "grad_norm": 0.02375718578696251, + "learning_rate": 3.206001050351569e-06, + "loss": 0.0007, + "step": 77530 + }, + { + "epoch": 1.3104281622064677, + "grad_norm": 0.06585943698883057, + "learning_rate": 3.2046245013979043e-06, + "loss": 0.001, + "step": 77540 + }, + { + "epoch": 
1.310597162485318, + "grad_norm": 0.10992880910634995, + "learning_rate": 3.2032481086506044e-06, + "loss": 0.001, + "step": 77550 + }, + { + "epoch": 1.3107661627641685, + "grad_norm": 0.009292932227253914, + "learning_rate": 3.2018718722294255e-06, + "loss": 0.0011, + "step": 77560 + }, + { + "epoch": 1.310935163043019, + "grad_norm": 0.08823945373296738, + "learning_rate": 3.2004957922541057e-06, + "loss": 0.0011, + "step": 77570 + }, + { + "epoch": 1.3111041633218694, + "grad_norm": 0.023845108225941658, + "learning_rate": 3.1991198688443712e-06, + "loss": 0.0008, + "step": 77580 + }, + { + "epoch": 1.31127316360072, + "grad_norm": 0.04529750347137451, + "learning_rate": 3.197744102119933e-06, + "loss": 0.0012, + "step": 77590 + }, + { + "epoch": 1.3114421638795704, + "grad_norm": 0.04554332047700882, + "learning_rate": 3.196368492200489e-06, + "loss": 0.0009, + "step": 77600 + }, + { + "epoch": 1.311611164158421, + "grad_norm": 0.05958491936326027, + "learning_rate": 3.1949930392057275e-06, + "loss": 0.0008, + "step": 77610 + }, + { + "epoch": 1.3117801644372713, + "grad_norm": 0.013546890579164028, + "learning_rate": 3.193617743255315e-06, + "loss": 0.0004, + "step": 77620 + }, + { + "epoch": 1.3119491647161219, + "grad_norm": 0.06428714096546173, + "learning_rate": 3.1922426044689133e-06, + "loss": 0.0011, + "step": 77630 + }, + { + "epoch": 1.3121181649949722, + "grad_norm": 0.027526091784238815, + "learning_rate": 3.1908676229661612e-06, + "loss": 0.0005, + "step": 77640 + }, + { + "epoch": 1.3122871652738226, + "grad_norm": 0.04166054353117943, + "learning_rate": 3.1894927988666935e-06, + "loss": 0.0017, + "step": 77650 + }, + { + "epoch": 1.3124561655526732, + "grad_norm": 0.020531712099909782, + "learning_rate": 3.188118132290123e-06, + "loss": 0.0011, + "step": 77660 + }, + { + "epoch": 1.3126251658315236, + "grad_norm": 0.11892349272966385, + "learning_rate": 3.186743623356054e-06, + "loss": 0.0011, + "step": 77670 + }, + { + "epoch": 1.3127941661103741, + "grad_norm": 0.05005523934960365, + "learning_rate": 3.1853692721840755e-06, + "loss": 0.0008, + "step": 77680 + }, + { + "epoch": 1.3129631663892245, + "grad_norm": 0.10019006580114365, + "learning_rate": 3.1839950788937626e-06, + "loss": 0.001, + "step": 77690 + }, + { + "epoch": 1.313132166668075, + "grad_norm": 0.024045975878834724, + "learning_rate": 3.1826210436046752e-06, + "loss": 0.0009, + "step": 77700 + }, + { + "epoch": 1.3133011669469254, + "grad_norm": 0.029203811660408974, + "learning_rate": 3.181247166436364e-06, + "loss": 0.0011, + "step": 77710 + }, + { + "epoch": 1.313470167225776, + "grad_norm": 0.013211055658757687, + "learning_rate": 3.1798734475083606e-06, + "loss": 0.0003, + "step": 77720 + }, + { + "epoch": 1.3136391675046264, + "grad_norm": 0.06947149336338043, + "learning_rate": 3.1784998869401875e-06, + "loss": 0.0005, + "step": 77730 + }, + { + "epoch": 1.3138081677834768, + "grad_norm": 0.08241991698741913, + "learning_rate": 3.1771264848513473e-06, + "loss": 0.001, + "step": 77740 + }, + { + "epoch": 1.3139771680623273, + "grad_norm": 0.11345503479242325, + "learning_rate": 3.175753241361337e-06, + "loss": 0.0007, + "step": 77750 + }, + { + "epoch": 1.3141461683411777, + "grad_norm": 0.1144309788942337, + "learning_rate": 3.1743801565896316e-06, + "loss": 0.0014, + "step": 77760 + }, + { + "epoch": 1.3143151686200283, + "grad_norm": 0.07751011848449707, + "learning_rate": 3.1730072306556985e-06, + "loss": 0.0016, + "step": 77770 + }, + { + "epoch": 1.3144841688988786, + "grad_norm": 
0.05221608653664589, + "learning_rate": 3.1716344636789876e-06, + "loss": 0.0011, + "step": 77780 + }, + { + "epoch": 1.314653169177729, + "grad_norm": 0.004115086980164051, + "learning_rate": 3.170261855778939e-06, + "loss": 0.0012, + "step": 77790 + }, + { + "epoch": 1.3148221694565796, + "grad_norm": 0.0636073425412178, + "learning_rate": 3.1688894070749722e-06, + "loss": 0.0015, + "step": 77800 + }, + { + "epoch": 1.3149911697354302, + "grad_norm": 0.012179211713373661, + "learning_rate": 3.167517117686501e-06, + "loss": 0.0009, + "step": 77810 + }, + { + "epoch": 1.3151601700142805, + "grad_norm": 0.0035941177047789097, + "learning_rate": 3.166144987732917e-06, + "loss": 0.0013, + "step": 77820 + }, + { + "epoch": 1.315329170293131, + "grad_norm": 0.03358374163508415, + "learning_rate": 3.1647730173336065e-06, + "loss": 0.0009, + "step": 77830 + }, + { + "epoch": 1.3154981705719815, + "grad_norm": 0.011771938763558865, + "learning_rate": 3.1634012066079333e-06, + "loss": 0.0004, + "step": 77840 + }, + { + "epoch": 1.3156671708508318, + "grad_norm": 0.015004804357886314, + "learning_rate": 3.1620295556752535e-06, + "loss": 0.0006, + "step": 77850 + }, + { + "epoch": 1.3158361711296824, + "grad_norm": 0.018552575260400772, + "learning_rate": 3.1606580646549094e-06, + "loss": 0.0009, + "step": 77860 + }, + { + "epoch": 1.3160051714085328, + "grad_norm": 0.006084776483476162, + "learning_rate": 3.1592867336662236e-06, + "loss": 0.0007, + "step": 77870 + }, + { + "epoch": 1.3161741716873832, + "grad_norm": 0.0012030642246827483, + "learning_rate": 3.157915562828512e-06, + "loss": 0.0013, + "step": 77880 + }, + { + "epoch": 1.3163431719662337, + "grad_norm": 0.050810765475034714, + "learning_rate": 3.15654455226107e-06, + "loss": 0.0009, + "step": 77890 + }, + { + "epoch": 1.3165121722450843, + "grad_norm": 0.014970553107559681, + "learning_rate": 3.155173702083186e-06, + "loss": 0.0016, + "step": 77900 + }, + { + "epoch": 1.3166811725239347, + "grad_norm": 0.08558324724435806, + "learning_rate": 3.153803012414126e-06, + "loss": 0.001, + "step": 77910 + }, + { + "epoch": 1.316850172802785, + "grad_norm": 0.05313260853290558, + "learning_rate": 3.1524324833731513e-06, + "loss": 0.0011, + "step": 77920 + }, + { + "epoch": 1.3170191730816356, + "grad_norm": 0.017633775249123573, + "learning_rate": 3.1510621150794997e-06, + "loss": 0.0015, + "step": 77930 + }, + { + "epoch": 1.317188173360486, + "grad_norm": 0.012275348417460918, + "learning_rate": 3.1496919076524048e-06, + "loss": 0.0013, + "step": 77940 + }, + { + "epoch": 1.3173571736393366, + "grad_norm": 0.02711687795817852, + "learning_rate": 3.148321861211077e-06, + "loss": 0.0014, + "step": 77950 + }, + { + "epoch": 1.317526173918187, + "grad_norm": 0.0425824373960495, + "learning_rate": 3.146951975874719e-06, + "loss": 0.0009, + "step": 77960 + }, + { + "epoch": 1.3176951741970373, + "grad_norm": 0.018861930817365646, + "learning_rate": 3.145582251762517e-06, + "loss": 0.0008, + "step": 77970 + }, + { + "epoch": 1.3178641744758879, + "grad_norm": 0.03422432765364647, + "learning_rate": 3.1442126889936456e-06, + "loss": 0.0005, + "step": 77980 + }, + { + "epoch": 1.3180331747547385, + "grad_norm": 0.03603680804371834, + "learning_rate": 3.1428432876872607e-06, + "loss": 0.0006, + "step": 77990 + }, + { + "epoch": 1.3182021750335888, + "grad_norm": 0.09233968704938889, + "learning_rate": 3.141474047962509e-06, + "loss": 0.0009, + "step": 78000 + }, + { + "epoch": 1.3183711753124392, + "grad_norm": 0.08342853933572769, + "learning_rate": 
3.140104969938518e-06, + "loss": 0.0007, + "step": 78010 + }, + { + "epoch": 1.3185401755912898, + "grad_norm": 0.06904515624046326, + "learning_rate": 3.138736053734408e-06, + "loss": 0.0013, + "step": 78020 + }, + { + "epoch": 1.3187091758701401, + "grad_norm": 0.039718836545944214, + "learning_rate": 3.1373672994692777e-06, + "loss": 0.0007, + "step": 78030 + }, + { + "epoch": 1.3188781761489907, + "grad_norm": 0.15891526639461517, + "learning_rate": 3.135998707262218e-06, + "loss": 0.0021, + "step": 78040 + }, + { + "epoch": 1.319047176427841, + "grad_norm": 0.15801583230495453, + "learning_rate": 3.134630277232302e-06, + "loss": 0.0006, + "step": 78050 + }, + { + "epoch": 1.3192161767066914, + "grad_norm": 0.06909686326980591, + "learning_rate": 3.1332620094985893e-06, + "loss": 0.0008, + "step": 78060 + }, + { + "epoch": 1.319385176985542, + "grad_norm": 0.013529068790376186, + "learning_rate": 3.1318939041801253e-06, + "loss": 0.0011, + "step": 78070 + }, + { + "epoch": 1.3195541772643926, + "grad_norm": 0.10030799359083176, + "learning_rate": 3.130525961395946e-06, + "loss": 0.001, + "step": 78080 + }, + { + "epoch": 1.319723177543243, + "grad_norm": 0.021510707214474678, + "learning_rate": 3.1291581812650627e-06, + "loss": 0.0005, + "step": 78090 + }, + { + "epoch": 1.3198921778220933, + "grad_norm": 0.08789089322090149, + "learning_rate": 3.1277905639064825e-06, + "loss": 0.0009, + "step": 78100 + }, + { + "epoch": 1.320061178100944, + "grad_norm": 0.004744492471218109, + "learning_rate": 3.126423109439196e-06, + "loss": 0.0008, + "step": 78110 + }, + { + "epoch": 1.3202301783797943, + "grad_norm": 0.08623579144477844, + "learning_rate": 3.1250558179821754e-06, + "loss": 0.0017, + "step": 78120 + }, + { + "epoch": 1.3203991786586449, + "grad_norm": 0.00471165357157588, + "learning_rate": 3.1236886896543844e-06, + "loss": 0.0019, + "step": 78130 + }, + { + "epoch": 1.3205681789374952, + "grad_norm": 0.04368991404771805, + "learning_rate": 3.1223217245747667e-06, + "loss": 0.0012, + "step": 78140 + }, + { + "epoch": 1.3207371792163456, + "grad_norm": 0.018874410539865494, + "learning_rate": 3.120954922862257e-06, + "loss": 0.0011, + "step": 78150 + }, + { + "epoch": 1.3209061794951962, + "grad_norm": 0.014562326483428478, + "learning_rate": 3.119588284635774e-06, + "loss": 0.0011, + "step": 78160 + }, + { + "epoch": 1.3210751797740468, + "grad_norm": 0.06037477031350136, + "learning_rate": 3.11822181001422e-06, + "loss": 0.0006, + "step": 78170 + }, + { + "epoch": 1.3212441800528971, + "grad_norm": 0.037516556680202484, + "learning_rate": 3.1168554991164855e-06, + "loss": 0.0004, + "step": 78180 + }, + { + "epoch": 1.3214131803317475, + "grad_norm": 0.10493247210979462, + "learning_rate": 3.115489352061448e-06, + "loss": 0.001, + "step": 78190 + }, + { + "epoch": 1.321582180610598, + "grad_norm": 0.0857461616396904, + "learning_rate": 3.114123368967967e-06, + "loss": 0.0008, + "step": 78200 + }, + { + "epoch": 1.3217511808894484, + "grad_norm": 0.04702319577336311, + "learning_rate": 3.1127575499548913e-06, + "loss": 0.0018, + "step": 78210 + }, + { + "epoch": 1.321920181168299, + "grad_norm": 0.030510196462273598, + "learning_rate": 3.1113918951410504e-06, + "loss": 0.0004, + "step": 78220 + }, + { + "epoch": 1.3220891814471494, + "grad_norm": 0.067301906645298, + "learning_rate": 3.110026404645267e-06, + "loss": 0.0009, + "step": 78230 + }, + { + "epoch": 1.3222581817259997, + "grad_norm": 0.0009512768010608852, + "learning_rate": 3.1086610785863425e-06, + "loss": 0.0007, + 
"step": 78240 + }, + { + "epoch": 1.3224271820048503, + "grad_norm": 0.024270126596093178, + "learning_rate": 3.107295917083068e-06, + "loss": 0.0015, + "step": 78250 + }, + { + "epoch": 1.322596182283701, + "grad_norm": 0.04789629578590393, + "learning_rate": 3.105930920254219e-06, + "loss": 0.0008, + "step": 78260 + }, + { + "epoch": 1.3227651825625513, + "grad_norm": 0.0892537534236908, + "learning_rate": 3.104566088218558e-06, + "loss": 0.0006, + "step": 78270 + }, + { + "epoch": 1.3229341828414016, + "grad_norm": 0.13257360458374023, + "learning_rate": 3.1032014210948293e-06, + "loss": 0.0007, + "step": 78280 + }, + { + "epoch": 1.3231031831202522, + "grad_norm": 0.0010018249740824103, + "learning_rate": 3.1018369190017685e-06, + "loss": 0.0006, + "step": 78290 + }, + { + "epoch": 1.3232721833991026, + "grad_norm": 0.03295399248600006, + "learning_rate": 3.1004725820580917e-06, + "loss": 0.0007, + "step": 78300 + }, + { + "epoch": 1.3234411836779532, + "grad_norm": 0.010809667408466339, + "learning_rate": 3.0991084103825047e-06, + "loss": 0.0009, + "step": 78310 + }, + { + "epoch": 1.3236101839568035, + "grad_norm": 0.00015891263319645077, + "learning_rate": 3.0977444040936943e-06, + "loss": 0.0021, + "step": 78320 + }, + { + "epoch": 1.3237791842356539, + "grad_norm": 0.0351998396217823, + "learning_rate": 3.0963805633103385e-06, + "loss": 0.0008, + "step": 78330 + }, + { + "epoch": 1.3239481845145045, + "grad_norm": 0.07310052961111069, + "learning_rate": 3.0950168881510974e-06, + "loss": 0.001, + "step": 78340 + }, + { + "epoch": 1.3241171847933548, + "grad_norm": 0.004926603753119707, + "learning_rate": 3.093653378734616e-06, + "loss": 0.001, + "step": 78350 + }, + { + "epoch": 1.3242861850722054, + "grad_norm": 0.038179848343133926, + "learning_rate": 3.092290035179527e-06, + "loss": 0.0006, + "step": 78360 + }, + { + "epoch": 1.3244551853510558, + "grad_norm": 0.02062433399260044, + "learning_rate": 3.090926857604447e-06, + "loss": 0.0007, + "step": 78370 + }, + { + "epoch": 1.3246241856299064, + "grad_norm": 0.021784713491797447, + "learning_rate": 3.0895638461279833e-06, + "loss": 0.0005, + "step": 78380 + }, + { + "epoch": 1.3247931859087567, + "grad_norm": 0.009822769090533257, + "learning_rate": 3.088201000868718e-06, + "loss": 0.0007, + "step": 78390 + }, + { + "epoch": 1.3249621861876073, + "grad_norm": 0.06367561221122742, + "learning_rate": 3.0868383219452314e-06, + "loss": 0.0007, + "step": 78400 + }, + { + "epoch": 1.3251311864664577, + "grad_norm": 0.028616730123758316, + "learning_rate": 3.0854758094760784e-06, + "loss": 0.0005, + "step": 78410 + }, + { + "epoch": 1.325300186745308, + "grad_norm": 0.030751261860132217, + "learning_rate": 3.0841134635798077e-06, + "loss": 0.0007, + "step": 78420 + }, + { + "epoch": 1.3254691870241586, + "grad_norm": 0.13814707100391388, + "learning_rate": 3.0827512843749457e-06, + "loss": 0.001, + "step": 78430 + }, + { + "epoch": 1.325638187303009, + "grad_norm": 0.06487669795751572, + "learning_rate": 3.0813892719800133e-06, + "loss": 0.0015, + "step": 78440 + }, + { + "epoch": 1.3258071875818596, + "grad_norm": 0.1860516518354416, + "learning_rate": 3.0800274265135094e-06, + "loss": 0.0009, + "step": 78450 + }, + { + "epoch": 1.32597618786071, + "grad_norm": 0.027568094432353973, + "learning_rate": 3.078665748093922e-06, + "loss": 0.0012, + "step": 78460 + }, + { + "epoch": 1.3261451881395605, + "grad_norm": 0.005442751571536064, + "learning_rate": 3.0773042368397233e-06, + "loss": 0.0011, + "step": 78470 + }, + { + "epoch": 
1.3263141884184109, + "grad_norm": 0.037079572677612305, + "learning_rate": 3.0759428928693724e-06, + "loss": 0.0005, + "step": 78480 + }, + { + "epoch": 1.3264831886972615, + "grad_norm": 0.002279214560985565, + "learning_rate": 3.0745817163013116e-06, + "loss": 0.0008, + "step": 78490 + }, + { + "epoch": 1.3266521889761118, + "grad_norm": 0.06447796523571014, + "learning_rate": 3.073220707253971e-06, + "loss": 0.0009, + "step": 78500 + }, + { + "epoch": 1.3268211892549622, + "grad_norm": 0.09001284092664719, + "learning_rate": 3.0718598658457634e-06, + "loss": 0.0011, + "step": 78510 + }, + { + "epoch": 1.3269901895338128, + "grad_norm": 0.045323025435209274, + "learning_rate": 3.0704991921950904e-06, + "loss": 0.0012, + "step": 78520 + }, + { + "epoch": 1.3271591898126631, + "grad_norm": 0.006849227007478476, + "learning_rate": 3.069138686420335e-06, + "loss": 0.0012, + "step": 78530 + }, + { + "epoch": 1.3273281900915137, + "grad_norm": 0.10984335094690323, + "learning_rate": 3.0677783486398698e-06, + "loss": 0.0008, + "step": 78540 + }, + { + "epoch": 1.327497190370364, + "grad_norm": 0.027667885646224022, + "learning_rate": 3.066418178972049e-06, + "loss": 0.0011, + "step": 78550 + }, + { + "epoch": 1.3276661906492147, + "grad_norm": 0.11743637174367905, + "learning_rate": 3.065058177535218e-06, + "loss": 0.0005, + "step": 78560 + }, + { + "epoch": 1.327835190928065, + "grad_norm": 0.033889032900333405, + "learning_rate": 3.0636983444476975e-06, + "loss": 0.0008, + "step": 78570 + }, + { + "epoch": 1.3280041912069156, + "grad_norm": 0.012779652141034603, + "learning_rate": 3.0623386798278054e-06, + "loss": 0.0004, + "step": 78580 + }, + { + "epoch": 1.328173191485766, + "grad_norm": 0.02534503862261772, + "learning_rate": 3.060979183793834e-06, + "loss": 0.0004, + "step": 78590 + }, + { + "epoch": 1.3283421917646163, + "grad_norm": 0.10777483880519867, + "learning_rate": 3.0596198564640706e-06, + "loss": 0.0008, + "step": 78600 + }, + { + "epoch": 1.328511192043467, + "grad_norm": 0.06269858032464981, + "learning_rate": 3.0582606979567784e-06, + "loss": 0.0005, + "step": 78610 + }, + { + "epoch": 1.3286801923223173, + "grad_norm": 0.033827923238277435, + "learning_rate": 3.0569017083902143e-06, + "loss": 0.0009, + "step": 78620 + }, + { + "epoch": 1.3288491926011679, + "grad_norm": 0.045441195368766785, + "learning_rate": 3.0555428878826164e-06, + "loss": 0.001, + "step": 78630 + }, + { + "epoch": 1.3290181928800182, + "grad_norm": 0.10550963133573532, + "learning_rate": 3.054184236552209e-06, + "loss": 0.0015, + "step": 78640 + }, + { + "epoch": 1.3291871931588686, + "grad_norm": 0.04889865219593048, + "learning_rate": 3.0528257545172003e-06, + "loss": 0.001, + "step": 78650 + }, + { + "epoch": 1.3293561934377192, + "grad_norm": 0.025506243109703064, + "learning_rate": 3.0514674418957842e-06, + "loss": 0.0004, + "step": 78660 + }, + { + "epoch": 1.3295251937165697, + "grad_norm": 0.031631048768758774, + "learning_rate": 3.050109298806143e-06, + "loss": 0.0008, + "step": 78670 + }, + { + "epoch": 1.32969419399542, + "grad_norm": 0.04925537109375, + "learning_rate": 3.048751325366439e-06, + "loss": 0.0009, + "step": 78680 + }, + { + "epoch": 1.3298631942742705, + "grad_norm": 0.035217687487602234, + "learning_rate": 3.0473935216948257e-06, + "loss": 0.0019, + "step": 78690 + }, + { + "epoch": 1.330032194553121, + "grad_norm": 0.021285902708768845, + "learning_rate": 3.0460358879094343e-06, + "loss": 0.0019, + "step": 78700 + }, + { + "epoch": 1.3302011948319714, + "grad_norm": 
0.024787139147520065, + "learning_rate": 3.0446784241283898e-06, + "loss": 0.0009, + "step": 78710 + }, + { + "epoch": 1.330370195110822, + "grad_norm": 0.015254669822752476, + "learning_rate": 3.0433211304697953e-06, + "loss": 0.0009, + "step": 78720 + }, + { + "epoch": 1.3305391953896724, + "grad_norm": 0.043653395026922226, + "learning_rate": 3.041964007051742e-06, + "loss": 0.0012, + "step": 78730 + }, + { + "epoch": 1.3307081956685227, + "grad_norm": 0.08998756110668182, + "learning_rate": 3.040607053992307e-06, + "loss": 0.0007, + "step": 78740 + }, + { + "epoch": 1.3308771959473733, + "grad_norm": 0.09312793612480164, + "learning_rate": 3.039250271409554e-06, + "loss": 0.0008, + "step": 78750 + }, + { + "epoch": 1.3310461962262239, + "grad_norm": 0.1240491271018982, + "learning_rate": 3.037893659421525e-06, + "loss": 0.0008, + "step": 78760 + }, + { + "epoch": 1.3312151965050742, + "grad_norm": 0.04321032017469406, + "learning_rate": 3.036537218146256e-06, + "loss": 0.0005, + "step": 78770 + }, + { + "epoch": 1.3313841967839246, + "grad_norm": 0.043809086084365845, + "learning_rate": 3.0351809477017602e-06, + "loss": 0.0009, + "step": 78780 + }, + { + "epoch": 1.3315531970627752, + "grad_norm": 0.03958607465028763, + "learning_rate": 3.033824848206044e-06, + "loss": 0.001, + "step": 78790 + }, + { + "epoch": 1.3317221973416256, + "grad_norm": 0.0330476239323616, + "learning_rate": 3.0324689197770907e-06, + "loss": 0.0008, + "step": 78800 + }, + { + "epoch": 1.3318911976204761, + "grad_norm": 0.16270068287849426, + "learning_rate": 3.031113162532875e-06, + "loss": 0.0021, + "step": 78810 + }, + { + "epoch": 1.3320601978993265, + "grad_norm": 0.033717963844537735, + "learning_rate": 3.0297575765913536e-06, + "loss": 0.0007, + "step": 78820 + }, + { + "epoch": 1.3322291981781769, + "grad_norm": 0.013505863957107067, + "learning_rate": 3.0284021620704686e-06, + "loss": 0.0008, + "step": 78830 + }, + { + "epoch": 1.3323981984570274, + "grad_norm": 0.018628928810358047, + "learning_rate": 3.027046919088148e-06, + "loss": 0.0009, + "step": 78840 + }, + { + "epoch": 1.332567198735878, + "grad_norm": 0.2068597823381424, + "learning_rate": 3.025691847762306e-06, + "loss": 0.001, + "step": 78850 + }, + { + "epoch": 1.3327361990147284, + "grad_norm": 0.028667191043496132, + "learning_rate": 3.024336948210837e-06, + "loss": 0.0003, + "step": 78860 + }, + { + "epoch": 1.3329051992935788, + "grad_norm": 0.08055096119642258, + "learning_rate": 3.0229822205516255e-06, + "loss": 0.0006, + "step": 78870 + }, + { + "epoch": 1.3330741995724293, + "grad_norm": 0.03845233470201492, + "learning_rate": 3.021627664902543e-06, + "loss": 0.0004, + "step": 78880 + }, + { + "epoch": 1.3332431998512797, + "grad_norm": 0.06304703652858734, + "learning_rate": 3.020273281381436e-06, + "loss": 0.0008, + "step": 78890 + }, + { + "epoch": 1.3334122001301303, + "grad_norm": 0.09348312020301819, + "learning_rate": 3.0189190701061476e-06, + "loss": 0.0007, + "step": 78900 + }, + { + "epoch": 1.3335812004089806, + "grad_norm": 0.00907797459512949, + "learning_rate": 3.017565031194497e-06, + "loss": 0.0017, + "step": 78910 + }, + { + "epoch": 1.333750200687831, + "grad_norm": 0.0017043626867234707, + "learning_rate": 3.0162111647642946e-06, + "loss": 0.0008, + "step": 78920 + }, + { + "epoch": 1.3339192009666816, + "grad_norm": 0.11411120742559433, + "learning_rate": 3.0148574709333323e-06, + "loss": 0.001, + "step": 78930 + }, + { + "epoch": 1.3340882012455322, + "grad_norm": 0.05680382624268532, + "learning_rate": 
3.0135039498193886e-06, + "loss": 0.0008, + "step": 78940 + }, + { + "epoch": 1.3342572015243825, + "grad_norm": 0.02499389462172985, + "learning_rate": 3.0121506015402253e-06, + "loss": 0.0004, + "step": 78950 + }, + { + "epoch": 1.334426201803233, + "grad_norm": 0.023089343681931496, + "learning_rate": 3.0107974262135923e-06, + "loss": 0.001, + "step": 78960 + }, + { + "epoch": 1.3345952020820835, + "grad_norm": 0.03036220371723175, + "learning_rate": 3.0094444239572195e-06, + "loss": 0.0005, + "step": 78970 + }, + { + "epoch": 1.3347642023609338, + "grad_norm": 0.032540082931518555, + "learning_rate": 3.0080915948888288e-06, + "loss": 0.0011, + "step": 78980 + }, + { + "epoch": 1.3349332026397844, + "grad_norm": 0.0686960443854332, + "learning_rate": 3.006738939126118e-06, + "loss": 0.0006, + "step": 78990 + }, + { + "epoch": 1.3351022029186348, + "grad_norm": 0.015140851028263569, + "learning_rate": 3.0053864567867785e-06, + "loss": 0.0008, + "step": 79000 + }, + { + "epoch": 1.3352712031974852, + "grad_norm": 0.031037839129567146, + "learning_rate": 3.0040341479884805e-06, + "loss": 0.0015, + "step": 79010 + }, + { + "epoch": 1.3354402034763357, + "grad_norm": 0.029850441962480545, + "learning_rate": 3.002682012848882e-06, + "loss": 0.0014, + "step": 79020 + }, + { + "epoch": 1.3356092037551863, + "grad_norm": 0.044762782752513885, + "learning_rate": 3.001330051485626e-06, + "loss": 0.0013, + "step": 79030 + }, + { + "epoch": 1.3357782040340367, + "grad_norm": 0.023029036819934845, + "learning_rate": 2.99997826401634e-06, + "loss": 0.001, + "step": 79040 + }, + { + "epoch": 1.335947204312887, + "grad_norm": 0.007485507521778345, + "learning_rate": 2.9986266505586338e-06, + "loss": 0.0005, + "step": 79050 + }, + { + "epoch": 1.3361162045917376, + "grad_norm": 0.0779392346739769, + "learning_rate": 2.9972752112301084e-06, + "loss": 0.001, + "step": 79060 + }, + { + "epoch": 1.336285204870588, + "grad_norm": 0.05729474872350693, + "learning_rate": 2.9959239461483403e-06, + "loss": 0.0007, + "step": 79070 + }, + { + "epoch": 1.3364542051494386, + "grad_norm": 0.05444788187742233, + "learning_rate": 2.9945728554309013e-06, + "loss": 0.0013, + "step": 79080 + }, + { + "epoch": 1.336623205428289, + "grad_norm": 0.0028525509405881166, + "learning_rate": 2.993221939195338e-06, + "loss": 0.0012, + "step": 79090 + }, + { + "epoch": 1.3367922057071393, + "grad_norm": 0.09689547121524811, + "learning_rate": 2.991871197559191e-06, + "loss": 0.0009, + "step": 79100 + }, + { + "epoch": 1.3369612059859899, + "grad_norm": 0.09920303523540497, + "learning_rate": 2.990520630639977e-06, + "loss": 0.0024, + "step": 79110 + }, + { + "epoch": 1.3371302062648405, + "grad_norm": 0.0005344424280337989, + "learning_rate": 2.9891702385552076e-06, + "loss": 0.0005, + "step": 79120 + }, + { + "epoch": 1.3372992065436908, + "grad_norm": 0.1610758751630783, + "learning_rate": 2.9878200214223676e-06, + "loss": 0.0007, + "step": 79130 + }, + { + "epoch": 1.3374682068225412, + "grad_norm": 0.14027635753154755, + "learning_rate": 2.9864699793589346e-06, + "loss": 0.0007, + "step": 79140 + }, + { + "epoch": 1.3376372071013918, + "grad_norm": 0.0003164792724419385, + "learning_rate": 2.9851201124823714e-06, + "loss": 0.001, + "step": 79150 + }, + { + "epoch": 1.3378062073802421, + "grad_norm": 0.031022123992443085, + "learning_rate": 2.983770420910119e-06, + "loss": 0.0007, + "step": 79160 + }, + { + "epoch": 1.3379752076590927, + "grad_norm": 0.021356869488954544, + "learning_rate": 2.9824209047596107e-06, + "loss": 
0.0007, + "step": 79170 + }, + { + "epoch": 1.338144207937943, + "grad_norm": 0.0832192450761795, + "learning_rate": 2.981071564148257e-06, + "loss": 0.0009, + "step": 79180 + }, + { + "epoch": 1.3383132082167934, + "grad_norm": 0.03244505450129509, + "learning_rate": 2.9797223991934616e-06, + "loss": 0.0006, + "step": 79190 + }, + { + "epoch": 1.338482208495644, + "grad_norm": 0.00012445000174921006, + "learning_rate": 2.978373410012604e-06, + "loss": 0.0004, + "step": 79200 + }, + { + "epoch": 1.3386512087744946, + "grad_norm": 0.007832073606550694, + "learning_rate": 2.9770245967230548e-06, + "loss": 0.0012, + "step": 79210 + }, + { + "epoch": 1.338820209053345, + "grad_norm": 0.05080963298678398, + "learning_rate": 2.9756759594421667e-06, + "loss": 0.0009, + "step": 79220 + }, + { + "epoch": 1.3389892093321953, + "grad_norm": 0.02256394736468792, + "learning_rate": 2.9743274982872806e-06, + "loss": 0.0007, + "step": 79230 + }, + { + "epoch": 1.339158209611046, + "grad_norm": 0.008010189980268478, + "learning_rate": 2.972979213375714e-06, + "loss": 0.0012, + "step": 79240 + }, + { + "epoch": 1.3393272098898963, + "grad_norm": 0.1913597285747528, + "learning_rate": 2.971631104824779e-06, + "loss": 0.0006, + "step": 79250 + }, + { + "epoch": 1.3394962101687469, + "grad_norm": 0.07293778657913208, + "learning_rate": 2.9702831727517643e-06, + "loss": 0.0007, + "step": 79260 + }, + { + "epoch": 1.3396652104475972, + "grad_norm": 0.06996724009513855, + "learning_rate": 2.968935417273949e-06, + "loss": 0.0012, + "step": 79270 + }, + { + "epoch": 1.3398342107264476, + "grad_norm": 0.011677944101393223, + "learning_rate": 2.967587838508592e-06, + "loss": 0.0006, + "step": 79280 + }, + { + "epoch": 1.3400032110052982, + "grad_norm": 0.06278412789106369, + "learning_rate": 2.966240436572941e-06, + "loss": 0.0012, + "step": 79290 + }, + { + "epoch": 1.3401722112841485, + "grad_norm": 0.07919969409704208, + "learning_rate": 2.9648932115842255e-06, + "loss": 0.0016, + "step": 79300 + }, + { + "epoch": 1.3403412115629991, + "grad_norm": 0.002916355151683092, + "learning_rate": 2.9635461636596607e-06, + "loss": 0.0003, + "step": 79310 + }, + { + "epoch": 1.3405102118418495, + "grad_norm": 0.007013415917754173, + "learning_rate": 2.962199292916446e-06, + "loss": 0.0007, + "step": 79320 + }, + { + "epoch": 1.3406792121207, + "grad_norm": 0.06998938322067261, + "learning_rate": 2.9608525994717686e-06, + "loss": 0.001, + "step": 79330 + }, + { + "epoch": 1.3408482123995504, + "grad_norm": 0.009831363335251808, + "learning_rate": 2.9595060834427923e-06, + "loss": 0.0036, + "step": 79340 + }, + { + "epoch": 1.341017212678401, + "grad_norm": 0.029194116592407227, + "learning_rate": 2.958159744946675e-06, + "loss": 0.0007, + "step": 79350 + }, + { + "epoch": 1.3411862129572514, + "grad_norm": 0.0019040402257815003, + "learning_rate": 2.9568135841005512e-06, + "loss": 0.0007, + "step": 79360 + }, + { + "epoch": 1.3413552132361017, + "grad_norm": 0.045416876673698425, + "learning_rate": 2.9554676010215464e-06, + "loss": 0.0009, + "step": 79370 + }, + { + "epoch": 1.3415242135149523, + "grad_norm": 0.07052461057901382, + "learning_rate": 2.9541217958267653e-06, + "loss": 0.001, + "step": 79380 + }, + { + "epoch": 1.3416932137938027, + "grad_norm": 0.01906942017376423, + "learning_rate": 2.9527761686333e-06, + "loss": 0.0009, + "step": 79390 + }, + { + "epoch": 1.3418622140726533, + "grad_norm": 0.03192806988954544, + "learning_rate": 2.951430719558228e-06, + "loss": 0.0008, + "step": 79400 + }, + { + "epoch": 
1.3420312143515036, + "grad_norm": 0.03816506266593933, + "learning_rate": 2.9500854487186093e-06, + "loss": 0.001, + "step": 79410 + }, + { + "epoch": 1.3422002146303542, + "grad_norm": 0.011286511085927486, + "learning_rate": 2.9487403562314887e-06, + "loss": 0.0005, + "step": 79420 + }, + { + "epoch": 1.3423692149092046, + "grad_norm": 0.052074991166591644, + "learning_rate": 2.947395442213894e-06, + "loss": 0.001, + "step": 79430 + }, + { + "epoch": 1.3425382151880552, + "grad_norm": 0.04629841819405556, + "learning_rate": 2.9460507067828437e-06, + "loss": 0.0015, + "step": 79440 + }, + { + "epoch": 1.3427072154669055, + "grad_norm": 0.013466979376971722, + "learning_rate": 2.9447061500553308e-06, + "loss": 0.0006, + "step": 79450 + }, + { + "epoch": 1.3428762157457559, + "grad_norm": 0.03133407235145569, + "learning_rate": 2.9433617721483433e-06, + "loss": 0.0012, + "step": 79460 + }, + { + "epoch": 1.3430452160246065, + "grad_norm": 0.027102619409561157, + "learning_rate": 2.9420175731788443e-06, + "loss": 0.0025, + "step": 79470 + }, + { + "epoch": 1.3432142163034568, + "grad_norm": 0.01388012245297432, + "learning_rate": 2.940673553263789e-06, + "loss": 0.0004, + "step": 79480 + }, + { + "epoch": 1.3433832165823074, + "grad_norm": 0.08248895406723022, + "learning_rate": 2.939329712520111e-06, + "loss": 0.0015, + "step": 79490 + }, + { + "epoch": 1.3435522168611578, + "grad_norm": 0.04260677471756935, + "learning_rate": 2.9379860510647328e-06, + "loss": 0.0021, + "step": 79500 + }, + { + "epoch": 1.3437212171400084, + "grad_norm": 0.023597152903676033, + "learning_rate": 2.9366425690145585e-06, + "loss": 0.0013, + "step": 79510 + }, + { + "epoch": 1.3438902174188587, + "grad_norm": 0.07294405996799469, + "learning_rate": 2.9352992664864787e-06, + "loss": 0.001, + "step": 79520 + }, + { + "epoch": 1.3440592176977093, + "grad_norm": 0.07735653221607208, + "learning_rate": 2.933956143597365e-06, + "loss": 0.0015, + "step": 79530 + }, + { + "epoch": 1.3442282179765597, + "grad_norm": 0.04344133287668228, + "learning_rate": 2.9326132004640793e-06, + "loss": 0.0006, + "step": 79540 + }, + { + "epoch": 1.34439721825541, + "grad_norm": 0.12408298999071121, + "learning_rate": 2.93127043720346e-06, + "loss": 0.0009, + "step": 79550 + }, + { + "epoch": 1.3445662185342606, + "grad_norm": 0.06359492987394333, + "learning_rate": 2.9299278539323374e-06, + "loss": 0.0006, + "step": 79560 + }, + { + "epoch": 1.344735218813111, + "grad_norm": 0.03914562240242958, + "learning_rate": 2.928585450767519e-06, + "loss": 0.0007, + "step": 79570 + }, + { + "epoch": 1.3449042190919616, + "grad_norm": 0.06646328419446945, + "learning_rate": 2.9272432278258045e-06, + "loss": 0.003, + "step": 79580 + }, + { + "epoch": 1.345073219370812, + "grad_norm": 0.009112788364291191, + "learning_rate": 2.925901185223972e-06, + "loss": 0.0005, + "step": 79590 + }, + { + "epoch": 1.3452422196496623, + "grad_norm": 0.04493261128664017, + "learning_rate": 2.924559323078785e-06, + "loss": 0.0011, + "step": 79600 + }, + { + "epoch": 1.3454112199285129, + "grad_norm": 0.033908944576978683, + "learning_rate": 2.9232176415069913e-06, + "loss": 0.001, + "step": 79610 + }, + { + "epoch": 1.3455802202073635, + "grad_norm": 0.03137993440032005, + "learning_rate": 2.921876140625327e-06, + "loss": 0.0007, + "step": 79620 + }, + { + "epoch": 1.3457492204862138, + "grad_norm": 0.015687420964241028, + "learning_rate": 2.9205348205505057e-06, + "loss": 0.0005, + "step": 79630 + }, + { + "epoch": 1.3459182207650642, + "grad_norm": 
0.0345822349190712, + "learning_rate": 2.9191936813992305e-06, + "loss": 0.0007, + "step": 79640 + }, + { + "epoch": 1.3460872210439148, + "grad_norm": 0.0702509880065918, + "learning_rate": 2.9178527232881887e-06, + "loss": 0.0008, + "step": 79650 + }, + { + "epoch": 1.3462562213227651, + "grad_norm": 0.11144670099020004, + "learning_rate": 2.916511946334046e-06, + "loss": 0.0007, + "step": 79660 + }, + { + "epoch": 1.3464252216016157, + "grad_norm": 0.5236050486564636, + "learning_rate": 2.9151713506534606e-06, + "loss": 0.0018, + "step": 79670 + }, + { + "epoch": 1.346594221880466, + "grad_norm": 0.057299088686704636, + "learning_rate": 2.9138309363630666e-06, + "loss": 0.0006, + "step": 79680 + }, + { + "epoch": 1.3467632221593164, + "grad_norm": 0.0474783331155777, + "learning_rate": 2.9124907035794916e-06, + "loss": 0.0007, + "step": 79690 + }, + { + "epoch": 1.346932222438167, + "grad_norm": 0.1262008249759674, + "learning_rate": 2.911150652419337e-06, + "loss": 0.001, + "step": 79700 + }, + { + "epoch": 1.3471012227170176, + "grad_norm": 0.07362750917673111, + "learning_rate": 2.909810782999199e-06, + "loss": 0.0008, + "step": 79710 + }, + { + "epoch": 1.347270222995868, + "grad_norm": 0.00031968977418728173, + "learning_rate": 2.9084710954356477e-06, + "loss": 0.0009, + "step": 79720 + }, + { + "epoch": 1.3474392232747183, + "grad_norm": 0.07329060882329941, + "learning_rate": 2.9071315898452447e-06, + "loss": 0.0007, + "step": 79730 + }, + { + "epoch": 1.347608223553569, + "grad_norm": 0.024064164608716965, + "learning_rate": 2.905792266344536e-06, + "loss": 0.0024, + "step": 79740 + }, + { + "epoch": 1.3477772238324193, + "grad_norm": 0.06430544704198837, + "learning_rate": 2.904453125050044e-06, + "loss": 0.001, + "step": 79750 + }, + { + "epoch": 1.3479462241112699, + "grad_norm": 0.0032056604977697134, + "learning_rate": 2.9031141660782838e-06, + "loss": 0.0006, + "step": 79760 + }, + { + "epoch": 1.3481152243901202, + "grad_norm": 0.02469942532479763, + "learning_rate": 2.9017753895457525e-06, + "loss": 0.001, + "step": 79770 + }, + { + "epoch": 1.3482842246689706, + "grad_norm": 0.09063442051410675, + "learning_rate": 2.9004367955689266e-06, + "loss": 0.0009, + "step": 79780 + }, + { + "epoch": 1.3484532249478212, + "grad_norm": 0.027034692466259003, + "learning_rate": 2.899098384264274e-06, + "loss": 0.0007, + "step": 79790 + }, + { + "epoch": 1.3486222252266717, + "grad_norm": 0.00023188340128399432, + "learning_rate": 2.8977601557482393e-06, + "loss": 0.001, + "step": 79800 + }, + { + "epoch": 1.348791225505522, + "grad_norm": 0.09094345569610596, + "learning_rate": 2.896422110137259e-06, + "loss": 0.0013, + "step": 79810 + }, + { + "epoch": 1.3489602257843725, + "grad_norm": 0.010485582984983921, + "learning_rate": 2.8950842475477446e-06, + "loss": 0.0005, + "step": 79820 + }, + { + "epoch": 1.349129226063223, + "grad_norm": 0.055243708193302155, + "learning_rate": 2.8937465680961013e-06, + "loss": 0.0021, + "step": 79830 + }, + { + "epoch": 1.3492982263420734, + "grad_norm": 0.02478218823671341, + "learning_rate": 2.8924090718987096e-06, + "loss": 0.0005, + "step": 79840 + }, + { + "epoch": 1.349467226620924, + "grad_norm": 0.08459905534982681, + "learning_rate": 2.891071759071942e-06, + "loss": 0.0008, + "step": 79850 + }, + { + "epoch": 1.3496362268997744, + "grad_norm": 0.043836046010255814, + "learning_rate": 2.8897346297321484e-06, + "loss": 0.0006, + "step": 79860 + }, + { + "epoch": 1.3498052271786247, + "grad_norm": 0.10422411561012268, + "learning_rate": 
2.8883976839956672e-06, + "loss": 0.0011, + "step": 79870 + }, + { + "epoch": 1.3499742274574753, + "grad_norm": 0.15190142393112183, + "learning_rate": 2.887060921978817e-06, + "loss": 0.0018, + "step": 79880 + }, + { + "epoch": 1.350143227736326, + "grad_norm": 0.00032463777461089194, + "learning_rate": 2.885724343797904e-06, + "loss": 0.0007, + "step": 79890 + }, + { + "epoch": 1.3503122280151763, + "grad_norm": 0.07115165144205093, + "learning_rate": 2.8843879495692185e-06, + "loss": 0.0006, + "step": 79900 + }, + { + "epoch": 1.3504812282940266, + "grad_norm": 0.0636988952755928, + "learning_rate": 2.883051739409031e-06, + "loss": 0.0003, + "step": 79910 + }, + { + "epoch": 1.3506502285728772, + "grad_norm": 0.1498696357011795, + "learning_rate": 2.881715713433599e-06, + "loss": 0.0015, + "step": 79920 + }, + { + "epoch": 1.3508192288517276, + "grad_norm": 0.034050557762384415, + "learning_rate": 2.880379871759163e-06, + "loss": 0.0014, + "step": 79930 + }, + { + "epoch": 1.3509882291305781, + "grad_norm": 0.03661184757947922, + "learning_rate": 2.879044214501947e-06, + "loss": 0.0007, + "step": 79940 + }, + { + "epoch": 1.3511572294094285, + "grad_norm": 0.08183200657367706, + "learning_rate": 2.8777087417781614e-06, + "loss": 0.0013, + "step": 79950 + }, + { + "epoch": 1.3513262296882789, + "grad_norm": 0.0048887101002037525, + "learning_rate": 2.876373453704e-06, + "loss": 0.0005, + "step": 79960 + }, + { + "epoch": 1.3514952299671295, + "grad_norm": 0.02189868502318859, + "learning_rate": 2.8750383503956347e-06, + "loss": 0.0009, + "step": 79970 + }, + { + "epoch": 1.35166423024598, + "grad_norm": 0.028512021526694298, + "learning_rate": 2.873703431969231e-06, + "loss": 0.0008, + "step": 79980 + }, + { + "epoch": 1.3518332305248304, + "grad_norm": 0.02241390012204647, + "learning_rate": 2.8723686985409283e-06, + "loss": 0.0004, + "step": 79990 + }, + { + "epoch": 1.3520022308036808, + "grad_norm": 0.12110210210084915, + "learning_rate": 2.87103415022686e-06, + "loss": 0.0007, + "step": 80000 + }, + { + "epoch": 1.3521712310825313, + "grad_norm": 0.0038254125975072384, + "learning_rate": 2.8696997871431333e-06, + "loss": 0.0012, + "step": 80010 + }, + { + "epoch": 1.3523402313613817, + "grad_norm": 0.055296361446380615, + "learning_rate": 2.8683656094058486e-06, + "loss": 0.0008, + "step": 80020 + }, + { + "epoch": 1.3525092316402323, + "grad_norm": 0.05945458635687828, + "learning_rate": 2.867031617131083e-06, + "loss": 0.0006, + "step": 80030 + }, + { + "epoch": 1.3526782319190827, + "grad_norm": 0.18918436765670776, + "learning_rate": 2.865697810434902e-06, + "loss": 0.0014, + "step": 80040 + }, + { + "epoch": 1.352847232197933, + "grad_norm": 0.0316217839717865, + "learning_rate": 2.8643641894333506e-06, + "loss": 0.0007, + "step": 80050 + }, + { + "epoch": 1.3530162324767836, + "grad_norm": 0.078201062977314, + "learning_rate": 2.8630307542424644e-06, + "loss": 0.0009, + "step": 80060 + }, + { + "epoch": 1.3531852327556342, + "grad_norm": 0.019307147711515427, + "learning_rate": 2.8616975049782548e-06, + "loss": 0.001, + "step": 80070 + }, + { + "epoch": 1.3533542330344845, + "grad_norm": 0.01422625221312046, + "learning_rate": 2.860364441756724e-06, + "loss": 0.0009, + "step": 80080 + }, + { + "epoch": 1.353523233313335, + "grad_norm": 0.00013813190162181854, + "learning_rate": 2.8590315646938515e-06, + "loss": 0.0008, + "step": 80090 + }, + { + "epoch": 1.3536922335921855, + "grad_norm": 0.08600315451622009, + "learning_rate": 2.8576988739056068e-06, + "loss": 0.0013, + 
"step": 80100 + }, + { + "epoch": 1.3538612338710359, + "grad_norm": 0.010337582789361477, + "learning_rate": 2.856366369507941e-06, + "loss": 0.0004, + "step": 80110 + }, + { + "epoch": 1.3540302341498864, + "grad_norm": 0.05166761204600334, + "learning_rate": 2.855034051616785e-06, + "loss": 0.0014, + "step": 80120 + }, + { + "epoch": 1.3541992344287368, + "grad_norm": 0.019852489233016968, + "learning_rate": 2.85370192034806e-06, + "loss": 0.0007, + "step": 80130 + }, + { + "epoch": 1.3543682347075872, + "grad_norm": 0.003500432940199971, + "learning_rate": 2.852369975817668e-06, + "loss": 0.0008, + "step": 80140 + }, + { + "epoch": 1.3545372349864377, + "grad_norm": 0.03737901151180267, + "learning_rate": 2.851038218141492e-06, + "loss": 0.0013, + "step": 80150 + }, + { + "epoch": 1.354706235265288, + "grad_norm": 0.004693951457738876, + "learning_rate": 2.8497066474354034e-06, + "loss": 0.0006, + "step": 80160 + }, + { + "epoch": 1.3548752355441387, + "grad_norm": 0.0182564128190279, + "learning_rate": 2.8483752638152568e-06, + "loss": 0.0005, + "step": 80170 + }, + { + "epoch": 1.355044235822989, + "grad_norm": 0.05262609198689461, + "learning_rate": 2.847044067396885e-06, + "loss": 0.0007, + "step": 80180 + }, + { + "epoch": 1.3552132361018396, + "grad_norm": 0.02698170579969883, + "learning_rate": 2.8457130582961124e-06, + "loss": 0.0008, + "step": 80190 + }, + { + "epoch": 1.35538223638069, + "grad_norm": 0.08764582872390747, + "learning_rate": 2.84438223662874e-06, + "loss": 0.0006, + "step": 80200 + }, + { + "epoch": 1.3555512366595406, + "grad_norm": 0.08219470083713531, + "learning_rate": 2.843051602510558e-06, + "loss": 0.0009, + "step": 80210 + }, + { + "epoch": 1.355720236938391, + "grad_norm": 0.42823415994644165, + "learning_rate": 2.8417211560573364e-06, + "loss": 0.001, + "step": 80220 + }, + { + "epoch": 1.3558892372172413, + "grad_norm": 0.011557972989976406, + "learning_rate": 2.840390897384833e-06, + "loss": 0.0011, + "step": 80230 + }, + { + "epoch": 1.356058237496092, + "grad_norm": 0.06456495821475983, + "learning_rate": 2.8390608266087834e-06, + "loss": 0.001, + "step": 80240 + }, + { + "epoch": 1.3562272377749423, + "grad_norm": 0.08364235609769821, + "learning_rate": 2.8377309438449137e-06, + "loss": 0.0007, + "step": 80250 + }, + { + "epoch": 1.3563962380537928, + "grad_norm": 0.050985436886548996, + "learning_rate": 2.836401249208926e-06, + "loss": 0.0007, + "step": 80260 + }, + { + "epoch": 1.3565652383326432, + "grad_norm": 0.04415946453809738, + "learning_rate": 2.8350717428165143e-06, + "loss": 0.0012, + "step": 80270 + }, + { + "epoch": 1.3567342386114938, + "grad_norm": 0.026892591267824173, + "learning_rate": 2.833742424783349e-06, + "loss": 0.0008, + "step": 80280 + }, + { + "epoch": 1.3569032388903441, + "grad_norm": 0.03614193946123123, + "learning_rate": 2.8324132952250904e-06, + "loss": 0.0011, + "step": 80290 + }, + { + "epoch": 1.3570722391691947, + "grad_norm": 0.05073138326406479, + "learning_rate": 2.8310843542573753e-06, + "loss": 0.0011, + "step": 80300 + }, + { + "epoch": 1.357241239448045, + "grad_norm": 0.10851094871759415, + "learning_rate": 2.8297556019958293e-06, + "loss": 0.0007, + "step": 80310 + }, + { + "epoch": 1.3574102397268955, + "grad_norm": 0.024640558287501335, + "learning_rate": 2.828427038556062e-06, + "loss": 0.001, + "step": 80320 + }, + { + "epoch": 1.357579240005746, + "grad_norm": 0.01880939118564129, + "learning_rate": 2.8270986640536644e-06, + "loss": 0.0002, + "step": 80330 + }, + { + "epoch": 1.3577482402845964, 
+ "grad_norm": 0.03751187399029732, + "learning_rate": 2.8257704786042106e-06, + "loss": 0.0006, + "step": 80340 + }, + { + "epoch": 1.357917240563447, + "grad_norm": 0.046540506184101105, + "learning_rate": 2.824442482323261e-06, + "loss": 0.0008, + "step": 80350 + }, + { + "epoch": 1.3580862408422973, + "grad_norm": 0.0005142322042956948, + "learning_rate": 2.823114675326354e-06, + "loss": 0.0009, + "step": 80360 + }, + { + "epoch": 1.358255241121148, + "grad_norm": 0.030346006155014038, + "learning_rate": 2.8217870577290194e-06, + "loss": 0.0012, + "step": 80370 + }, + { + "epoch": 1.3584242413999983, + "grad_norm": 0.0235869362950325, + "learning_rate": 2.820459629646763e-06, + "loss": 0.0006, + "step": 80380 + }, + { + "epoch": 1.3585932416788489, + "grad_norm": 0.0027132586110383272, + "learning_rate": 2.8191323911950807e-06, + "loss": 0.0008, + "step": 80390 + }, + { + "epoch": 1.3587622419576992, + "grad_norm": 0.007963139563798904, + "learning_rate": 2.817805342489445e-06, + "loss": 0.0005, + "step": 80400 + }, + { + "epoch": 1.3589312422365496, + "grad_norm": 0.0007685494492761791, + "learning_rate": 2.8164784836453176e-06, + "loss": 0.0058, + "step": 80410 + }, + { + "epoch": 1.3591002425154002, + "grad_norm": 0.06528615951538086, + "learning_rate": 2.815151814778143e-06, + "loss": 0.0008, + "step": 80420 + }, + { + "epoch": 1.3592692427942505, + "grad_norm": 0.050124719738960266, + "learning_rate": 2.8138253360033446e-06, + "loss": 0.001, + "step": 80430 + }, + { + "epoch": 1.3594382430731011, + "grad_norm": 0.00018325158453080803, + "learning_rate": 2.812499047436336e-06, + "loss": 0.0005, + "step": 80440 + }, + { + "epoch": 1.3596072433519515, + "grad_norm": 0.010511750355362892, + "learning_rate": 2.8111729491925076e-06, + "loss": 0.0006, + "step": 80450 + }, + { + "epoch": 1.359776243630802, + "grad_norm": 0.06392191350460052, + "learning_rate": 2.8098470413872394e-06, + "loss": 0.0006, + "step": 80460 + }, + { + "epoch": 1.3599452439096524, + "grad_norm": 0.006926842965185642, + "learning_rate": 2.8085213241358875e-06, + "loss": 0.0014, + "step": 80470 + }, + { + "epoch": 1.360114244188503, + "grad_norm": 0.0003030473308172077, + "learning_rate": 2.807195797553801e-06, + "loss": 0.0004, + "step": 80480 + }, + { + "epoch": 1.3602832444673534, + "grad_norm": 0.033828362822532654, + "learning_rate": 2.8058704617563026e-06, + "loss": 0.0014, + "step": 80490 + }, + { + "epoch": 1.3604522447462037, + "grad_norm": 0.020338037982583046, + "learning_rate": 2.8045453168587043e-06, + "loss": 0.0007, + "step": 80500 + }, + { + "epoch": 1.3606212450250543, + "grad_norm": 0.025744963437318802, + "learning_rate": 2.8032203629763034e-06, + "loss": 0.001, + "step": 80510 + }, + { + "epoch": 1.3607902453039047, + "grad_norm": 0.03640247881412506, + "learning_rate": 2.8018956002243726e-06, + "loss": 0.0008, + "step": 80520 + }, + { + "epoch": 1.3609592455827553, + "grad_norm": 0.04088277369737625, + "learning_rate": 2.800571028718174e-06, + "loss": 0.0007, + "step": 80530 + }, + { + "epoch": 1.3611282458616056, + "grad_norm": 0.07791779190301895, + "learning_rate": 2.7992466485729543e-06, + "loss": 0.0011, + "step": 80540 + }, + { + "epoch": 1.361297246140456, + "grad_norm": 0.06431923806667328, + "learning_rate": 2.7979224599039377e-06, + "loss": 0.0011, + "step": 80550 + }, + { + "epoch": 1.3614662464193066, + "grad_norm": 0.026751643046736717, + "learning_rate": 2.7965984628263376e-06, + "loss": 0.0005, + "step": 80560 + }, + { + "epoch": 1.3616352466981572, + "grad_norm": 
0.07414183765649796, + "learning_rate": 2.795274657455346e-06, + "loss": 0.0009, + "step": 80570 + }, + { + "epoch": 1.3618042469770075, + "grad_norm": 0.033633653074502945, + "learning_rate": 2.7939510439061425e-06, + "loss": 0.0014, + "step": 80580 + }, + { + "epoch": 1.3619732472558579, + "grad_norm": 0.005656505934894085, + "learning_rate": 2.7926276222938855e-06, + "loss": 0.0016, + "step": 80590 + }, + { + "epoch": 1.3621422475347085, + "grad_norm": 0.0321982204914093, + "learning_rate": 2.7913043927337212e-06, + "loss": 0.0005, + "step": 80600 + }, + { + "epoch": 1.3623112478135588, + "grad_norm": 0.06537581980228424, + "learning_rate": 2.789981355340775e-06, + "loss": 0.0012, + "step": 80610 + }, + { + "epoch": 1.3624802480924094, + "grad_norm": 0.19757148623466492, + "learning_rate": 2.78865851023016e-06, + "loss": 0.0009, + "step": 80620 + }, + { + "epoch": 1.3626492483712598, + "grad_norm": 0.03330465033650398, + "learning_rate": 2.7873358575169674e-06, + "loss": 0.0015, + "step": 80630 + }, + { + "epoch": 1.3628182486501101, + "grad_norm": 0.010190580040216446, + "learning_rate": 2.7860133973162773e-06, + "loss": 0.0008, + "step": 80640 + }, + { + "epoch": 1.3629872489289607, + "grad_norm": 0.004234407562762499, + "learning_rate": 2.7846911297431474e-06, + "loss": 0.001, + "step": 80650 + }, + { + "epoch": 1.3631562492078113, + "grad_norm": 0.03019251488149166, + "learning_rate": 2.7833690549126226e-06, + "loss": 0.0016, + "step": 80660 + }, + { + "epoch": 1.3633252494866617, + "grad_norm": 0.0046164970844984055, + "learning_rate": 2.782047172939731e-06, + "loss": 0.0005, + "step": 80670 + }, + { + "epoch": 1.363494249765512, + "grad_norm": 0.017667559906840324, + "learning_rate": 2.7807254839394804e-06, + "loss": 0.0008, + "step": 80680 + }, + { + "epoch": 1.3636632500443626, + "grad_norm": 0.048973340541124344, + "learning_rate": 2.779403988026864e-06, + "loss": 0.0007, + "step": 80690 + }, + { + "epoch": 1.363832250323213, + "grad_norm": 0.02754290960729122, + "learning_rate": 2.778082685316863e-06, + "loss": 0.0009, + "step": 80700 + }, + { + "epoch": 1.3640012506020636, + "grad_norm": 0.09129004180431366, + "learning_rate": 2.7767615759244313e-06, + "loss": 0.0015, + "step": 80710 + }, + { + "epoch": 1.364170250880914, + "grad_norm": 0.036322593688964844, + "learning_rate": 2.7754406599645147e-06, + "loss": 0.0005, + "step": 80720 + }, + { + "epoch": 1.3643392511597643, + "grad_norm": 0.07712910324335098, + "learning_rate": 2.774119937552041e-06, + "loss": 0.0009, + "step": 80730 + }, + { + "epoch": 1.3645082514386149, + "grad_norm": 0.06588491797447205, + "learning_rate": 2.772799408801915e-06, + "loss": 0.0013, + "step": 80740 + }, + { + "epoch": 1.3646772517174655, + "grad_norm": 0.035696469247341156, + "learning_rate": 2.7714790738290333e-06, + "loss": 0.0007, + "step": 80750 + }, + { + "epoch": 1.3648462519963158, + "grad_norm": 0.06322583556175232, + "learning_rate": 2.770158932748268e-06, + "loss": 0.0005, + "step": 80760 + }, + { + "epoch": 1.3650152522751662, + "grad_norm": 0.01938874088227749, + "learning_rate": 2.7688389856744813e-06, + "loss": 0.001, + "step": 80770 + }, + { + "epoch": 1.3651842525540168, + "grad_norm": 0.027592506259679794, + "learning_rate": 2.7675192327225107e-06, + "loss": 0.0007, + "step": 80780 + }, + { + "epoch": 1.3653532528328671, + "grad_norm": 0.06308586150407791, + "learning_rate": 2.766199674007186e-06, + "loss": 0.0019, + "step": 80790 + }, + { + "epoch": 1.3655222531117177, + "grad_norm": 0.20949746668338776, + "learning_rate": 
2.764880309643311e-06, + "loss": 0.001, + "step": 80800 + }, + { + "epoch": 1.365691253390568, + "grad_norm": 0.02370143122971058, + "learning_rate": 2.76356113974568e-06, + "loss": 0.0005, + "step": 80810 + }, + { + "epoch": 1.3658602536694184, + "grad_norm": 0.1368524432182312, + "learning_rate": 2.7622421644290633e-06, + "loss": 0.0011, + "step": 80820 + }, + { + "epoch": 1.366029253948269, + "grad_norm": 0.06743310391902924, + "learning_rate": 2.7609233838082222e-06, + "loss": 0.0023, + "step": 80830 + }, + { + "epoch": 1.3661982542271196, + "grad_norm": 0.027281638234853745, + "learning_rate": 2.7596047979978935e-06, + "loss": 0.001, + "step": 80840 + }, + { + "epoch": 1.36636725450597, + "grad_norm": 0.10621394962072372, + "learning_rate": 2.758286407112805e-06, + "loss": 0.0012, + "step": 80850 + }, + { + "epoch": 1.3665362547848203, + "grad_norm": 0.01406975369900465, + "learning_rate": 2.756968211267658e-06, + "loss": 0.0006, + "step": 80860 + }, + { + "epoch": 1.366705255063671, + "grad_norm": 0.002855682745575905, + "learning_rate": 2.7556502105771443e-06, + "loss": 0.0008, + "step": 80870 + }, + { + "epoch": 1.3668742553425213, + "grad_norm": 0.017733998596668243, + "learning_rate": 2.754332405155939e-06, + "loss": 0.0018, + "step": 80880 + }, + { + "epoch": 1.3670432556213719, + "grad_norm": 0.037968385964632034, + "learning_rate": 2.7530147951186923e-06, + "loss": 0.0008, + "step": 80890 + }, + { + "epoch": 1.3672122559002222, + "grad_norm": 0.09487903863191605, + "learning_rate": 2.7516973805800457e-06, + "loss": 0.0007, + "step": 80900 + }, + { + "epoch": 1.3673812561790726, + "grad_norm": 0.0455913245677948, + "learning_rate": 2.75038016165462e-06, + "loss": 0.0008, + "step": 80910 + }, + { + "epoch": 1.3675502564579232, + "grad_norm": 0.01593964174389839, + "learning_rate": 2.749063138457022e-06, + "loss": 0.0007, + "step": 80920 + }, + { + "epoch": 1.3677192567367737, + "grad_norm": 0.02475392445921898, + "learning_rate": 2.747746311101835e-06, + "loss": 0.0012, + "step": 80930 + }, + { + "epoch": 1.367888257015624, + "grad_norm": 0.00532240467146039, + "learning_rate": 2.746429679703633e-06, + "loss": 0.0005, + "step": 80940 + }, + { + "epoch": 1.3680572572944745, + "grad_norm": 0.0069517092779278755, + "learning_rate": 2.745113244376966e-06, + "loss": 0.0008, + "step": 80950 + }, + { + "epoch": 1.368226257573325, + "grad_norm": 0.04656779021024704, + "learning_rate": 2.743797005236374e-06, + "loss": 0.0044, + "step": 80960 + }, + { + "epoch": 1.3683952578521754, + "grad_norm": 0.005446649622172117, + "learning_rate": 2.7424809623963722e-06, + "loss": 0.0013, + "step": 80970 + }, + { + "epoch": 1.368564258131026, + "grad_norm": 0.1676197052001953, + "learning_rate": 2.741165115971466e-06, + "loss": 0.0011, + "step": 80980 + }, + { + "epoch": 1.3687332584098764, + "grad_norm": 0.11519121378660202, + "learning_rate": 2.7398494660761378e-06, + "loss": 0.0011, + "step": 80990 + }, + { + "epoch": 1.3689022586887267, + "grad_norm": 0.01789015345275402, + "learning_rate": 2.7385340128248583e-06, + "loss": 0.0005, + "step": 81000 + }, + { + "epoch": 1.3690712589675773, + "grad_norm": 0.07445920258760452, + "learning_rate": 2.737218756332075e-06, + "loss": 0.0009, + "step": 81010 + }, + { + "epoch": 1.369240259246428, + "grad_norm": 0.016931943595409393, + "learning_rate": 2.735903696712225e-06, + "loss": 0.0011, + "step": 81020 + }, + { + "epoch": 1.3694092595252783, + "grad_norm": 0.004109029192477465, + "learning_rate": 2.734588834079721e-06, + "loss": 0.0015, + "step": 81030 
+ }, + { + "epoch": 1.3695782598041286, + "grad_norm": 0.044804804027080536, + "learning_rate": 2.733274168548967e-06, + "loss": 0.001, + "step": 81040 + }, + { + "epoch": 1.3697472600829792, + "grad_norm": 0.008167951367795467, + "learning_rate": 2.731959700234341e-06, + "loss": 0.0015, + "step": 81050 + }, + { + "epoch": 1.3699162603618296, + "grad_norm": 0.21587994694709778, + "learning_rate": 2.7306454292502115e-06, + "loss": 0.0029, + "step": 81060 + }, + { + "epoch": 1.3700852606406801, + "grad_norm": 0.04317808523774147, + "learning_rate": 2.7293313557109234e-06, + "loss": 0.0008, + "step": 81070 + }, + { + "epoch": 1.3702542609195305, + "grad_norm": 0.00516023114323616, + "learning_rate": 2.728017479730809e-06, + "loss": 0.0007, + "step": 81080 + }, + { + "epoch": 1.3704232611983809, + "grad_norm": 0.013336258940398693, + "learning_rate": 2.726703801424182e-06, + "loss": 0.0005, + "step": 81090 + }, + { + "epoch": 1.3705922614772315, + "grad_norm": 0.03444129228591919, + "learning_rate": 2.72539032090534e-06, + "loss": 0.0007, + "step": 81100 + }, + { + "epoch": 1.3707612617560818, + "grad_norm": 0.014178609475493431, + "learning_rate": 2.7240770382885594e-06, + "loss": 0.0008, + "step": 81110 + }, + { + "epoch": 1.3709302620349324, + "grad_norm": 0.18988923728466034, + "learning_rate": 2.722763953688105e-06, + "loss": 0.0009, + "step": 81120 + }, + { + "epoch": 1.3710992623137828, + "grad_norm": 0.1102895513176918, + "learning_rate": 2.7214510672182183e-06, + "loss": 0.0006, + "step": 81130 + }, + { + "epoch": 1.3712682625926333, + "grad_norm": 0.016499245539307594, + "learning_rate": 2.7201383789931314e-06, + "loss": 0.0007, + "step": 81140 + }, + { + "epoch": 1.3714372628714837, + "grad_norm": 0.02236087992787361, + "learning_rate": 2.7188258891270485e-06, + "loss": 0.0011, + "step": 81150 + }, + { + "epoch": 1.3716062631503343, + "grad_norm": 0.06486410647630692, + "learning_rate": 2.7175135977341683e-06, + "loss": 0.0012, + "step": 81160 + }, + { + "epoch": 1.3717752634291847, + "grad_norm": 0.04193085432052612, + "learning_rate": 2.716201504928662e-06, + "loss": 0.001, + "step": 81170 + }, + { + "epoch": 1.371944263708035, + "grad_norm": 0.11259046196937561, + "learning_rate": 2.71488961082469e-06, + "loss": 0.001, + "step": 81180 + }, + { + "epoch": 1.3721132639868856, + "grad_norm": 0.11563017964363098, + "learning_rate": 2.7135779155363957e-06, + "loss": 0.0017, + "step": 81190 + }, + { + "epoch": 1.372282264265736, + "grad_norm": 0.1372109204530716, + "learning_rate": 2.712266419177898e-06, + "loss": 0.0011, + "step": 81200 + }, + { + "epoch": 1.3724512645445865, + "grad_norm": 0.0071485610678792, + "learning_rate": 2.710955121863309e-06, + "loss": 0.0007, + "step": 81210 + }, + { + "epoch": 1.372620264823437, + "grad_norm": 0.017509106546640396, + "learning_rate": 2.709644023706713e-06, + "loss": 0.0006, + "step": 81220 + }, + { + "epoch": 1.3727892651022875, + "grad_norm": 0.08765660971403122, + "learning_rate": 2.7083331248221855e-06, + "loss": 0.0011, + "step": 81230 + }, + { + "epoch": 1.3729582653811379, + "grad_norm": 0.04982509836554527, + "learning_rate": 2.7070224253237775e-06, + "loss": 0.0009, + "step": 81240 + }, + { + "epoch": 1.3731272656599884, + "grad_norm": 0.07236922532320023, + "learning_rate": 2.7057119253255306e-06, + "loss": 0.001, + "step": 81250 + }, + { + "epoch": 1.3732962659388388, + "grad_norm": 0.09635771811008453, + "learning_rate": 2.70440162494146e-06, + "loss": 0.0009, + "step": 81260 + }, + { + "epoch": 1.3734652662176892, + "grad_norm": 
0.10149110853672028, + "learning_rate": 2.7030915242855706e-06, + "loss": 0.0005, + "step": 81270 + }, + { + "epoch": 1.3736342664965397, + "grad_norm": 0.019749846309423447, + "learning_rate": 2.7017816234718474e-06, + "loss": 0.0005, + "step": 81280 + }, + { + "epoch": 1.37380326677539, + "grad_norm": 0.061225682497024536, + "learning_rate": 2.70047192261426e-06, + "loss": 0.0008, + "step": 81290 + }, + { + "epoch": 1.3739722670542407, + "grad_norm": 0.17834830284118652, + "learning_rate": 2.6991624218267553e-06, + "loss": 0.0013, + "step": 81300 + }, + { + "epoch": 1.374141267333091, + "grad_norm": 0.02971114031970501, + "learning_rate": 2.697853121223269e-06, + "loss": 0.0018, + "step": 81310 + }, + { + "epoch": 1.3743102676119416, + "grad_norm": 0.0017296327278017998, + "learning_rate": 2.6965440209177136e-06, + "loss": 0.0008, + "step": 81320 + }, + { + "epoch": 1.374479267890792, + "grad_norm": 0.02275587059557438, + "learning_rate": 2.695235121023991e-06, + "loss": 0.0009, + "step": 81330 + }, + { + "epoch": 1.3746482681696426, + "grad_norm": 0.035394806414842606, + "learning_rate": 2.6939264216559772e-06, + "loss": 0.0009, + "step": 81340 + }, + { + "epoch": 1.374817268448493, + "grad_norm": 0.02650953270494938, + "learning_rate": 2.6926179229275407e-06, + "loss": 0.0008, + "step": 81350 + }, + { + "epoch": 1.3749862687273433, + "grad_norm": 0.1949848234653473, + "learning_rate": 2.6913096249525217e-06, + "loss": 0.0008, + "step": 81360 + }, + { + "epoch": 1.375155269006194, + "grad_norm": 0.035317301750183105, + "learning_rate": 2.6900015278447534e-06, + "loss": 0.0007, + "step": 81370 + }, + { + "epoch": 1.3753242692850443, + "grad_norm": 0.15639999508857727, + "learning_rate": 2.688693631718042e-06, + "loss": 0.0011, + "step": 81380 + }, + { + "epoch": 1.3754932695638948, + "grad_norm": 0.023006858304142952, + "learning_rate": 2.6873859366861866e-06, + "loss": 0.0009, + "step": 81390 + }, + { + "epoch": 1.3756622698427452, + "grad_norm": 0.18025051057338715, + "learning_rate": 2.6860784428629563e-06, + "loss": 0.0011, + "step": 81400 + }, + { + "epoch": 1.3758312701215956, + "grad_norm": 0.001331401988863945, + "learning_rate": 2.684771150362115e-06, + "loss": 0.0009, + "step": 81410 + }, + { + "epoch": 1.3760002704004461, + "grad_norm": 0.003098771208897233, + "learning_rate": 2.683464059297399e-06, + "loss": 0.001, + "step": 81420 + }, + { + "epoch": 1.3761692706792967, + "grad_norm": 0.027324680238962173, + "learning_rate": 2.6821571697825342e-06, + "loss": 0.0007, + "step": 81430 + }, + { + "epoch": 1.376338270958147, + "grad_norm": 0.009578811936080456, + "learning_rate": 2.6808504819312275e-06, + "loss": 0.0005, + "step": 81440 + }, + { + "epoch": 1.3765072712369975, + "grad_norm": 0.016443606466054916, + "learning_rate": 2.6795439958571634e-06, + "loss": 0.0009, + "step": 81450 + }, + { + "epoch": 1.376676271515848, + "grad_norm": 0.011706710793077946, + "learning_rate": 2.6782377116740143e-06, + "loss": 0.0006, + "step": 81460 + }, + { + "epoch": 1.3768452717946984, + "grad_norm": 0.14949294924736023, + "learning_rate": 2.6769316294954364e-06, + "loss": 0.0021, + "step": 81470 + }, + { + "epoch": 1.377014272073549, + "grad_norm": 0.04487974941730499, + "learning_rate": 2.675625749435059e-06, + "loss": 0.0007, + "step": 81480 + }, + { + "epoch": 1.3771832723523993, + "grad_norm": 0.02011519856750965, + "learning_rate": 2.6743200716065044e-06, + "loss": 0.0002, + "step": 81490 + }, + { + "epoch": 1.3773522726312497, + "grad_norm": 0.019957944750785828, + "learning_rate": 
2.673014596123373e-06, + "loss": 0.0007, + "step": 81500 + }, + { + "epoch": 1.3775212729101003, + "grad_norm": 0.00412454130128026, + "learning_rate": 2.671709323099244e-06, + "loss": 0.0006, + "step": 81510 + }, + { + "epoch": 1.3776902731889509, + "grad_norm": 0.005910065956413746, + "learning_rate": 2.670404252647687e-06, + "loss": 0.002, + "step": 81520 + }, + { + "epoch": 1.3778592734678012, + "grad_norm": 0.0417097806930542, + "learning_rate": 2.6690993848822457e-06, + "loss": 0.0007, + "step": 81530 + }, + { + "epoch": 1.3780282737466516, + "grad_norm": 0.07791711390018463, + "learning_rate": 2.6677947199164533e-06, + "loss": 0.001, + "step": 81540 + }, + { + "epoch": 1.3781972740255022, + "grad_norm": 0.058783017098903656, + "learning_rate": 2.6664902578638173e-06, + "loss": 0.0013, + "step": 81550 + }, + { + "epoch": 1.3783662743043525, + "grad_norm": 0.10798044502735138, + "learning_rate": 2.6651859988378377e-06, + "loss": 0.0006, + "step": 81560 + }, + { + "epoch": 1.3785352745832031, + "grad_norm": 0.01462811604142189, + "learning_rate": 2.663881942951986e-06, + "loss": 0.0007, + "step": 81570 + }, + { + "epoch": 1.3787042748620535, + "grad_norm": 0.10999801009893417, + "learning_rate": 2.6625780903197266e-06, + "loss": 0.0013, + "step": 81580 + }, + { + "epoch": 1.3788732751409039, + "grad_norm": 0.09494367986917496, + "learning_rate": 2.6612744410544966e-06, + "loss": 0.0013, + "step": 81590 + }, + { + "epoch": 1.3790422754197544, + "grad_norm": 0.021898848935961723, + "learning_rate": 2.6599709952697227e-06, + "loss": 0.0005, + "step": 81600 + }, + { + "epoch": 1.379211275698605, + "grad_norm": 0.01613488420844078, + "learning_rate": 2.6586677530788087e-06, + "loss": 0.0008, + "step": 81610 + }, + { + "epoch": 1.3793802759774554, + "grad_norm": 0.023948589339852333, + "learning_rate": 2.657364714595146e-06, + "loss": 0.0009, + "step": 81620 + }, + { + "epoch": 1.3795492762563057, + "grad_norm": 0.04615328833460808, + "learning_rate": 2.6560618799321015e-06, + "loss": 0.001, + "step": 81630 + }, + { + "epoch": 1.3797182765351563, + "grad_norm": 0.046796441078186035, + "learning_rate": 2.65475924920303e-06, + "loss": 0.0011, + "step": 81640 + }, + { + "epoch": 1.3798872768140067, + "grad_norm": 0.10986502468585968, + "learning_rate": 2.6534568225212687e-06, + "loss": 0.0014, + "step": 81650 + }, + { + "epoch": 1.3800562770928573, + "grad_norm": 0.039961110800504684, + "learning_rate": 2.6521546000001307e-06, + "loss": 0.0005, + "step": 81660 + }, + { + "epoch": 1.3802252773717076, + "grad_norm": 0.0009647384868003428, + "learning_rate": 2.650852581752919e-06, + "loss": 0.0009, + "step": 81670 + }, + { + "epoch": 1.380394277650558, + "grad_norm": 0.021076450124382973, + "learning_rate": 2.6495507678929144e-06, + "loss": 0.0007, + "step": 81680 + }, + { + "epoch": 1.3805632779294086, + "grad_norm": 0.01967843621969223, + "learning_rate": 2.6482491585333823e-06, + "loss": 0.0009, + "step": 81690 + }, + { + "epoch": 1.3807322782082592, + "grad_norm": 0.02331704832613468, + "learning_rate": 2.6469477537875665e-06, + "loss": 0.0005, + "step": 81700 + }, + { + "epoch": 1.3809012784871095, + "grad_norm": 0.06854772567749023, + "learning_rate": 2.6456465537686993e-06, + "loss": 0.0005, + "step": 81710 + }, + { + "epoch": 1.38107027876596, + "grad_norm": 0.042576905339956284, + "learning_rate": 2.644345558589987e-06, + "loss": 0.0021, + "step": 81720 + }, + { + "epoch": 1.3812392790448105, + "grad_norm": 0.0038385672960430384, + "learning_rate": 2.6430447683646254e-06, + "loss": 
0.0007, + "step": 81730 + }, + { + "epoch": 1.3814082793236608, + "grad_norm": 0.017321884632110596, + "learning_rate": 2.641744183205788e-06, + "loss": 0.0005, + "step": 81740 + }, + { + "epoch": 1.3815772796025114, + "grad_norm": 0.06699923425912857, + "learning_rate": 2.6404438032266338e-06, + "loss": 0.003, + "step": 81750 + }, + { + "epoch": 1.3817462798813618, + "grad_norm": 0.03266004100441933, + "learning_rate": 2.639143628540299e-06, + "loss": 0.0017, + "step": 81760 + }, + { + "epoch": 1.3819152801602121, + "grad_norm": 0.028615793213248253, + "learning_rate": 2.63784365925991e-06, + "loss": 0.001, + "step": 81770 + }, + { + "epoch": 1.3820842804390627, + "grad_norm": 0.004312659613788128, + "learning_rate": 2.6365438954985646e-06, + "loss": 0.0005, + "step": 81780 + }, + { + "epoch": 1.3822532807179133, + "grad_norm": 0.05887260288000107, + "learning_rate": 2.6352443373693538e-06, + "loss": 0.0013, + "step": 81790 + }, + { + "epoch": 1.3824222809967637, + "grad_norm": 0.02822296693921089, + "learning_rate": 2.6339449849853416e-06, + "loss": 0.001, + "step": 81800 + }, + { + "epoch": 1.382591281275614, + "grad_norm": 0.011759044602513313, + "learning_rate": 2.632645838459581e-06, + "loss": 0.001, + "step": 81810 + }, + { + "epoch": 1.3827602815544646, + "grad_norm": 0.05984441190958023, + "learning_rate": 2.6313468979051003e-06, + "loss": 0.0009, + "step": 81820 + }, + { + "epoch": 1.382929281833315, + "grad_norm": 0.019043082371354103, + "learning_rate": 2.630048163434917e-06, + "loss": 0.0008, + "step": 81830 + }, + { + "epoch": 1.3830982821121656, + "grad_norm": 0.04396101459860802, + "learning_rate": 2.6287496351620273e-06, + "loss": 0.0011, + "step": 81840 + }, + { + "epoch": 1.383267282391016, + "grad_norm": 0.1366347372531891, + "learning_rate": 2.627451313199406e-06, + "loss": 0.0006, + "step": 81850 + }, + { + "epoch": 1.3834362826698663, + "grad_norm": 0.08047544211149216, + "learning_rate": 2.6261531976600164e-06, + "loss": 0.0009, + "step": 81860 + }, + { + "epoch": 1.3836052829487169, + "grad_norm": 0.053900931030511856, + "learning_rate": 2.6248552886568025e-06, + "loss": 0.0012, + "step": 81870 + }, + { + "epoch": 1.3837742832275675, + "grad_norm": 0.032839562743902206, + "learning_rate": 2.6235575863026837e-06, + "loss": 0.0007, + "step": 81880 + }, + { + "epoch": 1.3839432835064178, + "grad_norm": 0.10769818723201752, + "learning_rate": 2.622260090710571e-06, + "loss": 0.0012, + "step": 81890 + }, + { + "epoch": 1.3841122837852682, + "grad_norm": 0.00828024186193943, + "learning_rate": 2.6209628019933486e-06, + "loss": 0.0006, + "step": 81900 + }, + { + "epoch": 1.3842812840641188, + "grad_norm": 0.02269088849425316, + "learning_rate": 2.6196657202638913e-06, + "loss": 0.0009, + "step": 81910 + }, + { + "epoch": 1.3844502843429691, + "grad_norm": 0.05940607562661171, + "learning_rate": 2.6183688456350474e-06, + "loss": 0.0019, + "step": 81920 + }, + { + "epoch": 1.3846192846218197, + "grad_norm": 0.10371402651071548, + "learning_rate": 2.6170721782196534e-06, + "loss": 0.0009, + "step": 81930 + }, + { + "epoch": 1.38478828490067, + "grad_norm": 0.06377759575843811, + "learning_rate": 2.6157757181305276e-06, + "loss": 0.0006, + "step": 81940 + }, + { + "epoch": 1.3849572851795204, + "grad_norm": 0.0681513175368309, + "learning_rate": 2.614479465480464e-06, + "loss": 0.0007, + "step": 81950 + }, + { + "epoch": 1.385126285458371, + "grad_norm": 0.008973762392997742, + "learning_rate": 2.6131834203822463e-06, + "loss": 0.0014, + "step": 81960 + }, + { + "epoch": 
1.3852952857372216, + "grad_norm": 0.03835931420326233, + "learning_rate": 2.6118875829486345e-06, + "loss": 0.0015, + "step": 81970 + }, + { + "epoch": 1.385464286016072, + "grad_norm": 0.17669308185577393, + "learning_rate": 2.610591953292375e-06, + "loss": 0.0017, + "step": 81980 + }, + { + "epoch": 1.3856332862949223, + "grad_norm": 0.04100028797984123, + "learning_rate": 2.609296531526191e-06, + "loss": 0.0005, + "step": 81990 + }, + { + "epoch": 1.385802286573773, + "grad_norm": 0.10174252837896347, + "learning_rate": 2.608001317762793e-06, + "loss": 0.0011, + "step": 82000 + }, + { + "epoch": 1.3859712868526233, + "grad_norm": 0.04097196087241173, + "learning_rate": 2.6067063121148682e-06, + "loss": 0.0007, + "step": 82010 + }, + { + "epoch": 1.3861402871314739, + "grad_norm": 0.031012501567602158, + "learning_rate": 2.605411514695093e-06, + "loss": 0.001, + "step": 82020 + }, + { + "epoch": 1.3863092874103242, + "grad_norm": 0.07320142537355423, + "learning_rate": 2.604116925616115e-06, + "loss": 0.0015, + "step": 82030 + }, + { + "epoch": 1.3864782876891746, + "grad_norm": 0.011016522534191608, + "learning_rate": 2.602822544990573e-06, + "loss": 0.0008, + "step": 82040 + }, + { + "epoch": 1.3866472879680252, + "grad_norm": 0.009944644756615162, + "learning_rate": 2.601528372931085e-06, + "loss": 0.0006, + "step": 82050 + }, + { + "epoch": 1.3868162882468755, + "grad_norm": 0.041490208357572556, + "learning_rate": 2.6002344095502507e-06, + "loss": 0.0009, + "step": 82060 + }, + { + "epoch": 1.3869852885257261, + "grad_norm": 0.05326971784234047, + "learning_rate": 2.5989406549606477e-06, + "loss": 0.0009, + "step": 82070 + }, + { + "epoch": 1.3871542888045765, + "grad_norm": 0.05080737918615341, + "learning_rate": 2.5976471092748436e-06, + "loss": 0.0007, + "step": 82080 + }, + { + "epoch": 1.387323289083427, + "grad_norm": 0.06964278221130371, + "learning_rate": 2.5963537726053785e-06, + "loss": 0.0009, + "step": 82090 + }, + { + "epoch": 1.3874922893622774, + "grad_norm": 0.0013241906417533755, + "learning_rate": 2.595060645064783e-06, + "loss": 0.0005, + "step": 82100 + }, + { + "epoch": 1.387661289641128, + "grad_norm": 0.03161228448152542, + "learning_rate": 2.5937677267655616e-06, + "loss": 0.0013, + "step": 82110 + }, + { + "epoch": 1.3878302899199784, + "grad_norm": 0.04791177436709404, + "learning_rate": 2.5924750178202083e-06, + "loss": 0.001, + "step": 82120 + }, + { + "epoch": 1.3879992901988287, + "grad_norm": 0.01146010123193264, + "learning_rate": 2.5911825183411914e-06, + "loss": 0.0003, + "step": 82130 + }, + { + "epoch": 1.3881682904776793, + "grad_norm": 0.04580337554216385, + "learning_rate": 2.5898902284409684e-06, + "loss": 0.0008, + "step": 82140 + }, + { + "epoch": 1.3883372907565297, + "grad_norm": 0.024946637451648712, + "learning_rate": 2.588598148231971e-06, + "loss": 0.0012, + "step": 82150 + }, + { + "epoch": 1.3885062910353803, + "grad_norm": 0.024746665731072426, + "learning_rate": 2.5873062778266194e-06, + "loss": 0.0003, + "step": 82160 + }, + { + "epoch": 1.3886752913142306, + "grad_norm": 0.07841707020998001, + "learning_rate": 2.58601461733731e-06, + "loss": 0.0006, + "step": 82170 + }, + { + "epoch": 1.3888442915930812, + "grad_norm": 0.048576850444078445, + "learning_rate": 2.584723166876427e-06, + "loss": 0.0009, + "step": 82180 + }, + { + "epoch": 1.3890132918719316, + "grad_norm": 0.014373579062521458, + "learning_rate": 2.5834319265563292e-06, + "loss": 0.0004, + "step": 82190 + }, + { + "epoch": 1.3891822921507821, + "grad_norm": 
0.030972721055150032, + "learning_rate": 2.5821408964893614e-06, + "loss": 0.0005, + "step": 82200 + }, + { + "epoch": 1.3893512924296325, + "grad_norm": 0.054175764322280884, + "learning_rate": 2.5808500767878523e-06, + "loss": 0.0009, + "step": 82210 + }, + { + "epoch": 1.3895202927084829, + "grad_norm": 0.007382318377494812, + "learning_rate": 2.579559467564107e-06, + "loss": 0.0006, + "step": 82220 + }, + { + "epoch": 1.3896892929873335, + "grad_norm": 0.11436676979064941, + "learning_rate": 2.5782690689304136e-06, + "loss": 0.0011, + "step": 82230 + }, + { + "epoch": 1.3898582932661838, + "grad_norm": 0.2287551313638687, + "learning_rate": 2.5769788809990475e-06, + "loss": 0.0006, + "step": 82240 + }, + { + "epoch": 1.3900272935450344, + "grad_norm": 0.09457912296056747, + "learning_rate": 2.5756889038822562e-06, + "loss": 0.0008, + "step": 82250 + }, + { + "epoch": 1.3901962938238848, + "grad_norm": 0.17385007441043854, + "learning_rate": 2.574399137692277e-06, + "loss": 0.0015, + "step": 82260 + }, + { + "epoch": 1.3903652941027353, + "grad_norm": 0.20081576704978943, + "learning_rate": 2.5731095825413267e-06, + "loss": 0.001, + "step": 82270 + }, + { + "epoch": 1.3905342943815857, + "grad_norm": 0.10662265121936798, + "learning_rate": 2.5718202385415997e-06, + "loss": 0.002, + "step": 82280 + }, + { + "epoch": 1.3907032946604363, + "grad_norm": 0.038537610322237015, + "learning_rate": 2.5705311058052783e-06, + "loss": 0.0008, + "step": 82290 + }, + { + "epoch": 1.3908722949392867, + "grad_norm": 0.0011308231623843312, + "learning_rate": 2.5692421844445204e-06, + "loss": 0.001, + "step": 82300 + }, + { + "epoch": 1.391041295218137, + "grad_norm": 0.00529683381319046, + "learning_rate": 2.567953474571471e-06, + "loss": 0.0011, + "step": 82310 + }, + { + "epoch": 1.3912102954969876, + "grad_norm": 0.07235697656869888, + "learning_rate": 2.566664976298251e-06, + "loss": 0.0033, + "step": 82320 + }, + { + "epoch": 1.391379295775838, + "grad_norm": 0.07601780444383621, + "learning_rate": 2.5653766897369696e-06, + "loss": 0.0007, + "step": 82330 + }, + { + "epoch": 1.3915482960546885, + "grad_norm": 0.04762272536754608, + "learning_rate": 2.5640886149997108e-06, + "loss": 0.001, + "step": 82340 + }, + { + "epoch": 1.391717296333539, + "grad_norm": 0.04797697812318802, + "learning_rate": 2.5628007521985467e-06, + "loss": 0.0015, + "step": 82350 + }, + { + "epoch": 1.3918862966123893, + "grad_norm": 0.036652568727731705, + "learning_rate": 2.561513101445523e-06, + "loss": 0.0007, + "step": 82360 + }, + { + "epoch": 1.3920552968912399, + "grad_norm": 0.03651326522231102, + "learning_rate": 2.5602256628526765e-06, + "loss": 0.0006, + "step": 82370 + }, + { + "epoch": 1.3922242971700904, + "grad_norm": 0.0008418612414970994, + "learning_rate": 2.558938436532017e-06, + "loss": 0.0011, + "step": 82380 + }, + { + "epoch": 1.3923932974489408, + "grad_norm": 0.02465483732521534, + "learning_rate": 2.557651422595542e-06, + "loss": 0.0013, + "step": 82390 + }, + { + "epoch": 1.3925622977277912, + "grad_norm": 0.05674951896071434, + "learning_rate": 2.5563646211552252e-06, + "loss": 0.0017, + "step": 82400 + }, + { + "epoch": 1.3927312980066417, + "grad_norm": 0.04472305253148079, + "learning_rate": 2.5550780323230256e-06, + "loss": 0.0006, + "step": 82410 + }, + { + "epoch": 1.3929002982854921, + "grad_norm": 0.02844862826168537, + "learning_rate": 2.5537916562108835e-06, + "loss": 0.0006, + "step": 82420 + }, + { + "epoch": 1.3930692985643427, + "grad_norm": 0.04687836021184921, + "learning_rate": 
2.5525054929307212e-06, + "loss": 0.0004, + "step": 82430 + }, + { + "epoch": 1.393238298843193, + "grad_norm": 0.3177129924297333, + "learning_rate": 2.5512195425944373e-06, + "loss": 0.0007, + "step": 82440 + }, + { + "epoch": 1.3934072991220434, + "grad_norm": 0.10119723528623581, + "learning_rate": 2.5499338053139177e-06, + "loss": 0.0009, + "step": 82450 + }, + { + "epoch": 1.393576299400894, + "grad_norm": 0.047569334506988525, + "learning_rate": 2.5486482812010303e-06, + "loss": 0.0005, + "step": 82460 + }, + { + "epoch": 1.3937452996797446, + "grad_norm": 0.04689168184995651, + "learning_rate": 2.547362970367617e-06, + "loss": 0.0007, + "step": 82470 + }, + { + "epoch": 1.393914299958595, + "grad_norm": 0.019643142819404602, + "learning_rate": 2.546077872925511e-06, + "loss": 0.0008, + "step": 82480 + }, + { + "epoch": 1.3940833002374453, + "grad_norm": 0.025296105071902275, + "learning_rate": 2.5447929889865174e-06, + "loss": 0.0007, + "step": 82490 + }, + { + "epoch": 1.394252300516296, + "grad_norm": 0.022164635360240936, + "learning_rate": 2.543508318662432e-06, + "loss": 0.0008, + "step": 82500 + }, + { + "epoch": 1.3944213007951463, + "grad_norm": 0.002421640558168292, + "learning_rate": 2.542223862065022e-06, + "loss": 0.0005, + "step": 82510 + }, + { + "epoch": 1.3945903010739968, + "grad_norm": 0.05165115371346474, + "learning_rate": 2.5409396193060465e-06, + "loss": 0.0005, + "step": 82520 + }, + { + "epoch": 1.3947593013528472, + "grad_norm": 0.07443195581436157, + "learning_rate": 2.5396555904972368e-06, + "loss": 0.0007, + "step": 82530 + }, + { + "epoch": 1.3949283016316976, + "grad_norm": 0.03140726685523987, + "learning_rate": 2.5383717757503136e-06, + "loss": 0.0006, + "step": 82540 + }, + { + "epoch": 1.3950973019105481, + "grad_norm": 0.04079822450876236, + "learning_rate": 2.5370881751769704e-06, + "loss": 0.0009, + "step": 82550 + }, + { + "epoch": 1.3952663021893987, + "grad_norm": 0.013004903681576252, + "learning_rate": 2.535804788888891e-06, + "loss": 0.0008, + "step": 82560 + }, + { + "epoch": 1.395435302468249, + "grad_norm": 0.010640832595527172, + "learning_rate": 2.5345216169977325e-06, + "loss": 0.0005, + "step": 82570 + }, + { + "epoch": 1.3956043027470995, + "grad_norm": 0.024120008572936058, + "learning_rate": 2.53323865961514e-06, + "loss": 0.0007, + "step": 82580 + }, + { + "epoch": 1.39577330302595, + "grad_norm": 0.026166046038269997, + "learning_rate": 2.5319559168527354e-06, + "loss": 0.001, + "step": 82590 + }, + { + "epoch": 1.3959423033048004, + "grad_norm": 0.06554225832223892, + "learning_rate": 2.5306733888221223e-06, + "loss": 0.0008, + "step": 82600 + }, + { + "epoch": 1.396111303583651, + "grad_norm": 0.018941501155495644, + "learning_rate": 2.5293910756348916e-06, + "loss": 0.0006, + "step": 82610 + }, + { + "epoch": 1.3962803038625013, + "grad_norm": 0.038295380771160126, + "learning_rate": 2.5281089774026056e-06, + "loss": 0.0006, + "step": 82620 + }, + { + "epoch": 1.3964493041413517, + "grad_norm": 0.0389002226293087, + "learning_rate": 2.526827094236815e-06, + "loss": 0.0006, + "step": 82630 + }, + { + "epoch": 1.3966183044202023, + "grad_norm": 0.019440731033682823, + "learning_rate": 2.525545426249052e-06, + "loss": 0.0007, + "step": 82640 + }, + { + "epoch": 1.3967873046990529, + "grad_norm": 0.4608055055141449, + "learning_rate": 2.524263973550824e-06, + "loss": 0.0008, + "step": 82650 + }, + { + "epoch": 1.3969563049779032, + "grad_norm": 0.051159411668777466, + "learning_rate": 2.5229827362536274e-06, + "loss": 0.0003, + 
"step": 82660 + }, + { + "epoch": 1.3971253052567536, + "grad_norm": 0.017769791185855865, + "learning_rate": 2.5217017144689323e-06, + "loss": 0.0008, + "step": 82670 + }, + { + "epoch": 1.3972943055356042, + "grad_norm": 0.01922992430627346, + "learning_rate": 2.5204209083081977e-06, + "loss": 0.0011, + "step": 82680 + }, + { + "epoch": 1.3974633058144545, + "grad_norm": 0.021616067737340927, + "learning_rate": 2.5191403178828554e-06, + "loss": 0.0033, + "step": 82690 + }, + { + "epoch": 1.3976323060933051, + "grad_norm": 0.03175405040383339, + "learning_rate": 2.517859943304326e-06, + "loss": 0.0008, + "step": 82700 + }, + { + "epoch": 1.3978013063721555, + "grad_norm": 0.10438563674688339, + "learning_rate": 2.5165797846840097e-06, + "loss": 0.0014, + "step": 82710 + }, + { + "epoch": 1.3979703066510059, + "grad_norm": 0.032787639647722244, + "learning_rate": 2.5152998421332827e-06, + "loss": 0.0006, + "step": 82720 + }, + { + "epoch": 1.3981393069298564, + "grad_norm": 0.058113645762205124, + "learning_rate": 2.5140201157635098e-06, + "loss": 0.0005, + "step": 82730 + }, + { + "epoch": 1.398308307208707, + "grad_norm": 0.05953868478536606, + "learning_rate": 2.5127406056860293e-06, + "loss": 0.0007, + "step": 82740 + }, + { + "epoch": 1.3984773074875574, + "grad_norm": 0.05751534551382065, + "learning_rate": 2.51146131201217e-06, + "loss": 0.0007, + "step": 82750 + }, + { + "epoch": 1.3986463077664077, + "grad_norm": 0.06238119676709175, + "learning_rate": 2.5101822348532314e-06, + "loss": 0.0014, + "step": 82760 + }, + { + "epoch": 1.3988153080452583, + "grad_norm": 0.040273841470479965, + "learning_rate": 2.5089033743205037e-06, + "loss": 0.001, + "step": 82770 + }, + { + "epoch": 1.3989843083241087, + "grad_norm": 0.011359905824065208, + "learning_rate": 2.5076247305252503e-06, + "loss": 0.0006, + "step": 82780 + }, + { + "epoch": 1.3991533086029593, + "grad_norm": 0.02924640290439129, + "learning_rate": 2.5063463035787234e-06, + "loss": 0.0006, + "step": 82790 + }, + { + "epoch": 1.3993223088818096, + "grad_norm": 0.029478926211595535, + "learning_rate": 2.5050680935921478e-06, + "loss": 0.0012, + "step": 82800 + }, + { + "epoch": 1.39949130916066, + "grad_norm": 0.02089492790400982, + "learning_rate": 2.5037901006767363e-06, + "loss": 0.0012, + "step": 82810 + }, + { + "epoch": 1.3996603094395106, + "grad_norm": 0.024508384987711906, + "learning_rate": 2.5025123249436807e-06, + "loss": 0.0005, + "step": 82820 + }, + { + "epoch": 1.3998293097183612, + "grad_norm": 0.006763719953596592, + "learning_rate": 2.501234766504155e-06, + "loss": 0.0004, + "step": 82830 + }, + { + "epoch": 1.3999983099972115, + "grad_norm": 0.08759523183107376, + "learning_rate": 2.4999574254693094e-06, + "loss": 0.0007, + "step": 82840 + }, + { + "epoch": 1.400167310276062, + "grad_norm": 0.03937356173992157, + "learning_rate": 2.498680301950283e-06, + "loss": 0.0005, + "step": 82850 + }, + { + "epoch": 1.4003363105549125, + "grad_norm": 0.040558457374572754, + "learning_rate": 2.497403396058188e-06, + "loss": 0.0004, + "step": 82860 + }, + { + "epoch": 1.4005053108337628, + "grad_norm": 0.0008733658469282091, + "learning_rate": 2.496126707904124e-06, + "loss": 0.0009, + "step": 82870 + }, + { + "epoch": 1.4006743111126134, + "grad_norm": 0.048882171511650085, + "learning_rate": 2.4948502375991664e-06, + "loss": 0.0006, + "step": 82880 + }, + { + "epoch": 1.4008433113914638, + "grad_norm": 0.0028946897946298122, + "learning_rate": 2.4935739852543777e-06, + "loss": 0.0005, + "step": 82890 + }, + { + 
"epoch": 1.4010123116703141, + "grad_norm": 0.046913258731365204, + "learning_rate": 2.4922979509807945e-06, + "loss": 0.0008, + "step": 82900 + }, + { + "epoch": 1.4011813119491647, + "grad_norm": 0.1057584136724472, + "learning_rate": 2.4910221348894413e-06, + "loss": 0.0007, + "step": 82910 + }, + { + "epoch": 1.401350312228015, + "grad_norm": 0.0601949468255043, + "learning_rate": 2.489746537091317e-06, + "loss": 0.0012, + "step": 82920 + }, + { + "epoch": 1.4015193125068657, + "grad_norm": 0.041038982570171356, + "learning_rate": 2.488471157697408e-06, + "loss": 0.0008, + "step": 82930 + }, + { + "epoch": 1.401688312785716, + "grad_norm": 0.10405878722667694, + "learning_rate": 2.4871959968186755e-06, + "loss": 0.0007, + "step": 82940 + }, + { + "epoch": 1.4018573130645666, + "grad_norm": 0.012591134756803513, + "learning_rate": 2.485921054566067e-06, + "loss": 0.0005, + "step": 82950 + }, + { + "epoch": 1.402026313343417, + "grad_norm": 0.011082008481025696, + "learning_rate": 2.484646331050507e-06, + "loss": 0.0011, + "step": 82960 + }, + { + "epoch": 1.4021953136222676, + "grad_norm": 0.07454565167427063, + "learning_rate": 2.483371826382903e-06, + "loss": 0.001, + "step": 82970 + }, + { + "epoch": 1.402364313901118, + "grad_norm": 0.0808311477303505, + "learning_rate": 2.482097540674145e-06, + "loss": 0.001, + "step": 82980 + }, + { + "epoch": 1.4025333141799683, + "grad_norm": 0.03571702912449837, + "learning_rate": 2.4808234740350988e-06, + "loss": 0.0011, + "step": 82990 + }, + { + "epoch": 1.4027023144588189, + "grad_norm": 0.02523273229598999, + "learning_rate": 2.479549626576616e-06, + "loss": 0.0002, + "step": 83000 + }, + { + "epoch": 1.4028713147376692, + "grad_norm": 0.09487809240818024, + "learning_rate": 2.4782759984095277e-06, + "loss": 0.0014, + "step": 83010 + }, + { + "epoch": 1.4030403150165198, + "grad_norm": 0.06667988002300262, + "learning_rate": 2.4770025896446477e-06, + "loss": 0.0007, + "step": 83020 + }, + { + "epoch": 1.4032093152953702, + "grad_norm": 0.024624986574053764, + "learning_rate": 2.4757294003927647e-06, + "loss": 0.0013, + "step": 83030 + }, + { + "epoch": 1.4033783155742208, + "grad_norm": 0.027179623022675514, + "learning_rate": 2.4744564307646555e-06, + "loss": 0.0004, + "step": 83040 + }, + { + "epoch": 1.4035473158530711, + "grad_norm": 0.006963491905480623, + "learning_rate": 2.4731836808710717e-06, + "loss": 0.0007, + "step": 83050 + }, + { + "epoch": 1.4037163161319217, + "grad_norm": 0.014814918860793114, + "learning_rate": 2.4719111508227523e-06, + "loss": 0.0005, + "step": 83060 + }, + { + "epoch": 1.403885316410772, + "grad_norm": 0.006815911270678043, + "learning_rate": 2.47063884073041e-06, + "loss": 0.001, + "step": 83070 + }, + { + "epoch": 1.4040543166896224, + "grad_norm": 0.037115439772605896, + "learning_rate": 2.4693667507047453e-06, + "loss": 0.0013, + "step": 83080 + }, + { + "epoch": 1.404223316968473, + "grad_norm": 0.13958847522735596, + "learning_rate": 2.4680948808564327e-06, + "loss": 0.0012, + "step": 83090 + }, + { + "epoch": 1.4043923172473234, + "grad_norm": 0.05505302920937538, + "learning_rate": 2.4668232312961345e-06, + "loss": 0.0007, + "step": 83100 + }, + { + "epoch": 1.404561317526174, + "grad_norm": 0.007390045560896397, + "learning_rate": 2.4655518021344873e-06, + "loss": 0.0012, + "step": 83110 + }, + { + "epoch": 1.4047303178050243, + "grad_norm": 0.015265069901943207, + "learning_rate": 2.464280593482114e-06, + "loss": 0.0007, + "step": 83120 + }, + { + "epoch": 1.404899318083875, + "grad_norm": 
0.01129642128944397, + "learning_rate": 2.463009605449614e-06, + "loss": 0.0011, + "step": 83130 + }, + { + "epoch": 1.4050683183627253, + "grad_norm": 0.022243892773985863, + "learning_rate": 2.4617388381475715e-06, + "loss": 0.0004, + "step": 83140 + }, + { + "epoch": 1.4052373186415759, + "grad_norm": 0.00014853873290121555, + "learning_rate": 2.4604682916865467e-06, + "loss": 0.0008, + "step": 83150 + }, + { + "epoch": 1.4054063189204262, + "grad_norm": 0.05525534227490425, + "learning_rate": 2.4591979661770864e-06, + "loss": 0.0008, + "step": 83160 + }, + { + "epoch": 1.4055753191992766, + "grad_norm": 0.1048225536942482, + "learning_rate": 2.457927861729712e-06, + "loss": 0.0021, + "step": 83170 + }, + { + "epoch": 1.4057443194781272, + "grad_norm": 0.02692188322544098, + "learning_rate": 2.4566579784549303e-06, + "loss": 0.0009, + "step": 83180 + }, + { + "epoch": 1.4059133197569775, + "grad_norm": 0.011747177690267563, + "learning_rate": 2.455388316463227e-06, + "loss": 0.0014, + "step": 83190 + }, + { + "epoch": 1.4060823200358281, + "grad_norm": 0.0679253339767456, + "learning_rate": 2.4541188758650713e-06, + "loss": 0.0005, + "step": 83200 + }, + { + "epoch": 1.4062513203146785, + "grad_norm": 0.021077698096632957, + "learning_rate": 2.452849656770906e-06, + "loss": 0.0009, + "step": 83210 + }, + { + "epoch": 1.406420320593529, + "grad_norm": 0.0737432986497879, + "learning_rate": 2.4515806592911623e-06, + "loss": 0.0011, + "step": 83220 + }, + { + "epoch": 1.4065893208723794, + "grad_norm": 0.04319237917661667, + "learning_rate": 2.4503118835362503e-06, + "loss": 0.0009, + "step": 83230 + }, + { + "epoch": 1.40675832115123, + "grad_norm": 0.04714735969901085, + "learning_rate": 2.4490433296165563e-06, + "loss": 0.0007, + "step": 83240 + }, + { + "epoch": 1.4069273214300804, + "grad_norm": 0.014764646999537945, + "learning_rate": 2.4477749976424537e-06, + "loss": 0.0008, + "step": 83250 + }, + { + "epoch": 1.4070963217089307, + "grad_norm": 0.028909694403409958, + "learning_rate": 2.446506887724291e-06, + "loss": 0.0008, + "step": 83260 + }, + { + "epoch": 1.4072653219877813, + "grad_norm": 0.000218815024709329, + "learning_rate": 2.4452389999724023e-06, + "loss": 0.0006, + "step": 83270 + }, + { + "epoch": 1.4074343222666317, + "grad_norm": 0.028963739052414894, + "learning_rate": 2.4439713344970968e-06, + "loss": 0.0024, + "step": 83280 + }, + { + "epoch": 1.4076033225454823, + "grad_norm": 0.03829863294959068, + "learning_rate": 2.4427038914086715e-06, + "loss": 0.0007, + "step": 83290 + }, + { + "epoch": 1.4077723228243326, + "grad_norm": 0.015505461022257805, + "learning_rate": 2.441436670817396e-06, + "loss": 0.0003, + "step": 83300 + }, + { + "epoch": 1.407941323103183, + "grad_norm": 0.022600213065743446, + "learning_rate": 2.4401696728335287e-06, + "loss": 0.0006, + "step": 83310 + }, + { + "epoch": 1.4081103233820336, + "grad_norm": 0.048072341829538345, + "learning_rate": 2.4389028975673006e-06, + "loss": 0.001, + "step": 83320 + }, + { + "epoch": 1.4082793236608842, + "grad_norm": 0.047883547842502594, + "learning_rate": 2.4376363451289313e-06, + "loss": 0.001, + "step": 83330 + }, + { + "epoch": 1.4084483239397345, + "grad_norm": 0.01860985904932022, + "learning_rate": 2.436370015628613e-06, + "loss": 0.0007, + "step": 83340 + }, + { + "epoch": 1.4086173242185849, + "grad_norm": 0.04321892186999321, + "learning_rate": 2.435103909176526e-06, + "loss": 0.0009, + "step": 83350 + }, + { + "epoch": 1.4087863244974355, + "grad_norm": 0.03282180428504944, + 
"learning_rate": 2.4338380258828244e-06, + "loss": 0.0008, + "step": 83360 + }, + { + "epoch": 1.4089553247762858, + "grad_norm": 0.049251966178417206, + "learning_rate": 2.4325723658576478e-06, + "loss": 0.0014, + "step": 83370 + }, + { + "epoch": 1.4091243250551364, + "grad_norm": 0.027250435203313828, + "learning_rate": 2.431306929211117e-06, + "loss": 0.0014, + "step": 83380 + }, + { + "epoch": 1.4092933253339868, + "grad_norm": 0.007739432156085968, + "learning_rate": 2.430041716053327e-06, + "loss": 0.0006, + "step": 83390 + }, + { + "epoch": 1.4094623256128371, + "grad_norm": 0.00580311706289649, + "learning_rate": 2.428776726494359e-06, + "loss": 0.0008, + "step": 83400 + }, + { + "epoch": 1.4096313258916877, + "grad_norm": 0.023706577718257904, + "learning_rate": 2.427511960644276e-06, + "loss": 0.0008, + "step": 83410 + }, + { + "epoch": 1.4098003261705383, + "grad_norm": 0.03796723112463951, + "learning_rate": 2.4262474186131142e-06, + "loss": 0.001, + "step": 83420 + }, + { + "epoch": 1.4099693264493887, + "grad_norm": 0.012999052181839943, + "learning_rate": 2.424983100510899e-06, + "loss": 0.0006, + "step": 83430 + }, + { + "epoch": 1.410138326728239, + "grad_norm": 0.03713338077068329, + "learning_rate": 2.4237190064476284e-06, + "loss": 0.0005, + "step": 83440 + }, + { + "epoch": 1.4103073270070896, + "grad_norm": 0.02254197560250759, + "learning_rate": 2.422455136533289e-06, + "loss": 0.0006, + "step": 83450 + }, + { + "epoch": 1.41047632728594, + "grad_norm": 0.09923889487981796, + "learning_rate": 2.4211914908778387e-06, + "loss": 0.0009, + "step": 83460 + }, + { + "epoch": 1.4106453275647906, + "grad_norm": 0.03275029733777046, + "learning_rate": 2.4199280695912235e-06, + "loss": 0.0006, + "step": 83470 + }, + { + "epoch": 1.410814327843641, + "grad_norm": 0.04385049268603325, + "learning_rate": 2.4186648727833694e-06, + "loss": 0.0007, + "step": 83480 + }, + { + "epoch": 1.4109833281224913, + "grad_norm": 0.03857121989130974, + "learning_rate": 2.4174019005641757e-06, + "loss": 0.0008, + "step": 83490 + }, + { + "epoch": 1.4111523284013419, + "grad_norm": 0.040195971727371216, + "learning_rate": 2.4161391530435316e-06, + "loss": 0.0005, + "step": 83500 + }, + { + "epoch": 1.4113213286801924, + "grad_norm": 0.0192074254155159, + "learning_rate": 2.414876630331299e-06, + "loss": 0.0008, + "step": 83510 + }, + { + "epoch": 1.4114903289590428, + "grad_norm": 0.07239630818367004, + "learning_rate": 2.4136143325373263e-06, + "loss": 0.0007, + "step": 83520 + }, + { + "epoch": 1.4116593292378932, + "grad_norm": 0.0068862829357385635, + "learning_rate": 2.4123522597714354e-06, + "loss": 0.001, + "step": 83530 + }, + { + "epoch": 1.4118283295167438, + "grad_norm": 0.04745925962924957, + "learning_rate": 2.4110904121434383e-06, + "loss": 0.0005, + "step": 83540 + }, + { + "epoch": 1.4119973297955941, + "grad_norm": 0.012015627697110176, + "learning_rate": 2.409828789763116e-06, + "loss": 0.0006, + "step": 83550 + }, + { + "epoch": 1.4121663300744447, + "grad_norm": 0.081517793238163, + "learning_rate": 2.4085673927402416e-06, + "loss": 0.0004, + "step": 83560 + }, + { + "epoch": 1.412335330353295, + "grad_norm": 0.008573692291975021, + "learning_rate": 2.4073062211845573e-06, + "loss": 0.0004, + "step": 83570 + }, + { + "epoch": 1.4125043306321454, + "grad_norm": 0.026937799528241158, + "learning_rate": 2.406045275205794e-06, + "loss": 0.0005, + "step": 83580 + }, + { + "epoch": 1.412673330910996, + "grad_norm": 0.027086956426501274, + "learning_rate": 2.4047845549136588e-06, + 
"loss": 0.0005, + "step": 83590 + }, + { + "epoch": 1.4128423311898466, + "grad_norm": 0.06727180629968643, + "learning_rate": 2.4035240604178443e-06, + "loss": 0.0004, + "step": 83600 + }, + { + "epoch": 1.413011331468697, + "grad_norm": 0.10865618288516998, + "learning_rate": 2.4022637918280144e-06, + "loss": 0.0014, + "step": 83610 + }, + { + "epoch": 1.4131803317475473, + "grad_norm": 0.030005959793925285, + "learning_rate": 2.4010037492538235e-06, + "loss": 0.0014, + "step": 83620 + }, + { + "epoch": 1.413349332026398, + "grad_norm": 0.048618074506521225, + "learning_rate": 2.3997439328048962e-06, + "loss": 0.0007, + "step": 83630 + }, + { + "epoch": 1.4135183323052483, + "grad_norm": 0.015072825364768505, + "learning_rate": 2.398484342590847e-06, + "loss": 0.0007, + "step": 83640 + }, + { + "epoch": 1.4136873325840988, + "grad_norm": 0.024250496178865433, + "learning_rate": 2.3972249787212637e-06, + "loss": 0.001, + "step": 83650 + }, + { + "epoch": 1.4138563328629492, + "grad_norm": 0.047752391546964645, + "learning_rate": 2.3959658413057193e-06, + "loss": 0.0013, + "step": 83660 + }, + { + "epoch": 1.4140253331417996, + "grad_norm": 0.08491960167884827, + "learning_rate": 2.394706930453762e-06, + "loss": 0.0012, + "step": 83670 + }, + { + "epoch": 1.4141943334206502, + "grad_norm": 0.00630682660266757, + "learning_rate": 2.393448246274926e-06, + "loss": 0.0008, + "step": 83680 + }, + { + "epoch": 1.4143633336995007, + "grad_norm": 0.00278993952088058, + "learning_rate": 2.39218978887872e-06, + "loss": 0.0011, + "step": 83690 + }, + { + "epoch": 1.414532333978351, + "grad_norm": 0.024969255551695824, + "learning_rate": 2.3909315583746395e-06, + "loss": 0.0004, + "step": 83700 + }, + { + "epoch": 1.4147013342572015, + "grad_norm": 0.04617280885577202, + "learning_rate": 2.3896735548721523e-06, + "loss": 0.001, + "step": 83710 + }, + { + "epoch": 1.414870334536052, + "grad_norm": 0.008670149371027946, + "learning_rate": 2.3884157784807138e-06, + "loss": 0.0012, + "step": 83720 + }, + { + "epoch": 1.4150393348149024, + "grad_norm": 0.004349506925791502, + "learning_rate": 2.387158229309757e-06, + "loss": 0.0009, + "step": 83730 + }, + { + "epoch": 1.415208335093753, + "grad_norm": 0.06572866439819336, + "learning_rate": 2.385900907468693e-06, + "loss": 0.0012, + "step": 83740 + }, + { + "epoch": 1.4153773353726034, + "grad_norm": 0.03176647424697876, + "learning_rate": 2.384643813066917e-06, + "loss": 0.0014, + "step": 83750 + }, + { + "epoch": 1.4155463356514537, + "grad_norm": 0.036327064037323, + "learning_rate": 2.383386946213799e-06, + "loss": 0.0006, + "step": 83760 + }, + { + "epoch": 1.4157153359303043, + "grad_norm": 0.01255666371434927, + "learning_rate": 2.382130307018694e-06, + "loss": 0.0005, + "step": 83770 + }, + { + "epoch": 1.4158843362091549, + "grad_norm": 0.04504786431789398, + "learning_rate": 2.380873895590937e-06, + "loss": 0.0011, + "step": 83780 + }, + { + "epoch": 1.4160533364880052, + "grad_norm": 0.0008414179901592433, + "learning_rate": 2.3796177120398427e-06, + "loss": 0.0008, + "step": 83790 + }, + { + "epoch": 1.4162223367668556, + "grad_norm": 0.05456938594579697, + "learning_rate": 2.3783617564747016e-06, + "loss": 0.001, + "step": 83800 + }, + { + "epoch": 1.4163913370457062, + "grad_norm": 0.08158857375383377, + "learning_rate": 2.3771060290047915e-06, + "loss": 0.0015, + "step": 83810 + }, + { + "epoch": 1.4165603373245566, + "grad_norm": 0.08919025212526321, + "learning_rate": 2.375850529739363e-06, + "loss": 0.0009, + "step": 83820 + }, + { + 
"epoch": 1.4167293376034071, + "grad_norm": 0.07634237408638, + "learning_rate": 2.374595258787655e-06, + "loss": 0.0012, + "step": 83830 + }, + { + "epoch": 1.4168983378822575, + "grad_norm": 0.031027309596538544, + "learning_rate": 2.3733402162588782e-06, + "loss": 0.0009, + "step": 83840 + }, + { + "epoch": 1.4170673381611079, + "grad_norm": 0.016404179856181145, + "learning_rate": 2.3720854022622303e-06, + "loss": 0.0008, + "step": 83850 + }, + { + "epoch": 1.4172363384399584, + "grad_norm": 0.11980027705430984, + "learning_rate": 2.3708308169068832e-06, + "loss": 0.0013, + "step": 83860 + }, + { + "epoch": 1.4174053387188088, + "grad_norm": 0.0009762226836755872, + "learning_rate": 2.369576460301996e-06, + "loss": 0.0006, + "step": 83870 + }, + { + "epoch": 1.4175743389976594, + "grad_norm": 0.06345545500516891, + "learning_rate": 2.3683223325566997e-06, + "loss": 0.0007, + "step": 83880 + }, + { + "epoch": 1.4177433392765098, + "grad_norm": 0.10920125991106033, + "learning_rate": 2.3670684337801126e-06, + "loss": 0.0015, + "step": 83890 + }, + { + "epoch": 1.4179123395553603, + "grad_norm": 0.06489748507738113, + "learning_rate": 2.3658147640813267e-06, + "loss": 0.0015, + "step": 83900 + }, + { + "epoch": 1.4180813398342107, + "grad_norm": 0.06503353267908096, + "learning_rate": 2.364561323569421e-06, + "loss": 0.0014, + "step": 83910 + }, + { + "epoch": 1.4182503401130613, + "grad_norm": 0.028652485460042953, + "learning_rate": 2.3633081123534475e-06, + "loss": 0.0008, + "step": 83920 + }, + { + "epoch": 1.4184193403919116, + "grad_norm": 0.01862611249089241, + "learning_rate": 2.362055130542445e-06, + "loss": 0.0006, + "step": 83930 + }, + { + "epoch": 1.418588340670762, + "grad_norm": 0.1063927486538887, + "learning_rate": 2.360802378245426e-06, + "loss": 0.0011, + "step": 83940 + }, + { + "epoch": 1.4187573409496126, + "grad_norm": 0.014605813659727573, + "learning_rate": 2.3595498555713865e-06, + "loss": 0.0006, + "step": 83950 + }, + { + "epoch": 1.418926341228463, + "grad_norm": 0.020873332396149635, + "learning_rate": 2.3582975626293037e-06, + "loss": 0.0002, + "step": 83960 + }, + { + "epoch": 1.4190953415073135, + "grad_norm": 0.17552272975444794, + "learning_rate": 2.357045499528133e-06, + "loss": 0.0009, + "step": 83970 + }, + { + "epoch": 1.419264341786164, + "grad_norm": 0.07404538244009018, + "learning_rate": 2.355793666376808e-06, + "loss": 0.0007, + "step": 83980 + }, + { + "epoch": 1.4194333420650145, + "grad_norm": 0.013749707490205765, + "learning_rate": 2.354542063284246e-06, + "loss": 0.001, + "step": 83990 + }, + { + "epoch": 1.4196023423438648, + "grad_norm": 0.04214341565966606, + "learning_rate": 2.3532906903593434e-06, + "loss": 0.0012, + "step": 84000 + }, + { + "epoch": 1.4197713426227154, + "grad_norm": 0.008900276385247707, + "learning_rate": 2.3520395477109724e-06, + "loss": 0.0005, + "step": 84010 + }, + { + "epoch": 1.4199403429015658, + "grad_norm": 0.05283127725124359, + "learning_rate": 2.3507886354479927e-06, + "loss": 0.0008, + "step": 84020 + }, + { + "epoch": 1.4201093431804162, + "grad_norm": 0.02634141966700554, + "learning_rate": 2.349537953679235e-06, + "loss": 0.0005, + "step": 84030 + }, + { + "epoch": 1.4202783434592667, + "grad_norm": 0.0013195067876949906, + "learning_rate": 2.3482875025135195e-06, + "loss": 0.0007, + "step": 84040 + }, + { + "epoch": 1.420447343738117, + "grad_norm": 0.03893652185797691, + "learning_rate": 2.347037282059637e-06, + "loss": 0.0005, + "step": 84050 + }, + { + "epoch": 1.4206163440169677, + "grad_norm": 
0.07587000727653503, + "learning_rate": 2.345787292426367e-06, + "loss": 0.0012, + "step": 84060 + }, + { + "epoch": 1.420785344295818, + "grad_norm": 0.0020923058036714792, + "learning_rate": 2.34453753372246e-06, + "loss": 0.0009, + "step": 84070 + }, + { + "epoch": 1.4209543445746686, + "grad_norm": 0.03815976530313492, + "learning_rate": 2.343288006056656e-06, + "loss": 0.001, + "step": 84080 + }, + { + "epoch": 1.421123344853519, + "grad_norm": 0.03211137279868126, + "learning_rate": 2.3420387095376655e-06, + "loss": 0.0013, + "step": 84090 + }, + { + "epoch": 1.4212923451323696, + "grad_norm": 0.011824673973023891, + "learning_rate": 2.3407896442741873e-06, + "loss": 0.0008, + "step": 84100 + }, + { + "epoch": 1.42146134541122, + "grad_norm": 0.06679586321115494, + "learning_rate": 2.3395408103748924e-06, + "loss": 0.001, + "step": 84110 + }, + { + "epoch": 1.4216303456900703, + "grad_norm": 0.06375548988580704, + "learning_rate": 2.338292207948439e-06, + "loss": 0.0005, + "step": 84120 + }, + { + "epoch": 1.4217993459689209, + "grad_norm": 0.02201116643846035, + "learning_rate": 2.337043837103458e-06, + "loss": 0.0005, + "step": 84130 + }, + { + "epoch": 1.4219683462477712, + "grad_norm": 0.04283535107970238, + "learning_rate": 2.335795697948565e-06, + "loss": 0.0007, + "step": 84140 + }, + { + "epoch": 1.4221373465266218, + "grad_norm": 0.013010313734412193, + "learning_rate": 2.3345477905923574e-06, + "loss": 0.0006, + "step": 84150 + }, + { + "epoch": 1.4223063468054722, + "grad_norm": 0.06410606950521469, + "learning_rate": 2.3333001151434042e-06, + "loss": 0.0007, + "step": 84160 + }, + { + "epoch": 1.4224753470843228, + "grad_norm": 0.01201480720192194, + "learning_rate": 2.332052671710261e-06, + "loss": 0.001, + "step": 84170 + }, + { + "epoch": 1.4226443473631731, + "grad_norm": 0.016264183446764946, + "learning_rate": 2.330805460401464e-06, + "loss": 0.0011, + "step": 84180 + }, + { + "epoch": 1.4228133476420237, + "grad_norm": 0.030496496707201004, + "learning_rate": 2.329558481325523e-06, + "loss": 0.0009, + "step": 84190 + }, + { + "epoch": 1.422982347920874, + "grad_norm": 0.06798092275857925, + "learning_rate": 2.328311734590934e-06, + "loss": 0.0007, + "step": 84200 + }, + { + "epoch": 1.4231513481997244, + "grad_norm": 0.0211226437240839, + "learning_rate": 2.327065220306167e-06, + "loss": 0.0005, + "step": 84210 + }, + { + "epoch": 1.423320348478575, + "grad_norm": 0.03784128651022911, + "learning_rate": 2.3258189385796786e-06, + "loss": 0.001, + "step": 84220 + }, + { + "epoch": 1.4234893487574254, + "grad_norm": 0.05676233768463135, + "learning_rate": 2.3245728895198978e-06, + "loss": 0.0011, + "step": 84230 + }, + { + "epoch": 1.423658349036276, + "grad_norm": 0.05738433077931404, + "learning_rate": 2.3233270732352377e-06, + "loss": 0.0005, + "step": 84240 + }, + { + "epoch": 1.4238273493151263, + "grad_norm": 0.08206577599048615, + "learning_rate": 2.3220814898340927e-06, + "loss": 0.0007, + "step": 84250 + }, + { + "epoch": 1.4239963495939767, + "grad_norm": 0.107279472053051, + "learning_rate": 2.3208361394248313e-06, + "loss": 0.0011, + "step": 84260 + }, + { + "epoch": 1.4241653498728273, + "grad_norm": 0.03278566524386406, + "learning_rate": 2.3195910221158085e-06, + "loss": 0.0006, + "step": 84270 + }, + { + "epoch": 1.4243343501516779, + "grad_norm": 0.0013548015849664807, + "learning_rate": 2.3183461380153522e-06, + "loss": 0.0009, + "step": 84280 + }, + { + "epoch": 1.4245033504305282, + "grad_norm": 0.036180540919303894, + "learning_rate": 
2.317101487231776e-06, + "loss": 0.001, + "step": 84290 + }, + { + "epoch": 1.4246723507093786, + "grad_norm": 0.037334784865379333, + "learning_rate": 2.3158570698733678e-06, + "loss": 0.001, + "step": 84300 + }, + { + "epoch": 1.4248413509882292, + "grad_norm": 0.17496155202388763, + "learning_rate": 2.3146128860484014e-06, + "loss": 0.001, + "step": 84310 + }, + { + "epoch": 1.4250103512670795, + "grad_norm": 0.015429135411977768, + "learning_rate": 2.313368935865123e-06, + "loss": 0.0005, + "step": 84320 + }, + { + "epoch": 1.4251793515459301, + "grad_norm": 0.048344243317842484, + "learning_rate": 2.312125219431764e-06, + "loss": 0.0007, + "step": 84330 + }, + { + "epoch": 1.4253483518247805, + "grad_norm": 0.004564461763948202, + "learning_rate": 2.3108817368565362e-06, + "loss": 0.0027, + "step": 84340 + }, + { + "epoch": 1.4255173521036308, + "grad_norm": 0.01210798416286707, + "learning_rate": 2.3096384882476243e-06, + "loss": 0.0004, + "step": 84350 + }, + { + "epoch": 1.4256863523824814, + "grad_norm": 0.027822580188512802, + "learning_rate": 2.308395473713199e-06, + "loss": 0.0033, + "step": 84360 + }, + { + "epoch": 1.425855352661332, + "grad_norm": 0.08893080055713654, + "learning_rate": 2.3071526933614103e-06, + "loss": 0.0011, + "step": 84370 + }, + { + "epoch": 1.4260243529401824, + "grad_norm": 0.0183196272701025, + "learning_rate": 2.305910147300383e-06, + "loss": 0.0006, + "step": 84380 + }, + { + "epoch": 1.4261933532190327, + "grad_norm": 0.05367874726653099, + "learning_rate": 2.3046678356382275e-06, + "loss": 0.0006, + "step": 84390 + }, + { + "epoch": 1.4263623534978833, + "grad_norm": 0.0326240099966526, + "learning_rate": 2.3034257584830276e-06, + "loss": 0.0008, + "step": 84400 + }, + { + "epoch": 1.4265313537767337, + "grad_norm": 0.020917244255542755, + "learning_rate": 2.3021839159428543e-06, + "loss": 0.0006, + "step": 84410 + }, + { + "epoch": 1.4267003540555843, + "grad_norm": 0.02447577938437462, + "learning_rate": 2.3009423081257493e-06, + "loss": 0.0008, + "step": 84420 + }, + { + "epoch": 1.4268693543344346, + "grad_norm": 0.19547536969184875, + "learning_rate": 2.2997009351397425e-06, + "loss": 0.0013, + "step": 84430 + }, + { + "epoch": 1.427038354613285, + "grad_norm": 0.027016576379537582, + "learning_rate": 2.2984597970928364e-06, + "loss": 0.0008, + "step": 84440 + }, + { + "epoch": 1.4272073548921356, + "grad_norm": 0.0012160215992480516, + "learning_rate": 2.2972188940930185e-06, + "loss": 0.0005, + "step": 84450 + }, + { + "epoch": 1.4273763551709862, + "grad_norm": 0.1148276999592781, + "learning_rate": 2.29597822624825e-06, + "loss": 0.0009, + "step": 84460 + }, + { + "epoch": 1.4275453554498365, + "grad_norm": 0.013243789784610271, + "learning_rate": 2.2947377936664794e-06, + "loss": 0.0011, + "step": 84470 + }, + { + "epoch": 1.4277143557286869, + "grad_norm": 0.0687238946557045, + "learning_rate": 2.293497596455626e-06, + "loss": 0.0004, + "step": 84480 + }, + { + "epoch": 1.4278833560075375, + "grad_norm": 0.02749786525964737, + "learning_rate": 2.2922576347235947e-06, + "loss": 0.0008, + "step": 84490 + }, + { + "epoch": 1.4280523562863878, + "grad_norm": 0.0005987897166050971, + "learning_rate": 2.2910179085782706e-06, + "loss": 0.0002, + "step": 84500 + }, + { + "epoch": 1.4282213565652384, + "grad_norm": 0.039005473256111145, + "learning_rate": 2.2897784181275123e-06, + "loss": 0.0014, + "step": 84510 + }, + { + "epoch": 1.4283903568440888, + "grad_norm": 0.04905512556433678, + "learning_rate": 2.288539163479165e-06, + "loss": 
0.0006, + "step": 84520 + }, + { + "epoch": 1.4285593571229391, + "grad_norm": 0.04409101605415344, + "learning_rate": 2.2873001447410448e-06, + "loss": 0.0012, + "step": 84530 + }, + { + "epoch": 1.4287283574017897, + "grad_norm": 0.01638905517756939, + "learning_rate": 2.2860613620209567e-06, + "loss": 0.0017, + "step": 84540 + }, + { + "epoch": 1.4288973576806403, + "grad_norm": 0.06980341672897339, + "learning_rate": 2.2848228154266784e-06, + "loss": 0.0012, + "step": 84550 + }, + { + "epoch": 1.4290663579594907, + "grad_norm": 0.055803362280130386, + "learning_rate": 2.283584505065973e-06, + "loss": 0.0006, + "step": 84560 + }, + { + "epoch": 1.429235358238341, + "grad_norm": 0.045138075947761536, + "learning_rate": 2.2823464310465742e-06, + "loss": 0.0009, + "step": 84570 + }, + { + "epoch": 1.4294043585171916, + "grad_norm": 0.028129177168011665, + "learning_rate": 2.2811085934762064e-06, + "loss": 0.0008, + "step": 84580 + }, + { + "epoch": 1.429573358796042, + "grad_norm": 0.16420289874076843, + "learning_rate": 2.279870992462561e-06, + "loss": 0.0008, + "step": 84590 + }, + { + "epoch": 1.4297423590748926, + "grad_norm": 0.02660529501736164, + "learning_rate": 2.2786336281133213e-06, + "loss": 0.0012, + "step": 84600 + }, + { + "epoch": 1.429911359353743, + "grad_norm": 0.001202136161737144, + "learning_rate": 2.2773965005361397e-06, + "loss": 0.0009, + "step": 84610 + }, + { + "epoch": 1.4300803596325933, + "grad_norm": 0.029954062774777412, + "learning_rate": 2.276159609838655e-06, + "loss": 0.0008, + "step": 84620 + }, + { + "epoch": 1.4302493599114439, + "grad_norm": 0.03627415746450424, + "learning_rate": 2.2749229561284804e-06, + "loss": 0.0005, + "step": 84630 + }, + { + "epoch": 1.4304183601902944, + "grad_norm": 0.07238475233316422, + "learning_rate": 2.2736865395132134e-06, + "loss": 0.0008, + "step": 84640 + }, + { + "epoch": 1.4305873604691448, + "grad_norm": 0.058888595551252365, + "learning_rate": 2.272450360100425e-06, + "loss": 0.001, + "step": 84650 + }, + { + "epoch": 1.4307563607479952, + "grad_norm": 0.04588101804256439, + "learning_rate": 2.2712144179976723e-06, + "loss": 0.0012, + "step": 84660 + }, + { + "epoch": 1.4309253610268458, + "grad_norm": 0.08574783802032471, + "learning_rate": 2.2699787133124845e-06, + "loss": 0.0009, + "step": 84670 + }, + { + "epoch": 1.4310943613056961, + "grad_norm": 0.07861129194498062, + "learning_rate": 2.2687432461523778e-06, + "loss": 0.0012, + "step": 84680 + }, + { + "epoch": 1.4312633615845467, + "grad_norm": 0.05322394147515297, + "learning_rate": 2.2675080166248403e-06, + "loss": 0.0009, + "step": 84690 + }, + { + "epoch": 1.431432361863397, + "grad_norm": 0.028422387316823006, + "learning_rate": 2.2662730248373465e-06, + "loss": 0.001, + "step": 84700 + }, + { + "epoch": 1.4316013621422474, + "grad_norm": 0.0500117689371109, + "learning_rate": 2.265038270897343e-06, + "loss": 0.001, + "step": 84710 + }, + { + "epoch": 1.431770362421098, + "grad_norm": 0.04492614418268204, + "learning_rate": 2.2638037549122605e-06, + "loss": 0.0008, + "step": 84720 + }, + { + "epoch": 1.4319393626999486, + "grad_norm": 0.030771596357226372, + "learning_rate": 2.262569476989509e-06, + "loss": 0.0005, + "step": 84730 + }, + { + "epoch": 1.432108362978799, + "grad_norm": 0.03537273406982422, + "learning_rate": 2.261335437236476e-06, + "loss": 0.0007, + "step": 84740 + }, + { + "epoch": 1.4322773632576493, + "grad_norm": 0.09645888209342957, + "learning_rate": 2.260101635760531e-06, + "loss": 0.0019, + "step": 84750 + }, + { + "epoch": 
1.4324463635365, + "grad_norm": 0.0512833297252655, + "learning_rate": 2.258868072669017e-06, + "loss": 0.0008, + "step": 84760 + }, + { + "epoch": 1.4326153638153503, + "grad_norm": 0.006439016200602055, + "learning_rate": 2.2576347480692636e-06, + "loss": 0.0005, + "step": 84770 + }, + { + "epoch": 1.4327843640942008, + "grad_norm": 0.04037424549460411, + "learning_rate": 2.2564016620685724e-06, + "loss": 0.0007, + "step": 84780 + }, + { + "epoch": 1.4329533643730512, + "grad_norm": 0.004947283770889044, + "learning_rate": 2.255168814774231e-06, + "loss": 0.0013, + "step": 84790 + }, + { + "epoch": 1.4331223646519016, + "grad_norm": 0.014233722351491451, + "learning_rate": 2.2539362062935007e-06, + "loss": 0.0008, + "step": 84800 + }, + { + "epoch": 1.4332913649307522, + "grad_norm": 0.0038694292306900024, + "learning_rate": 2.2527038367336267e-06, + "loss": 0.001, + "step": 84810 + }, + { + "epoch": 1.4334603652096025, + "grad_norm": 0.01896851696074009, + "learning_rate": 2.251471706201828e-06, + "loss": 0.0005, + "step": 84820 + }, + { + "epoch": 1.433629365488453, + "grad_norm": 0.04535583034157753, + "learning_rate": 2.25023981480531e-06, + "loss": 0.0005, + "step": 84830 + }, + { + "epoch": 1.4337983657673035, + "grad_norm": 0.0728137418627739, + "learning_rate": 2.249008162651249e-06, + "loss": 0.0016, + "step": 84840 + }, + { + "epoch": 1.433967366046154, + "grad_norm": 0.002345575951039791, + "learning_rate": 2.247776749846808e-06, + "loss": 0.0004, + "step": 84850 + }, + { + "epoch": 1.4341363663250044, + "grad_norm": 0.03326883539557457, + "learning_rate": 2.2465455764991232e-06, + "loss": 0.0005, + "step": 84860 + }, + { + "epoch": 1.434305366603855, + "grad_norm": 0.006739679723978043, + "learning_rate": 2.2453146427153167e-06, + "loss": 0.001, + "step": 84870 + }, + { + "epoch": 1.4344743668827054, + "grad_norm": 0.08603805303573608, + "learning_rate": 2.24408394860248e-06, + "loss": 0.0034, + "step": 84880 + }, + { + "epoch": 1.4346433671615557, + "grad_norm": 0.06419666111469269, + "learning_rate": 2.242853494267695e-06, + "loss": 0.0006, + "step": 84890 + }, + { + "epoch": 1.4348123674404063, + "grad_norm": 0.021079787984490395, + "learning_rate": 2.2416232798180124e-06, + "loss": 0.0009, + "step": 84900 + }, + { + "epoch": 1.4349813677192567, + "grad_norm": 0.05516824498772621, + "learning_rate": 2.240393305360469e-06, + "loss": 0.0008, + "step": 84910 + }, + { + "epoch": 1.4351503679981072, + "grad_norm": 0.0007657507085241377, + "learning_rate": 2.2391635710020792e-06, + "loss": 0.0012, + "step": 84920 + }, + { + "epoch": 1.4353193682769576, + "grad_norm": 0.016834547743201256, + "learning_rate": 2.237934076849837e-06, + "loss": 0.0035, + "step": 84930 + }, + { + "epoch": 1.4354883685558082, + "grad_norm": 0.020797796547412872, + "learning_rate": 2.2367048230107107e-06, + "loss": 0.0006, + "step": 84940 + }, + { + "epoch": 1.4356573688346586, + "grad_norm": 0.020580783486366272, + "learning_rate": 2.2354758095916553e-06, + "loss": 0.0014, + "step": 84950 + }, + { + "epoch": 1.4358263691135091, + "grad_norm": 0.04580893740057945, + "learning_rate": 2.2342470366995968e-06, + "loss": 0.0007, + "step": 84960 + }, + { + "epoch": 1.4359953693923595, + "grad_norm": 0.05132446438074112, + "learning_rate": 2.233018504441448e-06, + "loss": 0.0006, + "step": 84970 + }, + { + "epoch": 1.4361643696712099, + "grad_norm": 0.11026140302419662, + "learning_rate": 2.2317902129240943e-06, + "loss": 0.0018, + "step": 84980 + }, + { + "epoch": 1.4363333699500604, + "grad_norm": 
0.13513745367527008, + "learning_rate": 2.230562162254406e-06, + "loss": 0.0005, + "step": 84990 + }, + { + "epoch": 1.4365023702289108, + "grad_norm": 0.0034779072739183903, + "learning_rate": 2.229334352539226e-06, + "loss": 0.0007, + "step": 85000 + }, + { + "epoch": 1.4366713705077614, + "grad_norm": 0.0015267017297446728, + "learning_rate": 2.2281067838853816e-06, + "loss": 0.0005, + "step": 85010 + }, + { + "epoch": 1.4368403707866118, + "grad_norm": 0.014659667387604713, + "learning_rate": 2.2268794563996787e-06, + "loss": 0.0005, + "step": 85020 + }, + { + "epoch": 1.4370093710654623, + "grad_norm": 0.01734306290745735, + "learning_rate": 2.2256523701888976e-06, + "loss": 0.0008, + "step": 85030 + }, + { + "epoch": 1.4371783713443127, + "grad_norm": 0.0024235215969383717, + "learning_rate": 2.224425525359804e-06, + "loss": 0.001, + "step": 85040 + }, + { + "epoch": 1.4373473716231633, + "grad_norm": 0.046465914696455, + "learning_rate": 2.2231989220191364e-06, + "loss": 0.0013, + "step": 85050 + }, + { + "epoch": 1.4375163719020136, + "grad_norm": 0.048767272382974625, + "learning_rate": 2.2219725602736175e-06, + "loss": 0.0003, + "step": 85060 + }, + { + "epoch": 1.437685372180864, + "grad_norm": 0.015352829359471798, + "learning_rate": 2.2207464402299445e-06, + "loss": 0.0009, + "step": 85070 + }, + { + "epoch": 1.4378543724597146, + "grad_norm": 0.06857003271579742, + "learning_rate": 2.2195205619947983e-06, + "loss": 0.0007, + "step": 85080 + }, + { + "epoch": 1.438023372738565, + "grad_norm": 0.07974264770746231, + "learning_rate": 2.218294925674834e-06, + "loss": 0.0009, + "step": 85090 + }, + { + "epoch": 1.4381923730174155, + "grad_norm": 0.04741501063108444, + "learning_rate": 2.217069531376688e-06, + "loss": 0.0015, + "step": 85100 + }, + { + "epoch": 1.438361373296266, + "grad_norm": 0.05591466650366783, + "learning_rate": 2.215844379206978e-06, + "loss": 0.0009, + "step": 85110 + }, + { + "epoch": 1.4385303735751163, + "grad_norm": 0.022825388237833977, + "learning_rate": 2.2146194692722954e-06, + "loss": 0.0005, + "step": 85120 + }, + { + "epoch": 1.4386993738539668, + "grad_norm": 0.0009319696109741926, + "learning_rate": 2.213394801679214e-06, + "loss": 0.0005, + "step": 85130 + }, + { + "epoch": 1.4388683741328174, + "grad_norm": 0.0101107032969594, + "learning_rate": 2.2121703765342883e-06, + "loss": 0.0005, + "step": 85140 + }, + { + "epoch": 1.4390373744116678, + "grad_norm": 0.024927053600549698, + "learning_rate": 2.2109461939440454e-06, + "loss": 0.0006, + "step": 85150 + }, + { + "epoch": 1.4392063746905182, + "grad_norm": 0.025109082460403442, + "learning_rate": 2.2097222540149987e-06, + "loss": 0.0009, + "step": 85160 + }, + { + "epoch": 1.4393753749693687, + "grad_norm": 0.023030636832118034, + "learning_rate": 2.2084985568536334e-06, + "loss": 0.0007, + "step": 85170 + }, + { + "epoch": 1.439544375248219, + "grad_norm": 0.038196589797735214, + "learning_rate": 2.2072751025664204e-06, + "loss": 0.0009, + "step": 85180 + }, + { + "epoch": 1.4397133755270697, + "grad_norm": 0.0019264441216364503, + "learning_rate": 2.206051891259803e-06, + "loss": 0.002, + "step": 85190 + }, + { + "epoch": 1.43988237580592, + "grad_norm": 0.047808222472667694, + "learning_rate": 2.204828923040209e-06, + "loss": 0.0007, + "step": 85200 + }, + { + "epoch": 1.4400513760847704, + "grad_norm": 0.05627620220184326, + "learning_rate": 2.203606198014041e-06, + "loss": 0.0006, + "step": 85210 + }, + { + "epoch": 1.440220376363621, + "grad_norm": 0.0782071202993393, + "learning_rate": 
2.202383716287684e-06, + "loss": 0.0007, + "step": 85220 + }, + { + "epoch": 1.4403893766424716, + "grad_norm": 0.2014874815940857, + "learning_rate": 2.201161477967496e-06, + "loss": 0.0006, + "step": 85230 + }, + { + "epoch": 1.440558376921322, + "grad_norm": 0.04733043909072876, + "learning_rate": 2.1999394831598225e-06, + "loss": 0.0009, + "step": 85240 + }, + { + "epoch": 1.4407273772001723, + "grad_norm": 0.09516479820013046, + "learning_rate": 2.198717731970979e-06, + "loss": 0.0005, + "step": 85250 + }, + { + "epoch": 1.4408963774790229, + "grad_norm": 0.006495791953057051, + "learning_rate": 2.197496224507265e-06, + "loss": 0.0004, + "step": 85260 + }, + { + "epoch": 1.4410653777578732, + "grad_norm": 0.05586622655391693, + "learning_rate": 2.1962749608749595e-06, + "loss": 0.0008, + "step": 85270 + }, + { + "epoch": 1.4412343780367238, + "grad_norm": 0.057371292263269424, + "learning_rate": 2.1950539411803156e-06, + "loss": 0.0009, + "step": 85280 + }, + { + "epoch": 1.4414033783155742, + "grad_norm": 0.06461921334266663, + "learning_rate": 2.19383316552957e-06, + "loss": 0.0005, + "step": 85290 + }, + { + "epoch": 1.4415723785944246, + "grad_norm": 0.010810323059558868, + "learning_rate": 2.1926126340289345e-06, + "loss": 0.0014, + "step": 85300 + }, + { + "epoch": 1.4417413788732751, + "grad_norm": 0.08385465294122696, + "learning_rate": 2.191392346784601e-06, + "loss": 0.0007, + "step": 85310 + }, + { + "epoch": 1.4419103791521257, + "grad_norm": 0.026941556483507156, + "learning_rate": 2.1901723039027417e-06, + "loss": 0.0008, + "step": 85320 + }, + { + "epoch": 1.442079379430976, + "grad_norm": 0.08849550038576126, + "learning_rate": 2.1889525054895077e-06, + "loss": 0.0006, + "step": 85330 + }, + { + "epoch": 1.4422483797098264, + "grad_norm": 0.08569113165140152, + "learning_rate": 2.1877329516510236e-06, + "loss": 0.0007, + "step": 85340 + }, + { + "epoch": 1.442417379988677, + "grad_norm": 0.01780073158442974, + "learning_rate": 2.186513642493401e-06, + "loss": 0.0004, + "step": 85350 + }, + { + "epoch": 1.4425863802675274, + "grad_norm": 0.04532487690448761, + "learning_rate": 2.1852945781227203e-06, + "loss": 0.0007, + "step": 85360 + }, + { + "epoch": 1.442755380546378, + "grad_norm": 0.058953043073415756, + "learning_rate": 2.184075758645051e-06, + "loss": 0.0009, + "step": 85370 + }, + { + "epoch": 1.4429243808252283, + "grad_norm": 0.1885237693786621, + "learning_rate": 2.1828571841664327e-06, + "loss": 0.001, + "step": 85380 + }, + { + "epoch": 1.4430933811040787, + "grad_norm": 0.020072024315595627, + "learning_rate": 2.1816388547928903e-06, + "loss": 0.0008, + "step": 85390 + }, + { + "epoch": 1.4432623813829293, + "grad_norm": 0.010699886828660965, + "learning_rate": 2.180420770630421e-06, + "loss": 0.001, + "step": 85400 + }, + { + "epoch": 1.4434313816617799, + "grad_norm": 0.03203423693776131, + "learning_rate": 2.1792029317850077e-06, + "loss": 0.0004, + "step": 85410 + }, + { + "epoch": 1.4436003819406302, + "grad_norm": 0.019209813326597214, + "learning_rate": 2.1779853383626043e-06, + "loss": 0.0012, + "step": 85420 + }, + { + "epoch": 1.4437693822194806, + "grad_norm": 0.05294565483927727, + "learning_rate": 2.176767990469152e-06, + "loss": 0.0008, + "step": 85430 + }, + { + "epoch": 1.4439383824983312, + "grad_norm": 0.02167557179927826, + "learning_rate": 2.1755508882105607e-06, + "loss": 0.0007, + "step": 85440 + }, + { + "epoch": 1.4441073827771815, + "grad_norm": 0.04798426106572151, + "learning_rate": 2.1743340316927294e-06, + "loss": 0.0012, + 
"step": 85450 + }, + { + "epoch": 1.4442763830560321, + "grad_norm": 0.058179788291454315, + "learning_rate": 2.1731174210215256e-06, + "loss": 0.0006, + "step": 85460 + }, + { + "epoch": 1.4444453833348825, + "grad_norm": 0.00041537615470588207, + "learning_rate": 2.171901056302803e-06, + "loss": 0.0006, + "step": 85470 + }, + { + "epoch": 1.4446143836137328, + "grad_norm": 0.03446631133556366, + "learning_rate": 2.1706849376423933e-06, + "loss": 0.0006, + "step": 85480 + }, + { + "epoch": 1.4447833838925834, + "grad_norm": 0.048760924488306046, + "learning_rate": 2.1694690651460997e-06, + "loss": 0.0008, + "step": 85490 + }, + { + "epoch": 1.444952384171434, + "grad_norm": 0.01005274523049593, + "learning_rate": 2.1682534389197125e-06, + "loss": 0.0004, + "step": 85500 + }, + { + "epoch": 1.4451213844502844, + "grad_norm": 0.001322588766925037, + "learning_rate": 2.1670380590689953e-06, + "loss": 0.0006, + "step": 85510 + }, + { + "epoch": 1.4452903847291347, + "grad_norm": 0.045743152499198914, + "learning_rate": 2.1658229256996955e-06, + "loss": 0.0007, + "step": 85520 + }, + { + "epoch": 1.4454593850079853, + "grad_norm": 0.06122135743498802, + "learning_rate": 2.1646080389175312e-06, + "loss": 0.0005, + "step": 85530 + }, + { + "epoch": 1.4456283852868357, + "grad_norm": 0.03301031142473221, + "learning_rate": 2.1633933988282067e-06, + "loss": 0.0003, + "step": 85540 + }, + { + "epoch": 1.4457973855656863, + "grad_norm": 0.029237085953354836, + "learning_rate": 2.1621790055373986e-06, + "loss": 0.0006, + "step": 85550 + }, + { + "epoch": 1.4459663858445366, + "grad_norm": 0.06849449872970581, + "learning_rate": 2.1609648591507687e-06, + "loss": 0.0005, + "step": 85560 + }, + { + "epoch": 1.446135386123387, + "grad_norm": 0.05567774921655655, + "learning_rate": 2.159750959773949e-06, + "loss": 0.0008, + "step": 85570 + }, + { + "epoch": 1.4463043864022376, + "grad_norm": 0.02503601461648941, + "learning_rate": 2.158537307512559e-06, + "loss": 0.0009, + "step": 85580 + }, + { + "epoch": 1.4464733866810882, + "grad_norm": 0.04423130676150322, + "learning_rate": 2.1573239024721893e-06, + "loss": 0.0006, + "step": 85590 + }, + { + "epoch": 1.4466423869599385, + "grad_norm": 0.025956764817237854, + "learning_rate": 2.1561107447584147e-06, + "loss": 0.0008, + "step": 85600 + }, + { + "epoch": 1.4468113872387889, + "grad_norm": 0.06159791350364685, + "learning_rate": 2.1548978344767824e-06, + "loss": 0.001, + "step": 85610 + }, + { + "epoch": 1.4469803875176395, + "grad_norm": 0.1021602526307106, + "learning_rate": 2.153685171732825e-06, + "loss": 0.0008, + "step": 85620 + }, + { + "epoch": 1.4471493877964898, + "grad_norm": 0.03043730929493904, + "learning_rate": 2.152472756632047e-06, + "loss": 0.0009, + "step": 85630 + }, + { + "epoch": 1.4473183880753404, + "grad_norm": 0.00028773280791938305, + "learning_rate": 2.151260589279937e-06, + "loss": 0.001, + "step": 85640 + }, + { + "epoch": 1.4474873883541908, + "grad_norm": 0.031069571152329445, + "learning_rate": 2.1500486697819567e-06, + "loss": 0.0007, + "step": 85650 + }, + { + "epoch": 1.4476563886330411, + "grad_norm": 0.03495510667562485, + "learning_rate": 2.1488369982435527e-06, + "loss": 0.0007, + "step": 85660 + }, + { + "epoch": 1.4478253889118917, + "grad_norm": 0.005145241506397724, + "learning_rate": 2.147625574770141e-06, + "loss": 0.001, + "step": 85670 + }, + { + "epoch": 1.447994389190742, + "grad_norm": 0.1505383849143982, + "learning_rate": 2.1464143994671257e-06, + "loss": 0.0009, + "step": 85680 + }, + { + "epoch": 
1.4481633894695927, + "grad_norm": 0.04290211573243141, + "learning_rate": 2.145203472439883e-06, + "loss": 0.0009, + "step": 85690 + }, + { + "epoch": 1.448332389748443, + "grad_norm": 0.05467577278614044, + "learning_rate": 2.1439927937937717e-06, + "loss": 0.0006, + "step": 85700 + }, + { + "epoch": 1.4485013900272936, + "grad_norm": 0.04019749537110329, + "learning_rate": 2.1427823636341233e-06, + "loss": 0.0006, + "step": 85710 + }, + { + "epoch": 1.448670390306144, + "grad_norm": 0.014146536588668823, + "learning_rate": 2.141572182066255e-06, + "loss": 0.0007, + "step": 85720 + }, + { + "epoch": 1.4488393905849946, + "grad_norm": 0.0225143451243639, + "learning_rate": 2.140362249195454e-06, + "loss": 0.001, + "step": 85730 + }, + { + "epoch": 1.449008390863845, + "grad_norm": 0.0570768304169178, + "learning_rate": 2.1391525651269944e-06, + "loss": 0.001, + "step": 85740 + }, + { + "epoch": 1.4491773911426953, + "grad_norm": 0.024209171533584595, + "learning_rate": 2.1379431299661215e-06, + "loss": 0.0006, + "step": 85750 + }, + { + "epoch": 1.4493463914215459, + "grad_norm": 0.047281306236982346, + "learning_rate": 2.1367339438180625e-06, + "loss": 0.0025, + "step": 85760 + }, + { + "epoch": 1.4495153917003962, + "grad_norm": 0.04109838977456093, + "learning_rate": 2.1355250067880252e-06, + "loss": 0.0007, + "step": 85770 + }, + { + "epoch": 1.4496843919792468, + "grad_norm": 0.005282451398670673, + "learning_rate": 2.1343163189811896e-06, + "loss": 0.0004, + "step": 85780 + }, + { + "epoch": 1.4498533922580972, + "grad_norm": 0.0228683240711689, + "learning_rate": 2.1331078805027206e-06, + "loss": 0.0008, + "step": 85790 + }, + { + "epoch": 1.4500223925369478, + "grad_norm": 0.047111328691244125, + "learning_rate": 2.131899691457754e-06, + "loss": 0.0012, + "step": 85800 + }, + { + "epoch": 1.4501913928157981, + "grad_norm": 0.05333612114191055, + "learning_rate": 2.1306917519514124e-06, + "loss": 0.0007, + "step": 85810 + }, + { + "epoch": 1.4503603930946487, + "grad_norm": 0.04406392201781273, + "learning_rate": 2.1294840620887895e-06, + "loss": 0.0013, + "step": 85820 + }, + { + "epoch": 1.450529393373499, + "grad_norm": 0.027557572349905968, + "learning_rate": 2.1282766219749623e-06, + "loss": 0.0008, + "step": 85830 + }, + { + "epoch": 1.4506983936523494, + "grad_norm": 0.012420267798006535, + "learning_rate": 2.1270694317149815e-06, + "loss": 0.0006, + "step": 85840 + }, + { + "epoch": 1.4508673939312, + "grad_norm": 0.004347395151853561, + "learning_rate": 2.125862491413881e-06, + "loss": 0.0005, + "step": 85850 + }, + { + "epoch": 1.4510363942100504, + "grad_norm": 0.019361618906259537, + "learning_rate": 2.1246558011766676e-06, + "loss": 0.0007, + "step": 85860 + }, + { + "epoch": 1.451205394488901, + "grad_norm": 0.01567954197525978, + "learning_rate": 2.1234493611083314e-06, + "loss": 0.0007, + "step": 85870 + }, + { + "epoch": 1.4513743947677513, + "grad_norm": 0.01867993175983429, + "learning_rate": 2.122243171313839e-06, + "loss": 0.0008, + "step": 85880 + }, + { + "epoch": 1.451543395046602, + "grad_norm": 0.03657732158899307, + "learning_rate": 2.1210372318981325e-06, + "loss": 0.0007, + "step": 85890 + }, + { + "epoch": 1.4517123953254523, + "grad_norm": 0.020141759887337685, + "learning_rate": 2.1198315429661354e-06, + "loss": 0.0015, + "step": 85900 + }, + { + "epoch": 1.4518813956043028, + "grad_norm": 0.015410812571644783, + "learning_rate": 2.1186261046227507e-06, + "loss": 0.0014, + "step": 85910 + }, + { + "epoch": 1.4520503958831532, + "grad_norm": 
0.07391265779733658, + "learning_rate": 2.1174209169728537e-06, + "loss": 0.001, + "step": 85920 + }, + { + "epoch": 1.4522193961620036, + "grad_norm": 0.318547785282135, + "learning_rate": 2.1162159801213054e-06, + "loss": 0.0006, + "step": 85930 + }, + { + "epoch": 1.4523883964408542, + "grad_norm": 0.007752739824354649, + "learning_rate": 2.115011294172937e-06, + "loss": 0.0005, + "step": 85940 + }, + { + "epoch": 1.4525573967197045, + "grad_norm": 0.07410235702991486, + "learning_rate": 2.113806859232566e-06, + "loss": 0.0009, + "step": 85950 + }, + { + "epoch": 1.452726396998555, + "grad_norm": 0.03195637837052345, + "learning_rate": 2.1126026754049804e-06, + "loss": 0.0009, + "step": 85960 + }, + { + "epoch": 1.4528953972774055, + "grad_norm": 0.0022371308878064156, + "learning_rate": 2.1113987427949532e-06, + "loss": 0.0006, + "step": 85970 + }, + { + "epoch": 1.453064397556256, + "grad_norm": 0.024299968034029007, + "learning_rate": 2.11019506150723e-06, + "loss": 0.0019, + "step": 85980 + }, + { + "epoch": 1.4532333978351064, + "grad_norm": 0.017303941771388054, + "learning_rate": 2.1089916316465393e-06, + "loss": 0.0008, + "step": 85990 + }, + { + "epoch": 1.453402398113957, + "grad_norm": 0.02556757442653179, + "learning_rate": 2.1077884533175818e-06, + "loss": 0.0014, + "step": 86000 + }, + { + "epoch": 1.4535713983928074, + "grad_norm": 0.09371578693389893, + "learning_rate": 2.106585526625044e-06, + "loss": 0.0008, + "step": 86010 + }, + { + "epoch": 1.4537403986716577, + "grad_norm": 0.024883732199668884, + "learning_rate": 2.1053828516735826e-06, + "loss": 0.0007, + "step": 86020 + }, + { + "epoch": 1.4539093989505083, + "grad_norm": 0.006892631761729717, + "learning_rate": 2.1041804285678376e-06, + "loss": 0.0005, + "step": 86030 + }, + { + "epoch": 1.4540783992293587, + "grad_norm": 0.06672769039869308, + "learning_rate": 2.102978257412428e-06, + "loss": 0.0012, + "step": 86040 + }, + { + "epoch": 1.4542473995082092, + "grad_norm": 0.0031189655419439077, + "learning_rate": 2.101776338311944e-06, + "loss": 0.0004, + "step": 86050 + }, + { + "epoch": 1.4544163997870596, + "grad_norm": 0.025004854425787926, + "learning_rate": 2.1005746713709614e-06, + "loss": 0.0003, + "step": 86060 + }, + { + "epoch": 1.45458540006591, + "grad_norm": 0.0011318206088617444, + "learning_rate": 2.099373256694032e-06, + "loss": 0.0018, + "step": 86070 + }, + { + "epoch": 1.4547544003447606, + "grad_norm": 0.027253031730651855, + "learning_rate": 2.098172094385681e-06, + "loss": 0.0006, + "step": 86080 + }, + { + "epoch": 1.4549234006236111, + "grad_norm": 0.013232845813035965, + "learning_rate": 2.096971184550418e-06, + "loss": 0.0004, + "step": 86090 + }, + { + "epoch": 1.4550924009024615, + "grad_norm": 0.09382961690425873, + "learning_rate": 2.0957705272927293e-06, + "loss": 0.0007, + "step": 86100 + }, + { + "epoch": 1.4552614011813119, + "grad_norm": 0.07172657549381256, + "learning_rate": 2.0945701227170736e-06, + "loss": 0.0028, + "step": 86110 + }, + { + "epoch": 1.4554304014601624, + "grad_norm": 0.042001448571681976, + "learning_rate": 2.0933699709278965e-06, + "loss": 0.0006, + "step": 86120 + }, + { + "epoch": 1.4555994017390128, + "grad_norm": 0.12435533851385117, + "learning_rate": 2.0921700720296135e-06, + "loss": 0.0009, + "step": 86130 + }, + { + "epoch": 1.4557684020178634, + "grad_norm": 0.02972419187426567, + "learning_rate": 2.090970426126624e-06, + "loss": 0.0012, + "step": 86140 + }, + { + "epoch": 1.4559374022967138, + "grad_norm": 0.004238105844706297, + 
"learning_rate": 2.0897710333233008e-06, + "loss": 0.0004, + "step": 86150 + }, + { + "epoch": 1.4561064025755641, + "grad_norm": 0.17487910389900208, + "learning_rate": 2.0885718937239995e-06, + "loss": 0.0015, + "step": 86160 + }, + { + "epoch": 1.4562754028544147, + "grad_norm": 0.08014755696058273, + "learning_rate": 2.087373007433048e-06, + "loss": 0.0005, + "step": 86170 + }, + { + "epoch": 1.4564444031332653, + "grad_norm": 0.1412859410047531, + "learning_rate": 2.0861743745547588e-06, + "loss": 0.0008, + "step": 86180 + }, + { + "epoch": 1.4566134034121156, + "grad_norm": 0.0705047994852066, + "learning_rate": 2.084975995193415e-06, + "loss": 0.0009, + "step": 86190 + }, + { + "epoch": 1.456782403690966, + "grad_norm": 0.04238751530647278, + "learning_rate": 2.0837778694532845e-06, + "loss": 0.0007, + "step": 86200 + }, + { + "epoch": 1.4569514039698166, + "grad_norm": 0.014505607075989246, + "learning_rate": 2.082579997438608e-06, + "loss": 0.0009, + "step": 86210 + }, + { + "epoch": 1.457120404248667, + "grad_norm": 0.02632046863436699, + "learning_rate": 2.0813823792536082e-06, + "loss": 0.0005, + "step": 86220 + }, + { + "epoch": 1.4572894045275175, + "grad_norm": 0.031719546765089035, + "learning_rate": 2.0801850150024805e-06, + "loss": 0.0006, + "step": 86230 + }, + { + "epoch": 1.457458404806368, + "grad_norm": 0.015553918667137623, + "learning_rate": 2.078987904789403e-06, + "loss": 0.0007, + "step": 86240 + }, + { + "epoch": 1.4576274050852183, + "grad_norm": 0.010944337584078312, + "learning_rate": 2.0777910487185325e-06, + "loss": 0.0009, + "step": 86250 + }, + { + "epoch": 1.4577964053640688, + "grad_norm": 0.06787962466478348, + "learning_rate": 2.076594446893998e-06, + "loss": 0.0008, + "step": 86260 + }, + { + "epoch": 1.4579654056429194, + "grad_norm": 0.01724882423877716, + "learning_rate": 2.0753980994199103e-06, + "loss": 0.0009, + "step": 86270 + }, + { + "epoch": 1.4581344059217698, + "grad_norm": 0.0009835042292252183, + "learning_rate": 2.0742020064003576e-06, + "loss": 0.0005, + "step": 86280 + }, + { + "epoch": 1.4583034062006202, + "grad_norm": 0.015828389674425125, + "learning_rate": 2.0730061679394086e-06, + "loss": 0.0004, + "step": 86290 + }, + { + "epoch": 1.4584724064794707, + "grad_norm": 0.022186938673257828, + "learning_rate": 2.071810584141103e-06, + "loss": 0.0005, + "step": 86300 + }, + { + "epoch": 1.458641406758321, + "grad_norm": 0.012610320001840591, + "learning_rate": 2.070615255109465e-06, + "loss": 0.0012, + "step": 86310 + }, + { + "epoch": 1.4588104070371717, + "grad_norm": 0.05332818254828453, + "learning_rate": 2.0694201809484914e-06, + "loss": 0.0012, + "step": 86320 + }, + { + "epoch": 1.458979407316022, + "grad_norm": 0.0006286121206358075, + "learning_rate": 2.0682253617621636e-06, + "loss": 0.0008, + "step": 86330 + }, + { + "epoch": 1.4591484075948724, + "grad_norm": 0.028262170031666756, + "learning_rate": 2.0670307976544313e-06, + "loss": 0.0009, + "step": 86340 + }, + { + "epoch": 1.459317407873723, + "grad_norm": 0.03445601835846901, + "learning_rate": 2.0658364887292327e-06, + "loss": 0.0008, + "step": 86350 + }, + { + "epoch": 1.4594864081525736, + "grad_norm": 0.023881226778030396, + "learning_rate": 2.0646424350904737e-06, + "loss": 0.0006, + "step": 86360 + }, + { + "epoch": 1.459655408431424, + "grad_norm": 0.02888556756079197, + "learning_rate": 2.063448636842047e-06, + "loss": 0.001, + "step": 86370 + }, + { + "epoch": 1.4598244087102743, + "grad_norm": 0.04445100948214531, + "learning_rate": 2.062255094087815e-06, 
+ "loss": 0.0006, + "step": 86380 + }, + { + "epoch": 1.4599934089891249, + "grad_norm": 0.049608681350946426, + "learning_rate": 2.061061806931625e-06, + "loss": 0.0006, + "step": 86390 + }, + { + "epoch": 1.4601624092679752, + "grad_norm": 0.03627485781908035, + "learning_rate": 2.0598687754772957e-06, + "loss": 0.0006, + "step": 86400 + }, + { + "epoch": 1.4603314095468258, + "grad_norm": 0.0006325527792796493, + "learning_rate": 2.05867599982863e-06, + "loss": 0.0006, + "step": 86410 + }, + { + "epoch": 1.4605004098256762, + "grad_norm": 0.07899534702301025, + "learning_rate": 2.0574834800894018e-06, + "loss": 0.001, + "step": 86420 + }, + { + "epoch": 1.4606694101045266, + "grad_norm": 0.06240850314497948, + "learning_rate": 2.056291216363369e-06, + "loss": 0.0018, + "step": 86430 + }, + { + "epoch": 1.4608384103833771, + "grad_norm": 0.02459152415394783, + "learning_rate": 2.0550992087542614e-06, + "loss": 0.0009, + "step": 86440 + }, + { + "epoch": 1.4610074106622277, + "grad_norm": 0.06909990310668945, + "learning_rate": 2.053907457365791e-06, + "loss": 0.0005, + "step": 86450 + }, + { + "epoch": 1.461176410941078, + "grad_norm": 0.029086565598845482, + "learning_rate": 2.0527159623016457e-06, + "loss": 0.0003, + "step": 86460 + }, + { + "epoch": 1.4613454112199284, + "grad_norm": 0.043895281851291656, + "learning_rate": 2.051524723665494e-06, + "loss": 0.0004, + "step": 86470 + }, + { + "epoch": 1.461514411498779, + "grad_norm": 0.09322455525398254, + "learning_rate": 2.0503337415609747e-06, + "loss": 0.001, + "step": 86480 + }, + { + "epoch": 1.4616834117776294, + "grad_norm": 0.008567149750888348, + "learning_rate": 2.0491430160917135e-06, + "loss": 0.001, + "step": 86490 + }, + { + "epoch": 1.46185241205648, + "grad_norm": 0.02848733961582184, + "learning_rate": 2.047952547361305e-06, + "loss": 0.0004, + "step": 86500 + }, + { + "epoch": 1.4620214123353303, + "grad_norm": 0.03597874194383621, + "learning_rate": 2.04676233547333e-06, + "loss": 0.001, + "step": 86510 + }, + { + "epoch": 1.4621904126141807, + "grad_norm": 0.11492574959993362, + "learning_rate": 2.045572380531339e-06, + "loss": 0.0011, + "step": 86520 + }, + { + "epoch": 1.4623594128930313, + "grad_norm": 0.18648619949817657, + "learning_rate": 2.0443826826388648e-06, + "loss": 0.0006, + "step": 86530 + }, + { + "epoch": 1.4625284131718819, + "grad_norm": 0.06965423375368118, + "learning_rate": 2.0431932418994195e-06, + "loss": 0.0014, + "step": 86540 + }, + { + "epoch": 1.4626974134507322, + "grad_norm": 0.02029821090400219, + "learning_rate": 2.0420040584164863e-06, + "loss": 0.0012, + "step": 86550 + }, + { + "epoch": 1.4628664137295826, + "grad_norm": 0.1096748486161232, + "learning_rate": 2.0408151322935336e-06, + "loss": 0.0027, + "step": 86560 + }, + { + "epoch": 1.4630354140084332, + "grad_norm": 0.10865205526351929, + "learning_rate": 2.0396264636340007e-06, + "loss": 0.0006, + "step": 86570 + }, + { + "epoch": 1.4632044142872835, + "grad_norm": 0.06341142952442169, + "learning_rate": 2.0384380525413106e-06, + "loss": 0.0005, + "step": 86580 + }, + { + "epoch": 1.4633734145661341, + "grad_norm": 0.023144427686929703, + "learning_rate": 2.0372498991188565e-06, + "loss": 0.0009, + "step": 86590 + }, + { + "epoch": 1.4635424148449845, + "grad_norm": 0.03163832426071167, + "learning_rate": 2.0360620034700184e-06, + "loss": 0.0013, + "step": 86600 + }, + { + "epoch": 1.4637114151238348, + "grad_norm": 0.011350251734256744, + "learning_rate": 2.0348743656981447e-06, + "loss": 0.0006, + "step": 86610 + }, + { + 
"epoch": 1.4638804154026854, + "grad_norm": 0.0069978744722902775, + "learning_rate": 2.0336869859065694e-06, + "loss": 0.0005, + "step": 86620 + }, + { + "epoch": 1.4640494156815358, + "grad_norm": 0.03031867742538452, + "learning_rate": 2.0324998641985966e-06, + "loss": 0.0005, + "step": 86630 + }, + { + "epoch": 1.4642184159603864, + "grad_norm": 0.0002373101160628721, + "learning_rate": 2.0313130006775134e-06, + "loss": 0.0005, + "step": 86640 + }, + { + "epoch": 1.4643874162392367, + "grad_norm": 0.06348241865634918, + "learning_rate": 2.030126395446583e-06, + "loss": 0.0011, + "step": 86650 + }, + { + "epoch": 1.4645564165180873, + "grad_norm": 0.05334573611617088, + "learning_rate": 2.0289400486090465e-06, + "loss": 0.0004, + "step": 86660 + }, + { + "epoch": 1.4647254167969377, + "grad_norm": 0.02373746782541275, + "learning_rate": 2.02775396026812e-06, + "loss": 0.0012, + "step": 86670 + }, + { + "epoch": 1.4648944170757883, + "grad_norm": 0.017289143055677414, + "learning_rate": 2.0265681305270015e-06, + "loss": 0.0011, + "step": 86680 + }, + { + "epoch": 1.4650634173546386, + "grad_norm": 0.04153512045741081, + "learning_rate": 2.0253825594888605e-06, + "loss": 0.0006, + "step": 86690 + }, + { + "epoch": 1.465232417633489, + "grad_norm": 0.03110233135521412, + "learning_rate": 2.0241972472568504e-06, + "loss": 0.0005, + "step": 86700 + }, + { + "epoch": 1.4654014179123396, + "grad_norm": 0.00017331923299934715, + "learning_rate": 2.0230121939340965e-06, + "loss": 0.0009, + "step": 86710 + }, + { + "epoch": 1.46557041819119, + "grad_norm": 0.10718990862369537, + "learning_rate": 2.0218273996237075e-06, + "loss": 0.0012, + "step": 86720 + }, + { + "epoch": 1.4657394184700405, + "grad_norm": 0.03949207440018654, + "learning_rate": 2.0206428644287624e-06, + "loss": 0.0013, + "step": 86730 + }, + { + "epoch": 1.4659084187488909, + "grad_norm": 0.03155754134058952, + "learning_rate": 2.019458588452325e-06, + "loss": 0.001, + "step": 86740 + }, + { + "epoch": 1.4660774190277415, + "grad_norm": 0.026663416996598244, + "learning_rate": 2.01827457179743e-06, + "loss": 0.0009, + "step": 86750 + }, + { + "epoch": 1.4662464193065918, + "grad_norm": 0.06034409627318382, + "learning_rate": 2.0170908145670954e-06, + "loss": 0.0009, + "step": 86760 + }, + { + "epoch": 1.4664154195854424, + "grad_norm": 0.02038990706205368, + "learning_rate": 2.015907316864311e-06, + "loss": 0.0013, + "step": 86770 + }, + { + "epoch": 1.4665844198642928, + "grad_norm": 0.017077386379241943, + "learning_rate": 2.01472407879205e-06, + "loss": 0.0007, + "step": 86780 + }, + { + "epoch": 1.4667534201431431, + "grad_norm": 0.08096768707036972, + "learning_rate": 2.013541100453256e-06, + "loss": 0.0007, + "step": 86790 + }, + { + "epoch": 1.4669224204219937, + "grad_norm": 0.011129752732813358, + "learning_rate": 2.012358381950857e-06, + "loss": 0.0012, + "step": 86800 + }, + { + "epoch": 1.467091420700844, + "grad_norm": 0.02629377320408821, + "learning_rate": 2.0111759233877555e-06, + "loss": 0.0005, + "step": 86810 + }, + { + "epoch": 1.4672604209796947, + "grad_norm": 0.0009242766536772251, + "learning_rate": 2.0099937248668276e-06, + "loss": 0.0006, + "step": 86820 + }, + { + "epoch": 1.467429421258545, + "grad_norm": 0.02693902887403965, + "learning_rate": 2.008811786490933e-06, + "loss": 0.0009, + "step": 86830 + }, + { + "epoch": 1.4675984215373956, + "grad_norm": 0.02461072988808155, + "learning_rate": 2.0076301083629075e-06, + "loss": 0.0008, + "step": 86840 + }, + { + "epoch": 1.467767421816246, + 
"grad_norm": 0.008230219595134258, + "learning_rate": 2.0064486905855583e-06, + "loss": 0.0007, + "step": 86850 + }, + { + "epoch": 1.4679364220950966, + "grad_norm": 0.020722288638353348, + "learning_rate": 2.0052675332616785e-06, + "loss": 0.0003, + "step": 86860 + }, + { + "epoch": 1.468105422373947, + "grad_norm": 0.013037968426942825, + "learning_rate": 2.0040866364940335e-06, + "loss": 0.0005, + "step": 86870 + }, + { + "epoch": 1.4682744226527973, + "grad_norm": 0.029389988631010056, + "learning_rate": 2.0029060003853658e-06, + "loss": 0.001, + "step": 86880 + }, + { + "epoch": 1.4684434229316479, + "grad_norm": 0.0019620771054178476, + "learning_rate": 2.001725625038399e-06, + "loss": 0.0009, + "step": 86890 + }, + { + "epoch": 1.4686124232104982, + "grad_norm": 0.023910041898489, + "learning_rate": 2.0005455105558275e-06, + "loss": 0.0012, + "step": 86900 + }, + { + "epoch": 1.4687814234893488, + "grad_norm": 0.11860333383083344, + "learning_rate": 1.999365657040331e-06, + "loss": 0.0004, + "step": 86910 + }, + { + "epoch": 1.4689504237681992, + "grad_norm": 0.08853831887245178, + "learning_rate": 1.99818606459456e-06, + "loss": 0.0008, + "step": 86920 + }, + { + "epoch": 1.4691194240470498, + "grad_norm": 0.04686124622821808, + "learning_rate": 1.997006733321147e-06, + "loss": 0.0007, + "step": 86930 + }, + { + "epoch": 1.4692884243259001, + "grad_norm": 0.020372433587908745, + "learning_rate": 1.995827663322695e-06, + "loss": 0.0007, + "step": 86940 + }, + { + "epoch": 1.4694574246047507, + "grad_norm": 0.03424699977040291, + "learning_rate": 1.994648854701795e-06, + "loss": 0.0006, + "step": 86950 + }, + { + "epoch": 1.469626424883601, + "grad_norm": 0.016948726028203964, + "learning_rate": 1.9934703075610035e-06, + "loss": 0.001, + "step": 86960 + }, + { + "epoch": 1.4697954251624514, + "grad_norm": 0.02187652513384819, + "learning_rate": 1.9922920220028643e-06, + "loss": 0.0011, + "step": 86970 + }, + { + "epoch": 1.469964425441302, + "grad_norm": 0.05699220299720764, + "learning_rate": 1.99111399812989e-06, + "loss": 0.0007, + "step": 86980 + }, + { + "epoch": 1.4701334257201524, + "grad_norm": 0.005953615996986628, + "learning_rate": 1.989936236044578e-06, + "loss": 0.0012, + "step": 86990 + }, + { + "epoch": 1.470302425999003, + "grad_norm": 0.009984872303903103, + "learning_rate": 1.9887587358493956e-06, + "loss": 0.0006, + "step": 87000 + }, + { + "epoch": 1.4704714262778533, + "grad_norm": 0.012570555321872234, + "learning_rate": 1.9875814976467935e-06, + "loss": 0.0007, + "step": 87010 + }, + { + "epoch": 1.4706404265567037, + "grad_norm": 0.03216434642672539, + "learning_rate": 1.9864045215391974e-06, + "loss": 0.0008, + "step": 87020 + }, + { + "epoch": 1.4708094268355543, + "grad_norm": 0.025994719937443733, + "learning_rate": 1.985227807629008e-06, + "loss": 0.0006, + "step": 87030 + }, + { + "epoch": 1.4709784271144049, + "grad_norm": 0.011998817324638367, + "learning_rate": 1.9840513560186063e-06, + "loss": 0.0006, + "step": 87040 + }, + { + "epoch": 1.4711474273932552, + "grad_norm": 0.01960444636642933, + "learning_rate": 1.9828751668103492e-06, + "loss": 0.0029, + "step": 87050 + }, + { + "epoch": 1.4713164276721056, + "grad_norm": 0.03137729689478874, + "learning_rate": 1.9816992401065726e-06, + "loss": 0.0007, + "step": 87060 + }, + { + "epoch": 1.4714854279509562, + "grad_norm": 0.04449846222996712, + "learning_rate": 1.980523576009584e-06, + "loss": 0.0004, + "step": 87070 + }, + { + "epoch": 1.4716544282298065, + "grad_norm": 0.04958295449614525, + 
"learning_rate": 1.9793481746216763e-06, + "loss": 0.001, + "step": 87080 + }, + { + "epoch": 1.471823428508657, + "grad_norm": 0.053153663873672485, + "learning_rate": 1.9781730360451113e-06, + "loss": 0.0006, + "step": 87090 + }, + { + "epoch": 1.4719924287875075, + "grad_norm": 0.010244421660900116, + "learning_rate": 1.9769981603821342e-06, + "loss": 0.0012, + "step": 87100 + }, + { + "epoch": 1.4721614290663578, + "grad_norm": 0.04046174883842468, + "learning_rate": 1.9758235477349626e-06, + "loss": 0.0005, + "step": 87110 + }, + { + "epoch": 1.4723304293452084, + "grad_norm": 0.010600178502500057, + "learning_rate": 1.9746491982057965e-06, + "loss": 0.0009, + "step": 87120 + }, + { + "epoch": 1.472499429624059, + "grad_norm": 0.212949737906456, + "learning_rate": 1.9734751118968066e-06, + "loss": 0.0023, + "step": 87130 + }, + { + "epoch": 1.4726684299029094, + "grad_norm": 0.05141410976648331, + "learning_rate": 1.9723012889101477e-06, + "loss": 0.0005, + "step": 87140 + }, + { + "epoch": 1.4728374301817597, + "grad_norm": 0.028595589101314545, + "learning_rate": 1.9711277293479444e-06, + "loss": 0.0004, + "step": 87150 + }, + { + "epoch": 1.4730064304606103, + "grad_norm": 0.06812410801649094, + "learning_rate": 1.9699544333123057e-06, + "loss": 0.0008, + "step": 87160 + }, + { + "epoch": 1.4731754307394607, + "grad_norm": 0.06886996328830719, + "learning_rate": 1.9687814009053096e-06, + "loss": 0.0021, + "step": 87170 + }, + { + "epoch": 1.4733444310183113, + "grad_norm": 0.0342499203979969, + "learning_rate": 1.967608632229021e-06, + "loss": 0.0004, + "step": 87180 + }, + { + "epoch": 1.4735134312971616, + "grad_norm": 0.055144939571619034, + "learning_rate": 1.9664361273854714e-06, + "loss": 0.0009, + "step": 87190 + }, + { + "epoch": 1.473682431576012, + "grad_norm": 0.07175352424383163, + "learning_rate": 1.9652638864766772e-06, + "loss": 0.0006, + "step": 87200 + }, + { + "epoch": 1.4738514318548626, + "grad_norm": 0.0303585696965456, + "learning_rate": 1.9640919096046296e-06, + "loss": 0.0009, + "step": 87210 + }, + { + "epoch": 1.4740204321337131, + "grad_norm": 0.0030788208823651075, + "learning_rate": 1.9629201968712935e-06, + "loss": 0.0009, + "step": 87220 + }, + { + "epoch": 1.4741894324125635, + "grad_norm": 0.04283014312386513, + "learning_rate": 1.9617487483786157e-06, + "loss": 0.001, + "step": 87230 + }, + { + "epoch": 1.4743584326914139, + "grad_norm": 0.026105936616659164, + "learning_rate": 1.960577564228519e-06, + "loss": 0.0007, + "step": 87240 + }, + { + "epoch": 1.4745274329702645, + "grad_norm": 0.04074949771165848, + "learning_rate": 1.9594066445228986e-06, + "loss": 0.0013, + "step": 87250 + }, + { + "epoch": 1.4746964332491148, + "grad_norm": 0.02833901159465313, + "learning_rate": 1.9582359893636344e-06, + "loss": 0.0007, + "step": 87260 + }, + { + "epoch": 1.4748654335279654, + "grad_norm": 0.06579770147800446, + "learning_rate": 1.9570655988525745e-06, + "loss": 0.0011, + "step": 87270 + }, + { + "epoch": 1.4750344338068158, + "grad_norm": 0.00045693680294789374, + "learning_rate": 1.955895473091553e-06, + "loss": 0.001, + "step": 87280 + }, + { + "epoch": 1.4752034340856661, + "grad_norm": 0.004687410779297352, + "learning_rate": 1.954725612182372e-06, + "loss": 0.0004, + "step": 87290 + }, + { + "epoch": 1.4753724343645167, + "grad_norm": 0.016979416832327843, + "learning_rate": 1.953556016226818e-06, + "loss": 0.0006, + "step": 87300 + }, + { + "epoch": 1.4755414346433673, + "grad_norm": 0.042390208691358566, + "learning_rate": 
1.9523866853266528e-06, + "loss": 0.0009, + "step": 87310 + }, + { + "epoch": 1.4757104349222177, + "grad_norm": 0.01581060327589512, + "learning_rate": 1.95121761958361e-06, + "loss": 0.0014, + "step": 87320 + }, + { + "epoch": 1.475879435201068, + "grad_norm": 0.011384384706616402, + "learning_rate": 1.9500488190994075e-06, + "loss": 0.0011, + "step": 87330 + }, + { + "epoch": 1.4760484354799186, + "grad_norm": 0.04996991902589798, + "learning_rate": 1.9488802839757335e-06, + "loss": 0.0007, + "step": 87340 + }, + { + "epoch": 1.476217435758769, + "grad_norm": 0.026351723819971085, + "learning_rate": 1.9477120143142604e-06, + "loss": 0.001, + "step": 87350 + }, + { + "epoch": 1.4763864360376195, + "grad_norm": 0.002755114808678627, + "learning_rate": 1.946544010216628e-06, + "loss": 0.0009, + "step": 87360 + }, + { + "epoch": 1.47655543631647, + "grad_norm": 0.02086162567138672, + "learning_rate": 1.945376271784463e-06, + "loss": 0.0005, + "step": 87370 + }, + { + "epoch": 1.4767244365953203, + "grad_norm": 0.02455678954720497, + "learning_rate": 1.9442087991193607e-06, + "loss": 0.0013, + "step": 87380 + }, + { + "epoch": 1.4768934368741709, + "grad_norm": 0.04566321521997452, + "learning_rate": 1.9430415923229e-06, + "loss": 0.0017, + "step": 87390 + }, + { + "epoch": 1.4770624371530214, + "grad_norm": 0.050864171236753464, + "learning_rate": 1.9418746514966307e-06, + "loss": 0.0014, + "step": 87400 + }, + { + "epoch": 1.4772314374318718, + "grad_norm": 0.042852431535720825, + "learning_rate": 1.940707976742084e-06, + "loss": 0.0006, + "step": 87410 + }, + { + "epoch": 1.4774004377107222, + "grad_norm": 0.044308166950941086, + "learning_rate": 1.939541568160765e-06, + "loss": 0.0012, + "step": 87420 + }, + { + "epoch": 1.4775694379895727, + "grad_norm": 0.02402975969016552, + "learning_rate": 1.9383754258541604e-06, + "loss": 0.0005, + "step": 87430 + }, + { + "epoch": 1.477738438268423, + "grad_norm": 0.06683152168989182, + "learning_rate": 1.9372095499237255e-06, + "loss": 0.0009, + "step": 87440 + }, + { + "epoch": 1.4779074385472737, + "grad_norm": 0.030294183641672134, + "learning_rate": 1.9360439404709007e-06, + "loss": 0.0006, + "step": 87450 + }, + { + "epoch": 1.478076438826124, + "grad_norm": 0.033787600696086884, + "learning_rate": 1.9348785975970973e-06, + "loss": 0.0005, + "step": 87460 + }, + { + "epoch": 1.4782454391049744, + "grad_norm": 0.027894029393792152, + "learning_rate": 1.933713521403708e-06, + "loss": 0.0008, + "step": 87470 + }, + { + "epoch": 1.478414439383825, + "grad_norm": 0.022178735584020615, + "learning_rate": 1.932548711992097e-06, + "loss": 0.0005, + "step": 87480 + }, + { + "epoch": 1.4785834396626756, + "grad_norm": 0.08248352259397507, + "learning_rate": 1.931384169463613e-06, + "loss": 0.0009, + "step": 87490 + }, + { + "epoch": 1.478752439941526, + "grad_norm": 0.0213492251932621, + "learning_rate": 1.930219893919571e-06, + "loss": 0.0009, + "step": 87500 + }, + { + "epoch": 1.4789214402203763, + "grad_norm": 0.021869516000151634, + "learning_rate": 1.929055885461274e-06, + "loss": 0.0005, + "step": 87510 + }, + { + "epoch": 1.4790904404992269, + "grad_norm": 0.00892605260014534, + "learning_rate": 1.927892144189992e-06, + "loss": 0.0002, + "step": 87520 + }, + { + "epoch": 1.4792594407780773, + "grad_norm": 0.04024768993258476, + "learning_rate": 1.9267286702069803e-06, + "loss": 0.0006, + "step": 87530 + }, + { + "epoch": 1.4794284410569278, + "grad_norm": 0.08087485283613205, + "learning_rate": 1.9255654636134628e-06, + "loss": 0.0014, + 
"step": 87540 + }, + { + "epoch": 1.4795974413357782, + "grad_norm": 0.030795546248555183, + "learning_rate": 1.9244025245106457e-06, + "loss": 0.0004, + "step": 87550 + }, + { + "epoch": 1.4797664416146286, + "grad_norm": 0.10120958834886551, + "learning_rate": 1.9232398529997125e-06, + "loss": 0.0009, + "step": 87560 + }, + { + "epoch": 1.4799354418934791, + "grad_norm": 0.09195226430892944, + "learning_rate": 1.9220774491818178e-06, + "loss": 0.001, + "step": 87570 + }, + { + "epoch": 1.4801044421723295, + "grad_norm": 0.03049328923225403, + "learning_rate": 1.920915313158099e-06, + "loss": 0.0005, + "step": 87580 + }, + { + "epoch": 1.48027344245118, + "grad_norm": 0.03200387582182884, + "learning_rate": 1.919753445029665e-06, + "loss": 0.002, + "step": 87590 + }, + { + "epoch": 1.4804424427300305, + "grad_norm": 0.09777234494686127, + "learning_rate": 1.918591844897606e-06, + "loss": 0.001, + "step": 87600 + }, + { + "epoch": 1.480611443008881, + "grad_norm": 0.009541748091578484, + "learning_rate": 1.917430512862987e-06, + "loss": 0.0008, + "step": 87610 + }, + { + "epoch": 1.4807804432877314, + "grad_norm": 0.019821155816316605, + "learning_rate": 1.9162694490268478e-06, + "loss": 0.0007, + "step": 87620 + }, + { + "epoch": 1.480949443566582, + "grad_norm": 0.015410860069096088, + "learning_rate": 1.9151086534902077e-06, + "loss": 0.0008, + "step": 87630 + }, + { + "epoch": 1.4811184438454323, + "grad_norm": 0.06320604681968689, + "learning_rate": 1.9139481263540626e-06, + "loss": 0.0005, + "step": 87640 + }, + { + "epoch": 1.4812874441242827, + "grad_norm": 0.030254274606704712, + "learning_rate": 1.912787867719381e-06, + "loss": 0.0011, + "step": 87650 + }, + { + "epoch": 1.4814564444031333, + "grad_norm": 0.06914281845092773, + "learning_rate": 1.911627877687115e-06, + "loss": 0.0008, + "step": 87660 + }, + { + "epoch": 1.4816254446819836, + "grad_norm": 0.009485420770943165, + "learning_rate": 1.910468156358185e-06, + "loss": 0.001, + "step": 87670 + }, + { + "epoch": 1.4817944449608342, + "grad_norm": 0.023814881220459938, + "learning_rate": 1.909308703833496e-06, + "loss": 0.0011, + "step": 87680 + }, + { + "epoch": 1.4819634452396846, + "grad_norm": 0.0059132324531674385, + "learning_rate": 1.9081495202139233e-06, + "loss": 0.0006, + "step": 87690 + }, + { + "epoch": 1.4821324455185352, + "grad_norm": 0.0694541186094284, + "learning_rate": 1.9069906056003245e-06, + "loss": 0.0008, + "step": 87700 + }, + { + "epoch": 1.4823014457973855, + "grad_norm": 0.055805131793022156, + "learning_rate": 1.9058319600935272e-06, + "loss": 0.0005, + "step": 87710 + }, + { + "epoch": 1.4824704460762361, + "grad_norm": 0.04586062207818031, + "learning_rate": 1.9046735837943426e-06, + "loss": 0.0007, + "step": 87720 + }, + { + "epoch": 1.4826394463550865, + "grad_norm": 0.029063422232866287, + "learning_rate": 1.9035154768035512e-06, + "loss": 0.0006, + "step": 87730 + }, + { + "epoch": 1.4828084466339368, + "grad_norm": 0.009174218401312828, + "learning_rate": 1.9023576392219184e-06, + "loss": 0.0012, + "step": 87740 + }, + { + "epoch": 1.4829774469127874, + "grad_norm": 0.09952740371227264, + "learning_rate": 1.9012000711501777e-06, + "loss": 0.0017, + "step": 87750 + }, + { + "epoch": 1.4831464471916378, + "grad_norm": 0.020948514342308044, + "learning_rate": 1.9000427726890464e-06, + "loss": 0.0011, + "step": 87760 + }, + { + "epoch": 1.4833154474704884, + "grad_norm": 0.1276942640542984, + "learning_rate": 1.8988857439392117e-06, + "loss": 0.001, + "step": 87770 + }, + { + "epoch": 
1.4834844477493387, + "grad_norm": 0.05418195202946663, + "learning_rate": 1.897728985001343e-06, + "loss": 0.0009, + "step": 87780 + }, + { + "epoch": 1.4836534480281893, + "grad_norm": 0.044336918741464615, + "learning_rate": 1.896572495976083e-06, + "loss": 0.0014, + "step": 87790 + }, + { + "epoch": 1.4838224483070397, + "grad_norm": 0.08884186297655106, + "learning_rate": 1.8954162769640544e-06, + "loss": 0.001, + "step": 87800 + }, + { + "epoch": 1.4839914485858903, + "grad_norm": 0.005170523654669523, + "learning_rate": 1.8942603280658495e-06, + "loss": 0.0007, + "step": 87810 + }, + { + "epoch": 1.4841604488647406, + "grad_norm": 0.008166944608092308, + "learning_rate": 1.8931046493820438e-06, + "loss": 0.0005, + "step": 87820 + }, + { + "epoch": 1.484329449143591, + "grad_norm": 0.12038251757621765, + "learning_rate": 1.891949241013189e-06, + "loss": 0.0013, + "step": 87830 + }, + { + "epoch": 1.4844984494224416, + "grad_norm": 0.17388568818569183, + "learning_rate": 1.890794103059807e-06, + "loss": 0.0009, + "step": 87840 + }, + { + "epoch": 1.484667449701292, + "grad_norm": 0.0022980275098234415, + "learning_rate": 1.889639235622404e-06, + "loss": 0.0004, + "step": 87850 + }, + { + "epoch": 1.4848364499801425, + "grad_norm": 0.0297295693308115, + "learning_rate": 1.8884846388014566e-06, + "loss": 0.0007, + "step": 87860 + }, + { + "epoch": 1.4850054502589929, + "grad_norm": 0.012024343013763428, + "learning_rate": 1.8873303126974223e-06, + "loss": 0.0005, + "step": 87870 + }, + { + "epoch": 1.4851744505378432, + "grad_norm": 0.08143452554941177, + "learning_rate": 1.8861762574107307e-06, + "loss": 0.0007, + "step": 87880 + }, + { + "epoch": 1.4853434508166938, + "grad_norm": 0.00044382974738255143, + "learning_rate": 1.8850224730417933e-06, + "loss": 0.0005, + "step": 87890 + }, + { + "epoch": 1.4855124510955444, + "grad_norm": 0.0237219650298357, + "learning_rate": 1.8838689596909909e-06, + "loss": 0.0007, + "step": 87900 + }, + { + "epoch": 1.4856814513743948, + "grad_norm": 0.021469522267580032, + "learning_rate": 1.882715717458689e-06, + "loss": 0.0005, + "step": 87910 + }, + { + "epoch": 1.4858504516532451, + "grad_norm": 0.03975404426455498, + "learning_rate": 1.8815627464452218e-06, + "loss": 0.0008, + "step": 87920 + }, + { + "epoch": 1.4860194519320957, + "grad_norm": 0.24657444655895233, + "learning_rate": 1.8804100467509062e-06, + "loss": 0.0007, + "step": 87930 + }, + { + "epoch": 1.486188452210946, + "grad_norm": 0.02665461227297783, + "learning_rate": 1.8792576184760297e-06, + "loss": 0.0008, + "step": 87940 + }, + { + "epoch": 1.4863574524897967, + "grad_norm": 0.033536460250616074, + "learning_rate": 1.8781054617208628e-06, + "loss": 0.0007, + "step": 87950 + }, + { + "epoch": 1.486526452768647, + "grad_norm": 0.05046175792813301, + "learning_rate": 1.8769535765856444e-06, + "loss": 0.0011, + "step": 87960 + }, + { + "epoch": 1.4866954530474974, + "grad_norm": 0.01105108205229044, + "learning_rate": 1.8758019631705964e-06, + "loss": 0.0005, + "step": 87970 + }, + { + "epoch": 1.486864453326348, + "grad_norm": 0.022933317348361015, + "learning_rate": 1.874650621575917e-06, + "loss": 0.0008, + "step": 87980 + }, + { + "epoch": 1.4870334536051986, + "grad_norm": 0.07909981161355972, + "learning_rate": 1.873499551901774e-06, + "loss": 0.0012, + "step": 87990 + }, + { + "epoch": 1.487202453884049, + "grad_norm": 0.025490691885352135, + "learning_rate": 1.8723487542483182e-06, + "loss": 0.0003, + "step": 88000 + }, + { + "epoch": 1.4873714541628993, + "grad_norm": 
0.023210370913147926, + "learning_rate": 1.8711982287156767e-06, + "loss": 0.0003, + "step": 88010 + }, + { + "epoch": 1.4875404544417499, + "grad_norm": 0.05981844663619995, + "learning_rate": 1.8700479754039464e-06, + "loss": 0.0008, + "step": 88020 + }, + { + "epoch": 1.4877094547206002, + "grad_norm": 0.09941408038139343, + "learning_rate": 1.868897994413209e-06, + "loss": 0.0018, + "step": 88030 + }, + { + "epoch": 1.4878784549994508, + "grad_norm": 0.085413359105587, + "learning_rate": 1.8677482858435153e-06, + "loss": 0.0012, + "step": 88040 + }, + { + "epoch": 1.4880474552783012, + "grad_norm": 0.017946895211935043, + "learning_rate": 1.8665988497948983e-06, + "loss": 0.0009, + "step": 88050 + }, + { + "epoch": 1.4882164555571515, + "grad_norm": 0.022523336112499237, + "learning_rate": 1.8654496863673616e-06, + "loss": 0.0015, + "step": 88060 + }, + { + "epoch": 1.4883854558360021, + "grad_norm": 0.03276972100138664, + "learning_rate": 1.8643007956608893e-06, + "loss": 0.0009, + "step": 88070 + }, + { + "epoch": 1.4885544561148527, + "grad_norm": 0.1450221985578537, + "learning_rate": 1.8631521777754418e-06, + "loss": 0.0016, + "step": 88080 + }, + { + "epoch": 1.488723456393703, + "grad_norm": 0.042774394154548645, + "learning_rate": 1.8620038328109524e-06, + "loss": 0.0008, + "step": 88090 + }, + { + "epoch": 1.4888924566725534, + "grad_norm": 0.009172528050839901, + "learning_rate": 1.8608557608673345e-06, + "loss": 0.0006, + "step": 88100 + }, + { + "epoch": 1.489061456951404, + "grad_norm": 0.0414559468626976, + "learning_rate": 1.8597079620444735e-06, + "loss": 0.0006, + "step": 88110 + }, + { + "epoch": 1.4892304572302544, + "grad_norm": 0.10334787517786026, + "learning_rate": 1.8585604364422367e-06, + "loss": 0.0006, + "step": 88120 + }, + { + "epoch": 1.489399457509105, + "grad_norm": 0.058103326708078384, + "learning_rate": 1.8574131841604604e-06, + "loss": 0.0008, + "step": 88130 + }, + { + "epoch": 1.4895684577879553, + "grad_norm": 0.04806658625602722, + "learning_rate": 1.8562662052989656e-06, + "loss": 0.0018, + "step": 88140 + }, + { + "epoch": 1.4897374580668057, + "grad_norm": 0.02748355269432068, + "learning_rate": 1.8551194999575406e-06, + "loss": 0.0006, + "step": 88150 + }, + { + "epoch": 1.4899064583456563, + "grad_norm": 0.08941474556922913, + "learning_rate": 1.853973068235958e-06, + "loss": 0.002, + "step": 88160 + }, + { + "epoch": 1.4900754586245069, + "grad_norm": 0.10284332931041718, + "learning_rate": 1.8528269102339597e-06, + "loss": 0.0007, + "step": 88170 + }, + { + "epoch": 1.4902444589033572, + "grad_norm": 0.05748404935002327, + "learning_rate": 1.8516810260512686e-06, + "loss": 0.0004, + "step": 88180 + }, + { + "epoch": 1.4904134591822076, + "grad_norm": 0.006811431143432856, + "learning_rate": 1.8505354157875822e-06, + "loss": 0.001, + "step": 88190 + }, + { + "epoch": 1.4905824594610582, + "grad_norm": 0.04848619922995567, + "learning_rate": 1.8493900795425756e-06, + "loss": 0.001, + "step": 88200 + }, + { + "epoch": 1.4907514597399085, + "grad_norm": 0.021616630256175995, + "learning_rate": 1.8482450174158956e-06, + "loss": 0.0007, + "step": 88210 + }, + { + "epoch": 1.490920460018759, + "grad_norm": 0.010953355580568314, + "learning_rate": 1.8471002295071715e-06, + "loss": 0.0007, + "step": 88220 + }, + { + "epoch": 1.4910894602976095, + "grad_norm": 0.005469362251460552, + "learning_rate": 1.845955715916002e-06, + "loss": 0.0011, + "step": 88230 + }, + { + "epoch": 1.4912584605764598, + "grad_norm": 0.09093499928712845, + "learning_rate": 
1.8448114767419683e-06, + "loss": 0.0005, + "step": 88240 + }, + { + "epoch": 1.4914274608553104, + "grad_norm": 0.06009407341480255, + "learning_rate": 1.843667512084622e-06, + "loss": 0.0008, + "step": 88250 + }, + { + "epoch": 1.491596461134161, + "grad_norm": 0.01995786651968956, + "learning_rate": 1.8425238220434966e-06, + "loss": 0.001, + "step": 88260 + }, + { + "epoch": 1.4917654614130114, + "grad_norm": 0.01962241530418396, + "learning_rate": 1.8413804067180952e-06, + "loss": 0.0006, + "step": 88270 + }, + { + "epoch": 1.4919344616918617, + "grad_norm": 0.00016943324590101838, + "learning_rate": 1.8402372662079039e-06, + "loss": 0.0011, + "step": 88280 + }, + { + "epoch": 1.4921034619707123, + "grad_norm": 0.27835261821746826, + "learning_rate": 1.8390944006123785e-06, + "loss": 0.0023, + "step": 88290 + }, + { + "epoch": 1.4922724622495627, + "grad_norm": 0.020108027383685112, + "learning_rate": 1.8379518100309562e-06, + "loss": 0.0005, + "step": 88300 + }, + { + "epoch": 1.4924414625284133, + "grad_norm": 0.0005495472578331828, + "learning_rate": 1.8368094945630455e-06, + "loss": 0.0005, + "step": 88310 + }, + { + "epoch": 1.4926104628072636, + "grad_norm": 0.0009330344619229436, + "learning_rate": 1.835667454308035e-06, + "loss": 0.0022, + "step": 88320 + }, + { + "epoch": 1.492779463086114, + "grad_norm": 0.006676590535789728, + "learning_rate": 1.834525689365289e-06, + "loss": 0.0005, + "step": 88330 + }, + { + "epoch": 1.4929484633649646, + "grad_norm": 0.09464714676141739, + "learning_rate": 1.8333841998341435e-06, + "loss": 0.0007, + "step": 88340 + }, + { + "epoch": 1.4931174636438151, + "grad_norm": 0.035130809992551804, + "learning_rate": 1.832242985813917e-06, + "loss": 0.0004, + "step": 88350 + }, + { + "epoch": 1.4932864639226655, + "grad_norm": 0.023531250655651093, + "learning_rate": 1.8311020474038971e-06, + "loss": 0.0006, + "step": 88360 + }, + { + "epoch": 1.4934554642015159, + "grad_norm": 0.0664057582616806, + "learning_rate": 1.8299613847033526e-06, + "loss": 0.0012, + "step": 88370 + }, + { + "epoch": 1.4936244644803665, + "grad_norm": 0.050617605447769165, + "learning_rate": 1.8288209978115268e-06, + "loss": 0.0015, + "step": 88380 + }, + { + "epoch": 1.4937934647592168, + "grad_norm": 0.05996793136000633, + "learning_rate": 1.8276808868276408e-06, + "loss": 0.0007, + "step": 88390 + }, + { + "epoch": 1.4939624650380674, + "grad_norm": 0.13003170490264893, + "learning_rate": 1.8265410518508865e-06, + "loss": 0.0005, + "step": 88400 + }, + { + "epoch": 1.4941314653169178, + "grad_norm": 0.028135791420936584, + "learning_rate": 1.8254014929804375e-06, + "loss": 0.0008, + "step": 88410 + }, + { + "epoch": 1.4943004655957681, + "grad_norm": 0.14190877974033356, + "learning_rate": 1.8242622103154384e-06, + "loss": 0.0004, + "step": 88420 + }, + { + "epoch": 1.4944694658746187, + "grad_norm": 0.029621532186865807, + "learning_rate": 1.8231232039550156e-06, + "loss": 0.0005, + "step": 88430 + }, + { + "epoch": 1.4946384661534693, + "grad_norm": 0.0512235090136528, + "learning_rate": 1.8219844739982652e-06, + "loss": 0.0007, + "step": 88440 + }, + { + "epoch": 1.4948074664323197, + "grad_norm": 0.08601278066635132, + "learning_rate": 1.820846020544264e-06, + "loss": 0.0008, + "step": 88450 + }, + { + "epoch": 1.49497646671117, + "grad_norm": 0.03690945357084274, + "learning_rate": 1.8197078436920612e-06, + "loss": 0.0007, + "step": 88460 + }, + { + "epoch": 1.4951454669900206, + "grad_norm": 0.09859257936477661, + "learning_rate": 1.8185699435406867e-06, + "loss": 
0.001, + "step": 88470 + }, + { + "epoch": 1.495314467268871, + "grad_norm": 0.09783705323934555, + "learning_rate": 1.8174323201891398e-06, + "loss": 0.001, + "step": 88480 + }, + { + "epoch": 1.4954834675477215, + "grad_norm": 0.024148667231202126, + "learning_rate": 1.8162949737364028e-06, + "loss": 0.0006, + "step": 88490 + }, + { + "epoch": 1.495652467826572, + "grad_norm": 0.04207601770758629, + "learning_rate": 1.8151579042814267e-06, + "loss": 0.0008, + "step": 88500 + }, + { + "epoch": 1.4958214681054223, + "grad_norm": 0.07525702565908432, + "learning_rate": 1.814021111923145e-06, + "loss": 0.0008, + "step": 88510 + }, + { + "epoch": 1.4959904683842729, + "grad_norm": 0.01988234929740429, + "learning_rate": 1.812884596760462e-06, + "loss": 0.0005, + "step": 88520 + }, + { + "epoch": 1.4961594686631232, + "grad_norm": 0.009409152902662754, + "learning_rate": 1.8117483588922618e-06, + "loss": 0.0014, + "step": 88530 + }, + { + "epoch": 1.4963284689419738, + "grad_norm": 0.017130011692643166, + "learning_rate": 1.8106123984174006e-06, + "loss": 0.0005, + "step": 88540 + }, + { + "epoch": 1.4964974692208242, + "grad_norm": 0.02408965677022934, + "learning_rate": 1.8094767154347142e-06, + "loss": 0.0006, + "step": 88550 + }, + { + "epoch": 1.4966664694996747, + "grad_norm": 0.005401544738560915, + "learning_rate": 1.8083413100430113e-06, + "loss": 0.0009, + "step": 88560 + }, + { + "epoch": 1.496835469778525, + "grad_norm": 0.017198331654071808, + "learning_rate": 1.8072061823410803e-06, + "loss": 0.0011, + "step": 88570 + }, + { + "epoch": 1.4970044700573757, + "grad_norm": 0.013952374458312988, + "learning_rate": 1.8060713324276792e-06, + "loss": 0.0009, + "step": 88580 + }, + { + "epoch": 1.497173470336226, + "grad_norm": 0.03441685065627098, + "learning_rate": 1.8049367604015472e-06, + "loss": 0.0005, + "step": 88590 + }, + { + "epoch": 1.4973424706150764, + "grad_norm": 0.00039839776582084596, + "learning_rate": 1.803802466361399e-06, + "loss": 0.0018, + "step": 88600 + }, + { + "epoch": 1.497511470893927, + "grad_norm": 0.004353836644440889, + "learning_rate": 1.8026684504059204e-06, + "loss": 0.0007, + "step": 88610 + }, + { + "epoch": 1.4976804711727774, + "grad_norm": 0.02817024476826191, + "learning_rate": 1.80153471263378e-06, + "loss": 0.0007, + "step": 88620 + }, + { + "epoch": 1.497849471451628, + "grad_norm": 0.024944987148046494, + "learning_rate": 1.800401253143615e-06, + "loss": 0.0004, + "step": 88630 + }, + { + "epoch": 1.4980184717304783, + "grad_norm": 0.04219798743724823, + "learning_rate": 1.7992680720340449e-06, + "loss": 0.0006, + "step": 88640 + }, + { + "epoch": 1.498187472009329, + "grad_norm": 0.05956093594431877, + "learning_rate": 1.7981351694036586e-06, + "loss": 0.0012, + "step": 88650 + }, + { + "epoch": 1.4983564722881793, + "grad_norm": 0.04532230645418167, + "learning_rate": 1.7970025453510275e-06, + "loss": 0.0007, + "step": 88660 + }, + { + "epoch": 1.4985254725670298, + "grad_norm": 0.06564140319824219, + "learning_rate": 1.7958701999746925e-06, + "loss": 0.0009, + "step": 88670 + }, + { + "epoch": 1.4986944728458802, + "grad_norm": 0.027097081765532494, + "learning_rate": 1.7947381333731761e-06, + "loss": 0.0005, + "step": 88680 + }, + { + "epoch": 1.4988634731247306, + "grad_norm": 0.18646438419818878, + "learning_rate": 1.7936063456449698e-06, + "loss": 0.0021, + "step": 88690 + }, + { + "epoch": 1.4990324734035811, + "grad_norm": 0.0645003616809845, + "learning_rate": 1.7924748368885492e-06, + "loss": 0.0009, + "step": 88700 + }, + { + 
"epoch": 1.4992014736824315, + "grad_norm": 0.05661391094326973, + "learning_rate": 1.7913436072023566e-06, + "loss": 0.0004, + "step": 88710 + }, + { + "epoch": 1.499370473961282, + "grad_norm": 0.027739612385630608, + "learning_rate": 1.7902126566848177e-06, + "loss": 0.0024, + "step": 88720 + }, + { + "epoch": 1.4995394742401325, + "grad_norm": 0.03819940239191055, + "learning_rate": 1.7890819854343284e-06, + "loss": 0.0006, + "step": 88730 + }, + { + "epoch": 1.499708474518983, + "grad_norm": 0.06162050738930702, + "learning_rate": 1.787951593549263e-06, + "loss": 0.0008, + "step": 88740 + }, + { + "epoch": 1.4998774747978334, + "grad_norm": 0.021486075595021248, + "learning_rate": 1.7868214811279738e-06, + "loss": 0.0008, + "step": 88750 + }, + { + "epoch": 1.500046475076684, + "grad_norm": 0.025850096717476845, + "learning_rate": 1.7856916482687825e-06, + "loss": 0.0005, + "step": 88760 + }, + { + "epoch": 1.5002154753555343, + "grad_norm": 0.05704595521092415, + "learning_rate": 1.784562095069991e-06, + "loss": 0.0004, + "step": 88770 + }, + { + "epoch": 1.5003844756343847, + "grad_norm": 0.00453208526596427, + "learning_rate": 1.7834328216298786e-06, + "loss": 0.0003, + "step": 88780 + }, + { + "epoch": 1.5005534759132353, + "grad_norm": 0.012295718304812908, + "learning_rate": 1.782303828046693e-06, + "loss": 0.0008, + "step": 88790 + }, + { + "epoch": 1.5007224761920859, + "grad_norm": 0.06527142971754074, + "learning_rate": 1.7811751144186667e-06, + "loss": 0.0011, + "step": 88800 + }, + { + "epoch": 1.5008914764709362, + "grad_norm": 0.010464944876730442, + "learning_rate": 1.7800466808439993e-06, + "loss": 0.0003, + "step": 88810 + }, + { + "epoch": 1.5010604767497866, + "grad_norm": 0.05920897051692009, + "learning_rate": 1.778918527420873e-06, + "loss": 0.0004, + "step": 88820 + }, + { + "epoch": 1.501229477028637, + "grad_norm": 0.07279383391141891, + "learning_rate": 1.7777906542474398e-06, + "loss": 0.0005, + "step": 88830 + }, + { + "epoch": 1.5013984773074875, + "grad_norm": 0.025966499000787735, + "learning_rate": 1.7766630614218317e-06, + "loss": 0.001, + "step": 88840 + }, + { + "epoch": 1.5015674775863381, + "grad_norm": 0.0517917238175869, + "learning_rate": 1.775535749042156e-06, + "loss": 0.0007, + "step": 88850 + }, + { + "epoch": 1.5017364778651885, + "grad_norm": 0.0029979932587593794, + "learning_rate": 1.7744087172064917e-06, + "loss": 0.0005, + "step": 88860 + }, + { + "epoch": 1.5019054781440389, + "grad_norm": 0.02118530496954918, + "learning_rate": 1.773281966012899e-06, + "loss": 0.0006, + "step": 88870 + }, + { + "epoch": 1.5020744784228894, + "grad_norm": 0.0917520821094513, + "learning_rate": 1.7721554955594072e-06, + "loss": 0.0015, + "step": 88880 + }, + { + "epoch": 1.50224347870174, + "grad_norm": 0.024326149374246597, + "learning_rate": 1.7710293059440286e-06, + "loss": 0.0005, + "step": 88890 + }, + { + "epoch": 1.5024124789805904, + "grad_norm": 0.006080344319343567, + "learning_rate": 1.7699033972647435e-06, + "loss": 0.0007, + "step": 88900 + }, + { + "epoch": 1.5025814792594407, + "grad_norm": 0.00796883087605238, + "learning_rate": 1.768777769619514e-06, + "loss": 0.0006, + "step": 88910 + }, + { + "epoch": 1.502750479538291, + "grad_norm": 0.060068417340517044, + "learning_rate": 1.7676524231062731e-06, + "loss": 0.0008, + "step": 88920 + }, + { + "epoch": 1.5029194798171417, + "grad_norm": 0.04129534587264061, + "learning_rate": 1.7665273578229342e-06, + "loss": 0.0005, + "step": 88930 + }, + { + "epoch": 1.5030884800959923, + 
"grad_norm": 0.027377134189009666, + "learning_rate": 1.7654025738673797e-06, + "loss": 0.0006, + "step": 88940 + }, + { + "epoch": 1.5032574803748426, + "grad_norm": 0.01775474287569523, + "learning_rate": 1.7642780713374736e-06, + "loss": 0.001, + "step": 88950 + }, + { + "epoch": 1.503426480653693, + "grad_norm": 0.03469599410891533, + "learning_rate": 1.7631538503310524e-06, + "loss": 0.0008, + "step": 88960 + }, + { + "epoch": 1.5035954809325436, + "grad_norm": 0.001071075676009059, + "learning_rate": 1.7620299109459305e-06, + "loss": 0.0006, + "step": 88970 + }, + { + "epoch": 1.503764481211394, + "grad_norm": 0.006017435807734728, + "learning_rate": 1.7609062532798936e-06, + "loss": 0.0015, + "step": 88980 + }, + { + "epoch": 1.5039334814902445, + "grad_norm": 0.26868242025375366, + "learning_rate": 1.759782877430708e-06, + "loss": 0.0008, + "step": 88990 + }, + { + "epoch": 1.504102481769095, + "grad_norm": 0.0289106834679842, + "learning_rate": 1.7586597834961094e-06, + "loss": 0.0012, + "step": 89000 + }, + { + "epoch": 1.5042714820479453, + "grad_norm": 0.04162640869617462, + "learning_rate": 1.757536971573816e-06, + "loss": 0.001, + "step": 89010 + }, + { + "epoch": 1.5044404823267958, + "grad_norm": 0.06132710352540016, + "learning_rate": 1.7564144417615148e-06, + "loss": 0.0024, + "step": 89020 + }, + { + "epoch": 1.5046094826056464, + "grad_norm": 0.015400673262774944, + "learning_rate": 1.7552921941568741e-06, + "loss": 0.0008, + "step": 89030 + }, + { + "epoch": 1.5047784828844968, + "grad_norm": 0.018372181802988052, + "learning_rate": 1.7541702288575314e-06, + "loss": 0.0012, + "step": 89040 + }, + { + "epoch": 1.5049474831633471, + "grad_norm": 0.08104084432125092, + "learning_rate": 1.7530485459611073e-06, + "loss": 0.0008, + "step": 89050 + }, + { + "epoch": 1.5051164834421977, + "grad_norm": 0.01839877851307392, + "learning_rate": 1.7519271455651897e-06, + "loss": 0.0002, + "step": 89060 + }, + { + "epoch": 1.505285483721048, + "grad_norm": 0.025520803406834602, + "learning_rate": 1.75080602776735e-06, + "loss": 0.0005, + "step": 89070 + }, + { + "epoch": 1.5054544839998987, + "grad_norm": 0.07787895202636719, + "learning_rate": 1.7496851926651265e-06, + "loss": 0.002, + "step": 89080 + }, + { + "epoch": 1.505623484278749, + "grad_norm": 0.04697718843817711, + "learning_rate": 1.7485646403560392e-06, + "loss": 0.0009, + "step": 89090 + }, + { + "epoch": 1.5057924845575994, + "grad_norm": 0.009050512686371803, + "learning_rate": 1.7474443709375837e-06, + "loss": 0.0005, + "step": 89100 + }, + { + "epoch": 1.50596148483645, + "grad_norm": 0.03001599945127964, + "learning_rate": 1.7463243845072248e-06, + "loss": 0.0011, + "step": 89110 + }, + { + "epoch": 1.5061304851153006, + "grad_norm": 0.1008617952466011, + "learning_rate": 1.7452046811624113e-06, + "loss": 0.001, + "step": 89120 + }, + { + "epoch": 1.506299485394151, + "grad_norm": 0.036419469863176346, + "learning_rate": 1.7440852610005582e-06, + "loss": 0.0007, + "step": 89130 + }, + { + "epoch": 1.5064684856730013, + "grad_norm": 0.03142918273806572, + "learning_rate": 1.7429661241190626e-06, + "loss": 0.0006, + "step": 89140 + }, + { + "epoch": 1.5066374859518519, + "grad_norm": 0.030899101868271828, + "learning_rate": 1.741847270615295e-06, + "loss": 0.0005, + "step": 89150 + }, + { + "epoch": 1.5068064862307022, + "grad_norm": 0.057271748781204224, + "learning_rate": 1.7407287005866025e-06, + "loss": 0.0007, + "step": 89160 + }, + { + "epoch": 1.5069754865095528, + "grad_norm": 0.05494721606373787, + 
"learning_rate": 1.7396104141303032e-06, + "loss": 0.0007, + "step": 89170 + }, + { + "epoch": 1.5071444867884032, + "grad_norm": 0.018767060711979866, + "learning_rate": 1.7384924113436964e-06, + "loss": 0.0006, + "step": 89180 + }, + { + "epoch": 1.5073134870672535, + "grad_norm": 0.0328710712492466, + "learning_rate": 1.7373746923240502e-06, + "loss": 0.0005, + "step": 89190 + }, + { + "epoch": 1.5074824873461041, + "grad_norm": 0.04069463163614273, + "learning_rate": 1.736257257168616e-06, + "loss": 0.0006, + "step": 89200 + }, + { + "epoch": 1.5076514876249547, + "grad_norm": 0.07939145714044571, + "learning_rate": 1.7351401059746114e-06, + "loss": 0.0008, + "step": 89210 + }, + { + "epoch": 1.507820487903805, + "grad_norm": 0.0318235382437706, + "learning_rate": 1.7340232388392375e-06, + "loss": 0.0004, + "step": 89220 + }, + { + "epoch": 1.5079894881826554, + "grad_norm": 0.08584918081760406, + "learning_rate": 1.7329066558596646e-06, + "loss": 0.0008, + "step": 89230 + }, + { + "epoch": 1.5081584884615058, + "grad_norm": 0.038147006183862686, + "learning_rate": 1.731790357133044e-06, + "loss": 0.0007, + "step": 89240 + }, + { + "epoch": 1.5083274887403564, + "grad_norm": 0.016866449266672134, + "learning_rate": 1.7306743427564954e-06, + "loss": 0.0014, + "step": 89250 + }, + { + "epoch": 1.508496489019207, + "grad_norm": 0.051926031708717346, + "learning_rate": 1.7295586128271208e-06, + "loss": 0.0005, + "step": 89260 + }, + { + "epoch": 1.5086654892980573, + "grad_norm": 0.0010678662220016122, + "learning_rate": 1.7284431674419905e-06, + "loss": 0.0006, + "step": 89270 + }, + { + "epoch": 1.5088344895769077, + "grad_norm": 0.12304358929395676, + "learning_rate": 1.727328006698158e-06, + "loss": 0.001, + "step": 89280 + }, + { + "epoch": 1.5090034898557583, + "grad_norm": 0.06094713136553764, + "learning_rate": 1.7262131306926433e-06, + "loss": 0.0005, + "step": 89290 + }, + { + "epoch": 1.5091724901346089, + "grad_norm": 0.05747349560260773, + "learning_rate": 1.7250985395224496e-06, + "loss": 0.0006, + "step": 89300 + }, + { + "epoch": 1.5093414904134592, + "grad_norm": 0.051456887274980545, + "learning_rate": 1.7239842332845487e-06, + "loss": 0.0006, + "step": 89310 + }, + { + "epoch": 1.5095104906923096, + "grad_norm": 0.013854267075657845, + "learning_rate": 1.7228702120758922e-06, + "loss": 0.0014, + "step": 89320 + }, + { + "epoch": 1.50967949097116, + "grad_norm": 0.0752004012465477, + "learning_rate": 1.7217564759934053e-06, + "loss": 0.0005, + "step": 89330 + }, + { + "epoch": 1.5098484912500105, + "grad_norm": 0.04393812641501427, + "learning_rate": 1.7206430251339907e-06, + "loss": 0.0006, + "step": 89340 + }, + { + "epoch": 1.5100174915288611, + "grad_norm": 0.0351971797645092, + "learning_rate": 1.7195298595945193e-06, + "loss": 0.0009, + "step": 89350 + }, + { + "epoch": 1.5101864918077115, + "grad_norm": 0.03731498122215271, + "learning_rate": 1.7184169794718448e-06, + "loss": 0.0006, + "step": 89360 + }, + { + "epoch": 1.5103554920865618, + "grad_norm": 0.15052951872348785, + "learning_rate": 1.717304384862794e-06, + "loss": 0.0016, + "step": 89370 + }, + { + "epoch": 1.5105244923654124, + "grad_norm": 0.008420226164162159, + "learning_rate": 1.7161920758641654e-06, + "loss": 0.0027, + "step": 89380 + }, + { + "epoch": 1.510693492644263, + "grad_norm": 0.031745459884405136, + "learning_rate": 1.715080052572738e-06, + "loss": 0.0003, + "step": 89390 + }, + { + "epoch": 1.5108624929231134, + "grad_norm": 0.02988767810165882, + "learning_rate": 1.7139683150852604e-06, 
+ "loss": 0.0005, + "step": 89400 + }, + { + "epoch": 1.5110314932019637, + "grad_norm": 0.07824013382196426, + "learning_rate": 1.7128568634984622e-06, + "loss": 0.0007, + "step": 89410 + }, + { + "epoch": 1.511200493480814, + "grad_norm": 0.08616514503955841, + "learning_rate": 1.7117456979090418e-06, + "loss": 0.0008, + "step": 89420 + }, + { + "epoch": 1.5113694937596647, + "grad_norm": 0.25163033604621887, + "learning_rate": 1.7106348184136789e-06, + "loss": 0.0006, + "step": 89430 + }, + { + "epoch": 1.5115384940385153, + "grad_norm": 0.010954326018691063, + "learning_rate": 1.7095242251090233e-06, + "loss": 0.001, + "step": 89440 + }, + { + "epoch": 1.5117074943173656, + "grad_norm": 0.007738580461591482, + "learning_rate": 1.7084139180917037e-06, + "loss": 0.0007, + "step": 89450 + }, + { + "epoch": 1.511876494596216, + "grad_norm": 0.04122927039861679, + "learning_rate": 1.70730389745832e-06, + "loss": 0.0011, + "step": 89460 + }, + { + "epoch": 1.5120454948750666, + "grad_norm": 0.09648521989583969, + "learning_rate": 1.7061941633054523e-06, + "loss": 0.0007, + "step": 89470 + }, + { + "epoch": 1.5122144951539171, + "grad_norm": 0.05812167376279831, + "learning_rate": 1.7050847157296496e-06, + "loss": 0.0011, + "step": 89480 + }, + { + "epoch": 1.5123834954327675, + "grad_norm": 0.06150609627366066, + "learning_rate": 1.7039755548274427e-06, + "loss": 0.0006, + "step": 89490 + }, + { + "epoch": 1.5125524957116179, + "grad_norm": 0.013848185539245605, + "learning_rate": 1.7028666806953303e-06, + "loss": 0.0007, + "step": 89500 + }, + { + "epoch": 1.5127214959904682, + "grad_norm": 0.012053185142576694, + "learning_rate": 1.7017580934297918e-06, + "loss": 0.0002, + "step": 89510 + }, + { + "epoch": 1.5128904962693188, + "grad_norm": 0.054048582911491394, + "learning_rate": 1.7006497931272798e-06, + "loss": 0.001, + "step": 89520 + }, + { + "epoch": 1.5130594965481694, + "grad_norm": 0.09194289892911911, + "learning_rate": 1.6995417798842228e-06, + "loss": 0.0008, + "step": 89530 + }, + { + "epoch": 1.5132284968270198, + "grad_norm": 0.010368667542934418, + "learning_rate": 1.6984340537970202e-06, + "loss": 0.0003, + "step": 89540 + }, + { + "epoch": 1.5133974971058701, + "grad_norm": 0.012516893446445465, + "learning_rate": 1.6973266149620533e-06, + "loss": 0.0007, + "step": 89550 + }, + { + "epoch": 1.5135664973847207, + "grad_norm": 0.004726815968751907, + "learning_rate": 1.6962194634756708e-06, + "loss": 0.0008, + "step": 89560 + }, + { + "epoch": 1.5137354976635713, + "grad_norm": 0.054614678025245667, + "learning_rate": 1.6951125994342043e-06, + "loss": 0.0012, + "step": 89570 + }, + { + "epoch": 1.5139044979424217, + "grad_norm": 0.046499382704496384, + "learning_rate": 1.6940060229339521e-06, + "loss": 0.0007, + "step": 89580 + }, + { + "epoch": 1.514073498221272, + "grad_norm": 0.035863835364580154, + "learning_rate": 1.692899734071194e-06, + "loss": 0.0008, + "step": 89590 + }, + { + "epoch": 1.5142424985001224, + "grad_norm": 0.01402368862181902, + "learning_rate": 1.6917937329421842e-06, + "loss": 0.0009, + "step": 89600 + }, + { + "epoch": 1.514411498778973, + "grad_norm": 0.0271760281175375, + "learning_rate": 1.6906880196431458e-06, + "loss": 0.001, + "step": 89610 + }, + { + "epoch": 1.5145804990578235, + "grad_norm": 0.016171742230653763, + "learning_rate": 1.689582594270286e-06, + "loss": 0.001, + "step": 89620 + }, + { + "epoch": 1.514749499336674, + "grad_norm": 0.2035505771636963, + "learning_rate": 1.6884774569197782e-06, + "loss": 0.0008, + "step": 89630 + }, 
+ { + "epoch": 1.5149184996155243, + "grad_norm": 0.06680934876203537, + "learning_rate": 1.6873726076877778e-06, + "loss": 0.0007, + "step": 89640 + }, + { + "epoch": 1.5150874998943749, + "grad_norm": 0.02429303154349327, + "learning_rate": 1.686268046670409e-06, + "loss": 0.0012, + "step": 89650 + }, + { + "epoch": 1.5152565001732254, + "grad_norm": 0.006075267214328051, + "learning_rate": 1.6851637739637771e-06, + "loss": 0.0011, + "step": 89660 + }, + { + "epoch": 1.5154255004520758, + "grad_norm": 0.05170568823814392, + "learning_rate": 1.684059789663956e-06, + "loss": 0.0004, + "step": 89670 + }, + { + "epoch": 1.5155945007309262, + "grad_norm": 0.06817694008350372, + "learning_rate": 1.6829560938670008e-06, + "loss": 0.001, + "step": 89680 + }, + { + "epoch": 1.5157635010097765, + "grad_norm": 0.06747003644704819, + "learning_rate": 1.6818526866689356e-06, + "loss": 0.0006, + "step": 89690 + }, + { + "epoch": 1.515932501288627, + "grad_norm": 0.007273124530911446, + "learning_rate": 1.6807495681657632e-06, + "loss": 0.001, + "step": 89700 + }, + { + "epoch": 1.5161015015674777, + "grad_norm": 0.02065047062933445, + "learning_rate": 1.679646738453462e-06, + "loss": 0.0007, + "step": 89710 + }, + { + "epoch": 1.516270501846328, + "grad_norm": 0.06045887991786003, + "learning_rate": 1.678544197627981e-06, + "loss": 0.0008, + "step": 89720 + }, + { + "epoch": 1.5164395021251784, + "grad_norm": 0.040009673684835434, + "learning_rate": 1.677441945785247e-06, + "loss": 0.0012, + "step": 89730 + }, + { + "epoch": 1.516608502404029, + "grad_norm": 0.8757259249687195, + "learning_rate": 1.676339983021164e-06, + "loss": 0.001, + "step": 89740 + }, + { + "epoch": 1.5167775026828796, + "grad_norm": 0.03649216145277023, + "learning_rate": 1.6752383094316038e-06, + "loss": 0.001, + "step": 89750 + }, + { + "epoch": 1.51694650296173, + "grad_norm": 0.03585437312722206, + "learning_rate": 1.6741369251124218e-06, + "loss": 0.0006, + "step": 89760 + }, + { + "epoch": 1.5171155032405803, + "grad_norm": 0.18319280445575714, + "learning_rate": 1.6730358301594396e-06, + "loss": 0.0002, + "step": 89770 + }, + { + "epoch": 1.5172845035194307, + "grad_norm": 0.08437007665634155, + "learning_rate": 1.6719350246684613e-06, + "loss": 0.0008, + "step": 89780 + }, + { + "epoch": 1.5174535037982813, + "grad_norm": 0.021380001679062843, + "learning_rate": 1.6708345087352589e-06, + "loss": 0.0013, + "step": 89790 + }, + { + "epoch": 1.5176225040771318, + "grad_norm": 0.074330635368824, + "learning_rate": 1.6697342824555861e-06, + "loss": 0.0008, + "step": 89800 + }, + { + "epoch": 1.5177915043559822, + "grad_norm": 0.011674449779093266, + "learning_rate": 1.6686343459251647e-06, + "loss": 0.0007, + "step": 89810 + }, + { + "epoch": 1.5179605046348326, + "grad_norm": 0.016544923186302185, + "learning_rate": 1.6675346992396973e-06, + "loss": 0.0003, + "step": 89820 + }, + { + "epoch": 1.5181295049136831, + "grad_norm": 0.08490558713674545, + "learning_rate": 1.6664353424948558e-06, + "loss": 0.0013, + "step": 89830 + }, + { + "epoch": 1.5182985051925337, + "grad_norm": 0.1269432008266449, + "learning_rate": 1.6653362757862928e-06, + "loss": 0.0011, + "step": 89840 + }, + { + "epoch": 1.518467505471384, + "grad_norm": 0.07166081666946411, + "learning_rate": 1.6642374992096295e-06, + "loss": 0.0006, + "step": 89850 + }, + { + "epoch": 1.5186365057502345, + "grad_norm": 0.10532987862825394, + "learning_rate": 1.6631390128604653e-06, + "loss": 0.0011, + "step": 89860 + }, + { + "epoch": 1.5188055060290848, + "grad_norm": 
0.05109262093901634, + "learning_rate": 1.662040816834376e-06, + "loss": 0.0006, + "step": 89870 + }, + { + "epoch": 1.5189745063079354, + "grad_norm": 0.014516928233206272, + "learning_rate": 1.6609429112269072e-06, + "loss": 0.0008, + "step": 89880 + }, + { + "epoch": 1.519143506586786, + "grad_norm": 0.017849545925855637, + "learning_rate": 1.6598452961335843e-06, + "loss": 0.0008, + "step": 89890 + }, + { + "epoch": 1.5193125068656363, + "grad_norm": 0.11236193776130676, + "learning_rate": 1.6587479716499027e-06, + "loss": 0.0006, + "step": 89900 + }, + { + "epoch": 1.5194815071444867, + "grad_norm": 0.07865837961435318, + "learning_rate": 1.6576509378713363e-06, + "loss": 0.0007, + "step": 89910 + }, + { + "epoch": 1.5196505074233373, + "grad_norm": 0.07344283908605576, + "learning_rate": 1.6565541948933322e-06, + "loss": 0.001, + "step": 89920 + }, + { + "epoch": 1.5198195077021877, + "grad_norm": 0.11414451152086258, + "learning_rate": 1.6554577428113145e-06, + "loss": 0.0005, + "step": 89930 + }, + { + "epoch": 1.5199885079810382, + "grad_norm": 0.0004753537359647453, + "learning_rate": 1.654361581720676e-06, + "loss": 0.0003, + "step": 89940 + }, + { + "epoch": 1.5201575082598886, + "grad_norm": 0.017375130206346512, + "learning_rate": 1.6532657117167916e-06, + "loss": 0.001, + "step": 89950 + }, + { + "epoch": 1.520326508538739, + "grad_norm": 0.037377480417490005, + "learning_rate": 1.6521701328950041e-06, + "loss": 0.0007, + "step": 89960 + }, + { + "epoch": 1.5204955088175895, + "grad_norm": 0.0739019438624382, + "learning_rate": 1.651074845350637e-06, + "loss": 0.0008, + "step": 89970 + }, + { + "epoch": 1.5206645090964401, + "grad_norm": 0.0003539329918567091, + "learning_rate": 1.6499798491789827e-06, + "loss": 0.0007, + "step": 89980 + }, + { + "epoch": 1.5208335093752905, + "grad_norm": 0.00020151161879766732, + "learning_rate": 1.6488851444753146e-06, + "loss": 0.0005, + "step": 89990 + }, + { + "epoch": 1.5210025096541409, + "grad_norm": 0.022759245708584785, + "learning_rate": 1.6477907313348735e-06, + "loss": 0.0016, + "step": 90000 + }, + { + "epoch": 1.5211715099329914, + "grad_norm": 0.006832859478890896, + "learning_rate": 1.6466966098528825e-06, + "loss": 0.0006, + "step": 90010 + }, + { + "epoch": 1.5213405102118418, + "grad_norm": 0.03370920941233635, + "learning_rate": 1.6456027801245317e-06, + "loss": 0.0007, + "step": 90020 + }, + { + "epoch": 1.5215095104906924, + "grad_norm": 0.009634205140173435, + "learning_rate": 1.6445092422449932e-06, + "loss": 0.0008, + "step": 90030 + }, + { + "epoch": 1.5216785107695427, + "grad_norm": 0.03818223625421524, + "learning_rate": 1.6434159963094065e-06, + "loss": 0.0008, + "step": 90040 + }, + { + "epoch": 1.521847511048393, + "grad_norm": 0.01654481515288353, + "learning_rate": 1.6423230424128933e-06, + "loss": 0.0008, + "step": 90050 + }, + { + "epoch": 1.5220165113272437, + "grad_norm": 0.04384077340364456, + "learning_rate": 1.6412303806505414e-06, + "loss": 0.0008, + "step": 90060 + }, + { + "epoch": 1.5221855116060943, + "grad_norm": 0.010644759051501751, + "learning_rate": 1.640138011117422e-06, + "loss": 0.0005, + "step": 90070 + }, + { + "epoch": 1.5223545118849446, + "grad_norm": 0.07942457497119904, + "learning_rate": 1.6390459339085728e-06, + "loss": 0.0008, + "step": 90080 + }, + { + "epoch": 1.522523512163795, + "grad_norm": 0.14543570578098297, + "learning_rate": 1.637954149119012e-06, + "loss": 0.0008, + "step": 90090 + }, + { + "epoch": 1.5226925124426456, + "grad_norm": 0.0862889289855957, + 
"learning_rate": 1.636862656843729e-06, + "loss": 0.0008, + "step": 90100 + }, + { + "epoch": 1.522861512721496, + "grad_norm": 0.0043488903902471066, + "learning_rate": 1.6357714571776901e-06, + "loss": 0.0016, + "step": 90110 + }, + { + "epoch": 1.5230305130003465, + "grad_norm": 0.02066611312329769, + "learning_rate": 1.6346805502158364e-06, + "loss": 0.0007, + "step": 90120 + }, + { + "epoch": 1.523199513279197, + "grad_norm": 0.0023197117261588573, + "learning_rate": 1.6335899360530783e-06, + "loss": 0.0008, + "step": 90130 + }, + { + "epoch": 1.5233685135580473, + "grad_norm": 0.009627950377762318, + "learning_rate": 1.6324996147843075e-06, + "loss": 0.0005, + "step": 90140 + }, + { + "epoch": 1.5235375138368978, + "grad_norm": 0.01624545268714428, + "learning_rate": 1.631409586504385e-06, + "loss": 0.0005, + "step": 90150 + }, + { + "epoch": 1.5237065141157484, + "grad_norm": 0.004025209695100784, + "learning_rate": 1.6303198513081514e-06, + "loss": 0.0005, + "step": 90160 + }, + { + "epoch": 1.5238755143945988, + "grad_norm": 0.034432973712682724, + "learning_rate": 1.6292304092904155e-06, + "loss": 0.0014, + "step": 90170 + }, + { + "epoch": 1.5240445146734491, + "grad_norm": 0.0053732809610664845, + "learning_rate": 1.6281412605459668e-06, + "loss": 0.0005, + "step": 90180 + }, + { + "epoch": 1.5242135149522995, + "grad_norm": 0.10048115253448486, + "learning_rate": 1.6270524051695647e-06, + "loss": 0.0008, + "step": 90190 + }, + { + "epoch": 1.52438251523115, + "grad_norm": 0.07562459260225296, + "learning_rate": 1.6259638432559467e-06, + "loss": 0.0009, + "step": 90200 + }, + { + "epoch": 1.5245515155100007, + "grad_norm": 0.03330732509493828, + "learning_rate": 1.6248755748998203e-06, + "loss": 0.0007, + "step": 90210 + }, + { + "epoch": 1.524720515788851, + "grad_norm": 0.016535423696041107, + "learning_rate": 1.6237876001958735e-06, + "loss": 0.0003, + "step": 90220 + }, + { + "epoch": 1.5248895160677014, + "grad_norm": 0.08232079446315765, + "learning_rate": 1.622699919238762e-06, + "loss": 0.0006, + "step": 90230 + }, + { + "epoch": 1.525058516346552, + "grad_norm": 0.006675524637103081, + "learning_rate": 1.621612532123123e-06, + "loss": 0.0008, + "step": 90240 + }, + { + "epoch": 1.5252275166254026, + "grad_norm": 0.009124464355409145, + "learning_rate": 1.62052543894356e-06, + "loss": 0.0005, + "step": 90250 + }, + { + "epoch": 1.525396516904253, + "grad_norm": 0.0359950065612793, + "learning_rate": 1.6194386397946598e-06, + "loss": 0.0005, + "step": 90260 + }, + { + "epoch": 1.5255655171831033, + "grad_norm": 0.03500552102923393, + "learning_rate": 1.6183521347709752e-06, + "loss": 0.0006, + "step": 90270 + }, + { + "epoch": 1.5257345174619537, + "grad_norm": 0.0056722103618085384, + "learning_rate": 1.6172659239670396e-06, + "loss": 0.0002, + "step": 90280 + }, + { + "epoch": 1.5259035177408042, + "grad_norm": 0.014110148884356022, + "learning_rate": 1.6161800074773582e-06, + "loss": 0.0003, + "step": 90290 + }, + { + "epoch": 1.5260725180196548, + "grad_norm": 0.042396847158670425, + "learning_rate": 1.615094385396413e-06, + "loss": 0.0011, + "step": 90300 + }, + { + "epoch": 1.5262415182985052, + "grad_norm": 0.0611516535282135, + "learning_rate": 1.6140090578186546e-06, + "loss": 0.0015, + "step": 90310 + }, + { + "epoch": 1.5264105185773555, + "grad_norm": 0.059691231697797775, + "learning_rate": 1.6129240248385153e-06, + "loss": 0.0012, + "step": 90320 + }, + { + "epoch": 1.5265795188562061, + "grad_norm": 0.03298520669341087, + "learning_rate": 
1.6118392865503952e-06, + "loss": 0.0008, + "step": 90330 + }, + { + "epoch": 1.5267485191350567, + "grad_norm": 0.09905547648668289, + "learning_rate": 1.6107548430486746e-06, + "loss": 0.0015, + "step": 90340 + }, + { + "epoch": 1.526917519413907, + "grad_norm": 0.05093686282634735, + "learning_rate": 1.6096706944277023e-06, + "loss": 0.0008, + "step": 90350 + }, + { + "epoch": 1.5270865196927574, + "grad_norm": 0.022593246772885323, + "learning_rate": 1.6085868407818062e-06, + "loss": 0.0006, + "step": 90360 + }, + { + "epoch": 1.5272555199716078, + "grad_norm": 0.000559734005946666, + "learning_rate": 1.6075032822052883e-06, + "loss": 0.0015, + "step": 90370 + }, + { + "epoch": 1.5274245202504584, + "grad_norm": 0.036225344985723495, + "learning_rate": 1.6064200187924207e-06, + "loss": 0.0011, + "step": 90380 + }, + { + "epoch": 1.527593520529309, + "grad_norm": 0.022825142368674278, + "learning_rate": 1.605337050637455e-06, + "loss": 0.001, + "step": 90390 + }, + { + "epoch": 1.5277625208081593, + "grad_norm": 0.06424136459827423, + "learning_rate": 1.6042543778346115e-06, + "loss": 0.0008, + "step": 90400 + }, + { + "epoch": 1.5279315210870097, + "grad_norm": 0.005746909417212009, + "learning_rate": 1.6031720004780921e-06, + "loss": 0.0003, + "step": 90410 + }, + { + "epoch": 1.5281005213658603, + "grad_norm": 0.017361126840114594, + "learning_rate": 1.6020899186620642e-06, + "loss": 0.0006, + "step": 90420 + }, + { + "epoch": 1.5282695216447109, + "grad_norm": 0.09066200256347656, + "learning_rate": 1.6010081324806792e-06, + "loss": 0.0004, + "step": 90430 + }, + { + "epoch": 1.5284385219235612, + "grad_norm": 0.048238012939691544, + "learning_rate": 1.5999266420280529e-06, + "loss": 0.001, + "step": 90440 + }, + { + "epoch": 1.5286075222024116, + "grad_norm": 0.05118170753121376, + "learning_rate": 1.5988454473982845e-06, + "loss": 0.0009, + "step": 90450 + }, + { + "epoch": 1.528776522481262, + "grad_norm": 0.017097176983952522, + "learning_rate": 1.5977645486854393e-06, + "loss": 0.0012, + "step": 90460 + }, + { + "epoch": 1.5289455227601125, + "grad_norm": 0.1016089916229248, + "learning_rate": 1.5966839459835626e-06, + "loss": 0.0009, + "step": 90470 + }, + { + "epoch": 1.5291145230389631, + "grad_norm": 0.0010697973193600774, + "learning_rate": 1.5956036393866737e-06, + "loss": 0.0008, + "step": 90480 + }, + { + "epoch": 1.5292835233178135, + "grad_norm": 0.02048424817621708, + "learning_rate": 1.5945236289887611e-06, + "loss": 0.0006, + "step": 90490 + }, + { + "epoch": 1.5294525235966638, + "grad_norm": 0.0900135487318039, + "learning_rate": 1.5934439148837926e-06, + "loss": 0.001, + "step": 90500 + }, + { + "epoch": 1.5296215238755144, + "grad_norm": 0.03776565194129944, + "learning_rate": 1.592364497165711e-06, + "loss": 0.0009, + "step": 90510 + }, + { + "epoch": 1.529790524154365, + "grad_norm": 0.1263919621706009, + "learning_rate": 1.5912853759284264e-06, + "loss": 0.0008, + "step": 90520 + }, + { + "epoch": 1.5299595244332154, + "grad_norm": 0.009383470751345158, + "learning_rate": 1.590206551265831e-06, + "loss": 0.0004, + "step": 90530 + }, + { + "epoch": 1.5301285247120657, + "grad_norm": 0.048516057431697845, + "learning_rate": 1.5891280232717854e-06, + "loss": 0.0006, + "step": 90540 + }, + { + "epoch": 1.530297524990916, + "grad_norm": 0.174342080950737, + "learning_rate": 1.588049792040129e-06, + "loss": 0.0013, + "step": 90550 + }, + { + "epoch": 1.5304665252697667, + "grad_norm": 0.012062018737196922, + "learning_rate": 1.5869718576646702e-06, + "loss": 
0.0005, + "step": 90560 + }, + { + "epoch": 1.5306355255486173, + "grad_norm": 0.0026453419122844934, + "learning_rate": 1.5858942202391986e-06, + "loss": 0.0008, + "step": 90570 + }, + { + "epoch": 1.5308045258274676, + "grad_norm": 0.024594619870185852, + "learning_rate": 1.5848168798574699e-06, + "loss": 0.0009, + "step": 90580 + }, + { + "epoch": 1.530973526106318, + "grad_norm": 0.07544858008623123, + "learning_rate": 1.5837398366132206e-06, + "loss": 0.0009, + "step": 90590 + }, + { + "epoch": 1.5311425263851686, + "grad_norm": 0.08281031996011734, + "learning_rate": 1.5826630906001561e-06, + "loss": 0.0007, + "step": 90600 + }, + { + "epoch": 1.5313115266640192, + "grad_norm": 0.008873536251485348, + "learning_rate": 1.5815866419119624e-06, + "loss": 0.0006, + "step": 90610 + }, + { + "epoch": 1.5314805269428695, + "grad_norm": 0.04672101140022278, + "learning_rate": 1.5805104906422914e-06, + "loss": 0.0014, + "step": 90620 + }, + { + "epoch": 1.5316495272217199, + "grad_norm": 0.023583827540278435, + "learning_rate": 1.5794346368847752e-06, + "loss": 0.0007, + "step": 90630 + }, + { + "epoch": 1.5318185275005702, + "grad_norm": 0.09966600686311722, + "learning_rate": 1.5783590807330206e-06, + "loss": 0.0006, + "step": 90640 + }, + { + "epoch": 1.5319875277794208, + "grad_norm": 0.05041343718767166, + "learning_rate": 1.5772838222806024e-06, + "loss": 0.0007, + "step": 90650 + }, + { + "epoch": 1.5321565280582714, + "grad_norm": 0.006506835110485554, + "learning_rate": 1.5762088616210769e-06, + "loss": 0.0003, + "step": 90660 + }, + { + "epoch": 1.5323255283371218, + "grad_norm": 0.06299679726362228, + "learning_rate": 1.575134198847968e-06, + "loss": 0.0007, + "step": 90670 + }, + { + "epoch": 1.5324945286159721, + "grad_norm": 0.028970442712306976, + "learning_rate": 1.5740598340547769e-06, + "loss": 0.001, + "step": 90680 + }, + { + "epoch": 1.5326635288948227, + "grad_norm": 0.07725638151168823, + "learning_rate": 1.5729857673349797e-06, + "loss": 0.0006, + "step": 90690 + }, + { + "epoch": 1.5328325291736733, + "grad_norm": 0.008100302889943123, + "learning_rate": 1.5719119987820264e-06, + "loss": 0.0008, + "step": 90700 + }, + { + "epoch": 1.5330015294525237, + "grad_norm": 0.00746137136593461, + "learning_rate": 1.570838528489337e-06, + "loss": 0.0004, + "step": 90710 + }, + { + "epoch": 1.533170529731374, + "grad_norm": 0.06312989443540573, + "learning_rate": 1.569765356550312e-06, + "loss": 0.0008, + "step": 90720 + }, + { + "epoch": 1.5333395300102244, + "grad_norm": 0.10903391987085342, + "learning_rate": 1.568692483058319e-06, + "loss": 0.0007, + "step": 90730 + }, + { + "epoch": 1.533508530289075, + "grad_norm": 0.14214962720870972, + "learning_rate": 1.5676199081067067e-06, + "loss": 0.0017, + "step": 90740 + }, + { + "epoch": 1.5336775305679256, + "grad_norm": 0.015006943605840206, + "learning_rate": 1.5665476317887912e-06, + "loss": 0.0004, + "step": 90750 + }, + { + "epoch": 1.533846530846776, + "grad_norm": 0.005455281585454941, + "learning_rate": 1.5654756541978689e-06, + "loss": 0.0005, + "step": 90760 + }, + { + "epoch": 1.5340155311256263, + "grad_norm": 0.003434483427554369, + "learning_rate": 1.564403975427204e-06, + "loss": 0.0004, + "step": 90770 + }, + { + "epoch": 1.5341845314044769, + "grad_norm": 0.017020530998706818, + "learning_rate": 1.5633325955700402e-06, + "loss": 0.0003, + "step": 90780 + }, + { + "epoch": 1.5343535316833274, + "grad_norm": 0.029370540753006935, + "learning_rate": 1.5622615147195903e-06, + "loss": 0.0006, + "step": 90790 + }, + { 
+ "epoch": 1.5345225319621778, + "grad_norm": 0.06792864948511124, + "learning_rate": 1.5611907329690462e-06, + "loss": 0.0007, + "step": 90800 + }, + { + "epoch": 1.5346915322410282, + "grad_norm": 0.048491671681404114, + "learning_rate": 1.560120250411568e-06, + "loss": 0.0054, + "step": 90810 + }, + { + "epoch": 1.5348605325198785, + "grad_norm": 0.019118905067443848, + "learning_rate": 1.5590500671402969e-06, + "loss": 0.0011, + "step": 90820 + }, + { + "epoch": 1.5350295327987291, + "grad_norm": 0.03405429795384407, + "learning_rate": 1.5579801832483394e-06, + "loss": 0.0007, + "step": 90830 + }, + { + "epoch": 1.5351985330775797, + "grad_norm": 0.0075188977643847466, + "learning_rate": 1.5569105988287836e-06, + "loss": 0.0004, + "step": 90840 + }, + { + "epoch": 1.53536753335643, + "grad_norm": 0.05416296422481537, + "learning_rate": 1.5558413139746887e-06, + "loss": 0.0009, + "step": 90850 + }, + { + "epoch": 1.5355365336352804, + "grad_norm": 0.028006546199321747, + "learning_rate": 1.5547723287790856e-06, + "loss": 0.0008, + "step": 90860 + }, + { + "epoch": 1.535705533914131, + "grad_norm": 0.026694685220718384, + "learning_rate": 1.5537036433349827e-06, + "loss": 0.0006, + "step": 90870 + }, + { + "epoch": 1.5358745341929814, + "grad_norm": 0.033826615661382675, + "learning_rate": 1.5526352577353604e-06, + "loss": 0.0005, + "step": 90880 + }, + { + "epoch": 1.536043534471832, + "grad_norm": 0.015938522294163704, + "learning_rate": 1.5515671720731746e-06, + "loss": 0.0013, + "step": 90890 + }, + { + "epoch": 1.5362125347506823, + "grad_norm": 0.011160106398165226, + "learning_rate": 1.550499386441352e-06, + "loss": 0.0008, + "step": 90900 + }, + { + "epoch": 1.5363815350295327, + "grad_norm": 0.009316002018749714, + "learning_rate": 1.5494319009327968e-06, + "loss": 0.0005, + "step": 90910 + }, + { + "epoch": 1.5365505353083833, + "grad_norm": 0.04132316634058952, + "learning_rate": 1.5483647156403837e-06, + "loss": 0.0003, + "step": 90920 + }, + { + "epoch": 1.5367195355872338, + "grad_norm": 0.04599205404520035, + "learning_rate": 1.5472978306569653e-06, + "loss": 0.0008, + "step": 90930 + }, + { + "epoch": 1.5368885358660842, + "grad_norm": 0.02022514119744301, + "learning_rate": 1.5462312460753626e-06, + "loss": 0.0005, + "step": 90940 + }, + { + "epoch": 1.5370575361449346, + "grad_norm": 0.005118310451507568, + "learning_rate": 1.5451649619883774e-06, + "loss": 0.0009, + "step": 90950 + }, + { + "epoch": 1.5372265364237852, + "grad_norm": 0.040655795484781265, + "learning_rate": 1.5440989784887773e-06, + "loss": 0.0007, + "step": 90960 + }, + { + "epoch": 1.5373955367026355, + "grad_norm": 0.03651266545057297, + "learning_rate": 1.5430332956693122e-06, + "loss": 0.0006, + "step": 90970 + }, + { + "epoch": 1.537564536981486, + "grad_norm": 0.003880225121974945, + "learning_rate": 1.5419679136226984e-06, + "loss": 0.0008, + "step": 90980 + }, + { + "epoch": 1.5377335372603365, + "grad_norm": 0.060235753655433655, + "learning_rate": 1.540902832441632e-06, + "loss": 0.0005, + "step": 90990 + }, + { + "epoch": 1.5379025375391868, + "grad_norm": 0.01511063426733017, + "learning_rate": 1.5398380522187778e-06, + "loss": 0.0008, + "step": 91000 + }, + { + "epoch": 1.5380715378180374, + "grad_norm": 0.03903907164931297, + "learning_rate": 1.538773573046779e-06, + "loss": 0.0007, + "step": 91010 + }, + { + "epoch": 1.538240538096888, + "grad_norm": 0.00315358629450202, + "learning_rate": 1.5377093950182476e-06, + "loss": 0.0006, + "step": 91020 + }, + { + "epoch": 1.5384095383757384, 
+ "grad_norm": 0.011011041700839996, + "learning_rate": 1.5366455182257762e-06, + "loss": 0.0006, + "step": 91030 + }, + { + "epoch": 1.5385785386545887, + "grad_norm": 0.007849492132663727, + "learning_rate": 1.5355819427619228e-06, + "loss": 0.0007, + "step": 91040 + }, + { + "epoch": 1.538747538933439, + "grad_norm": 0.03877522423863411, + "learning_rate": 1.5345186687192265e-06, + "loss": 0.0009, + "step": 91050 + }, + { + "epoch": 1.5389165392122897, + "grad_norm": 0.023960836231708527, + "learning_rate": 1.5334556961901964e-06, + "loss": 0.002, + "step": 91060 + }, + { + "epoch": 1.5390855394911402, + "grad_norm": 0.05260419473052025, + "learning_rate": 1.532393025267318e-06, + "loss": 0.0007, + "step": 91070 + }, + { + "epoch": 1.5392545397699906, + "grad_norm": 0.0148914884775877, + "learning_rate": 1.5313306560430457e-06, + "loss": 0.0008, + "step": 91080 + }, + { + "epoch": 1.539423540048841, + "grad_norm": 0.032751284539699554, + "learning_rate": 1.5302685886098146e-06, + "loss": 0.0007, + "step": 91090 + }, + { + "epoch": 1.5395925403276915, + "grad_norm": 0.0445295013487339, + "learning_rate": 1.5292068230600254e-06, + "loss": 0.0005, + "step": 91100 + }, + { + "epoch": 1.5397615406065421, + "grad_norm": 0.008731862530112267, + "learning_rate": 1.5281453594860607e-06, + "loss": 0.0005, + "step": 91110 + }, + { + "epoch": 1.5399305408853925, + "grad_norm": 0.05220063775777817, + "learning_rate": 1.5270841979802697e-06, + "loss": 0.0008, + "step": 91120 + }, + { + "epoch": 1.5400995411642429, + "grad_norm": 0.07239104062318802, + "learning_rate": 1.5260233386349798e-06, + "loss": 0.0003, + "step": 91130 + }, + { + "epoch": 1.5402685414430932, + "grad_norm": 0.20314405858516693, + "learning_rate": 1.5249627815424933e-06, + "loss": 0.0007, + "step": 91140 + }, + { + "epoch": 1.5404375417219438, + "grad_norm": 0.00026998284738510847, + "learning_rate": 1.5239025267950802e-06, + "loss": 0.0007, + "step": 91150 + }, + { + "epoch": 1.5406065420007944, + "grad_norm": 0.0450599379837513, + "learning_rate": 1.5228425744849906e-06, + "loss": 0.0008, + "step": 91160 + }, + { + "epoch": 1.5407755422796447, + "grad_norm": 0.013693476095795631, + "learning_rate": 1.5217829247044424e-06, + "loss": 0.0007, + "step": 91170 + }, + { + "epoch": 1.5409445425584951, + "grad_norm": 0.0012772573390975595, + "learning_rate": 1.5207235775456343e-06, + "loss": 0.0006, + "step": 91180 + }, + { + "epoch": 1.5411135428373457, + "grad_norm": 0.018909180536866188, + "learning_rate": 1.5196645331007305e-06, + "loss": 0.0006, + "step": 91190 + }, + { + "epoch": 1.5412825431161963, + "grad_norm": 0.07891616970300674, + "learning_rate": 1.5186057914618768e-06, + "loss": 0.0006, + "step": 91200 + }, + { + "epoch": 1.5414515433950466, + "grad_norm": 0.02020966075360775, + "learning_rate": 1.5175473527211847e-06, + "loss": 0.0009, + "step": 91210 + }, + { + "epoch": 1.541620543673897, + "grad_norm": 0.06416349858045578, + "learning_rate": 1.5164892169707485e-06, + "loss": 0.0007, + "step": 91220 + }, + { + "epoch": 1.5417895439527474, + "grad_norm": 0.02633839100599289, + "learning_rate": 1.5154313843026259e-06, + "loss": 0.0005, + "step": 91230 + }, + { + "epoch": 1.541958544231598, + "grad_norm": 0.009072850458323956, + "learning_rate": 1.5143738548088566e-06, + "loss": 0.0008, + "step": 91240 + }, + { + "epoch": 1.5421275445104485, + "grad_norm": 0.003974773455411196, + "learning_rate": 1.513316628581451e-06, + "loss": 0.0009, + "step": 91250 + }, + { + "epoch": 1.542296544789299, + "grad_norm": 
0.014070197939872742, + "learning_rate": 1.512259705712391e-06, + "loss": 0.0005, + "step": 91260 + }, + { + "epoch": 1.5424655450681493, + "grad_norm": 0.02480059303343296, + "learning_rate": 1.511203086293635e-06, + "loss": 0.0006, + "step": 91270 + }, + { + "epoch": 1.5426345453469998, + "grad_norm": 0.061281755566596985, + "learning_rate": 1.5101467704171159e-06, + "loss": 0.0005, + "step": 91280 + }, + { + "epoch": 1.5428035456258504, + "grad_norm": 0.03887879103422165, + "learning_rate": 1.5090907581747349e-06, + "loss": 0.001, + "step": 91290 + }, + { + "epoch": 1.5429725459047008, + "grad_norm": 0.03775886818766594, + "learning_rate": 1.5080350496583729e-06, + "loss": 0.0004, + "step": 91300 + }, + { + "epoch": 1.5431415461835511, + "grad_norm": 0.04389318823814392, + "learning_rate": 1.5069796449598795e-06, + "loss": 0.0016, + "step": 91310 + }, + { + "epoch": 1.5433105464624015, + "grad_norm": 0.01458847988396883, + "learning_rate": 1.5059245441710824e-06, + "loss": 0.0007, + "step": 91320 + }, + { + "epoch": 1.543479546741252, + "grad_norm": 0.10211494565010071, + "learning_rate": 1.5048697473837775e-06, + "loss": 0.0011, + "step": 91330 + }, + { + "epoch": 1.5436485470201027, + "grad_norm": 0.044371504336595535, + "learning_rate": 1.5038152546897406e-06, + "loss": 0.0008, + "step": 91340 + }, + { + "epoch": 1.543817547298953, + "grad_norm": 0.05276376008987427, + "learning_rate": 1.5027610661807145e-06, + "loss": 0.0005, + "step": 91350 + }, + { + "epoch": 1.5439865475778034, + "grad_norm": 0.022463826462626457, + "learning_rate": 1.5017071819484214e-06, + "loss": 0.0004, + "step": 91360 + }, + { + "epoch": 1.544155547856654, + "grad_norm": 0.02301204949617386, + "learning_rate": 1.5006536020845518e-06, + "loss": 0.0009, + "step": 91370 + }, + { + "epoch": 1.5443245481355046, + "grad_norm": 0.10036075860261917, + "learning_rate": 1.4996003266807734e-06, + "loss": 0.0005, + "step": 91380 + }, + { + "epoch": 1.544493548414355, + "grad_norm": 0.002767896978184581, + "learning_rate": 1.4985473558287278e-06, + "loss": 0.0006, + "step": 91390 + }, + { + "epoch": 1.5446625486932053, + "grad_norm": 0.03478080406785011, + "learning_rate": 1.497494689620026e-06, + "loss": 0.0006, + "step": 91400 + }, + { + "epoch": 1.5448315489720557, + "grad_norm": 0.000982145662419498, + "learning_rate": 1.4964423281462576e-06, + "loss": 0.0005, + "step": 91410 + }, + { + "epoch": 1.5450005492509062, + "grad_norm": 0.04009811207652092, + "learning_rate": 1.4953902714989798e-06, + "loss": 0.0006, + "step": 91420 + }, + { + "epoch": 1.5451695495297568, + "grad_norm": 0.0612616166472435, + "learning_rate": 1.494338519769729e-06, + "loss": 0.0007, + "step": 91430 + }, + { + "epoch": 1.5453385498086072, + "grad_norm": 0.028253573924303055, + "learning_rate": 1.4932870730500143e-06, + "loss": 0.0006, + "step": 91440 + }, + { + "epoch": 1.5455075500874575, + "grad_norm": 0.028928278014063835, + "learning_rate": 1.4922359314313122e-06, + "loss": 0.0008, + "step": 91450 + }, + { + "epoch": 1.5456765503663081, + "grad_norm": 0.10750661045312881, + "learning_rate": 1.4911850950050806e-06, + "loss": 0.0011, + "step": 91460 + }, + { + "epoch": 1.5458455506451587, + "grad_norm": 0.029245685786008835, + "learning_rate": 1.4901345638627467e-06, + "loss": 0.0008, + "step": 91470 + }, + { + "epoch": 1.546014550924009, + "grad_norm": 0.0031404420733451843, + "learning_rate": 1.489084338095711e-06, + "loss": 0.0005, + "step": 91480 + }, + { + "epoch": 1.5461835512028594, + "grad_norm": 0.0078032235614955425, + 
"learning_rate": 1.4880344177953493e-06, + "loss": 0.0017, + "step": 91490 + }, + { + "epoch": 1.5463525514817098, + "grad_norm": 0.007911061868071556, + "learning_rate": 1.4869848030530081e-06, + "loss": 0.0007, + "step": 91500 + }, + { + "epoch": 1.5465215517605604, + "grad_norm": 0.12613831460475922, + "learning_rate": 1.4859354939600118e-06, + "loss": 0.0019, + "step": 91510 + }, + { + "epoch": 1.546690552039411, + "grad_norm": 0.050074364989995956, + "learning_rate": 1.4848864906076516e-06, + "loss": 0.0011, + "step": 91520 + }, + { + "epoch": 1.5468595523182613, + "grad_norm": 0.01013795007020235, + "learning_rate": 1.4838377930871994e-06, + "loss": 0.0011, + "step": 91530 + }, + { + "epoch": 1.5470285525971117, + "grad_norm": 0.040607839822769165, + "learning_rate": 1.4827894014898936e-06, + "loss": 0.0007, + "step": 91540 + }, + { + "epoch": 1.5471975528759623, + "grad_norm": 0.06654667854309082, + "learning_rate": 1.4817413159069533e-06, + "loss": 0.0007, + "step": 91550 + }, + { + "epoch": 1.5473665531548129, + "grad_norm": 0.05213646590709686, + "learning_rate": 1.4806935364295627e-06, + "loss": 0.0022, + "step": 91560 + }, + { + "epoch": 1.5475355534336632, + "grad_norm": 0.01856975443661213, + "learning_rate": 1.479646063148888e-06, + "loss": 0.0006, + "step": 91570 + }, + { + "epoch": 1.5477045537125136, + "grad_norm": 0.0003089377423748374, + "learning_rate": 1.4785988961560598e-06, + "loss": 0.0007, + "step": 91580 + }, + { + "epoch": 1.547873553991364, + "grad_norm": 0.05260462313890457, + "learning_rate": 1.477552035542192e-06, + "loss": 0.0007, + "step": 91590 + }, + { + "epoch": 1.5480425542702145, + "grad_norm": 0.005849786102771759, + "learning_rate": 1.4765054813983616e-06, + "loss": 0.0004, + "step": 91600 + }, + { + "epoch": 1.5482115545490651, + "grad_norm": 0.029813293367624283, + "learning_rate": 1.475459233815626e-06, + "loss": 0.0015, + "step": 91610 + }, + { + "epoch": 1.5483805548279155, + "grad_norm": 0.025738809257745743, + "learning_rate": 1.4744132928850151e-06, + "loss": 0.0005, + "step": 91620 + }, + { + "epoch": 1.5485495551067658, + "grad_norm": 0.07673606276512146, + "learning_rate": 1.4733676586975288e-06, + "loss": 0.0005, + "step": 91630 + }, + { + "epoch": 1.5487185553856164, + "grad_norm": 0.019551604986190796, + "learning_rate": 1.4723223313441426e-06, + "loss": 0.001, + "step": 91640 + }, + { + "epoch": 1.548887555664467, + "grad_norm": 0.018417326733469963, + "learning_rate": 1.4712773109158057e-06, + "loss": 0.0009, + "step": 91650 + }, + { + "epoch": 1.5490565559433174, + "grad_norm": 0.009148292243480682, + "learning_rate": 1.4702325975034416e-06, + "loss": 0.0006, + "step": 91660 + }, + { + "epoch": 1.5492255562221677, + "grad_norm": 0.3337763249874115, + "learning_rate": 1.4691881911979417e-06, + "loss": 0.0016, + "step": 91670 + }, + { + "epoch": 1.549394556501018, + "grad_norm": 0.07064887881278992, + "learning_rate": 1.468144092090178e-06, + "loss": 0.0008, + "step": 91680 + }, + { + "epoch": 1.5495635567798687, + "grad_norm": 0.010934130288660526, + "learning_rate": 1.4671003002709888e-06, + "loss": 0.0007, + "step": 91690 + }, + { + "epoch": 1.5497325570587193, + "grad_norm": 0.004269908182322979, + "learning_rate": 1.4660568158311928e-06, + "loss": 0.0009, + "step": 91700 + }, + { + "epoch": 1.5499015573375696, + "grad_norm": 0.09389406442642212, + "learning_rate": 1.4650136388615743e-06, + "loss": 0.0008, + "step": 91710 + }, + { + "epoch": 1.55007055761642, + "grad_norm": 0.019758351147174835, + "learning_rate": 
1.4639707694528982e-06, + "loss": 0.0004, + "step": 91720 + }, + { + "epoch": 1.5502395578952706, + "grad_norm": 0.013832749798893929, + "learning_rate": 1.4629282076958955e-06, + "loss": 0.0005, + "step": 91730 + }, + { + "epoch": 1.550408558174121, + "grad_norm": 0.19052620232105255, + "learning_rate": 1.4618859536812785e-06, + "loss": 0.0018, + "step": 91740 + }, + { + "epoch": 1.5505775584529715, + "grad_norm": 0.007601190824061632, + "learning_rate": 1.4608440074997244e-06, + "loss": 0.002, + "step": 91750 + }, + { + "epoch": 1.5507465587318219, + "grad_norm": 0.012108417227864265, + "learning_rate": 1.459802369241891e-06, + "loss": 0.0004, + "step": 91760 + }, + { + "epoch": 1.5509155590106722, + "grad_norm": 0.026216894388198853, + "learning_rate": 1.4587610389984024e-06, + "loss": 0.001, + "step": 91770 + }, + { + "epoch": 1.5510845592895228, + "grad_norm": 0.016562877222895622, + "learning_rate": 1.4577200168598625e-06, + "loss": 0.0006, + "step": 91780 + }, + { + "epoch": 1.5512535595683734, + "grad_norm": 0.045029088854789734, + "learning_rate": 1.4566793029168425e-06, + "loss": 0.0011, + "step": 91790 + }, + { + "epoch": 1.5514225598472238, + "grad_norm": 0.026214392855763435, + "learning_rate": 1.4556388972598922e-06, + "loss": 0.0007, + "step": 91800 + }, + { + "epoch": 1.5515915601260741, + "grad_norm": 0.002082178369164467, + "learning_rate": 1.4545987999795297e-06, + "loss": 0.0006, + "step": 91810 + }, + { + "epoch": 1.5517605604049247, + "grad_norm": 0.005932193715125322, + "learning_rate": 1.453559011166249e-06, + "loss": 0.0004, + "step": 91820 + }, + { + "epoch": 1.551929560683775, + "grad_norm": 0.07331282645463943, + "learning_rate": 1.4525195309105172e-06, + "loss": 0.0004, + "step": 91830 + }, + { + "epoch": 1.5520985609626257, + "grad_norm": 0.00024068939092103392, + "learning_rate": 1.451480359302776e-06, + "loss": 0.0006, + "step": 91840 + }, + { + "epoch": 1.552267561241476, + "grad_norm": 0.01877421885728836, + "learning_rate": 1.4504414964334351e-06, + "loss": 0.0013, + "step": 91850 + }, + { + "epoch": 1.5524365615203264, + "grad_norm": 0.03928346931934357, + "learning_rate": 1.449402942392883e-06, + "loss": 0.0006, + "step": 91860 + }, + { + "epoch": 1.552605561799177, + "grad_norm": 0.05283835157752037, + "learning_rate": 1.4483646972714765e-06, + "loss": 0.0008, + "step": 91870 + }, + { + "epoch": 1.5527745620780276, + "grad_norm": 0.036525264382362366, + "learning_rate": 1.4473267611595514e-06, + "loss": 0.0012, + "step": 91880 + }, + { + "epoch": 1.552943562356878, + "grad_norm": 0.02714865282177925, + "learning_rate": 1.4462891341474094e-06, + "loss": 0.0006, + "step": 91890 + }, + { + "epoch": 1.5531125626357283, + "grad_norm": 0.019561218097805977, + "learning_rate": 1.445251816325331e-06, + "loss": 0.0006, + "step": 91900 + }, + { + "epoch": 1.5532815629145789, + "grad_norm": 0.0465192086994648, + "learning_rate": 1.444214807783569e-06, + "loss": 0.0006, + "step": 91910 + }, + { + "epoch": 1.5534505631934292, + "grad_norm": 0.016598006710410118, + "learning_rate": 1.4431781086123453e-06, + "loss": 0.001, + "step": 91920 + }, + { + "epoch": 1.5536195634722798, + "grad_norm": 0.0028368341736495495, + "learning_rate": 1.442141718901861e-06, + "loss": 0.0005, + "step": 91930 + }, + { + "epoch": 1.5537885637511302, + "grad_norm": 0.1509881168603897, + "learning_rate": 1.441105638742284e-06, + "loss": 0.0012, + "step": 91940 + }, + { + "epoch": 1.5539575640299805, + "grad_norm": 0.14502137899398804, + "learning_rate": 1.4400698682237606e-06, + "loss": 
0.0016, + "step": 91950 + }, + { + "epoch": 1.5541265643088311, + "grad_norm": 0.016168328002095222, + "learning_rate": 1.439034407436406e-06, + "loss": 0.002, + "step": 91960 + }, + { + "epoch": 1.5542955645876817, + "grad_norm": 0.16724376380443573, + "learning_rate": 1.4379992564703126e-06, + "loss": 0.0006, + "step": 91970 + }, + { + "epoch": 1.554464564866532, + "grad_norm": 0.005992779042571783, + "learning_rate": 1.436964415415541e-06, + "loss": 0.0007, + "step": 91980 + }, + { + "epoch": 1.5546335651453824, + "grad_norm": 0.10158688575029373, + "learning_rate": 1.43592988436213e-06, + "loss": 0.0013, + "step": 91990 + }, + { + "epoch": 1.5548025654242328, + "grad_norm": 0.05698493868112564, + "learning_rate": 1.4348956634000855e-06, + "loss": 0.0008, + "step": 92000 + }, + { + "epoch": 1.5549715657030834, + "grad_norm": 0.0953177884221077, + "learning_rate": 1.4338617526193915e-06, + "loss": 0.0008, + "step": 92010 + }, + { + "epoch": 1.555140565981934, + "grad_norm": 0.02836747094988823, + "learning_rate": 1.4328281521100034e-06, + "loss": 0.0014, + "step": 92020 + }, + { + "epoch": 1.5553095662607843, + "grad_norm": 0.05428193509578705, + "learning_rate": 1.4317948619618515e-06, + "loss": 0.0007, + "step": 92030 + }, + { + "epoch": 1.5554785665396347, + "grad_norm": 0.04886719584465027, + "learning_rate": 1.4307618822648328e-06, + "loss": 0.001, + "step": 92040 + }, + { + "epoch": 1.5556475668184853, + "grad_norm": 0.029720531776547432, + "learning_rate": 1.4297292131088258e-06, + "loss": 0.0004, + "step": 92050 + }, + { + "epoch": 1.5558165670973358, + "grad_norm": 0.010107689537107944, + "learning_rate": 1.4286968545836739e-06, + "loss": 0.0002, + "step": 92060 + }, + { + "epoch": 1.5559855673761862, + "grad_norm": 0.044548213481903076, + "learning_rate": 1.4276648067792004e-06, + "loss": 0.0004, + "step": 92070 + }, + { + "epoch": 1.5561545676550366, + "grad_norm": 0.031103257089853287, + "learning_rate": 1.4266330697851955e-06, + "loss": 0.0008, + "step": 92080 + }, + { + "epoch": 1.556323567933887, + "grad_norm": 0.04000338166952133, + "learning_rate": 1.4256016436914282e-06, + "loss": 0.0005, + "step": 92090 + }, + { + "epoch": 1.5564925682127375, + "grad_norm": 0.009134330786764622, + "learning_rate": 1.424570528587635e-06, + "loss": 0.0015, + "step": 92100 + }, + { + "epoch": 1.556661568491588, + "grad_norm": 0.0020735403522849083, + "learning_rate": 1.4235397245635307e-06, + "loss": 0.0005, + "step": 92110 + }, + { + "epoch": 1.5568305687704385, + "grad_norm": 0.022044990211725235, + "learning_rate": 1.4225092317087974e-06, + "loss": 0.0005, + "step": 92120 + }, + { + "epoch": 1.5569995690492888, + "grad_norm": 0.04893451929092407, + "learning_rate": 1.4214790501130953e-06, + "loss": 0.0005, + "step": 92130 + }, + { + "epoch": 1.5571685693281394, + "grad_norm": 0.06270928680896759, + "learning_rate": 1.420449179866053e-06, + "loss": 0.0015, + "step": 92140 + }, + { + "epoch": 1.55733756960699, + "grad_norm": 0.0685892105102539, + "learning_rate": 1.4194196210572754e-06, + "loss": 0.0006, + "step": 92150 + }, + { + "epoch": 1.5575065698858404, + "grad_norm": 0.006450988817960024, + "learning_rate": 1.4183903737763404e-06, + "loss": 0.0004, + "step": 92160 + }, + { + "epoch": 1.5576755701646907, + "grad_norm": 0.020199205726385117, + "learning_rate": 1.4173614381127948e-06, + "loss": 0.0006, + "step": 92170 + }, + { + "epoch": 1.557844570443541, + "grad_norm": 0.020659884437918663, + "learning_rate": 1.4163328141561638e-06, + "loss": 0.0008, + "step": 92180 + }, + { + 
"epoch": 1.5580135707223917, + "grad_norm": 0.06154713034629822, + "learning_rate": 1.41530450199594e-06, + "loss": 0.001, + "step": 92190 + }, + { + "epoch": 1.5581825710012422, + "grad_norm": 0.023259872570633888, + "learning_rate": 1.4142765017215926e-06, + "loss": 0.0011, + "step": 92200 + }, + { + "epoch": 1.5583515712800926, + "grad_norm": 0.035250257700681686, + "learning_rate": 1.4132488134225642e-06, + "loss": 0.001, + "step": 92210 + }, + { + "epoch": 1.558520571558943, + "grad_norm": 0.043679896742105484, + "learning_rate": 1.412221437188266e-06, + "loss": 0.0006, + "step": 92220 + }, + { + "epoch": 1.5586895718377936, + "grad_norm": 0.06833620369434357, + "learning_rate": 1.4111943731080858e-06, + "loss": 0.0006, + "step": 92230 + }, + { + "epoch": 1.5588585721166441, + "grad_norm": 0.01950804889202118, + "learning_rate": 1.4101676212713843e-06, + "loss": 0.0006, + "step": 92240 + }, + { + "epoch": 1.5590275723954945, + "grad_norm": 0.06699059903621674, + "learning_rate": 1.409141181767492e-06, + "loss": 0.0007, + "step": 92250 + }, + { + "epoch": 1.5591965726743449, + "grad_norm": 0.010098809376358986, + "learning_rate": 1.4081150546857165e-06, + "loss": 0.001, + "step": 92260 + }, + { + "epoch": 1.5593655729531952, + "grad_norm": 0.010151817463338375, + "learning_rate": 1.4070892401153324e-06, + "loss": 0.0011, + "step": 92270 + }, + { + "epoch": 1.5595345732320458, + "grad_norm": 0.0016320428112521768, + "learning_rate": 1.4060637381455943e-06, + "loss": 0.0009, + "step": 92280 + }, + { + "epoch": 1.5597035735108964, + "grad_norm": 0.014087764546275139, + "learning_rate": 1.4050385488657225e-06, + "loss": 0.0006, + "step": 92290 + }, + { + "epoch": 1.5598725737897468, + "grad_norm": 0.0018531668465584517, + "learning_rate": 1.404013672364916e-06, + "loss": 0.0009, + "step": 92300 + }, + { + "epoch": 1.5600415740685971, + "grad_norm": 0.0181476641446352, + "learning_rate": 1.4029891087323422e-06, + "loss": 0.0004, + "step": 92310 + }, + { + "epoch": 1.5602105743474477, + "grad_norm": 0.04024354740977287, + "learning_rate": 1.401964858057145e-06, + "loss": 0.0015, + "step": 92320 + }, + { + "epoch": 1.5603795746262983, + "grad_norm": 0.024551119655370712, + "learning_rate": 1.4009409204284363e-06, + "loss": 0.0009, + "step": 92330 + }, + { + "epoch": 1.5605485749051486, + "grad_norm": 0.028060588985681534, + "learning_rate": 1.3999172959353074e-06, + "loss": 0.0008, + "step": 92340 + }, + { + "epoch": 1.560717575183999, + "grad_norm": 0.03398832306265831, + "learning_rate": 1.3988939846668149e-06, + "loss": 0.001, + "step": 92350 + }, + { + "epoch": 1.5608865754628494, + "grad_norm": 0.08230680972337723, + "learning_rate": 1.3978709867119945e-06, + "loss": 0.0008, + "step": 92360 + }, + { + "epoch": 1.5610555757417, + "grad_norm": 0.14842632412910461, + "learning_rate": 1.39684830215985e-06, + "loss": 0.0008, + "step": 92370 + }, + { + "epoch": 1.5612245760205505, + "grad_norm": 0.010570487938821316, + "learning_rate": 1.3958259310993605e-06, + "loss": 0.0009, + "step": 92380 + }, + { + "epoch": 1.561393576299401, + "grad_norm": 0.057076577097177505, + "learning_rate": 1.3948038736194792e-06, + "loss": 0.0009, + "step": 92390 + }, + { + "epoch": 1.5615625765782513, + "grad_norm": 0.02614855021238327, + "learning_rate": 1.393782129809127e-06, + "loss": 0.0006, + "step": 92400 + }, + { + "epoch": 1.5617315768571018, + "grad_norm": 0.015939511358737946, + "learning_rate": 1.3927606997572024e-06, + "loss": 0.0003, + "step": 92410 + }, + { + "epoch": 1.5619005771359524, + 
"grad_norm": 0.007223693188279867, + "learning_rate": 1.3917395835525739e-06, + "loss": 0.0011, + "step": 92420 + }, + { + "epoch": 1.5620695774148028, + "grad_norm": 0.023942166939377785, + "learning_rate": 1.390718781284086e-06, + "loss": 0.0008, + "step": 92430 + }, + { + "epoch": 1.5622385776936532, + "grad_norm": 0.03485532104969025, + "learning_rate": 1.3896982930405494e-06, + "loss": 0.0005, + "step": 92440 + }, + { + "epoch": 1.5624075779725035, + "grad_norm": 0.007194232195615768, + "learning_rate": 1.3886781189107556e-06, + "loss": 0.0005, + "step": 92450 + }, + { + "epoch": 1.562576578251354, + "grad_norm": 0.014133160002529621, + "learning_rate": 1.3876582589834614e-06, + "loss": 0.001, + "step": 92460 + }, + { + "epoch": 1.5627455785302047, + "grad_norm": 0.07405367493629456, + "learning_rate": 1.3866387133474018e-06, + "loss": 0.0011, + "step": 92470 + }, + { + "epoch": 1.562914578809055, + "grad_norm": 0.06717579811811447, + "learning_rate": 1.38561948209128e-06, + "loss": 0.0005, + "step": 92480 + }, + { + "epoch": 1.5630835790879054, + "grad_norm": 0.019617587327957153, + "learning_rate": 1.384600565303777e-06, + "loss": 0.0005, + "step": 92490 + }, + { + "epoch": 1.563252579366756, + "grad_norm": 0.09758741408586502, + "learning_rate": 1.3835819630735403e-06, + "loss": 0.0006, + "step": 92500 + }, + { + "epoch": 1.5634215796456066, + "grad_norm": 0.06455901265144348, + "learning_rate": 1.3825636754891958e-06, + "loss": 0.0006, + "step": 92510 + }, + { + "epoch": 1.563590579924457, + "grad_norm": 0.021474866196513176, + "learning_rate": 1.3815457026393375e-06, + "loss": 0.0004, + "step": 92520 + }, + { + "epoch": 1.5637595802033073, + "grad_norm": 0.05228448659181595, + "learning_rate": 1.3805280446125364e-06, + "loss": 0.0011, + "step": 92530 + }, + { + "epoch": 1.5639285804821577, + "grad_norm": 0.00021487113554030657, + "learning_rate": 1.3795107014973302e-06, + "loss": 0.0004, + "step": 92540 + }, + { + "epoch": 1.5640975807610082, + "grad_norm": 0.007745729759335518, + "learning_rate": 1.3784936733822364e-06, + "loss": 0.001, + "step": 92550 + }, + { + "epoch": 1.5642665810398588, + "grad_norm": 0.048509631305933, + "learning_rate": 1.3774769603557382e-06, + "loss": 0.0006, + "step": 92560 + }, + { + "epoch": 1.5644355813187092, + "grad_norm": 0.03314121067523956, + "learning_rate": 1.3764605625062961e-06, + "loss": 0.0009, + "step": 92570 + }, + { + "epoch": 1.5646045815975596, + "grad_norm": 0.029128141701221466, + "learning_rate": 1.3754444799223427e-06, + "loss": 0.0014, + "step": 92580 + }, + { + "epoch": 1.5647735818764101, + "grad_norm": 0.03834288939833641, + "learning_rate": 1.3744287126922789e-06, + "loss": 0.0005, + "step": 92590 + }, + { + "epoch": 1.5649425821552607, + "grad_norm": 0.06999506801366806, + "learning_rate": 1.3734132609044843e-06, + "loss": 0.0023, + "step": 92600 + }, + { + "epoch": 1.565111582434111, + "grad_norm": 0.04065316915512085, + "learning_rate": 1.3723981246473085e-06, + "loss": 0.0009, + "step": 92610 + }, + { + "epoch": 1.5652805827129614, + "grad_norm": 0.029068050906062126, + "learning_rate": 1.3713833040090701e-06, + "loss": 0.0028, + "step": 92620 + }, + { + "epoch": 1.5654495829918118, + "grad_norm": 0.0029551261104643345, + "learning_rate": 1.3703687990780673e-06, + "loss": 0.0012, + "step": 92630 + }, + { + "epoch": 1.5656185832706624, + "grad_norm": 0.0025054332800209522, + "learning_rate": 1.3693546099425632e-06, + "loss": 0.0005, + "step": 92640 + }, + { + "epoch": 1.565787583549513, + "grad_norm": 
0.026966840028762817, + "learning_rate": 1.3683407366908007e-06, + "loss": 0.0003, + "step": 92650 + }, + { + "epoch": 1.5659565838283633, + "grad_norm": 0.013984468765556812, + "learning_rate": 1.3673271794109883e-06, + "loss": 0.0008, + "step": 92660 + }, + { + "epoch": 1.5661255841072137, + "grad_norm": 0.01775095798075199, + "learning_rate": 1.3663139381913116e-06, + "loss": 0.0004, + "step": 92670 + }, + { + "epoch": 1.5662945843860643, + "grad_norm": 0.08793025463819504, + "learning_rate": 1.3653010131199295e-06, + "loss": 0.0007, + "step": 92680 + }, + { + "epoch": 1.5664635846649146, + "grad_norm": 0.12743797898292542, + "learning_rate": 1.3642884042849686e-06, + "loss": 0.0008, + "step": 92690 + }, + { + "epoch": 1.5666325849437652, + "grad_norm": 0.02725476399064064, + "learning_rate": 1.3632761117745335e-06, + "loss": 0.0017, + "step": 92700 + }, + { + "epoch": 1.5668015852226156, + "grad_norm": 0.05294652655720711, + "learning_rate": 1.3622641356766951e-06, + "loss": 0.0013, + "step": 92710 + }, + { + "epoch": 1.566970585501466, + "grad_norm": 0.02998647838830948, + "learning_rate": 1.3612524760795038e-06, + "loss": 0.0015, + "step": 92720 + }, + { + "epoch": 1.5671395857803165, + "grad_norm": 0.009507248178124428, + "learning_rate": 1.3602411330709758e-06, + "loss": 0.0003, + "step": 92730 + }, + { + "epoch": 1.5673085860591671, + "grad_norm": 0.0252500269562006, + "learning_rate": 1.3592301067391057e-06, + "loss": 0.0008, + "step": 92740 + }, + { + "epoch": 1.5674775863380175, + "grad_norm": 0.0627959668636322, + "learning_rate": 1.3582193971718549e-06, + "loss": 0.0004, + "step": 92750 + }, + { + "epoch": 1.5676465866168678, + "grad_norm": 0.04591644927859306, + "learning_rate": 1.357209004457163e-06, + "loss": 0.0008, + "step": 92760 + }, + { + "epoch": 1.5678155868957184, + "grad_norm": 0.032508768141269684, + "learning_rate": 1.3561989286829353e-06, + "loss": 0.0004, + "step": 92770 + }, + { + "epoch": 1.5679845871745688, + "grad_norm": 0.02691843919456005, + "learning_rate": 1.3551891699370556e-06, + "loss": 0.0004, + "step": 92780 + }, + { + "epoch": 1.5681535874534194, + "grad_norm": 0.002264541108161211, + "learning_rate": 1.3541797283073781e-06, + "loss": 0.0006, + "step": 92790 + }, + { + "epoch": 1.5683225877322697, + "grad_norm": 0.004401510581374168, + "learning_rate": 1.35317060388173e-06, + "loss": 0.0014, + "step": 92800 + }, + { + "epoch": 1.56849158801112, + "grad_norm": 0.051362864673137665, + "learning_rate": 1.3521617967479066e-06, + "loss": 0.0005, + "step": 92810 + }, + { + "epoch": 1.5686605882899707, + "grad_norm": 0.0028736076783388853, + "learning_rate": 1.3511533069936833e-06, + "loss": 0.0006, + "step": 92820 + }, + { + "epoch": 1.5688295885688213, + "grad_norm": 0.018490580841898918, + "learning_rate": 1.3501451347067995e-06, + "loss": 0.0006, + "step": 92830 + }, + { + "epoch": 1.5689985888476716, + "grad_norm": 0.024485796689987183, + "learning_rate": 1.3491372799749747e-06, + "loss": 0.0008, + "step": 92840 + }, + { + "epoch": 1.569167589126522, + "grad_norm": 0.038191087543964386, + "learning_rate": 1.3481297428858935e-06, + "loss": 0.0005, + "step": 92850 + }, + { + "epoch": 1.5693365894053726, + "grad_norm": 0.07634992152452469, + "learning_rate": 1.34712252352722e-06, + "loss": 0.0005, + "step": 92860 + }, + { + "epoch": 1.569505589684223, + "grad_norm": 0.014637090265750885, + "learning_rate": 1.3461156219865845e-06, + "loss": 0.0005, + "step": 92870 + }, + { + "epoch": 1.5696745899630735, + "grad_norm": 0.09911244362592697, + 
"learning_rate": 1.345109038351594e-06, + "loss": 0.0008, + "step": 92880 + }, + { + "epoch": 1.5698435902419239, + "grad_norm": 0.03441356495022774, + "learning_rate": 1.344102772709825e-06, + "loss": 0.0011, + "step": 92890 + }, + { + "epoch": 1.5700125905207742, + "grad_norm": 0.05653800442814827, + "learning_rate": 1.3430968251488285e-06, + "loss": 0.0005, + "step": 92900 + }, + { + "epoch": 1.5701815907996248, + "grad_norm": 0.06830038875341415, + "learning_rate": 1.3420911957561255e-06, + "loss": 0.0006, + "step": 92910 + }, + { + "epoch": 1.5703505910784754, + "grad_norm": 0.005915106739848852, + "learning_rate": 1.3410858846192116e-06, + "loss": 0.0012, + "step": 92920 + }, + { + "epoch": 1.5705195913573258, + "grad_norm": 0.04270845279097557, + "learning_rate": 1.3400808918255549e-06, + "loss": 0.0008, + "step": 92930 + }, + { + "epoch": 1.5706885916361761, + "grad_norm": 0.04755331203341484, + "learning_rate": 1.3390762174625916e-06, + "loss": 0.0005, + "step": 92940 + }, + { + "epoch": 1.5708575919150265, + "grad_norm": 0.02603033185005188, + "learning_rate": 1.3380718616177363e-06, + "loss": 0.0005, + "step": 92950 + }, + { + "epoch": 1.571026592193877, + "grad_norm": 0.008106399327516556, + "learning_rate": 1.3370678243783703e-06, + "loss": 0.0004, + "step": 92960 + }, + { + "epoch": 1.5711955924727277, + "grad_norm": 0.01038984302431345, + "learning_rate": 1.3360641058318507e-06, + "loss": 0.0006, + "step": 92970 + }, + { + "epoch": 1.571364592751578, + "grad_norm": 0.03053467348217964, + "learning_rate": 1.335060706065508e-06, + "loss": 0.0011, + "step": 92980 + }, + { + "epoch": 1.5715335930304284, + "grad_norm": 0.03615260496735573, + "learning_rate": 1.3340576251666387e-06, + "loss": 0.0006, + "step": 92990 + }, + { + "epoch": 1.571702593309279, + "grad_norm": 0.021119754761457443, + "learning_rate": 1.3330548632225188e-06, + "loss": 0.0008, + "step": 93000 + }, + { + "epoch": 1.5718715935881296, + "grad_norm": 0.09246402978897095, + "learning_rate": 1.3320524203203933e-06, + "loss": 0.0012, + "step": 93010 + }, + { + "epoch": 1.57204059386698, + "grad_norm": 0.013367106206715107, + "learning_rate": 1.3310502965474776e-06, + "loss": 0.0006, + "step": 93020 + }, + { + "epoch": 1.5722095941458303, + "grad_norm": 0.0342743843793869, + "learning_rate": 1.330048491990964e-06, + "loss": 0.001, + "step": 93030 + }, + { + "epoch": 1.5723785944246806, + "grad_norm": 0.05452071875333786, + "learning_rate": 1.3290470067380117e-06, + "loss": 0.0008, + "step": 93040 + }, + { + "epoch": 1.5725475947035312, + "grad_norm": 0.015118611045181751, + "learning_rate": 1.3280458408757574e-06, + "loss": 0.0009, + "step": 93050 + }, + { + "epoch": 1.5727165949823818, + "grad_norm": 0.08342970162630081, + "learning_rate": 1.3270449944913044e-06, + "loss": 0.0004, + "step": 93060 + }, + { + "epoch": 1.5728855952612322, + "grad_norm": 0.024492163211107254, + "learning_rate": 1.3260444676717343e-06, + "loss": 0.0005, + "step": 93070 + }, + { + "epoch": 1.5730545955400825, + "grad_norm": 0.01578182354569435, + "learning_rate": 1.3250442605040941e-06, + "loss": 0.0006, + "step": 93080 + }, + { + "epoch": 1.5732235958189331, + "grad_norm": 0.11322375386953354, + "learning_rate": 1.324044373075411e-06, + "loss": 0.0015, + "step": 93090 + }, + { + "epoch": 1.5733925960977837, + "grad_norm": 0.0352368988096714, + "learning_rate": 1.3230448054726762e-06, + "loss": 0.001, + "step": 93100 + }, + { + "epoch": 1.573561596376634, + "grad_norm": 0.007330413442105055, + "learning_rate": 1.3220455577828607e-06, + 
"loss": 0.0005, + "step": 93110 + }, + { + "epoch": 1.5737305966554844, + "grad_norm": 0.044726476073265076, + "learning_rate": 1.3210466300928993e-06, + "loss": 0.0006, + "step": 93120 + }, + { + "epoch": 1.5738995969343348, + "grad_norm": 0.048842594027519226, + "learning_rate": 1.3200480224897079e-06, + "loss": 0.0006, + "step": 93130 + }, + { + "epoch": 1.5740685972131854, + "grad_norm": 0.006548158824443817, + "learning_rate": 1.3190497350601667e-06, + "loss": 0.0005, + "step": 93140 + }, + { + "epoch": 1.574237597492036, + "grad_norm": 0.027237258851528168, + "learning_rate": 1.3180517678911337e-06, + "loss": 0.0017, + "step": 93150 + }, + { + "epoch": 1.5744065977708863, + "grad_norm": 0.016414618119597435, + "learning_rate": 1.317054121069436e-06, + "loss": 0.0005, + "step": 93160 + }, + { + "epoch": 1.5745755980497367, + "grad_norm": 0.031219804659485817, + "learning_rate": 1.3160567946818752e-06, + "loss": 0.0007, + "step": 93170 + }, + { + "epoch": 1.5747445983285873, + "grad_norm": 0.032161157578229904, + "learning_rate": 1.315059788815221e-06, + "loss": 0.0006, + "step": 93180 + }, + { + "epoch": 1.5749135986074378, + "grad_norm": 0.03028140589594841, + "learning_rate": 1.3140631035562196e-06, + "loss": 0.0006, + "step": 93190 + }, + { + "epoch": 1.5750825988862882, + "grad_norm": 0.03312603756785393, + "learning_rate": 1.313066738991588e-06, + "loss": 0.0006, + "step": 93200 + }, + { + "epoch": 1.5752515991651386, + "grad_norm": 0.0011683525517582893, + "learning_rate": 1.3120706952080127e-06, + "loss": 0.0004, + "step": 93210 + }, + { + "epoch": 1.575420599443989, + "grad_norm": 0.01985262706875801, + "learning_rate": 1.3110749722921562e-06, + "loss": 0.0008, + "step": 93220 + }, + { + "epoch": 1.5755895997228395, + "grad_norm": 0.010746241547167301, + "learning_rate": 1.310079570330649e-06, + "loss": 0.0008, + "step": 93230 + }, + { + "epoch": 1.57575860000169, + "grad_norm": 0.009657425805926323, + "learning_rate": 1.309084489410099e-06, + "loss": 0.0006, + "step": 93240 + }, + { + "epoch": 1.5759276002805405, + "grad_norm": 0.006918651517480612, + "learning_rate": 1.3080897296170797e-06, + "loss": 0.0006, + "step": 93250 + }, + { + "epoch": 1.5760966005593908, + "grad_norm": 0.017401212826371193, + "learning_rate": 1.3070952910381435e-06, + "loss": 0.0005, + "step": 93260 + }, + { + "epoch": 1.5762656008382414, + "grad_norm": 0.012027682736515999, + "learning_rate": 1.3061011737598078e-06, + "loss": 0.001, + "step": 93270 + }, + { + "epoch": 1.576434601117092, + "grad_norm": 0.05377326160669327, + "learning_rate": 1.3051073778685685e-06, + "loss": 0.0006, + "step": 93280 + }, + { + "epoch": 1.5766036013959424, + "grad_norm": 0.0717761442065239, + "learning_rate": 1.3041139034508882e-06, + "loss": 0.0008, + "step": 93290 + }, + { + "epoch": 1.5767726016747927, + "grad_norm": 0.006123771890997887, + "learning_rate": 1.303120750593207e-06, + "loss": 0.0015, + "step": 93300 + }, + { + "epoch": 1.576941601953643, + "grad_norm": 0.01824995130300522, + "learning_rate": 1.3021279193819303e-06, + "loss": 0.001, + "step": 93310 + }, + { + "epoch": 1.5771106022324937, + "grad_norm": 0.051778171211481094, + "learning_rate": 1.301135409903443e-06, + "loss": 0.0003, + "step": 93320 + }, + { + "epoch": 1.5772796025113442, + "grad_norm": 0.02047240547835827, + "learning_rate": 1.3001432222440951e-06, + "loss": 0.0007, + "step": 93330 + }, + { + "epoch": 1.5774486027901946, + "grad_norm": 0.15653613209724426, + "learning_rate": 1.299151356490213e-06, + "loss": 0.0004, + "step": 93340 + }, 
+ { + "epoch": 1.577617603069045, + "grad_norm": 0.049618061631917953, + "learning_rate": 1.2981598127280954e-06, + "loss": 0.0011, + "step": 93350 + }, + { + "epoch": 1.5777866033478956, + "grad_norm": 0.0032934839837253094, + "learning_rate": 1.2971685910440086e-06, + "loss": 0.0007, + "step": 93360 + }, + { + "epoch": 1.5779556036267461, + "grad_norm": 0.031259384006261826, + "learning_rate": 1.2961776915241946e-06, + "loss": 0.0005, + "step": 93370 + }, + { + "epoch": 1.5781246039055965, + "grad_norm": 0.0658779963850975, + "learning_rate": 1.2951871142548684e-06, + "loss": 0.0003, + "step": 93380 + }, + { + "epoch": 1.5782936041844469, + "grad_norm": 0.03829159960150719, + "learning_rate": 1.2941968593222122e-06, + "loss": 0.0007, + "step": 93390 + }, + { + "epoch": 1.5784626044632972, + "grad_norm": 0.021253319457173347, + "learning_rate": 1.293206926812386e-06, + "loss": 0.001, + "step": 93400 + }, + { + "epoch": 1.5786316047421478, + "grad_norm": 0.00683000311255455, + "learning_rate": 1.2922173168115148e-06, + "loss": 0.0004, + "step": 93410 + }, + { + "epoch": 1.5788006050209984, + "grad_norm": 0.08008040487766266, + "learning_rate": 1.2912280294057023e-06, + "loss": 0.0009, + "step": 93420 + }, + { + "epoch": 1.5789696052998488, + "grad_norm": 0.03998623043298721, + "learning_rate": 1.290239064681022e-06, + "loss": 0.001, + "step": 93430 + }, + { + "epoch": 1.5791386055786991, + "grad_norm": 0.10981569439172745, + "learning_rate": 1.2892504227235152e-06, + "loss": 0.0011, + "step": 93440 + }, + { + "epoch": 1.5793076058575497, + "grad_norm": 0.03829672187566757, + "learning_rate": 1.2882621036192028e-06, + "loss": 0.0008, + "step": 93450 + }, + { + "epoch": 1.5794766061364003, + "grad_norm": 0.004690766800194979, + "learning_rate": 1.2872741074540695e-06, + "loss": 0.002, + "step": 93460 + }, + { + "epoch": 1.5796456064152506, + "grad_norm": 0.006633737590163946, + "learning_rate": 1.286286434314079e-06, + "loss": 0.0007, + "step": 93470 + }, + { + "epoch": 1.579814606694101, + "grad_norm": 0.012032300233840942, + "learning_rate": 1.2852990842851603e-06, + "loss": 0.0005, + "step": 93480 + }, + { + "epoch": 1.5799836069729514, + "grad_norm": 0.013286178931593895, + "learning_rate": 1.2843120574532214e-06, + "loss": 0.0008, + "step": 93490 + }, + { + "epoch": 1.580152607251802, + "grad_norm": 0.026893189176917076, + "learning_rate": 1.2833253539041346e-06, + "loss": 0.0006, + "step": 93500 + }, + { + "epoch": 1.5803216075306525, + "grad_norm": 0.06069996580481529, + "learning_rate": 1.2823389737237513e-06, + "loss": 0.0006, + "step": 93510 + }, + { + "epoch": 1.580490607809503, + "grad_norm": 0.01458305586129427, + "learning_rate": 1.2813529169978883e-06, + "loss": 0.0006, + "step": 93520 + }, + { + "epoch": 1.5806596080883533, + "grad_norm": 0.110781729221344, + "learning_rate": 1.2803671838123399e-06, + "loss": 0.0006, + "step": 93530 + }, + { + "epoch": 1.5808286083672038, + "grad_norm": 0.01895264908671379, + "learning_rate": 1.2793817742528674e-06, + "loss": 0.001, + "step": 93540 + }, + { + "epoch": 1.5809976086460544, + "grad_norm": 0.1498795449733734, + "learning_rate": 1.2783966884052074e-06, + "loss": 0.0007, + "step": 93550 + }, + { + "epoch": 1.5811666089249048, + "grad_norm": 0.05143171176314354, + "learning_rate": 1.2774119263550666e-06, + "loss": 0.0005, + "step": 93560 + }, + { + "epoch": 1.5813356092037552, + "grad_norm": 0.009230494499206543, + "learning_rate": 1.2764274881881266e-06, + "loss": 0.001, + "step": 93570 + }, + { + "epoch": 1.5815046094826055, + 
"grad_norm": 0.021424710750579834, + "learning_rate": 1.2754433739900346e-06, + "loss": 0.0007, + "step": 93580 + }, + { + "epoch": 1.581673609761456, + "grad_norm": 0.10135804116725922, + "learning_rate": 1.2744595838464164e-06, + "loss": 0.0012, + "step": 93590 + }, + { + "epoch": 1.5818426100403067, + "grad_norm": 0.03953375294804573, + "learning_rate": 1.2734761178428639e-06, + "loss": 0.0008, + "step": 93600 + }, + { + "epoch": 1.582011610319157, + "grad_norm": 0.015265276655554771, + "learning_rate": 1.2724929760649456e-06, + "loss": 0.0004, + "step": 93610 + }, + { + "epoch": 1.5821806105980074, + "grad_norm": 0.03104473650455475, + "learning_rate": 1.2715101585981971e-06, + "loss": 0.0019, + "step": 93620 + }, + { + "epoch": 1.582349610876858, + "grad_norm": 0.003955503460019827, + "learning_rate": 1.2705276655281312e-06, + "loss": 0.0005, + "step": 93630 + }, + { + "epoch": 1.5825186111557084, + "grad_norm": 0.09750876575708389, + "learning_rate": 1.269545496940227e-06, + "loss": 0.0006, + "step": 93640 + }, + { + "epoch": 1.582687611434559, + "grad_norm": 0.028847867622971535, + "learning_rate": 1.2685636529199401e-06, + "loss": 0.0008, + "step": 93650 + }, + { + "epoch": 1.5828566117134093, + "grad_norm": 0.08246053010225296, + "learning_rate": 1.267582133552694e-06, + "loss": 0.0007, + "step": 93660 + }, + { + "epoch": 1.5830256119922597, + "grad_norm": 0.02705477736890316, + "learning_rate": 1.266600938923887e-06, + "loss": 0.0013, + "step": 93670 + }, + { + "epoch": 1.5831946122711102, + "grad_norm": 0.07032588869333267, + "learning_rate": 1.2656200691188852e-06, + "loss": 0.0004, + "step": 93680 + }, + { + "epoch": 1.5833636125499608, + "grad_norm": 0.00811270996928215, + "learning_rate": 1.2646395242230313e-06, + "loss": 0.0005, + "step": 93690 + }, + { + "epoch": 1.5835326128288112, + "grad_norm": 0.010227940045297146, + "learning_rate": 1.2636593043216388e-06, + "loss": 0.0006, + "step": 93700 + }, + { + "epoch": 1.5837016131076616, + "grad_norm": 0.007332955952733755, + "learning_rate": 1.2626794094999883e-06, + "loss": 0.0003, + "step": 93710 + }, + { + "epoch": 1.5838706133865121, + "grad_norm": 0.058238618075847626, + "learning_rate": 1.2616998398433378e-06, + "loss": 0.0008, + "step": 93720 + }, + { + "epoch": 1.5840396136653625, + "grad_norm": 0.00826265849173069, + "learning_rate": 1.2607205954369128e-06, + "loss": 0.0004, + "step": 93730 + }, + { + "epoch": 1.584208613944213, + "grad_norm": 0.012059211730957031, + "learning_rate": 1.2597416763659131e-06, + "loss": 0.0006, + "step": 93740 + }, + { + "epoch": 1.5843776142230634, + "grad_norm": 0.016500458121299744, + "learning_rate": 1.2587630827155095e-06, + "loss": 0.0006, + "step": 93750 + }, + { + "epoch": 1.5845466145019138, + "grad_norm": 0.11306676268577576, + "learning_rate": 1.257784814570846e-06, + "loss": 0.0011, + "step": 93760 + }, + { + "epoch": 1.5847156147807644, + "grad_norm": 0.11525103449821472, + "learning_rate": 1.2568068720170335e-06, + "loss": 0.0003, + "step": 93770 + }, + { + "epoch": 1.584884615059615, + "grad_norm": 0.007584443315863609, + "learning_rate": 1.2558292551391604e-06, + "loss": 0.0009, + "step": 93780 + }, + { + "epoch": 1.5850536153384653, + "grad_norm": 0.04393519461154938, + "learning_rate": 1.2548519640222817e-06, + "loss": 0.0009, + "step": 93790 + }, + { + "epoch": 1.5852226156173157, + "grad_norm": 0.005927392281591892, + "learning_rate": 1.2538749987514297e-06, + "loss": 0.001, + "step": 93800 + }, + { + "epoch": 1.5853916158961663, + "grad_norm": 0.04695868119597435, + 
"learning_rate": 1.2528983594116017e-06, + "loss": 0.0005, + "step": 93810 + }, + { + "epoch": 1.5855606161750166, + "grad_norm": 0.11101292073726654, + "learning_rate": 1.251922046087773e-06, + "loss": 0.0003, + "step": 93820 + }, + { + "epoch": 1.5857296164538672, + "grad_norm": 0.049037858843803406, + "learning_rate": 1.2509460588648841e-06, + "loss": 0.001, + "step": 93830 + }, + { + "epoch": 1.5858986167327176, + "grad_norm": 0.039560019969940186, + "learning_rate": 1.2499703978278544e-06, + "loss": 0.0004, + "step": 93840 + }, + { + "epoch": 1.586067617011568, + "grad_norm": 0.052576590329408646, + "learning_rate": 1.2489950630615683e-06, + "loss": 0.0006, + "step": 93850 + }, + { + "epoch": 1.5862366172904185, + "grad_norm": 0.0020860591903328896, + "learning_rate": 1.2480200546508869e-06, + "loss": 0.001, + "step": 93860 + }, + { + "epoch": 1.5864056175692691, + "grad_norm": 0.022277476266026497, + "learning_rate": 1.2470453726806381e-06, + "loss": 0.0003, + "step": 93870 + }, + { + "epoch": 1.5865746178481195, + "grad_norm": 0.0447673462331295, + "learning_rate": 1.2460710172356266e-06, + "loss": 0.001, + "step": 93880 + }, + { + "epoch": 1.5867436181269698, + "grad_norm": 0.01280398853123188, + "learning_rate": 1.2450969884006237e-06, + "loss": 0.0006, + "step": 93890 + }, + { + "epoch": 1.5869126184058202, + "grad_norm": 0.02612018957734108, + "learning_rate": 1.2441232862603776e-06, + "loss": 0.0009, + "step": 93900 + }, + { + "epoch": 1.5870816186846708, + "grad_norm": 0.051986292004585266, + "learning_rate": 1.2431499108996009e-06, + "loss": 0.0018, + "step": 93910 + }, + { + "epoch": 1.5872506189635214, + "grad_norm": 0.04087941721081734, + "learning_rate": 1.2421768624029845e-06, + "loss": 0.0005, + "step": 93920 + }, + { + "epoch": 1.5874196192423717, + "grad_norm": 0.02893008291721344, + "learning_rate": 1.2412041408551885e-06, + "loss": 0.0009, + "step": 93930 + }, + { + "epoch": 1.587588619521222, + "grad_norm": 0.024935085326433182, + "learning_rate": 1.2402317463408454e-06, + "loss": 0.0003, + "step": 93940 + }, + { + "epoch": 1.5877576198000727, + "grad_norm": 0.024023279547691345, + "learning_rate": 1.2392596789445554e-06, + "loss": 0.0006, + "step": 93950 + }, + { + "epoch": 1.5879266200789233, + "grad_norm": 0.07324106246232986, + "learning_rate": 1.2382879387508944e-06, + "loss": 0.0007, + "step": 93960 + }, + { + "epoch": 1.5880956203577736, + "grad_norm": 0.04412218555808067, + "learning_rate": 1.2373165258444098e-06, + "loss": 0.0005, + "step": 93970 + }, + { + "epoch": 1.588264620636624, + "grad_norm": 0.021460387855768204, + "learning_rate": 1.236345440309617e-06, + "loss": 0.0006, + "step": 93980 + }, + { + "epoch": 1.5884336209154744, + "grad_norm": 0.04855572059750557, + "learning_rate": 1.2353746822310076e-06, + "loss": 0.0006, + "step": 93990 + }, + { + "epoch": 1.588602621194325, + "grad_norm": 0.3658362030982971, + "learning_rate": 1.2344042516930399e-06, + "loss": 0.001, + "step": 94000 + }, + { + "epoch": 1.5887716214731755, + "grad_norm": 0.009330356493592262, + "learning_rate": 1.2334341487801477e-06, + "loss": 0.0012, + "step": 94010 + }, + { + "epoch": 1.5889406217520259, + "grad_norm": 0.004394082818180323, + "learning_rate": 1.2324643735767328e-06, + "loss": 0.0005, + "step": 94020 + }, + { + "epoch": 1.5891096220308762, + "grad_norm": 0.052747923880815506, + "learning_rate": 1.2314949261671732e-06, + "loss": 0.0004, + "step": 94030 + }, + { + "epoch": 1.5892786223097268, + "grad_norm": 0.01922859065234661, + "learning_rate": 
1.2305258066358123e-06, + "loss": 0.0007, + "step": 94040 + }, + { + "epoch": 1.5894476225885774, + "grad_norm": 0.06810221076011658, + "learning_rate": 1.2295570150669712e-06, + "loss": 0.0014, + "step": 94050 + }, + { + "epoch": 1.5896166228674278, + "grad_norm": 0.09049668163061142, + "learning_rate": 1.2285885515449364e-06, + "loss": 0.0006, + "step": 94060 + }, + { + "epoch": 1.5897856231462781, + "grad_norm": 0.02533421292901039, + "learning_rate": 1.2276204161539722e-06, + "loss": 0.0006, + "step": 94070 + }, + { + "epoch": 1.5899546234251285, + "grad_norm": 0.02883775718510151, + "learning_rate": 1.2266526089783077e-06, + "loss": 0.0023, + "step": 94080 + }, + { + "epoch": 1.590123623703979, + "grad_norm": 0.08370274305343628, + "learning_rate": 1.2256851301021499e-06, + "loss": 0.0007, + "step": 94090 + }, + { + "epoch": 1.5902926239828297, + "grad_norm": 0.03740803152322769, + "learning_rate": 1.224717979609672e-06, + "loss": 0.0005, + "step": 94100 + }, + { + "epoch": 1.59046162426168, + "grad_norm": 0.005330103915184736, + "learning_rate": 1.2237511575850209e-06, + "loss": 0.0001, + "step": 94110 + }, + { + "epoch": 1.5906306245405304, + "grad_norm": 0.054041408002376556, + "learning_rate": 1.222784664112317e-06, + "loss": 0.0019, + "step": 94120 + }, + { + "epoch": 1.590799624819381, + "grad_norm": 0.04788007214665413, + "learning_rate": 1.2218184992756472e-06, + "loss": 0.0008, + "step": 94130 + }, + { + "epoch": 1.5909686250982316, + "grad_norm": 0.10085374116897583, + "learning_rate": 1.2208526631590727e-06, + "loss": 0.0011, + "step": 94140 + }, + { + "epoch": 1.591137625377082, + "grad_norm": 0.011281809769570827, + "learning_rate": 1.2198871558466296e-06, + "loss": 0.0015, + "step": 94150 + }, + { + "epoch": 1.5913066256559323, + "grad_norm": 0.01467775460332632, + "learning_rate": 1.2189219774223165e-06, + "loss": 0.0004, + "step": 94160 + }, + { + "epoch": 1.5914756259347826, + "grad_norm": 0.06958004832267761, + "learning_rate": 1.2179571279701137e-06, + "loss": 0.0006, + "step": 94170 + }, + { + "epoch": 1.5916446262136332, + "grad_norm": 0.07026882469654083, + "learning_rate": 1.216992607573963e-06, + "loss": 0.0008, + "step": 94180 + }, + { + "epoch": 1.5918136264924838, + "grad_norm": 0.0071510751731693745, + "learning_rate": 1.2160284163177844e-06, + "loss": 0.0009, + "step": 94190 + }, + { + "epoch": 1.5919826267713342, + "grad_norm": 0.007311527617275715, + "learning_rate": 1.215064554285469e-06, + "loss": 0.001, + "step": 94200 + }, + { + "epoch": 1.5921516270501845, + "grad_norm": 0.025926044210791588, + "learning_rate": 1.2141010215608745e-06, + "loss": 0.0006, + "step": 94210 + }, + { + "epoch": 1.5923206273290351, + "grad_norm": 0.04072462022304535, + "learning_rate": 1.2131378182278352e-06, + "loss": 0.0005, + "step": 94220 + }, + { + "epoch": 1.5924896276078857, + "grad_norm": 0.04394914582371712, + "learning_rate": 1.2121749443701525e-06, + "loss": 0.0003, + "step": 94230 + }, + { + "epoch": 1.592658627886736, + "grad_norm": 0.02323491871356964, + "learning_rate": 1.2112124000716035e-06, + "loss": 0.0008, + "step": 94240 + }, + { + "epoch": 1.5928276281655864, + "grad_norm": 0.04634846746921539, + "learning_rate": 1.2102501854159316e-06, + "loss": 0.0007, + "step": 94250 + }, + { + "epoch": 1.5929966284444368, + "grad_norm": 0.016460781916975975, + "learning_rate": 1.209288300486856e-06, + "loss": 0.0005, + "step": 94260 + }, + { + "epoch": 1.5931656287232874, + "grad_norm": 0.028184879571199417, + "learning_rate": 1.2083267453680636e-06, + "loss": 
0.0006, + "step": 94270 + }, + { + "epoch": 1.593334629002138, + "grad_norm": 0.004588103853166103, + "learning_rate": 1.2073655201432171e-06, + "loss": 0.0005, + "step": 94280 + }, + { + "epoch": 1.5935036292809883, + "grad_norm": 0.0014949159231036901, + "learning_rate": 1.2064046248959443e-06, + "loss": 0.0005, + "step": 94290 + }, + { + "epoch": 1.5936726295598387, + "grad_norm": 0.03530004993081093, + "learning_rate": 1.2054440597098515e-06, + "loss": 0.0007, + "step": 94300 + }, + { + "epoch": 1.5938416298386893, + "grad_norm": 0.007246180437505245, + "learning_rate": 1.2044838246685087e-06, + "loss": 0.0005, + "step": 94310 + }, + { + "epoch": 1.5940106301175399, + "grad_norm": 0.07312962412834167, + "learning_rate": 1.2035239198554627e-06, + "loss": 0.0011, + "step": 94320 + }, + { + "epoch": 1.5941796303963902, + "grad_norm": 0.00831496249884367, + "learning_rate": 1.2025643453542307e-06, + "loss": 0.0007, + "step": 94330 + }, + { + "epoch": 1.5943486306752406, + "grad_norm": 0.0062678721733391285, + "learning_rate": 1.2016051012483003e-06, + "loss": 0.0008, + "step": 94340 + }, + { + "epoch": 1.594517630954091, + "grad_norm": 0.04183892160654068, + "learning_rate": 1.200646187621129e-06, + "loss": 0.0011, + "step": 94350 + }, + { + "epoch": 1.5946866312329415, + "grad_norm": 0.04647721350193024, + "learning_rate": 1.1996876045561483e-06, + "loss": 0.0006, + "step": 94360 + }, + { + "epoch": 1.594855631511792, + "grad_norm": 0.03425600752234459, + "learning_rate": 1.1987293521367581e-06, + "loss": 0.0007, + "step": 94370 + }, + { + "epoch": 1.5950246317906425, + "grad_norm": 0.0020859267096966505, + "learning_rate": 1.1977714304463333e-06, + "loss": 0.0011, + "step": 94380 + }, + { + "epoch": 1.5951936320694928, + "grad_norm": 0.0032163539435714483, + "learning_rate": 1.1968138395682143e-06, + "loss": 0.0007, + "step": 94390 + }, + { + "epoch": 1.5953626323483434, + "grad_norm": 0.028047826141119003, + "learning_rate": 1.1958565795857203e-06, + "loss": 0.0005, + "step": 94400 + }, + { + "epoch": 1.595531632627194, + "grad_norm": 0.04284824803471565, + "learning_rate": 1.1948996505821335e-06, + "loss": 0.0008, + "step": 94410 + }, + { + "epoch": 1.5957006329060444, + "grad_norm": 0.012573568150401115, + "learning_rate": 1.1939430526407148e-06, + "loss": 0.0005, + "step": 94420 + }, + { + "epoch": 1.5958696331848947, + "grad_norm": 0.049135226756334305, + "learning_rate": 1.1929867858446897e-06, + "loss": 0.0008, + "step": 94430 + }, + { + "epoch": 1.596038633463745, + "grad_norm": 0.02598082646727562, + "learning_rate": 1.1920308502772609e-06, + "loss": 0.0021, + "step": 94440 + }, + { + "epoch": 1.5962076337425957, + "grad_norm": 0.046017665416002274, + "learning_rate": 1.1910752460215973e-06, + "loss": 0.0004, + "step": 94450 + }, + { + "epoch": 1.5963766340214463, + "grad_norm": 0.02556859329342842, + "learning_rate": 1.1901199731608416e-06, + "loss": 0.0008, + "step": 94460 + }, + { + "epoch": 1.5965456343002966, + "grad_norm": 0.018384618684649467, + "learning_rate": 1.189165031778109e-06, + "loss": 0.0011, + "step": 94470 + }, + { + "epoch": 1.596714634579147, + "grad_norm": 0.094480961561203, + "learning_rate": 1.1882104219564812e-06, + "loss": 0.0018, + "step": 94480 + }, + { + "epoch": 1.5968836348579976, + "grad_norm": 0.02965855970978737, + "learning_rate": 1.1872561437790165e-06, + "loss": 0.0006, + "step": 94490 + }, + { + "epoch": 1.5970526351368481, + "grad_norm": 0.09040623903274536, + "learning_rate": 1.1863021973287392e-06, + "loss": 0.0006, + "step": 94500 + }, + 
{ + "epoch": 1.5972216354156985, + "grad_norm": 0.044634588062763214, + "learning_rate": 1.1853485826886485e-06, + "loss": 0.0009, + "step": 94510 + }, + { + "epoch": 1.5973906356945489, + "grad_norm": 0.00631709024310112, + "learning_rate": 1.184395299941713e-06, + "loss": 0.0005, + "step": 94520 + }, + { + "epoch": 1.5975596359733992, + "grad_norm": 0.012562483549118042, + "learning_rate": 1.1834423491708757e-06, + "loss": 0.0007, + "step": 94530 + }, + { + "epoch": 1.5977286362522498, + "grad_norm": 0.08589354902505875, + "learning_rate": 1.1824897304590437e-06, + "loss": 0.0008, + "step": 94540 + }, + { + "epoch": 1.5978976365311004, + "grad_norm": 0.014393548481166363, + "learning_rate": 1.181537443889103e-06, + "loss": 0.0006, + "step": 94550 + }, + { + "epoch": 1.5980666368099508, + "grad_norm": 0.0159346554428339, + "learning_rate": 1.180585489543904e-06, + "loss": 0.0006, + "step": 94560 + }, + { + "epoch": 1.5982356370888011, + "grad_norm": 0.02267078123986721, + "learning_rate": 1.1796338675062746e-06, + "loss": 0.0012, + "step": 94570 + }, + { + "epoch": 1.5984046373676517, + "grad_norm": 0.04347150772809982, + "learning_rate": 1.1786825778590073e-06, + "loss": 0.0006, + "step": 94580 + }, + { + "epoch": 1.598573637646502, + "grad_norm": 0.014009674079716206, + "learning_rate": 1.1777316206848722e-06, + "loss": 0.0007, + "step": 94590 + }, + { + "epoch": 1.5987426379253526, + "grad_norm": 0.04441831260919571, + "learning_rate": 1.1767809960666033e-06, + "loss": 0.0002, + "step": 94600 + }, + { + "epoch": 1.598911638204203, + "grad_norm": 0.018436765298247337, + "learning_rate": 1.1758307040869132e-06, + "loss": 0.0009, + "step": 94610 + }, + { + "epoch": 1.5990806384830534, + "grad_norm": 0.14143379032611847, + "learning_rate": 1.1748807448284793e-06, + "loss": 0.0008, + "step": 94620 + }, + { + "epoch": 1.599249638761904, + "grad_norm": 0.056121762841939926, + "learning_rate": 1.173931118373955e-06, + "loss": 0.0004, + "step": 94630 + }, + { + "epoch": 1.5994186390407545, + "grad_norm": 0.028397779911756516, + "learning_rate": 1.1729818248059599e-06, + "loss": 0.0002, + "step": 94640 + }, + { + "epoch": 1.599587639319605, + "grad_norm": 0.06756635010242462, + "learning_rate": 1.1720328642070893e-06, + "loss": 0.0005, + "step": 94650 + }, + { + "epoch": 1.5997566395984553, + "grad_norm": 0.002616355661302805, + "learning_rate": 1.1710842366599057e-06, + "loss": 0.0007, + "step": 94660 + }, + { + "epoch": 1.5999256398773058, + "grad_norm": 0.037892259657382965, + "learning_rate": 1.170135942246946e-06, + "loss": 0.0004, + "step": 94670 + }, + { + "epoch": 1.6000946401561562, + "grad_norm": 0.004484300035983324, + "learning_rate": 1.1691879810507139e-06, + "loss": 0.0009, + "step": 94680 + }, + { + "epoch": 1.6002636404350068, + "grad_norm": 0.05947664752602577, + "learning_rate": 1.1682403531536885e-06, + "loss": 0.0005, + "step": 94690 + }, + { + "epoch": 1.6004326407138572, + "grad_norm": 0.10121885687112808, + "learning_rate": 1.1672930586383175e-06, + "loss": 0.0006, + "step": 94700 + }, + { + "epoch": 1.6006016409927075, + "grad_norm": 0.0022841356694698334, + "learning_rate": 1.1663460975870216e-06, + "loss": 0.001, + "step": 94710 + }, + { + "epoch": 1.600770641271558, + "grad_norm": 0.00949120707809925, + "learning_rate": 1.1653994700821885e-06, + "loss": 0.0004, + "step": 94720 + }, + { + "epoch": 1.6009396415504087, + "grad_norm": 0.01416967436671257, + "learning_rate": 1.1644531762061805e-06, + "loss": 0.0009, + "step": 94730 + }, + { + "epoch": 1.601108641829259, + 
"grad_norm": 0.025021463632583618, + "learning_rate": 1.1635072160413313e-06, + "loss": 0.0006, + "step": 94740 + }, + { + "epoch": 1.6012776421081094, + "grad_norm": 0.10143998265266418, + "learning_rate": 1.162561589669941e-06, + "loss": 0.001, + "step": 94750 + }, + { + "epoch": 1.6014466423869598, + "grad_norm": 0.04918771609663963, + "learning_rate": 1.1616162971742867e-06, + "loss": 0.0005, + "step": 94760 + }, + { + "epoch": 1.6016156426658104, + "grad_norm": 0.026630748063325882, + "learning_rate": 1.1606713386366109e-06, + "loss": 0.0005, + "step": 94770 + }, + { + "epoch": 1.601784642944661, + "grad_norm": 0.010483386926352978, + "learning_rate": 1.1597267141391315e-06, + "loss": 0.0006, + "step": 94780 + }, + { + "epoch": 1.6019536432235113, + "grad_norm": 0.10334274172782898, + "learning_rate": 1.1587824237640334e-06, + "loss": 0.0011, + "step": 94790 + }, + { + "epoch": 1.6021226435023617, + "grad_norm": 0.03335344418883324, + "learning_rate": 1.1578384675934766e-06, + "loss": 0.001, + "step": 94800 + }, + { + "epoch": 1.6022916437812122, + "grad_norm": 0.01131820771843195, + "learning_rate": 1.1568948457095874e-06, + "loss": 0.0006, + "step": 94810 + }, + { + "epoch": 1.6024606440600628, + "grad_norm": 0.020446304231882095, + "learning_rate": 1.1559515581944687e-06, + "loss": 0.001, + "step": 94820 + }, + { + "epoch": 1.6026296443389132, + "grad_norm": 0.06710178405046463, + "learning_rate": 1.1550086051301878e-06, + "loss": 0.0006, + "step": 94830 + }, + { + "epoch": 1.6027986446177636, + "grad_norm": 0.029595445841550827, + "learning_rate": 1.1540659865987891e-06, + "loss": 0.0007, + "step": 94840 + }, + { + "epoch": 1.602967644896614, + "grad_norm": 0.0384245291352272, + "learning_rate": 1.1531237026822817e-06, + "loss": 0.0014, + "step": 94850 + }, + { + "epoch": 1.6031366451754645, + "grad_norm": 0.00988872442394495, + "learning_rate": 1.1521817534626527e-06, + "loss": 0.0013, + "step": 94860 + }, + { + "epoch": 1.603305645454315, + "grad_norm": 0.011559010483324528, + "learning_rate": 1.1512401390218525e-06, + "loss": 0.0008, + "step": 94870 + }, + { + "epoch": 1.6034746457331654, + "grad_norm": 0.09325367212295532, + "learning_rate": 1.150298859441808e-06, + "loss": 0.0007, + "step": 94880 + }, + { + "epoch": 1.6036436460120158, + "grad_norm": 0.019055569544434547, + "learning_rate": 1.1493579148044149e-06, + "loss": 0.0004, + "step": 94890 + }, + { + "epoch": 1.6038126462908664, + "grad_norm": 0.058484043926000595, + "learning_rate": 1.1484173051915415e-06, + "loss": 0.001, + "step": 94900 + }, + { + "epoch": 1.603981646569717, + "grad_norm": 0.05305975303053856, + "learning_rate": 1.1474770306850226e-06, + "loss": 0.0005, + "step": 94910 + }, + { + "epoch": 1.6041506468485673, + "grad_norm": 0.015423264354467392, + "learning_rate": 1.146537091366669e-06, + "loss": 0.0004, + "step": 94920 + }, + { + "epoch": 1.6043196471274177, + "grad_norm": 0.012684252113103867, + "learning_rate": 1.1455974873182574e-06, + "loss": 0.0018, + "step": 94930 + }, + { + "epoch": 1.604488647406268, + "grad_norm": 0.0017607345944270492, + "learning_rate": 1.144658218621541e-06, + "loss": 0.0004, + "step": 94940 + }, + { + "epoch": 1.6046576476851186, + "grad_norm": 0.0076165455393493176, + "learning_rate": 1.143719285358238e-06, + "loss": 0.0017, + "step": 94950 + }, + { + "epoch": 1.6048266479639692, + "grad_norm": 0.009829283691942692, + "learning_rate": 1.1427806876100412e-06, + "loss": 0.0003, + "step": 94960 + }, + { + "epoch": 1.6049956482428196, + "grad_norm": 0.008983315899968147, 
+ "learning_rate": 1.1418424254586146e-06, + "loss": 0.0004, + "step": 94970 + }, + { + "epoch": 1.60516464852167, + "grad_norm": 0.002080840291455388, + "learning_rate": 1.1409044989855889e-06, + "loss": 0.0006, + "step": 94980 + }, + { + "epoch": 1.6053336488005205, + "grad_norm": 0.021074660122394562, + "learning_rate": 1.1399669082725707e-06, + "loss": 0.0009, + "step": 94990 + }, + { + "epoch": 1.6055026490793711, + "grad_norm": 0.12969990074634552, + "learning_rate": 1.1390296534011325e-06, + "loss": 0.0013, + "step": 95000 + }, + { + "epoch": 1.6056716493582215, + "grad_norm": 0.08277317136526108, + "learning_rate": 1.1380927344528226e-06, + "loss": 0.0008, + "step": 95010 + }, + { + "epoch": 1.6058406496370718, + "grad_norm": 0.010615160688757896, + "learning_rate": 1.1371561515091544e-06, + "loss": 0.0004, + "step": 95020 + }, + { + "epoch": 1.6060096499159222, + "grad_norm": 0.06074380502104759, + "learning_rate": 1.1362199046516193e-06, + "loss": 0.0011, + "step": 95030 + }, + { + "epoch": 1.6061786501947728, + "grad_norm": 0.07141372561454773, + "learning_rate": 1.1352839939616706e-06, + "loss": 0.0005, + "step": 95040 + }, + { + "epoch": 1.6063476504736234, + "grad_norm": 0.02140755206346512, + "learning_rate": 1.1343484195207411e-06, + "loss": 0.0006, + "step": 95050 + }, + { + "epoch": 1.6065166507524737, + "grad_norm": 0.1394871026277542, + "learning_rate": 1.1334131814102272e-06, + "loss": 0.001, + "step": 95060 + }, + { + "epoch": 1.606685651031324, + "grad_norm": 0.05843876674771309, + "learning_rate": 1.1324782797115007e-06, + "loss": 0.0005, + "step": 95070 + }, + { + "epoch": 1.6068546513101747, + "grad_norm": 0.07482954859733582, + "learning_rate": 1.1315437145059038e-06, + "loss": 0.0007, + "step": 95080 + }, + { + "epoch": 1.6070236515890253, + "grad_norm": 0.0020129596814513206, + "learning_rate": 1.1306094858747458e-06, + "loss": 0.0004, + "step": 95090 + }, + { + "epoch": 1.6071926518678756, + "grad_norm": 0.013063608668744564, + "learning_rate": 1.1296755938993097e-06, + "loss": 0.001, + "step": 95100 + }, + { + "epoch": 1.607361652146726, + "grad_norm": 0.040359411388635635, + "learning_rate": 1.1287420386608506e-06, + "loss": 0.0004, + "step": 95110 + }, + { + "epoch": 1.6075306524255764, + "grad_norm": 0.0003978550957981497, + "learning_rate": 1.127808820240589e-06, + "loss": 0.0004, + "step": 95120 + }, + { + "epoch": 1.607699652704427, + "grad_norm": 0.016882719472050667, + "learning_rate": 1.1268759387197232e-06, + "loss": 0.0003, + "step": 95130 + }, + { + "epoch": 1.6078686529832775, + "grad_norm": 0.051557507365942, + "learning_rate": 1.125943394179415e-06, + "loss": 0.0002, + "step": 95140 + }, + { + "epoch": 1.6080376532621279, + "grad_norm": 0.004775538574904203, + "learning_rate": 1.1250111867008028e-06, + "loss": 0.0007, + "step": 95150 + }, + { + "epoch": 1.6082066535409782, + "grad_norm": 0.03052687458693981, + "learning_rate": 1.124079316364991e-06, + "loss": 0.0004, + "step": 95160 + }, + { + "epoch": 1.6083756538198288, + "grad_norm": 0.006776569876819849, + "learning_rate": 1.1231477832530597e-06, + "loss": 0.0004, + "step": 95170 + }, + { + "epoch": 1.6085446540986794, + "grad_norm": 0.01142202503979206, + "learning_rate": 1.1222165874460528e-06, + "loss": 0.0002, + "step": 95180 + }, + { + "epoch": 1.6087136543775298, + "grad_norm": 0.21770192682743073, + "learning_rate": 1.121285729024993e-06, + "loss": 0.0007, + "step": 95190 + }, + { + "epoch": 1.6088826546563801, + "grad_norm": 0.014671850949525833, + "learning_rate": 
1.1203552080708662e-06, + "loss": 0.0007, + "step": 95200 + }, + { + "epoch": 1.6090516549352305, + "grad_norm": 0.09682852029800415, + "learning_rate": 1.1194250246646333e-06, + "loss": 0.0007, + "step": 95210 + }, + { + "epoch": 1.609220655214081, + "grad_norm": 0.014746912755072117, + "learning_rate": 1.1184951788872266e-06, + "loss": 0.0016, + "step": 95220 + }, + { + "epoch": 1.6093896554929317, + "grad_norm": 0.035342611372470856, + "learning_rate": 1.1175656708195443e-06, + "loss": 0.0004, + "step": 95230 + }, + { + "epoch": 1.609558655771782, + "grad_norm": 0.02012399584054947, + "learning_rate": 1.1166365005424612e-06, + "loss": 0.0004, + "step": 95240 + }, + { + "epoch": 1.6097276560506324, + "grad_norm": 0.030961062759160995, + "learning_rate": 1.1157076681368162e-06, + "loss": 0.001, + "step": 95250 + }, + { + "epoch": 1.609896656329483, + "grad_norm": 0.05708079785108566, + "learning_rate": 1.1147791736834252e-06, + "loss": 0.0007, + "step": 95260 + }, + { + "epoch": 1.6100656566083336, + "grad_norm": 0.02694096788764, + "learning_rate": 1.113851017263069e-06, + "loss": 0.0005, + "step": 95270 + }, + { + "epoch": 1.610234656887184, + "grad_norm": 0.09392502158880234, + "learning_rate": 1.1129231989565031e-06, + "loss": 0.0007, + "step": 95280 + }, + { + "epoch": 1.6104036571660343, + "grad_norm": 0.007937021553516388, + "learning_rate": 1.1119957188444525e-06, + "loss": 0.0005, + "step": 95290 + }, + { + "epoch": 1.6105726574448846, + "grad_norm": 0.0383593924343586, + "learning_rate": 1.111068577007613e-06, + "loss": 0.0004, + "step": 95300 + }, + { + "epoch": 1.6107416577237352, + "grad_norm": 0.014574541710317135, + "learning_rate": 1.110141773526649e-06, + "loss": 0.0006, + "step": 95310 + }, + { + "epoch": 1.6109106580025858, + "grad_norm": 0.0773870125412941, + "learning_rate": 1.1092153084821982e-06, + "loss": 0.0005, + "step": 95320 + }, + { + "epoch": 1.6110796582814362, + "grad_norm": 0.013775880448520184, + "learning_rate": 1.1082891819548658e-06, + "loss": 0.0004, + "step": 95330 + }, + { + "epoch": 1.6112486585602865, + "grad_norm": 0.00031712432974018157, + "learning_rate": 1.1073633940252316e-06, + "loss": 0.0003, + "step": 95340 + }, + { + "epoch": 1.6114176588391371, + "grad_norm": 0.03490206226706505, + "learning_rate": 1.106437944773841e-06, + "loss": 0.0014, + "step": 95350 + }, + { + "epoch": 1.6115866591179877, + "grad_norm": 0.001340964576229453, + "learning_rate": 1.1055128342812154e-06, + "loss": 0.0005, + "step": 95360 + }, + { + "epoch": 1.611755659396838, + "grad_norm": 0.1313982903957367, + "learning_rate": 1.104588062627841e-06, + "loss": 0.0006, + "step": 95370 + }, + { + "epoch": 1.6119246596756884, + "grad_norm": 0.08295604586601257, + "learning_rate": 1.1036636298941805e-06, + "loss": 0.0004, + "step": 95380 + }, + { + "epoch": 1.6120936599545388, + "grad_norm": 0.04703642800450325, + "learning_rate": 1.1027395361606609e-06, + "loss": 0.0003, + "step": 95390 + }, + { + "epoch": 1.6122626602333894, + "grad_norm": 0.030354078859090805, + "learning_rate": 1.1018157815076857e-06, + "loss": 0.0007, + "step": 95400 + }, + { + "epoch": 1.61243166051224, + "grad_norm": 0.002051990944892168, + "learning_rate": 1.1008923660156228e-06, + "loss": 0.0009, + "step": 95410 + }, + { + "epoch": 1.6126006607910903, + "grad_norm": 0.015003432519733906, + "learning_rate": 1.0999692897648173e-06, + "loss": 0.0004, + "step": 95420 + }, + { + "epoch": 1.6127696610699407, + "grad_norm": 0.0902337059378624, + "learning_rate": 1.0990465528355788e-06, + "loss": 0.0009, 
+ "step": 95430 + }, + { + "epoch": 1.6129386613487913, + "grad_norm": 0.2827770709991455, + "learning_rate": 1.0981241553081916e-06, + "loss": 0.0011, + "step": 95440 + }, + { + "epoch": 1.6131076616276416, + "grad_norm": 0.04656357318162918, + "learning_rate": 1.0972020972629067e-06, + "loss": 0.0007, + "step": 95450 + }, + { + "epoch": 1.6132766619064922, + "grad_norm": 0.009094469249248505, + "learning_rate": 1.0962803787799485e-06, + "loss": 0.0006, + "step": 95460 + }, + { + "epoch": 1.6134456621853426, + "grad_norm": 0.005886971019208431, + "learning_rate": 1.095358999939512e-06, + "loss": 0.0003, + "step": 95470 + }, + { + "epoch": 1.613614662464193, + "grad_norm": 0.036646418273448944, + "learning_rate": 1.0944379608217604e-06, + "loss": 0.0009, + "step": 95480 + }, + { + "epoch": 1.6137836627430435, + "grad_norm": 0.010496441274881363, + "learning_rate": 1.0935172615068307e-06, + "loss": 0.0006, + "step": 95490 + }, + { + "epoch": 1.613952663021894, + "grad_norm": 0.047105688601732254, + "learning_rate": 1.092596902074825e-06, + "loss": 0.001, + "step": 95500 + }, + { + "epoch": 1.6141216633007445, + "grad_norm": 0.09071959555149078, + "learning_rate": 1.0916768826058222e-06, + "loss": 0.0006, + "step": 95510 + }, + { + "epoch": 1.6142906635795948, + "grad_norm": 0.019688261672854424, + "learning_rate": 1.0907572031798652e-06, + "loss": 0.0011, + "step": 95520 + }, + { + "epoch": 1.6144596638584454, + "grad_norm": 0.029169324785470963, + "learning_rate": 1.089837863876974e-06, + "loss": 0.0005, + "step": 95530 + }, + { + "epoch": 1.6146286641372958, + "grad_norm": 0.035156168043613434, + "learning_rate": 1.088918864777132e-06, + "loss": 0.0007, + "step": 95540 + }, + { + "epoch": 1.6147976644161464, + "grad_norm": 0.0048329150304198265, + "learning_rate": 1.0880002059603e-06, + "loss": 0.0003, + "step": 95550 + }, + { + "epoch": 1.6149666646949967, + "grad_norm": 0.028820117935538292, + "learning_rate": 1.0870818875064033e-06, + "loss": 0.0006, + "step": 95560 + }, + { + "epoch": 1.615135664973847, + "grad_norm": 0.04869238659739494, + "learning_rate": 1.0861639094953418e-06, + "loss": 0.0006, + "step": 95570 + }, + { + "epoch": 1.6153046652526977, + "grad_norm": 0.13940592110157013, + "learning_rate": 1.0852462720069818e-06, + "loss": 0.0023, + "step": 95580 + }, + { + "epoch": 1.6154736655315483, + "grad_norm": 0.027209477499127388, + "learning_rate": 1.084328975121165e-06, + "loss": 0.0005, + "step": 95590 + }, + { + "epoch": 1.6156426658103986, + "grad_norm": 0.011152585968375206, + "learning_rate": 1.0834120189176978e-06, + "loss": 0.0014, + "step": 95600 + }, + { + "epoch": 1.615811666089249, + "grad_norm": 0.03328166902065277, + "learning_rate": 1.0824954034763624e-06, + "loss": 0.0009, + "step": 95610 + }, + { + "epoch": 1.6159806663680996, + "grad_norm": 0.034088313579559326, + "learning_rate": 1.0815791288769061e-06, + "loss": 0.0005, + "step": 95620 + }, + { + "epoch": 1.61614966664695, + "grad_norm": 0.025707952678203583, + "learning_rate": 1.0806631951990526e-06, + "loss": 0.0006, + "step": 95630 + }, + { + "epoch": 1.6163186669258005, + "grad_norm": 0.003974339924752712, + "learning_rate": 1.0797476025224896e-06, + "loss": 0.0003, + "step": 95640 + }, + { + "epoch": 1.6164876672046509, + "grad_norm": 0.09770644456148148, + "learning_rate": 1.078832350926879e-06, + "loss": 0.0006, + "step": 95650 + }, + { + "epoch": 1.6166566674835012, + "grad_norm": 0.06613285094499588, + "learning_rate": 1.077917440491852e-06, + "loss": 0.0016, + "step": 95660 + }, + { + "epoch": 
1.6168256677623518, + "grad_norm": 0.05171317607164383, + "learning_rate": 1.077002871297012e-06, + "loss": 0.0006, + "step": 95670 + }, + { + "epoch": 1.6169946680412024, + "grad_norm": 0.004788296762853861, + "learning_rate": 1.0760886434219287e-06, + "loss": 0.0004, + "step": 95680 + }, + { + "epoch": 1.6171636683200528, + "grad_norm": 0.01328999549150467, + "learning_rate": 1.0751747569461462e-06, + "loss": 0.0006, + "step": 95690 + }, + { + "epoch": 1.6173326685989031, + "grad_norm": 0.047808896750211716, + "learning_rate": 1.0742612119491752e-06, + "loss": 0.0009, + "step": 95700 + }, + { + "epoch": 1.6175016688777535, + "grad_norm": 0.02984941564500332, + "learning_rate": 1.0733480085105002e-06, + "loss": 0.0004, + "step": 95710 + }, + { + "epoch": 1.617670669156604, + "grad_norm": 0.05902513116598129, + "learning_rate": 1.0724351467095723e-06, + "loss": 0.0011, + "step": 95720 + }, + { + "epoch": 1.6178396694354547, + "grad_norm": 0.05337316915392876, + "learning_rate": 1.071522626625816e-06, + "loss": 0.0006, + "step": 95730 + }, + { + "epoch": 1.618008669714305, + "grad_norm": 0.04958257079124451, + "learning_rate": 1.070610448338627e-06, + "loss": 0.0003, + "step": 95740 + }, + { + "epoch": 1.6181776699931554, + "grad_norm": 0.06459430605173111, + "learning_rate": 1.0696986119273656e-06, + "loss": 0.0004, + "step": 95750 + }, + { + "epoch": 1.618346670272006, + "grad_norm": 0.0003064874035771936, + "learning_rate": 1.0687871174713688e-06, + "loss": 0.0008, + "step": 95760 + }, + { + "epoch": 1.6185156705508565, + "grad_norm": 0.04987462982535362, + "learning_rate": 1.0678759650499387e-06, + "loss": 0.0009, + "step": 95770 + }, + { + "epoch": 1.618684670829707, + "grad_norm": 0.03283727169036865, + "learning_rate": 1.0669651547423527e-06, + "loss": 0.0008, + "step": 95780 + }, + { + "epoch": 1.6188536711085573, + "grad_norm": 0.01741410419344902, + "learning_rate": 1.0660546866278532e-06, + "loss": 0.0009, + "step": 95790 + }, + { + "epoch": 1.6190226713874076, + "grad_norm": 0.02572944574058056, + "learning_rate": 1.0651445607856576e-06, + "loss": 0.0006, + "step": 95800 + }, + { + "epoch": 1.6191916716662582, + "grad_norm": 0.006492604501545429, + "learning_rate": 1.064234777294948e-06, + "loss": 0.0007, + "step": 95810 + }, + { + "epoch": 1.6193606719451088, + "grad_norm": 0.013690764084458351, + "learning_rate": 1.0633253362348838e-06, + "loss": 0.0003, + "step": 95820 + }, + { + "epoch": 1.6195296722239592, + "grad_norm": 0.06615430116653442, + "learning_rate": 1.062416237684588e-06, + "loss": 0.0006, + "step": 95830 + }, + { + "epoch": 1.6196986725028095, + "grad_norm": 0.025486761704087257, + "learning_rate": 1.061507481723157e-06, + "loss": 0.0004, + "step": 95840 + }, + { + "epoch": 1.61986767278166, + "grad_norm": 0.0018564617494121194, + "learning_rate": 1.060599068429659e-06, + "loss": 0.001, + "step": 95850 + }, + { + "epoch": 1.6200366730605107, + "grad_norm": 0.12936896085739136, + "learning_rate": 1.0596909978831276e-06, + "loss": 0.0007, + "step": 95860 + }, + { + "epoch": 1.620205673339361, + "grad_norm": 0.03564248979091644, + "learning_rate": 1.058783270162571e-06, + "loss": 0.0007, + "step": 95870 + }, + { + "epoch": 1.6203746736182114, + "grad_norm": 0.010401569306850433, + "learning_rate": 1.0578758853469662e-06, + "loss": 0.0008, + "step": 95880 + }, + { + "epoch": 1.6205436738970618, + "grad_norm": 0.01914243958890438, + "learning_rate": 1.0569688435152586e-06, + "loss": 0.0006, + "step": 95890 + }, + { + "epoch": 1.6207126741759124, + "grad_norm": 
0.023274172097444534, + "learning_rate": 1.056062144746367e-06, + "loss": 0.0006, + "step": 95900 + }, + { + "epoch": 1.620881674454763, + "grad_norm": 0.010225529782474041, + "learning_rate": 1.0551557891191766e-06, + "loss": 0.0012, + "step": 95910 + }, + { + "epoch": 1.6210506747336133, + "grad_norm": 0.016484683379530907, + "learning_rate": 1.054249776712547e-06, + "loss": 0.0005, + "step": 95920 + }, + { + "epoch": 1.6212196750124637, + "grad_norm": 0.1197252869606018, + "learning_rate": 1.053344107605303e-06, + "loss": 0.0014, + "step": 95930 + }, + { + "epoch": 1.6213886752913143, + "grad_norm": 0.09648619592189789, + "learning_rate": 1.0524387818762448e-06, + "loss": 0.0008, + "step": 95940 + }, + { + "epoch": 1.6215576755701648, + "grad_norm": 0.0543203242123127, + "learning_rate": 1.0515337996041381e-06, + "loss": 0.0004, + "step": 95950 + }, + { + "epoch": 1.6217266758490152, + "grad_norm": 0.014883480966091156, + "learning_rate": 1.0506291608677226e-06, + "loss": 0.0007, + "step": 95960 + }, + { + "epoch": 1.6218956761278656, + "grad_norm": 0.01520876307040453, + "learning_rate": 1.0497248657457038e-06, + "loss": 0.0003, + "step": 95970 + }, + { + "epoch": 1.622064676406716, + "grad_norm": 0.017585331574082375, + "learning_rate": 1.0488209143167616e-06, + "loss": 0.0005, + "step": 95980 + }, + { + "epoch": 1.6222336766855665, + "grad_norm": 0.007562616840004921, + "learning_rate": 1.047917306659545e-06, + "loss": 0.001, + "step": 95990 + }, + { + "epoch": 1.622402676964417, + "grad_norm": 0.0027630627155303955, + "learning_rate": 1.0470140428526693e-06, + "loss": 0.0006, + "step": 96000 + }, + { + "epoch": 1.6225716772432675, + "grad_norm": 0.016282545402646065, + "learning_rate": 1.0461111229747263e-06, + "loss": 0.0016, + "step": 96010 + }, + { + "epoch": 1.6227406775221178, + "grad_norm": 0.0015029292553663254, + "learning_rate": 1.0452085471042705e-06, + "loss": 0.0005, + "step": 96020 + }, + { + "epoch": 1.6229096778009684, + "grad_norm": 0.05951765552163124, + "learning_rate": 1.0443063153198346e-06, + "loss": 0.0018, + "step": 96030 + }, + { + "epoch": 1.623078678079819, + "grad_norm": 0.019307712092995644, + "learning_rate": 1.0434044276999138e-06, + "loss": 0.0005, + "step": 96040 + }, + { + "epoch": 1.6232476783586693, + "grad_norm": 0.0006605935632251203, + "learning_rate": 1.0425028843229773e-06, + "loss": 0.0018, + "step": 96050 + }, + { + "epoch": 1.6234166786375197, + "grad_norm": 0.02443947084248066, + "learning_rate": 1.0416016852674648e-06, + "loss": 0.0005, + "step": 96060 + }, + { + "epoch": 1.62358567891637, + "grad_norm": 0.08372944593429565, + "learning_rate": 1.0407008306117862e-06, + "loss": 0.0011, + "step": 96070 + }, + { + "epoch": 1.6237546791952207, + "grad_norm": 0.06334172189235687, + "learning_rate": 1.0398003204343166e-06, + "loss": 0.0006, + "step": 96080 + }, + { + "epoch": 1.6239236794740712, + "grad_norm": 0.0012170334812253714, + "learning_rate": 1.0389001548134088e-06, + "loss": 0.0003, + "step": 96090 + }, + { + "epoch": 1.6240926797529216, + "grad_norm": 0.028422201052308083, + "learning_rate": 1.0380003338273774e-06, + "loss": 0.0004, + "step": 96100 + }, + { + "epoch": 1.624261680031772, + "grad_norm": 0.015722034499049187, + "learning_rate": 1.037100857554515e-06, + "loss": 0.0004, + "step": 96110 + }, + { + "epoch": 1.6244306803106225, + "grad_norm": 0.024624217301607132, + "learning_rate": 1.0362017260730772e-06, + "loss": 0.0007, + "step": 96120 + }, + { + "epoch": 1.6245996805894731, + "grad_norm": 0.0006813265499658883, + 
"learning_rate": 1.0353029394612957e-06, + "loss": 0.0002, + "step": 96130 + }, + { + "epoch": 1.6247686808683235, + "grad_norm": 0.04793710634112358, + "learning_rate": 1.0344044977973666e-06, + "loss": 0.0004, + "step": 96140 + }, + { + "epoch": 1.6249376811471739, + "grad_norm": 0.07139737904071808, + "learning_rate": 1.0335064011594615e-06, + "loss": 0.0008, + "step": 96150 + }, + { + "epoch": 1.6251066814260242, + "grad_norm": 0.000769991718698293, + "learning_rate": 1.032608649625716e-06, + "loss": 0.0007, + "step": 96160 + }, + { + "epoch": 1.6252756817048748, + "grad_norm": 0.026528295129537582, + "learning_rate": 1.0317112432742416e-06, + "loss": 0.0005, + "step": 96170 + }, + { + "epoch": 1.6254446819837254, + "grad_norm": 0.002808311488479376, + "learning_rate": 1.0308141821831147e-06, + "loss": 0.0006, + "step": 96180 + }, + { + "epoch": 1.6256136822625757, + "grad_norm": 0.037516504526138306, + "learning_rate": 1.0299174664303862e-06, + "loss": 0.0004, + "step": 96190 + }, + { + "epoch": 1.625782682541426, + "grad_norm": 0.08719465881586075, + "learning_rate": 1.0290210960940722e-06, + "loss": 0.0013, + "step": 96200 + }, + { + "epoch": 1.6259516828202767, + "grad_norm": 0.030474737286567688, + "learning_rate": 1.0281250712521633e-06, + "loss": 0.0004, + "step": 96210 + }, + { + "epoch": 1.6261206830991273, + "grad_norm": 0.012552831321954727, + "learning_rate": 1.0272293919826182e-06, + "loss": 0.0008, + "step": 96220 + }, + { + "epoch": 1.6262896833779776, + "grad_norm": 0.010344977490603924, + "learning_rate": 1.0263340583633635e-06, + "loss": 0.0006, + "step": 96230 + }, + { + "epoch": 1.626458683656828, + "grad_norm": 0.006808555219322443, + "learning_rate": 1.0254390704722984e-06, + "loss": 0.0009, + "step": 96240 + }, + { + "epoch": 1.6266276839356784, + "grad_norm": 0.03128369525074959, + "learning_rate": 1.0245444283872913e-06, + "loss": 0.0004, + "step": 96250 + }, + { + "epoch": 1.626796684214529, + "grad_norm": 0.08118956536054611, + "learning_rate": 1.0236501321861819e-06, + "loss": 0.0008, + "step": 96260 + }, + { + "epoch": 1.6269656844933795, + "grad_norm": 0.03353813290596008, + "learning_rate": 1.0227561819467753e-06, + "loss": 0.0006, + "step": 96270 + }, + { + "epoch": 1.6271346847722299, + "grad_norm": 0.09277798235416412, + "learning_rate": 1.0218625777468527e-06, + "loss": 0.0008, + "step": 96280 + }, + { + "epoch": 1.6273036850510803, + "grad_norm": 0.03057316318154335, + "learning_rate": 1.020969319664159e-06, + "loss": 0.0008, + "step": 96290 + }, + { + "epoch": 1.6274726853299308, + "grad_norm": 0.01385189313441515, + "learning_rate": 1.0200764077764146e-06, + "loss": 0.0009, + "step": 96300 + }, + { + "epoch": 1.6276416856087814, + "grad_norm": 0.03868904337286949, + "learning_rate": 1.0191838421613043e-06, + "loss": 0.0018, + "step": 96310 + }, + { + "epoch": 1.6278106858876318, + "grad_norm": 0.058415502309799194, + "learning_rate": 1.018291622896489e-06, + "loss": 0.0017, + "step": 96320 + }, + { + "epoch": 1.6279796861664821, + "grad_norm": 0.03958326205611229, + "learning_rate": 1.0173997500595927e-06, + "loss": 0.0006, + "step": 96330 + }, + { + "epoch": 1.6281486864453325, + "grad_norm": 0.047710344195365906, + "learning_rate": 1.0165082237282158e-06, + "loss": 0.0004, + "step": 96340 + }, + { + "epoch": 1.628317686724183, + "grad_norm": 0.04047977924346924, + "learning_rate": 1.0156170439799224e-06, + "loss": 0.0005, + "step": 96350 + }, + { + "epoch": 1.6284866870030337, + "grad_norm": 0.03744013234972954, + "learning_rate": 
1.0147262108922522e-06, + "loss": 0.0005, + "step": 96360 + }, + { + "epoch": 1.628655687281884, + "grad_norm": 0.025144357234239578, + "learning_rate": 1.01383572454271e-06, + "loss": 0.0006, + "step": 96370 + }, + { + "epoch": 1.6288246875607344, + "grad_norm": 0.0007659460534341633, + "learning_rate": 1.0129455850087737e-06, + "loss": 0.0004, + "step": 96380 + }, + { + "epoch": 1.628993687839585, + "grad_norm": 0.06955332309007645, + "learning_rate": 1.0120557923678886e-06, + "loss": 0.0005, + "step": 96390 + }, + { + "epoch": 1.6291626881184353, + "grad_norm": 0.011934510432183743, + "learning_rate": 1.0111663466974724e-06, + "loss": 0.0015, + "step": 96400 + }, + { + "epoch": 1.629331688397286, + "grad_norm": 0.03708849102258682, + "learning_rate": 1.010277248074909e-06, + "loss": 0.0011, + "step": 96410 + }, + { + "epoch": 1.6295006886761363, + "grad_norm": 0.02652420848608017, + "learning_rate": 1.0093884965775564e-06, + "loss": 0.0005, + "step": 96420 + }, + { + "epoch": 1.6296696889549867, + "grad_norm": 0.18634644150733948, + "learning_rate": 1.0085000922827386e-06, + "loss": 0.001, + "step": 96430 + }, + { + "epoch": 1.6298386892338372, + "grad_norm": 0.014714261516928673, + "learning_rate": 1.0076120352677543e-06, + "loss": 0.001, + "step": 96440 + }, + { + "epoch": 1.6300076895126878, + "grad_norm": 0.07127939164638519, + "learning_rate": 1.0067243256098646e-06, + "loss": 0.0007, + "step": 96450 + }, + { + "epoch": 1.6301766897915382, + "grad_norm": 0.027194982394576073, + "learning_rate": 1.005836963386308e-06, + "loss": 0.0007, + "step": 96460 + }, + { + "epoch": 1.6303456900703885, + "grad_norm": 0.04858466610312462, + "learning_rate": 1.004949948674286e-06, + "loss": 0.0007, + "step": 96470 + }, + { + "epoch": 1.6305146903492391, + "grad_norm": 0.019683321937918663, + "learning_rate": 1.0040632815509765e-06, + "loss": 0.0005, + "step": 96480 + }, + { + "epoch": 1.6306836906280895, + "grad_norm": 0.09658891707658768, + "learning_rate": 1.0031769620935212e-06, + "loss": 0.0005, + "step": 96490 + }, + { + "epoch": 1.63085269090694, + "grad_norm": 0.0899178758263588, + "learning_rate": 1.002290990379035e-06, + "loss": 0.0013, + "step": 96500 + }, + { + "epoch": 1.6310216911857904, + "grad_norm": 0.06441479176282883, + "learning_rate": 1.0014053664846035e-06, + "loss": 0.0009, + "step": 96510 + }, + { + "epoch": 1.6311906914646408, + "grad_norm": 0.043808892369270325, + "learning_rate": 1.000520090487277e-06, + "loss": 0.0007, + "step": 96520 + }, + { + "epoch": 1.6313596917434914, + "grad_norm": 0.035942934453487396, + "learning_rate": 9.996351624640821e-07, + "loss": 0.0003, + "step": 96530 + }, + { + "epoch": 1.631528692022342, + "grad_norm": 0.03058365173637867, + "learning_rate": 9.98750582492009e-07, + "loss": 0.0008, + "step": 96540 + }, + { + "epoch": 1.6316976923011923, + "grad_norm": 0.06809443980455399, + "learning_rate": 9.978663506480235e-07, + "loss": 0.0011, + "step": 96550 + }, + { + "epoch": 1.6318666925800427, + "grad_norm": 0.008974218741059303, + "learning_rate": 9.969824670090543e-07, + "loss": 0.0008, + "step": 96560 + }, + { + "epoch": 1.6320356928588933, + "grad_norm": 0.05109541118144989, + "learning_rate": 9.96098931652007e-07, + "loss": 0.0007, + "step": 96570 + }, + { + "epoch": 1.6322046931377436, + "grad_norm": 0.028603311628103256, + "learning_rate": 9.952157446537508e-07, + "loss": 0.0008, + "step": 96580 + }, + { + "epoch": 1.6323736934165942, + "grad_norm": 0.013319021090865135, + "learning_rate": 9.9433290609113e-07, + "loss": 0.0008, + 
"step": 96590 + }, + { + "epoch": 1.6325426936954446, + "grad_norm": 0.0273862574249506, + "learning_rate": 9.934504160409526e-07, + "loss": 0.0009, + "step": 96600 + }, + { + "epoch": 1.632711693974295, + "grad_norm": 0.057017650455236435, + "learning_rate": 9.925682745800009e-07, + "loss": 0.0008, + "step": 96610 + }, + { + "epoch": 1.6328806942531455, + "grad_norm": 0.011289657093584538, + "learning_rate": 9.916864817850275e-07, + "loss": 0.0005, + "step": 96620 + }, + { + "epoch": 1.633049694531996, + "grad_norm": 0.014769106172025204, + "learning_rate": 9.908050377327494e-07, + "loss": 0.0015, + "step": 96630 + }, + { + "epoch": 1.6332186948108465, + "grad_norm": 0.19689464569091797, + "learning_rate": 9.899239424998586e-07, + "loss": 0.0004, + "step": 96640 + }, + { + "epoch": 1.6333876950896968, + "grad_norm": 0.0896286591887474, + "learning_rate": 9.890431961630148e-07, + "loss": 0.0015, + "step": 96650 + }, + { + "epoch": 1.6335566953685472, + "grad_norm": 0.017027780413627625, + "learning_rate": 9.88162798798845e-07, + "loss": 0.0004, + "step": 96660 + }, + { + "epoch": 1.6337256956473978, + "grad_norm": 0.016162235289812088, + "learning_rate": 9.87282750483951e-07, + "loss": 0.0002, + "step": 96670 + }, + { + "epoch": 1.6338946959262484, + "grad_norm": 0.07593441009521484, + "learning_rate": 9.864030512948991e-07, + "loss": 0.0007, + "step": 96680 + }, + { + "epoch": 1.6340636962050987, + "grad_norm": 0.033916737884283066, + "learning_rate": 9.855237013082285e-07, + "loss": 0.0008, + "step": 96690 + }, + { + "epoch": 1.634232696483949, + "grad_norm": 0.022884488105773926, + "learning_rate": 9.846447006004456e-07, + "loss": 0.0003, + "step": 96700 + }, + { + "epoch": 1.6344016967627997, + "grad_norm": 0.08436908572912216, + "learning_rate": 9.8376604924803e-07, + "loss": 0.0004, + "step": 96710 + }, + { + "epoch": 1.6345706970416503, + "grad_norm": 0.03133808448910713, + "learning_rate": 9.828877473274257e-07, + "loss": 0.0004, + "step": 96720 + }, + { + "epoch": 1.6347396973205006, + "grad_norm": 0.031930964440107346, + "learning_rate": 9.82009794915052e-07, + "loss": 0.001, + "step": 96730 + }, + { + "epoch": 1.634908697599351, + "grad_norm": 0.06534985452890396, + "learning_rate": 9.811321920872924e-07, + "loss": 0.001, + "step": 96740 + }, + { + "epoch": 1.6350776978782013, + "grad_norm": 0.044219955801963806, + "learning_rate": 9.802549389205041e-07, + "loss": 0.0006, + "step": 96750 + }, + { + "epoch": 1.635246698157052, + "grad_norm": 0.039992161095142365, + "learning_rate": 9.79378035491013e-07, + "loss": 0.0005, + "step": 96760 + }, + { + "epoch": 1.6354156984359025, + "grad_norm": 0.07529482990503311, + "learning_rate": 9.785014818751125e-07, + "loss": 0.0005, + "step": 96770 + }, + { + "epoch": 1.6355846987147529, + "grad_norm": 0.01657024398446083, + "learning_rate": 9.77625278149069e-07, + "loss": 0.0004, + "step": 96780 + }, + { + "epoch": 1.6357536989936032, + "grad_norm": 0.06686761975288391, + "learning_rate": 9.767494243891134e-07, + "loss": 0.0012, + "step": 96790 + }, + { + "epoch": 1.6359226992724538, + "grad_norm": 0.03590511530637741, + "learning_rate": 9.758739206714512e-07, + "loss": 0.0009, + "step": 96800 + }, + { + "epoch": 1.6360916995513044, + "grad_norm": 0.00774614792317152, + "learning_rate": 9.74998767072257e-07, + "loss": 0.0006, + "step": 96810 + }, + { + "epoch": 1.6362606998301548, + "grad_norm": 0.06778514385223389, + "learning_rate": 9.741239636676696e-07, + "loss": 0.0008, + "step": 96820 + }, + { + "epoch": 1.6364297001090051, + 
"grad_norm": 0.09307301044464111, + "learning_rate": 9.732495105338036e-07, + "loss": 0.0009, + "step": 96830 + }, + { + "epoch": 1.6365987003878555, + "grad_norm": 0.027598995715379715, + "learning_rate": 9.723754077467417e-07, + "loss": 0.0006, + "step": 96840 + }, + { + "epoch": 1.636767700666706, + "grad_norm": 0.06962253898382187, + "learning_rate": 9.715016553825318e-07, + "loss": 0.0007, + "step": 96850 + }, + { + "epoch": 1.6369367009455567, + "grad_norm": 0.02600184455513954, + "learning_rate": 9.706282535171979e-07, + "loss": 0.0002, + "step": 96860 + }, + { + "epoch": 1.637105701224407, + "grad_norm": 0.01340588927268982, + "learning_rate": 9.697552022267276e-07, + "loss": 0.0008, + "step": 96870 + }, + { + "epoch": 1.6372747015032574, + "grad_norm": 0.03624692186713219, + "learning_rate": 9.688825015870829e-07, + "loss": 0.0013, + "step": 96880 + }, + { + "epoch": 1.637443701782108, + "grad_norm": 0.14636138081550598, + "learning_rate": 9.680101516741908e-07, + "loss": 0.0007, + "step": 96890 + }, + { + "epoch": 1.6376127020609585, + "grad_norm": 0.03972083702683449, + "learning_rate": 9.671381525639517e-07, + "loss": 0.0008, + "step": 96900 + }, + { + "epoch": 1.637781702339809, + "grad_norm": 0.04988046735525131, + "learning_rate": 9.662665043322317e-07, + "loss": 0.0007, + "step": 96910 + }, + { + "epoch": 1.6379507026186593, + "grad_norm": 0.016621902585029602, + "learning_rate": 9.653952070548712e-07, + "loss": 0.0008, + "step": 96920 + }, + { + "epoch": 1.6381197028975096, + "grad_norm": 0.04055316373705864, + "learning_rate": 9.645242608076749e-07, + "loss": 0.0012, + "step": 96930 + }, + { + "epoch": 1.6382887031763602, + "grad_norm": 0.026886506006121635, + "learning_rate": 9.636536656664209e-07, + "loss": 0.0005, + "step": 96940 + }, + { + "epoch": 1.6384577034552108, + "grad_norm": 0.03368248790502548, + "learning_rate": 9.627834217068532e-07, + "loss": 0.0008, + "step": 96950 + }, + { + "epoch": 1.6386267037340612, + "grad_norm": 0.004224222619086504, + "learning_rate": 9.619135290046906e-07, + "loss": 0.0006, + "step": 96960 + }, + { + "epoch": 1.6387957040129115, + "grad_norm": 0.004865554627031088, + "learning_rate": 9.610439876356142e-07, + "loss": 0.0003, + "step": 96970 + }, + { + "epoch": 1.638964704291762, + "grad_norm": 0.04468980059027672, + "learning_rate": 9.601747976752802e-07, + "loss": 0.0026, + "step": 96980 + }, + { + "epoch": 1.6391337045706127, + "grad_norm": 0.06228204444050789, + "learning_rate": 9.593059591993132e-07, + "loss": 0.0012, + "step": 96990 + }, + { + "epoch": 1.639302704849463, + "grad_norm": 0.09721628576517105, + "learning_rate": 9.58437472283304e-07, + "loss": 0.0005, + "step": 97000 + }, + { + "epoch": 1.6394717051283134, + "grad_norm": 0.04007471725344658, + "learning_rate": 9.575693370028166e-07, + "loss": 0.0005, + "step": 97010 + }, + { + "epoch": 1.6396407054071638, + "grad_norm": 0.022833475843071938, + "learning_rate": 9.567015534333835e-07, + "loss": 0.0007, + "step": 97020 + }, + { + "epoch": 1.6398097056860144, + "grad_norm": 0.035724788904190063, + "learning_rate": 9.558341216505063e-07, + "loss": 0.0011, + "step": 97030 + }, + { + "epoch": 1.639978705964865, + "grad_norm": 0.0324830487370491, + "learning_rate": 9.549670417296536e-07, + "loss": 0.0007, + "step": 97040 + }, + { + "epoch": 1.6401477062437153, + "grad_norm": 0.025162674486637115, + "learning_rate": 9.541003137462685e-07, + "loss": 0.0009, + "step": 97050 + }, + { + "epoch": 1.6403167065225657, + "grad_norm": 0.025426583364605904, + "learning_rate": 
9.532339377757571e-07, + "loss": 0.0005, + "step": 97060 + }, + { + "epoch": 1.6404857068014163, + "grad_norm": 0.017605816945433617, + "learning_rate": 9.523679138935022e-07, + "loss": 0.0006, + "step": 97070 + }, + { + "epoch": 1.6406547070802668, + "grad_norm": 0.04608120024204254, + "learning_rate": 9.515022421748482e-07, + "loss": 0.0011, + "step": 97080 + }, + { + "epoch": 1.6408237073591172, + "grad_norm": 0.03623725846409798, + "learning_rate": 9.506369226951156e-07, + "loss": 0.0003, + "step": 97090 + }, + { + "epoch": 1.6409927076379676, + "grad_norm": 0.04509522765874863, + "learning_rate": 9.497719555295898e-07, + "loss": 0.0007, + "step": 97100 + }, + { + "epoch": 1.641161707916818, + "grad_norm": 0.009332438930869102, + "learning_rate": 9.489073407535287e-07, + "loss": 0.0005, + "step": 97110 + }, + { + "epoch": 1.6413307081956685, + "grad_norm": 0.020322060212492943, + "learning_rate": 9.480430784421552e-07, + "loss": 0.0005, + "step": 97120 + }, + { + "epoch": 1.641499708474519, + "grad_norm": 0.11147165298461914, + "learning_rate": 9.471791686706677e-07, + "loss": 0.001, + "step": 97130 + }, + { + "epoch": 1.6416687087533695, + "grad_norm": 0.027426326647400856, + "learning_rate": 9.463156115142275e-07, + "loss": 0.0005, + "step": 97140 + }, + { + "epoch": 1.6418377090322198, + "grad_norm": 0.008892586454749107, + "learning_rate": 9.454524070479714e-07, + "loss": 0.0009, + "step": 97150 + }, + { + "epoch": 1.6420067093110704, + "grad_norm": 0.06547423452138901, + "learning_rate": 9.445895553469986e-07, + "loss": 0.0007, + "step": 97160 + }, + { + "epoch": 1.642175709589921, + "grad_norm": 0.04201348125934601, + "learning_rate": 9.437270564863854e-07, + "loss": 0.0011, + "step": 97170 + }, + { + "epoch": 1.6423447098687713, + "grad_norm": 0.0022331583313643932, + "learning_rate": 9.428649105411697e-07, + "loss": 0.0005, + "step": 97180 + }, + { + "epoch": 1.6425137101476217, + "grad_norm": 0.13671652972698212, + "learning_rate": 9.420031175863642e-07, + "loss": 0.0013, + "step": 97190 + }, + { + "epoch": 1.642682710426472, + "grad_norm": 0.050300270318984985, + "learning_rate": 9.411416776969484e-07, + "loss": 0.0006, + "step": 97200 + }, + { + "epoch": 1.6428517107053227, + "grad_norm": 0.03513916954398155, + "learning_rate": 9.40280590947874e-07, + "loss": 0.0015, + "step": 97210 + }, + { + "epoch": 1.6430207109841732, + "grad_norm": 0.016461919993162155, + "learning_rate": 9.394198574140567e-07, + "loss": 0.0004, + "step": 97220 + }, + { + "epoch": 1.6431897112630236, + "grad_norm": 0.15564028918743134, + "learning_rate": 9.385594771703871e-07, + "loss": 0.0013, + "step": 97230 + }, + { + "epoch": 1.643358711541874, + "grad_norm": 0.08642155677080154, + "learning_rate": 9.376994502917197e-07, + "loss": 0.001, + "step": 97240 + }, + { + "epoch": 1.6435277118207245, + "grad_norm": 0.006080037914216518, + "learning_rate": 9.368397768528825e-07, + "loss": 0.0008, + "step": 97250 + }, + { + "epoch": 1.6436967120995751, + "grad_norm": 0.14761480689048767, + "learning_rate": 9.359804569286729e-07, + "loss": 0.0009, + "step": 97260 + }, + { + "epoch": 1.6438657123784255, + "grad_norm": 0.0032137467060238123, + "learning_rate": 9.351214905938521e-07, + "loss": 0.0009, + "step": 97270 + }, + { + "epoch": 1.6440347126572759, + "grad_norm": 0.04017636552453041, + "learning_rate": 9.342628779231582e-07, + "loss": 0.0008, + "step": 97280 + }, + { + "epoch": 1.6442037129361262, + "grad_norm": 0.0005415910272859037, + "learning_rate": 9.334046189912915e-07, + "loss": 0.0004, + "step": 
97290 + }, + { + "epoch": 1.6443727132149768, + "grad_norm": 0.08849450200796127, + "learning_rate": 9.325467138729272e-07, + "loss": 0.001, + "step": 97300 + }, + { + "epoch": 1.6445417134938274, + "grad_norm": 0.0690179243683815, + "learning_rate": 9.31689162642705e-07, + "loss": 0.0008, + "step": 97310 + }, + { + "epoch": 1.6447107137726777, + "grad_norm": 0.03570980206131935, + "learning_rate": 9.308319653752379e-07, + "loss": 0.0007, + "step": 97320 + }, + { + "epoch": 1.644879714051528, + "grad_norm": 0.010233347304165363, + "learning_rate": 9.299751221451042e-07, + "loss": 0.0003, + "step": 97330 + }, + { + "epoch": 1.6450487143303787, + "grad_norm": 0.055424634367227554, + "learning_rate": 9.291186330268564e-07, + "loss": 0.0007, + "step": 97340 + }, + { + "epoch": 1.645217714609229, + "grad_norm": 0.028542617335915565, + "learning_rate": 9.282624980950094e-07, + "loss": 0.0005, + "step": 97350 + }, + { + "epoch": 1.6453867148880796, + "grad_norm": 0.009859766811132431, + "learning_rate": 9.274067174240548e-07, + "loss": 0.0003, + "step": 97360 + }, + { + "epoch": 1.64555571516693, + "grad_norm": 0.011371731758117676, + "learning_rate": 9.265512910884466e-07, + "loss": 0.0004, + "step": 97370 + }, + { + "epoch": 1.6457247154457804, + "grad_norm": 0.0602949783205986, + "learning_rate": 9.256962191626124e-07, + "loss": 0.0008, + "step": 97380 + }, + { + "epoch": 1.645893715724631, + "grad_norm": 0.03658745437860489, + "learning_rate": 9.248415017209478e-07, + "loss": 0.0007, + "step": 97390 + }, + { + "epoch": 1.6460627160034815, + "grad_norm": 0.08593559265136719, + "learning_rate": 9.23987138837818e-07, + "loss": 0.0007, + "step": 97400 + }, + { + "epoch": 1.646231716282332, + "grad_norm": 0.062391120940446854, + "learning_rate": 9.231331305875552e-07, + "loss": 0.0009, + "step": 97410 + }, + { + "epoch": 1.6464007165611823, + "grad_norm": 0.02832197956740856, + "learning_rate": 9.222794770444638e-07, + "loss": 0.0006, + "step": 97420 + }, + { + "epoch": 1.6465697168400328, + "grad_norm": 0.025280700996518135, + "learning_rate": 9.214261782828133e-07, + "loss": 0.0008, + "step": 97430 + }, + { + "epoch": 1.6467387171188832, + "grad_norm": 0.22469237446784973, + "learning_rate": 9.20573234376848e-07, + "loss": 0.0012, + "step": 97440 + }, + { + "epoch": 1.6469077173977338, + "grad_norm": 0.02634529396891594, + "learning_rate": 9.197206454007756e-07, + "loss": 0.0006, + "step": 97450 + }, + { + "epoch": 1.6470767176765841, + "grad_norm": 0.018179452046751976, + "learning_rate": 9.188684114287772e-07, + "loss": 0.0006, + "step": 97460 + }, + { + "epoch": 1.6472457179554345, + "grad_norm": 0.00996420904994011, + "learning_rate": 9.180165325349999e-07, + "loss": 0.0007, + "step": 97470 + }, + { + "epoch": 1.647414718234285, + "grad_norm": 0.04182242974638939, + "learning_rate": 9.171650087935624e-07, + "loss": 0.0008, + "step": 97480 + }, + { + "epoch": 1.6475837185131357, + "grad_norm": 0.014716528356075287, + "learning_rate": 9.163138402785504e-07, + "loss": 0.0008, + "step": 97490 + }, + { + "epoch": 1.647752718791986, + "grad_norm": 0.013761723414063454, + "learning_rate": 9.154630270640214e-07, + "loss": 0.0007, + "step": 97500 + }, + { + "epoch": 1.6479217190708364, + "grad_norm": 0.036597806960344315, + "learning_rate": 9.146125692239976e-07, + "loss": 0.0005, + "step": 97510 + }, + { + "epoch": 1.6480907193496868, + "grad_norm": 0.0009444573661312461, + "learning_rate": 9.137624668324741e-07, + "loss": 0.0009, + "step": 97520 + }, + { + "epoch": 1.6482597196285373, + 
"grad_norm": 0.061494581401348114, + "learning_rate": 9.129127199634158e-07, + "loss": 0.0012, + "step": 97530 + }, + { + "epoch": 1.648428719907388, + "grad_norm": 0.07248084247112274, + "learning_rate": 9.120633286907521e-07, + "loss": 0.0006, + "step": 97540 + }, + { + "epoch": 1.6485977201862383, + "grad_norm": 0.030622024089097977, + "learning_rate": 9.112142930883866e-07, + "loss": 0.0016, + "step": 97550 + }, + { + "epoch": 1.6487667204650887, + "grad_norm": 0.055071569979190826, + "learning_rate": 9.103656132301869e-07, + "loss": 0.0005, + "step": 97560 + }, + { + "epoch": 1.6489357207439392, + "grad_norm": 0.02567562833428383, + "learning_rate": 9.095172891899939e-07, + "loss": 0.0005, + "step": 97570 + }, + { + "epoch": 1.6491047210227898, + "grad_norm": 0.05259372666478157, + "learning_rate": 9.086693210416164e-07, + "loss": 0.0007, + "step": 97580 + }, + { + "epoch": 1.6492737213016402, + "grad_norm": 0.0008730893023312092, + "learning_rate": 9.078217088588298e-07, + "loss": 0.0005, + "step": 97590 + }, + { + "epoch": 1.6494427215804905, + "grad_norm": 0.013485025614500046, + "learning_rate": 9.069744527153812e-07, + "loss": 0.0004, + "step": 97600 + }, + { + "epoch": 1.649611721859341, + "grad_norm": 0.006705063860863447, + "learning_rate": 9.061275526849883e-07, + "loss": 0.0009, + "step": 97610 + }, + { + "epoch": 1.6497807221381915, + "grad_norm": 0.07051364332437515, + "learning_rate": 9.052810088413322e-07, + "loss": 0.0008, + "step": 97620 + }, + { + "epoch": 1.649949722417042, + "grad_norm": 0.029289480298757553, + "learning_rate": 9.044348212580689e-07, + "loss": 0.0008, + "step": 97630 + }, + { + "epoch": 1.6501187226958924, + "grad_norm": 0.016281338408589363, + "learning_rate": 9.035889900088179e-07, + "loss": 0.0007, + "step": 97640 + }, + { + "epoch": 1.6502877229747428, + "grad_norm": 0.00940323993563652, + "learning_rate": 9.027435151671743e-07, + "loss": 0.0021, + "step": 97650 + }, + { + "epoch": 1.6504567232535934, + "grad_norm": 0.01000247523188591, + "learning_rate": 9.018983968066947e-07, + "loss": 0.0007, + "step": 97660 + }, + { + "epoch": 1.650625723532444, + "grad_norm": 0.05016298592090607, + "learning_rate": 9.010536350009119e-07, + "loss": 0.0004, + "step": 97670 + }, + { + "epoch": 1.6507947238112943, + "grad_norm": 0.04229207709431648, + "learning_rate": 9.002092298233211e-07, + "loss": 0.0005, + "step": 97680 + }, + { + "epoch": 1.6509637240901447, + "grad_norm": 0.03415260091423988, + "learning_rate": 8.99365181347393e-07, + "loss": 0.0005, + "step": 97690 + }, + { + "epoch": 1.651132724368995, + "grad_norm": 0.07657230645418167, + "learning_rate": 8.985214896465604e-07, + "loss": 0.0008, + "step": 97700 + }, + { + "epoch": 1.6513017246478456, + "grad_norm": 0.0001833623682614416, + "learning_rate": 8.97678154794232e-07, + "loss": 0.0002, + "step": 97710 + }, + { + "epoch": 1.6514707249266962, + "grad_norm": 0.03562748432159424, + "learning_rate": 8.96835176863779e-07, + "loss": 0.0005, + "step": 97720 + }, + { + "epoch": 1.6516397252055466, + "grad_norm": 0.02505319006741047, + "learning_rate": 8.95992555928547e-07, + "loss": 0.0004, + "step": 97730 + }, + { + "epoch": 1.651808725484397, + "grad_norm": 0.11562114953994751, + "learning_rate": 8.95150292061846e-07, + "loss": 0.0012, + "step": 97740 + }, + { + "epoch": 1.6519777257632475, + "grad_norm": 0.008282345719635487, + "learning_rate": 8.943083853369583e-07, + "loss": 0.0004, + "step": 97750 + }, + { + "epoch": 1.6521467260420981, + "grad_norm": 0.012088058516383171, + "learning_rate": 
8.934668358271342e-07, + "loss": 0.0008, + "step": 97760 + }, + { + "epoch": 1.6523157263209485, + "grad_norm": 0.01571849174797535, + "learning_rate": 8.926256436055914e-07, + "loss": 0.0009, + "step": 97770 + }, + { + "epoch": 1.6524847265997988, + "grad_norm": 0.020298385992646217, + "learning_rate": 8.917848087455178e-07, + "loss": 0.0005, + "step": 97780 + }, + { + "epoch": 1.6526537268786492, + "grad_norm": 0.024168308824300766, + "learning_rate": 8.909443313200706e-07, + "loss": 0.0005, + "step": 97790 + }, + { + "epoch": 1.6528227271574998, + "grad_norm": 0.010439195670187473, + "learning_rate": 8.901042114023766e-07, + "loss": 0.0007, + "step": 97800 + }, + { + "epoch": 1.6529917274363504, + "grad_norm": 0.06465132534503937, + "learning_rate": 8.892644490655284e-07, + "loss": 0.001, + "step": 97810 + }, + { + "epoch": 1.6531607277152007, + "grad_norm": 0.015410232357680798, + "learning_rate": 8.88425044382591e-07, + "loss": 0.0006, + "step": 97820 + }, + { + "epoch": 1.653329727994051, + "grad_norm": 0.08747339993715286, + "learning_rate": 8.875859974265944e-07, + "loss": 0.0011, + "step": 97830 + }, + { + "epoch": 1.6534987282729017, + "grad_norm": 0.04376240819692612, + "learning_rate": 8.867473082705424e-07, + "loss": 0.0007, + "step": 97840 + }, + { + "epoch": 1.6536677285517523, + "grad_norm": 0.0737852230668068, + "learning_rate": 8.859089769874024e-07, + "loss": 0.0008, + "step": 97850 + }, + { + "epoch": 1.6538367288306026, + "grad_norm": 0.05410739779472351, + "learning_rate": 8.850710036501165e-07, + "loss": 0.0006, + "step": 97860 + }, + { + "epoch": 1.654005729109453, + "grad_norm": 0.110353983938694, + "learning_rate": 8.842333883315884e-07, + "loss": 0.0008, + "step": 97870 + }, + { + "epoch": 1.6541747293883033, + "grad_norm": 0.00010571058373898268, + "learning_rate": 8.83396131104699e-07, + "loss": 0.0005, + "step": 97880 + }, + { + "epoch": 1.654343729667154, + "grad_norm": 0.03712800517678261, + "learning_rate": 8.825592320422899e-07, + "loss": 0.0012, + "step": 97890 + }, + { + "epoch": 1.6545127299460045, + "grad_norm": 0.05840837582945824, + "learning_rate": 8.817226912171783e-07, + "loss": 0.0006, + "step": 97900 + }, + { + "epoch": 1.6546817302248549, + "grad_norm": 0.024938484653830528, + "learning_rate": 8.808865087021451e-07, + "loss": 0.0005, + "step": 97910 + }, + { + "epoch": 1.6548507305037052, + "grad_norm": 0.0135054225102067, + "learning_rate": 8.800506845699441e-07, + "loss": 0.0003, + "step": 97920 + }, + { + "epoch": 1.6550197307825558, + "grad_norm": 0.003684627590700984, + "learning_rate": 8.79215218893294e-07, + "loss": 0.0003, + "step": 97930 + }, + { + "epoch": 1.6551887310614064, + "grad_norm": 0.05478254705667496, + "learning_rate": 8.783801117448853e-07, + "loss": 0.0006, + "step": 97940 + }, + { + "epoch": 1.6553577313402568, + "grad_norm": 0.03860902413725853, + "learning_rate": 8.775453631973785e-07, + "loss": 0.0003, + "step": 97950 + }, + { + "epoch": 1.6555267316191071, + "grad_norm": 0.02583891712129116, + "learning_rate": 8.76710973323397e-07, + "loss": 0.0011, + "step": 97960 + }, + { + "epoch": 1.6556957318979575, + "grad_norm": 0.0985838994383812, + "learning_rate": 8.758769421955388e-07, + "loss": 0.0007, + "step": 97970 + }, + { + "epoch": 1.655864732176808, + "grad_norm": 0.008611098863184452, + "learning_rate": 8.750432698863698e-07, + "loss": 0.001, + "step": 97980 + }, + { + "epoch": 1.6560337324556587, + "grad_norm": 0.06516892462968826, + "learning_rate": 8.742099564684209e-07, + "loss": 0.001, + "step": 97990 + }, + 
{ + "epoch": 1.656202732734509, + "grad_norm": 0.02409842424094677, + "learning_rate": 8.733770020141969e-07, + "loss": 0.0014, + "step": 98000 + }, + { + "epoch": 1.6563717330133594, + "grad_norm": 0.049286551773548126, + "learning_rate": 8.725444065961663e-07, + "loss": 0.0009, + "step": 98010 + }, + { + "epoch": 1.65654073329221, + "grad_norm": 0.023762457072734833, + "learning_rate": 8.717121702867709e-07, + "loss": 0.0008, + "step": 98020 + }, + { + "epoch": 1.6567097335710605, + "grad_norm": 0.04187716171145439, + "learning_rate": 8.708802931584193e-07, + "loss": 0.001, + "step": 98030 + }, + { + "epoch": 1.656878733849911, + "grad_norm": 0.044227421283721924, + "learning_rate": 8.700487752834875e-07, + "loss": 0.0003, + "step": 98040 + }, + { + "epoch": 1.6570477341287613, + "grad_norm": 0.049865856766700745, + "learning_rate": 8.692176167343231e-07, + "loss": 0.0008, + "step": 98050 + }, + { + "epoch": 1.6572167344076116, + "grad_norm": 0.047756098210811615, + "learning_rate": 8.683868175832394e-07, + "loss": 0.0006, + "step": 98060 + }, + { + "epoch": 1.6573857346864622, + "grad_norm": 0.02121659927070141, + "learning_rate": 8.675563779025214e-07, + "loss": 0.0006, + "step": 98070 + }, + { + "epoch": 1.6575547349653128, + "grad_norm": 0.024621332064270973, + "learning_rate": 8.667262977644197e-07, + "loss": 0.0005, + "step": 98080 + }, + { + "epoch": 1.6577237352441632, + "grad_norm": 0.0006263728719204664, + "learning_rate": 8.658965772411576e-07, + "loss": 0.0009, + "step": 98090 + }, + { + "epoch": 1.6578927355230135, + "grad_norm": 0.01369393803179264, + "learning_rate": 8.650672164049217e-07, + "loss": 0.0007, + "step": 98100 + }, + { + "epoch": 1.6580617358018641, + "grad_norm": 0.001999722560867667, + "learning_rate": 8.642382153278739e-07, + "loss": 0.0004, + "step": 98110 + }, + { + "epoch": 1.6582307360807147, + "grad_norm": 0.002143674297258258, + "learning_rate": 8.634095740821385e-07, + "loss": 0.0007, + "step": 98120 + }, + { + "epoch": 1.658399736359565, + "grad_norm": 0.04328879714012146, + "learning_rate": 8.625812927398136e-07, + "loss": 0.0005, + "step": 98130 + }, + { + "epoch": 1.6585687366384154, + "grad_norm": 0.022024834528565407, + "learning_rate": 8.617533713729609e-07, + "loss": 0.001, + "step": 98140 + }, + { + "epoch": 1.6587377369172658, + "grad_norm": 0.0001287844788748771, + "learning_rate": 8.609258100536155e-07, + "loss": 0.0008, + "step": 98150 + }, + { + "epoch": 1.6589067371961164, + "grad_norm": 0.02500726468861103, + "learning_rate": 8.600986088537783e-07, + "loss": 0.002, + "step": 98160 + }, + { + "epoch": 1.659075737474967, + "grad_norm": 0.12904953956604004, + "learning_rate": 8.592717678454222e-07, + "loss": 0.0006, + "step": 98170 + }, + { + "epoch": 1.6592447377538173, + "grad_norm": 0.04279755800962448, + "learning_rate": 8.584452871004834e-07, + "loss": 0.0005, + "step": 98180 + }, + { + "epoch": 1.6594137380326677, + "grad_norm": 0.007707957644015551, + "learning_rate": 8.576191666908717e-07, + "loss": 0.0007, + "step": 98190 + }, + { + "epoch": 1.6595827383115183, + "grad_norm": 0.12773577868938446, + "learning_rate": 8.56793406688462e-07, + "loss": 0.0009, + "step": 98200 + }, + { + "epoch": 1.6597517385903686, + "grad_norm": 0.02980293706059456, + "learning_rate": 8.55968007165101e-07, + "loss": 0.0008, + "step": 98210 + }, + { + "epoch": 1.6599207388692192, + "grad_norm": 0.06709340214729309, + "learning_rate": 8.55142968192601e-07, + "loss": 0.0008, + "step": 98220 + }, + { + "epoch": 1.6600897391480696, + "grad_norm": 
0.023071642965078354, + "learning_rate": 8.54318289842746e-07, + "loss": 0.0015, + "step": 98230 + }, + { + "epoch": 1.66025873942692, + "grad_norm": 0.003490469651296735, + "learning_rate": 8.534939721872848e-07, + "loss": 0.0013, + "step": 98240 + }, + { + "epoch": 1.6604277397057705, + "grad_norm": 0.034906335175037384, + "learning_rate": 8.526700152979395e-07, + "loss": 0.0006, + "step": 98250 + }, + { + "epoch": 1.660596739984621, + "grad_norm": 0.025669587776064873, + "learning_rate": 8.51846419246396e-07, + "loss": 0.0029, + "step": 98260 + }, + { + "epoch": 1.6607657402634715, + "grad_norm": 0.05409591645002365, + "learning_rate": 8.510231841043137e-07, + "loss": 0.0005, + "step": 98270 + }, + { + "epoch": 1.6609347405423218, + "grad_norm": 0.01553164143115282, + "learning_rate": 8.502003099433154e-07, + "loss": 0.0011, + "step": 98280 + }, + { + "epoch": 1.6611037408211724, + "grad_norm": 0.05354379862546921, + "learning_rate": 8.493777968349959e-07, + "loss": 0.0006, + "step": 98290 + }, + { + "epoch": 1.6612727411000228, + "grad_norm": 0.06067023053765297, + "learning_rate": 8.485556448509202e-07, + "loss": 0.0005, + "step": 98300 + }, + { + "epoch": 1.6614417413788733, + "grad_norm": 0.029005998745560646, + "learning_rate": 8.477338540626157e-07, + "loss": 0.0007, + "step": 98310 + }, + { + "epoch": 1.6616107416577237, + "grad_norm": 0.0032901307567954063, + "learning_rate": 8.46912424541586e-07, + "loss": 0.0005, + "step": 98320 + }, + { + "epoch": 1.661779741936574, + "grad_norm": 0.011192691512405872, + "learning_rate": 8.460913563592959e-07, + "loss": 0.0006, + "step": 98330 + }, + { + "epoch": 1.6619487422154247, + "grad_norm": 0.02068910375237465, + "learning_rate": 8.452706495871837e-07, + "loss": 0.0012, + "step": 98340 + }, + { + "epoch": 1.6621177424942752, + "grad_norm": 0.054953016340732574, + "learning_rate": 8.444503042966567e-07, + "loss": 0.0005, + "step": 98350 + }, + { + "epoch": 1.6622867427731256, + "grad_norm": 0.05563584342598915, + "learning_rate": 8.436303205590863e-07, + "loss": 0.0004, + "step": 98360 + }, + { + "epoch": 1.662455743051976, + "grad_norm": 0.08429501205682755, + "learning_rate": 8.428106984458156e-07, + "loss": 0.0006, + "step": 98370 + }, + { + "epoch": 1.6626247433308265, + "grad_norm": 0.12233336269855499, + "learning_rate": 8.419914380281579e-07, + "loss": 0.0007, + "step": 98380 + }, + { + "epoch": 1.662793743609677, + "grad_norm": 0.1364852637052536, + "learning_rate": 8.411725393773895e-07, + "loss": 0.0006, + "step": 98390 + }, + { + "epoch": 1.6629627438885275, + "grad_norm": 0.05216464400291443, + "learning_rate": 8.403540025647616e-07, + "loss": 0.001, + "step": 98400 + }, + { + "epoch": 1.6631317441673779, + "grad_norm": 0.04671711102128029, + "learning_rate": 8.395358276614879e-07, + "loss": 0.0006, + "step": 98410 + }, + { + "epoch": 1.6633007444462282, + "grad_norm": 0.027912594377994537, + "learning_rate": 8.387180147387569e-07, + "loss": 0.0007, + "step": 98420 + }, + { + "epoch": 1.6634697447250788, + "grad_norm": 0.0016561595257371664, + "learning_rate": 8.379005638677184e-07, + "loss": 0.001, + "step": 98430 + }, + { + "epoch": 1.6636387450039294, + "grad_norm": 0.040859147906303406, + "learning_rate": 8.37083475119499e-07, + "loss": 0.0004, + "step": 98440 + }, + { + "epoch": 1.6638077452827797, + "grad_norm": 0.005387297365814447, + "learning_rate": 8.362667485651849e-07, + "loss": 0.0004, + "step": 98450 + }, + { + "epoch": 1.66397674556163, + "grad_norm": 0.02775135263800621, + "learning_rate": 
8.354503842758388e-07, + "loss": 0.0002, + "step": 98460 + }, + { + "epoch": 1.6641457458404805, + "grad_norm": 0.0674198791384697, + "learning_rate": 8.346343823224862e-07, + "loss": 0.0004, + "step": 98470 + }, + { + "epoch": 1.664314746119331, + "grad_norm": 0.032847706228494644, + "learning_rate": 8.338187427761251e-07, + "loss": 0.0008, + "step": 98480 + }, + { + "epoch": 1.6644837463981816, + "grad_norm": 0.05390644818544388, + "learning_rate": 8.330034657077174e-07, + "loss": 0.0007, + "step": 98490 + }, + { + "epoch": 1.664652746677032, + "grad_norm": 0.020645366981625557, + "learning_rate": 8.321885511881994e-07, + "loss": 0.0015, + "step": 98500 + }, + { + "epoch": 1.6648217469558824, + "grad_norm": 0.026290608569979668, + "learning_rate": 8.313739992884695e-07, + "loss": 0.0005, + "step": 98510 + }, + { + "epoch": 1.664990747234733, + "grad_norm": 0.023746129125356674, + "learning_rate": 8.305598100793993e-07, + "loss": 0.0004, + "step": 98520 + }, + { + "epoch": 1.6651597475135835, + "grad_norm": 0.02805907279253006, + "learning_rate": 8.297459836318267e-07, + "loss": 0.0019, + "step": 98530 + }, + { + "epoch": 1.665328747792434, + "grad_norm": 0.01339644007384777, + "learning_rate": 8.289325200165605e-07, + "loss": 0.0005, + "step": 98540 + }, + { + "epoch": 1.6654977480712843, + "grad_norm": 0.05567881464958191, + "learning_rate": 8.281194193043729e-07, + "loss": 0.0007, + "step": 98550 + }, + { + "epoch": 1.6656667483501346, + "grad_norm": 0.00011985190212726593, + "learning_rate": 8.273066815660086e-07, + "loss": 0.0011, + "step": 98560 + }, + { + "epoch": 1.6658357486289852, + "grad_norm": 0.06024892255663872, + "learning_rate": 8.26494306872182e-07, + "loss": 0.0005, + "step": 98570 + }, + { + "epoch": 1.6660047489078358, + "grad_norm": 0.008531689643859863, + "learning_rate": 8.256822952935705e-07, + "loss": 0.0003, + "step": 98580 + }, + { + "epoch": 1.6661737491866861, + "grad_norm": 0.03165486827492714, + "learning_rate": 8.248706469008255e-07, + "loss": 0.0007, + "step": 98590 + }, + { + "epoch": 1.6663427494655365, + "grad_norm": 0.07225144654512405, + "learning_rate": 8.240593617645621e-07, + "loss": 0.0007, + "step": 98600 + }, + { + "epoch": 1.666511749744387, + "grad_norm": 0.015458598732948303, + "learning_rate": 8.232484399553681e-07, + "loss": 0.0001, + "step": 98610 + }, + { + "epoch": 1.6666807500232377, + "grad_norm": 0.00764080137014389, + "learning_rate": 8.224378815437955e-07, + "loss": 0.0022, + "step": 98620 + }, + { + "epoch": 1.666849750302088, + "grad_norm": 0.03568202629685402, + "learning_rate": 8.216276866003692e-07, + "loss": 0.0007, + "step": 98630 + }, + { + "epoch": 1.6670187505809384, + "grad_norm": 0.07748332619667053, + "learning_rate": 8.208178551955776e-07, + "loss": 0.0006, + "step": 98640 + }, + { + "epoch": 1.6671877508597888, + "grad_norm": 0.05307856202125549, + "learning_rate": 8.200083873998827e-07, + "loss": 0.0006, + "step": 98650 + }, + { + "epoch": 1.6673567511386393, + "grad_norm": 0.015796871855854988, + "learning_rate": 8.191992832837087e-07, + "loss": 0.0007, + "step": 98660 + }, + { + "epoch": 1.66752575141749, + "grad_norm": 0.00500758970156312, + "learning_rate": 8.183905429174554e-07, + "loss": 0.0006, + "step": 98670 + }, + { + "epoch": 1.6676947516963403, + "grad_norm": 0.00411376915872097, + "learning_rate": 8.175821663714839e-07, + "loss": 0.0009, + "step": 98680 + }, + { + "epoch": 1.6678637519751907, + "grad_norm": 0.018853604793548584, + "learning_rate": 8.167741537161289e-07, + "loss": 0.0012, + "step": 98690 
+ }, + { + "epoch": 1.6680327522540412, + "grad_norm": 0.10769926756620407, + "learning_rate": 8.159665050216902e-07, + "loss": 0.0007, + "step": 98700 + }, + { + "epoch": 1.6682017525328918, + "grad_norm": 0.3210642337799072, + "learning_rate": 8.151592203584374e-07, + "loss": 0.0004, + "step": 98710 + }, + { + "epoch": 1.6683707528117422, + "grad_norm": 0.0003552972339093685, + "learning_rate": 8.143522997966097e-07, + "loss": 0.001, + "step": 98720 + }, + { + "epoch": 1.6685397530905925, + "grad_norm": 0.041418470442295074, + "learning_rate": 8.135457434064104e-07, + "loss": 0.0012, + "step": 98730 + }, + { + "epoch": 1.668708753369443, + "grad_norm": 0.04489567130804062, + "learning_rate": 8.127395512580155e-07, + "loss": 0.0006, + "step": 98740 + }, + { + "epoch": 1.6688777536482935, + "grad_norm": 0.04523347318172455, + "learning_rate": 8.119337234215686e-07, + "loss": 0.0015, + "step": 98750 + }, + { + "epoch": 1.669046753927144, + "grad_norm": 0.02380324713885784, + "learning_rate": 8.111282599671783e-07, + "loss": 0.0005, + "step": 98760 + }, + { + "epoch": 1.6692157542059944, + "grad_norm": 0.04562634229660034, + "learning_rate": 8.103231609649259e-07, + "loss": 0.0013, + "step": 98770 + }, + { + "epoch": 1.6693847544848448, + "grad_norm": 0.06537719070911407, + "learning_rate": 8.095184264848566e-07, + "loss": 0.0012, + "step": 98780 + }, + { + "epoch": 1.6695537547636954, + "grad_norm": 0.019319357350468636, + "learning_rate": 8.087140565969881e-07, + "loss": 0.0007, + "step": 98790 + }, + { + "epoch": 1.669722755042546, + "grad_norm": 0.19342520833015442, + "learning_rate": 8.079100513713045e-07, + "loss": 0.0005, + "step": 98800 + }, + { + "epoch": 1.6698917553213963, + "grad_norm": 0.012815208174288273, + "learning_rate": 8.071064108777566e-07, + "loss": 0.0006, + "step": 98810 + }, + { + "epoch": 1.6700607556002467, + "grad_norm": 0.010864531621336937, + "learning_rate": 8.063031351862672e-07, + "loss": 0.0004, + "step": 98820 + }, + { + "epoch": 1.670229755879097, + "grad_norm": 0.013417099602520466, + "learning_rate": 8.055002243667232e-07, + "loss": 0.0014, + "step": 98830 + }, + { + "epoch": 1.6703987561579476, + "grad_norm": 0.06046375259757042, + "learning_rate": 8.046976784889832e-07, + "loss": 0.0005, + "step": 98840 + }, + { + "epoch": 1.6705677564367982, + "grad_norm": 0.027439545840024948, + "learning_rate": 8.038954976228708e-07, + "loss": 0.0007, + "step": 98850 + }, + { + "epoch": 1.6707367567156486, + "grad_norm": 0.007659797091037035, + "learning_rate": 8.030936818381818e-07, + "loss": 0.0004, + "step": 98860 + }, + { + "epoch": 1.670905756994499, + "grad_norm": 0.04878820851445198, + "learning_rate": 8.02292231204676e-07, + "loss": 0.0002, + "step": 98870 + }, + { + "epoch": 1.6710747572733495, + "grad_norm": 0.026526227593421936, + "learning_rate": 8.014911457920854e-07, + "loss": 0.0004, + "step": 98880 + }, + { + "epoch": 1.6712437575522001, + "grad_norm": 0.11598348617553711, + "learning_rate": 8.006904256701064e-07, + "loss": 0.0005, + "step": 98890 + }, + { + "epoch": 1.6714127578310505, + "grad_norm": 0.0019160934025421739, + "learning_rate": 7.998900709084073e-07, + "loss": 0.0006, + "step": 98900 + }, + { + "epoch": 1.6715817581099008, + "grad_norm": 0.004642430692911148, + "learning_rate": 7.990900815766212e-07, + "loss": 0.0005, + "step": 98910 + }, + { + "epoch": 1.6717507583887512, + "grad_norm": 0.06269663572311401, + "learning_rate": 7.982904577443517e-07, + "loss": 0.0007, + "step": 98920 + }, + { + "epoch": 1.6719197586676018, + 
"grad_norm": 0.03629099577665329, + "learning_rate": 7.974911994811701e-07, + "loss": 0.0003, + "step": 98930 + }, + { + "epoch": 1.6720887589464524, + "grad_norm": 0.04994253069162369, + "learning_rate": 7.966923068566173e-07, + "loss": 0.001, + "step": 98940 + }, + { + "epoch": 1.6722577592253027, + "grad_norm": 0.08595866709947586, + "learning_rate": 7.958937799401977e-07, + "loss": 0.0007, + "step": 98950 + }, + { + "epoch": 1.672426759504153, + "grad_norm": 0.04729030281305313, + "learning_rate": 7.950956188013903e-07, + "loss": 0.0006, + "step": 98960 + }, + { + "epoch": 1.6725957597830037, + "grad_norm": 0.06552130728960037, + "learning_rate": 7.942978235096355e-07, + "loss": 0.0011, + "step": 98970 + }, + { + "epoch": 1.6727647600618543, + "grad_norm": 0.013469538651406765, + "learning_rate": 7.935003941343489e-07, + "loss": 0.0005, + "step": 98980 + }, + { + "epoch": 1.6729337603407046, + "grad_norm": 0.028116554021835327, + "learning_rate": 7.927033307449072e-07, + "loss": 0.0007, + "step": 98990 + }, + { + "epoch": 1.673102760619555, + "grad_norm": 0.0238230898976326, + "learning_rate": 7.919066334106623e-07, + "loss": 0.0003, + "step": 99000 + }, + { + "epoch": 1.6732717608984053, + "grad_norm": 0.02308010868728161, + "learning_rate": 7.911103022009276e-07, + "loss": 0.0009, + "step": 99010 + }, + { + "epoch": 1.673440761177256, + "grad_norm": 0.008989348076283932, + "learning_rate": 7.903143371849903e-07, + "loss": 0.0004, + "step": 99020 + }, + { + "epoch": 1.6736097614561065, + "grad_norm": 0.0486636646091938, + "learning_rate": 7.895187384321006e-07, + "loss": 0.0003, + "step": 99030 + }, + { + "epoch": 1.6737787617349569, + "grad_norm": 0.014975378289818764, + "learning_rate": 7.887235060114812e-07, + "loss": 0.0012, + "step": 99040 + }, + { + "epoch": 1.6739477620138072, + "grad_norm": 0.04476180300116539, + "learning_rate": 7.879286399923219e-07, + "loss": 0.0007, + "step": 99050 + }, + { + "epoch": 1.6741167622926578, + "grad_norm": 0.03560128062963486, + "learning_rate": 7.871341404437776e-07, + "loss": 0.0004, + "step": 99060 + }, + { + "epoch": 1.6742857625715084, + "grad_norm": 0.024541029706597328, + "learning_rate": 7.863400074349764e-07, + "loss": 0.0003, + "step": 99070 + }, + { + "epoch": 1.6744547628503588, + "grad_norm": 0.03204498067498207, + "learning_rate": 7.85546241035009e-07, + "loss": 0.0003, + "step": 99080 + }, + { + "epoch": 1.6746237631292091, + "grad_norm": 0.14887754619121552, + "learning_rate": 7.847528413129391e-07, + "loss": 0.0007, + "step": 99090 + }, + { + "epoch": 1.6747927634080595, + "grad_norm": 0.08677040785551071, + "learning_rate": 7.839598083377941e-07, + "loss": 0.0005, + "step": 99100 + }, + { + "epoch": 1.67496176368691, + "grad_norm": 0.05007180571556091, + "learning_rate": 7.831671421785736e-07, + "loss": 0.0008, + "step": 99110 + }, + { + "epoch": 1.6751307639657607, + "grad_norm": 0.037230510264635086, + "learning_rate": 7.823748429042421e-07, + "loss": 0.0006, + "step": 99120 + }, + { + "epoch": 1.675299764244611, + "grad_norm": 0.036416828632354736, + "learning_rate": 7.815829105837353e-07, + "loss": 0.0006, + "step": 99130 + }, + { + "epoch": 1.6754687645234614, + "grad_norm": 0.06408748030662537, + "learning_rate": 7.807913452859534e-07, + "loss": 0.0004, + "step": 99140 + }, + { + "epoch": 1.675637764802312, + "grad_norm": 0.004515242297202349, + "learning_rate": 7.800001470797675e-07, + "loss": 0.0006, + "step": 99150 + }, + { + "epoch": 1.6758067650811623, + "grad_norm": 0.0739276334643364, + "learning_rate": 
7.792093160340142e-07, + "loss": 0.0008, + "step": 99160 + }, + { + "epoch": 1.675975765360013, + "grad_norm": 0.03279067948460579, + "learning_rate": 7.784188522175018e-07, + "loss": 0.0008, + "step": 99170 + }, + { + "epoch": 1.6761447656388633, + "grad_norm": 0.13736632466316223, + "learning_rate": 7.776287556990025e-07, + "loss": 0.0006, + "step": 99180 + }, + { + "epoch": 1.6763137659177136, + "grad_norm": 0.042497534304857254, + "learning_rate": 7.768390265472598e-07, + "loss": 0.0007, + "step": 99190 + }, + { + "epoch": 1.6764827661965642, + "grad_norm": 0.015636952593922615, + "learning_rate": 7.760496648309829e-07, + "loss": 0.0007, + "step": 99200 + }, + { + "epoch": 1.6766517664754148, + "grad_norm": 0.016732599586248398, + "learning_rate": 7.752606706188514e-07, + "loss": 0.0008, + "step": 99210 + }, + { + "epoch": 1.6768207667542652, + "grad_norm": 0.0025693783536553383, + "learning_rate": 7.7447204397951e-07, + "loss": 0.0002, + "step": 99220 + }, + { + "epoch": 1.6769897670331155, + "grad_norm": 0.014126485213637352, + "learning_rate": 7.736837849815749e-07, + "loss": 0.0006, + "step": 99230 + }, + { + "epoch": 1.6771587673119661, + "grad_norm": 0.007326044607907534, + "learning_rate": 7.728958936936265e-07, + "loss": 0.001, + "step": 99240 + }, + { + "epoch": 1.6773277675908165, + "grad_norm": 0.015904217958450317, + "learning_rate": 7.721083701842175e-07, + "loss": 0.0006, + "step": 99250 + }, + { + "epoch": 1.677496767869667, + "grad_norm": 0.04559899866580963, + "learning_rate": 7.713212145218629e-07, + "loss": 0.0007, + "step": 99260 + }, + { + "epoch": 1.6776657681485174, + "grad_norm": 0.01228527631610632, + "learning_rate": 7.705344267750531e-07, + "loss": 0.0006, + "step": 99270 + }, + { + "epoch": 1.6778347684273678, + "grad_norm": 0.08058196306228638, + "learning_rate": 7.697480070122388e-07, + "loss": 0.0006, + "step": 99280 + }, + { + "epoch": 1.6780037687062184, + "grad_norm": 0.0017960042459890246, + "learning_rate": 7.68961955301844e-07, + "loss": 0.0007, + "step": 99290 + }, + { + "epoch": 1.678172768985069, + "grad_norm": 0.055955979973077774, + "learning_rate": 7.681762717122593e-07, + "loss": 0.0007, + "step": 99300 + }, + { + "epoch": 1.6783417692639193, + "grad_norm": 0.03739108517765999, + "learning_rate": 7.673909563118431e-07, + "loss": 0.0004, + "step": 99310 + }, + { + "epoch": 1.6785107695427697, + "grad_norm": 0.009558171033859253, + "learning_rate": 7.666060091689204e-07, + "loss": 0.0004, + "step": 99320 + }, + { + "epoch": 1.6786797698216203, + "grad_norm": 0.03387034311890602, + "learning_rate": 7.658214303517864e-07, + "loss": 0.0011, + "step": 99330 + }, + { + "epoch": 1.6788487701004706, + "grad_norm": 0.009359865449368954, + "learning_rate": 7.650372199287038e-07, + "loss": 0.0005, + "step": 99340 + }, + { + "epoch": 1.6790177703793212, + "grad_norm": 0.01976497657597065, + "learning_rate": 7.642533779679006e-07, + "loss": 0.0007, + "step": 99350 + }, + { + "epoch": 1.6791867706581716, + "grad_norm": 0.04325069859623909, + "learning_rate": 7.634699045375782e-07, + "loss": 0.0008, + "step": 99360 + }, + { + "epoch": 1.679355770937022, + "grad_norm": 0.004810777027159929, + "learning_rate": 7.626867997058984e-07, + "loss": 0.0005, + "step": 99370 + }, + { + "epoch": 1.6795247712158725, + "grad_norm": 0.03099067695438862, + "learning_rate": 7.619040635409991e-07, + "loss": 0.0009, + "step": 99380 + }, + { + "epoch": 1.679693771494723, + "grad_norm": 0.09227609634399414, + "learning_rate": 7.61121696110979e-07, + "loss": 0.0005, + "step": 
99390 + }, + { + "epoch": 1.6798627717735735, + "grad_norm": 0.016670966520905495, + "learning_rate": 7.6033969748391e-07, + "loss": 0.0005, + "step": 99400 + }, + { + "epoch": 1.6800317720524238, + "grad_norm": 0.6341423392295837, + "learning_rate": 7.595580677278286e-07, + "loss": 0.0013, + "step": 99410 + }, + { + "epoch": 1.6802007723312742, + "grad_norm": 0.020962310954928398, + "learning_rate": 7.587768069107409e-07, + "loss": 0.0008, + "step": 99420 + }, + { + "epoch": 1.6803697726101248, + "grad_norm": 0.08138630539178848, + "learning_rate": 7.579959151006195e-07, + "loss": 0.0006, + "step": 99430 + }, + { + "epoch": 1.6805387728889754, + "grad_norm": 0.03248428553342819, + "learning_rate": 7.57215392365408e-07, + "loss": 0.0004, + "step": 99440 + }, + { + "epoch": 1.6807077731678257, + "grad_norm": 8.540959970559925e-05, + "learning_rate": 7.564352387730123e-07, + "loss": 0.0005, + "step": 99450 + }, + { + "epoch": 1.680876773446676, + "grad_norm": 0.14268356561660767, + "learning_rate": 7.55655454391313e-07, + "loss": 0.0023, + "step": 99460 + }, + { + "epoch": 1.6810457737255267, + "grad_norm": 0.11119076609611511, + "learning_rate": 7.548760392881521e-07, + "loss": 0.0005, + "step": 99470 + }, + { + "epoch": 1.6812147740043772, + "grad_norm": 0.07005003094673157, + "learning_rate": 7.540969935313441e-07, + "loss": 0.0005, + "step": 99480 + }, + { + "epoch": 1.6813837742832276, + "grad_norm": 0.00016238611715380102, + "learning_rate": 7.533183171886705e-07, + "loss": 0.0005, + "step": 99490 + }, + { + "epoch": 1.681552774562078, + "grad_norm": 0.023415926843881607, + "learning_rate": 7.52540010327878e-07, + "loss": 0.0007, + "step": 99500 + }, + { + "epoch": 1.6817217748409283, + "grad_norm": 0.05968974903225899, + "learning_rate": 7.517620730166836e-07, + "loss": 0.0021, + "step": 99510 + }, + { + "epoch": 1.681890775119779, + "grad_norm": 0.005587894469499588, + "learning_rate": 7.509845053227732e-07, + "loss": 0.0012, + "step": 99520 + }, + { + "epoch": 1.6820597753986295, + "grad_norm": 0.10403554141521454, + "learning_rate": 7.502073073137972e-07, + "loss": 0.0011, + "step": 99530 + }, + { + "epoch": 1.6822287756774799, + "grad_norm": 0.0056676738895475864, + "learning_rate": 7.49430479057377e-07, + "loss": 0.0004, + "step": 99540 + }, + { + "epoch": 1.6823977759563302, + "grad_norm": 0.039203256368637085, + "learning_rate": 7.486540206210984e-07, + "loss": 0.0008, + "step": 99550 + }, + { + "epoch": 1.6825667762351808, + "grad_norm": 0.02632479928433895, + "learning_rate": 7.478779320725183e-07, + "loss": 0.0005, + "step": 99560 + }, + { + "epoch": 1.6827357765140314, + "grad_norm": 0.007668390870094299, + "learning_rate": 7.471022134791611e-07, + "loss": 0.0014, + "step": 99570 + }, + { + "epoch": 1.6829047767928818, + "grad_norm": 0.009151371195912361, + "learning_rate": 7.463268649085159e-07, + "loss": 0.0006, + "step": 99580 + }, + { + "epoch": 1.6830737770717321, + "grad_norm": 0.01876378431916237, + "learning_rate": 7.455518864280448e-07, + "loss": 0.0005, + "step": 99590 + }, + { + "epoch": 1.6832427773505825, + "grad_norm": 0.03298467770218849, + "learning_rate": 7.44777278105171e-07, + "loss": 0.0005, + "step": 99600 + }, + { + "epoch": 1.683411777629433, + "grad_norm": 0.019236354157328606, + "learning_rate": 7.440030400072923e-07, + "loss": 0.0003, + "step": 99610 + }, + { + "epoch": 1.6835807779082836, + "grad_norm": 0.03573700040578842, + "learning_rate": 7.432291722017693e-07, + "loss": 0.0004, + "step": 99620 + }, + { + "epoch": 1.683749778187134, + 
"grad_norm": 0.0062813530676066875, + "learning_rate": 7.424556747559331e-07, + "loss": 0.0008, + "step": 99630 + }, + { + "epoch": 1.6839187784659844, + "grad_norm": 0.06845460832118988, + "learning_rate": 7.416825477370809e-07, + "loss": 0.0009, + "step": 99640 + }, + { + "epoch": 1.684087778744835, + "grad_norm": 0.06802421808242798, + "learning_rate": 7.409097912124801e-07, + "loss": 0.0005, + "step": 99650 + }, + { + "epoch": 1.6842567790236855, + "grad_norm": 0.02949458360671997, + "learning_rate": 7.40137405249362e-07, + "loss": 0.0003, + "step": 99660 + }, + { + "epoch": 1.684425779302536, + "grad_norm": 0.03888123482465744, + "learning_rate": 7.393653899149295e-07, + "loss": 0.0008, + "step": 99670 + }, + { + "epoch": 1.6845947795813863, + "grad_norm": 0.11102794855833054, + "learning_rate": 7.38593745276352e-07, + "loss": 0.0004, + "step": 99680 + }, + { + "epoch": 1.6847637798602366, + "grad_norm": 0.0057936361990869045, + "learning_rate": 7.378224714007648e-07, + "loss": 0.0004, + "step": 99690 + }, + { + "epoch": 1.6849327801390872, + "grad_norm": 0.002656053751707077, + "learning_rate": 7.370515683552731e-07, + "loss": 0.0007, + "step": 99700 + }, + { + "epoch": 1.6851017804179378, + "grad_norm": 0.000269414100330323, + "learning_rate": 7.362810362069511e-07, + "loss": 0.0003, + "step": 99710 + }, + { + "epoch": 1.6852707806967882, + "grad_norm": 0.0326961912214756, + "learning_rate": 7.355108750228357e-07, + "loss": 0.0005, + "step": 99720 + }, + { + "epoch": 1.6854397809756385, + "grad_norm": 0.07516507059335709, + "learning_rate": 7.347410848699371e-07, + "loss": 0.0004, + "step": 99730 + }, + { + "epoch": 1.685608781254489, + "grad_norm": 0.10635815560817719, + "learning_rate": 7.339716658152285e-07, + "loss": 0.0009, + "step": 99740 + }, + { + "epoch": 1.6857777815333397, + "grad_norm": 0.09443478286266327, + "learning_rate": 7.332026179256557e-07, + "loss": 0.0006, + "step": 99750 + }, + { + "epoch": 1.68594678181219, + "grad_norm": 0.04211077466607094, + "learning_rate": 7.32433941268127e-07, + "loss": 0.0004, + "step": 99760 + }, + { + "epoch": 1.6861157820910404, + "grad_norm": 0.01785372570157051, + "learning_rate": 7.316656359095236e-07, + "loss": 0.0004, + "step": 99770 + }, + { + "epoch": 1.6862847823698908, + "grad_norm": 0.003708686213940382, + "learning_rate": 7.308977019166891e-07, + "loss": 0.0003, + "step": 99780 + }, + { + "epoch": 1.6864537826487414, + "grad_norm": 0.014424859546124935, + "learning_rate": 7.301301393564397e-07, + "loss": 0.0006, + "step": 99790 + }, + { + "epoch": 1.686622782927592, + "grad_norm": 0.056485239416360855, + "learning_rate": 7.293629482955555e-07, + "loss": 0.0007, + "step": 99800 + }, + { + "epoch": 1.6867917832064423, + "grad_norm": 0.004605097230523825, + "learning_rate": 7.285961288007865e-07, + "loss": 0.0006, + "step": 99810 + }, + { + "epoch": 1.6869607834852927, + "grad_norm": 0.012538746930658817, + "learning_rate": 7.278296809388507e-07, + "loss": 0.0004, + "step": 99820 + }, + { + "epoch": 1.6871297837641432, + "grad_norm": 0.05235278978943825, + "learning_rate": 7.270636047764306e-07, + "loss": 0.001, + "step": 99830 + }, + { + "epoch": 1.6872987840429938, + "grad_norm": 0.03077354095876217, + "learning_rate": 7.262979003801806e-07, + "loss": 0.0008, + "step": 99840 + }, + { + "epoch": 1.6874677843218442, + "grad_norm": 0.04643261432647705, + "learning_rate": 7.255325678167191e-07, + "loss": 0.0009, + "step": 99850 + }, + { + "epoch": 1.6876367846006946, + "grad_norm": 0.06332317739725113, + "learning_rate": 
7.24767607152636e-07, + "loss": 0.0006, + "step": 99860 + }, + { + "epoch": 1.687805784879545, + "grad_norm": 0.03759251907467842, + "learning_rate": 7.240030184544833e-07, + "loss": 0.0007, + "step": 99870 + }, + { + "epoch": 1.6879747851583955, + "grad_norm": 0.04582984745502472, + "learning_rate": 7.232388017887859e-07, + "loss": 0.0013, + "step": 99880 + }, + { + "epoch": 1.688143785437246, + "grad_norm": 0.047386761754751205, + "learning_rate": 7.224749572220341e-07, + "loss": 0.0002, + "step": 99890 + }, + { + "epoch": 1.6883127857160964, + "grad_norm": 0.052096910774707794, + "learning_rate": 7.217114848206869e-07, + "loss": 0.001, + "step": 99900 + }, + { + "epoch": 1.6884817859949468, + "grad_norm": 0.01506057009100914, + "learning_rate": 7.209483846511689e-07, + "loss": 0.0044, + "step": 99910 + }, + { + "epoch": 1.6886507862737974, + "grad_norm": 0.0026974184438586235, + "learning_rate": 7.201856567798743e-07, + "loss": 0.0008, + "step": 99920 + }, + { + "epoch": 1.688819786552648, + "grad_norm": 0.019957927986979485, + "learning_rate": 7.194233012731633e-07, + "loss": 0.0013, + "step": 99930 + }, + { + "epoch": 1.6889887868314983, + "grad_norm": 0.011490083299577236, + "learning_rate": 7.186613181973656e-07, + "loss": 0.0005, + "step": 99940 + }, + { + "epoch": 1.6891577871103487, + "grad_norm": 0.05209843069314957, + "learning_rate": 7.178997076187755e-07, + "loss": 0.0005, + "step": 99950 + }, + { + "epoch": 1.689326787389199, + "grad_norm": 0.03485189750790596, + "learning_rate": 7.171384696036598e-07, + "loss": 0.0006, + "step": 99960 + }, + { + "epoch": 1.6894957876680496, + "grad_norm": 0.034865107387304306, + "learning_rate": 7.163776042182463e-07, + "loss": 0.0009, + "step": 99970 + }, + { + "epoch": 1.6896647879469002, + "grad_norm": 0.0097969900816679, + "learning_rate": 7.156171115287374e-07, + "loss": 0.0008, + "step": 99980 + }, + { + "epoch": 1.6898337882257506, + "grad_norm": 0.046122949570417404, + "learning_rate": 7.148569916012977e-07, + "loss": 0.0006, + "step": 99990 + }, + { + "epoch": 1.690002788504601, + "grad_norm": 0.002712082350626588, + "learning_rate": 7.140972445020617e-07, + "loss": 0.0006, + "step": 100000 + }, + { + "epoch": 1.6901717887834515, + "grad_norm": 0.021689051762223244, + "learning_rate": 7.133378702971305e-07, + "loss": 0.0006, + "step": 100010 + }, + { + "epoch": 1.6903407890623021, + "grad_norm": 0.00048819032963365316, + "learning_rate": 7.125788690525753e-07, + "loss": 0.0005, + "step": 100020 + }, + { + "epoch": 1.6905097893411525, + "grad_norm": 0.07447975873947144, + "learning_rate": 7.118202408344299e-07, + "loss": 0.0011, + "step": 100030 + }, + { + "epoch": 1.6906787896200028, + "grad_norm": 0.06627485901117325, + "learning_rate": 7.110619857087015e-07, + "loss": 0.0004, + "step": 100040 + }, + { + "epoch": 1.6908477898988532, + "grad_norm": 0.05524977296590805, + "learning_rate": 7.103041037413599e-07, + "loss": 0.0014, + "step": 100050 + }, + { + "epoch": 1.6910167901777038, + "grad_norm": 0.028608134016394615, + "learning_rate": 7.095465949983449e-07, + "loss": 0.0011, + "step": 100060 + }, + { + "epoch": 1.6911857904565544, + "grad_norm": 0.055802833288908005, + "learning_rate": 7.087894595455642e-07, + "loss": 0.0004, + "step": 100070 + }, + { + "epoch": 1.6913547907354047, + "grad_norm": 0.018083810806274414, + "learning_rate": 7.080326974488932e-07, + "loss": 0.0009, + "step": 100080 + }, + { + "epoch": 1.691523791014255, + "grad_norm": 0.016348091885447502, + "learning_rate": 7.072763087741713e-07, + "loss": 0.0005, 
+ "step": 100090 + }, + { + "epoch": 1.6916927912931057, + "grad_norm": 0.010338032618165016, + "learning_rate": 7.065202935872089e-07, + "loss": 0.0008, + "step": 100100 + }, + { + "epoch": 1.691861791571956, + "grad_norm": 0.15109117329120636, + "learning_rate": 7.057646519537847e-07, + "loss": 0.0005, + "step": 100110 + }, + { + "epoch": 1.6920307918508066, + "grad_norm": 0.009320233948528767, + "learning_rate": 7.050093839396405e-07, + "loss": 0.001, + "step": 100120 + }, + { + "epoch": 1.692199792129657, + "grad_norm": 0.02333456091582775, + "learning_rate": 7.042544896104913e-07, + "loss": 0.0006, + "step": 100130 + }, + { + "epoch": 1.6923687924085073, + "grad_norm": 0.05184491351246834, + "learning_rate": 7.034999690320132e-07, + "loss": 0.0012, + "step": 100140 + }, + { + "epoch": 1.692537792687358, + "grad_norm": 0.008726450614631176, + "learning_rate": 7.027458222698563e-07, + "loss": 0.0007, + "step": 100150 + }, + { + "epoch": 1.6927067929662085, + "grad_norm": 0.04120657965540886, + "learning_rate": 7.019920493896321e-07, + "loss": 0.001, + "step": 100160 + }, + { + "epoch": 1.6928757932450589, + "grad_norm": 0.018746282905340195, + "learning_rate": 7.012386504569252e-07, + "loss": 0.0005, + "step": 100170 + }, + { + "epoch": 1.6930447935239092, + "grad_norm": 0.02525946870446205, + "learning_rate": 7.004856255372827e-07, + "loss": 0.0018, + "step": 100180 + }, + { + "epoch": 1.6932137938027598, + "grad_norm": 0.036460552364587784, + "learning_rate": 6.997329746962234e-07, + "loss": 0.0008, + "step": 100190 + }, + { + "epoch": 1.6933827940816102, + "grad_norm": 0.010049732401967049, + "learning_rate": 6.989806979992297e-07, + "loss": 0.0003, + "step": 100200 + }, + { + "epoch": 1.6935517943604608, + "grad_norm": 0.0019592733588069677, + "learning_rate": 6.982287955117551e-07, + "loss": 0.0003, + "step": 100210 + }, + { + "epoch": 1.6937207946393111, + "grad_norm": 0.054332565516233444, + "learning_rate": 6.974772672992164e-07, + "loss": 0.0003, + "step": 100220 + }, + { + "epoch": 1.6938897949181615, + "grad_norm": 0.07857688516378403, + "learning_rate": 6.967261134270032e-07, + "loss": 0.0006, + "step": 100230 + }, + { + "epoch": 1.694058795197012, + "grad_norm": 0.05319392308592796, + "learning_rate": 6.959753339604669e-07, + "loss": 0.0006, + "step": 100240 + }, + { + "epoch": 1.6942277954758627, + "grad_norm": 0.05939016863703728, + "learning_rate": 6.952249289649293e-07, + "loss": 0.0004, + "step": 100250 + }, + { + "epoch": 1.694396795754713, + "grad_norm": 0.049175795167684555, + "learning_rate": 6.94474898505681e-07, + "loss": 0.0003, + "step": 100260 + }, + { + "epoch": 1.6945657960335634, + "grad_norm": 0.028834955766797066, + "learning_rate": 6.937252426479779e-07, + "loss": 0.0003, + "step": 100270 + }, + { + "epoch": 1.6947347963124137, + "grad_norm": 0.003831252222880721, + "learning_rate": 6.929759614570419e-07, + "loss": 0.0007, + "step": 100280 + }, + { + "epoch": 1.6949037965912643, + "grad_norm": 0.049640364944934845, + "learning_rate": 6.922270549980664e-07, + "loss": 0.0009, + "step": 100290 + }, + { + "epoch": 1.695072796870115, + "grad_norm": 0.03364095836877823, + "learning_rate": 6.914785233362076e-07, + "loss": 0.0015, + "step": 100300 + }, + { + "epoch": 1.6952417971489653, + "grad_norm": 0.015599294565618038, + "learning_rate": 6.907303665365938e-07, + "loss": 0.001, + "step": 100310 + }, + { + "epoch": 1.6954107974278156, + "grad_norm": 0.009604704566299915, + "learning_rate": 6.89982584664316e-07, + "loss": 0.0003, + "step": 100320 + }, + { + 
"epoch": 1.6955797977066662, + "grad_norm": 0.04983891546726227, + "learning_rate": 6.892351777844359e-07, + "loss": 0.0004, + "step": 100330 + }, + { + "epoch": 1.6957487979855168, + "grad_norm": 0.10597651451826096, + "learning_rate": 6.884881459619825e-07, + "loss": 0.0007, + "step": 100340 + }, + { + "epoch": 1.6959177982643672, + "grad_norm": 0.03459309786558151, + "learning_rate": 6.877414892619488e-07, + "loss": 0.0003, + "step": 100350 + }, + { + "epoch": 1.6960867985432175, + "grad_norm": 0.025150327011942863, + "learning_rate": 6.869952077492998e-07, + "loss": 0.0007, + "step": 100360 + }, + { + "epoch": 1.696255798822068, + "grad_norm": 0.0011559352278709412, + "learning_rate": 6.862493014889643e-07, + "loss": 0.0009, + "step": 100370 + }, + { + "epoch": 1.6964247991009185, + "grad_norm": 0.08916772156953812, + "learning_rate": 6.855037705458406e-07, + "loss": 0.0007, + "step": 100380 + }, + { + "epoch": 1.696593799379769, + "grad_norm": 0.05255361273884773, + "learning_rate": 6.847586149847924e-07, + "loss": 0.0009, + "step": 100390 + }, + { + "epoch": 1.6967627996586194, + "grad_norm": 0.019203945994377136, + "learning_rate": 6.840138348706538e-07, + "loss": 0.0005, + "step": 100400 + }, + { + "epoch": 1.6969317999374698, + "grad_norm": 0.00024325685808435082, + "learning_rate": 6.832694302682213e-07, + "loss": 0.0008, + "step": 100410 + }, + { + "epoch": 1.6971008002163204, + "grad_norm": 0.004759735893458128, + "learning_rate": 6.825254012422649e-07, + "loss": 0.0009, + "step": 100420 + }, + { + "epoch": 1.697269800495171, + "grad_norm": 0.006870803888887167, + "learning_rate": 6.817817478575162e-07, + "loss": 0.0004, + "step": 100430 + }, + { + "epoch": 1.6974388007740213, + "grad_norm": 0.023119403049349785, + "learning_rate": 6.810384701786776e-07, + "loss": 0.0015, + "step": 100440 + }, + { + "epoch": 1.6976078010528717, + "grad_norm": 0.03685407713055611, + "learning_rate": 6.802955682704187e-07, + "loss": 0.0011, + "step": 100450 + }, + { + "epoch": 1.697776801331722, + "grad_norm": 0.031681958585977554, + "learning_rate": 6.795530421973745e-07, + "loss": 0.0007, + "step": 100460 + }, + { + "epoch": 1.6979458016105726, + "grad_norm": 0.07114414125680923, + "learning_rate": 6.788108920241481e-07, + "loss": 0.001, + "step": 100470 + }, + { + "epoch": 1.6981148018894232, + "grad_norm": 0.04379222169518471, + "learning_rate": 6.78069117815312e-07, + "loss": 0.0006, + "step": 100480 + }, + { + "epoch": 1.6982838021682736, + "grad_norm": 0.010722734965384007, + "learning_rate": 6.773277196354017e-07, + "loss": 0.0011, + "step": 100490 + }, + { + "epoch": 1.698452802447124, + "grad_norm": 0.025447919964790344, + "learning_rate": 6.765866975489244e-07, + "loss": 0.0005, + "step": 100500 + }, + { + "epoch": 1.6986218027259745, + "grad_norm": 0.0570237897336483, + "learning_rate": 6.758460516203513e-07, + "loss": 0.0008, + "step": 100510 + }, + { + "epoch": 1.698790803004825, + "grad_norm": 0.01165765430778265, + "learning_rate": 6.751057819141233e-07, + "loss": 0.0003, + "step": 100520 + }, + { + "epoch": 1.6989598032836755, + "grad_norm": 0.033633843064308167, + "learning_rate": 6.743658884946464e-07, + "loss": 0.0007, + "step": 100530 + }, + { + "epoch": 1.6991288035625258, + "grad_norm": 0.17939716577529907, + "learning_rate": 6.736263714262958e-07, + "loss": 0.0009, + "step": 100540 + }, + { + "epoch": 1.6992978038413762, + "grad_norm": 0.032884031534194946, + "learning_rate": 6.728872307734119e-07, + "loss": 0.0006, + "step": 100550 + }, + { + "epoch": 1.6994668041202268, 
+ "grad_norm": 0.051502492278814316, + "learning_rate": 6.721484666003053e-07, + "loss": 0.0009, + "step": 100560 + }, + { + "epoch": 1.6996358043990774, + "grad_norm": 0.07070668786764145, + "learning_rate": 6.714100789712502e-07, + "loss": 0.0012, + "step": 100570 + }, + { + "epoch": 1.6998048046779277, + "grad_norm": 0.1215224340558052, + "learning_rate": 6.706720679504908e-07, + "loss": 0.0004, + "step": 100580 + }, + { + "epoch": 1.699973804956778, + "grad_norm": 0.05705857276916504, + "learning_rate": 6.699344336022379e-07, + "loss": 0.0004, + "step": 100590 + }, + { + "epoch": 1.7001428052356287, + "grad_norm": 0.02882068045437336, + "learning_rate": 6.691971759906685e-07, + "loss": 0.0005, + "step": 100600 + }, + { + "epoch": 1.7003118055144792, + "grad_norm": 0.08917008340358734, + "learning_rate": 6.684602951799291e-07, + "loss": 0.0007, + "step": 100610 + }, + { + "epoch": 1.7004808057933296, + "grad_norm": 0.01354215107858181, + "learning_rate": 6.677237912341294e-07, + "loss": 0.0006, + "step": 100620 + }, + { + "epoch": 1.70064980607218, + "grad_norm": 0.05526946112513542, + "learning_rate": 6.669876642173512e-07, + "loss": 0.0008, + "step": 100630 + }, + { + "epoch": 1.7008188063510303, + "grad_norm": 0.027818327769637108, + "learning_rate": 6.662519141936397e-07, + "loss": 0.0006, + "step": 100640 + }, + { + "epoch": 1.700987806629881, + "grad_norm": 0.03925297036767006, + "learning_rate": 6.655165412270087e-07, + "loss": 0.0004, + "step": 100650 + }, + { + "epoch": 1.7011568069087315, + "grad_norm": 0.009000157006084919, + "learning_rate": 6.647815453814393e-07, + "loss": 0.0007, + "step": 100660 + }, + { + "epoch": 1.7013258071875819, + "grad_norm": 0.04803379997611046, + "learning_rate": 6.640469267208821e-07, + "loss": 0.0006, + "step": 100670 + }, + { + "epoch": 1.7014948074664322, + "grad_norm": 0.0002350498689338565, + "learning_rate": 6.633126853092487e-07, + "loss": 0.0004, + "step": 100680 + }, + { + "epoch": 1.7016638077452828, + "grad_norm": 0.042979419231414795, + "learning_rate": 6.625788212104245e-07, + "loss": 0.0002, + "step": 100690 + }, + { + "epoch": 1.7018328080241334, + "grad_norm": 0.010143440216779709, + "learning_rate": 6.618453344882575e-07, + "loss": 0.0003, + "step": 100700 + }, + { + "epoch": 1.7020018083029838, + "grad_norm": 0.003690978977829218, + "learning_rate": 6.611122252065655e-07, + "loss": 0.0002, + "step": 100710 + }, + { + "epoch": 1.7021708085818341, + "grad_norm": 0.05425015464425087, + "learning_rate": 6.603794934291319e-07, + "loss": 0.0009, + "step": 100720 + }, + { + "epoch": 1.7023398088606845, + "grad_norm": 0.0006868285126984119, + "learning_rate": 6.596471392197096e-07, + "loss": 0.001, + "step": 100730 + }, + { + "epoch": 1.702508809139535, + "grad_norm": 0.0235914159566164, + "learning_rate": 6.589151626420137e-07, + "loss": 0.0003, + "step": 100740 + }, + { + "epoch": 1.7026778094183856, + "grad_norm": 0.055669207125902176, + "learning_rate": 6.581835637597334e-07, + "loss": 0.0013, + "step": 100750 + }, + { + "epoch": 1.702846809697236, + "grad_norm": 0.0040846276096999645, + "learning_rate": 6.574523426365187e-07, + "loss": 0.0006, + "step": 100760 + }, + { + "epoch": 1.7030158099760864, + "grad_norm": 0.001606640755198896, + "learning_rate": 6.56721499335991e-07, + "loss": 0.0006, + "step": 100770 + }, + { + "epoch": 1.703184810254937, + "grad_norm": 0.018381789326667786, + "learning_rate": 6.559910339217357e-07, + "loss": 0.0008, + "step": 100780 + }, + { + "epoch": 1.7033538105337875, + "grad_norm": 
0.03388458117842674, + "learning_rate": 6.55260946457309e-07, + "loss": 0.0007, + "step": 100790 + }, + { + "epoch": 1.703522810812638, + "grad_norm": 0.03354884311556816, + "learning_rate": 6.54531237006229e-07, + "loss": 0.0006, + "step": 100800 + }, + { + "epoch": 1.7036918110914883, + "grad_norm": 0.0004501251969486475, + "learning_rate": 6.538019056319872e-07, + "loss": 0.0008, + "step": 100810 + }, + { + "epoch": 1.7038608113703386, + "grad_norm": 0.007855558767914772, + "learning_rate": 6.530729523980361e-07, + "loss": 0.0005, + "step": 100820 + }, + { + "epoch": 1.7040298116491892, + "grad_norm": 0.01765887252986431, + "learning_rate": 6.523443773677995e-07, + "loss": 0.0004, + "step": 100830 + }, + { + "epoch": 1.7041988119280398, + "grad_norm": 0.0445299856364727, + "learning_rate": 6.516161806046667e-07, + "loss": 0.0009, + "step": 100840 + }, + { + "epoch": 1.7043678122068902, + "grad_norm": 0.010799041017889977, + "learning_rate": 6.508883621719952e-07, + "loss": 0.0024, + "step": 100850 + }, + { + "epoch": 1.7045368124857405, + "grad_norm": 0.08953937143087387, + "learning_rate": 6.50160922133109e-07, + "loss": 0.0003, + "step": 100860 + }, + { + "epoch": 1.704705812764591, + "grad_norm": 0.061672527343034744, + "learning_rate": 6.494338605512968e-07, + "loss": 0.0008, + "step": 100870 + }, + { + "epoch": 1.7048748130434417, + "grad_norm": 0.009565744549036026, + "learning_rate": 6.487071774898185e-07, + "loss": 0.0007, + "step": 100880 + }, + { + "epoch": 1.705043813322292, + "grad_norm": 0.010980235412716866, + "learning_rate": 6.479808730118975e-07, + "loss": 0.0004, + "step": 100890 + }, + { + "epoch": 1.7052128136011424, + "grad_norm": 0.024364009499549866, + "learning_rate": 6.472549471807277e-07, + "loss": 0.0007, + "step": 100900 + }, + { + "epoch": 1.7053818138799928, + "grad_norm": 0.03408200293779373, + "learning_rate": 6.465294000594663e-07, + "loss": 0.0004, + "step": 100910 + }, + { + "epoch": 1.7055508141588434, + "grad_norm": 0.07476289570331573, + "learning_rate": 6.458042317112407e-07, + "loss": 0.0005, + "step": 100920 + }, + { + "epoch": 1.705719814437694, + "grad_norm": 0.03566889092326164, + "learning_rate": 6.450794421991425e-07, + "loss": 0.0005, + "step": 100930 + }, + { + "epoch": 1.7058888147165443, + "grad_norm": 0.004053400829434395, + "learning_rate": 6.44355031586234e-07, + "loss": 0.0005, + "step": 100940 + }, + { + "epoch": 1.7060578149953947, + "grad_norm": 0.008549575693905354, + "learning_rate": 6.4363099993554e-07, + "loss": 0.0007, + "step": 100950 + }, + { + "epoch": 1.7062268152742452, + "grad_norm": 0.020716724917292595, + "learning_rate": 6.429073473100578e-07, + "loss": 0.0003, + "step": 100960 + }, + { + "epoch": 1.7063958155530956, + "grad_norm": 0.0033090277574956417, + "learning_rate": 6.421840737727453e-07, + "loss": 0.0004, + "step": 100970 + }, + { + "epoch": 1.7065648158319462, + "grad_norm": 0.08368370682001114, + "learning_rate": 6.414611793865339e-07, + "loss": 0.0006, + "step": 100980 + }, + { + "epoch": 1.7067338161107966, + "grad_norm": 0.019599411636590958, + "learning_rate": 6.407386642143165e-07, + "loss": 0.0006, + "step": 100990 + }, + { + "epoch": 1.706902816389647, + "grad_norm": 0.06469276547431946, + "learning_rate": 6.400165283189574e-07, + "loss": 0.0004, + "step": 101000 + }, + { + "epoch": 1.7070718166684975, + "grad_norm": 0.014812301844358444, + "learning_rate": 6.392947717632836e-07, + "loss": 0.0003, + "step": 101010 + }, + { + "epoch": 1.707240816947348, + "grad_norm": 0.01817789115011692, + 
"learning_rate": 6.38573394610093e-07, + "loss": 0.0005, + "step": 101020 + }, + { + "epoch": 1.7074098172261984, + "grad_norm": 0.02831515297293663, + "learning_rate": 6.37852396922149e-07, + "loss": 0.0007, + "step": 101030 + }, + { + "epoch": 1.7075788175050488, + "grad_norm": 0.010831360705196857, + "learning_rate": 6.371317787621822e-07, + "loss": 0.0011, + "step": 101040 + }, + { + "epoch": 1.7077478177838994, + "grad_norm": 0.06960634887218475, + "learning_rate": 6.364115401928883e-07, + "loss": 0.001, + "step": 101050 + }, + { + "epoch": 1.7079168180627498, + "grad_norm": 0.0023254172410815954, + "learning_rate": 6.356916812769332e-07, + "loss": 0.0003, + "step": 101060 + }, + { + "epoch": 1.7080858183416003, + "grad_norm": 0.10252980887889862, + "learning_rate": 6.349722020769466e-07, + "loss": 0.0008, + "step": 101070 + }, + { + "epoch": 1.7082548186204507, + "grad_norm": 0.04966580495238304, + "learning_rate": 6.342531026555288e-07, + "loss": 0.0006, + "step": 101080 + }, + { + "epoch": 1.708423818899301, + "grad_norm": 0.026467200368642807, + "learning_rate": 6.335343830752422e-07, + "loss": 0.0008, + "step": 101090 + }, + { + "epoch": 1.7085928191781516, + "grad_norm": 0.04453878104686737, + "learning_rate": 6.328160433986202e-07, + "loss": 0.0007, + "step": 101100 + }, + { + "epoch": 1.7087618194570022, + "grad_norm": 0.005278876982629299, + "learning_rate": 6.320980836881635e-07, + "loss": 0.0005, + "step": 101110 + }, + { + "epoch": 1.7089308197358526, + "grad_norm": 0.03430505841970444, + "learning_rate": 6.313805040063348e-07, + "loss": 0.0003, + "step": 101120 + }, + { + "epoch": 1.709099820014703, + "grad_norm": 0.09736597537994385, + "learning_rate": 6.306633044155702e-07, + "loss": 0.0006, + "step": 101130 + }, + { + "epoch": 1.7092688202935535, + "grad_norm": 0.011779002845287323, + "learning_rate": 6.299464849782666e-07, + "loss": 0.0005, + "step": 101140 + }, + { + "epoch": 1.709437820572404, + "grad_norm": 0.0022404491901397705, + "learning_rate": 6.292300457567934e-07, + "loss": 0.0006, + "step": 101150 + }, + { + "epoch": 1.7096068208512545, + "grad_norm": 0.4547003209590912, + "learning_rate": 6.285139868134821e-07, + "loss": 0.0015, + "step": 101160 + }, + { + "epoch": 1.7097758211301048, + "grad_norm": 0.027612561360001564, + "learning_rate": 6.277983082106353e-07, + "loss": 0.0006, + "step": 101170 + }, + { + "epoch": 1.7099448214089552, + "grad_norm": 0.026960104703903198, + "learning_rate": 6.270830100105185e-07, + "loss": 0.0001, + "step": 101180 + }, + { + "epoch": 1.7101138216878058, + "grad_norm": 0.14393088221549988, + "learning_rate": 6.263680922753684e-07, + "loss": 0.0003, + "step": 101190 + }, + { + "epoch": 1.7102828219666564, + "grad_norm": 0.0568784661591053, + "learning_rate": 6.256535550673837e-07, + "loss": 0.0005, + "step": 101200 + }, + { + "epoch": 1.7104518222455067, + "grad_norm": 0.01398561056703329, + "learning_rate": 6.249393984487339e-07, + "loss": 0.0018, + "step": 101210 + }, + { + "epoch": 1.710620822524357, + "grad_norm": 0.10412480682134628, + "learning_rate": 6.242256224815551e-07, + "loss": 0.0009, + "step": 101220 + }, + { + "epoch": 1.7107898228032075, + "grad_norm": 0.04285868629813194, + "learning_rate": 6.23512227227947e-07, + "loss": 0.0012, + "step": 101230 + }, + { + "epoch": 1.710958823082058, + "grad_norm": 0.002318931743502617, + "learning_rate": 6.227992127499799e-07, + "loss": 0.0002, + "step": 101240 + }, + { + "epoch": 1.7111278233609086, + "grad_norm": 0.030637163668870926, + "learning_rate": 
6.220865791096908e-07, + "loss": 0.0006, + "step": 101250 + }, + { + "epoch": 1.711296823639759, + "grad_norm": 0.041567422449588776, + "learning_rate": 6.213743263690791e-07, + "loss": 0.0006, + "step": 101260 + }, + { + "epoch": 1.7114658239186094, + "grad_norm": 0.0006766514852643013, + "learning_rate": 6.206624545901174e-07, + "loss": 0.0006, + "step": 101270 + }, + { + "epoch": 1.71163482419746, + "grad_norm": 0.09084150940179825, + "learning_rate": 6.199509638347395e-07, + "loss": 0.0006, + "step": 101280 + }, + { + "epoch": 1.7118038244763105, + "grad_norm": 0.028928222134709358, + "learning_rate": 6.192398541648504e-07, + "loss": 0.0015, + "step": 101290 + }, + { + "epoch": 1.7119728247551609, + "grad_norm": 0.10853814333677292, + "learning_rate": 6.185291256423182e-07, + "loss": 0.0006, + "step": 101300 + }, + { + "epoch": 1.7121418250340112, + "grad_norm": 0.03964819014072418, + "learning_rate": 6.178187783289818e-07, + "loss": 0.0003, + "step": 101310 + }, + { + "epoch": 1.7123108253128616, + "grad_norm": 0.05229168385267258, + "learning_rate": 6.171088122866431e-07, + "loss": 0.0003, + "step": 101320 + }, + { + "epoch": 1.7124798255917122, + "grad_norm": 0.025795824825763702, + "learning_rate": 6.163992275770741e-07, + "loss": 0.0015, + "step": 101330 + }, + { + "epoch": 1.7126488258705628, + "grad_norm": 0.05593524128198624, + "learning_rate": 6.156900242620101e-07, + "loss": 0.0006, + "step": 101340 + }, + { + "epoch": 1.7128178261494131, + "grad_norm": 0.06450933963060379, + "learning_rate": 6.14981202403157e-07, + "loss": 0.0008, + "step": 101350 + }, + { + "epoch": 1.7129868264282635, + "grad_norm": 0.021435856819152832, + "learning_rate": 6.142727620621857e-07, + "loss": 0.0005, + "step": 101360 + }, + { + "epoch": 1.713155826707114, + "grad_norm": 0.004841253627091646, + "learning_rate": 6.135647033007325e-07, + "loss": 0.0004, + "step": 101370 + }, + { + "epoch": 1.7133248269859647, + "grad_norm": 0.03038441203534603, + "learning_rate": 6.12857026180404e-07, + "loss": 0.0005, + "step": 101380 + }, + { + "epoch": 1.713493827264815, + "grad_norm": 0.0590740442276001, + "learning_rate": 6.121497307627699e-07, + "loss": 0.0004, + "step": 101390 + }, + { + "epoch": 1.7136628275436654, + "grad_norm": 0.0016183584230020642, + "learning_rate": 6.114428171093695e-07, + "loss": 0.0006, + "step": 101400 + }, + { + "epoch": 1.7138318278225158, + "grad_norm": 0.029670948162674904, + "learning_rate": 6.107362852817056e-07, + "loss": 0.0005, + "step": 101410 + }, + { + "epoch": 1.7140008281013663, + "grad_norm": 0.03551269322633743, + "learning_rate": 6.100301353412519e-07, + "loss": 0.0005, + "step": 101420 + }, + { + "epoch": 1.714169828380217, + "grad_norm": 0.0018679883796721697, + "learning_rate": 6.093243673494459e-07, + "loss": 0.0007, + "step": 101430 + }, + { + "epoch": 1.7143388286590673, + "grad_norm": 0.010143287479877472, + "learning_rate": 6.086189813676946e-07, + "loss": 0.0003, + "step": 101440 + }, + { + "epoch": 1.7145078289379176, + "grad_norm": 0.04105092212557793, + "learning_rate": 6.079139774573672e-07, + "loss": 0.0004, + "step": 101450 + }, + { + "epoch": 1.7146768292167682, + "grad_norm": 0.004793565254658461, + "learning_rate": 6.072093556798053e-07, + "loss": 0.0008, + "step": 101460 + }, + { + "epoch": 1.7148458294956188, + "grad_norm": 0.060135677456855774, + "learning_rate": 6.065051160963121e-07, + "loss": 0.0008, + "step": 101470 + }, + { + "epoch": 1.7150148297744692, + "grad_norm": 0.10621100664138794, + "learning_rate": 6.058012587681617e-07, + 
"loss": 0.0006, + "step": 101480 + }, + { + "epoch": 1.7151838300533195, + "grad_norm": 0.03052833490073681, + "learning_rate": 6.050977837565914e-07, + "loss": 0.0005, + "step": 101490 + }, + { + "epoch": 1.71535283033217, + "grad_norm": 0.023036569356918335, + "learning_rate": 6.043946911228082e-07, + "loss": 0.0021, + "step": 101500 + }, + { + "epoch": 1.7155218306110205, + "grad_norm": 0.009780052118003368, + "learning_rate": 6.036919809279834e-07, + "loss": 0.0006, + "step": 101510 + }, + { + "epoch": 1.715690830889871, + "grad_norm": 0.03306550905108452, + "learning_rate": 6.029896532332575e-07, + "loss": 0.0008, + "step": 101520 + }, + { + "epoch": 1.7158598311687214, + "grad_norm": 0.0446292869746685, + "learning_rate": 6.022877080997353e-07, + "loss": 0.0008, + "step": 101530 + }, + { + "epoch": 1.7160288314475718, + "grad_norm": 0.03361291065812111, + "learning_rate": 6.015861455884902e-07, + "loss": 0.0009, + "step": 101540 + }, + { + "epoch": 1.7161978317264224, + "grad_norm": 0.0014617099659517407, + "learning_rate": 6.008849657605609e-07, + "loss": 0.0005, + "step": 101550 + }, + { + "epoch": 1.716366832005273, + "grad_norm": 0.08785160630941391, + "learning_rate": 6.001841686769544e-07, + "loss": 0.0006, + "step": 101560 + }, + { + "epoch": 1.7165358322841233, + "grad_norm": 0.09249861538410187, + "learning_rate": 5.994837543986421e-07, + "loss": 0.0012, + "step": 101570 + }, + { + "epoch": 1.7167048325629737, + "grad_norm": 0.025042526423931122, + "learning_rate": 5.98783722986564e-07, + "loss": 0.0005, + "step": 101580 + }, + { + "epoch": 1.716873832841824, + "grad_norm": 0.005246470682322979, + "learning_rate": 5.980840745016275e-07, + "loss": 0.0007, + "step": 101590 + }, + { + "epoch": 1.7170428331206746, + "grad_norm": 0.08312425017356873, + "learning_rate": 5.97384809004703e-07, + "loss": 0.0006, + "step": 101600 + }, + { + "epoch": 1.7172118333995252, + "grad_norm": 0.06092371046543121, + "learning_rate": 5.966859265566316e-07, + "loss": 0.0007, + "step": 101610 + }, + { + "epoch": 1.7173808336783756, + "grad_norm": 0.03240653872489929, + "learning_rate": 5.959874272182192e-07, + "loss": 0.0009, + "step": 101620 + }, + { + "epoch": 1.717549833957226, + "grad_norm": 0.024100584909319878, + "learning_rate": 5.952893110502395e-07, + "loss": 0.0006, + "step": 101630 + }, + { + "epoch": 1.7177188342360765, + "grad_norm": 0.05034208297729492, + "learning_rate": 5.945915781134298e-07, + "loss": 0.0008, + "step": 101640 + }, + { + "epoch": 1.717887834514927, + "grad_norm": 0.07382944971323013, + "learning_rate": 5.938942284684984e-07, + "loss": 0.0008, + "step": 101650 + }, + { + "epoch": 1.7180568347937775, + "grad_norm": 0.005421789828687906, + "learning_rate": 5.931972621761167e-07, + "loss": 0.0008, + "step": 101660 + }, + { + "epoch": 1.7182258350726278, + "grad_norm": 0.14722028374671936, + "learning_rate": 5.925006792969251e-07, + "loss": 0.0005, + "step": 101670 + }, + { + "epoch": 1.7183948353514782, + "grad_norm": 0.0022031988482922316, + "learning_rate": 5.918044798915285e-07, + "loss": 0.0006, + "step": 101680 + }, + { + "epoch": 1.7185638356303288, + "grad_norm": 0.043418727815151215, + "learning_rate": 5.911086640205016e-07, + "loss": 0.001, + "step": 101690 + }, + { + "epoch": 1.7187328359091794, + "grad_norm": 0.10383982211351395, + "learning_rate": 5.904132317443812e-07, + "loss": 0.001, + "step": 101700 + }, + { + "epoch": 1.7189018361880297, + "grad_norm": 0.046413641422986984, + "learning_rate": 5.897181831236753e-07, + "loss": 0.0007, + "step": 101710 + 
}, + { + "epoch": 1.71907083646688, + "grad_norm": 0.02331531047821045, + "learning_rate": 5.890235182188553e-07, + "loss": 0.0006, + "step": 101720 + }, + { + "epoch": 1.7192398367457307, + "grad_norm": 0.03394658863544464, + "learning_rate": 5.883292370903615e-07, + "loss": 0.0003, + "step": 101730 + }, + { + "epoch": 1.7194088370245812, + "grad_norm": 0.032232072204351425, + "learning_rate": 5.876353397985984e-07, + "loss": 0.0005, + "step": 101740 + }, + { + "epoch": 1.7195778373034316, + "grad_norm": 0.07648573070764542, + "learning_rate": 5.8694182640394e-07, + "loss": 0.0004, + "step": 101750 + }, + { + "epoch": 1.719746837582282, + "grad_norm": 0.07018204033374786, + "learning_rate": 5.862486969667236e-07, + "loss": 0.0004, + "step": 101760 + }, + { + "epoch": 1.7199158378611323, + "grad_norm": 0.002022471046075225, + "learning_rate": 5.855559515472564e-07, + "loss": 0.0012, + "step": 101770 + }, + { + "epoch": 1.720084838139983, + "grad_norm": 0.004247077275067568, + "learning_rate": 5.848635902058092e-07, + "loss": 0.0005, + "step": 101780 + }, + { + "epoch": 1.7202538384188335, + "grad_norm": 0.0623776949942112, + "learning_rate": 5.841716130026215e-07, + "loss": 0.0007, + "step": 101790 + }, + { + "epoch": 1.7204228386976839, + "grad_norm": 0.01923542656004429, + "learning_rate": 5.834800199978985e-07, + "loss": 0.0003, + "step": 101800 + }, + { + "epoch": 1.7205918389765342, + "grad_norm": 0.00485391728579998, + "learning_rate": 5.827888112518132e-07, + "loss": 0.0006, + "step": 101810 + }, + { + "epoch": 1.7207608392553848, + "grad_norm": 0.019686343148350716, + "learning_rate": 5.820979868245025e-07, + "loss": 0.0006, + "step": 101820 + }, + { + "epoch": 1.7209298395342354, + "grad_norm": 0.013536707498133183, + "learning_rate": 5.814075467760727e-07, + "loss": 0.0003, + "step": 101830 + }, + { + "epoch": 1.7210988398130858, + "grad_norm": 0.0900881439447403, + "learning_rate": 5.807174911665942e-07, + "loss": 0.0009, + "step": 101840 + }, + { + "epoch": 1.7212678400919361, + "grad_norm": 0.013546904549002647, + "learning_rate": 5.80027820056106e-07, + "loss": 0.0003, + "step": 101850 + }, + { + "epoch": 1.7214368403707865, + "grad_norm": 0.06801117211580276, + "learning_rate": 5.793385335046137e-07, + "loss": 0.0006, + "step": 101860 + }, + { + "epoch": 1.721605840649637, + "grad_norm": 0.0018033263040706515, + "learning_rate": 5.786496315720864e-07, + "loss": 0.0004, + "step": 101870 + }, + { + "epoch": 1.7217748409284876, + "grad_norm": 0.039153531193733215, + "learning_rate": 5.779611143184638e-07, + "loss": 0.0006, + "step": 101880 + }, + { + "epoch": 1.721943841207338, + "grad_norm": 0.04686809703707695, + "learning_rate": 5.772729818036488e-07, + "loss": 0.0005, + "step": 101890 + }, + { + "epoch": 1.7221128414861884, + "grad_norm": 0.048774782568216324, + "learning_rate": 5.765852340875144e-07, + "loss": 0.0003, + "step": 101900 + }, + { + "epoch": 1.722281841765039, + "grad_norm": 0.1338806450366974, + "learning_rate": 5.758978712298952e-07, + "loss": 0.0007, + "step": 101910 + }, + { + "epoch": 1.7224508420438893, + "grad_norm": 0.06597351282835007, + "learning_rate": 5.752108932905976e-07, + "loss": 0.0008, + "step": 101920 + }, + { + "epoch": 1.72261984232274, + "grad_norm": 0.02449699118733406, + "learning_rate": 5.745243003293893e-07, + "loss": 0.0005, + "step": 101930 + }, + { + "epoch": 1.7227888426015903, + "grad_norm": 0.029172487556934357, + "learning_rate": 5.738380924060105e-07, + "loss": 0.0008, + "step": 101940 + }, + { + "epoch": 1.7229578428804406, + 
"grad_norm": 0.04729709029197693, + "learning_rate": 5.731522695801617e-07, + "loss": 0.0004, + "step": 101950 + }, + { + "epoch": 1.7231268431592912, + "grad_norm": 0.010576958768069744, + "learning_rate": 5.724668319115151e-07, + "loss": 0.0005, + "step": 101960 + }, + { + "epoch": 1.7232958434381418, + "grad_norm": 0.03171493485569954, + "learning_rate": 5.717817794597047e-07, + "loss": 0.0003, + "step": 101970 + }, + { + "epoch": 1.7234648437169922, + "grad_norm": 0.01054026186466217, + "learning_rate": 5.71097112284335e-07, + "loss": 0.0007, + "step": 101980 + }, + { + "epoch": 1.7236338439958425, + "grad_norm": 0.018904397264122963, + "learning_rate": 5.704128304449758e-07, + "loss": 0.0004, + "step": 101990 + }, + { + "epoch": 1.723802844274693, + "grad_norm": 0.02453923597931862, + "learning_rate": 5.697289340011613e-07, + "loss": 0.0005, + "step": 102000 + }, + { + "epoch": 1.7239718445535435, + "grad_norm": 0.004252988379448652, + "learning_rate": 5.690454230123948e-07, + "loss": 0.0009, + "step": 102010 + }, + { + "epoch": 1.724140844832394, + "grad_norm": 0.027058754116296768, + "learning_rate": 5.683622975381458e-07, + "loss": 0.0007, + "step": 102020 + }, + { + "epoch": 1.7243098451112444, + "grad_norm": 0.03157680481672287, + "learning_rate": 5.676795576378479e-07, + "loss": 0.0005, + "step": 102030 + }, + { + "epoch": 1.7244788453900948, + "grad_norm": 0.044976986944675446, + "learning_rate": 5.669972033709048e-07, + "loss": 0.0007, + "step": 102040 + }, + { + "epoch": 1.7246478456689454, + "grad_norm": 0.045679911971092224, + "learning_rate": 5.663152347966821e-07, + "loss": 0.0015, + "step": 102050 + }, + { + "epoch": 1.724816845947796, + "grad_norm": 0.00455823726952076, + "learning_rate": 5.656336519745165e-07, + "loss": 0.0004, + "step": 102060 + }, + { + "epoch": 1.7249858462266463, + "grad_norm": 0.019595401361584663, + "learning_rate": 5.649524549637075e-07, + "loss": 0.001, + "step": 102070 + }, + { + "epoch": 1.7251548465054967, + "grad_norm": 0.07024908810853958, + "learning_rate": 5.642716438235241e-07, + "loss": 0.0005, + "step": 102080 + }, + { + "epoch": 1.7253238467843472, + "grad_norm": 0.03039000928401947, + "learning_rate": 5.635912186131981e-07, + "loss": 0.0006, + "step": 102090 + }, + { + "epoch": 1.7254928470631976, + "grad_norm": 0.04661163315176964, + "learning_rate": 5.629111793919322e-07, + "loss": 0.0005, + "step": 102100 + }, + { + "epoch": 1.7256618473420482, + "grad_norm": 0.02858460694551468, + "learning_rate": 5.622315262188905e-07, + "loss": 0.0006, + "step": 102110 + }, + { + "epoch": 1.7258308476208986, + "grad_norm": 0.06292527168989182, + "learning_rate": 5.61552259153208e-07, + "loss": 0.0006, + "step": 102120 + }, + { + "epoch": 1.725999847899749, + "grad_norm": 0.1382041871547699, + "learning_rate": 5.608733782539844e-07, + "loss": 0.0009, + "step": 102130 + }, + { + "epoch": 1.7261688481785995, + "grad_norm": 0.03715429827570915, + "learning_rate": 5.60194883580284e-07, + "loss": 0.0006, + "step": 102140 + }, + { + "epoch": 1.72633784845745, + "grad_norm": 0.008654680103063583, + "learning_rate": 5.595167751911412e-07, + "loss": 0.0006, + "step": 102150 + }, + { + "epoch": 1.7265068487363004, + "grad_norm": 0.004395073279738426, + "learning_rate": 5.588390531455528e-07, + "loss": 0.0004, + "step": 102160 + }, + { + "epoch": 1.7266758490151508, + "grad_norm": 0.019985094666481018, + "learning_rate": 5.581617175024845e-07, + "loss": 0.0003, + "step": 102170 + }, + { + "epoch": 1.7268448492940012, + "grad_norm": 0.06431503593921661, + 
"learning_rate": 5.57484768320869e-07, + "loss": 0.0007, + "step": 102180 + }, + { + "epoch": 1.7270138495728518, + "grad_norm": 0.06975045055150986, + "learning_rate": 5.568082056596019e-07, + "loss": 0.0005, + "step": 102190 + }, + { + "epoch": 1.7271828498517023, + "grad_norm": 0.027523482218384743, + "learning_rate": 5.561320295775486e-07, + "loss": 0.0009, + "step": 102200 + }, + { + "epoch": 1.7273518501305527, + "grad_norm": 0.014709588140249252, + "learning_rate": 5.554562401335412e-07, + "loss": 0.0008, + "step": 102210 + }, + { + "epoch": 1.727520850409403, + "grad_norm": 0.08284450322389603, + "learning_rate": 5.547808373863739e-07, + "loss": 0.0006, + "step": 102220 + }, + { + "epoch": 1.7276898506882536, + "grad_norm": 0.09165903925895691, + "learning_rate": 5.541058213948125e-07, + "loss": 0.0008, + "step": 102230 + }, + { + "epoch": 1.7278588509671042, + "grad_norm": 0.017165465280413628, + "learning_rate": 5.534311922175845e-07, + "loss": 0.0007, + "step": 102240 + }, + { + "epoch": 1.7280278512459546, + "grad_norm": 0.002477036789059639, + "learning_rate": 5.527569499133878e-07, + "loss": 0.0002, + "step": 102250 + }, + { + "epoch": 1.728196851524805, + "grad_norm": 0.021751822903752327, + "learning_rate": 5.520830945408828e-07, + "loss": 0.0003, + "step": 102260 + }, + { + "epoch": 1.7283658518036553, + "grad_norm": 0.021902253851294518, + "learning_rate": 5.514096261587004e-07, + "loss": 0.0008, + "step": 102270 + }, + { + "epoch": 1.728534852082506, + "grad_norm": 0.052564337849617004, + "learning_rate": 5.507365448254332e-07, + "loss": 0.0014, + "step": 102280 + }, + { + "epoch": 1.7287038523613565, + "grad_norm": 0.02939448691904545, + "learning_rate": 5.500638505996453e-07, + "loss": 0.0008, + "step": 102290 + }, + { + "epoch": 1.7288728526402068, + "grad_norm": 0.06470446288585663, + "learning_rate": 5.493915435398611e-07, + "loss": 0.0008, + "step": 102300 + }, + { + "epoch": 1.7290418529190572, + "grad_norm": 0.02330961637198925, + "learning_rate": 5.487196237045777e-07, + "loss": 0.0031, + "step": 102310 + }, + { + "epoch": 1.7292108531979078, + "grad_norm": 0.01982557401061058, + "learning_rate": 5.480480911522528e-07, + "loss": 0.0008, + "step": 102320 + }, + { + "epoch": 1.7293798534767584, + "grad_norm": 0.012928773649036884, + "learning_rate": 5.473769459413158e-07, + "loss": 0.0009, + "step": 102330 + }, + { + "epoch": 1.7295488537556087, + "grad_norm": 0.0316120982170105, + "learning_rate": 5.467061881301567e-07, + "loss": 0.0002, + "step": 102340 + }, + { + "epoch": 1.729717854034459, + "grad_norm": 0.04284735396504402, + "learning_rate": 5.460358177771352e-07, + "loss": 0.0004, + "step": 102350 + }, + { + "epoch": 1.7298868543133095, + "grad_norm": 0.04509047791361809, + "learning_rate": 5.453658349405794e-07, + "loss": 0.0009, + "step": 102360 + }, + { + "epoch": 1.73005585459216, + "grad_norm": 0.004043929278850555, + "learning_rate": 5.446962396787775e-07, + "loss": 0.0005, + "step": 102370 + }, + { + "epoch": 1.7302248548710106, + "grad_norm": 0.061174239963293076, + "learning_rate": 5.440270320499897e-07, + "loss": 0.0005, + "step": 102380 + }, + { + "epoch": 1.730393855149861, + "grad_norm": 0.005769838113337755, + "learning_rate": 5.433582121124393e-07, + "loss": 0.0004, + "step": 102390 + }, + { + "epoch": 1.7305628554287114, + "grad_norm": 0.09978872537612915, + "learning_rate": 5.426897799243186e-07, + "loss": 0.0013, + "step": 102400 + }, + { + "epoch": 1.730731855707562, + "grad_norm": 0.042144156992435455, + "learning_rate": 
5.420217355437824e-07, + "loss": 0.0004, + "step": 102410 + }, + { + "epoch": 1.7309008559864125, + "grad_norm": 0.012190748006105423, + "learning_rate": 5.413540790289556e-07, + "loss": 0.0008, + "step": 102420 + }, + { + "epoch": 1.7310698562652629, + "grad_norm": 0.11674518883228302, + "learning_rate": 5.406868104379253e-07, + "loss": 0.0012, + "step": 102430 + }, + { + "epoch": 1.7312388565441132, + "grad_norm": 0.0697534829378128, + "learning_rate": 5.400199298287495e-07, + "loss": 0.0011, + "step": 102440 + }, + { + "epoch": 1.7314078568229636, + "grad_norm": 0.05596902593970299, + "learning_rate": 5.393534372594478e-07, + "loss": 0.0012, + "step": 102450 + }, + { + "epoch": 1.7315768571018142, + "grad_norm": 0.0021226536482572556, + "learning_rate": 5.38687332788011e-07, + "loss": 0.0005, + "step": 102460 + }, + { + "epoch": 1.7317458573806648, + "grad_norm": 0.058090031147003174, + "learning_rate": 5.3802161647239e-07, + "loss": 0.0008, + "step": 102470 + }, + { + "epoch": 1.7319148576595151, + "grad_norm": 0.0030619546305388212, + "learning_rate": 5.373562883705086e-07, + "loss": 0.0005, + "step": 102480 + }, + { + "epoch": 1.7320838579383655, + "grad_norm": 0.02905389480292797, + "learning_rate": 5.366913485402509e-07, + "loss": 0.0007, + "step": 102490 + }, + { + "epoch": 1.732252858217216, + "grad_norm": 0.0476466603577137, + "learning_rate": 5.360267970394722e-07, + "loss": 0.0006, + "step": 102500 + }, + { + "epoch": 1.7324218584960667, + "grad_norm": 0.02050813101232052, + "learning_rate": 5.353626339259893e-07, + "loss": 0.0006, + "step": 102510 + }, + { + "epoch": 1.732590858774917, + "grad_norm": 0.11159972101449966, + "learning_rate": 5.346988592575903e-07, + "loss": 0.001, + "step": 102520 + }, + { + "epoch": 1.7327598590537674, + "grad_norm": 0.007609162945300341, + "learning_rate": 5.340354730920244e-07, + "loss": 0.0005, + "step": 102530 + }, + { + "epoch": 1.7329288593326178, + "grad_norm": 0.04392042011022568, + "learning_rate": 5.333724754870112e-07, + "loss": 0.0009, + "step": 102540 + }, + { + "epoch": 1.7330978596114683, + "grad_norm": 0.04385608434677124, + "learning_rate": 5.327098665002334e-07, + "loss": 0.0019, + "step": 102550 + }, + { + "epoch": 1.733266859890319, + "grad_norm": 0.032793279737234116, + "learning_rate": 5.320476461893415e-07, + "loss": 0.0008, + "step": 102560 + }, + { + "epoch": 1.7334358601691693, + "grad_norm": 0.18722468614578247, + "learning_rate": 5.313858146119516e-07, + "loss": 0.0011, + "step": 102570 + }, + { + "epoch": 1.7336048604480196, + "grad_norm": 0.10890011489391327, + "learning_rate": 5.307243718256483e-07, + "loss": 0.0015, + "step": 102580 + }, + { + "epoch": 1.7337738607268702, + "grad_norm": 0.0011250500101596117, + "learning_rate": 5.300633178879777e-07, + "loss": 0.0003, + "step": 102590 + }, + { + "epoch": 1.7339428610057208, + "grad_norm": 0.0004868998075835407, + "learning_rate": 5.294026528564567e-07, + "loss": 0.0008, + "step": 102600 + }, + { + "epoch": 1.7341118612845712, + "grad_norm": 0.017471345141530037, + "learning_rate": 5.287423767885647e-07, + "loss": 0.0007, + "step": 102610 + }, + { + "epoch": 1.7342808615634215, + "grad_norm": 0.01358699705451727, + "learning_rate": 5.280824897417497e-07, + "loss": 0.0005, + "step": 102620 + }, + { + "epoch": 1.734449861842272, + "grad_norm": 0.009207590483129025, + "learning_rate": 5.274229917734258e-07, + "loss": 0.0017, + "step": 102630 + }, + { + "epoch": 1.7346188621211225, + "grad_norm": 0.01364581286907196, + "learning_rate": 5.267638829409704e-07, + 
"loss": 0.0004, + "step": 102640 + }, + { + "epoch": 1.734787862399973, + "grad_norm": 0.0013375000562518835, + "learning_rate": 5.261051633017322e-07, + "loss": 0.0004, + "step": 102650 + }, + { + "epoch": 1.7349568626788234, + "grad_norm": 0.043894656002521515, + "learning_rate": 5.254468329130197e-07, + "loss": 0.0006, + "step": 102660 + }, + { + "epoch": 1.7351258629576738, + "grad_norm": 0.015651630237698555, + "learning_rate": 5.247888918321137e-07, + "loss": 0.0007, + "step": 102670 + }, + { + "epoch": 1.7352948632365244, + "grad_norm": 0.0647960677742958, + "learning_rate": 5.241313401162562e-07, + "loss": 0.0008, + "step": 102680 + }, + { + "epoch": 1.735463863515375, + "grad_norm": 0.014756627380847931, + "learning_rate": 5.234741778226593e-07, + "loss": 0.0007, + "step": 102690 + }, + { + "epoch": 1.7356328637942253, + "grad_norm": 0.06088197976350784, + "learning_rate": 5.228174050084966e-07, + "loss": 0.0006, + "step": 102700 + }, + { + "epoch": 1.7358018640730757, + "grad_norm": 0.05784652754664421, + "learning_rate": 5.221610217309136e-07, + "loss": 0.0007, + "step": 102710 + }, + { + "epoch": 1.735970864351926, + "grad_norm": 0.02115379087626934, + "learning_rate": 5.215050280470163e-07, + "loss": 0.0001, + "step": 102720 + }, + { + "epoch": 1.7361398646307766, + "grad_norm": 0.16476072371006012, + "learning_rate": 5.208494240138812e-07, + "loss": 0.0007, + "step": 102730 + }, + { + "epoch": 1.7363088649096272, + "grad_norm": 0.008767795749008656, + "learning_rate": 5.20194209688547e-07, + "loss": 0.0006, + "step": 102740 + }, + { + "epoch": 1.7364778651884776, + "grad_norm": 0.020127803087234497, + "learning_rate": 5.195393851280223e-07, + "loss": 0.0005, + "step": 102750 + }, + { + "epoch": 1.736646865467328, + "grad_norm": 0.011776608414947987, + "learning_rate": 5.18884950389279e-07, + "loss": 0.0002, + "step": 102760 + }, + { + "epoch": 1.7368158657461785, + "grad_norm": 0.012264423072338104, + "learning_rate": 5.182309055292573e-07, + "loss": 0.0004, + "step": 102770 + }, + { + "epoch": 1.736984866025029, + "grad_norm": 0.026176534593105316, + "learning_rate": 5.17577250604861e-07, + "loss": 0.0002, + "step": 102780 + }, + { + "epoch": 1.7371538663038795, + "grad_norm": 0.008904360234737396, + "learning_rate": 5.169239856729624e-07, + "loss": 0.0007, + "step": 102790 + }, + { + "epoch": 1.7373228665827298, + "grad_norm": 0.03166089206933975, + "learning_rate": 5.162711107903973e-07, + "loss": 0.001, + "step": 102800 + }, + { + "epoch": 1.7374918668615802, + "grad_norm": 0.04781964793801308, + "learning_rate": 5.156186260139706e-07, + "loss": 0.0009, + "step": 102810 + }, + { + "epoch": 1.7376608671404308, + "grad_norm": 0.007664358243346214, + "learning_rate": 5.149665314004498e-07, + "loss": 0.0004, + "step": 102820 + }, + { + "epoch": 1.7378298674192814, + "grad_norm": 0.0008122368599288166, + "learning_rate": 5.143148270065723e-07, + "loss": 0.0002, + "step": 102830 + }, + { + "epoch": 1.7379988676981317, + "grad_norm": 0.17051158845424652, + "learning_rate": 5.136635128890371e-07, + "loss": 0.001, + "step": 102840 + }, + { + "epoch": 1.738167867976982, + "grad_norm": 0.015453736297786236, + "learning_rate": 5.130125891045146e-07, + "loss": 0.0007, + "step": 102850 + }, + { + "epoch": 1.7383368682558327, + "grad_norm": 0.06761077046394348, + "learning_rate": 5.123620557096354e-07, + "loss": 0.0013, + "step": 102860 + }, + { + "epoch": 1.738505868534683, + "grad_norm": 0.011229383759200573, + "learning_rate": 5.11711912761001e-07, + "loss": 0.0004, + "step": 102870 
+ }, + { + "epoch": 1.7386748688135336, + "grad_norm": 0.10608507692813873, + "learning_rate": 5.110621603151766e-07, + "loss": 0.0039, + "step": 102880 + }, + { + "epoch": 1.738843869092384, + "grad_norm": 0.02043667435646057, + "learning_rate": 5.104127984286933e-07, + "loss": 0.0005, + "step": 102890 + }, + { + "epoch": 1.7390128693712343, + "grad_norm": 0.017715172842144966, + "learning_rate": 5.097638271580501e-07, + "loss": 0.0004, + "step": 102900 + }, + { + "epoch": 1.739181869650085, + "grad_norm": 0.03839319571852684, + "learning_rate": 5.091152465597082e-07, + "loss": 0.0003, + "step": 102910 + }, + { + "epoch": 1.7393508699289355, + "grad_norm": 0.04127620533108711, + "learning_rate": 5.084670566901001e-07, + "loss": 0.0008, + "step": 102920 + }, + { + "epoch": 1.7395198702077859, + "grad_norm": 0.034130506217479706, + "learning_rate": 5.07819257605619e-07, + "loss": 0.0005, + "step": 102930 + }, + { + "epoch": 1.7396888704866362, + "grad_norm": 0.027376290410757065, + "learning_rate": 5.071718493626277e-07, + "loss": 0.0008, + "step": 102940 + }, + { + "epoch": 1.7398578707654868, + "grad_norm": 0.07222705334424973, + "learning_rate": 5.065248320174543e-07, + "loss": 0.0004, + "step": 102950 + }, + { + "epoch": 1.7400268710443372, + "grad_norm": 0.006656379904597998, + "learning_rate": 5.058782056263912e-07, + "loss": 0.0007, + "step": 102960 + }, + { + "epoch": 1.7401958713231878, + "grad_norm": 0.03969889506697655, + "learning_rate": 5.052319702456987e-07, + "loss": 0.0006, + "step": 102970 + }, + { + "epoch": 1.7403648716020381, + "grad_norm": 0.02160169929265976, + "learning_rate": 5.045861259316038e-07, + "loss": 0.0004, + "step": 102980 + }, + { + "epoch": 1.7405338718808885, + "grad_norm": 0.0028784871101379395, + "learning_rate": 5.039406727402951e-07, + "loss": 0.0003, + "step": 102990 + }, + { + "epoch": 1.740702872159739, + "grad_norm": 0.0740542784333229, + "learning_rate": 5.03295610727933e-07, + "loss": 0.0005, + "step": 103000 + }, + { + "epoch": 1.7408718724385897, + "grad_norm": 0.09806045144796371, + "learning_rate": 5.02650939950639e-07, + "loss": 0.0007, + "step": 103010 + }, + { + "epoch": 1.74104087271744, + "grad_norm": 0.015504520386457443, + "learning_rate": 5.020066604645041e-07, + "loss": 0.0005, + "step": 103020 + }, + { + "epoch": 1.7412098729962904, + "grad_norm": 0.011831060983240604, + "learning_rate": 5.01362772325582e-07, + "loss": 0.0004, + "step": 103030 + }, + { + "epoch": 1.741378873275141, + "grad_norm": 0.0006339370738714933, + "learning_rate": 5.007192755898965e-07, + "loss": 0.0015, + "step": 103040 + }, + { + "epoch": 1.7415478735539913, + "grad_norm": 0.0036010006442666054, + "learning_rate": 5.000761703134321e-07, + "loss": 0.0007, + "step": 103050 + }, + { + "epoch": 1.741716873832842, + "grad_norm": 0.010716896504163742, + "learning_rate": 4.994334565521447e-07, + "loss": 0.0004, + "step": 103060 + }, + { + "epoch": 1.7418858741116923, + "grad_norm": 0.030913732945919037, + "learning_rate": 4.987911343619517e-07, + "loss": 0.001, + "step": 103070 + }, + { + "epoch": 1.7420548743905426, + "grad_norm": 0.02269650623202324, + "learning_rate": 4.981492037987396e-07, + "loss": 0.0016, + "step": 103080 + }, + { + "epoch": 1.7422238746693932, + "grad_norm": 0.07766445726156235, + "learning_rate": 4.975076649183574e-07, + "loss": 0.0047, + "step": 103090 + }, + { + "epoch": 1.7423928749482438, + "grad_norm": 0.13688509166240692, + "learning_rate": 4.968665177766247e-07, + "loss": 0.0011, + "step": 103100 + }, + { + "epoch": 
1.7425618752270942, + "grad_norm": 0.06837814301252365, + "learning_rate": 4.962257624293221e-07, + "loss": 0.0005, + "step": 103110 + }, + { + "epoch": 1.7427308755059445, + "grad_norm": 0.015745289623737335, + "learning_rate": 4.955853989321996e-07, + "loss": 0.0005, + "step": 103120 + }, + { + "epoch": 1.7428998757847949, + "grad_norm": 0.07937926054000854, + "learning_rate": 4.949454273409726e-07, + "loss": 0.0005, + "step": 103130 + }, + { + "epoch": 1.7430688760636455, + "grad_norm": 0.021244054660201073, + "learning_rate": 4.9430584771132e-07, + "loss": 0.0003, + "step": 103140 + }, + { + "epoch": 1.743237876342496, + "grad_norm": 0.0003001017321366817, + "learning_rate": 4.936666600988893e-07, + "loss": 0.0006, + "step": 103150 + }, + { + "epoch": 1.7434068766213464, + "grad_norm": 0.054227881133556366, + "learning_rate": 4.930278645592928e-07, + "loss": 0.0009, + "step": 103160 + }, + { + "epoch": 1.7435758769001968, + "grad_norm": 0.039106860756874084, + "learning_rate": 4.923894611481095e-07, + "loss": 0.0004, + "step": 103170 + }, + { + "epoch": 1.7437448771790474, + "grad_norm": 0.028639808297157288, + "learning_rate": 4.917514499208825e-07, + "loss": 0.0007, + "step": 103180 + }, + { + "epoch": 1.743913877457898, + "grad_norm": 0.005487741436809301, + "learning_rate": 4.911138309331232e-07, + "loss": 0.0021, + "step": 103190 + }, + { + "epoch": 1.7440828777367483, + "grad_norm": 0.07214861363172531, + "learning_rate": 4.904766042403053e-07, + "loss": 0.0004, + "step": 103200 + }, + { + "epoch": 1.7442518780155987, + "grad_norm": 0.09120302647352219, + "learning_rate": 4.898397698978736e-07, + "loss": 0.0007, + "step": 103210 + }, + { + "epoch": 1.744420878294449, + "grad_norm": 0.010890722274780273, + "learning_rate": 4.892033279612329e-07, + "loss": 0.0004, + "step": 103220 + }, + { + "epoch": 1.7445898785732996, + "grad_norm": 0.06599367409944534, + "learning_rate": 4.885672784857586e-07, + "loss": 0.0013, + "step": 103230 + }, + { + "epoch": 1.7447588788521502, + "grad_norm": 0.02376548759639263, + "learning_rate": 4.879316215267887e-07, + "loss": 0.0007, + "step": 103240 + }, + { + "epoch": 1.7449278791310006, + "grad_norm": 0.0024631957057863474, + "learning_rate": 4.872963571396305e-07, + "loss": 0.0007, + "step": 103250 + }, + { + "epoch": 1.745096879409851, + "grad_norm": 0.02037734165787697, + "learning_rate": 4.866614853795526e-07, + "loss": 0.0003, + "step": 103260 + }, + { + "epoch": 1.7452658796887015, + "grad_norm": 0.08212215453386307, + "learning_rate": 4.860270063017935e-07, + "loss": 0.0013, + "step": 103270 + }, + { + "epoch": 1.745434879967552, + "grad_norm": 0.05925295501947403, + "learning_rate": 4.853929199615554e-07, + "loss": 0.0008, + "step": 103280 + }, + { + "epoch": 1.7456038802464025, + "grad_norm": 0.0582982636988163, + "learning_rate": 4.847592264140078e-07, + "loss": 0.0011, + "step": 103290 + }, + { + "epoch": 1.7457728805252528, + "grad_norm": 0.0348796546459198, + "learning_rate": 4.841259257142833e-07, + "loss": 0.0009, + "step": 103300 + }, + { + "epoch": 1.7459418808041032, + "grad_norm": 0.006674604490399361, + "learning_rate": 4.834930179174829e-07, + "loss": 0.0003, + "step": 103310 + }, + { + "epoch": 1.7461108810829538, + "grad_norm": 0.006219710223376751, + "learning_rate": 4.828605030786743e-07, + "loss": 0.0008, + "step": 103320 + }, + { + "epoch": 1.7462798813618043, + "grad_norm": 0.11030562967061996, + "learning_rate": 4.822283812528871e-07, + "loss": 0.0011, + "step": 103330 + }, + { + "epoch": 1.7464488816406547, + 
"grad_norm": 0.1262752115726471, + "learning_rate": 4.815966524951193e-07, + "loss": 0.0007, + "step": 103340 + }, + { + "epoch": 1.746617881919505, + "grad_norm": 0.03812302276492119, + "learning_rate": 4.809653168603362e-07, + "loss": 0.0006, + "step": 103350 + }, + { + "epoch": 1.7467868821983557, + "grad_norm": 0.018701814115047455, + "learning_rate": 4.803343744034649e-07, + "loss": 0.0004, + "step": 103360 + }, + { + "epoch": 1.7469558824772062, + "grad_norm": 0.021062275394797325, + "learning_rate": 4.797038251794023e-07, + "loss": 0.0004, + "step": 103370 + }, + { + "epoch": 1.7471248827560566, + "grad_norm": 0.020336691290140152, + "learning_rate": 4.790736692430071e-07, + "loss": 0.001, + "step": 103380 + }, + { + "epoch": 1.747293883034907, + "grad_norm": 0.05079956725239754, + "learning_rate": 4.78443906649107e-07, + "loss": 0.0015, + "step": 103390 + }, + { + "epoch": 1.7474628833137573, + "grad_norm": 0.010214217007160187, + "learning_rate": 4.778145374524956e-07, + "loss": 0.0002, + "step": 103400 + }, + { + "epoch": 1.747631883592608, + "grad_norm": 0.10870637744665146, + "learning_rate": 4.771855617079285e-07, + "loss": 0.0006, + "step": 103410 + }, + { + "epoch": 1.7478008838714585, + "grad_norm": 0.04904106631875038, + "learning_rate": 4.765569794701325e-07, + "loss": 0.0014, + "step": 103420 + }, + { + "epoch": 1.7479698841503089, + "grad_norm": 0.009648908860981464, + "learning_rate": 4.759287907937948e-07, + "loss": 0.0003, + "step": 103430 + }, + { + "epoch": 1.7481388844291592, + "grad_norm": 0.00835387408733368, + "learning_rate": 4.7530099573357303e-07, + "loss": 0.0007, + "step": 103440 + }, + { + "epoch": 1.7483078847080098, + "grad_norm": 0.1508789360523224, + "learning_rate": 4.7467359434408613e-07, + "loss": 0.0012, + "step": 103450 + }, + { + "epoch": 1.7484768849868604, + "grad_norm": 0.03186517953872681, + "learning_rate": 4.740465866799232e-07, + "loss": 0.0007, + "step": 103460 + }, + { + "epoch": 1.7486458852657107, + "grad_norm": 0.01163002010434866, + "learning_rate": 4.734199727956357e-07, + "loss": 0.0002, + "step": 103470 + }, + { + "epoch": 1.748814885544561, + "grad_norm": 0.016560060903429985, + "learning_rate": 4.727937527457427e-07, + "loss": 0.0004, + "step": 103480 + }, + { + "epoch": 1.7489838858234115, + "grad_norm": 0.0025057720486074686, + "learning_rate": 4.7216792658472677e-07, + "loss": 0.0011, + "step": 103490 + }, + { + "epoch": 1.749152886102262, + "grad_norm": 0.004498408176004887, + "learning_rate": 4.715424943670405e-07, + "loss": 0.0005, + "step": 103500 + }, + { + "epoch": 1.7493218863811126, + "grad_norm": 0.014925013296306133, + "learning_rate": 4.7091745614709693e-07, + "loss": 0.0007, + "step": 103510 + }, + { + "epoch": 1.749490886659963, + "grad_norm": 0.09344600141048431, + "learning_rate": 4.7029281197927876e-07, + "loss": 0.0011, + "step": 103520 + }, + { + "epoch": 1.7496598869388134, + "grad_norm": 0.08888610452413559, + "learning_rate": 4.6966856191793253e-07, + "loss": 0.0005, + "step": 103530 + }, + { + "epoch": 1.749828887217664, + "grad_norm": 0.015614978969097137, + "learning_rate": 4.6904470601737253e-07, + "loss": 0.0005, + "step": 103540 + }, + { + "epoch": 1.7499978874965145, + "grad_norm": 0.024644412100315094, + "learning_rate": 4.684212443318753e-07, + "loss": 0.0008, + "step": 103550 + }, + { + "epoch": 1.7501668877753649, + "grad_norm": 0.03356558084487915, + "learning_rate": 4.67798176915687e-07, + "loss": 0.0005, + "step": 103560 + }, + { + "epoch": 1.7503358880542152, + "grad_norm": 
0.042815741151571274, + "learning_rate": 4.6717550382301466e-07, + "loss": 0.0009, + "step": 103570 + }, + { + "epoch": 1.7505048883330656, + "grad_norm": 0.0005397217464633286, + "learning_rate": 4.665532251080368e-07, + "loss": 0.0002, + "step": 103580 + }, + { + "epoch": 1.7506738886119162, + "grad_norm": 0.029403207823634148, + "learning_rate": 4.6593134082489276e-07, + "loss": 0.0004, + "step": 103590 + }, + { + "epoch": 1.7508428888907668, + "grad_norm": 0.038796812295913696, + "learning_rate": 4.65309851027691e-07, + "loss": 0.0004, + "step": 103600 + }, + { + "epoch": 1.7510118891696171, + "grad_norm": 0.0008636804413981736, + "learning_rate": 4.646887557705021e-07, + "loss": 0.0004, + "step": 103610 + }, + { + "epoch": 1.7511808894484675, + "grad_norm": 0.05733103305101395, + "learning_rate": 4.6406805510736676e-07, + "loss": 0.0006, + "step": 103620 + }, + { + "epoch": 1.751349889727318, + "grad_norm": 0.03309517353773117, + "learning_rate": 4.6344774909228617e-07, + "loss": 0.0005, + "step": 103630 + }, + { + "epoch": 1.7515188900061687, + "grad_norm": 0.0010934839956462383, + "learning_rate": 4.6282783777923226e-07, + "loss": 0.0004, + "step": 103640 + }, + { + "epoch": 1.751687890285019, + "grad_norm": 0.004879363812506199, + "learning_rate": 4.622083212221401e-07, + "loss": 0.0004, + "step": 103650 + }, + { + "epoch": 1.7518568905638694, + "grad_norm": 0.009129153564572334, + "learning_rate": 4.6158919947490887e-07, + "loss": 0.0004, + "step": 103660 + }, + { + "epoch": 1.7520258908427198, + "grad_norm": 0.02794717065989971, + "learning_rate": 4.609704725914077e-07, + "loss": 0.0007, + "step": 103670 + }, + { + "epoch": 1.7521948911215703, + "grad_norm": 0.0331835113465786, + "learning_rate": 4.6035214062546686e-07, + "loss": 0.0011, + "step": 103680 + }, + { + "epoch": 1.752363891400421, + "grad_norm": 0.022332506254315376, + "learning_rate": 4.597342036308855e-07, + "loss": 0.0005, + "step": 103690 + }, + { + "epoch": 1.7525328916792713, + "grad_norm": 0.12581118941307068, + "learning_rate": 4.5911666166142564e-07, + "loss": 0.001, + "step": 103700 + }, + { + "epoch": 1.7527018919581216, + "grad_norm": 0.004481620155274868, + "learning_rate": 4.584995147708171e-07, + "loss": 0.0004, + "step": 103710 + }, + { + "epoch": 1.7528708922369722, + "grad_norm": 0.009676892310380936, + "learning_rate": 4.5788276301275635e-07, + "loss": 0.0008, + "step": 103720 + }, + { + "epoch": 1.7530398925158228, + "grad_norm": 0.0342324897646904, + "learning_rate": 4.572664064409005e-07, + "loss": 0.0005, + "step": 103730 + }, + { + "epoch": 1.7532088927946732, + "grad_norm": 0.0059454115107655525, + "learning_rate": 4.566504451088782e-07, + "loss": 0.0007, + "step": 103740 + }, + { + "epoch": 1.7533778930735235, + "grad_norm": 0.11812199652194977, + "learning_rate": 4.5603487907028066e-07, + "loss": 0.0006, + "step": 103750 + }, + { + "epoch": 1.753546893352374, + "grad_norm": 0.0014133210061118007, + "learning_rate": 4.554197083786638e-07, + "loss": 0.0004, + "step": 103760 + }, + { + "epoch": 1.7537158936312245, + "grad_norm": 0.004159275442361832, + "learning_rate": 4.5480493308755205e-07, + "loss": 0.0003, + "step": 103770 + }, + { + "epoch": 1.753884893910075, + "grad_norm": 0.030234072357416153, + "learning_rate": 4.541905532504326e-07, + "loss": 0.0002, + "step": 103780 + }, + { + "epoch": 1.7540538941889254, + "grad_norm": 0.025236107409000397, + "learning_rate": 4.535765689207605e-07, + "loss": 0.0003, + "step": 103790 + }, + { + "epoch": 1.7542228944677758, + "grad_norm": 
0.04440271481871605, + "learning_rate": 4.529629801519542e-07, + "loss": 0.0006, + "step": 103800 + }, + { + "epoch": 1.7543918947466264, + "grad_norm": 0.2087211161851883, + "learning_rate": 4.5234978699740026e-07, + "loss": 0.0027, + "step": 103810 + }, + { + "epoch": 1.7545608950254767, + "grad_norm": 0.04368753731250763, + "learning_rate": 4.5173698951044787e-07, + "loss": 0.0007, + "step": 103820 + }, + { + "epoch": 1.7547298953043273, + "grad_norm": 0.04231741651892662, + "learning_rate": 4.511245877444154e-07, + "loss": 0.0004, + "step": 103830 + }, + { + "epoch": 1.7548988955831777, + "grad_norm": 0.019522041082382202, + "learning_rate": 4.505125817525824e-07, + "loss": 0.0015, + "step": 103840 + }, + { + "epoch": 1.755067895862028, + "grad_norm": 0.017136752605438232, + "learning_rate": 4.499009715881986e-07, + "loss": 0.0002, + "step": 103850 + }, + { + "epoch": 1.7552368961408786, + "grad_norm": 0.0033868923783302307, + "learning_rate": 4.4928975730447466e-07, + "loss": 0.0005, + "step": 103860 + }, + { + "epoch": 1.7554058964197292, + "grad_norm": 0.030208753421902657, + "learning_rate": 4.4867893895459203e-07, + "loss": 0.0005, + "step": 103870 + }, + { + "epoch": 1.7555748966985796, + "grad_norm": 0.0030300342477858067, + "learning_rate": 4.4806851659169204e-07, + "loss": 0.0011, + "step": 103880 + }, + { + "epoch": 1.75574389697743, + "grad_norm": 0.06566812098026276, + "learning_rate": 4.474584902688861e-07, + "loss": 0.0014, + "step": 103890 + }, + { + "epoch": 1.7559128972562805, + "grad_norm": 0.04958728328347206, + "learning_rate": 4.4684886003924897e-07, + "loss": 0.0005, + "step": 103900 + }, + { + "epoch": 1.7560818975351309, + "grad_norm": 0.01267607044428587, + "learning_rate": 4.462396259558222e-07, + "loss": 0.0009, + "step": 103910 + }, + { + "epoch": 1.7562508978139815, + "grad_norm": 0.06525146216154099, + "learning_rate": 4.4563078807161044e-07, + "loss": 0.0005, + "step": 103920 + }, + { + "epoch": 1.7564198980928318, + "grad_norm": 0.006330079864710569, + "learning_rate": 4.4502234643958695e-07, + "loss": 0.0008, + "step": 103930 + }, + { + "epoch": 1.7565888983716822, + "grad_norm": 0.054142214357852936, + "learning_rate": 4.4441430111268945e-07, + "loss": 0.0003, + "step": 103940 + }, + { + "epoch": 1.7567578986505328, + "grad_norm": 0.018102606758475304, + "learning_rate": 4.4380665214381945e-07, + "loss": 0.0008, + "step": 103950 + }, + { + "epoch": 1.7569268989293834, + "grad_norm": 0.04914461821317673, + "learning_rate": 4.4319939958584636e-07, + "loss": 0.0006, + "step": 103960 + }, + { + "epoch": 1.7570958992082337, + "grad_norm": 0.07141398638486862, + "learning_rate": 4.425925434916034e-07, + "loss": 0.001, + "step": 103970 + }, + { + "epoch": 1.757264899487084, + "grad_norm": 0.08057130128145218, + "learning_rate": 4.419860839138906e-07, + "loss": 0.0005, + "step": 103980 + }, + { + "epoch": 1.7574338997659344, + "grad_norm": 0.02311614342033863, + "learning_rate": 4.4138002090547194e-07, + "loss": 0.0003, + "step": 103990 + }, + { + "epoch": 1.757602900044785, + "grad_norm": 0.059669192880392075, + "learning_rate": 4.407743545190796e-07, + "loss": 0.0011, + "step": 104000 + }, + { + "epoch": 1.7577719003236356, + "grad_norm": 0.03703858703374863, + "learning_rate": 4.4016908480740696e-07, + "loss": 0.0008, + "step": 104010 + }, + { + "epoch": 1.757940900602486, + "grad_norm": 0.11064209789037704, + "learning_rate": 4.3956421182311806e-07, + "loss": 0.001, + "step": 104020 + }, + { + "epoch": 1.7581099008813363, + "grad_norm": 
0.002106801373884082, + "learning_rate": 4.38959735618838e-07, + "loss": 0.0004, + "step": 104030 + }, + { + "epoch": 1.758278901160187, + "grad_norm": 0.016959697008132935, + "learning_rate": 4.383556562471603e-07, + "loss": 0.0005, + "step": 104040 + }, + { + "epoch": 1.7584479014390375, + "grad_norm": 0.023819249123334885, + "learning_rate": 4.3775197376064117e-07, + "loss": 0.0006, + "step": 104050 + }, + { + "epoch": 1.7586169017178879, + "grad_norm": 0.07967918366193771, + "learning_rate": 4.371486882118053e-07, + "loss": 0.0005, + "step": 104060 + }, + { + "epoch": 1.7587859019967382, + "grad_norm": 0.014442991465330124, + "learning_rate": 4.365457996531408e-07, + "loss": 0.0006, + "step": 104070 + }, + { + "epoch": 1.7589549022755886, + "grad_norm": 0.022880321368575096, + "learning_rate": 4.359433081371023e-07, + "loss": 0.0007, + "step": 104080 + }, + { + "epoch": 1.7591239025544392, + "grad_norm": 0.050609149038791656, + "learning_rate": 4.3534121371611007e-07, + "loss": 0.0007, + "step": 104090 + }, + { + "epoch": 1.7592929028332898, + "grad_norm": 0.03655462712049484, + "learning_rate": 4.3473951644254775e-07, + "loss": 0.0005, + "step": 104100 + }, + { + "epoch": 1.7594619031121401, + "grad_norm": 0.004376859869807959, + "learning_rate": 4.341382163687663e-07, + "loss": 0.0005, + "step": 104110 + }, + { + "epoch": 1.7596309033909905, + "grad_norm": 0.01908200979232788, + "learning_rate": 4.335373135470833e-07, + "loss": 0.0005, + "step": 104120 + }, + { + "epoch": 1.759799903669841, + "grad_norm": 0.05158529058098793, + "learning_rate": 4.329368080297786e-07, + "loss": 0.0007, + "step": 104130 + }, + { + "epoch": 1.7599689039486917, + "grad_norm": 0.17117096483707428, + "learning_rate": 4.3233669986909975e-07, + "loss": 0.0012, + "step": 104140 + }, + { + "epoch": 1.760137904227542, + "grad_norm": 0.009141642600297928, + "learning_rate": 4.317369891172579e-07, + "loss": 0.0005, + "step": 104150 + }, + { + "epoch": 1.7603069045063924, + "grad_norm": 0.0012519872980192304, + "learning_rate": 4.311376758264324e-07, + "loss": 0.0006, + "step": 104160 + }, + { + "epoch": 1.7604759047852427, + "grad_norm": 0.019536923617124557, + "learning_rate": 4.3053876004876595e-07, + "loss": 0.0006, + "step": 104170 + }, + { + "epoch": 1.7606449050640933, + "grad_norm": 0.0017009720904752612, + "learning_rate": 4.299402418363663e-07, + "loss": 0.0006, + "step": 104180 + }, + { + "epoch": 1.760813905342944, + "grad_norm": 0.07516901195049286, + "learning_rate": 4.29342121241309e-07, + "loss": 0.0005, + "step": 104190 + }, + { + "epoch": 1.7609829056217943, + "grad_norm": 0.08406435698270798, + "learning_rate": 4.28744398315632e-07, + "loss": 0.0006, + "step": 104200 + }, + { + "epoch": 1.7611519059006446, + "grad_norm": 0.002859867876395583, + "learning_rate": 4.2814707311134074e-07, + "loss": 0.0007, + "step": 104210 + }, + { + "epoch": 1.7613209061794952, + "grad_norm": 0.0836159959435463, + "learning_rate": 4.275501456804049e-07, + "loss": 0.0003, + "step": 104220 + }, + { + "epoch": 1.7614899064583458, + "grad_norm": 0.07688683271408081, + "learning_rate": 4.269536160747612e-07, + "loss": 0.0004, + "step": 104230 + }, + { + "epoch": 1.7616589067371962, + "grad_norm": 0.01429643202573061, + "learning_rate": 4.263574843463086e-07, + "loss": 0.0007, + "step": 104240 + }, + { + "epoch": 1.7618279070160465, + "grad_norm": 0.08189475536346436, + "learning_rate": 4.257617505469158e-07, + "loss": 0.0006, + "step": 104250 + }, + { + "epoch": 1.7619969072948969, + "grad_norm": 0.03205665573477745, + 
"learning_rate": 4.251664147284129e-07, + "loss": 0.0005, + "step": 104260 + }, + { + "epoch": 1.7621659075737475, + "grad_norm": 0.06763836741447449, + "learning_rate": 4.245714769425979e-07, + "loss": 0.0005, + "step": 104270 + }, + { + "epoch": 1.762334907852598, + "grad_norm": 0.05251099169254303, + "learning_rate": 4.2397693724123167e-07, + "loss": 0.0004, + "step": 104280 + }, + { + "epoch": 1.7625039081314484, + "grad_norm": 0.07325220108032227, + "learning_rate": 4.233827956760439e-07, + "loss": 0.0007, + "step": 104290 + }, + { + "epoch": 1.7626729084102988, + "grad_norm": 0.05792950093746185, + "learning_rate": 4.227890522987271e-07, + "loss": 0.0011, + "step": 104300 + }, + { + "epoch": 1.7628419086891494, + "grad_norm": 0.04769614338874817, + "learning_rate": 4.2219570716094007e-07, + "loss": 0.0016, + "step": 104310 + }, + { + "epoch": 1.763010908968, + "grad_norm": 0.0192970372736454, + "learning_rate": 4.2160276031430637e-07, + "loss": 0.0003, + "step": 104320 + }, + { + "epoch": 1.7631799092468503, + "grad_norm": 0.06704641878604889, + "learning_rate": 4.210102118104159e-07, + "loss": 0.0007, + "step": 104330 + }, + { + "epoch": 1.7633489095257007, + "grad_norm": 0.018758367747068405, + "learning_rate": 4.204180617008219e-07, + "loss": 0.0007, + "step": 104340 + }, + { + "epoch": 1.763517909804551, + "grad_norm": 0.03824960067868233, + "learning_rate": 4.1982631003704586e-07, + "loss": 0.0006, + "step": 104350 + }, + { + "epoch": 1.7636869100834016, + "grad_norm": 0.020262107253074646, + "learning_rate": 4.192349568705717e-07, + "loss": 0.0006, + "step": 104360 + }, + { + "epoch": 1.7638559103622522, + "grad_norm": 0.07570063322782516, + "learning_rate": 4.1864400225285094e-07, + "loss": 0.0007, + "step": 104370 + }, + { + "epoch": 1.7640249106411026, + "grad_norm": 0.04012012109160423, + "learning_rate": 4.1805344623529863e-07, + "loss": 0.0006, + "step": 104380 + }, + { + "epoch": 1.764193910919953, + "grad_norm": 0.06360950320959091, + "learning_rate": 4.174632888692975e-07, + "loss": 0.0007, + "step": 104390 + }, + { + "epoch": 1.7643629111988035, + "grad_norm": 0.043980613350868225, + "learning_rate": 4.1687353020619217e-07, + "loss": 0.0007, + "step": 104400 + }, + { + "epoch": 1.764531911477654, + "grad_norm": 0.0020944224670529366, + "learning_rate": 4.1628417029729586e-07, + "loss": 0.0007, + "step": 104410 + }, + { + "epoch": 1.7647009117565045, + "grad_norm": 0.03159353509545326, + "learning_rate": 4.1569520919388593e-07, + "loss": 0.0007, + "step": 104420 + }, + { + "epoch": 1.7648699120353548, + "grad_norm": 0.06862014532089233, + "learning_rate": 4.1510664694720314e-07, + "loss": 0.0009, + "step": 104430 + }, + { + "epoch": 1.7650389123142052, + "grad_norm": 0.020865771919488907, + "learning_rate": 4.145184836084576e-07, + "loss": 0.0006, + "step": 104440 + }, + { + "epoch": 1.7652079125930558, + "grad_norm": 0.03221869096159935, + "learning_rate": 4.1393071922882e-07, + "loss": 0.0004, + "step": 104450 + }, + { + "epoch": 1.7653769128719063, + "grad_norm": 0.04893992096185684, + "learning_rate": 4.133433538594306e-07, + "loss": 0.0005, + "step": 104460 + }, + { + "epoch": 1.7655459131507567, + "grad_norm": 0.08351850509643555, + "learning_rate": 4.127563875513918e-07, + "loss": 0.0004, + "step": 104470 + }, + { + "epoch": 1.765714913429607, + "grad_norm": 0.03808083385229111, + "learning_rate": 4.121698203557728e-07, + "loss": 0.0006, + "step": 104480 + }, + { + "epoch": 1.7658839137084577, + "grad_norm": 0.01868014596402645, + "learning_rate": 
4.115836523236083e-07, + "loss": 0.0006, + "step": 104490 + }, + { + "epoch": 1.7660529139873082, + "grad_norm": 0.0008707979577593505, + "learning_rate": 4.1099788350589806e-07, + "loss": 0.0004, + "step": 104500 + }, + { + "epoch": 1.7662219142661586, + "grad_norm": 0.07478159666061401, + "learning_rate": 4.1041251395360525e-07, + "loss": 0.0005, + "step": 104510 + }, + { + "epoch": 1.766390914545009, + "grad_norm": 0.009744617156684399, + "learning_rate": 4.098275437176613e-07, + "loss": 0.0006, + "step": 104520 + }, + { + "epoch": 1.7665599148238593, + "grad_norm": 0.04124493896961212, + "learning_rate": 4.0924297284896054e-07, + "loss": 0.0011, + "step": 104530 + }, + { + "epoch": 1.76672891510271, + "grad_norm": 0.007389638107270002, + "learning_rate": 4.08658801398365e-07, + "loss": 0.0003, + "step": 104540 + }, + { + "epoch": 1.7668979153815605, + "grad_norm": 0.00717326020821929, + "learning_rate": 4.0807502941669797e-07, + "loss": 0.0004, + "step": 104550 + }, + { + "epoch": 1.7670669156604109, + "grad_norm": 0.09308916330337524, + "learning_rate": 4.074916569547527e-07, + "loss": 0.0003, + "step": 104560 + }, + { + "epoch": 1.7672359159392612, + "grad_norm": 0.06012408062815666, + "learning_rate": 4.0690868406328355e-07, + "loss": 0.0011, + "step": 104570 + }, + { + "epoch": 1.7674049162181118, + "grad_norm": 0.00715381745249033, + "learning_rate": 4.063261107930139e-07, + "loss": 0.0004, + "step": 104580 + }, + { + "epoch": 1.7675739164969624, + "grad_norm": 0.018232326954603195, + "learning_rate": 4.0574393719462866e-07, + "loss": 0.0006, + "step": 104590 + }, + { + "epoch": 1.7677429167758127, + "grad_norm": 0.007518917787820101, + "learning_rate": 4.0516216331878135e-07, + "loss": 0.0005, + "step": 104600 + }, + { + "epoch": 1.767911917054663, + "grad_norm": 0.02275843173265457, + "learning_rate": 4.04580789216088e-07, + "loss": 0.0005, + "step": 104610 + }, + { + "epoch": 1.7680809173335135, + "grad_norm": 0.02745683863759041, + "learning_rate": 4.039998149371316e-07, + "loss": 0.0007, + "step": 104620 + }, + { + "epoch": 1.768249917612364, + "grad_norm": 0.025723101571202278, + "learning_rate": 4.034192405324583e-07, + "loss": 0.0003, + "step": 104630 + }, + { + "epoch": 1.7684189178912146, + "grad_norm": 0.020526999607682228, + "learning_rate": 4.0283906605258325e-07, + "loss": 0.0007, + "step": 104640 + }, + { + "epoch": 1.768587918170065, + "grad_norm": 0.002178657567128539, + "learning_rate": 4.022592915479828e-07, + "loss": 0.0015, + "step": 104650 + }, + { + "epoch": 1.7687569184489154, + "grad_norm": 0.06424304097890854, + "learning_rate": 4.0167991706909994e-07, + "loss": 0.0009, + "step": 104660 + }, + { + "epoch": 1.768925918727766, + "grad_norm": 0.04882480204105377, + "learning_rate": 4.011009426663437e-07, + "loss": 0.0011, + "step": 104670 + }, + { + "epoch": 1.7690949190066163, + "grad_norm": 0.05100816860795021, + "learning_rate": 4.0052236839008885e-07, + "loss": 0.0012, + "step": 104680 + }, + { + "epoch": 1.769263919285467, + "grad_norm": 0.028061311691999435, + "learning_rate": 3.999441942906718e-07, + "loss": 0.0003, + "step": 104690 + }, + { + "epoch": 1.7694329195643173, + "grad_norm": 0.05454196035861969, + "learning_rate": 3.9936642041839726e-07, + "loss": 0.0016, + "step": 104700 + }, + { + "epoch": 1.7696019198431676, + "grad_norm": 0.02162751741707325, + "learning_rate": 3.9878904682353617e-07, + "loss": 0.0007, + "step": 104710 + }, + { + "epoch": 1.7697709201220182, + "grad_norm": 0.04085154831409454, + "learning_rate": 3.982120735563205e-07, 
+ "loss": 0.0006, + "step": 104720 + }, + { + "epoch": 1.7699399204008688, + "grad_norm": 0.05033764988183975, + "learning_rate": 3.9763550066695135e-07, + "loss": 0.0006, + "step": 104730 + }, + { + "epoch": 1.7701089206797191, + "grad_norm": 0.0951123908162117, + "learning_rate": 3.9705932820559176e-07, + "loss": 0.004, + "step": 104740 + }, + { + "epoch": 1.7702779209585695, + "grad_norm": 0.06430260092020035, + "learning_rate": 3.964835562223729e-07, + "loss": 0.0003, + "step": 104750 + }, + { + "epoch": 1.77044692123742, + "grad_norm": 0.060577888041734695, + "learning_rate": 3.95908184767389e-07, + "loss": 0.0006, + "step": 104760 + }, + { + "epoch": 1.7706159215162705, + "grad_norm": 0.012775205075740814, + "learning_rate": 3.9533321389070066e-07, + "loss": 0.0003, + "step": 104770 + }, + { + "epoch": 1.770784921795121, + "grad_norm": 0.029996542260050774, + "learning_rate": 3.947586436423323e-07, + "loss": 0.0004, + "step": 104780 + }, + { + "epoch": 1.7709539220739714, + "grad_norm": 0.02309521473944187, + "learning_rate": 3.9418447407227555e-07, + "loss": 0.0006, + "step": 104790 + }, + { + "epoch": 1.7711229223528218, + "grad_norm": 0.015215246006846428, + "learning_rate": 3.936107052304844e-07, + "loss": 0.0007, + "step": 104800 + }, + { + "epoch": 1.7712919226316723, + "grad_norm": 0.007620797958225012, + "learning_rate": 3.9303733716688106e-07, + "loss": 0.0013, + "step": 104810 + }, + { + "epoch": 1.771460922910523, + "grad_norm": 0.0433606281876564, + "learning_rate": 3.924643699313502e-07, + "loss": 0.0007, + "step": 104820 + }, + { + "epoch": 1.7716299231893733, + "grad_norm": 0.04617132619023323, + "learning_rate": 3.9189180357374346e-07, + "loss": 0.0005, + "step": 104830 + }, + { + "epoch": 1.7717989234682237, + "grad_norm": 0.014687767252326012, + "learning_rate": 3.9131963814387606e-07, + "loss": 0.0007, + "step": 104840 + }, + { + "epoch": 1.7719679237470742, + "grad_norm": 0.04283357411623001, + "learning_rate": 3.9074787369152986e-07, + "loss": 0.0005, + "step": 104850 + }, + { + "epoch": 1.7721369240259246, + "grad_norm": 0.03274627402424812, + "learning_rate": 3.9017651026645175e-07, + "loss": 0.0007, + "step": 104860 + }, + { + "epoch": 1.7723059243047752, + "grad_norm": 0.0861838236451149, + "learning_rate": 3.896055479183514e-07, + "loss": 0.0022, + "step": 104870 + }, + { + "epoch": 1.7724749245836255, + "grad_norm": 0.014093455858528614, + "learning_rate": 3.890349866969062e-07, + "loss": 0.0007, + "step": 104880 + }, + { + "epoch": 1.772643924862476, + "grad_norm": 0.02075990103185177, + "learning_rate": 3.884648266517588e-07, + "loss": 0.0006, + "step": 104890 + }, + { + "epoch": 1.7728129251413265, + "grad_norm": 0.013891511596739292, + "learning_rate": 3.878950678325144e-07, + "loss": 0.0004, + "step": 104900 + }, + { + "epoch": 1.772981925420177, + "grad_norm": 0.10854381322860718, + "learning_rate": 3.8732571028874566e-07, + "loss": 0.0008, + "step": 104910 + }, + { + "epoch": 1.7731509256990274, + "grad_norm": 0.06576929986476898, + "learning_rate": 3.8675675406998793e-07, + "loss": 0.0009, + "step": 104920 + }, + { + "epoch": 1.7733199259778778, + "grad_norm": 0.03649206459522247, + "learning_rate": 3.861881992257449e-07, + "loss": 0.0004, + "step": 104930 + }, + { + "epoch": 1.7734889262567282, + "grad_norm": 0.029594246298074722, + "learning_rate": 3.8562004580548374e-07, + "loss": 0.0018, + "step": 104940 + }, + { + "epoch": 1.7736579265355787, + "grad_norm": 0.018652699887752533, + "learning_rate": 3.850522938586343e-07, + "loss": 0.0003, + 
"step": 104950 + }, + { + "epoch": 1.7738269268144293, + "grad_norm": 0.03187130391597748, + "learning_rate": 3.844849434345965e-07, + "loss": 0.0007, + "step": 104960 + }, + { + "epoch": 1.7739959270932797, + "grad_norm": 0.04595885053277016, + "learning_rate": 3.839179945827304e-07, + "loss": 0.0006, + "step": 104970 + }, + { + "epoch": 1.77416492737213, + "grad_norm": 0.0030543392058461905, + "learning_rate": 3.833514473523653e-07, + "loss": 0.0008, + "step": 104980 + }, + { + "epoch": 1.7743339276509806, + "grad_norm": 0.18576397001743317, + "learning_rate": 3.827853017927913e-07, + "loss": 0.0011, + "step": 104990 + }, + { + "epoch": 1.7745029279298312, + "grad_norm": 0.03337743878364563, + "learning_rate": 3.822195579532678e-07, + "loss": 0.0012, + "step": 105000 + }, + { + "epoch": 1.7746719282086816, + "grad_norm": 0.09377391636371613, + "learning_rate": 3.816542158830161e-07, + "loss": 0.0006, + "step": 105010 + }, + { + "epoch": 1.774840928487532, + "grad_norm": 0.0215314794331789, + "learning_rate": 3.81089275631224e-07, + "loss": 0.0004, + "step": 105020 + }, + { + "epoch": 1.7750099287663823, + "grad_norm": 0.13656103610992432, + "learning_rate": 3.805247372470439e-07, + "loss": 0.0006, + "step": 105030 + }, + { + "epoch": 1.775178929045233, + "grad_norm": 0.012975458987057209, + "learning_rate": 3.7996060077959306e-07, + "loss": 0.0013, + "step": 105040 + }, + { + "epoch": 1.7753479293240835, + "grad_norm": 0.028509151190519333, + "learning_rate": 3.793968662779557e-07, + "loss": 0.0004, + "step": 105050 + }, + { + "epoch": 1.7755169296029338, + "grad_norm": 0.16324865818023682, + "learning_rate": 3.788335337911775e-07, + "loss": 0.0009, + "step": 105060 + }, + { + "epoch": 1.7756859298817842, + "grad_norm": 0.002793596126139164, + "learning_rate": 3.7827060336827206e-07, + "loss": 0.0007, + "step": 105070 + }, + { + "epoch": 1.7758549301606348, + "grad_norm": 0.02703750506043434, + "learning_rate": 3.777080750582174e-07, + "loss": 0.0007, + "step": 105080 + }, + { + "epoch": 1.7760239304394854, + "grad_norm": 0.009869939647614956, + "learning_rate": 3.771459489099549e-07, + "loss": 0.0005, + "step": 105090 + }, + { + "epoch": 1.7761929307183357, + "grad_norm": 0.02329307049512863, + "learning_rate": 3.7658422497239443e-07, + "loss": 0.001, + "step": 105100 + }, + { + "epoch": 1.776361930997186, + "grad_norm": 0.023844977840781212, + "learning_rate": 3.7602290329440573e-07, + "loss": 0.0002, + "step": 105110 + }, + { + "epoch": 1.7765309312760365, + "grad_norm": 0.011266546323895454, + "learning_rate": 3.754619839248297e-07, + "loss": 0.0009, + "step": 105120 + }, + { + "epoch": 1.776699931554887, + "grad_norm": 0.025293419137597084, + "learning_rate": 3.749014669124662e-07, + "loss": 0.0008, + "step": 105130 + }, + { + "epoch": 1.7768689318337376, + "grad_norm": 0.029278069734573364, + "learning_rate": 3.7434135230608513e-07, + "loss": 0.0004, + "step": 105140 + }, + { + "epoch": 1.777037932112588, + "grad_norm": 0.16719502210617065, + "learning_rate": 3.737816401544175e-07, + "loss": 0.0003, + "step": 105150 + }, + { + "epoch": 1.7772069323914383, + "grad_norm": 9.560196485836059e-05, + "learning_rate": 3.732223305061622e-07, + "loss": 0.0002, + "step": 105160 + }, + { + "epoch": 1.777375932670289, + "grad_norm": 0.0026868858840316534, + "learning_rate": 3.726634234099813e-07, + "loss": 0.0004, + "step": 105170 + }, + { + "epoch": 1.7775449329491395, + "grad_norm": 0.013223793357610703, + "learning_rate": 3.721049189145021e-07, + "loss": 0.001, + "step": 105180 + }, + { + 
"epoch": 1.7777139332279899, + "grad_norm": 0.0006191139691509306, + "learning_rate": 3.715468170683184e-07, + "loss": 0.0006, + "step": 105190 + }, + { + "epoch": 1.7778829335068402, + "grad_norm": 0.2048901915550232, + "learning_rate": 3.709891179199859e-07, + "loss": 0.0006, + "step": 105200 + }, + { + "epoch": 1.7780519337856906, + "grad_norm": 0.03425382822751999, + "learning_rate": 3.7043182151802915e-07, + "loss": 0.0005, + "step": 105210 + }, + { + "epoch": 1.7782209340645412, + "grad_norm": 0.05309786647558212, + "learning_rate": 3.6987492791093427e-07, + "loss": 0.0007, + "step": 105220 + }, + { + "epoch": 1.7783899343433918, + "grad_norm": 0.0020808845292776823, + "learning_rate": 3.6931843714715474e-07, + "loss": 0.0011, + "step": 105230 + }, + { + "epoch": 1.7785589346222421, + "grad_norm": 0.02424847148358822, + "learning_rate": 3.687623492751063e-07, + "loss": 0.0008, + "step": 105240 + }, + { + "epoch": 1.7787279349010925, + "grad_norm": 0.0008779786294326186, + "learning_rate": 3.6820666434317255e-07, + "loss": 0.0005, + "step": 105250 + }, + { + "epoch": 1.778896935179943, + "grad_norm": 0.008909628726541996, + "learning_rate": 3.676513823997008e-07, + "loss": 0.0008, + "step": 105260 + }, + { + "epoch": 1.7790659354587937, + "grad_norm": 0.011672201566398144, + "learning_rate": 3.6709650349300353e-07, + "loss": 0.0001, + "step": 105270 + }, + { + "epoch": 1.779234935737644, + "grad_norm": 0.014921033754944801, + "learning_rate": 3.665420276713566e-07, + "loss": 0.0003, + "step": 105280 + }, + { + "epoch": 1.7794039360164944, + "grad_norm": 0.005454959347844124, + "learning_rate": 3.6598795498300365e-07, + "loss": 0.0013, + "step": 105290 + }, + { + "epoch": 1.7795729362953447, + "grad_norm": 0.03779270127415657, + "learning_rate": 3.6543428547615056e-07, + "loss": 0.0031, + "step": 105300 + }, + { + "epoch": 1.7797419365741953, + "grad_norm": 0.011112458072602749, + "learning_rate": 3.648810191989699e-07, + "loss": 0.0004, + "step": 105310 + }, + { + "epoch": 1.779910936853046, + "grad_norm": 0.0141755947843194, + "learning_rate": 3.643281561995976e-07, + "loss": 0.0017, + "step": 105320 + }, + { + "epoch": 1.7800799371318963, + "grad_norm": 0.009296818636357784, + "learning_rate": 3.6377569652613684e-07, + "loss": 0.0006, + "step": 105330 + }, + { + "epoch": 1.7802489374107466, + "grad_norm": 0.11248309910297394, + "learning_rate": 3.632236402266526e-07, + "loss": 0.0015, + "step": 105340 + }, + { + "epoch": 1.7804179376895972, + "grad_norm": 0.06976965814828873, + "learning_rate": 3.6267198734917855e-07, + "loss": 0.0007, + "step": 105350 + }, + { + "epoch": 1.7805869379684478, + "grad_norm": 0.020244039595127106, + "learning_rate": 3.621207379417091e-07, + "loss": 0.0005, + "step": 105360 + }, + { + "epoch": 1.7807559382472982, + "grad_norm": 0.019706254824995995, + "learning_rate": 3.6156989205220704e-07, + "loss": 0.0004, + "step": 105370 + }, + { + "epoch": 1.7809249385261485, + "grad_norm": 0.013626663014292717, + "learning_rate": 3.6101944972859736e-07, + "loss": 0.0003, + "step": 105380 + }, + { + "epoch": 1.7810939388049989, + "grad_norm": 0.02177486941218376, + "learning_rate": 3.604694110187723e-07, + "loss": 0.0004, + "step": 105390 + }, + { + "epoch": 1.7812629390838495, + "grad_norm": 0.004014752805233002, + "learning_rate": 3.5991977597058746e-07, + "loss": 0.0004, + "step": 105400 + }, + { + "epoch": 1.7814319393627, + "grad_norm": 0.0028710479382425547, + "learning_rate": 3.5937054463186406e-07, + "loss": 0.001, + "step": 105410 + }, + { + "epoch": 
1.7816009396415504, + "grad_norm": 0.0014161871513351798, + "learning_rate": 3.5882171705038715e-07, + "loss": 0.0005, + "step": 105420 + }, + { + "epoch": 1.7817699399204008, + "grad_norm": 0.0008943057619035244, + "learning_rate": 3.5827329327390803e-07, + "loss": 0.0003, + "step": 105430 + }, + { + "epoch": 1.7819389401992514, + "grad_norm": 0.022660084068775177, + "learning_rate": 3.5772527335014184e-07, + "loss": 0.0009, + "step": 105440 + }, + { + "epoch": 1.782107940478102, + "grad_norm": 0.020562347024679184, + "learning_rate": 3.571776573267699e-07, + "loss": 0.0006, + "step": 105450 + }, + { + "epoch": 1.7822769407569523, + "grad_norm": 0.007296978496015072, + "learning_rate": 3.566304452514363e-07, + "loss": 0.0008, + "step": 105460 + }, + { + "epoch": 1.7824459410358027, + "grad_norm": 0.019784986972808838, + "learning_rate": 3.5608363717175134e-07, + "loss": 0.0006, + "step": 105470 + }, + { + "epoch": 1.782614941314653, + "grad_norm": 0.10019789636135101, + "learning_rate": 3.5553723313529074e-07, + "loss": 0.0007, + "step": 105480 + }, + { + "epoch": 1.7827839415935036, + "grad_norm": 0.021866897121071815, + "learning_rate": 3.549912331895938e-07, + "loss": 0.0007, + "step": 105490 + }, + { + "epoch": 1.7829529418723542, + "grad_norm": 0.00043900907621718943, + "learning_rate": 3.5444563738216533e-07, + "loss": 0.0007, + "step": 105500 + }, + { + "epoch": 1.7831219421512046, + "grad_norm": 0.0011269195238128304, + "learning_rate": 3.539004457604739e-07, + "loss": 0.0011, + "step": 105510 + }, + { + "epoch": 1.783290942430055, + "grad_norm": 0.25038066506385803, + "learning_rate": 3.533556583719555e-07, + "loss": 0.0013, + "step": 105520 + }, + { + "epoch": 1.7834599427089055, + "grad_norm": 0.011651636101305485, + "learning_rate": 3.5281127526400785e-07, + "loss": 0.0009, + "step": 105530 + }, + { + "epoch": 1.783628942987756, + "grad_norm": 0.03919496014714241, + "learning_rate": 3.522672964839957e-07, + "loss": 0.0004, + "step": 105540 + }, + { + "epoch": 1.7837979432666065, + "grad_norm": 0.012245673686265945, + "learning_rate": 3.517237220792469e-07, + "loss": 0.0005, + "step": 105550 + }, + { + "epoch": 1.7839669435454568, + "grad_norm": 0.018710102885961533, + "learning_rate": 3.5118055209705625e-07, + "loss": 0.0002, + "step": 105560 + }, + { + "epoch": 1.7841359438243072, + "grad_norm": 0.1540943682193756, + "learning_rate": 3.5063778658468105e-07, + "loss": 0.001, + "step": 105570 + }, + { + "epoch": 1.7843049441031578, + "grad_norm": 0.018390536308288574, + "learning_rate": 3.500954255893463e-07, + "loss": 0.0007, + "step": 105580 + }, + { + "epoch": 1.7844739443820083, + "grad_norm": 0.14161762595176697, + "learning_rate": 3.495534691582375e-07, + "loss": 0.0007, + "step": 105590 + }, + { + "epoch": 1.7846429446608587, + "grad_norm": 0.010210787877440453, + "learning_rate": 3.4901191733850983e-07, + "loss": 0.0005, + "step": 105600 + }, + { + "epoch": 1.784811944939709, + "grad_norm": 0.008424220606684685, + "learning_rate": 3.484707701772788e-07, + "loss": 0.0005, + "step": 105610 + }, + { + "epoch": 1.7849809452185597, + "grad_norm": 0.08034904301166534, + "learning_rate": 3.479300277216285e-07, + "loss": 0.0011, + "step": 105620 + }, + { + "epoch": 1.78514994549741, + "grad_norm": 0.02755795232951641, + "learning_rate": 3.473896900186052e-07, + "loss": 0.0004, + "step": 105630 + }, + { + "epoch": 1.7853189457762606, + "grad_norm": 0.017700070515275, + "learning_rate": 3.468497571152218e-07, + "loss": 0.0007, + "step": 105640 + }, + { + "epoch": 
1.785487946055111, + "grad_norm": 0.00878935493528843, + "learning_rate": 3.4631022905845413e-07, + "loss": 0.0013, + "step": 105650 + }, + { + "epoch": 1.7856569463339613, + "grad_norm": 0.05933452025055885, + "learning_rate": 3.457711058952451e-07, + "loss": 0.0007, + "step": 105660 + }, + { + "epoch": 1.785825946612812, + "grad_norm": 0.00013369330554269254, + "learning_rate": 3.4523238767249943e-07, + "loss": 0.0008, + "step": 105670 + }, + { + "epoch": 1.7859949468916625, + "grad_norm": 0.0019009606912732124, + "learning_rate": 3.446940744370886e-07, + "loss": 0.0007, + "step": 105680 + }, + { + "epoch": 1.7861639471705129, + "grad_norm": 0.007505466230213642, + "learning_rate": 3.441561662358495e-07, + "loss": 0.0004, + "step": 105690 + }, + { + "epoch": 1.7863329474493632, + "grad_norm": 0.02765263058245182, + "learning_rate": 3.436186631155813e-07, + "loss": 0.0004, + "step": 105700 + }, + { + "epoch": 1.7865019477282138, + "grad_norm": 0.049307625740766525, + "learning_rate": 3.430815651230507e-07, + "loss": 0.0003, + "step": 105710 + }, + { + "epoch": 1.7866709480070642, + "grad_norm": 0.021655231714248657, + "learning_rate": 3.425448723049868e-07, + "loss": 0.0004, + "step": 105720 + }, + { + "epoch": 1.7868399482859147, + "grad_norm": 0.0027275453321635723, + "learning_rate": 3.4200858470808564e-07, + "loss": 0.0004, + "step": 105730 + }, + { + "epoch": 1.787008948564765, + "grad_norm": 0.0030428189784288406, + "learning_rate": 3.4147270237900543e-07, + "loss": 0.0007, + "step": 105740 + }, + { + "epoch": 1.7871779488436155, + "grad_norm": 0.013712714426219463, + "learning_rate": 3.409372253643717e-07, + "loss": 0.0005, + "step": 105750 + }, + { + "epoch": 1.787346949122466, + "grad_norm": 0.012625922448933125, + "learning_rate": 3.404021537107721e-07, + "loss": 0.0007, + "step": 105760 + }, + { + "epoch": 1.7875159494013166, + "grad_norm": 0.08924346417188644, + "learning_rate": 3.398674874647623e-07, + "loss": 0.0006, + "step": 105770 + }, + { + "epoch": 1.787684949680167, + "grad_norm": 0.030749188736081123, + "learning_rate": 3.3933322667285937e-07, + "loss": 0.0003, + "step": 105780 + }, + { + "epoch": 1.7878539499590174, + "grad_norm": 0.04751649126410484, + "learning_rate": 3.3879937138154793e-07, + "loss": 0.0015, + "step": 105790 + }, + { + "epoch": 1.788022950237868, + "grad_norm": 0.020827028900384903, + "learning_rate": 3.3826592163727413e-07, + "loss": 0.0007, + "step": 105800 + }, + { + "epoch": 1.7881919505167183, + "grad_norm": 0.039099644869565964, + "learning_rate": 3.3773287748645245e-07, + "loss": 0.0025, + "step": 105810 + }, + { + "epoch": 1.788360950795569, + "grad_norm": 0.050598226487636566, + "learning_rate": 3.372002389754597e-07, + "loss": 0.0006, + "step": 105820 + }, + { + "epoch": 1.7885299510744193, + "grad_norm": 0.02448190376162529, + "learning_rate": 3.366680061506372e-07, + "loss": 0.0012, + "step": 105830 + }, + { + "epoch": 1.7886989513532696, + "grad_norm": 0.05488467589020729, + "learning_rate": 3.3613617905829286e-07, + "loss": 0.0003, + "step": 105840 + }, + { + "epoch": 1.7888679516321202, + "grad_norm": 0.015062323771417141, + "learning_rate": 3.3560475774469857e-07, + "loss": 0.0002, + "step": 105850 + }, + { + "epoch": 1.7890369519109708, + "grad_norm": 0.03238752484321594, + "learning_rate": 3.35073742256089e-07, + "loss": 0.0005, + "step": 105860 + }, + { + "epoch": 1.7892059521898211, + "grad_norm": 0.021235447376966476, + "learning_rate": 3.3454313263866725e-07, + "loss": 0.0004, + "step": 105870 + }, + { + "epoch": 
1.7893749524686715, + "grad_norm": 0.016415433958172798, + "learning_rate": 3.3401292893859625e-07, + "loss": 0.0006, + "step": 105880 + }, + { + "epoch": 1.7895439527475219, + "grad_norm": 0.0194083284586668, + "learning_rate": 3.334831312020087e-07, + "loss": 0.0004, + "step": 105890 + }, + { + "epoch": 1.7897129530263725, + "grad_norm": 0.04980658367276192, + "learning_rate": 3.3295373947499764e-07, + "loss": 0.0007, + "step": 105900 + }, + { + "epoch": 1.789881953305223, + "grad_norm": 0.04084394499659538, + "learning_rate": 3.3242475380362404e-07, + "loss": 0.0004, + "step": 105910 + }, + { + "epoch": 1.7900509535840734, + "grad_norm": 0.041083622723817825, + "learning_rate": 3.3189617423391163e-07, + "loss": 0.0011, + "step": 105920 + }, + { + "epoch": 1.7902199538629238, + "grad_norm": 0.1475612074136734, + "learning_rate": 3.313680008118497e-07, + "loss": 0.0007, + "step": 105930 + }, + { + "epoch": 1.7903889541417743, + "grad_norm": 0.12210751324892044, + "learning_rate": 3.308402335833916e-07, + "loss": 0.0006, + "step": 105940 + }, + { + "epoch": 1.790557954420625, + "grad_norm": 0.02229374274611473, + "learning_rate": 3.3031287259445544e-07, + "loss": 0.0008, + "step": 105950 + }, + { + "epoch": 1.7907269546994753, + "grad_norm": 0.014257742092013359, + "learning_rate": 3.297859178909252e-07, + "loss": 0.0004, + "step": 105960 + }, + { + "epoch": 1.7908959549783257, + "grad_norm": 0.012109160423278809, + "learning_rate": 3.292593695186469e-07, + "loss": 0.0003, + "step": 105970 + }, + { + "epoch": 1.791064955257176, + "grad_norm": 0.0008053976343944669, + "learning_rate": 3.287332275234351e-07, + "loss": 0.0004, + "step": 105980 + }, + { + "epoch": 1.7912339555360266, + "grad_norm": 0.06195811927318573, + "learning_rate": 3.2820749195106416e-07, + "loss": 0.0011, + "step": 105990 + }, + { + "epoch": 1.7914029558148772, + "grad_norm": 0.024041438475251198, + "learning_rate": 3.276821628472776e-07, + "loss": 0.0004, + "step": 106000 + }, + { + "epoch": 1.7915719560937275, + "grad_norm": 0.0122147835791111, + "learning_rate": 3.2715724025777994e-07, + "loss": 0.0002, + "step": 106010 + }, + { + "epoch": 1.791740956372578, + "grad_norm": 0.06569260358810425, + "learning_rate": 3.2663272422824297e-07, + "loss": 0.0009, + "step": 106020 + }, + { + "epoch": 1.7919099566514285, + "grad_norm": 0.11315417289733887, + "learning_rate": 3.261086148043019e-07, + "loss": 0.0006, + "step": 106030 + }, + { + "epoch": 1.792078956930279, + "grad_norm": 0.0007930688443593681, + "learning_rate": 3.255849120315574e-07, + "loss": 0.0009, + "step": 106040 + }, + { + "epoch": 1.7922479572091294, + "grad_norm": 0.05178851634263992, + "learning_rate": 3.250616159555731e-07, + "loss": 0.0005, + "step": 106050 + }, + { + "epoch": 1.7924169574879798, + "grad_norm": 0.03907632455229759, + "learning_rate": 3.245387266218797e-07, + "loss": 0.0005, + "step": 106060 + }, + { + "epoch": 1.7925859577668302, + "grad_norm": 0.007023259066045284, + "learning_rate": 3.240162440759692e-07, + "loss": 0.0003, + "step": 106070 + }, + { + "epoch": 1.7927549580456807, + "grad_norm": 0.09764275699853897, + "learning_rate": 3.2349416836330193e-07, + "loss": 0.0005, + "step": 106080 + }, + { + "epoch": 1.7929239583245313, + "grad_norm": 0.040002115070819855, + "learning_rate": 3.2297249952929935e-07, + "loss": 0.0005, + "step": 106090 + }, + { + "epoch": 1.7930929586033817, + "grad_norm": 0.05797765776515007, + "learning_rate": 3.2245123761935115e-07, + "loss": 0.0005, + "step": 106100 + }, + { + "epoch": 1.793261958882232, + 
"grad_norm": 0.02930748462677002, + "learning_rate": 3.2193038267880726e-07, + "loss": 0.0008, + "step": 106110 + }, + { + "epoch": 1.7934309591610826, + "grad_norm": 0.02905907668173313, + "learning_rate": 3.21409934752987e-07, + "loss": 0.0006, + "step": 106120 + }, + { + "epoch": 1.7935999594399332, + "grad_norm": 0.021418744698166847, + "learning_rate": 3.2088989388716965e-07, + "loss": 0.0003, + "step": 106130 + }, + { + "epoch": 1.7937689597187836, + "grad_norm": 0.0026306742802262306, + "learning_rate": 3.203702601266034e-07, + "loss": 0.0006, + "step": 106140 + }, + { + "epoch": 1.793937959997634, + "grad_norm": 0.2730619013309479, + "learning_rate": 3.198510335164967e-07, + "loss": 0.0005, + "step": 106150 + }, + { + "epoch": 1.7941069602764843, + "grad_norm": 0.07091565430164337, + "learning_rate": 3.193322141020272e-07, + "loss": 0.0005, + "step": 106160 + }, + { + "epoch": 1.794275960555335, + "grad_norm": 0.00048270318075083196, + "learning_rate": 3.1881380192833266e-07, + "loss": 0.0005, + "step": 106170 + }, + { + "epoch": 1.7944449608341855, + "grad_norm": 0.010864262469112873, + "learning_rate": 3.182957970405187e-07, + "loss": 0.0005, + "step": 106180 + }, + { + "epoch": 1.7946139611130358, + "grad_norm": 0.008652624674141407, + "learning_rate": 3.1777819948365265e-07, + "loss": 0.0004, + "step": 106190 + }, + { + "epoch": 1.7947829613918862, + "grad_norm": 0.07487659901380539, + "learning_rate": 3.1726100930277005e-07, + "loss": 0.0005, + "step": 106200 + }, + { + "epoch": 1.7949519616707368, + "grad_norm": 0.01952086202800274, + "learning_rate": 3.1674422654286717e-07, + "loss": 0.0003, + "step": 106210 + }, + { + "epoch": 1.7951209619495874, + "grad_norm": 0.08285342156887054, + "learning_rate": 3.162278512489081e-07, + "loss": 0.0009, + "step": 106220 + }, + { + "epoch": 1.7952899622284377, + "grad_norm": 0.08504673093557358, + "learning_rate": 3.157118834658196e-07, + "loss": 0.0003, + "step": 106230 + }, + { + "epoch": 1.795458962507288, + "grad_norm": 0.01815224252641201, + "learning_rate": 3.1519632323849247e-07, + "loss": 0.0002, + "step": 106240 + }, + { + "epoch": 1.7956279627861385, + "grad_norm": 0.01604323461651802, + "learning_rate": 3.146811706117847e-07, + "loss": 0.0007, + "step": 106250 + }, + { + "epoch": 1.795796963064989, + "grad_norm": 0.008513304404914379, + "learning_rate": 3.1416642563051493e-07, + "loss": 0.0005, + "step": 106260 + }, + { + "epoch": 1.7959659633438396, + "grad_norm": 0.07370144128799438, + "learning_rate": 3.136520883394706e-07, + "loss": 0.001, + "step": 106270 + }, + { + "epoch": 1.79613496362269, + "grad_norm": 0.0054110633209347725, + "learning_rate": 3.131381587833998e-07, + "loss": 0.0005, + "step": 106280 + }, + { + "epoch": 1.7963039639015403, + "grad_norm": 0.024059509858489037, + "learning_rate": 3.12624637007018e-07, + "loss": 0.0002, + "step": 106290 + }, + { + "epoch": 1.796472964180391, + "grad_norm": 0.066287562251091, + "learning_rate": 3.121115230550026e-07, + "loss": 0.002, + "step": 106300 + }, + { + "epoch": 1.7966419644592415, + "grad_norm": 0.02924017608165741, + "learning_rate": 3.115988169719991e-07, + "loss": 0.0006, + "step": 106310 + }, + { + "epoch": 1.7968109647380919, + "grad_norm": 0.001341874129138887, + "learning_rate": 3.110865188026135e-07, + "loss": 0.0003, + "step": 106320 + }, + { + "epoch": 1.7969799650169422, + "grad_norm": 0.1358218640089035, + "learning_rate": 3.105746285914202e-07, + "loss": 0.0009, + "step": 106330 + }, + { + "epoch": 1.7971489652957926, + "grad_norm": 
0.0023471498861908913, + "learning_rate": 3.1006314638295396e-07, + "loss": 0.0003, + "step": 106340 + }, + { + "epoch": 1.7973179655746432, + "grad_norm": 0.012569617480039597, + "learning_rate": 3.0955207222171766e-07, + "loss": 0.0036, + "step": 106350 + }, + { + "epoch": 1.7974869658534938, + "grad_norm": 0.01816113479435444, + "learning_rate": 3.090414061521768e-07, + "loss": 0.001, + "step": 106360 + }, + { + "epoch": 1.7976559661323441, + "grad_norm": 0.027073049917817116, + "learning_rate": 3.0853114821876193e-07, + "loss": 0.0015, + "step": 106370 + }, + { + "epoch": 1.7978249664111945, + "grad_norm": 0.019245292991399765, + "learning_rate": 3.080212984658676e-07, + "loss": 0.0004, + "step": 106380 + }, + { + "epoch": 1.797993966690045, + "grad_norm": 0.02849625051021576, + "learning_rate": 3.0751185693785324e-07, + "loss": 0.0009, + "step": 106390 + }, + { + "epoch": 1.7981629669688957, + "grad_norm": 0.013218315318226814, + "learning_rate": 3.0700282367904287e-07, + "loss": 0.0012, + "step": 106400 + }, + { + "epoch": 1.798331967247746, + "grad_norm": 0.01673104241490364, + "learning_rate": 3.0649419873372553e-07, + "loss": 0.0003, + "step": 106410 + }, + { + "epoch": 1.7985009675265964, + "grad_norm": 0.012420453131198883, + "learning_rate": 3.0598598214615303e-07, + "loss": 0.0005, + "step": 106420 + }, + { + "epoch": 1.7986699678054467, + "grad_norm": 0.004321121610701084, + "learning_rate": 3.0547817396054393e-07, + "loss": 0.0003, + "step": 106430 + }, + { + "epoch": 1.7988389680842973, + "grad_norm": 0.02657952904701233, + "learning_rate": 3.0497077422107836e-07, + "loss": 0.0009, + "step": 106440 + }, + { + "epoch": 1.799007968363148, + "grad_norm": 0.038907743990421295, + "learning_rate": 3.0446378297190325e-07, + "loss": 0.0002, + "step": 106450 + }, + { + "epoch": 1.7991769686419983, + "grad_norm": 0.0133865587413311, + "learning_rate": 3.0395720025713007e-07, + "loss": 0.0005, + "step": 106460 + }, + { + "epoch": 1.7993459689208486, + "grad_norm": 0.00025637217913754284, + "learning_rate": 3.034510261208329e-07, + "loss": 0.0004, + "step": 106470 + }, + { + "epoch": 1.7995149691996992, + "grad_norm": 0.04737827554345131, + "learning_rate": 3.029452606070521e-07, + "loss": 0.0009, + "step": 106480 + }, + { + "epoch": 1.7996839694785498, + "grad_norm": 0.002496320754289627, + "learning_rate": 3.024399037597908e-07, + "loss": 0.0003, + "step": 106490 + }, + { + "epoch": 1.7998529697574002, + "grad_norm": 0.052021387964487076, + "learning_rate": 3.0193495562301886e-07, + "loss": 0.0008, + "step": 106500 + }, + { + "epoch": 1.8000219700362505, + "grad_norm": 0.03761506825685501, + "learning_rate": 3.0143041624066784e-07, + "loss": 0.0005, + "step": 106510 + }, + { + "epoch": 1.800190970315101, + "grad_norm": 0.03732273355126381, + "learning_rate": 3.0092628565663583e-07, + "loss": 0.0003, + "step": 106520 + }, + { + "epoch": 1.8003599705939515, + "grad_norm": 0.019498972222208977, + "learning_rate": 3.0042256391478454e-07, + "loss": 0.0006, + "step": 106530 + }, + { + "epoch": 1.800528970872802, + "grad_norm": 0.013651588931679726, + "learning_rate": 2.999192510589405e-07, + "loss": 0.0003, + "step": 106540 + }, + { + "epoch": 1.8006979711516524, + "grad_norm": 0.06348342448472977, + "learning_rate": 2.994163471328931e-07, + "loss": 0.0004, + "step": 106550 + }, + { + "epoch": 1.8008669714305028, + "grad_norm": 0.0472821369767189, + "learning_rate": 2.989138521803997e-07, + "loss": 0.0008, + "step": 106560 + }, + { + "epoch": 1.8010359717093534, + "grad_norm": 
0.006114867981523275, + "learning_rate": 2.9841176624517743e-07, + "loss": 0.0005, + "step": 106570 + }, + { + "epoch": 1.8012049719882037, + "grad_norm": 0.06382883340120316, + "learning_rate": 2.979100893709108e-07, + "loss": 0.0004, + "step": 106580 + }, + { + "epoch": 1.8013739722670543, + "grad_norm": 0.0012848296901211143, + "learning_rate": 2.9740882160124995e-07, + "loss": 0.0005, + "step": 106590 + }, + { + "epoch": 1.8015429725459047, + "grad_norm": 0.0248939897865057, + "learning_rate": 2.9690796297980493e-07, + "loss": 0.0006, + "step": 106600 + }, + { + "epoch": 1.801711972824755, + "grad_norm": 0.031102487817406654, + "learning_rate": 2.964075135501543e-07, + "loss": 0.0006, + "step": 106610 + }, + { + "epoch": 1.8018809731036056, + "grad_norm": 0.026568105444312096, + "learning_rate": 2.9590747335584034e-07, + "loss": 0.0002, + "step": 106620 + }, + { + "epoch": 1.8020499733824562, + "grad_norm": 0.026500539854168892, + "learning_rate": 2.954078424403672e-07, + "loss": 0.0009, + "step": 106630 + }, + { + "epoch": 1.8022189736613066, + "grad_norm": 0.0744604840874672, + "learning_rate": 2.9490862084720686e-07, + "loss": 0.0009, + "step": 106640 + }, + { + "epoch": 1.802387973940157, + "grad_norm": 0.005288366694003344, + "learning_rate": 2.9440980861979216e-07, + "loss": 0.0006, + "step": 106650 + }, + { + "epoch": 1.8025569742190075, + "grad_norm": 0.072980135679245, + "learning_rate": 2.939114058015241e-07, + "loss": 0.0004, + "step": 106660 + }, + { + "epoch": 1.8027259744978579, + "grad_norm": 0.03152094781398773, + "learning_rate": 2.934134124357646e-07, + "loss": 0.0007, + "step": 106670 + }, + { + "epoch": 1.8028949747767085, + "grad_norm": 0.04416177421808243, + "learning_rate": 2.929158285658429e-07, + "loss": 0.0006, + "step": 106680 + }, + { + "epoch": 1.8030639750555588, + "grad_norm": 0.00499266292899847, + "learning_rate": 2.924186542350499e-07, + "loss": 0.0005, + "step": 106690 + }, + { + "epoch": 1.8032329753344092, + "grad_norm": 0.07045271247625351, + "learning_rate": 2.9192188948664267e-07, + "loss": 0.0011, + "step": 106700 + }, + { + "epoch": 1.8034019756132598, + "grad_norm": 0.01703091524541378, + "learning_rate": 2.914255343638428e-07, + "loss": 0.0006, + "step": 106710 + }, + { + "epoch": 1.8035709758921104, + "grad_norm": 0.05974980443716049, + "learning_rate": 2.9092958890983405e-07, + "loss": 0.0007, + "step": 106720 + }, + { + "epoch": 1.8037399761709607, + "grad_norm": 0.011746728792786598, + "learning_rate": 2.90434053167768e-07, + "loss": 0.0007, + "step": 106730 + }, + { + "epoch": 1.803908976449811, + "grad_norm": 0.022092394530773163, + "learning_rate": 2.8993892718075735e-07, + "loss": 0.0003, + "step": 106740 + }, + { + "epoch": 1.8040779767286614, + "grad_norm": 0.04053737595677376, + "learning_rate": 2.8944421099188104e-07, + "loss": 0.0006, + "step": 106750 + }, + { + "epoch": 1.804246977007512, + "grad_norm": 0.0704391822218895, + "learning_rate": 2.889499046441807e-07, + "loss": 0.002, + "step": 106760 + }, + { + "epoch": 1.8044159772863626, + "grad_norm": 0.06557278335094452, + "learning_rate": 2.884560081806653e-07, + "loss": 0.0006, + "step": 106770 + }, + { + "epoch": 1.804584977565213, + "grad_norm": 0.05407819151878357, + "learning_rate": 2.879625216443044e-07, + "loss": 0.0004, + "step": 106780 + }, + { + "epoch": 1.8047539778440633, + "grad_norm": 0.05492717772722244, + "learning_rate": 2.874694450780341e-07, + "loss": 0.0009, + "step": 106790 + }, + { + "epoch": 1.804922978122914, + "grad_norm": 0.028640110045671463, + 
"learning_rate": 2.8697677852475513e-07, + "loss": 0.0005, + "step": 106800 + }, + { + "epoch": 1.8050919784017645, + "grad_norm": 0.0022897396702319384, + "learning_rate": 2.864845220273327e-07, + "loss": 0.0008, + "step": 106810 + }, + { + "epoch": 1.8052609786806149, + "grad_norm": 0.0251806378364563, + "learning_rate": 2.85992675628593e-07, + "loss": 0.001, + "step": 106820 + }, + { + "epoch": 1.8054299789594652, + "grad_norm": 7.517525227740407e-05, + "learning_rate": 2.8550123937133136e-07, + "loss": 0.0002, + "step": 106830 + }, + { + "epoch": 1.8055989792383156, + "grad_norm": 0.08418340981006622, + "learning_rate": 2.8501021329830347e-07, + "loss": 0.0006, + "step": 106840 + }, + { + "epoch": 1.8057679795171662, + "grad_norm": 0.003870246931910515, + "learning_rate": 2.8451959745223187e-07, + "loss": 0.0009, + "step": 106850 + }, + { + "epoch": 1.8059369797960168, + "grad_norm": 0.008406764827668667, + "learning_rate": 2.8402939187580247e-07, + "loss": 0.0002, + "step": 106860 + }, + { + "epoch": 1.8061059800748671, + "grad_norm": 0.04729627072811127, + "learning_rate": 2.8353959661166555e-07, + "loss": 0.0004, + "step": 106870 + }, + { + "epoch": 1.8062749803537175, + "grad_norm": 6.623937952099368e-05, + "learning_rate": 2.8305021170243474e-07, + "loss": 0.0007, + "step": 106880 + }, + { + "epoch": 1.806443980632568, + "grad_norm": 0.04600764438509941, + "learning_rate": 2.825612371906905e-07, + "loss": 0.0004, + "step": 106890 + }, + { + "epoch": 1.8066129809114186, + "grad_norm": 0.09864575415849686, + "learning_rate": 2.8207267311897435e-07, + "loss": 0.001, + "step": 106900 + }, + { + "epoch": 1.806781981190269, + "grad_norm": 0.04171300306916237, + "learning_rate": 2.815845195297956e-07, + "loss": 0.0008, + "step": 106910 + }, + { + "epoch": 1.8069509814691194, + "grad_norm": 0.010658209212124348, + "learning_rate": 2.810967764656242e-07, + "loss": 0.0006, + "step": 106920 + }, + { + "epoch": 1.8071199817479697, + "grad_norm": 0.037350140511989594, + "learning_rate": 2.8060944396889723e-07, + "loss": 0.0013, + "step": 106930 + }, + { + "epoch": 1.8072889820268203, + "grad_norm": 0.03345376253128052, + "learning_rate": 2.8012252208201363e-07, + "loss": 0.0007, + "step": 106940 + }, + { + "epoch": 1.807457982305671, + "grad_norm": 0.045432355254888535, + "learning_rate": 2.796360108473395e-07, + "loss": 0.001, + "step": 106950 + }, + { + "epoch": 1.8076269825845213, + "grad_norm": 0.17052718997001648, + "learning_rate": 2.791499103072037e-07, + "loss": 0.0007, + "step": 106960 + }, + { + "epoch": 1.8077959828633716, + "grad_norm": 0.02307099848985672, + "learning_rate": 2.786642205038981e-07, + "loss": 0.0014, + "step": 106970 + }, + { + "epoch": 1.8079649831422222, + "grad_norm": 0.038824472576379776, + "learning_rate": 2.781789414796804e-07, + "loss": 0.0008, + "step": 106980 + }, + { + "epoch": 1.8081339834210728, + "grad_norm": 0.2283657193183899, + "learning_rate": 2.7769407327677246e-07, + "loss": 0.0006, + "step": 106990 + }, + { + "epoch": 1.8083029836999231, + "grad_norm": 0.006276116240769625, + "learning_rate": 2.772096159373616e-07, + "loss": 0.0004, + "step": 107000 + }, + { + "epoch": 1.8084719839787735, + "grad_norm": 0.04557156190276146, + "learning_rate": 2.767255695035953e-07, + "loss": 0.0005, + "step": 107010 + }, + { + "epoch": 1.8086409842576239, + "grad_norm": 0.006434998009353876, + "learning_rate": 2.7624193401759035e-07, + "loss": 0.0004, + "step": 107020 + }, + { + "epoch": 1.8088099845364745, + "grad_norm": 0.03410731256008148, + "learning_rate": 
2.7575870952142316e-07, + "loss": 0.0003, + "step": 107030 + }, + { + "epoch": 1.808978984815325, + "grad_norm": 0.047531042248010635, + "learning_rate": 2.7527589605713836e-07, + "loss": 0.0005, + "step": 107040 + }, + { + "epoch": 1.8091479850941754, + "grad_norm": 0.026933960616588593, + "learning_rate": 2.7479349366674236e-07, + "loss": 0.0008, + "step": 107050 + }, + { + "epoch": 1.8093169853730258, + "grad_norm": 0.05856289342045784, + "learning_rate": 2.743115023922066e-07, + "loss": 0.0006, + "step": 107060 + }, + { + "epoch": 1.8094859856518763, + "grad_norm": 0.06930646300315857, + "learning_rate": 2.7382992227546644e-07, + "loss": 0.0009, + "step": 107070 + }, + { + "epoch": 1.809654985930727, + "grad_norm": 0.037896398454904556, + "learning_rate": 2.733487533584223e-07, + "loss": 0.0007, + "step": 107080 + }, + { + "epoch": 1.8098239862095773, + "grad_norm": 0.04459800943732262, + "learning_rate": 2.728679956829372e-07, + "loss": 0.0004, + "step": 107090 + }, + { + "epoch": 1.8099929864884277, + "grad_norm": 0.032940663397312164, + "learning_rate": 2.723876492908406e-07, + "loss": 0.0016, + "step": 107100 + }, + { + "epoch": 1.810161986767278, + "grad_norm": 0.0005108294426463544, + "learning_rate": 2.719077142239235e-07, + "loss": 0.0004, + "step": 107110 + }, + { + "epoch": 1.8103309870461286, + "grad_norm": 0.07016304135322571, + "learning_rate": 2.714281905239441e-07, + "loss": 0.0005, + "step": 107120 + }, + { + "epoch": 1.8104999873249792, + "grad_norm": 0.022397033870220184, + "learning_rate": 2.7094907823262184e-07, + "loss": 0.0009, + "step": 107130 + }, + { + "epoch": 1.8106689876038295, + "grad_norm": 0.01936495676636696, + "learning_rate": 2.7047037739164337e-07, + "loss": 0.0004, + "step": 107140 + }, + { + "epoch": 1.81083798788268, + "grad_norm": 0.03775428980588913, + "learning_rate": 2.6999208804265653e-07, + "loss": 0.0007, + "step": 107150 + }, + { + "epoch": 1.8110069881615305, + "grad_norm": 0.0064462157897651196, + "learning_rate": 2.695142102272752e-07, + "loss": 0.0009, + "step": 107160 + }, + { + "epoch": 1.811175988440381, + "grad_norm": 0.0017412303714081645, + "learning_rate": 2.690367439870778e-07, + "loss": 0.0004, + "step": 107170 + }, + { + "epoch": 1.8113449887192314, + "grad_norm": 0.036754146218299866, + "learning_rate": 2.685596893636061e-07, + "loss": 0.0003, + "step": 107180 + }, + { + "epoch": 1.8115139889980818, + "grad_norm": 0.0605829693377018, + "learning_rate": 2.6808304639836466e-07, + "loss": 0.0007, + "step": 107190 + }, + { + "epoch": 1.8116829892769322, + "grad_norm": 0.0008872547186911106, + "learning_rate": 2.676068151328265e-07, + "loss": 0.0004, + "step": 107200 + }, + { + "epoch": 1.8118519895557827, + "grad_norm": 0.015495926141738892, + "learning_rate": 2.671309956084228e-07, + "loss": 0.0004, + "step": 107210 + }, + { + "epoch": 1.8120209898346333, + "grad_norm": 0.0967402532696724, + "learning_rate": 2.666555878665544e-07, + "loss": 0.0004, + "step": 107220 + }, + { + "epoch": 1.8121899901134837, + "grad_norm": 0.0003819911216851324, + "learning_rate": 2.661805919485838e-07, + "loss": 0.0003, + "step": 107230 + }, + { + "epoch": 1.812358990392334, + "grad_norm": 0.011226209811866283, + "learning_rate": 2.657060078958368e-07, + "loss": 0.0006, + "step": 107240 + }, + { + "epoch": 1.8125279906711846, + "grad_norm": 0.11130131781101227, + "learning_rate": 2.652318357496064e-07, + "loss": 0.0007, + "step": 107250 + }, + { + "epoch": 1.8126969909500352, + "grad_norm": 0.11554577201604843, + "learning_rate": 
2.6475807555114586e-07, + "loss": 0.0007, + "step": 107260 + }, + { + "epoch": 1.8128659912288856, + "grad_norm": 0.028465701267123222, + "learning_rate": 2.6428472734167597e-07, + "loss": 0.0003, + "step": 107270 + }, + { + "epoch": 1.813034991507736, + "grad_norm": 0.014900845475494862, + "learning_rate": 2.638117911623794e-07, + "loss": 0.0003, + "step": 107280 + }, + { + "epoch": 1.8132039917865863, + "grad_norm": 0.12082474678754807, + "learning_rate": 2.6333926705440494e-07, + "loss": 0.0008, + "step": 107290 + }, + { + "epoch": 1.813372992065437, + "grad_norm": 0.10809744894504547, + "learning_rate": 2.628671550588635e-07, + "loss": 0.0007, + "step": 107300 + }, + { + "epoch": 1.8135419923442875, + "grad_norm": 0.016735605895519257, + "learning_rate": 2.6239545521683175e-07, + "loss": 0.0011, + "step": 107310 + }, + { + "epoch": 1.8137109926231378, + "grad_norm": 0.004057494457811117, + "learning_rate": 2.619241675693496e-07, + "loss": 0.0004, + "step": 107320 + }, + { + "epoch": 1.8138799929019882, + "grad_norm": 0.2128661423921585, + "learning_rate": 2.614532921574214e-07, + "loss": 0.0014, + "step": 107330 + }, + { + "epoch": 1.8140489931808388, + "grad_norm": 0.004425691440701485, + "learning_rate": 2.609828290220151e-07, + "loss": 0.0011, + "step": 107340 + }, + { + "epoch": 1.8142179934596894, + "grad_norm": 0.0008100151899270713, + "learning_rate": 2.6051277820406396e-07, + "loss": 0.0008, + "step": 107350 + }, + { + "epoch": 1.8143869937385397, + "grad_norm": 0.012658101506531239, + "learning_rate": 2.600431397444647e-07, + "loss": 0.0007, + "step": 107360 + }, + { + "epoch": 1.81455599401739, + "grad_norm": 0.023131320253014565, + "learning_rate": 2.5957391368407746e-07, + "loss": 0.0003, + "step": 107370 + }, + { + "epoch": 1.8147249942962405, + "grad_norm": 0.10059446096420288, + "learning_rate": 2.591051000637279e-07, + "loss": 0.0007, + "step": 107380 + }, + { + "epoch": 1.814893994575091, + "grad_norm": 0.02371380105614662, + "learning_rate": 2.586366989242051e-07, + "loss": 0.0008, + "step": 107390 + }, + { + "epoch": 1.8150629948539416, + "grad_norm": 0.005978689529001713, + "learning_rate": 2.5816871030626135e-07, + "loss": 0.0016, + "step": 107400 + }, + { + "epoch": 1.815231995132792, + "grad_norm": 0.030084148049354553, + "learning_rate": 2.5770113425061526e-07, + "loss": 0.0007, + "step": 107410 + }, + { + "epoch": 1.8154009954116423, + "grad_norm": 0.030426248908042908, + "learning_rate": 2.572339707979465e-07, + "loss": 0.0006, + "step": 107420 + }, + { + "epoch": 1.815569995690493, + "grad_norm": 0.016602622345089912, + "learning_rate": 2.567672199889026e-07, + "loss": 0.0005, + "step": 107430 + }, + { + "epoch": 1.8157389959693433, + "grad_norm": 0.009840616956353188, + "learning_rate": 2.563008818640911e-07, + "loss": 0.0003, + "step": 107440 + }, + { + "epoch": 1.8159079962481939, + "grad_norm": 0.04440809041261673, + "learning_rate": 2.558349564640872e-07, + "loss": 0.0006, + "step": 107450 + }, + { + "epoch": 1.8160769965270442, + "grad_norm": 0.026042316108942032, + "learning_rate": 2.55369443829428e-07, + "loss": 0.0006, + "step": 107460 + }, + { + "epoch": 1.8162459968058946, + "grad_norm": 0.017470931634306908, + "learning_rate": 2.549043440006149e-07, + "loss": 0.0004, + "step": 107470 + }, + { + "epoch": 1.8164149970847452, + "grad_norm": 0.014459982514381409, + "learning_rate": 2.5443965701811514e-07, + "loss": 0.0006, + "step": 107480 + }, + { + "epoch": 1.8165839973635958, + "grad_norm": 0.018677975982427597, + "learning_rate": 
2.5397538292235734e-07, + "loss": 0.0004, + "step": 107490 + }, + { + "epoch": 1.8167529976424461, + "grad_norm": 0.007446676958352327, + "learning_rate": 2.5351152175373704e-07, + "loss": 0.0016, + "step": 107500 + }, + { + "epoch": 1.8169219979212965, + "grad_norm": 0.025096848607063293, + "learning_rate": 2.5304807355261086e-07, + "loss": 0.0006, + "step": 107510 + }, + { + "epoch": 1.817090998200147, + "grad_norm": 0.009793787263333797, + "learning_rate": 2.525850383593026e-07, + "loss": 0.0008, + "step": 107520 + }, + { + "epoch": 1.8172599984789974, + "grad_norm": 0.01451643742620945, + "learning_rate": 2.521224162140967e-07, + "loss": 0.001, + "step": 107530 + }, + { + "epoch": 1.817428998757848, + "grad_norm": 0.09454888105392456, + "learning_rate": 2.516602071572449e-07, + "loss": 0.0009, + "step": 107540 + }, + { + "epoch": 1.8175979990366984, + "grad_norm": 0.021753238514065742, + "learning_rate": 2.511984112289617e-07, + "loss": 0.0011, + "step": 107550 + }, + { + "epoch": 1.8177669993155487, + "grad_norm": 0.0140855573117733, + "learning_rate": 2.507370284694249e-07, + "loss": 0.0006, + "step": 107560 + }, + { + "epoch": 1.8179359995943993, + "grad_norm": 0.0358390286564827, + "learning_rate": 2.5027605891877747e-07, + "loss": 0.0006, + "step": 107570 + }, + { + "epoch": 1.81810499987325, + "grad_norm": 0.07012967020273209, + "learning_rate": 2.4981550261712617e-07, + "loss": 0.0007, + "step": 107580 + }, + { + "epoch": 1.8182740001521003, + "grad_norm": 0.02137385495007038, + "learning_rate": 2.493553596045406e-07, + "loss": 0.0006, + "step": 107590 + }, + { + "epoch": 1.8184430004309506, + "grad_norm": 0.009358204901218414, + "learning_rate": 2.488956299210571e-07, + "loss": 0.0005, + "step": 107600 + }, + { + "epoch": 1.8186120007098012, + "grad_norm": 0.03795533999800682, + "learning_rate": 2.484363136066725e-07, + "loss": 0.0011, + "step": 107610 + }, + { + "epoch": 1.8187810009886516, + "grad_norm": 0.023416364565491676, + "learning_rate": 2.4797741070135107e-07, + "loss": 0.0004, + "step": 107620 + }, + { + "epoch": 1.8189500012675022, + "grad_norm": 0.011213819496333599, + "learning_rate": 2.475189212450185e-07, + "loss": 0.0006, + "step": 107630 + }, + { + "epoch": 1.8191190015463525, + "grad_norm": 0.08420540392398834, + "learning_rate": 2.470608452775669e-07, + "loss": 0.0024, + "step": 107640 + }, + { + "epoch": 1.819288001825203, + "grad_norm": 0.0453086718916893, + "learning_rate": 2.4660318283884934e-07, + "loss": 0.0008, + "step": 107650 + }, + { + "epoch": 1.8194570021040535, + "grad_norm": 0.006805508863180876, + "learning_rate": 2.4614593396868613e-07, + "loss": 0.0003, + "step": 107660 + }, + { + "epoch": 1.819626002382904, + "grad_norm": 0.10280157625675201, + "learning_rate": 2.456890987068594e-07, + "loss": 0.0008, + "step": 107670 + }, + { + "epoch": 1.8197950026617544, + "grad_norm": 0.004269697703421116, + "learning_rate": 2.452326770931168e-07, + "loss": 0.0004, + "step": 107680 + }, + { + "epoch": 1.8199640029406048, + "grad_norm": 0.01963418908417225, + "learning_rate": 2.447766691671677e-07, + "loss": 0.0008, + "step": 107690 + }, + { + "epoch": 1.8201330032194551, + "grad_norm": 0.04317108541727066, + "learning_rate": 2.4432107496868915e-07, + "loss": 0.0005, + "step": 107700 + }, + { + "epoch": 1.8203020034983057, + "grad_norm": 0.0014225431950762868, + "learning_rate": 2.438658945373179e-07, + "loss": 0.0008, + "step": 107710 + }, + { + "epoch": 1.8204710037771563, + "grad_norm": 0.006587664596736431, + "learning_rate": 2.4341112791265777e-07, 
+ "loss": 0.0008, + "step": 107720 + }, + { + "epoch": 1.8206400040560067, + "grad_norm": 0.03664252161979675, + "learning_rate": 2.4295677513427653e-07, + "loss": 0.0004, + "step": 107730 + }, + { + "epoch": 1.820809004334857, + "grad_norm": 0.003876063507050276, + "learning_rate": 2.425028362417031e-07, + "loss": 0.0005, + "step": 107740 + }, + { + "epoch": 1.8209780046137076, + "grad_norm": 0.07400530576705933, + "learning_rate": 2.420493112744343e-07, + "loss": 0.001, + "step": 107750 + }, + { + "epoch": 1.8211470048925582, + "grad_norm": 0.019484998658299446, + "learning_rate": 2.41596200271928e-07, + "loss": 0.0008, + "step": 107760 + }, + { + "epoch": 1.8213160051714086, + "grad_norm": 0.0583035871386528, + "learning_rate": 2.411435032736076e-07, + "loss": 0.0006, + "step": 107770 + }, + { + "epoch": 1.821485005450259, + "grad_norm": 0.030501442030072212, + "learning_rate": 2.4069122031885883e-07, + "loss": 0.0005, + "step": 107780 + }, + { + "epoch": 1.8216540057291093, + "grad_norm": 0.016799982637166977, + "learning_rate": 2.402393514470347e-07, + "loss": 0.0006, + "step": 107790 + }, + { + "epoch": 1.8218230060079599, + "grad_norm": 0.028180431574583054, + "learning_rate": 2.397878966974471e-07, + "loss": 0.0002, + "step": 107800 + }, + { + "epoch": 1.8219920062868105, + "grad_norm": 0.03444041684269905, + "learning_rate": 2.393368561093773e-07, + "loss": 0.0006, + "step": 107810 + }, + { + "epoch": 1.8221610065656608, + "grad_norm": 0.009577547200024128, + "learning_rate": 2.3888622972206623e-07, + "loss": 0.0004, + "step": 107820 + }, + { + "epoch": 1.8223300068445112, + "grad_norm": 0.007195120677351952, + "learning_rate": 2.3843601757472193e-07, + "loss": 0.0002, + "step": 107830 + }, + { + "epoch": 1.8224990071233618, + "grad_norm": 0.0045670741237699986, + "learning_rate": 2.3798621970651415e-07, + "loss": 0.0005, + "step": 107840 + }, + { + "epoch": 1.8226680074022124, + "grad_norm": 0.07623469084501266, + "learning_rate": 2.3753683615657775e-07, + "loss": 0.0007, + "step": 107850 + }, + { + "epoch": 1.8228370076810627, + "grad_norm": 0.0028391906525939703, + "learning_rate": 2.3708786696401087e-07, + "loss": 0.0006, + "step": 107860 + }, + { + "epoch": 1.823006007959913, + "grad_norm": 0.04393308609724045, + "learning_rate": 2.3663931216787727e-07, + "loss": 0.0009, + "step": 107870 + }, + { + "epoch": 1.8231750082387634, + "grad_norm": 0.003135798731818795, + "learning_rate": 2.3619117180720187e-07, + "loss": 0.0014, + "step": 107880 + }, + { + "epoch": 1.823344008517614, + "grad_norm": 0.015100251883268356, + "learning_rate": 2.357434459209762e-07, + "loss": 0.0007, + "step": 107890 + }, + { + "epoch": 1.8235130087964646, + "grad_norm": 0.07303640991449356, + "learning_rate": 2.3529613454815303e-07, + "loss": 0.0006, + "step": 107900 + }, + { + "epoch": 1.823682009075315, + "grad_norm": 0.0011348397238180041, + "learning_rate": 2.348492377276529e-07, + "loss": 0.0006, + "step": 107910 + }, + { + "epoch": 1.8238510093541653, + "grad_norm": 0.01658669300377369, + "learning_rate": 2.3440275549835633e-07, + "loss": 0.0004, + "step": 107920 + }, + { + "epoch": 1.824020009633016, + "grad_norm": 0.01174141000956297, + "learning_rate": 2.3395668789910897e-07, + "loss": 0.0013, + "step": 107930 + }, + { + "epoch": 1.8241890099118665, + "grad_norm": 0.054488569498062134, + "learning_rate": 2.3351103496872251e-07, + "loss": 0.0005, + "step": 107940 + }, + { + "epoch": 1.8243580101907169, + "grad_norm": 0.007784908637404442, + "learning_rate": 2.3306579674597096e-07, + "loss": 
0.0005, + "step": 107950 + }, + { + "epoch": 1.8245270104695672, + "grad_norm": 0.09560495615005493, + "learning_rate": 2.3262097326958998e-07, + "loss": 0.001, + "step": 107960 + }, + { + "epoch": 1.8246960107484176, + "grad_norm": 0.06250361353158951, + "learning_rate": 2.321765645782842e-07, + "loss": 0.0008, + "step": 107970 + }, + { + "epoch": 1.8248650110272682, + "grad_norm": 0.0241071954369545, + "learning_rate": 2.3173257071071708e-07, + "loss": 0.0008, + "step": 107980 + }, + { + "epoch": 1.8250340113061188, + "grad_norm": 0.09130355715751648, + "learning_rate": 2.3128899170551888e-07, + "loss": 0.0006, + "step": 107990 + }, + { + "epoch": 1.8252030115849691, + "grad_norm": 0.01994970068335533, + "learning_rate": 2.3084582760128427e-07, + "loss": 0.0005, + "step": 108000 + }, + { + "epoch": 1.8253720118638195, + "grad_norm": 0.02133479341864586, + "learning_rate": 2.3040307843656906e-07, + "loss": 0.0006, + "step": 108010 + }, + { + "epoch": 1.82554101214267, + "grad_norm": 0.014401676133275032, + "learning_rate": 2.299607442498958e-07, + "loss": 0.0006, + "step": 108020 + }, + { + "epoch": 1.8257100124215206, + "grad_norm": 0.01538936235010624, + "learning_rate": 2.295188250797481e-07, + "loss": 0.0006, + "step": 108030 + }, + { + "epoch": 1.825879012700371, + "grad_norm": 0.01944626122713089, + "learning_rate": 2.2907732096457746e-07, + "loss": 0.0007, + "step": 108040 + }, + { + "epoch": 1.8260480129792214, + "grad_norm": 0.020264191552996635, + "learning_rate": 2.2863623194279428e-07, + "loss": 0.0005, + "step": 108050 + }, + { + "epoch": 1.8262170132580717, + "grad_norm": 0.07285454869270325, + "learning_rate": 2.2819555805277783e-07, + "loss": 0.0008, + "step": 108060 + }, + { + "epoch": 1.8263860135369223, + "grad_norm": 0.04085834324359894, + "learning_rate": 2.277552993328669e-07, + "loss": 0.0005, + "step": 108070 + }, + { + "epoch": 1.826555013815773, + "grad_norm": 0.010012646205723286, + "learning_rate": 2.2731545582136804e-07, + "loss": 0.0004, + "step": 108080 + }, + { + "epoch": 1.8267240140946233, + "grad_norm": 0.013109447434544563, + "learning_rate": 2.2687602755654736e-07, + "loss": 0.0006, + "step": 108090 + }, + { + "epoch": 1.8268930143734736, + "grad_norm": 0.03321567177772522, + "learning_rate": 2.264370145766398e-07, + "loss": 0.0015, + "step": 108100 + }, + { + "epoch": 1.8270620146523242, + "grad_norm": 0.0005492489435710013, + "learning_rate": 2.259984169198398e-07, + "loss": 0.0004, + "step": 108110 + }, + { + "epoch": 1.8272310149311748, + "grad_norm": 0.005728592164814472, + "learning_rate": 2.2556023462430798e-07, + "loss": 0.0005, + "step": 108120 + }, + { + "epoch": 1.8274000152100252, + "grad_norm": 0.08615639805793762, + "learning_rate": 2.2512246772816825e-07, + "loss": 0.0009, + "step": 108130 + }, + { + "epoch": 1.8275690154888755, + "grad_norm": 0.03961627185344696, + "learning_rate": 2.2468511626950905e-07, + "loss": 0.0003, + "step": 108140 + }, + { + "epoch": 1.8277380157677259, + "grad_norm": 0.027792375534772873, + "learning_rate": 2.2424818028638163e-07, + "loss": 0.0005, + "step": 108150 + }, + { + "epoch": 1.8279070160465765, + "grad_norm": 0.02066863141953945, + "learning_rate": 2.238116598168022e-07, + "loss": 0.0004, + "step": 108160 + }, + { + "epoch": 1.828076016325427, + "grad_norm": 0.024340318515896797, + "learning_rate": 2.2337555489874817e-07, + "loss": 0.0006, + "step": 108170 + }, + { + "epoch": 1.8282450166042774, + "grad_norm": 0.018854424357414246, + "learning_rate": 2.2293986557016535e-07, + "loss": 0.0007, + 
"step": 108180 + }, + { + "epoch": 1.8284140168831278, + "grad_norm": 0.04398643970489502, + "learning_rate": 2.225045918689589e-07, + "loss": 0.0003, + "step": 108190 + }, + { + "epoch": 1.8285830171619784, + "grad_norm": 0.012175377458333969, + "learning_rate": 2.2206973383300136e-07, + "loss": 0.0005, + "step": 108200 + }, + { + "epoch": 1.828752017440829, + "grad_norm": 0.04109586402773857, + "learning_rate": 2.2163529150012574e-07, + "loss": 0.0007, + "step": 108210 + }, + { + "epoch": 1.8289210177196793, + "grad_norm": 0.07205361127853394, + "learning_rate": 2.2120126490813187e-07, + "loss": 0.0008, + "step": 108220 + }, + { + "epoch": 1.8290900179985297, + "grad_norm": 0.06628002971410751, + "learning_rate": 2.2076765409478118e-07, + "loss": 0.0008, + "step": 108230 + }, + { + "epoch": 1.82925901827738, + "grad_norm": 0.00014799812925048172, + "learning_rate": 2.2033445909780072e-07, + "loss": 0.0005, + "step": 108240 + }, + { + "epoch": 1.8294280185562306, + "grad_norm": 0.00020395268802531064, + "learning_rate": 2.1990167995488087e-07, + "loss": 0.0011, + "step": 108250 + }, + { + "epoch": 1.8295970188350812, + "grad_norm": 0.0024706334806978703, + "learning_rate": 2.1946931670367434e-07, + "loss": 0.0004, + "step": 108260 + }, + { + "epoch": 1.8297660191139316, + "grad_norm": 0.00913759134709835, + "learning_rate": 2.1903736938179988e-07, + "loss": 0.0003, + "step": 108270 + }, + { + "epoch": 1.829935019392782, + "grad_norm": 0.034086182713508606, + "learning_rate": 2.18605838026838e-07, + "loss": 0.0006, + "step": 108280 + }, + { + "epoch": 1.8301040196716325, + "grad_norm": 0.1580514907836914, + "learning_rate": 2.1817472267633588e-07, + "loss": 0.0012, + "step": 108290 + }, + { + "epoch": 1.830273019950483, + "grad_norm": 0.0010112915188074112, + "learning_rate": 2.177440233678002e-07, + "loss": 0.0003, + "step": 108300 + }, + { + "epoch": 1.8304420202293334, + "grad_norm": 0.0032198044937103987, + "learning_rate": 2.1731374013870487e-07, + "loss": 0.0005, + "step": 108310 + }, + { + "epoch": 1.8306110205081838, + "grad_norm": 0.005323966033756733, + "learning_rate": 2.1688387302648771e-07, + "loss": 0.0006, + "step": 108320 + }, + { + "epoch": 1.8307800207870342, + "grad_norm": 0.5507721304893494, + "learning_rate": 2.164544220685477e-07, + "loss": 0.0006, + "step": 108330 + }, + { + "epoch": 1.8309490210658848, + "grad_norm": 0.009454223327338696, + "learning_rate": 2.1602538730224942e-07, + "loss": 0.0007, + "step": 108340 + }, + { + "epoch": 1.8311180213447353, + "grad_norm": 0.05927232652902603, + "learning_rate": 2.1559676876492242e-07, + "loss": 0.0006, + "step": 108350 + }, + { + "epoch": 1.8312870216235857, + "grad_norm": 0.04558410122990608, + "learning_rate": 2.1516856649385687e-07, + "loss": 0.0006, + "step": 108360 + }, + { + "epoch": 1.831456021902436, + "grad_norm": 0.012260254472494125, + "learning_rate": 2.1474078052630908e-07, + "loss": 0.0003, + "step": 108370 + }, + { + "epoch": 1.8316250221812866, + "grad_norm": 0.019253376871347427, + "learning_rate": 2.1431341089949875e-07, + "loss": 0.0004, + "step": 108380 + }, + { + "epoch": 1.831794022460137, + "grad_norm": 0.032780036330223083, + "learning_rate": 2.138864576506089e-07, + "loss": 0.0005, + "step": 108390 + }, + { + "epoch": 1.8319630227389876, + "grad_norm": 0.06949431449174881, + "learning_rate": 2.134599208167859e-07, + "loss": 0.0006, + "step": 108400 + }, + { + "epoch": 1.832132023017838, + "grad_norm": 0.10431086272001266, + "learning_rate": 2.1303380043514178e-07, + "loss": 0.0006, + "step": 
108410 + }, + { + "epoch": 1.8323010232966883, + "grad_norm": 0.032952189445495605, + "learning_rate": 2.1260809654275018e-07, + "loss": 0.0012, + "step": 108420 + }, + { + "epoch": 1.832470023575539, + "grad_norm": 0.05434500053524971, + "learning_rate": 2.121828091766498e-07, + "loss": 0.0007, + "step": 108430 + }, + { + "epoch": 1.8326390238543895, + "grad_norm": 0.015353082679212093, + "learning_rate": 2.1175793837384217e-07, + "loss": 0.0004, + "step": 108440 + }, + { + "epoch": 1.8328080241332398, + "grad_norm": 0.19077074527740479, + "learning_rate": 2.1133348417129384e-07, + "loss": 0.0011, + "step": 108450 + }, + { + "epoch": 1.8329770244120902, + "grad_norm": 0.0716923251748085, + "learning_rate": 2.109094466059336e-07, + "loss": 0.0007, + "step": 108460 + }, + { + "epoch": 1.8331460246909408, + "grad_norm": 0.008195516653358936, + "learning_rate": 2.104858257146558e-07, + "loss": 0.0003, + "step": 108470 + }, + { + "epoch": 1.8333150249697912, + "grad_norm": 0.016290076076984406, + "learning_rate": 2.10062621534316e-07, + "loss": 0.0005, + "step": 108480 + }, + { + "epoch": 1.8334840252486417, + "grad_norm": 0.047955095767974854, + "learning_rate": 2.0963983410173583e-07, + "loss": 0.0005, + "step": 108490 + }, + { + "epoch": 1.833653025527492, + "grad_norm": 0.07434266060590744, + "learning_rate": 2.0921746345370086e-07, + "loss": 0.0011, + "step": 108500 + }, + { + "epoch": 1.8338220258063425, + "grad_norm": 0.10034968703985214, + "learning_rate": 2.0879550962695783e-07, + "loss": 0.0008, + "step": 108510 + }, + { + "epoch": 1.833991026085193, + "grad_norm": 0.030944790691137314, + "learning_rate": 2.0837397265821958e-07, + "loss": 0.0012, + "step": 108520 + }, + { + "epoch": 1.8341600263640436, + "grad_norm": 0.04390372335910797, + "learning_rate": 2.079528525841612e-07, + "loss": 0.0007, + "step": 108530 + }, + { + "epoch": 1.834329026642894, + "grad_norm": 0.048307739198207855, + "learning_rate": 2.075321494414234e-07, + "loss": 0.0011, + "step": 108540 + }, + { + "epoch": 1.8344980269217444, + "grad_norm": 0.13810618221759796, + "learning_rate": 2.0711186326660803e-07, + "loss": 0.0015, + "step": 108550 + }, + { + "epoch": 1.834667027200595, + "grad_norm": 0.00930106732994318, + "learning_rate": 2.066919940962836e-07, + "loss": 0.0003, + "step": 108560 + }, + { + "epoch": 1.8348360274794453, + "grad_norm": 0.02009192854166031, + "learning_rate": 2.0627254196697865e-07, + "loss": 0.0016, + "step": 108570 + }, + { + "epoch": 1.8350050277582959, + "grad_norm": 0.021137626841664314, + "learning_rate": 2.0585350691518958e-07, + "loss": 0.0005, + "step": 108580 + }, + { + "epoch": 1.8351740280371462, + "grad_norm": 0.05029303953051567, + "learning_rate": 2.054348889773733e-07, + "loss": 0.0004, + "step": 108590 + }, + { + "epoch": 1.8353430283159966, + "grad_norm": 0.03773169964551926, + "learning_rate": 2.050166881899518e-07, + "loss": 0.0004, + "step": 108600 + }, + { + "epoch": 1.8355120285948472, + "grad_norm": 0.0030799570959061384, + "learning_rate": 2.0459890458931043e-07, + "loss": 0.0007, + "step": 108610 + }, + { + "epoch": 1.8356810288736978, + "grad_norm": 0.01885673590004444, + "learning_rate": 2.0418153821179954e-07, + "loss": 0.0006, + "step": 108620 + }, + { + "epoch": 1.8358500291525481, + "grad_norm": 0.026617132127285004, + "learning_rate": 2.037645890937301e-07, + "loss": 0.0007, + "step": 108630 + }, + { + "epoch": 1.8360190294313985, + "grad_norm": 0.058524031192064285, + "learning_rate": 2.033480572713803e-07, + "loss": 0.0009, + "step": 108640 + }, + { + 
"epoch": 1.8361880297102489, + "grad_norm": 0.010247909463942051, + "learning_rate": 2.0293194278098892e-07, + "loss": 0.0007, + "step": 108650 + }, + { + "epoch": 1.8363570299890994, + "grad_norm": 0.07031330466270447, + "learning_rate": 2.0251624565876203e-07, + "loss": 0.0005, + "step": 108660 + }, + { + "epoch": 1.83652603026795, + "grad_norm": 0.0027228700928390026, + "learning_rate": 2.0210096594086514e-07, + "loss": 0.0007, + "step": 108670 + }, + { + "epoch": 1.8366950305468004, + "grad_norm": 0.00010234397632302716, + "learning_rate": 2.0168610366343099e-07, + "loss": 0.0005, + "step": 108680 + }, + { + "epoch": 1.8368640308256508, + "grad_norm": 0.03394344076514244, + "learning_rate": 2.0127165886255407e-07, + "loss": 0.0012, + "step": 108690 + }, + { + "epoch": 1.8370330311045013, + "grad_norm": 0.023084286600351334, + "learning_rate": 2.0085763157429328e-07, + "loss": 0.0011, + "step": 108700 + }, + { + "epoch": 1.837202031383352, + "grad_norm": 0.04127196967601776, + "learning_rate": 2.0044402183467038e-07, + "loss": 0.0005, + "step": 108710 + }, + { + "epoch": 1.8373710316622023, + "grad_norm": 0.007525291293859482, + "learning_rate": 2.0003082967967325e-07, + "loss": 0.0003, + "step": 108720 + }, + { + "epoch": 1.8375400319410526, + "grad_norm": 0.0504603311419487, + "learning_rate": 1.9961805514524923e-07, + "loss": 0.0003, + "step": 108730 + }, + { + "epoch": 1.837709032219903, + "grad_norm": 0.03207425773143768, + "learning_rate": 1.9920569826731352e-07, + "loss": 0.0005, + "step": 108740 + }, + { + "epoch": 1.8378780324987536, + "grad_norm": 0.013932219706475735, + "learning_rate": 1.987937590817418e-07, + "loss": 0.0003, + "step": 108750 + }, + { + "epoch": 1.8380470327776042, + "grad_norm": 0.0014516907976940274, + "learning_rate": 1.9838223762437548e-07, + "loss": 0.0004, + "step": 108760 + }, + { + "epoch": 1.8382160330564545, + "grad_norm": 0.05958481505513191, + "learning_rate": 1.979711339310192e-07, + "loss": 0.0008, + "step": 108770 + }, + { + "epoch": 1.838385033335305, + "grad_norm": 0.06512217223644257, + "learning_rate": 1.9756044803744046e-07, + "loss": 0.0008, + "step": 108780 + }, + { + "epoch": 1.8385540336141555, + "grad_norm": 0.16275621950626373, + "learning_rate": 1.9715017997937126e-07, + "loss": 0.0007, + "step": 108790 + }, + { + "epoch": 1.838723033893006, + "grad_norm": 0.029923392459750175, + "learning_rate": 1.967403297925069e-07, + "loss": 0.0005, + "step": 108800 + }, + { + "epoch": 1.8388920341718564, + "grad_norm": 0.01783330924808979, + "learning_rate": 1.963308975125061e-07, + "loss": 0.0002, + "step": 108810 + }, + { + "epoch": 1.8390610344507068, + "grad_norm": 0.04435092210769653, + "learning_rate": 1.9592188317499094e-07, + "loss": 0.0008, + "step": 108820 + }, + { + "epoch": 1.8392300347295572, + "grad_norm": 0.004966772627085447, + "learning_rate": 1.9551328681554904e-07, + "loss": 0.0007, + "step": 108830 + }, + { + "epoch": 1.8393990350084077, + "grad_norm": 0.015078980475664139, + "learning_rate": 1.9510510846972863e-07, + "loss": 0.001, + "step": 108840 + }, + { + "epoch": 1.8395680352872583, + "grad_norm": 0.03198658302426338, + "learning_rate": 1.946973481730452e-07, + "loss": 0.0011, + "step": 108850 + }, + { + "epoch": 1.8397370355661087, + "grad_norm": 0.0027642929926514626, + "learning_rate": 1.942900059609737e-07, + "loss": 0.0009, + "step": 108860 + }, + { + "epoch": 1.839906035844959, + "grad_norm": 0.048655200749635696, + "learning_rate": 1.9388308186895632e-07, + "loss": 0.0004, + "step": 108870 + }, + { + "epoch": 
1.8400750361238096, + "grad_norm": 0.026829611510038376, + "learning_rate": 1.9347657593239645e-07, + "loss": 0.0005, + "step": 108880 + }, + { + "epoch": 1.8402440364026602, + "grad_norm": 0.02060195989906788, + "learning_rate": 1.9307048818666297e-07, + "loss": 0.0012, + "step": 108890 + }, + { + "epoch": 1.8404130366815106, + "grad_norm": 0.004234641324728727, + "learning_rate": 1.9266481866708653e-07, + "loss": 0.0005, + "step": 108900 + }, + { + "epoch": 1.840582036960361, + "grad_norm": 0.009983054362237453, + "learning_rate": 1.922595674089639e-07, + "loss": 0.0007, + "step": 108910 + }, + { + "epoch": 1.8407510372392113, + "grad_norm": 0.03913680464029312, + "learning_rate": 1.9185473444755242e-07, + "loss": 0.001, + "step": 108920 + }, + { + "epoch": 1.8409200375180619, + "grad_norm": 0.022576579824090004, + "learning_rate": 1.914503198180756e-07, + "loss": 0.0006, + "step": 108930 + }, + { + "epoch": 1.8410890377969125, + "grad_norm": 0.06822885572910309, + "learning_rate": 1.910463235557186e-07, + "loss": 0.0003, + "step": 108940 + }, + { + "epoch": 1.8412580380757628, + "grad_norm": 0.034167319536209106, + "learning_rate": 1.9064274569563168e-07, + "loss": 0.0004, + "step": 108950 + }, + { + "epoch": 1.8414270383546132, + "grad_norm": 0.10746181011199951, + "learning_rate": 1.9023958627292672e-07, + "loss": 0.0007, + "step": 108960 + }, + { + "epoch": 1.8415960386334638, + "grad_norm": 0.004286276176571846, + "learning_rate": 1.8983684532268287e-07, + "loss": 0.0006, + "step": 108970 + }, + { + "epoch": 1.8417650389123144, + "grad_norm": 0.03944242000579834, + "learning_rate": 1.8943452287993823e-07, + "loss": 0.0007, + "step": 108980 + }, + { + "epoch": 1.8419340391911647, + "grad_norm": 0.0013683936558663845, + "learning_rate": 1.890326189796987e-07, + "loss": 0.0002, + "step": 108990 + }, + { + "epoch": 1.842103039470015, + "grad_norm": 0.02904042787849903, + "learning_rate": 1.886311336569302e-07, + "loss": 0.0007, + "step": 109000 + }, + { + "epoch": 1.8422720397488654, + "grad_norm": 0.018441656604409218, + "learning_rate": 1.8823006694656476e-07, + "loss": 0.0004, + "step": 109010 + }, + { + "epoch": 1.842441040027716, + "grad_norm": 0.005052447319030762, + "learning_rate": 1.878294188834978e-07, + "loss": 0.0009, + "step": 109020 + }, + { + "epoch": 1.8426100403065666, + "grad_norm": 0.05015619099140167, + "learning_rate": 1.8742918950258594e-07, + "loss": 0.0007, + "step": 109030 + }, + { + "epoch": 1.842779040585417, + "grad_norm": 0.010885016061365604, + "learning_rate": 1.870293788386529e-07, + "loss": 0.0008, + "step": 109040 + }, + { + "epoch": 1.8429480408642673, + "grad_norm": 0.10773001611232758, + "learning_rate": 1.866299869264826e-07, + "loss": 0.0007, + "step": 109050 + }, + { + "epoch": 1.843117041143118, + "grad_norm": 0.014499770477414131, + "learning_rate": 1.8623101380082554e-07, + "loss": 0.0009, + "step": 109060 + }, + { + "epoch": 1.8432860414219685, + "grad_norm": 0.0013905505184084177, + "learning_rate": 1.8583245949639285e-07, + "loss": 0.0003, + "step": 109070 + }, + { + "epoch": 1.8434550417008189, + "grad_norm": 0.031105399131774902, + "learning_rate": 1.854343240478612e-07, + "loss": 0.0005, + "step": 109080 + }, + { + "epoch": 1.8436240419796692, + "grad_norm": 0.10961327701807022, + "learning_rate": 1.8503660748987073e-07, + "loss": 0.0006, + "step": 109090 + }, + { + "epoch": 1.8437930422585196, + "grad_norm": 0.0004014700825791806, + "learning_rate": 1.8463930985702428e-07, + "loss": 0.0003, + "step": 109100 + }, + { + "epoch": 
1.8439620425373702, + "grad_norm": 0.0050053782761096954, + "learning_rate": 1.8424243118388918e-07, + "loss": 0.0003, + "step": 109110 + }, + { + "epoch": 1.8441310428162208, + "grad_norm": 0.03133934736251831, + "learning_rate": 1.838459715049956e-07, + "loss": 0.0019, + "step": 109120 + }, + { + "epoch": 1.8443000430950711, + "grad_norm": 0.026860803365707397, + "learning_rate": 1.8344993085483653e-07, + "loss": 0.0004, + "step": 109130 + }, + { + "epoch": 1.8444690433739215, + "grad_norm": 0.14718422293663025, + "learning_rate": 1.8305430926787105e-07, + "loss": 0.0026, + "step": 109140 + }, + { + "epoch": 1.844638043652772, + "grad_norm": 0.003192188451066613, + "learning_rate": 1.8265910677851884e-07, + "loss": 0.0002, + "step": 109150 + }, + { + "epoch": 1.8448070439316226, + "grad_norm": 0.06060683727264404, + "learning_rate": 1.8226432342116517e-07, + "loss": 0.0004, + "step": 109160 + }, + { + "epoch": 1.844976044210473, + "grad_norm": 0.08313444256782532, + "learning_rate": 1.818699592301576e-07, + "loss": 0.0019, + "step": 109170 + }, + { + "epoch": 1.8451450444893234, + "grad_norm": 0.03141210228204727, + "learning_rate": 1.814760142398081e-07, + "loss": 0.0005, + "step": 109180 + }, + { + "epoch": 1.8453140447681737, + "grad_norm": 0.0628141239285469, + "learning_rate": 1.8108248848439146e-07, + "loss": 0.0009, + "step": 109190 + }, + { + "epoch": 1.8454830450470243, + "grad_norm": 0.07199462503194809, + "learning_rate": 1.80689381998147e-07, + "loss": 0.0004, + "step": 109200 + }, + { + "epoch": 1.845652045325875, + "grad_norm": 0.012743060477077961, + "learning_rate": 1.8029669481527566e-07, + "loss": 0.0008, + "step": 109210 + }, + { + "epoch": 1.8458210456047253, + "grad_norm": 0.013341827318072319, + "learning_rate": 1.7990442696994458e-07, + "loss": 0.0003, + "step": 109220 + }, + { + "epoch": 1.8459900458835756, + "grad_norm": 0.006336951162666082, + "learning_rate": 1.79512578496282e-07, + "loss": 0.0007, + "step": 109230 + }, + { + "epoch": 1.8461590461624262, + "grad_norm": 0.023601751774549484, + "learning_rate": 1.791211494283812e-07, + "loss": 0.0003, + "step": 109240 + }, + { + "epoch": 1.8463280464412768, + "grad_norm": 0.09381477534770966, + "learning_rate": 1.7873013980029718e-07, + "loss": 0.002, + "step": 109250 + }, + { + "epoch": 1.8464970467201272, + "grad_norm": 0.02556357905268669, + "learning_rate": 1.7833954964605103e-07, + "loss": 0.0003, + "step": 109260 + }, + { + "epoch": 1.8466660469989775, + "grad_norm": 0.017666513100266457, + "learning_rate": 1.7794937899962505e-07, + "loss": 0.0004, + "step": 109270 + }, + { + "epoch": 1.8468350472778279, + "grad_norm": 0.012054857797920704, + "learning_rate": 1.775596278949676e-07, + "loss": 0.0013, + "step": 109280 + }, + { + "epoch": 1.8470040475566785, + "grad_norm": 0.09404901415109634, + "learning_rate": 1.7717029636598714e-07, + "loss": 0.0011, + "step": 109290 + }, + { + "epoch": 1.847173047835529, + "grad_norm": 0.024840453639626503, + "learning_rate": 1.767813844465577e-07, + "loss": 0.0003, + "step": 109300 + }, + { + "epoch": 1.8473420481143794, + "grad_norm": 0.1091921404004097, + "learning_rate": 1.763928921705177e-07, + "loss": 0.0007, + "step": 109310 + }, + { + "epoch": 1.8475110483932298, + "grad_norm": 0.005300764925777912, + "learning_rate": 1.7600481957166627e-07, + "loss": 0.0004, + "step": 109320 + }, + { + "epoch": 1.8476800486720804, + "grad_norm": 0.0005665574572049081, + "learning_rate": 1.7561716668376861e-07, + "loss": 0.0008, + "step": 109330 + }, + { + "epoch": 
1.8478490489509307, + "grad_norm": 0.02141609601676464, + "learning_rate": 1.752299335405522e-07, + "loss": 0.0003, + "step": 109340 + }, + { + "epoch": 1.8480180492297813, + "grad_norm": 0.0409809872508049, + "learning_rate": 1.7484312017570837e-07, + "loss": 0.0005, + "step": 109350 + }, + { + "epoch": 1.8481870495086317, + "grad_norm": 0.03614840656518936, + "learning_rate": 1.7445672662289082e-07, + "loss": 0.0003, + "step": 109360 + }, + { + "epoch": 1.848356049787482, + "grad_norm": 0.002984137972816825, + "learning_rate": 1.740707529157193e-07, + "loss": 0.0006, + "step": 109370 + }, + { + "epoch": 1.8485250500663326, + "grad_norm": 0.0010859728790819645, + "learning_rate": 1.736851990877736e-07, + "loss": 0.0006, + "step": 109380 + }, + { + "epoch": 1.8486940503451832, + "grad_norm": 0.010009992867708206, + "learning_rate": 1.7330006517260024e-07, + "loss": 0.0004, + "step": 109390 + }, + { + "epoch": 1.8488630506240336, + "grad_norm": 0.058318689465522766, + "learning_rate": 1.7291535120370628e-07, + "loss": 0.0002, + "step": 109400 + }, + { + "epoch": 1.849032050902884, + "grad_norm": 0.009888230822980404, + "learning_rate": 1.7253105721456554e-07, + "loss": 0.0003, + "step": 109410 + }, + { + "epoch": 1.8492010511817345, + "grad_norm": 0.05609451234340668, + "learning_rate": 1.7214718323861123e-07, + "loss": 0.0005, + "step": 109420 + }, + { + "epoch": 1.8493700514605849, + "grad_norm": 0.046090930700302124, + "learning_rate": 1.7176372930924445e-07, + "loss": 0.0004, + "step": 109430 + }, + { + "epoch": 1.8495390517394354, + "grad_norm": 0.027034861966967583, + "learning_rate": 1.7138069545982628e-07, + "loss": 0.0006, + "step": 109440 + }, + { + "epoch": 1.8497080520182858, + "grad_norm": 0.001320467097684741, + "learning_rate": 1.7099808172368228e-07, + "loss": 0.0012, + "step": 109450 + }, + { + "epoch": 1.8498770522971362, + "grad_norm": 0.020823238417506218, + "learning_rate": 1.7061588813410357e-07, + "loss": 0.0005, + "step": 109460 + }, + { + "epoch": 1.8500460525759868, + "grad_norm": 0.008123408071696758, + "learning_rate": 1.7023411472434026e-07, + "loss": 0.0008, + "step": 109470 + }, + { + "epoch": 1.8502150528548373, + "grad_norm": 0.10001326352357864, + "learning_rate": 1.6985276152760966e-07, + "loss": 0.001, + "step": 109480 + }, + { + "epoch": 1.8503840531336877, + "grad_norm": 0.08047106862068176, + "learning_rate": 1.6947182857709242e-07, + "loss": 0.0006, + "step": 109490 + }, + { + "epoch": 1.850553053412538, + "grad_norm": 0.03494994714856148, + "learning_rate": 1.690913159059293e-07, + "loss": 0.001, + "step": 109500 + }, + { + "epoch": 1.8507220536913884, + "grad_norm": 0.003561967285349965, + "learning_rate": 1.687112235472288e-07, + "loss": 0.0003, + "step": 109510 + }, + { + "epoch": 1.850891053970239, + "grad_norm": 0.058161672204732895, + "learning_rate": 1.6833155153406e-07, + "loss": 0.0008, + "step": 109520 + }, + { + "epoch": 1.8510600542490896, + "grad_norm": 0.00724739721044898, + "learning_rate": 1.6795229989945594e-07, + "loss": 0.0006, + "step": 109530 + }, + { + "epoch": 1.85122905452794, + "grad_norm": 0.045328930020332336, + "learning_rate": 1.675734686764141e-07, + "loss": 0.0007, + "step": 109540 + }, + { + "epoch": 1.8513980548067903, + "grad_norm": 0.03157823905348778, + "learning_rate": 1.6719505789789315e-07, + "loss": 0.0018, + "step": 109550 + }, + { + "epoch": 1.851567055085641, + "grad_norm": 0.014154426753520966, + "learning_rate": 1.6681706759681892e-07, + "loss": 0.0004, + "step": 109560 + }, + { + "epoch": 
1.8517360553644915, + "grad_norm": 0.033427465707063675, + "learning_rate": 1.664394978060757e-07, + "loss": 0.0007, + "step": 109570 + }, + { + "epoch": 1.8519050556433418, + "grad_norm": 0.015785370022058487, + "learning_rate": 1.6606234855851665e-07, + "loss": 0.0011, + "step": 109580 + }, + { + "epoch": 1.8520740559221922, + "grad_norm": 0.0132228909060359, + "learning_rate": 1.656856198869533e-07, + "loss": 0.0003, + "step": 109590 + }, + { + "epoch": 1.8522430562010426, + "grad_norm": 0.19660967588424683, + "learning_rate": 1.6530931182416444e-07, + "loss": 0.0025, + "step": 109600 + }, + { + "epoch": 1.8524120564798932, + "grad_norm": 0.02984684519469738, + "learning_rate": 1.649334244028894e-07, + "loss": 0.0003, + "step": 109610 + }, + { + "epoch": 1.8525810567587437, + "grad_norm": 0.004236331209540367, + "learning_rate": 1.6455795765583372e-07, + "loss": 0.0003, + "step": 109620 + }, + { + "epoch": 1.852750057037594, + "grad_norm": 0.06756148487329483, + "learning_rate": 1.6418291161566347e-07, + "loss": 0.0005, + "step": 109630 + }, + { + "epoch": 1.8529190573164445, + "grad_norm": 0.04586157202720642, + "learning_rate": 1.6380828631501035e-07, + "loss": 0.0007, + "step": 109640 + }, + { + "epoch": 1.853088057595295, + "grad_norm": 0.003409165423363447, + "learning_rate": 1.6343408178646714e-07, + "loss": 0.0007, + "step": 109650 + }, + { + "epoch": 1.8532570578741456, + "grad_norm": 0.04364131763577461, + "learning_rate": 1.6306029806259338e-07, + "loss": 0.0007, + "step": 109660 + }, + { + "epoch": 1.853426058152996, + "grad_norm": 0.044084835797548294, + "learning_rate": 1.626869351759086e-07, + "loss": 0.0002, + "step": 109670 + }, + { + "epoch": 1.8535950584318464, + "grad_norm": 0.01193965319544077, + "learning_rate": 1.6231399315889852e-07, + "loss": 0.0003, + "step": 109680 + }, + { + "epoch": 1.8537640587106967, + "grad_norm": 0.010008934885263443, + "learning_rate": 1.619414720440099e-07, + "loss": 0.0004, + "step": 109690 + }, + { + "epoch": 1.8539330589895473, + "grad_norm": 0.04185475409030914, + "learning_rate": 1.6156937186365407e-07, + "loss": 0.0003, + "step": 109700 + }, + { + "epoch": 1.8541020592683979, + "grad_norm": 0.012754472903907299, + "learning_rate": 1.6119769265020568e-07, + "loss": 0.0008, + "step": 109710 + }, + { + "epoch": 1.8542710595472482, + "grad_norm": 0.027824141085147858, + "learning_rate": 1.6082643443600277e-07, + "loss": 0.0009, + "step": 109720 + }, + { + "epoch": 1.8544400598260986, + "grad_norm": 0.0018118839943781495, + "learning_rate": 1.6045559725334614e-07, + "loss": 0.0004, + "step": 109730 + }, + { + "epoch": 1.8546090601049492, + "grad_norm": 0.011216542683541775, + "learning_rate": 1.6008518113450112e-07, + "loss": 0.0004, + "step": 109740 + }, + { + "epoch": 1.8547780603837998, + "grad_norm": 0.0073775663040578365, + "learning_rate": 1.5971518611169467e-07, + "loss": 0.0004, + "step": 109750 + }, + { + "epoch": 1.8549470606626501, + "grad_norm": 0.027089470997452736, + "learning_rate": 1.5934561221711942e-07, + "loss": 0.0004, + "step": 109760 + }, + { + "epoch": 1.8551160609415005, + "grad_norm": 0.016810627654194832, + "learning_rate": 1.589764594829285e-07, + "loss": 0.0006, + "step": 109770 + }, + { + "epoch": 1.8552850612203509, + "grad_norm": 0.0004215096414554864, + "learning_rate": 1.5860772794124125e-07, + "loss": 0.0005, + "step": 109780 + }, + { + "epoch": 1.8554540614992014, + "grad_norm": 0.08347168564796448, + "learning_rate": 1.582394176241392e-07, + "loss": 0.0008, + "step": 109790 + }, + { + "epoch": 
1.855623061778052, + "grad_norm": 0.027736514806747437, + "learning_rate": 1.5787152856366616e-07, + "loss": 0.0003, + "step": 109800 + }, + { + "epoch": 1.8557920620569024, + "grad_norm": 0.04129638522863388, + "learning_rate": 1.57504060791831e-07, + "loss": 0.0003, + "step": 109810 + }, + { + "epoch": 1.8559610623357528, + "grad_norm": 0.02299007587134838, + "learning_rate": 1.571370143406048e-07, + "loss": 0.001, + "step": 109820 + }, + { + "epoch": 1.8561300626146033, + "grad_norm": 0.07348429411649704, + "learning_rate": 1.5677038924192312e-07, + "loss": 0.0012, + "step": 109830 + }, + { + "epoch": 1.856299062893454, + "grad_norm": 0.04478226974606514, + "learning_rate": 1.5640418552768266e-07, + "loss": 0.0007, + "step": 109840 + }, + { + "epoch": 1.8564680631723043, + "grad_norm": 0.037637028843164444, + "learning_rate": 1.560384032297463e-07, + "loss": 0.0009, + "step": 109850 + }, + { + "epoch": 1.8566370634511546, + "grad_norm": 0.2575816214084625, + "learning_rate": 1.5567304237993797e-07, + "loss": 0.0005, + "step": 109860 + }, + { + "epoch": 1.856806063730005, + "grad_norm": 0.010352968238294125, + "learning_rate": 1.5530810301004727e-07, + "loss": 0.0011, + "step": 109870 + }, + { + "epoch": 1.8569750640088556, + "grad_norm": 0.07358036190271378, + "learning_rate": 1.5494358515182384e-07, + "loss": 0.0008, + "step": 109880 + }, + { + "epoch": 1.8571440642877062, + "grad_norm": 0.1082979217171669, + "learning_rate": 1.5457948883698393e-07, + "loss": 0.0009, + "step": 109890 + }, + { + "epoch": 1.8573130645665565, + "grad_norm": 0.02853509411215782, + "learning_rate": 1.5421581409720444e-07, + "loss": 0.0007, + "step": 109900 + }, + { + "epoch": 1.857482064845407, + "grad_norm": 0.006572206504642963, + "learning_rate": 1.5385256096412838e-07, + "loss": 0.0004, + "step": 109910 + }, + { + "epoch": 1.8576510651242575, + "grad_norm": 0.008874989114701748, + "learning_rate": 1.534897294693588e-07, + "loss": 0.001, + "step": 109920 + }, + { + "epoch": 1.857820065403108, + "grad_norm": 0.01589968614280224, + "learning_rate": 1.5312731964446548e-07, + "loss": 0.0003, + "step": 109930 + }, + { + "epoch": 1.8579890656819584, + "grad_norm": 0.022283537313342094, + "learning_rate": 1.5276533152097816e-07, + "loss": 0.001, + "step": 109940 + }, + { + "epoch": 1.8581580659608088, + "grad_norm": 0.011150048114359379, + "learning_rate": 1.524037651303928e-07, + "loss": 0.0004, + "step": 109950 + }, + { + "epoch": 1.8583270662396592, + "grad_norm": 0.006013581529259682, + "learning_rate": 1.5204262050416696e-07, + "loss": 0.0006, + "step": 109960 + }, + { + "epoch": 1.8584960665185097, + "grad_norm": 0.1214689388871193, + "learning_rate": 1.5168189767372222e-07, + "loss": 0.001, + "step": 109970 + }, + { + "epoch": 1.8586650667973603, + "grad_norm": 0.05199957266449928, + "learning_rate": 1.5132159667044233e-07, + "loss": 0.0014, + "step": 109980 + }, + { + "epoch": 1.8588340670762107, + "grad_norm": 0.00684434873983264, + "learning_rate": 1.5096171752567667e-07, + "loss": 0.0004, + "step": 109990 + }, + { + "epoch": 1.859003067355061, + "grad_norm": 0.0022178571671247482, + "learning_rate": 1.506022602707352e-07, + "loss": 0.0005, + "step": 110000 + }, + { + "epoch": 1.8591720676339116, + "grad_norm": 0.0020311574917286634, + "learning_rate": 1.5024322493689348e-07, + "loss": 0.0004, + "step": 110010 + }, + { + "epoch": 1.8593410679127622, + "grad_norm": 0.021953493356704712, + "learning_rate": 1.4988461155538813e-07, + "loss": 0.0006, + "step": 110020 + }, + { + "epoch": 1.8595100681916126, 
+ "grad_norm": 0.05131346359848976, + "learning_rate": 1.4952642015742091e-07, + "loss": 0.0008, + "step": 110030 + }, + { + "epoch": 1.859679068470463, + "grad_norm": 0.07163525372743607, + "learning_rate": 1.4916865077415688e-07, + "loss": 0.0004, + "step": 110040 + }, + { + "epoch": 1.8598480687493133, + "grad_norm": 0.058817777782678604, + "learning_rate": 1.4881130343672278e-07, + "loss": 0.0009, + "step": 110050 + }, + { + "epoch": 1.8600170690281639, + "grad_norm": 0.12411772459745407, + "learning_rate": 1.4845437817620933e-07, + "loss": 0.0005, + "step": 110060 + }, + { + "epoch": 1.8601860693070145, + "grad_norm": 0.0067708902060985565, + "learning_rate": 1.480978750236717e-07, + "loss": 0.0007, + "step": 110070 + }, + { + "epoch": 1.8603550695858648, + "grad_norm": 0.06462078541517258, + "learning_rate": 1.4774179401012723e-07, + "loss": 0.0005, + "step": 110080 + }, + { + "epoch": 1.8605240698647152, + "grad_norm": 0.0010421044426038861, + "learning_rate": 1.473861351665562e-07, + "loss": 0.0015, + "step": 110090 + }, + { + "epoch": 1.8606930701435658, + "grad_norm": 0.045905958861112595, + "learning_rate": 1.470308985239033e-07, + "loss": 0.0006, + "step": 110100 + }, + { + "epoch": 1.8608620704224164, + "grad_norm": 0.02327723614871502, + "learning_rate": 1.4667608411307432e-07, + "loss": 0.0009, + "step": 110110 + }, + { + "epoch": 1.8610310707012667, + "grad_norm": 0.06241496652364731, + "learning_rate": 1.4632169196494184e-07, + "loss": 0.0011, + "step": 110120 + }, + { + "epoch": 1.861200070980117, + "grad_norm": 0.0030036966782063246, + "learning_rate": 1.4596772211033837e-07, + "loss": 0.0009, + "step": 110130 + }, + { + "epoch": 1.8613690712589674, + "grad_norm": 0.020759856328368187, + "learning_rate": 1.456141745800621e-07, + "loss": 0.0008, + "step": 110140 + }, + { + "epoch": 1.861538071537818, + "grad_norm": 0.035996537655591965, + "learning_rate": 1.4526104940487173e-07, + "loss": 0.0004, + "step": 110150 + }, + { + "epoch": 1.8617070718166686, + "grad_norm": 0.050926003605127335, + "learning_rate": 1.4490834661549212e-07, + "loss": 0.001, + "step": 110160 + }, + { + "epoch": 1.861876072095519, + "grad_norm": 0.0453384630382061, + "learning_rate": 1.4455606624260987e-07, + "loss": 0.0003, + "step": 110170 + }, + { + "epoch": 1.8620450723743693, + "grad_norm": 0.021722691133618355, + "learning_rate": 1.4420420831687542e-07, + "loss": 0.0004, + "step": 110180 + }, + { + "epoch": 1.86221407265322, + "grad_norm": 0.09107028692960739, + "learning_rate": 1.4385277286890099e-07, + "loss": 0.0012, + "step": 110190 + }, + { + "epoch": 1.8623830729320703, + "grad_norm": 0.008340844884514809, + "learning_rate": 1.435017599292643e-07, + "loss": 0.0004, + "step": 110200 + }, + { + "epoch": 1.8625520732109209, + "grad_norm": 0.004562507849186659, + "learning_rate": 1.4315116952850482e-07, + "loss": 0.0004, + "step": 110210 + }, + { + "epoch": 1.8627210734897712, + "grad_norm": 0.01268729753792286, + "learning_rate": 1.4280100169712486e-07, + "loss": 0.0005, + "step": 110220 + }, + { + "epoch": 1.8628900737686216, + "grad_norm": 0.1103207990527153, + "learning_rate": 1.4245125646559277e-07, + "loss": 0.0006, + "step": 110230 + }, + { + "epoch": 1.8630590740474722, + "grad_norm": 0.05450146272778511, + "learning_rate": 1.4210193386433535e-07, + "loss": 0.0006, + "step": 110240 + }, + { + "epoch": 1.8632280743263228, + "grad_norm": 0.06385930627584457, + "learning_rate": 1.4175303392374719e-07, + "loss": 0.0005, + "step": 110250 + }, + { + "epoch": 1.8633970746051731, + 
"grad_norm": 0.0600234791636467, + "learning_rate": 1.414045566741845e-07, + "loss": 0.0005, + "step": 110260 + }, + { + "epoch": 1.8635660748840235, + "grad_norm": 0.05960167944431305, + "learning_rate": 1.4105650214596478e-07, + "loss": 0.0007, + "step": 110270 + }, + { + "epoch": 1.863735075162874, + "grad_norm": 0.06081349402666092, + "learning_rate": 1.407088703693721e-07, + "loss": 0.0006, + "step": 110280 + }, + { + "epoch": 1.8639040754417244, + "grad_norm": 0.021502451971173286, + "learning_rate": 1.4036166137465168e-07, + "loss": 0.0006, + "step": 110290 + }, + { + "epoch": 1.864073075720575, + "grad_norm": 0.002646598732098937, + "learning_rate": 1.4001487519201162e-07, + "loss": 0.0007, + "step": 110300 + }, + { + "epoch": 1.8642420759994254, + "grad_norm": 0.03035661205649376, + "learning_rate": 1.3966851185162556e-07, + "loss": 0.0005, + "step": 110310 + }, + { + "epoch": 1.8644110762782757, + "grad_norm": 0.019656464457511902, + "learning_rate": 1.3932257138362658e-07, + "loss": 0.001, + "step": 110320 + }, + { + "epoch": 1.8645800765571263, + "grad_norm": 0.017677245661616325, + "learning_rate": 1.3897705381811566e-07, + "loss": 0.0004, + "step": 110330 + }, + { + "epoch": 1.864749076835977, + "grad_norm": 0.08664114028215408, + "learning_rate": 1.3863195918515204e-07, + "loss": 0.0005, + "step": 110340 + }, + { + "epoch": 1.8649180771148273, + "grad_norm": 0.02080652117729187, + "learning_rate": 1.3828728751476284e-07, + "loss": 0.0006, + "step": 110350 + }, + { + "epoch": 1.8650870773936776, + "grad_norm": 0.026353400200605392, + "learning_rate": 1.379430388369346e-07, + "loss": 0.0005, + "step": 110360 + }, + { + "epoch": 1.8652560776725282, + "grad_norm": 0.0249568372964859, + "learning_rate": 1.375992131816195e-07, + "loss": 0.0008, + "step": 110370 + }, + { + "epoch": 1.8654250779513786, + "grad_norm": 0.043732523918151855, + "learning_rate": 1.372558105787314e-07, + "loss": 0.0004, + "step": 110380 + }, + { + "epoch": 1.8655940782302292, + "grad_norm": 0.042085371911525726, + "learning_rate": 1.3691283105814857e-07, + "loss": 0.0005, + "step": 110390 + }, + { + "epoch": 1.8657630785090795, + "grad_norm": 0.01947295106947422, + "learning_rate": 1.3657027464971162e-07, + "loss": 0.0005, + "step": 110400 + }, + { + "epoch": 1.8659320787879299, + "grad_norm": 0.07680048793554306, + "learning_rate": 1.362281413832245e-07, + "loss": 0.001, + "step": 110410 + }, + { + "epoch": 1.8661010790667805, + "grad_norm": 0.0022646563593298197, + "learning_rate": 1.35886431288455e-07, + "loss": 0.0006, + "step": 110420 + }, + { + "epoch": 1.866270079345631, + "grad_norm": 0.06517709791660309, + "learning_rate": 1.3554514439513222e-07, + "loss": 0.0006, + "step": 110430 + }, + { + "epoch": 1.8664390796244814, + "grad_norm": 0.09205411374568939, + "learning_rate": 1.3520428073295122e-07, + "loss": 0.0008, + "step": 110440 + }, + { + "epoch": 1.8666080799033318, + "grad_norm": 0.10866151005029678, + "learning_rate": 1.3486384033156886e-07, + "loss": 0.001, + "step": 110450 + }, + { + "epoch": 1.8667770801821821, + "grad_norm": 0.020667634904384613, + "learning_rate": 1.3452382322060366e-07, + "loss": 0.0005, + "step": 110460 + }, + { + "epoch": 1.8669460804610327, + "grad_norm": 0.004244860261678696, + "learning_rate": 1.3418422942964027e-07, + "loss": 0.0008, + "step": 110470 + }, + { + "epoch": 1.8671150807398833, + "grad_norm": 0.043460264801979065, + "learning_rate": 1.3384505898822343e-07, + "loss": 0.0008, + "step": 110480 + }, + { + "epoch": 1.8672840810187337, + "grad_norm": 
0.023740937933325768, + "learning_rate": 1.335063119258645e-07, + "loss": 0.0003, + "step": 110490 + }, + { + "epoch": 1.867453081297584, + "grad_norm": 0.04052386432886124, + "learning_rate": 1.3316798827203493e-07, + "loss": 0.0007, + "step": 110500 + }, + { + "epoch": 1.8676220815764346, + "grad_norm": 0.07000137120485306, + "learning_rate": 1.328300880561706e-07, + "loss": 0.0006, + "step": 110510 + }, + { + "epoch": 1.8677910818552852, + "grad_norm": 0.006084958557039499, + "learning_rate": 1.3249261130767022e-07, + "loss": 0.0006, + "step": 110520 + }, + { + "epoch": 1.8679600821341356, + "grad_norm": 0.0023774036671966314, + "learning_rate": 1.321555580558964e-07, + "loss": 0.0013, + "step": 110530 + }, + { + "epoch": 1.868129082412986, + "grad_norm": 0.06854826956987381, + "learning_rate": 1.3181892833017463e-07, + "loss": 0.0005, + "step": 110540 + }, + { + "epoch": 1.8682980826918363, + "grad_norm": 0.03659120947122574, + "learning_rate": 1.3148272215979307e-07, + "loss": 0.0006, + "step": 110550 + }, + { + "epoch": 1.8684670829706869, + "grad_norm": 0.0016194320050999522, + "learning_rate": 1.3114693957400283e-07, + "loss": 0.0008, + "step": 110560 + }, + { + "epoch": 1.8686360832495374, + "grad_norm": 0.031096505001187325, + "learning_rate": 1.3081158060201883e-07, + "loss": 0.0009, + "step": 110570 + }, + { + "epoch": 1.8688050835283878, + "grad_norm": 0.0014152682851999998, + "learning_rate": 1.3047664527301994e-07, + "loss": 0.0003, + "step": 110580 + }, + { + "epoch": 1.8689740838072382, + "grad_norm": 0.06311298161745071, + "learning_rate": 1.3014213361614514e-07, + "loss": 0.001, + "step": 110590 + }, + { + "epoch": 1.8691430840860888, + "grad_norm": 0.010693120770156384, + "learning_rate": 1.2980804566050054e-07, + "loss": 0.0006, + "step": 110600 + }, + { + "epoch": 1.8693120843649393, + "grad_norm": 0.04796507582068443, + "learning_rate": 1.2947438143515234e-07, + "loss": 0.0005, + "step": 110610 + }, + { + "epoch": 1.8694810846437897, + "grad_norm": 0.003044690238311887, + "learning_rate": 1.2914114096913122e-07, + "loss": 0.0005, + "step": 110620 + }, + { + "epoch": 1.86965008492264, + "grad_norm": 0.0033997853752225637, + "learning_rate": 1.2880832429143008e-07, + "loss": 0.0007, + "step": 110630 + }, + { + "epoch": 1.8698190852014904, + "grad_norm": 0.025361163541674614, + "learning_rate": 1.284759314310069e-07, + "loss": 0.0006, + "step": 110640 + }, + { + "epoch": 1.869988085480341, + "grad_norm": 0.016363272443413734, + "learning_rate": 1.2814396241678074e-07, + "loss": 0.0007, + "step": 110650 + }, + { + "epoch": 1.8701570857591916, + "grad_norm": 0.07601359486579895, + "learning_rate": 1.278124172776346e-07, + "loss": 0.001, + "step": 110660 + }, + { + "epoch": 1.870326086038042, + "grad_norm": 0.03996589034795761, + "learning_rate": 1.2748129604241432e-07, + "loss": 0.0005, + "step": 110670 + }, + { + "epoch": 1.8704950863168923, + "grad_norm": 0.05205439031124115, + "learning_rate": 1.2715059873992907e-07, + "loss": 0.0008, + "step": 110680 + }, + { + "epoch": 1.870664086595743, + "grad_norm": 0.08824488520622253, + "learning_rate": 1.268203253989514e-07, + "loss": 0.0006, + "step": 110690 + }, + { + "epoch": 1.8708330868745935, + "grad_norm": 0.01283740159124136, + "learning_rate": 1.2649047604821663e-07, + "loss": 0.0006, + "step": 110700 + }, + { + "epoch": 1.8710020871534438, + "grad_norm": 0.03213268145918846, + "learning_rate": 1.2616105071642238e-07, + "loss": 0.0004, + "step": 110710 + }, + { + "epoch": 1.8711710874322942, + "grad_norm": 
0.06657550483942032, + "learning_rate": 1.258320494322318e-07, + "loss": 0.0012, + "step": 110720 + }, + { + "epoch": 1.8713400877111446, + "grad_norm": 0.027185996994376183, + "learning_rate": 1.2550347222426873e-07, + "loss": 0.0006, + "step": 110730 + }, + { + "epoch": 1.8715090879899952, + "grad_norm": 0.05009664222598076, + "learning_rate": 1.251753191211208e-07, + "loss": 0.0007, + "step": 110740 + }, + { + "epoch": 1.8716780882688457, + "grad_norm": 0.019768835976719856, + "learning_rate": 1.2484759015133906e-07, + "loss": 0.0006, + "step": 110750 + }, + { + "epoch": 1.871847088547696, + "grad_norm": 0.000817089865449816, + "learning_rate": 1.2452028534343852e-07, + "loss": 0.0005, + "step": 110760 + }, + { + "epoch": 1.8720160888265465, + "grad_norm": 0.003386547788977623, + "learning_rate": 1.2419340472589415e-07, + "loss": 0.0008, + "step": 110770 + }, + { + "epoch": 1.872185089105397, + "grad_norm": 0.006252896040678024, + "learning_rate": 1.238669483271482e-07, + "loss": 0.0006, + "step": 110780 + }, + { + "epoch": 1.8723540893842476, + "grad_norm": 0.023714499548077583, + "learning_rate": 1.2354091617560292e-07, + "loss": 0.0003, + "step": 110790 + }, + { + "epoch": 1.872523089663098, + "grad_norm": 0.008874212391674519, + "learning_rate": 1.2321530829962458e-07, + "loss": 0.0006, + "step": 110800 + }, + { + "epoch": 1.8726920899419484, + "grad_norm": 0.05294567719101906, + "learning_rate": 1.2289012472754324e-07, + "loss": 0.0003, + "step": 110810 + }, + { + "epoch": 1.8728610902207987, + "grad_norm": 0.00282698730006814, + "learning_rate": 1.225653654876513e-07, + "loss": 0.0002, + "step": 110820 + }, + { + "epoch": 1.8730300904996493, + "grad_norm": 0.03028349205851555, + "learning_rate": 1.2224103060820393e-07, + "loss": 0.0004, + "step": 110830 + }, + { + "epoch": 1.8731990907784999, + "grad_norm": 0.030639857053756714, + "learning_rate": 1.2191712011742074e-07, + "loss": 0.001, + "step": 110840 + }, + { + "epoch": 1.8733680910573502, + "grad_norm": 0.04265419766306877, + "learning_rate": 1.2159363404348256e-07, + "loss": 0.0009, + "step": 110850 + }, + { + "epoch": 1.8735370913362006, + "grad_norm": 0.010115021839737892, + "learning_rate": 1.2127057241453465e-07, + "loss": 0.0005, + "step": 110860 + }, + { + "epoch": 1.8737060916150512, + "grad_norm": 0.05838964879512787, + "learning_rate": 1.2094793525868564e-07, + "loss": 0.0005, + "step": 110870 + }, + { + "epoch": 1.8738750918939018, + "grad_norm": 0.009496191516518593, + "learning_rate": 1.2062572260400472e-07, + "loss": 0.0004, + "step": 110880 + }, + { + "epoch": 1.8740440921727521, + "grad_norm": 0.06020539253950119, + "learning_rate": 1.2030393447852784e-07, + "loss": 0.0006, + "step": 110890 + }, + { + "epoch": 1.8742130924516025, + "grad_norm": 0.07069530338048935, + "learning_rate": 1.199825709102509e-07, + "loss": 0.0007, + "step": 110900 + }, + { + "epoch": 1.8743820927304529, + "grad_norm": 0.1499357521533966, + "learning_rate": 1.1966163192713488e-07, + "loss": 0.0007, + "step": 110910 + }, + { + "epoch": 1.8745510930093034, + "grad_norm": 0.0009730269084684551, + "learning_rate": 1.1934111755710242e-07, + "loss": 0.0012, + "step": 110920 + }, + { + "epoch": 1.874720093288154, + "grad_norm": 0.005736366380006075, + "learning_rate": 1.1902102782804015e-07, + "loss": 0.0009, + "step": 110930 + }, + { + "epoch": 1.8748890935670044, + "grad_norm": 0.003876183880493045, + "learning_rate": 1.1870136276779742e-07, + "loss": 0.0009, + "step": 110940 + }, + { + "epoch": 1.8750580938458548, + "grad_norm": 
0.01212595496326685, + "learning_rate": 1.1838212240418645e-07, + "loss": 0.0007, + "step": 110950 + }, + { + "epoch": 1.8752270941247053, + "grad_norm": 0.0345769077539444, + "learning_rate": 1.1806330676498279e-07, + "loss": 0.0004, + "step": 110960 + }, + { + "epoch": 1.875396094403556, + "grad_norm": 0.0129469595849514, + "learning_rate": 1.1774491587792536e-07, + "loss": 0.0008, + "step": 110970 + }, + { + "epoch": 1.8755650946824063, + "grad_norm": 0.10325078666210175, + "learning_rate": 1.1742694977071479e-07, + "loss": 0.0003, + "step": 110980 + }, + { + "epoch": 1.8757340949612566, + "grad_norm": 0.037547655403614044, + "learning_rate": 1.1710940847101615e-07, + "loss": 0.0017, + "step": 110990 + }, + { + "epoch": 1.875903095240107, + "grad_norm": 0.04398932680487633, + "learning_rate": 1.167922920064568e-07, + "loss": 0.0004, + "step": 111000 + }, + { + "epoch": 1.8760720955189576, + "grad_norm": 0.018700091168284416, + "learning_rate": 1.1647560040462857e-07, + "loss": 0.0005, + "step": 111010 + }, + { + "epoch": 1.8762410957978082, + "grad_norm": 0.048042118549346924, + "learning_rate": 1.1615933369308385e-07, + "loss": 0.0005, + "step": 111020 + }, + { + "epoch": 1.8764100960766585, + "grad_norm": 0.002763420110568404, + "learning_rate": 1.1584349189934063e-07, + "loss": 0.0007, + "step": 111030 + }, + { + "epoch": 1.876579096355509, + "grad_norm": 0.03974473103880882, + "learning_rate": 1.1552807505087694e-07, + "loss": 0.0008, + "step": 111040 + }, + { + "epoch": 1.8767480966343595, + "grad_norm": 0.007572009228169918, + "learning_rate": 1.1521308317513691e-07, + "loss": 0.0003, + "step": 111050 + }, + { + "epoch": 1.87691709691321, + "grad_norm": 0.04790041968226433, + "learning_rate": 1.1489851629952587e-07, + "loss": 0.001, + "step": 111060 + }, + { + "epoch": 1.8770860971920604, + "grad_norm": 0.0019335891120135784, + "learning_rate": 1.1458437445141301e-07, + "loss": 0.0003, + "step": 111070 + }, + { + "epoch": 1.8772550974709108, + "grad_norm": 0.04101487621665001, + "learning_rate": 1.1427065765812983e-07, + "loss": 0.0008, + "step": 111080 + }, + { + "epoch": 1.8774240977497612, + "grad_norm": 0.046852122992277145, + "learning_rate": 1.1395736594697116e-07, + "loss": 0.0005, + "step": 111090 + }, + { + "epoch": 1.8775930980286117, + "grad_norm": 0.0006627286784350872, + "learning_rate": 1.1364449934519572e-07, + "loss": 0.0006, + "step": 111100 + }, + { + "epoch": 1.8777620983074623, + "grad_norm": 0.04942318797111511, + "learning_rate": 1.1333205788002289e-07, + "loss": 0.0006, + "step": 111110 + }, + { + "epoch": 1.8779310985863127, + "grad_norm": 0.04318777099251747, + "learning_rate": 1.1302004157863811e-07, + "loss": 0.0012, + "step": 111120 + }, + { + "epoch": 1.878100098865163, + "grad_norm": 0.06974602490663528, + "learning_rate": 1.1270845046818745e-07, + "loss": 0.0015, + "step": 111130 + }, + { + "epoch": 1.8782690991440136, + "grad_norm": 0.08759629726409912, + "learning_rate": 1.1239728457578092e-07, + "loss": 0.0005, + "step": 111140 + }, + { + "epoch": 1.878438099422864, + "grad_norm": 0.018597111105918884, + "learning_rate": 1.1208654392849183e-07, + "loss": 0.0006, + "step": 111150 + }, + { + "epoch": 1.8786070997017146, + "grad_norm": 0.04458988830447197, + "learning_rate": 1.1177622855335579e-07, + "loss": 0.0015, + "step": 111160 + }, + { + "epoch": 1.878776099980565, + "grad_norm": 0.0010363217443227768, + "learning_rate": 1.1146633847737176e-07, + "loss": 0.0005, + "step": 111170 + }, + { + "epoch": 1.8789451002594153, + "grad_norm": 
0.016656868159770966, + "learning_rate": 1.1115687372750151e-07, + "loss": 0.0006, + "step": 111180 + }, + { + "epoch": 1.8791141005382659, + "grad_norm": 0.034169506281614304, + "learning_rate": 1.1084783433067125e-07, + "loss": 0.0001, + "step": 111190 + }, + { + "epoch": 1.8792831008171165, + "grad_norm": 0.00958054419606924, + "learning_rate": 1.1053922031376673e-07, + "loss": 0.0007, + "step": 111200 + }, + { + "epoch": 1.8794521010959668, + "grad_norm": 0.03231184184551239, + "learning_rate": 1.1023103170364035e-07, + "loss": 0.0006, + "step": 111210 + }, + { + "epoch": 1.8796211013748172, + "grad_norm": 0.04403608292341232, + "learning_rate": 1.0992326852710622e-07, + "loss": 0.0008, + "step": 111220 + }, + { + "epoch": 1.8797901016536678, + "grad_norm": 0.05286185070872307, + "learning_rate": 1.0961593081094012e-07, + "loss": 0.0008, + "step": 111230 + }, + { + "epoch": 1.8799591019325181, + "grad_norm": 0.014492380432784557, + "learning_rate": 1.0930901858188347e-07, + "loss": 0.0004, + "step": 111240 + }, + { + "epoch": 1.8801281022113687, + "grad_norm": 0.0011498809326440096, + "learning_rate": 1.0900253186663712e-07, + "loss": 0.0003, + "step": 111250 + }, + { + "epoch": 1.880297102490219, + "grad_norm": 0.010239934548735619, + "learning_rate": 1.0869647069186861e-07, + "loss": 0.0003, + "step": 111260 + }, + { + "epoch": 1.8804661027690694, + "grad_norm": 0.08053793013095856, + "learning_rate": 1.08390835084205e-07, + "loss": 0.0007, + "step": 111270 + }, + { + "epoch": 1.88063510304792, + "grad_norm": 0.00035482627572491765, + "learning_rate": 1.0808562507024056e-07, + "loss": 0.0004, + "step": 111280 + }, + { + "epoch": 1.8808041033267706, + "grad_norm": 0.0689387395977974, + "learning_rate": 1.0778084067652739e-07, + "loss": 0.0005, + "step": 111290 + }, + { + "epoch": 1.880973103605621, + "grad_norm": 0.23589535057544708, + "learning_rate": 1.0747648192958482e-07, + "loss": 0.0003, + "step": 111300 + }, + { + "epoch": 1.8811421038844713, + "grad_norm": 0.04281485453248024, + "learning_rate": 1.0717254885589334e-07, + "loss": 0.0004, + "step": 111310 + }, + { + "epoch": 1.881311104163322, + "grad_norm": 0.11141208559274673, + "learning_rate": 1.0686904148189625e-07, + "loss": 0.0009, + "step": 111320 + }, + { + "epoch": 1.8814801044421723, + "grad_norm": 0.017679790034890175, + "learning_rate": 1.065659598340002e-07, + "loss": 0.0008, + "step": 111330 + }, + { + "epoch": 1.8816491047210229, + "grad_norm": 0.021234925836324692, + "learning_rate": 1.062633039385752e-07, + "loss": 0.0004, + "step": 111340 + }, + { + "epoch": 1.8818181049998732, + "grad_norm": 0.03269995003938675, + "learning_rate": 1.0596107382195352e-07, + "loss": 0.0005, + "step": 111350 + }, + { + "epoch": 1.8819871052787236, + "grad_norm": 0.07181331515312195, + "learning_rate": 1.0565926951043026e-07, + "loss": 0.0004, + "step": 111360 + }, + { + "epoch": 1.8821561055575742, + "grad_norm": 0.010781611315906048, + "learning_rate": 1.0535789103026439e-07, + "loss": 0.0003, + "step": 111370 + }, + { + "epoch": 1.8823251058364248, + "grad_norm": 0.1765391230583191, + "learning_rate": 1.0505693840767661e-07, + "loss": 0.0007, + "step": 111380 + }, + { + "epoch": 1.8824941061152751, + "grad_norm": 0.020220760256052017, + "learning_rate": 1.0475641166885209e-07, + "loss": 0.0007, + "step": 111390 + }, + { + "epoch": 1.8826631063941255, + "grad_norm": 0.0009305546409450471, + "learning_rate": 1.0445631083993768e-07, + "loss": 0.0007, + "step": 111400 + }, + { + "epoch": 1.8828321066729758, + "grad_norm": 
0.005653336178511381, + "learning_rate": 1.0415663594704362e-07, + "loss": 0.0002, + "step": 111410 + }, + { + "epoch": 1.8830011069518264, + "grad_norm": 0.028162632137537003, + "learning_rate": 1.0385738701624349e-07, + "loss": 0.0005, + "step": 111420 + }, + { + "epoch": 1.883170107230677, + "grad_norm": 0.005178904160857201, + "learning_rate": 1.0355856407357312e-07, + "loss": 0.0002, + "step": 111430 + }, + { + "epoch": 1.8833391075095274, + "grad_norm": 0.0075320713222026825, + "learning_rate": 1.0326016714503062e-07, + "loss": 0.001, + "step": 111440 + }, + { + "epoch": 1.8835081077883777, + "grad_norm": 0.028461139649152756, + "learning_rate": 1.0296219625657966e-07, + "loss": 0.0009, + "step": 111450 + }, + { + "epoch": 1.8836771080672283, + "grad_norm": 0.04403156787157059, + "learning_rate": 1.0266465143414395e-07, + "loss": 0.0006, + "step": 111460 + }, + { + "epoch": 1.883846108346079, + "grad_norm": 0.0008579157874919474, + "learning_rate": 1.0236753270361222e-07, + "loss": 0.0004, + "step": 111470 + }, + { + "epoch": 1.8840151086249293, + "grad_norm": 0.0005617240676656365, + "learning_rate": 1.0207084009083379e-07, + "loss": 0.0005, + "step": 111480 + }, + { + "epoch": 1.8841841089037796, + "grad_norm": 0.02151833474636078, + "learning_rate": 1.0177457362162414e-07, + "loss": 0.0006, + "step": 111490 + }, + { + "epoch": 1.88435310918263, + "grad_norm": 0.044212453067302704, + "learning_rate": 1.0147873332175873e-07, + "loss": 0.0003, + "step": 111500 + }, + { + "epoch": 1.8845221094614806, + "grad_norm": 0.02930149808526039, + "learning_rate": 1.0118331921697755e-07, + "loss": 0.0005, + "step": 111510 + }, + { + "epoch": 1.8846911097403312, + "grad_norm": 0.10000759363174438, + "learning_rate": 1.0088833133298226e-07, + "loss": 0.0008, + "step": 111520 + }, + { + "epoch": 1.8848601100191815, + "grad_norm": 0.0430021695792675, + "learning_rate": 1.005937696954401e-07, + "loss": 0.0005, + "step": 111530 + }, + { + "epoch": 1.8850291102980319, + "grad_norm": 0.0028830496594309807, + "learning_rate": 1.0029963432997725e-07, + "loss": 0.0004, + "step": 111540 + }, + { + "epoch": 1.8851981105768825, + "grad_norm": 0.004205740988254547, + "learning_rate": 1.0000592526218544e-07, + "loss": 0.0006, + "step": 111550 + }, + { + "epoch": 1.885367110855733, + "grad_norm": 0.051776617765426636, + "learning_rate": 9.971264251761981e-08, + "loss": 0.0008, + "step": 111560 + }, + { + "epoch": 1.8855361111345834, + "grad_norm": 0.04432372376322746, + "learning_rate": 9.941978612179659e-08, + "loss": 0.0005, + "step": 111570 + }, + { + "epoch": 1.8857051114134338, + "grad_norm": 0.03758419677615166, + "learning_rate": 9.912735610019541e-08, + "loss": 0.0008, + "step": 111580 + }, + { + "epoch": 1.8858741116922841, + "grad_norm": 0.016436981037259102, + "learning_rate": 9.883535247825982e-08, + "loss": 0.0005, + "step": 111590 + }, + { + "epoch": 1.8860431119711347, + "grad_norm": 0.0603216327726841, + "learning_rate": 9.854377528139558e-08, + "loss": 0.0009, + "step": 111600 + }, + { + "epoch": 1.8862121122499853, + "grad_norm": 0.010252782143652439, + "learning_rate": 9.825262453497075e-08, + "loss": 0.0013, + "step": 111610 + }, + { + "epoch": 1.8863811125288357, + "grad_norm": 0.13375674188137054, + "learning_rate": 9.796190026431729e-08, + "loss": 0.0008, + "step": 111620 + }, + { + "epoch": 1.886550112807686, + "grad_norm": 0.024899881333112717, + "learning_rate": 9.767160249472941e-08, + "loss": 0.0003, + "step": 111630 + }, + { + "epoch": 1.8867191130865366, + "grad_norm": 
0.00046924164053052664, + "learning_rate": 9.73817312514641e-08, + "loss": 0.0003, + "step": 111640 + }, + { + "epoch": 1.8868881133653872, + "grad_norm": 0.04159824922680855, + "learning_rate": 9.709228655974235e-08, + "loss": 0.002, + "step": 111650 + }, + { + "epoch": 1.8870571136442376, + "grad_norm": 0.027793768793344498, + "learning_rate": 9.680326844474675e-08, + "loss": 0.0008, + "step": 111660 + }, + { + "epoch": 1.887226113923088, + "grad_norm": 0.03554021567106247, + "learning_rate": 9.651467693162276e-08, + "loss": 0.0004, + "step": 111670 + }, + { + "epoch": 1.8873951142019383, + "grad_norm": 0.028292205184698105, + "learning_rate": 9.622651204548028e-08, + "loss": 0.0003, + "step": 111680 + }, + { + "epoch": 1.8875641144807889, + "grad_norm": 0.03103375807404518, + "learning_rate": 9.593877381139039e-08, + "loss": 0.0003, + "step": 111690 + }, + { + "epoch": 1.8877331147596395, + "grad_norm": 0.05996498093008995, + "learning_rate": 9.56514622543886e-08, + "loss": 0.0007, + "step": 111700 + }, + { + "epoch": 1.8879021150384898, + "grad_norm": 0.016905006021261215, + "learning_rate": 9.536457739947047e-08, + "loss": 0.0008, + "step": 111710 + }, + { + "epoch": 1.8880711153173402, + "grad_norm": 0.004696437623351812, + "learning_rate": 9.507811927159882e-08, + "loss": 0.0007, + "step": 111720 + }, + { + "epoch": 1.8882401155961908, + "grad_norm": 0.026309417560696602, + "learning_rate": 9.479208789569484e-08, + "loss": 0.0004, + "step": 111730 + }, + { + "epoch": 1.8884091158750413, + "grad_norm": 0.06356547027826309, + "learning_rate": 9.45064832966458e-08, + "loss": 0.0004, + "step": 111740 + }, + { + "epoch": 1.8885781161538917, + "grad_norm": 0.012746340595185757, + "learning_rate": 9.42213054993002e-08, + "loss": 0.0007, + "step": 111750 + }, + { + "epoch": 1.888747116432742, + "grad_norm": 0.02876432053744793, + "learning_rate": 9.393655452847039e-08, + "loss": 0.0005, + "step": 111760 + }, + { + "epoch": 1.8889161167115924, + "grad_norm": 0.007819334976375103, + "learning_rate": 9.365223040893046e-08, + "loss": 0.0003, + "step": 111770 + }, + { + "epoch": 1.889085116990443, + "grad_norm": 0.01989988051354885, + "learning_rate": 9.336833316541838e-08, + "loss": 0.0011, + "step": 111780 + }, + { + "epoch": 1.8892541172692936, + "grad_norm": 0.010571062564849854, + "learning_rate": 9.30848628226344e-08, + "loss": 0.0004, + "step": 111790 + }, + { + "epoch": 1.889423117548144, + "grad_norm": 0.008451041765511036, + "learning_rate": 9.280181940524214e-08, + "loss": 0.0011, + "step": 111800 + }, + { + "epoch": 1.8895921178269943, + "grad_norm": 0.001930863130837679, + "learning_rate": 9.251920293786742e-08, + "loss": 0.0006, + "step": 111810 + }, + { + "epoch": 1.889761118105845, + "grad_norm": 0.010977152734994888, + "learning_rate": 9.223701344509894e-08, + "loss": 0.0008, + "step": 111820 + }, + { + "epoch": 1.8899301183846955, + "grad_norm": 0.017599212005734444, + "learning_rate": 9.195525095148983e-08, + "loss": 0.0006, + "step": 111830 + }, + { + "epoch": 1.8900991186635459, + "grad_norm": 0.06310348212718964, + "learning_rate": 9.167391548155324e-08, + "loss": 0.0008, + "step": 111840 + }, + { + "epoch": 1.8902681189423962, + "grad_norm": 0.07708850502967834, + "learning_rate": 9.139300705976739e-08, + "loss": 0.0006, + "step": 111850 + }, + { + "epoch": 1.8904371192212466, + "grad_norm": 0.00364290876314044, + "learning_rate": 9.111252571057272e-08, + "loss": 0.0004, + "step": 111860 + }, + { + "epoch": 1.8906061195000972, + "grad_norm": 0.004606438800692558, + 
"learning_rate": 9.083247145837303e-08, + "loss": 0.001, + "step": 111870 + }, + { + "epoch": 1.8907751197789477, + "grad_norm": 0.18687938153743744, + "learning_rate": 9.05528443275333e-08, + "loss": 0.0009, + "step": 111880 + }, + { + "epoch": 1.890944120057798, + "grad_norm": 0.17379017174243927, + "learning_rate": 9.027364434238294e-08, + "loss": 0.0021, + "step": 111890 + }, + { + "epoch": 1.8911131203366485, + "grad_norm": 0.00936430599540472, + "learning_rate": 8.999487152721365e-08, + "loss": 0.0004, + "step": 111900 + }, + { + "epoch": 1.891282120615499, + "grad_norm": 0.07176022231578827, + "learning_rate": 8.971652590628043e-08, + "loss": 0.0007, + "step": 111910 + }, + { + "epoch": 1.8914511208943496, + "grad_norm": 0.2520315945148468, + "learning_rate": 8.943860750379952e-08, + "loss": 0.0008, + "step": 111920 + }, + { + "epoch": 1.8916201211732, + "grad_norm": 0.00437377393245697, + "learning_rate": 8.916111634395264e-08, + "loss": 0.001, + "step": 111930 + }, + { + "epoch": 1.8917891214520504, + "grad_norm": 0.0005836533382534981, + "learning_rate": 8.888405245088217e-08, + "loss": 0.0006, + "step": 111940 + }, + { + "epoch": 1.8919581217309007, + "grad_norm": 0.08991707861423492, + "learning_rate": 8.860741584869325e-08, + "loss": 0.0008, + "step": 111950 + }, + { + "epoch": 1.8921271220097513, + "grad_norm": 0.053484927862882614, + "learning_rate": 8.833120656145667e-08, + "loss": 0.0007, + "step": 111960 + }, + { + "epoch": 1.892296122288602, + "grad_norm": 0.1418548822402954, + "learning_rate": 8.805542461320149e-08, + "loss": 0.002, + "step": 111970 + }, + { + "epoch": 1.8924651225674523, + "grad_norm": 0.028906142339110374, + "learning_rate": 8.778007002792411e-08, + "loss": 0.0004, + "step": 111980 + }, + { + "epoch": 1.8926341228463026, + "grad_norm": 0.004131465218961239, + "learning_rate": 8.750514282958089e-08, + "loss": 0.0005, + "step": 111990 + }, + { + "epoch": 1.8928031231251532, + "grad_norm": 0.014240585267543793, + "learning_rate": 8.723064304209106e-08, + "loss": 0.0006, + "step": 112000 + }, + { + "epoch": 1.8929721234040038, + "grad_norm": 0.0017454311018809676, + "learning_rate": 8.695657068933883e-08, + "loss": 0.0005, + "step": 112010 + }, + { + "epoch": 1.8931411236828541, + "grad_norm": 0.055087897926568985, + "learning_rate": 8.668292579516902e-08, + "loss": 0.0011, + "step": 112020 + }, + { + "epoch": 1.8933101239617045, + "grad_norm": 0.0882839784026146, + "learning_rate": 8.640970838339036e-08, + "loss": 0.0005, + "step": 112030 + }, + { + "epoch": 1.8934791242405549, + "grad_norm": 0.09185419976711273, + "learning_rate": 8.613691847777384e-08, + "loss": 0.0009, + "step": 112040 + }, + { + "epoch": 1.8936481245194055, + "grad_norm": 0.04476842284202576, + "learning_rate": 8.58645561020538e-08, + "loss": 0.0009, + "step": 112050 + }, + { + "epoch": 1.893817124798256, + "grad_norm": 0.05075672268867493, + "learning_rate": 8.559262127992685e-08, + "loss": 0.0006, + "step": 112060 + }, + { + "epoch": 1.8939861250771064, + "grad_norm": 0.037351932376623154, + "learning_rate": 8.532111403505238e-08, + "loss": 0.0012, + "step": 112070 + }, + { + "epoch": 1.8941551253559568, + "grad_norm": 0.011177490465342999, + "learning_rate": 8.505003439105375e-08, + "loss": 0.0005, + "step": 112080 + }, + { + "epoch": 1.8943241256348073, + "grad_norm": 0.026821009814739227, + "learning_rate": 8.477938237151539e-08, + "loss": 0.0004, + "step": 112090 + }, + { + "epoch": 1.8944931259136577, + "grad_norm": 0.05947759002447128, + "learning_rate": 
8.450915799998516e-08, + "loss": 0.0007, + "step": 112100 + }, + { + "epoch": 1.8946621261925083, + "grad_norm": 0.020872613415122032, + "learning_rate": 8.423936129997479e-08, + "loss": 0.0004, + "step": 112110 + }, + { + "epoch": 1.8948311264713587, + "grad_norm": 0.037279363721609116, + "learning_rate": 8.396999229495718e-08, + "loss": 0.0006, + "step": 112120 + }, + { + "epoch": 1.895000126750209, + "grad_norm": 0.1426037698984146, + "learning_rate": 8.370105100836911e-08, + "loss": 0.0007, + "step": 112130 + }, + { + "epoch": 1.8951691270290596, + "grad_norm": 0.03487572818994522, + "learning_rate": 8.343253746360968e-08, + "loss": 0.0059, + "step": 112140 + }, + { + "epoch": 1.8953381273079102, + "grad_norm": 0.0009698563371784985, + "learning_rate": 8.31644516840402e-08, + "loss": 0.0004, + "step": 112150 + }, + { + "epoch": 1.8955071275867605, + "grad_norm": 0.01319518405944109, + "learning_rate": 8.289679369298642e-08, + "loss": 0.0008, + "step": 112160 + }, + { + "epoch": 1.895676127865611, + "grad_norm": 0.0071914829313755035, + "learning_rate": 8.26295635137353e-08, + "loss": 0.0006, + "step": 112170 + }, + { + "epoch": 1.8958451281444615, + "grad_norm": 0.011561156250536442, + "learning_rate": 8.236276116953767e-08, + "loss": 0.0006, + "step": 112180 + }, + { + "epoch": 1.8960141284233119, + "grad_norm": 0.004148657899349928, + "learning_rate": 8.209638668360609e-08, + "loss": 0.0006, + "step": 112190 + }, + { + "epoch": 1.8961831287021624, + "grad_norm": 0.018528766930103302, + "learning_rate": 8.1830440079117e-08, + "loss": 0.0003, + "step": 112200 + }, + { + "epoch": 1.8963521289810128, + "grad_norm": 0.0021219479385763407, + "learning_rate": 8.156492137920857e-08, + "loss": 0.0006, + "step": 112210 + }, + { + "epoch": 1.8965211292598632, + "grad_norm": 0.03522593155503273, + "learning_rate": 8.129983060698233e-08, + "loss": 0.0005, + "step": 112220 + }, + { + "epoch": 1.8966901295387137, + "grad_norm": 0.01392155047506094, + "learning_rate": 8.103516778550202e-08, + "loss": 0.0006, + "step": 112230 + }, + { + "epoch": 1.8968591298175643, + "grad_norm": 0.0011064070276916027, + "learning_rate": 8.077093293779592e-08, + "loss": 0.0004, + "step": 112240 + }, + { + "epoch": 1.8970281300964147, + "grad_norm": 0.09907397627830505, + "learning_rate": 8.05071260868523e-08, + "loss": 0.0006, + "step": 112250 + }, + { + "epoch": 1.897197130375265, + "grad_norm": 0.17797499895095825, + "learning_rate": 8.024374725562445e-08, + "loss": 0.0011, + "step": 112260 + }, + { + "epoch": 1.8973661306541156, + "grad_norm": 0.026572344824671745, + "learning_rate": 7.998079646702683e-08, + "loss": 0.0003, + "step": 112270 + }, + { + "epoch": 1.897535130932966, + "grad_norm": 0.001539503806270659, + "learning_rate": 7.971827374393837e-08, + "loss": 0.0007, + "step": 112280 + }, + { + "epoch": 1.8977041312118166, + "grad_norm": 0.005540081299841404, + "learning_rate": 7.945617910919967e-08, + "loss": 0.0007, + "step": 112290 + }, + { + "epoch": 1.897873131490667, + "grad_norm": 0.028968367725610733, + "learning_rate": 7.919451258561361e-08, + "loss": 0.0005, + "step": 112300 + }, + { + "epoch": 1.8980421317695173, + "grad_norm": 0.09642549604177475, + "learning_rate": 7.893327419594699e-08, + "loss": 0.0006, + "step": 112310 + }, + { + "epoch": 1.8982111320483679, + "grad_norm": 0.03285757452249527, + "learning_rate": 7.867246396292827e-08, + "loss": 0.0005, + "step": 112320 + }, + { + "epoch": 1.8983801323272185, + "grad_norm": 0.03382939100265503, + "learning_rate": 7.841208190924988e-08, + 
"loss": 0.0023, + "step": 112330 + }, + { + "epoch": 1.8985491326060688, + "grad_norm": 0.016068004071712494, + "learning_rate": 7.815212805756589e-08, + "loss": 0.0007, + "step": 112340 + }, + { + "epoch": 1.8987181328849192, + "grad_norm": 0.04922647401690483, + "learning_rate": 7.789260243049324e-08, + "loss": 0.0009, + "step": 112350 + }, + { + "epoch": 1.8988871331637696, + "grad_norm": 0.04650222882628441, + "learning_rate": 7.763350505061273e-08, + "loss": 0.0004, + "step": 112360 + }, + { + "epoch": 1.8990561334426201, + "grad_norm": 0.05765797942876816, + "learning_rate": 7.73748359404669e-08, + "loss": 0.0007, + "step": 112370 + }, + { + "epoch": 1.8992251337214707, + "grad_norm": 0.16425466537475586, + "learning_rate": 7.711659512256109e-08, + "loss": 0.0006, + "step": 112380 + }, + { + "epoch": 1.899394134000321, + "grad_norm": 0.10192276537418365, + "learning_rate": 7.685878261936341e-08, + "loss": 0.0006, + "step": 112390 + }, + { + "epoch": 1.8995631342791715, + "grad_norm": 0.042300738394260406, + "learning_rate": 7.660139845330428e-08, + "loss": 0.0016, + "step": 112400 + }, + { + "epoch": 1.899732134558022, + "grad_norm": 0.02796986885368824, + "learning_rate": 7.634444264677854e-08, + "loss": 0.0008, + "step": 112410 + }, + { + "epoch": 1.8999011348368726, + "grad_norm": 0.1012326180934906, + "learning_rate": 7.60879152221411e-08, + "loss": 0.0005, + "step": 112420 + }, + { + "epoch": 1.900070135115723, + "grad_norm": 0.008573319762945175, + "learning_rate": 7.583181620171298e-08, + "loss": 0.0003, + "step": 112430 + }, + { + "epoch": 1.9002391353945733, + "grad_norm": 0.0624210424721241, + "learning_rate": 7.557614560777416e-08, + "loss": 0.0007, + "step": 112440 + }, + { + "epoch": 1.9004081356734237, + "grad_norm": 0.073053739964962, + "learning_rate": 7.532090346257071e-08, + "loss": 0.0007, + "step": 112450 + }, + { + "epoch": 1.9005771359522743, + "grad_norm": 0.03362878039479256, + "learning_rate": 7.506608978830876e-08, + "loss": 0.0009, + "step": 112460 + }, + { + "epoch": 1.9007461362311249, + "grad_norm": 0.032777752727270126, + "learning_rate": 7.481170460715947e-08, + "loss": 0.0007, + "step": 112470 + }, + { + "epoch": 1.9009151365099752, + "grad_norm": 0.08562567830085754, + "learning_rate": 7.455774794125403e-08, + "loss": 0.0007, + "step": 112480 + }, + { + "epoch": 1.9010841367888256, + "grad_norm": 0.048164382576942444, + "learning_rate": 7.430421981268976e-08, + "loss": 0.0003, + "step": 112490 + }, + { + "epoch": 1.9012531370676762, + "grad_norm": 0.010092861019074917, + "learning_rate": 7.405112024352346e-08, + "loss": 0.0009, + "step": 112500 + }, + { + "epoch": 1.9014221373465268, + "grad_norm": 0.07001367956399918, + "learning_rate": 7.379844925577639e-08, + "loss": 0.0012, + "step": 112510 + }, + { + "epoch": 1.9015911376253771, + "grad_norm": 0.024450233206152916, + "learning_rate": 7.35462068714321e-08, + "loss": 0.0003, + "step": 112520 + }, + { + "epoch": 1.9017601379042275, + "grad_norm": 0.014951656572520733, + "learning_rate": 7.329439311243747e-08, + "loss": 0.0003, + "step": 112530 + }, + { + "epoch": 1.9019291381830778, + "grad_norm": 0.021235046908259392, + "learning_rate": 7.304300800070053e-08, + "loss": 0.0009, + "step": 112540 + }, + { + "epoch": 1.9020981384619284, + "grad_norm": 0.022454794496297836, + "learning_rate": 7.279205155809432e-08, + "loss": 0.0006, + "step": 112550 + }, + { + "epoch": 1.902267138740779, + "grad_norm": 0.04021525755524635, + "learning_rate": 7.254152380645196e-08, + "loss": 0.0006, + "step": 112560 + 
}, + { + "epoch": 1.9024361390196294, + "grad_norm": 0.036809828132390976, + "learning_rate": 7.229142476757101e-08, + "loss": 0.0005, + "step": 112570 + }, + { + "epoch": 1.9026051392984797, + "grad_norm": 0.022405659779906273, + "learning_rate": 7.204175446321182e-08, + "loss": 0.0005, + "step": 112580 + }, + { + "epoch": 1.9027741395773303, + "grad_norm": 0.0018231738358736038, + "learning_rate": 7.179251291509593e-08, + "loss": 0.0004, + "step": 112590 + }, + { + "epoch": 1.902943139856181, + "grad_norm": 0.00038132930058054626, + "learning_rate": 7.154370014490986e-08, + "loss": 0.0009, + "step": 112600 + }, + { + "epoch": 1.9031121401350313, + "grad_norm": 0.032419078052043915, + "learning_rate": 7.129531617430019e-08, + "loss": 0.0012, + "step": 112610 + }, + { + "epoch": 1.9032811404138816, + "grad_norm": 0.05176890268921852, + "learning_rate": 7.104736102487852e-08, + "loss": 0.0003, + "step": 112620 + }, + { + "epoch": 1.903450140692732, + "grad_norm": 0.07563209533691406, + "learning_rate": 7.079983471821761e-08, + "loss": 0.0005, + "step": 112630 + }, + { + "epoch": 1.9036191409715826, + "grad_norm": 0.009525924921035767, + "learning_rate": 7.05527372758541e-08, + "loss": 0.0008, + "step": 112640 + }, + { + "epoch": 1.9037881412504332, + "grad_norm": 0.07802736014127731, + "learning_rate": 7.030606871928525e-08, + "loss": 0.0007, + "step": 112650 + }, + { + "epoch": 1.9039571415292835, + "grad_norm": 0.001047362806275487, + "learning_rate": 7.005982906997389e-08, + "loss": 0.0006, + "step": 112660 + }, + { + "epoch": 1.9041261418081339, + "grad_norm": 0.010851298458874226, + "learning_rate": 6.981401834934343e-08, + "loss": 0.0009, + "step": 112670 + }, + { + "epoch": 1.9042951420869845, + "grad_norm": 0.05091039463877678, + "learning_rate": 6.956863657878065e-08, + "loss": 0.0003, + "step": 112680 + }, + { + "epoch": 1.904464142365835, + "grad_norm": 0.026290442794561386, + "learning_rate": 6.932368377963517e-08, + "loss": 0.0011, + "step": 112690 + }, + { + "epoch": 1.9046331426446854, + "grad_norm": 0.05944421887397766, + "learning_rate": 6.90791599732188e-08, + "loss": 0.0005, + "step": 112700 + }, + { + "epoch": 1.9048021429235358, + "grad_norm": 0.009202356450259686, + "learning_rate": 6.883506518080619e-08, + "loss": 0.0006, + "step": 112710 + }, + { + "epoch": 1.9049711432023861, + "grad_norm": 0.021600691601634026, + "learning_rate": 6.85913994236348e-08, + "loss": 0.0004, + "step": 112720 + }, + { + "epoch": 1.9051401434812367, + "grad_norm": 0.028977418318390846, + "learning_rate": 6.834816272290546e-08, + "loss": 0.0006, + "step": 112730 + }, + { + "epoch": 1.9053091437600873, + "grad_norm": 0.02023027464747429, + "learning_rate": 6.810535509977956e-08, + "loss": 0.0007, + "step": 112740 + }, + { + "epoch": 1.9054781440389377, + "grad_norm": 0.022253448143601418, + "learning_rate": 6.786297657538355e-08, + "loss": 0.0005, + "step": 112750 + }, + { + "epoch": 1.905647144317788, + "grad_norm": 0.003908324986696243, + "learning_rate": 6.76210271708061e-08, + "loss": 0.0006, + "step": 112760 + }, + { + "epoch": 1.9058161445966386, + "grad_norm": 0.0015063261380419135, + "learning_rate": 6.73795069070965e-08, + "loss": 0.0005, + "step": 112770 + }, + { + "epoch": 1.9059851448754892, + "grad_norm": 0.017922574654221535, + "learning_rate": 6.713841580526903e-08, + "loss": 0.0004, + "step": 112780 + }, + { + "epoch": 1.9061541451543396, + "grad_norm": 0.07381732761859894, + "learning_rate": 6.689775388629971e-08, + "loss": 0.0008, + "step": 112790 + }, + { + "epoch": 
1.90632314543319, + "grad_norm": 0.003931921906769276, + "learning_rate": 6.665752117112733e-08, + "loss": 0.0006, + "step": 112800 + }, + { + "epoch": 1.9064921457120403, + "grad_norm": 0.05270419269800186, + "learning_rate": 6.641771768065298e-08, + "loss": 0.0005, + "step": 112810 + }, + { + "epoch": 1.9066611459908909, + "grad_norm": 0.030747007578611374, + "learning_rate": 6.617834343574159e-08, + "loss": 0.001, + "step": 112820 + }, + { + "epoch": 1.9068301462697415, + "grad_norm": 0.025559140369296074, + "learning_rate": 6.59393984572182e-08, + "loss": 0.0005, + "step": 112830 + }, + { + "epoch": 1.9069991465485918, + "grad_norm": 0.028459573164582253, + "learning_rate": 6.570088276587394e-08, + "loss": 0.0008, + "step": 112840 + }, + { + "epoch": 1.9071681468274422, + "grad_norm": 0.00013460649643093348, + "learning_rate": 6.546279638246e-08, + "loss": 0.0006, + "step": 112850 + }, + { + "epoch": 1.9073371471062928, + "grad_norm": 0.02334650419652462, + "learning_rate": 6.522513932769093e-08, + "loss": 0.0003, + "step": 112860 + }, + { + "epoch": 1.9075061473851433, + "grad_norm": 0.12924088537693024, + "learning_rate": 6.49879116222446e-08, + "loss": 0.0005, + "step": 112870 + }, + { + "epoch": 1.9076751476639937, + "grad_norm": 0.016007723286747932, + "learning_rate": 6.475111328676009e-08, + "loss": 0.0006, + "step": 112880 + }, + { + "epoch": 1.907844147942844, + "grad_norm": 0.05216017737984657, + "learning_rate": 6.45147443418409e-08, + "loss": 0.0008, + "step": 112890 + }, + { + "epoch": 1.9080131482216944, + "grad_norm": 0.07244875282049179, + "learning_rate": 6.427880480805226e-08, + "loss": 0.0004, + "step": 112900 + }, + { + "epoch": 1.908182148500545, + "grad_norm": 0.005763023626059294, + "learning_rate": 6.404329470592108e-08, + "loss": 0.0004, + "step": 112910 + }, + { + "epoch": 1.9083511487793956, + "grad_norm": 0.030477937310934067, + "learning_rate": 6.380821405593929e-08, + "loss": 0.0009, + "step": 112920 + }, + { + "epoch": 1.908520149058246, + "grad_norm": 0.02988293394446373, + "learning_rate": 6.357356287855832e-08, + "loss": 0.0005, + "step": 112930 + }, + { + "epoch": 1.9086891493370963, + "grad_norm": 0.04334455728530884, + "learning_rate": 6.333934119419516e-08, + "loss": 0.0005, + "step": 112940 + }, + { + "epoch": 1.908858149615947, + "grad_norm": 0.0667472705245018, + "learning_rate": 6.310554902322852e-08, + "loss": 0.001, + "step": 112950 + }, + { + "epoch": 1.9090271498947975, + "grad_norm": 0.06299169361591339, + "learning_rate": 6.287218638599879e-08, + "loss": 0.0004, + "step": 112960 + }, + { + "epoch": 1.9091961501736479, + "grad_norm": 0.0009392150095663965, + "learning_rate": 6.263925330280973e-08, + "loss": 0.0003, + "step": 112970 + }, + { + "epoch": 1.9093651504524982, + "grad_norm": 0.018651852384209633, + "learning_rate": 6.240674979392736e-08, + "loss": 0.0012, + "step": 112980 + }, + { + "epoch": 1.9095341507313486, + "grad_norm": 0.015531927347183228, + "learning_rate": 6.217467587958159e-08, + "loss": 0.0005, + "step": 112990 + }, + { + "epoch": 1.9097031510101992, + "grad_norm": 0.008683258667588234, + "learning_rate": 6.19430315799624e-08, + "loss": 0.0007, + "step": 113000 + }, + { + "epoch": 1.9098721512890497, + "grad_norm": 0.004097287543118, + "learning_rate": 6.171181691522587e-08, + "loss": 0.0004, + "step": 113010 + }, + { + "epoch": 1.9100411515679, + "grad_norm": 0.03216277435421944, + "learning_rate": 6.148103190548705e-08, + "loss": 0.0004, + "step": 113020 + }, + { + "epoch": 1.9102101518467505, + "grad_norm": 
0.020455561578273773, + "learning_rate": 6.125067657082706e-08, + "loss": 0.0005, + "step": 113030 + }, + { + "epoch": 1.910379152125601, + "grad_norm": 0.031674377620220184, + "learning_rate": 6.102075093128601e-08, + "loss": 0.0005, + "step": 113040 + }, + { + "epoch": 1.9105481524044514, + "grad_norm": 0.018984632566571236, + "learning_rate": 6.079125500687012e-08, + "loss": 0.0004, + "step": 113050 + }, + { + "epoch": 1.910717152683302, + "grad_norm": 0.01681148260831833, + "learning_rate": 6.05621888175456e-08, + "loss": 0.0006, + "step": 113060 + }, + { + "epoch": 1.9108861529621524, + "grad_norm": 0.03032616339623928, + "learning_rate": 6.033355238324324e-08, + "loss": 0.0003, + "step": 113070 + }, + { + "epoch": 1.9110551532410027, + "grad_norm": 0.04922556132078171, + "learning_rate": 6.010534572385485e-08, + "loss": 0.0005, + "step": 113080 + }, + { + "epoch": 1.9112241535198533, + "grad_norm": 0.11718804389238358, + "learning_rate": 5.987756885923568e-08, + "loss": 0.0025, + "step": 113090 + }, + { + "epoch": 1.911393153798704, + "grad_norm": 0.03614810109138489, + "learning_rate": 5.965022180920377e-08, + "loss": 0.0008, + "step": 113100 + }, + { + "epoch": 1.9115621540775543, + "grad_norm": 0.0017513089114800096, + "learning_rate": 5.942330459353884e-08, + "loss": 0.0007, + "step": 113110 + }, + { + "epoch": 1.9117311543564046, + "grad_norm": 0.01735696755349636, + "learning_rate": 5.9196817231984e-08, + "loss": 0.0009, + "step": 113120 + }, + { + "epoch": 1.9119001546352552, + "grad_norm": 0.032418813556432724, + "learning_rate": 5.89707597442446e-08, + "loss": 0.0014, + "step": 113130 + }, + { + "epoch": 1.9120691549141056, + "grad_norm": 0.10654094070196152, + "learning_rate": 5.8745132149989335e-08, + "loss": 0.0006, + "step": 113140 + }, + { + "epoch": 1.9122381551929561, + "grad_norm": 0.028356071561574936, + "learning_rate": 5.851993446884807e-08, + "loss": 0.0006, + "step": 113150 + }, + { + "epoch": 1.9124071554718065, + "grad_norm": 0.01239448320120573, + "learning_rate": 5.829516672041513e-08, + "loss": 0.0004, + "step": 113160 + }, + { + "epoch": 1.9125761557506569, + "grad_norm": 0.07548968493938446, + "learning_rate": 5.807082892424543e-08, + "loss": 0.0003, + "step": 113170 + }, + { + "epoch": 1.9127451560295075, + "grad_norm": 0.0353437177836895, + "learning_rate": 5.784692109985834e-08, + "loss": 0.0004, + "step": 113180 + }, + { + "epoch": 1.912914156308358, + "grad_norm": 0.026465613394975662, + "learning_rate": 5.7623443266733856e-08, + "loss": 0.0004, + "step": 113190 + }, + { + "epoch": 1.9130831565872084, + "grad_norm": 0.0006597606698051095, + "learning_rate": 5.7400395444316415e-08, + "loss": 0.0006, + "step": 113200 + }, + { + "epoch": 1.9132521568660588, + "grad_norm": 0.0004718085110653192, + "learning_rate": 5.71777776520116e-08, + "loss": 0.0004, + "step": 113210 + }, + { + "epoch": 1.9134211571449091, + "grad_norm": 0.10736609250307083, + "learning_rate": 5.695558990918892e-08, + "loss": 0.0007, + "step": 113220 + }, + { + "epoch": 1.9135901574237597, + "grad_norm": 0.01379178836941719, + "learning_rate": 5.673383223517959e-08, + "loss": 0.0004, + "step": 113230 + }, + { + "epoch": 1.9137591577026103, + "grad_norm": 0.0035380241461098194, + "learning_rate": 5.65125046492776e-08, + "loss": 0.0008, + "step": 113240 + }, + { + "epoch": 1.9139281579814607, + "grad_norm": 0.06314975768327713, + "learning_rate": 5.6291607170739226e-08, + "loss": 0.0007, + "step": 113250 + }, + { + "epoch": 1.914097158260311, + "grad_norm": 0.015247541479766369, + 
"learning_rate": 5.6071139818784095e-08, + "loss": 0.0005, + "step": 113260 + }, + { + "epoch": 1.9142661585391616, + "grad_norm": 0.022014690563082695, + "learning_rate": 5.5851102612593525e-08, + "loss": 0.0009, + "step": 113270 + }, + { + "epoch": 1.9144351588180122, + "grad_norm": 0.008971292525529861, + "learning_rate": 5.56314955713122e-08, + "loss": 0.0004, + "step": 113280 + }, + { + "epoch": 1.9146041590968625, + "grad_norm": 0.09483298659324646, + "learning_rate": 5.541231871404651e-08, + "loss": 0.0006, + "step": 113290 + }, + { + "epoch": 1.914773159375713, + "grad_norm": 0.07216836512088776, + "learning_rate": 5.519357205986564e-08, + "loss": 0.0014, + "step": 113300 + }, + { + "epoch": 1.9149421596545633, + "grad_norm": 0.04808535799384117, + "learning_rate": 5.49752556278027e-08, + "loss": 0.0006, + "step": 113310 + }, + { + "epoch": 1.9151111599334139, + "grad_norm": 0.014555119909346104, + "learning_rate": 5.475736943685195e-08, + "loss": 0.0023, + "step": 113320 + }, + { + "epoch": 1.9152801602122644, + "grad_norm": 0.10032723098993301, + "learning_rate": 5.4539913505969876e-08, + "loss": 0.0006, + "step": 113330 + }, + { + "epoch": 1.9154491604911148, + "grad_norm": 0.06952448934316635, + "learning_rate": 5.432288785407691e-08, + "loss": 0.0008, + "step": 113340 + }, + { + "epoch": 1.9156181607699652, + "grad_norm": 0.04177826642990112, + "learning_rate": 5.410629250005517e-08, + "loss": 0.0011, + "step": 113350 + }, + { + "epoch": 1.9157871610488157, + "grad_norm": 0.0009109816746786237, + "learning_rate": 5.389012746274902e-08, + "loss": 0.0009, + "step": 113360 + }, + { + "epoch": 1.9159561613276663, + "grad_norm": 0.004139615222811699, + "learning_rate": 5.367439276096675e-08, + "loss": 0.0005, + "step": 113370 + }, + { + "epoch": 1.9161251616065167, + "grad_norm": 0.00573534332215786, + "learning_rate": 5.3459088413477245e-08, + "loss": 0.0006, + "step": 113380 + }, + { + "epoch": 1.916294161885367, + "grad_norm": 0.021768197417259216, + "learning_rate": 5.3244214439014396e-08, + "loss": 0.0005, + "step": 113390 + }, + { + "epoch": 1.9164631621642174, + "grad_norm": 0.00780107406899333, + "learning_rate": 5.302977085627159e-08, + "loss": 0.0004, + "step": 113400 + }, + { + "epoch": 1.916632162443068, + "grad_norm": 0.14811588823795319, + "learning_rate": 5.281575768390779e-08, + "loss": 0.0008, + "step": 113410 + }, + { + "epoch": 1.9168011627219186, + "grad_norm": 0.032275181263685226, + "learning_rate": 5.260217494054254e-08, + "loss": 0.0007, + "step": 113420 + }, + { + "epoch": 1.916970163000769, + "grad_norm": 0.011303196661174297, + "learning_rate": 5.238902264475876e-08, + "loss": 0.0008, + "step": 113430 + }, + { + "epoch": 1.9171391632796193, + "grad_norm": 0.00017243089678231627, + "learning_rate": 5.217630081510161e-08, + "loss": 0.0003, + "step": 113440 + }, + { + "epoch": 1.91730816355847, + "grad_norm": 0.0003776454250328243, + "learning_rate": 5.196400947007962e-08, + "loss": 0.0006, + "step": 113450 + }, + { + "epoch": 1.9174771638373205, + "grad_norm": 0.006594053935259581, + "learning_rate": 5.1752148628161917e-08, + "loss": 0.0009, + "step": 113460 + }, + { + "epoch": 1.9176461641161708, + "grad_norm": 0.024989726021885872, + "learning_rate": 5.1540718307782624e-08, + "loss": 0.0003, + "step": 113470 + }, + { + "epoch": 1.9178151643950212, + "grad_norm": 0.012359109707176685, + "learning_rate": 5.132971852733648e-08, + "loss": 0.0003, + "step": 113480 + }, + { + "epoch": 1.9179841646738716, + "grad_norm": 0.009366139769554138, + "learning_rate": 
5.1119149305181584e-08, + "loss": 0.001, + "step": 113490 + }, + { + "epoch": 1.9181531649527221, + "grad_norm": 0.025234593078494072, + "learning_rate": 5.090901065963827e-08, + "loss": 0.0005, + "step": 113500 + }, + { + "epoch": 1.9183221652315727, + "grad_norm": 0.04369024187326431, + "learning_rate": 5.069930260899081e-08, + "loss": 0.0007, + "step": 113510 + }, + { + "epoch": 1.918491165510423, + "grad_norm": 0.011204466223716736, + "learning_rate": 5.049002517148349e-08, + "loss": 0.0004, + "step": 113520 + }, + { + "epoch": 1.9186601657892735, + "grad_norm": 0.12482357025146484, + "learning_rate": 5.028117836532453e-08, + "loss": 0.0007, + "step": 113530 + }, + { + "epoch": 1.918829166068124, + "grad_norm": 0.04277415573596954, + "learning_rate": 5.007276220868551e-08, + "loss": 0.0005, + "step": 113540 + }, + { + "epoch": 1.9189981663469746, + "grad_norm": 0.004969023633748293, + "learning_rate": 4.986477671969914e-08, + "loss": 0.0004, + "step": 113550 + }, + { + "epoch": 1.919167166625825, + "grad_norm": 0.0015870180213823915, + "learning_rate": 4.965722191646094e-08, + "loss": 0.0007, + "step": 113560 + }, + { + "epoch": 1.9193361669046753, + "grad_norm": 0.036459941416978836, + "learning_rate": 4.945009781702925e-08, + "loss": 0.0003, + "step": 113570 + }, + { + "epoch": 1.9195051671835257, + "grad_norm": 0.023583462461829185, + "learning_rate": 4.924340443942521e-08, + "loss": 0.0006, + "step": 113580 + }, + { + "epoch": 1.9196741674623763, + "grad_norm": 0.05077134445309639, + "learning_rate": 4.903714180163221e-08, + "loss": 0.0016, + "step": 113590 + }, + { + "epoch": 1.9198431677412269, + "grad_norm": 0.03774239867925644, + "learning_rate": 4.883130992159535e-08, + "loss": 0.0005, + "step": 113600 + }, + { + "epoch": 1.9200121680200772, + "grad_norm": 0.021962564438581467, + "learning_rate": 4.862590881722362e-08, + "loss": 0.0006, + "step": 113610 + }, + { + "epoch": 1.9201811682989276, + "grad_norm": 0.04520101100206375, + "learning_rate": 4.8420938506387736e-08, + "loss": 0.0005, + "step": 113620 + }, + { + "epoch": 1.9203501685777782, + "grad_norm": 0.03361218050122261, + "learning_rate": 4.821639900692121e-08, + "loss": 0.0005, + "step": 113630 + }, + { + "epoch": 1.9205191688566288, + "grad_norm": 0.005732972640544176, + "learning_rate": 4.801229033662036e-08, + "loss": 0.0003, + "step": 113640 + }, + { + "epoch": 1.9206881691354791, + "grad_norm": 0.03359975293278694, + "learning_rate": 4.7808612513242644e-08, + "loss": 0.0002, + "step": 113650 + }, + { + "epoch": 1.9208571694143295, + "grad_norm": 0.03723776340484619, + "learning_rate": 4.760536555450945e-08, + "loss": 0.001, + "step": 113660 + }, + { + "epoch": 1.9210261696931799, + "grad_norm": 0.007026666309684515, + "learning_rate": 4.740254947810441e-08, + "loss": 0.0002, + "step": 113670 + }, + { + "epoch": 1.9211951699720304, + "grad_norm": 0.04161248356103897, + "learning_rate": 4.720016430167396e-08, + "loss": 0.0006, + "step": 113680 + }, + { + "epoch": 1.921364170250881, + "grad_norm": 0.002371510025113821, + "learning_rate": 4.699821004282568e-08, + "loss": 0.0006, + "step": 113690 + }, + { + "epoch": 1.9215331705297314, + "grad_norm": 0.03056161478161812, + "learning_rate": 4.679668671913107e-08, + "loss": 0.0011, + "step": 113700 + }, + { + "epoch": 1.9217021708085817, + "grad_norm": 0.04500853270292282, + "learning_rate": 4.659559434812333e-08, + "loss": 0.0008, + "step": 113710 + }, + { + "epoch": 1.9218711710874323, + "grad_norm": 0.025726649910211563, + "learning_rate": 4.639493294729902e-08, + 
"loss": 0.0008, + "step": 113720 + }, + { + "epoch": 1.922040171366283, + "grad_norm": 0.0328289270401001, + "learning_rate": 4.61947025341164e-08, + "loss": 0.0022, + "step": 113730 + }, + { + "epoch": 1.9222091716451333, + "grad_norm": 0.04501111060380936, + "learning_rate": 4.5994903125995974e-08, + "loss": 0.0008, + "step": 113740 + }, + { + "epoch": 1.9223781719239836, + "grad_norm": 0.12412121891975403, + "learning_rate": 4.579553474032161e-08, + "loss": 0.0014, + "step": 113750 + }, + { + "epoch": 1.922547172202834, + "grad_norm": 0.06506326049566269, + "learning_rate": 4.559659739444e-08, + "loss": 0.0011, + "step": 113760 + }, + { + "epoch": 1.9227161724816846, + "grad_norm": 0.023001553490757942, + "learning_rate": 4.5398091105658956e-08, + "loss": 0.0016, + "step": 113770 + }, + { + "epoch": 1.9228851727605352, + "grad_norm": 0.01615159958600998, + "learning_rate": 4.5200015891249115e-08, + "loss": 0.0013, + "step": 113780 + }, + { + "epoch": 1.9230541730393855, + "grad_norm": 0.003368740202859044, + "learning_rate": 4.500237176844447e-08, + "loss": 0.0008, + "step": 113790 + }, + { + "epoch": 1.923223173318236, + "grad_norm": 0.012499597854912281, + "learning_rate": 4.4805158754441804e-08, + "loss": 0.0002, + "step": 113800 + }, + { + "epoch": 1.9233921735970865, + "grad_norm": 0.019146615639328957, + "learning_rate": 4.460837686639796e-08, + "loss": 0.0003, + "step": 113810 + }, + { + "epoch": 1.923561173875937, + "grad_norm": 0.017188021913170815, + "learning_rate": 4.441202612143536e-08, + "loss": 0.0006, + "step": 113820 + }, + { + "epoch": 1.9237301741547874, + "grad_norm": 0.0036173483822494745, + "learning_rate": 4.421610653663588e-08, + "loss": 0.0006, + "step": 113830 + }, + { + "epoch": 1.9238991744336378, + "grad_norm": 0.005861383862793446, + "learning_rate": 4.4020618129047565e-08, + "loss": 0.0008, + "step": 113840 + }, + { + "epoch": 1.9240681747124881, + "grad_norm": 0.16966085135936737, + "learning_rate": 4.38255609156768e-08, + "loss": 0.0011, + "step": 113850 + }, + { + "epoch": 1.9242371749913387, + "grad_norm": 0.028564633801579475, + "learning_rate": 4.363093491349612e-08, + "loss": 0.0003, + "step": 113860 + }, + { + "epoch": 1.9244061752701893, + "grad_norm": 0.015617894940078259, + "learning_rate": 4.3436740139438104e-08, + "loss": 0.0004, + "step": 113870 + }, + { + "epoch": 1.9245751755490397, + "grad_norm": 0.038057055324316025, + "learning_rate": 4.324297661039867e-08, + "loss": 0.0007, + "step": 113880 + }, + { + "epoch": 1.92474417582789, + "grad_norm": 0.009012563154101372, + "learning_rate": 4.3049644343236554e-08, + "loss": 0.0004, + "step": 113890 + }, + { + "epoch": 1.9249131761067406, + "grad_norm": 0.017938757315278053, + "learning_rate": 4.2856743354771636e-08, + "loss": 0.0003, + "step": 113900 + }, + { + "epoch": 1.925082176385591, + "grad_norm": 0.0009190856944769621, + "learning_rate": 4.266427366178882e-08, + "loss": 0.0007, + "step": 113910 + }, + { + "epoch": 1.9252511766644416, + "grad_norm": 0.026434002444148064, + "learning_rate": 4.2472235281032506e-08, + "loss": 0.0003, + "step": 113920 + }, + { + "epoch": 1.925420176943292, + "grad_norm": 0.049956440925598145, + "learning_rate": 4.2280628229212086e-08, + "loss": 0.0004, + "step": 113930 + }, + { + "epoch": 1.9255891772221423, + "grad_norm": 0.046455077826976776, + "learning_rate": 4.208945252299701e-08, + "loss": 0.0007, + "step": 113940 + }, + { + "epoch": 1.9257581775009929, + "grad_norm": 0.005489541217684746, + "learning_rate": 4.189870817902175e-08, + "loss": 0.0004, + 
"step": 113950 + }, + { + "epoch": 1.9259271777798435, + "grad_norm": 0.09641050547361374, + "learning_rate": 4.170839521388137e-08, + "loss": 0.001, + "step": 113960 + }, + { + "epoch": 1.9260961780586938, + "grad_norm": 0.00044007538235746324, + "learning_rate": 4.151851364413373e-08, + "loss": 0.0005, + "step": 113970 + }, + { + "epoch": 1.9262651783375442, + "grad_norm": 0.25061318278312683, + "learning_rate": 4.132906348630006e-08, + "loss": 0.0005, + "step": 113980 + }, + { + "epoch": 1.9264341786163948, + "grad_norm": 0.02810671553015709, + "learning_rate": 4.114004475686328e-08, + "loss": 0.0004, + "step": 113990 + }, + { + "epoch": 1.9266031788952451, + "grad_norm": 0.04363776743412018, + "learning_rate": 4.0951457472268584e-08, + "loss": 0.0009, + "step": 114000 + }, + { + "epoch": 1.9267721791740957, + "grad_norm": 0.04865751415491104, + "learning_rate": 4.07633016489245e-08, + "loss": 0.0002, + "step": 114010 + }, + { + "epoch": 1.926941179452946, + "grad_norm": 0.004051418509334326, + "learning_rate": 4.057557730320072e-08, + "loss": 0.0007, + "step": 114020 + }, + { + "epoch": 1.9271101797317964, + "grad_norm": 0.06691235303878784, + "learning_rate": 4.038828445143139e-08, + "loss": 0.0006, + "step": 114030 + }, + { + "epoch": 1.927279180010647, + "grad_norm": 0.05675894394516945, + "learning_rate": 4.020142310991071e-08, + "loss": 0.0009, + "step": 114040 + }, + { + "epoch": 1.9274481802894976, + "grad_norm": 0.07560726255178452, + "learning_rate": 4.0014993294896776e-08, + "loss": 0.0004, + "step": 114050 + }, + { + "epoch": 1.927617180568348, + "grad_norm": 0.004082811065018177, + "learning_rate": 3.9828995022610506e-08, + "loss": 0.0005, + "step": 114060 + }, + { + "epoch": 1.9277861808471983, + "grad_norm": 0.10598105937242508, + "learning_rate": 3.964342830923451e-08, + "loss": 0.0006, + "step": 114070 + }, + { + "epoch": 1.927955181126049, + "grad_norm": 0.0012448799097910523, + "learning_rate": 3.9458293170913096e-08, + "loss": 0.0005, + "step": 114080 + }, + { + "epoch": 1.9281241814048993, + "grad_norm": 0.002484311815351248, + "learning_rate": 3.9273589623755045e-08, + "loss": 0.0003, + "step": 114090 + }, + { + "epoch": 1.9282931816837499, + "grad_norm": 0.027685141190886497, + "learning_rate": 3.908931768382973e-08, + "loss": 0.0009, + "step": 114100 + }, + { + "epoch": 1.9284621819626002, + "grad_norm": 0.024779699742794037, + "learning_rate": 3.8905477367169876e-08, + "loss": 0.0005, + "step": 114110 + }, + { + "epoch": 1.9286311822414506, + "grad_norm": 0.06355228275060654, + "learning_rate": 3.8722068689770464e-08, + "loss": 0.001, + "step": 114120 + }, + { + "epoch": 1.9288001825203012, + "grad_norm": 0.050809379667043686, + "learning_rate": 3.853909166758929e-08, + "loss": 0.0005, + "step": 114130 + }, + { + "epoch": 1.9289691827991517, + "grad_norm": 0.017558986321091652, + "learning_rate": 3.835654631654584e-08, + "loss": 0.0007, + "step": 114140 + }, + { + "epoch": 1.9291381830780021, + "grad_norm": 0.040090251713991165, + "learning_rate": 3.817443265252296e-08, + "loss": 0.0008, + "step": 114150 + }, + { + "epoch": 1.9293071833568525, + "grad_norm": 0.012433486990630627, + "learning_rate": 3.799275069136466e-08, + "loss": 0.0007, + "step": 114160 + }, + { + "epoch": 1.9294761836357028, + "grad_norm": 0.045401155948638916, + "learning_rate": 3.781150044887827e-08, + "loss": 0.0008, + "step": 114170 + }, + { + "epoch": 1.9296451839145534, + "grad_norm": 0.03455643728375435, + "learning_rate": 3.763068194083452e-08, + "loss": 0.0005, + "step": 114180 + }, 
+ { + "epoch": 1.929814184193404, + "grad_norm": 0.018427403643727303, + "learning_rate": 3.7450295182963616e-08, + "loss": 0.0005, + "step": 114190 + }, + { + "epoch": 1.9299831844722544, + "grad_norm": 0.03793042525649071, + "learning_rate": 3.7270340190961875e-08, + "loss": 0.0005, + "step": 114200 + }, + { + "epoch": 1.9301521847511047, + "grad_norm": 0.0003596784081310034, + "learning_rate": 3.709081698048511e-08, + "loss": 0.0008, + "step": 114210 + }, + { + "epoch": 1.9303211850299553, + "grad_norm": 0.0029309610836207867, + "learning_rate": 3.691172556715361e-08, + "loss": 0.0004, + "step": 114220 + }, + { + "epoch": 1.930490185308806, + "grad_norm": 0.07106602191925049, + "learning_rate": 3.6733065966548244e-08, + "loss": 0.0001, + "step": 114230 + }, + { + "epoch": 1.9306591855876563, + "grad_norm": 0.09174923598766327, + "learning_rate": 3.6554838194214345e-08, + "loss": 0.0005, + "step": 114240 + }, + { + "epoch": 1.9308281858665066, + "grad_norm": 0.051286231726408005, + "learning_rate": 3.63770422656573e-08, + "loss": 0.0004, + "step": 114250 + }, + { + "epoch": 1.930997186145357, + "grad_norm": 0.05942637473344803, + "learning_rate": 3.619967819634695e-08, + "loss": 0.0006, + "step": 114260 + }, + { + "epoch": 1.9311661864242076, + "grad_norm": 0.01240352913737297, + "learning_rate": 3.602274600171485e-08, + "loss": 0.0006, + "step": 114270 + }, + { + "epoch": 1.9313351867030581, + "grad_norm": 0.035853300243616104, + "learning_rate": 3.584624569715478e-08, + "loss": 0.0003, + "step": 114280 + }, + { + "epoch": 1.9315041869819085, + "grad_norm": 0.00031042128102853894, + "learning_rate": 3.5670177298023356e-08, + "loss": 0.0013, + "step": 114290 + }, + { + "epoch": 1.9316731872607589, + "grad_norm": 0.013527607545256615, + "learning_rate": 3.549454081963943e-08, + "loss": 0.0013, + "step": 114300 + }, + { + "epoch": 1.9318421875396095, + "grad_norm": 0.007355086971074343, + "learning_rate": 3.5319336277284125e-08, + "loss": 0.0012, + "step": 114310 + }, + { + "epoch": 1.93201118781846, + "grad_norm": 0.09528569877147675, + "learning_rate": 3.5144563686200784e-08, + "loss": 0.0008, + "step": 114320 + }, + { + "epoch": 1.9321801880973104, + "grad_norm": 0.012314501218497753, + "learning_rate": 3.497022306159559e-08, + "loss": 0.0004, + "step": 114330 + }, + { + "epoch": 1.9323491883761608, + "grad_norm": 0.011436869390308857, + "learning_rate": 3.479631441863696e-08, + "loss": 0.001, + "step": 114340 + }, + { + "epoch": 1.9325181886550111, + "grad_norm": 0.05101019889116287, + "learning_rate": 3.462283777245612e-08, + "loss": 0.0006, + "step": 114350 + }, + { + "epoch": 1.9326871889338617, + "grad_norm": 0.018834391608834267, + "learning_rate": 3.4449793138146e-08, + "loss": 0.0005, + "step": 114360 + }, + { + "epoch": 1.9328561892127123, + "grad_norm": 0.04030166193842888, + "learning_rate": 3.4277180530762896e-08, + "loss": 0.0007, + "step": 114370 + }, + { + "epoch": 1.9330251894915627, + "grad_norm": 0.026856768876314163, + "learning_rate": 3.410499996532424e-08, + "loss": 0.0006, + "step": 114380 + }, + { + "epoch": 1.933194189770413, + "grad_norm": 0.05108177289366722, + "learning_rate": 3.393325145681137e-08, + "loss": 0.0006, + "step": 114390 + }, + { + "epoch": 1.9333631900492636, + "grad_norm": 0.02175753563642502, + "learning_rate": 3.3761935020166224e-08, + "loss": 0.0003, + "step": 114400 + }, + { + "epoch": 1.9335321903281142, + "grad_norm": 0.00420082313939929, + "learning_rate": 3.3591050670295224e-08, + "loss": 0.0005, + "step": 114410 + }, + { + "epoch": 
1.9337011906069645, + "grad_norm": 0.03318898379802704, + "learning_rate": 3.342059842206535e-08, + "loss": 0.001, + "step": 114420 + }, + { + "epoch": 1.933870190885815, + "grad_norm": 0.0321153961122036, + "learning_rate": 3.325057829030698e-08, + "loss": 0.0004, + "step": 114430 + }, + { + "epoch": 1.9340391911646653, + "grad_norm": 0.0018499215366318822, + "learning_rate": 3.308099028981271e-08, + "loss": 0.0002, + "step": 114440 + }, + { + "epoch": 1.9342081914435159, + "grad_norm": 0.009329008869826794, + "learning_rate": 3.291183443533741e-08, + "loss": 0.0004, + "step": 114450 + }, + { + "epoch": 1.9343771917223664, + "grad_norm": 0.2155163586139679, + "learning_rate": 3.27431107415993e-08, + "loss": 0.0008, + "step": 114460 + }, + { + "epoch": 1.9345461920012168, + "grad_norm": 0.03188404440879822, + "learning_rate": 3.257481922327665e-08, + "loss": 0.0006, + "step": 114470 + }, + { + "epoch": 1.9347151922800672, + "grad_norm": 0.025516286492347717, + "learning_rate": 3.240695989501275e-08, + "loss": 0.0005, + "step": 114480 + }, + { + "epoch": 1.9348841925589177, + "grad_norm": 0.019352493807673454, + "learning_rate": 3.223953277141201e-08, + "loss": 0.001, + "step": 114490 + }, + { + "epoch": 1.9350531928377683, + "grad_norm": 0.02275828830897808, + "learning_rate": 3.207253786704112e-08, + "loss": 0.0006, + "step": 114500 + }, + { + "epoch": 1.9352221931166187, + "grad_norm": 0.04524204879999161, + "learning_rate": 3.190597519643013e-08, + "loss": 0.0009, + "step": 114510 + }, + { + "epoch": 1.935391193395469, + "grad_norm": 0.013399074785411358, + "learning_rate": 3.173984477406966e-08, + "loss": 0.0005, + "step": 114520 + }, + { + "epoch": 1.9355601936743194, + "grad_norm": 0.09556085616350174, + "learning_rate": 3.1574146614414826e-08, + "loss": 0.0012, + "step": 114530 + }, + { + "epoch": 1.93572919395317, + "grad_norm": 0.19229021668434143, + "learning_rate": 3.140888073188131e-08, + "loss": 0.0008, + "step": 114540 + }, + { + "epoch": 1.9358981942320206, + "grad_norm": 0.09433633834123611, + "learning_rate": 3.1244047140849274e-08, + "loss": 0.0006, + "step": 114550 + }, + { + "epoch": 1.936067194510871, + "grad_norm": 0.044848211109638214, + "learning_rate": 3.1079645855658926e-08, + "loss": 0.0006, + "step": 114560 + }, + { + "epoch": 1.9362361947897213, + "grad_norm": 0.028230784460902214, + "learning_rate": 3.091567689061437e-08, + "loss": 0.0008, + "step": 114570 + }, + { + "epoch": 1.936405195068572, + "grad_norm": 0.03477562591433525, + "learning_rate": 3.075214025998141e-08, + "loss": 0.0002, + "step": 114580 + }, + { + "epoch": 1.9365741953474225, + "grad_norm": 0.016347402706742287, + "learning_rate": 3.0589035977989235e-08, + "loss": 0.0006, + "step": 114590 + }, + { + "epoch": 1.9367431956262728, + "grad_norm": 0.056321483105421066, + "learning_rate": 3.042636405882815e-08, + "loss": 0.0006, + "step": 114600 + }, + { + "epoch": 1.9369121959051232, + "grad_norm": 0.07306868582963943, + "learning_rate": 3.0264124516651285e-08, + "loss": 0.0008, + "step": 114610 + }, + { + "epoch": 1.9370811961839736, + "grad_norm": 0.1208910420536995, + "learning_rate": 3.010231736557512e-08, + "loss": 0.0009, + "step": 114620 + }, + { + "epoch": 1.9372501964628241, + "grad_norm": 0.01394712645560503, + "learning_rate": 2.994094261967617e-08, + "loss": 0.0009, + "step": 114630 + }, + { + "epoch": 1.9374191967416747, + "grad_norm": 0.0054717701859772205, + "learning_rate": 2.978000029299599e-08, + "loss": 0.0006, + "step": 114640 + }, + { + "epoch": 1.937588197020525, + 
"grad_norm": 0.018918504938483238, + "learning_rate": 2.961949039953782e-08, + "loss": 0.0003, + "step": 114650 + }, + { + "epoch": 1.9377571972993755, + "grad_norm": 0.04983522742986679, + "learning_rate": 2.9459412953264933e-08, + "loss": 0.0004, + "step": 114660 + }, + { + "epoch": 1.937926197578226, + "grad_norm": 0.042717378586530685, + "learning_rate": 2.929976796810674e-08, + "loss": 0.0005, + "step": 114670 + }, + { + "epoch": 1.9380951978570766, + "grad_norm": 0.00874321162700653, + "learning_rate": 2.9140555457952135e-08, + "loss": 0.0002, + "step": 114680 + }, + { + "epoch": 1.938264198135927, + "grad_norm": 0.021173780784010887, + "learning_rate": 2.898177543665337e-08, + "loss": 0.0005, + "step": 114690 + }, + { + "epoch": 1.9384331984147773, + "grad_norm": 0.0008916796068660915, + "learning_rate": 2.8823427918025505e-08, + "loss": 0.0006, + "step": 114700 + }, + { + "epoch": 1.9386021986936277, + "grad_norm": 0.06250502914190292, + "learning_rate": 2.8665512915845295e-08, + "loss": 0.0008, + "step": 114710 + }, + { + "epoch": 1.9387711989724783, + "grad_norm": 0.00023853992752265185, + "learning_rate": 2.8508030443851752e-08, + "loss": 0.0002, + "step": 114720 + }, + { + "epoch": 1.9389401992513289, + "grad_norm": 0.016771283000707626, + "learning_rate": 2.835098051574725e-08, + "loss": 0.0007, + "step": 114730 + }, + { + "epoch": 1.9391091995301792, + "grad_norm": 0.050640370696783066, + "learning_rate": 2.8194363145195857e-08, + "loss": 0.001, + "step": 114740 + }, + { + "epoch": 1.9392781998090296, + "grad_norm": 0.03314594551920891, + "learning_rate": 2.8038178345823895e-08, + "loss": 0.0006, + "step": 114750 + }, + { + "epoch": 1.9394472000878802, + "grad_norm": 0.011672952212393284, + "learning_rate": 2.7882426131219943e-08, + "loss": 0.0003, + "step": 114760 + }, + { + "epoch": 1.9396162003667308, + "grad_norm": 0.05314328148961067, + "learning_rate": 2.7727106514935375e-08, + "loss": 0.0005, + "step": 114770 + }, + { + "epoch": 1.9397852006455811, + "grad_norm": 0.00885872170329094, + "learning_rate": 2.7572219510483832e-08, + "loss": 0.0007, + "step": 114780 + }, + { + "epoch": 1.9399542009244315, + "grad_norm": 0.07471611350774765, + "learning_rate": 2.7417765131340647e-08, + "loss": 0.0006, + "step": 114790 + }, + { + "epoch": 1.9401232012032819, + "grad_norm": 0.04525149613618851, + "learning_rate": 2.7263743390945062e-08, + "loss": 0.0006, + "step": 114800 + }, + { + "epoch": 1.9402922014821324, + "grad_norm": 0.007435646373778582, + "learning_rate": 2.711015430269748e-08, + "loss": 0.0002, + "step": 114810 + }, + { + "epoch": 1.940461201760983, + "grad_norm": 0.06884460896253586, + "learning_rate": 2.695699787995998e-08, + "loss": 0.0005, + "step": 114820 + }, + { + "epoch": 1.9406302020398334, + "grad_norm": 0.00087527692085132, + "learning_rate": 2.6804274136059128e-08, + "loss": 0.0004, + "step": 114830 + }, + { + "epoch": 1.9407992023186837, + "grad_norm": 0.02839229255914688, + "learning_rate": 2.6651983084282075e-08, + "loss": 0.0005, + "step": 114840 + }, + { + "epoch": 1.9409682025975343, + "grad_norm": 0.023673107847571373, + "learning_rate": 2.6500124737878775e-08, + "loss": 0.0004, + "step": 114850 + }, + { + "epoch": 1.9411372028763847, + "grad_norm": 0.0012992864940315485, + "learning_rate": 2.6348699110061992e-08, + "loss": 0.0008, + "step": 114860 + }, + { + "epoch": 1.9413062031552353, + "grad_norm": 0.007297023665159941, + "learning_rate": 2.6197706214006192e-08, + "loss": 0.0005, + "step": 114870 + }, + { + "epoch": 1.9414752034340856, + 
"grad_norm": 0.05078316107392311, + "learning_rate": 2.6047146062848083e-08, + "loss": 0.0005, + "step": 114880 + }, + { + "epoch": 1.941644203712936, + "grad_norm": 0.013788257725536823, + "learning_rate": 2.5897018669688302e-08, + "loss": 0.0006, + "step": 114890 + }, + { + "epoch": 1.9418132039917866, + "grad_norm": 0.015619263984262943, + "learning_rate": 2.5747324047587508e-08, + "loss": 0.0007, + "step": 114900 + }, + { + "epoch": 1.9419822042706372, + "grad_norm": 0.005703152623027563, + "learning_rate": 2.559806220957084e-08, + "loss": 0.0006, + "step": 114910 + }, + { + "epoch": 1.9421512045494875, + "grad_norm": 0.03437591344118118, + "learning_rate": 2.544923316862402e-08, + "loss": 0.0009, + "step": 114920 + }, + { + "epoch": 1.942320204828338, + "grad_norm": 0.013513635843992233, + "learning_rate": 2.5300836937696138e-08, + "loss": 0.0002, + "step": 114930 + }, + { + "epoch": 1.9424892051071885, + "grad_norm": 0.005370032507926226, + "learning_rate": 2.5152873529698528e-08, + "loss": 0.0007, + "step": 114940 + }, + { + "epoch": 1.9426582053860388, + "grad_norm": 0.010226094163954258, + "learning_rate": 2.5005342957504785e-08, + "loss": 0.0002, + "step": 114950 + }, + { + "epoch": 1.9428272056648894, + "grad_norm": 0.02545175701379776, + "learning_rate": 2.4858245233950194e-08, + "loss": 0.0004, + "step": 114960 + }, + { + "epoch": 1.9429962059437398, + "grad_norm": 0.07027272880077362, + "learning_rate": 2.4711580371833966e-08, + "loss": 0.0003, + "step": 114970 + }, + { + "epoch": 1.9431652062225901, + "grad_norm": 0.014199493452906609, + "learning_rate": 2.456534838391533e-08, + "loss": 0.0004, + "step": 114980 + }, + { + "epoch": 1.9433342065014407, + "grad_norm": 0.02236546389758587, + "learning_rate": 2.4419549282918565e-08, + "loss": 0.0007, + "step": 114990 + }, + { + "epoch": 1.9435032067802913, + "grad_norm": 0.022393174469470978, + "learning_rate": 2.4274183081527403e-08, + "loss": 0.0004, + "step": 115000 + }, + { + "epoch": 1.9436722070591417, + "grad_norm": 0.03282710537314415, + "learning_rate": 2.4129249792391173e-08, + "loss": 0.0006, + "step": 115010 + }, + { + "epoch": 1.943841207337992, + "grad_norm": 0.02575145661830902, + "learning_rate": 2.3984749428118124e-08, + "loss": 0.0003, + "step": 115020 + }, + { + "epoch": 1.9440102076168426, + "grad_norm": 0.004959126468747854, + "learning_rate": 2.3840682001281534e-08, + "loss": 0.0003, + "step": 115030 + }, + { + "epoch": 1.944179207895693, + "grad_norm": 0.10797479748725891, + "learning_rate": 2.3697047524415818e-08, + "loss": 0.0005, + "step": 115040 + }, + { + "epoch": 1.9443482081745436, + "grad_norm": 0.03880942985415459, + "learning_rate": 2.3553846010017644e-08, + "loss": 0.0005, + "step": 115050 + }, + { + "epoch": 1.944517208453394, + "grad_norm": 0.01588328368961811, + "learning_rate": 2.3411077470546494e-08, + "loss": 0.0017, + "step": 115060 + }, + { + "epoch": 1.9446862087322443, + "grad_norm": 0.0011270693503320217, + "learning_rate": 2.3268741918423543e-08, + "loss": 0.0002, + "step": 115070 + }, + { + "epoch": 1.9448552090110949, + "grad_norm": 0.07981029152870178, + "learning_rate": 2.3126839366032772e-08, + "loss": 0.0006, + "step": 115080 + }, + { + "epoch": 1.9450242092899455, + "grad_norm": 0.0014120024861767888, + "learning_rate": 2.2985369825720416e-08, + "loss": 0.0005, + "step": 115090 + }, + { + "epoch": 1.9451932095687958, + "grad_norm": 0.04195106029510498, + "learning_rate": 2.2844333309794965e-08, + "loss": 0.0013, + "step": 115100 + }, + { + "epoch": 1.9453622098476462, + 
"grad_norm": 0.017905879765748978, + "learning_rate": 2.270372983052771e-08, + "loss": 0.0006, + "step": 115110 + }, + { + "epoch": 1.9455312101264965, + "grad_norm": 0.015927642583847046, + "learning_rate": 2.2563559400151647e-08, + "loss": 0.0007, + "step": 115120 + }, + { + "epoch": 1.9457002104053471, + "grad_norm": 0.000682278536260128, + "learning_rate": 2.2423822030861462e-08, + "loss": 0.0006, + "step": 115130 + }, + { + "epoch": 1.9458692106841977, + "grad_norm": 0.03333627060055733, + "learning_rate": 2.2284517734816324e-08, + "loss": 0.0004, + "step": 115140 + }, + { + "epoch": 1.946038210963048, + "grad_norm": 0.013326448388397694, + "learning_rate": 2.2145646524135423e-08, + "loss": 0.0011, + "step": 115150 + }, + { + "epoch": 1.9462072112418984, + "grad_norm": 0.009179934859275818, + "learning_rate": 2.200720841090187e-08, + "loss": 0.001, + "step": 115160 + }, + { + "epoch": 1.946376211520749, + "grad_norm": 0.018448440358042717, + "learning_rate": 2.1869203407159924e-08, + "loss": 0.0007, + "step": 115170 + }, + { + "epoch": 1.9465452117995996, + "grad_norm": 0.008957796730101109, + "learning_rate": 2.173163152491664e-08, + "loss": 0.0008, + "step": 115180 + }, + { + "epoch": 1.94671421207845, + "grad_norm": 0.06361734122037888, + "learning_rate": 2.1594492776141894e-08, + "loss": 0.0005, + "step": 115190 + }, + { + "epoch": 1.9468832123573003, + "grad_norm": 0.00624101934954524, + "learning_rate": 2.1457787172767253e-08, + "loss": 0.0008, + "step": 115200 + }, + { + "epoch": 1.9470522126361507, + "grad_norm": 0.01926502026617527, + "learning_rate": 2.132151472668653e-08, + "loss": 0.0003, + "step": 115210 + }, + { + "epoch": 1.9472212129150013, + "grad_norm": 0.06001385673880577, + "learning_rate": 2.118567544975636e-08, + "loss": 0.0005, + "step": 115220 + }, + { + "epoch": 1.9473902131938519, + "grad_norm": 0.010368529707193375, + "learning_rate": 2.1050269353795616e-08, + "loss": 0.0005, + "step": 115230 + }, + { + "epoch": 1.9475592134727022, + "grad_norm": 0.06347133964300156, + "learning_rate": 2.0915296450584878e-08, + "loss": 0.0005, + "step": 115240 + }, + { + "epoch": 1.9477282137515526, + "grad_norm": 0.07419361174106598, + "learning_rate": 2.0780756751867526e-08, + "loss": 0.0005, + "step": 115250 + }, + { + "epoch": 1.9478972140304032, + "grad_norm": 0.07868395000696182, + "learning_rate": 2.06466502693492e-08, + "loss": 0.001, + "step": 115260 + }, + { + "epoch": 1.9480662143092538, + "grad_norm": 0.11085677891969681, + "learning_rate": 2.0512977014697788e-08, + "loss": 0.001, + "step": 115270 + }, + { + "epoch": 1.9482352145881041, + "grad_norm": 0.034024741500616074, + "learning_rate": 2.0379736999543433e-08, + "loss": 0.0003, + "step": 115280 + }, + { + "epoch": 1.9484042148669545, + "grad_norm": 0.03799097239971161, + "learning_rate": 2.0246930235478524e-08, + "loss": 0.0004, + "step": 115290 + }, + { + "epoch": 1.9485732151458048, + "grad_norm": 0.018938124179840088, + "learning_rate": 2.0114556734058822e-08, + "loss": 0.0007, + "step": 115300 + }, + { + "epoch": 1.9487422154246554, + "grad_norm": 0.09261860698461533, + "learning_rate": 1.9982616506800668e-08, + "loss": 0.001, + "step": 115310 + }, + { + "epoch": 1.948911215703506, + "grad_norm": 0.006570580881088972, + "learning_rate": 1.9851109565183212e-08, + "loss": 0.0004, + "step": 115320 + }, + { + "epoch": 1.9490802159823564, + "grad_norm": 0.010035947896540165, + "learning_rate": 1.9720035920648973e-08, + "loss": 0.0009, + "step": 115330 + }, + { + "epoch": 1.9492492162612067, + "grad_norm": 
0.016145925968885422, + "learning_rate": 1.95893955846016e-08, + "loss": 0.0005, + "step": 115340 + }, + { + "epoch": 1.9494182165400573, + "grad_norm": 0.05869593098759651, + "learning_rate": 1.9459188568407007e-08, + "loss": 0.0004, + "step": 115350 + }, + { + "epoch": 1.949587216818908, + "grad_norm": 0.040348123759031296, + "learning_rate": 1.932941488339446e-08, + "loss": 0.0013, + "step": 115360 + }, + { + "epoch": 1.9497562170977583, + "grad_norm": 0.026913391426205635, + "learning_rate": 1.920007454085493e-08, + "loss": 0.0005, + "step": 115370 + }, + { + "epoch": 1.9499252173766086, + "grad_norm": 0.002638952573761344, + "learning_rate": 1.907116755204108e-08, + "loss": 0.0009, + "step": 115380 + }, + { + "epoch": 1.950094217655459, + "grad_norm": 0.00859399139881134, + "learning_rate": 1.8942693928169496e-08, + "loss": 0.0005, + "step": 115390 + }, + { + "epoch": 1.9502632179343096, + "grad_norm": 0.09759102761745453, + "learning_rate": 1.8814653680416794e-08, + "loss": 0.0007, + "step": 115400 + }, + { + "epoch": 1.9504322182131602, + "grad_norm": 0.0072417533956468105, + "learning_rate": 1.8687046819923504e-08, + "loss": 0.0002, + "step": 115410 + }, + { + "epoch": 1.9506012184920105, + "grad_norm": 0.0020835737232118845, + "learning_rate": 1.8559873357792413e-08, + "loss": 0.0008, + "step": 115420 + }, + { + "epoch": 1.9507702187708609, + "grad_norm": 0.0367913618683815, + "learning_rate": 1.8433133305088003e-08, + "loss": 0.0027, + "step": 115430 + }, + { + "epoch": 1.9509392190497115, + "grad_norm": 0.014195457100868225, + "learning_rate": 1.830682667283701e-08, + "loss": 0.0008, + "step": 115440 + }, + { + "epoch": 1.951108219328562, + "grad_norm": 0.003432026132941246, + "learning_rate": 1.818095347202953e-08, + "loss": 0.0005, + "step": 115450 + }, + { + "epoch": 1.9512772196074124, + "grad_norm": 0.010386678390204906, + "learning_rate": 1.8055513713615693e-08, + "loss": 0.0004, + "step": 115460 + }, + { + "epoch": 1.9514462198862628, + "grad_norm": 0.05468853563070297, + "learning_rate": 1.793050740851121e-08, + "loss": 0.0007, + "step": 115470 + }, + { + "epoch": 1.9516152201651131, + "grad_norm": 0.014295250177383423, + "learning_rate": 1.7805934567590723e-08, + "loss": 0.0006, + "step": 115480 + }, + { + "epoch": 1.9517842204439637, + "grad_norm": 0.07433163374662399, + "learning_rate": 1.768179520169333e-08, + "loss": 0.0009, + "step": 115490 + }, + { + "epoch": 1.9519532207228143, + "grad_norm": 0.014535046182572842, + "learning_rate": 1.7558089321619286e-08, + "loss": 0.0003, + "step": 115500 + }, + { + "epoch": 1.9521222210016647, + "grad_norm": 0.05266273766756058, + "learning_rate": 1.7434816938132205e-08, + "loss": 0.0007, + "step": 115510 + }, + { + "epoch": 1.952291221280515, + "grad_norm": 0.023370753973722458, + "learning_rate": 1.7311978061957392e-08, + "loss": 0.0011, + "step": 115520 + }, + { + "epoch": 1.9524602215593656, + "grad_norm": 0.06951917707920074, + "learning_rate": 1.7189572703781855e-08, + "loss": 0.0007, + "step": 115530 + }, + { + "epoch": 1.9526292218382162, + "grad_norm": 0.025955677032470703, + "learning_rate": 1.7067600874255963e-08, + "loss": 0.0005, + "step": 115540 + }, + { + "epoch": 1.9527982221170666, + "grad_norm": 0.007542863488197327, + "learning_rate": 1.694606258399123e-08, + "loss": 0.0001, + "step": 115550 + }, + { + "epoch": 1.952967222395917, + "grad_norm": 0.0012686381815001369, + "learning_rate": 1.6824957843563083e-08, + "loss": 0.0003, + "step": 115560 + }, + { + "epoch": 1.9531362226747673, + "grad_norm": 
0.0691961944103241, + "learning_rate": 1.670428666350754e-08, + "loss": 0.0011, + "step": 115570 + }, + { + "epoch": 1.9533052229536179, + "grad_norm": 0.01200967188924551, + "learning_rate": 1.6584049054323426e-08, + "loss": 0.0003, + "step": 115580 + }, + { + "epoch": 1.9534742232324684, + "grad_norm": 0.14624682068824768, + "learning_rate": 1.646424502647237e-08, + "loss": 0.0005, + "step": 115590 + }, + { + "epoch": 1.9536432235113188, + "grad_norm": 0.007319706957787275, + "learning_rate": 1.63448745903777e-08, + "loss": 0.0005, + "step": 115600 + }, + { + "epoch": 1.9538122237901692, + "grad_norm": 0.021409472450613976, + "learning_rate": 1.6225937756425005e-08, + "loss": 0.0008, + "step": 115610 + }, + { + "epoch": 1.9539812240690198, + "grad_norm": 0.0024789045564830303, + "learning_rate": 1.6107434534963218e-08, + "loss": 0.0004, + "step": 115620 + }, + { + "epoch": 1.9541502243478703, + "grad_norm": 0.025097843259572983, + "learning_rate": 1.5989364936301877e-08, + "loss": 0.0007, + "step": 115630 + }, + { + "epoch": 1.9543192246267207, + "grad_norm": 0.0783858373761177, + "learning_rate": 1.5871728970713873e-08, + "loss": 0.0009, + "step": 115640 + }, + { + "epoch": 1.954488224905571, + "grad_norm": 0.01838279701769352, + "learning_rate": 1.5754526648434355e-08, + "loss": 0.0002, + "step": 115650 + }, + { + "epoch": 1.9546572251844214, + "grad_norm": 0.02446633391082287, + "learning_rate": 1.563775797966016e-08, + "loss": 0.0007, + "step": 115660 + }, + { + "epoch": 1.954826225463272, + "grad_norm": 0.03329815715551376, + "learning_rate": 1.5521422974550392e-08, + "loss": 0.001, + "step": 115670 + }, + { + "epoch": 1.9549952257421226, + "grad_norm": 0.01897369883954525, + "learning_rate": 1.5405521643227505e-08, + "loss": 0.001, + "step": 115680 + }, + { + "epoch": 1.955164226020973, + "grad_norm": 0.011578774079680443, + "learning_rate": 1.52900539957751e-08, + "loss": 0.0009, + "step": 115690 + }, + { + "epoch": 1.9553332262998233, + "grad_norm": 0.04486700892448425, + "learning_rate": 1.5175020042239586e-08, + "loss": 0.0007, + "step": 115700 + }, + { + "epoch": 1.955502226578674, + "grad_norm": 0.012665933929383755, + "learning_rate": 1.506041979262962e-08, + "loss": 0.0006, + "step": 115710 + }, + { + "epoch": 1.9556712268575245, + "grad_norm": 0.04158971831202507, + "learning_rate": 1.4946253256915567e-08, + "loss": 0.0005, + "step": 115720 + }, + { + "epoch": 1.9558402271363748, + "grad_norm": 0.01785312034189701, + "learning_rate": 1.4832520445030029e-08, + "loss": 0.0005, + "step": 115730 + }, + { + "epoch": 1.9560092274152252, + "grad_norm": 0.02986268885433674, + "learning_rate": 1.4719221366869541e-08, + "loss": 0.0005, + "step": 115740 + }, + { + "epoch": 1.9561782276940756, + "grad_norm": 0.0688711553812027, + "learning_rate": 1.4606356032290659e-08, + "loss": 0.0005, + "step": 115750 + }, + { + "epoch": 1.9563472279729262, + "grad_norm": 0.028957679867744446, + "learning_rate": 1.449392445111386e-08, + "loss": 0.0005, + "step": 115760 + }, + { + "epoch": 1.9565162282517767, + "grad_norm": 0.004538863431662321, + "learning_rate": 1.4381926633120769e-08, + "loss": 0.0003, + "step": 115770 + }, + { + "epoch": 1.956685228530627, + "grad_norm": 0.010118100792169571, + "learning_rate": 1.4270362588055808e-08, + "loss": 0.0006, + "step": 115780 + }, + { + "epoch": 1.9568542288094775, + "grad_norm": 0.01838894560933113, + "learning_rate": 1.4159232325626215e-08, + "loss": 0.0018, + "step": 115790 + }, + { + "epoch": 1.957023229088328, + "grad_norm": 0.03047986514866352, + 
"learning_rate": 1.4048535855500366e-08, + "loss": 0.0008, + "step": 115800 + }, + { + "epoch": 1.9571922293671784, + "grad_norm": 0.02254408970475197, + "learning_rate": 1.3938273187308892e-08, + "loss": 0.0002, + "step": 115810 + }, + { + "epoch": 1.957361229646029, + "grad_norm": 0.09422022104263306, + "learning_rate": 1.3828444330645785e-08, + "loss": 0.0008, + "step": 115820 + }, + { + "epoch": 1.9575302299248794, + "grad_norm": 0.09769272804260254, + "learning_rate": 1.3719049295066732e-08, + "loss": 0.0008, + "step": 115830 + }, + { + "epoch": 1.9576992302037297, + "grad_norm": 0.05070381611585617, + "learning_rate": 1.3610088090089679e-08, + "loss": 0.0005, + "step": 115840 + }, + { + "epoch": 1.9578682304825803, + "grad_norm": 0.035554975271224976, + "learning_rate": 1.3501560725194263e-08, + "loss": 0.0005, + "step": 115850 + }, + { + "epoch": 1.9580372307614309, + "grad_norm": 0.030171066522598267, + "learning_rate": 1.339346720982293e-08, + "loss": 0.0008, + "step": 115860 + }, + { + "epoch": 1.9582062310402812, + "grad_norm": 0.025329411029815674, + "learning_rate": 1.3285807553380937e-08, + "loss": 0.0003, + "step": 115870 + }, + { + "epoch": 1.9583752313191316, + "grad_norm": 0.022418124601244926, + "learning_rate": 1.3178581765235232e-08, + "loss": 0.001, + "step": 115880 + }, + { + "epoch": 1.9585442315979822, + "grad_norm": 0.07500436156988144, + "learning_rate": 1.3071789854713912e-08, + "loss": 0.001, + "step": 115890 + }, + { + "epoch": 1.9587132318768326, + "grad_norm": 0.046022024005651474, + "learning_rate": 1.2965431831109542e-08, + "loss": 0.0003, + "step": 115900 + }, + { + "epoch": 1.9588822321556831, + "grad_norm": 0.011159485206007957, + "learning_rate": 1.2859507703675279e-08, + "loss": 0.0005, + "step": 115910 + }, + { + "epoch": 1.9590512324345335, + "grad_norm": 0.03546522185206413, + "learning_rate": 1.275401748162708e-08, + "loss": 0.0003, + "step": 115920 + }, + { + "epoch": 1.9592202327133839, + "grad_norm": 0.028044404461979866, + "learning_rate": 1.264896117414316e-08, + "loss": 0.0008, + "step": 115930 + }, + { + "epoch": 1.9593892329922344, + "grad_norm": 0.010457886382937431, + "learning_rate": 1.2544338790363986e-08, + "loss": 0.0003, + "step": 115940 + }, + { + "epoch": 1.959558233271085, + "grad_norm": 0.13008491694927216, + "learning_rate": 1.2440150339392276e-08, + "loss": 0.0015, + "step": 115950 + }, + { + "epoch": 1.9597272335499354, + "grad_norm": 0.025522135198116302, + "learning_rate": 1.2336395830292447e-08, + "loss": 0.0008, + "step": 115960 + }, + { + "epoch": 1.9598962338287857, + "grad_norm": 0.019671659916639328, + "learning_rate": 1.223307527209172e-08, + "loss": 0.0003, + "step": 115970 + }, + { + "epoch": 1.9600652341076361, + "grad_norm": 0.2073049694299698, + "learning_rate": 1.213018867378013e-08, + "loss": 0.0009, + "step": 115980 + }, + { + "epoch": 1.9602342343864867, + "grad_norm": 0.01109260879456997, + "learning_rate": 1.2027736044308846e-08, + "loss": 0.0006, + "step": 115990 + }, + { + "epoch": 1.9604032346653373, + "grad_norm": 0.03754560649394989, + "learning_rate": 1.1925717392591297e-08, + "loss": 0.0006, + "step": 116000 + }, + { + "epoch": 1.9605722349441876, + "grad_norm": 0.03922279179096222, + "learning_rate": 1.1824132727504822e-08, + "loss": 0.0017, + "step": 116010 + }, + { + "epoch": 1.960741235223038, + "grad_norm": 0.0020950832404196262, + "learning_rate": 1.17229820578868e-08, + "loss": 0.0003, + "step": 116020 + }, + { + "epoch": 1.9609102355018886, + "grad_norm": 0.02770181931555271, + 
"learning_rate": 1.1622265392537967e-08, + "loss": 0.0004, + "step": 116030 + }, + { + "epoch": 1.9610792357807392, + "grad_norm": 0.0313149057328701, + "learning_rate": 1.1521982740221316e-08, + "loss": 0.0008, + "step": 116040 + }, + { + "epoch": 1.9612482360595895, + "grad_norm": 0.08879216015338898, + "learning_rate": 1.1422134109662642e-08, + "loss": 0.0007, + "step": 116050 + }, + { + "epoch": 1.96141723633844, + "grad_norm": 0.029225923120975494, + "learning_rate": 1.1322719509547775e-08, + "loss": 0.0007, + "step": 116060 + }, + { + "epoch": 1.9615862366172903, + "grad_norm": 0.008420857600867748, + "learning_rate": 1.1223738948527018e-08, + "loss": 0.0006, + "step": 116070 + }, + { + "epoch": 1.9617552368961408, + "grad_norm": 0.01581859588623047, + "learning_rate": 1.1125192435212373e-08, + "loss": 0.0001, + "step": 116080 + }, + { + "epoch": 1.9619242371749914, + "grad_norm": 0.07278886437416077, + "learning_rate": 1.1027079978177535e-08, + "loss": 0.0007, + "step": 116090 + }, + { + "epoch": 1.9620932374538418, + "grad_norm": 0.00021185188961680979, + "learning_rate": 1.0929401585958454e-08, + "loss": 0.0003, + "step": 116100 + }, + { + "epoch": 1.9622622377326921, + "grad_norm": 0.015291017480194569, + "learning_rate": 1.0832157267054999e-08, + "loss": 0.001, + "step": 116110 + }, + { + "epoch": 1.9624312380115427, + "grad_norm": 0.039209552109241486, + "learning_rate": 1.0735347029925958e-08, + "loss": 0.0003, + "step": 116120 + }, + { + "epoch": 1.9626002382903933, + "grad_norm": 0.069135382771492, + "learning_rate": 1.0638970882995704e-08, + "loss": 0.0008, + "step": 116130 + }, + { + "epoch": 1.9627692385692437, + "grad_norm": 0.04019022360444069, + "learning_rate": 1.0543028834649194e-08, + "loss": 0.0009, + "step": 116140 + }, + { + "epoch": 1.962938238848094, + "grad_norm": 0.08860526233911514, + "learning_rate": 1.0447520893233087e-08, + "loss": 0.0006, + "step": 116150 + }, + { + "epoch": 1.9631072391269444, + "grad_norm": 0.02146318554878235, + "learning_rate": 1.0352447067057958e-08, + "loss": 0.0011, + "step": 116160 + }, + { + "epoch": 1.963276239405795, + "grad_norm": 0.04622725397348404, + "learning_rate": 1.0257807364395522e-08, + "loss": 0.0007, + "step": 116170 + }, + { + "epoch": 1.9634452396846456, + "grad_norm": 0.015559851191937923, + "learning_rate": 1.0163601793479194e-08, + "loss": 0.0007, + "step": 116180 + }, + { + "epoch": 1.963614239963496, + "grad_norm": 0.05115272477269173, + "learning_rate": 1.0069830362506306e-08, + "loss": 0.001, + "step": 116190 + }, + { + "epoch": 1.9637832402423463, + "grad_norm": 0.010766013525426388, + "learning_rate": 9.976493079634775e-09, + "loss": 0.0004, + "step": 116200 + }, + { + "epoch": 1.9639522405211969, + "grad_norm": 0.016246164217591286, + "learning_rate": 9.883589952985884e-09, + "loss": 0.0018, + "step": 116210 + }, + { + "epoch": 1.9641212408000475, + "grad_norm": 0.06242336705327034, + "learning_rate": 9.791120990642056e-09, + "loss": 0.0005, + "step": 116220 + }, + { + "epoch": 1.9642902410788978, + "grad_norm": 0.013016702607274055, + "learning_rate": 9.699086200648522e-09, + "loss": 0.0003, + "step": 116230 + }, + { + "epoch": 1.9644592413577482, + "grad_norm": 0.017408093437552452, + "learning_rate": 9.607485591013322e-09, + "loss": 0.0011, + "step": 116240 + }, + { + "epoch": 1.9646282416365985, + "grad_norm": 0.03357071802020073, + "learning_rate": 9.516319169705635e-09, + "loss": 0.0006, + "step": 116250 + }, + { + "epoch": 1.9647972419154491, + "grad_norm": 0.020204557105898857, + 
"learning_rate": 9.425586944658006e-09, + "loss": 0.0003, + "step": 116260 + }, + { + "epoch": 1.9649662421942997, + "grad_norm": 0.01886160299181938, + "learning_rate": 9.335288923764118e-09, + "loss": 0.0018, + "step": 116270 + }, + { + "epoch": 1.96513524247315, + "grad_norm": 0.04038720577955246, + "learning_rate": 9.245425114880469e-09, + "loss": 0.0007, + "step": 116280 + }, + { + "epoch": 1.9653042427520004, + "grad_norm": 0.07107829302549362, + "learning_rate": 9.155995525825245e-09, + "loss": 0.0013, + "step": 116290 + }, + { + "epoch": 1.965473243030851, + "grad_norm": 0.016410810872912407, + "learning_rate": 9.067000164380003e-09, + "loss": 0.0004, + "step": 116300 + }, + { + "epoch": 1.9656422433097016, + "grad_norm": 0.05313790962100029, + "learning_rate": 8.978439038287435e-09, + "loss": 0.0018, + "step": 116310 + }, + { + "epoch": 1.965811243588552, + "grad_norm": 0.026425667107105255, + "learning_rate": 8.89031215525249e-09, + "loss": 0.0012, + "step": 116320 + }, + { + "epoch": 1.9659802438674023, + "grad_norm": 0.015971403568983078, + "learning_rate": 8.802619522942924e-09, + "loss": 0.0008, + "step": 116330 + }, + { + "epoch": 1.9661492441462527, + "grad_norm": 0.04588667303323746, + "learning_rate": 8.715361148988188e-09, + "loss": 0.0015, + "step": 116340 + }, + { + "epoch": 1.9663182444251033, + "grad_norm": 0.08584762364625931, + "learning_rate": 8.628537040980545e-09, + "loss": 0.0004, + "step": 116350 + }, + { + "epoch": 1.9664872447039539, + "grad_norm": 0.05310555920004845, + "learning_rate": 8.54214720647395e-09, + "loss": 0.0008, + "step": 116360 + }, + { + "epoch": 1.9666562449828042, + "grad_norm": 0.04578254744410515, + "learning_rate": 8.456191652984614e-09, + "loss": 0.0007, + "step": 116370 + }, + { + "epoch": 1.9668252452616546, + "grad_norm": 0.06566239148378372, + "learning_rate": 8.370670387991553e-09, + "loss": 0.0004, + "step": 116380 + }, + { + "epoch": 1.9669942455405052, + "grad_norm": 0.009525866247713566, + "learning_rate": 8.285583418934373e-09, + "loss": 0.0012, + "step": 116390 + }, + { + "epoch": 1.9671632458193558, + "grad_norm": 0.0021493479143828154, + "learning_rate": 8.200930753217706e-09, + "loss": 0.0008, + "step": 116400 + }, + { + "epoch": 1.9673322460982061, + "grad_norm": 0.141135573387146, + "learning_rate": 8.116712398205106e-09, + "loss": 0.0014, + "step": 116410 + }, + { + "epoch": 1.9675012463770565, + "grad_norm": 0.10668013244867325, + "learning_rate": 8.032928361225156e-09, + "loss": 0.0005, + "step": 116420 + }, + { + "epoch": 1.9676702466559068, + "grad_norm": 0.029699115082621574, + "learning_rate": 7.94957864956758e-09, + "loss": 0.0009, + "step": 116430 + }, + { + "epoch": 1.9678392469347574, + "grad_norm": 0.09455449134111404, + "learning_rate": 7.866663270483243e-09, + "loss": 0.0028, + "step": 116440 + }, + { + "epoch": 1.968008247213608, + "grad_norm": 0.079514279961586, + "learning_rate": 7.784182231186377e-09, + "loss": 0.0006, + "step": 116450 + }, + { + "epoch": 1.9681772474924584, + "grad_norm": 0.0032341405749320984, + "learning_rate": 7.702135538853461e-09, + "loss": 0.0006, + "step": 116460 + }, + { + "epoch": 1.9683462477713087, + "grad_norm": 0.22825823724269867, + "learning_rate": 7.620523200623786e-09, + "loss": 0.0007, + "step": 116470 + }, + { + "epoch": 1.9685152480501593, + "grad_norm": 0.029165804386138916, + "learning_rate": 7.539345223596672e-09, + "loss": 0.0006, + "step": 116480 + }, + { + "epoch": 1.96868424832901, + "grad_norm": 0.022949257865548134, + "learning_rate": 7.45860161483536e-09, 
+ "loss": 0.0008, + "step": 116490 + }, + { + "epoch": 1.9688532486078603, + "grad_norm": 7.957002526381984e-05, + "learning_rate": 7.378292381365338e-09, + "loss": 0.0009, + "step": 116500 + }, + { + "epoch": 1.9690222488867106, + "grad_norm": 0.0328722819685936, + "learning_rate": 7.298417530173796e-09, + "loss": 0.0011, + "step": 116510 + }, + { + "epoch": 1.969191249165561, + "grad_norm": 0.02899502031505108, + "learning_rate": 7.218977068210175e-09, + "loss": 0.0006, + "step": 116520 + }, + { + "epoch": 1.9693602494444116, + "grad_norm": 0.011906208470463753, + "learning_rate": 7.13997100238617e-09, + "loss": 0.0005, + "step": 116530 + }, + { + "epoch": 1.9695292497232622, + "grad_norm": 0.0017242592293769121, + "learning_rate": 7.06139933957517e-09, + "loss": 0.0009, + "step": 116540 + }, + { + "epoch": 1.9696982500021125, + "grad_norm": 0.020453786477446556, + "learning_rate": 6.983262086613929e-09, + "loss": 0.0004, + "step": 116550 + }, + { + "epoch": 1.9698672502809629, + "grad_norm": 0.0002729600528255105, + "learning_rate": 6.905559250300897e-09, + "loss": 0.0003, + "step": 116560 + }, + { + "epoch": 1.9700362505598135, + "grad_norm": 0.003292068839073181, + "learning_rate": 6.8282908373962234e-09, + "loss": 0.0003, + "step": 116570 + }, + { + "epoch": 1.970205250838664, + "grad_norm": 0.041888050734996796, + "learning_rate": 6.751456854622307e-09, + "loss": 0.001, + "step": 116580 + }, + { + "epoch": 1.9703742511175144, + "grad_norm": 0.009963915683329105, + "learning_rate": 6.6750573086649116e-09, + "loss": 0.0007, + "step": 116590 + }, + { + "epoch": 1.9705432513963648, + "grad_norm": 0.019223950803279877, + "learning_rate": 6.599092206170942e-09, + "loss": 0.0004, + "step": 116600 + }, + { + "epoch": 1.9707122516752151, + "grad_norm": 0.07051514834165573, + "learning_rate": 6.523561553749558e-09, + "loss": 0.0004, + "step": 116610 + }, + { + "epoch": 1.9708812519540657, + "grad_norm": 0.06629837304353714, + "learning_rate": 6.448465357971612e-09, + "loss": 0.0007, + "step": 116620 + }, + { + "epoch": 1.9710502522329163, + "grad_norm": 0.007716650143265724, + "learning_rate": 6.373803625371877e-09, + "loss": 0.0008, + "step": 116630 + }, + { + "epoch": 1.9712192525117667, + "grad_norm": 0.1488904505968094, + "learning_rate": 6.299576362445714e-09, + "loss": 0.0011, + "step": 116640 + }, + { + "epoch": 1.971388252790617, + "grad_norm": 0.19914045929908752, + "learning_rate": 6.225783575651845e-09, + "loss": 0.0013, + "step": 116650 + }, + { + "epoch": 1.9715572530694676, + "grad_norm": 0.06798209995031357, + "learning_rate": 6.152425271410134e-09, + "loss": 0.0003, + "step": 116660 + }, + { + "epoch": 1.971726253348318, + "grad_norm": 0.03254680335521698, + "learning_rate": 6.079501456102699e-09, + "loss": 0.0002, + "step": 116670 + }, + { + "epoch": 1.9718952536271686, + "grad_norm": 0.023305783048272133, + "learning_rate": 6.007012136075019e-09, + "loss": 0.0004, + "step": 116680 + }, + { + "epoch": 1.972064253906019, + "grad_norm": 0.0026318468153476715, + "learning_rate": 5.934957317633716e-09, + "loss": 0.0004, + "step": 116690 + }, + { + "epoch": 1.9722332541848693, + "grad_norm": 0.04221843183040619, + "learning_rate": 5.86333700704822e-09, + "loss": 0.0009, + "step": 116700 + }, + { + "epoch": 1.9724022544637199, + "grad_norm": 0.015916381031274796, + "learning_rate": 5.7921512105491014e-09, + "loss": 0.0002, + "step": 116710 + }, + { + "epoch": 1.9725712547425704, + "grad_norm": 0.07472866773605347, + "learning_rate": 5.721399934330851e-09, + "loss": 0.001, + "step": 
116720 + }, + { + "epoch": 1.9727402550214208, + "grad_norm": 0.011363265104591846, + "learning_rate": 5.6510831845485446e-09, + "loss": 0.0002, + "step": 116730 + }, + { + "epoch": 1.9729092553002712, + "grad_norm": 0.01561447698622942, + "learning_rate": 5.581200967319511e-09, + "loss": 0.0004, + "step": 116740 + }, + { + "epoch": 1.9730782555791218, + "grad_norm": 0.0016669128090143204, + "learning_rate": 5.511753288724997e-09, + "loss": 0.0004, + "step": 116750 + }, + { + "epoch": 1.9732472558579721, + "grad_norm": 0.050874121487140656, + "learning_rate": 5.442740154806836e-09, + "loss": 0.0003, + "step": 116760 + }, + { + "epoch": 1.9734162561368227, + "grad_norm": 0.010174013674259186, + "learning_rate": 5.374161571569114e-09, + "loss": 0.0003, + "step": 116770 + }, + { + "epoch": 1.973585256415673, + "grad_norm": 0.15439996123313904, + "learning_rate": 5.30601754497928e-09, + "loss": 0.0009, + "step": 116780 + }, + { + "epoch": 1.9737542566945234, + "grad_norm": 0.030735179781913757, + "learning_rate": 5.238308080965926e-09, + "loss": 0.0002, + "step": 116790 + }, + { + "epoch": 1.973923256973374, + "grad_norm": 0.009195013903081417, + "learning_rate": 5.171033185419339e-09, + "loss": 0.0005, + "step": 116800 + }, + { + "epoch": 1.9740922572522246, + "grad_norm": 0.016738492995500565, + "learning_rate": 5.1041928641937245e-09, + "loss": 0.0006, + "step": 116810 + }, + { + "epoch": 1.974261257531075, + "grad_norm": 0.12038344144821167, + "learning_rate": 5.037787123104432e-09, + "loss": 0.0007, + "step": 116820 + }, + { + "epoch": 1.9744302578099253, + "grad_norm": 0.0054839253425598145, + "learning_rate": 4.971815967928506e-09, + "loss": 0.0007, + "step": 116830 + }, + { + "epoch": 1.974599258088776, + "grad_norm": 0.0022869163658469915, + "learning_rate": 4.9062794044058005e-09, + "loss": 0.0005, + "step": 116840 + }, + { + "epoch": 1.9747682583676263, + "grad_norm": 0.029526591300964355, + "learning_rate": 4.841177438238975e-09, + "loss": 0.0005, + "step": 116850 + }, + { + "epoch": 1.9749372586464768, + "grad_norm": 0.049096524715423584, + "learning_rate": 4.776510075091834e-09, + "loss": 0.0004, + "step": 116860 + }, + { + "epoch": 1.9751062589253272, + "grad_norm": 0.013877199031412601, + "learning_rate": 4.712277320590431e-09, + "loss": 0.0008, + "step": 116870 + }, + { + "epoch": 1.9752752592041776, + "grad_norm": 0.028061717748641968, + "learning_rate": 4.648479180323628e-09, + "loss": 0.0005, + "step": 116880 + }, + { + "epoch": 1.9754442594830282, + "grad_norm": 0.0018484432948753238, + "learning_rate": 4.5851156598419875e-09, + "loss": 0.0015, + "step": 116890 + }, + { + "epoch": 1.9756132597618787, + "grad_norm": 0.05465136468410492, + "learning_rate": 4.522186764659431e-09, + "loss": 0.0019, + "step": 116900 + }, + { + "epoch": 1.975782260040729, + "grad_norm": 0.07190567255020142, + "learning_rate": 4.459692500249357e-09, + "loss": 0.0003, + "step": 116910 + }, + { + "epoch": 1.9759512603195795, + "grad_norm": 0.0014867663849145174, + "learning_rate": 4.39763287205075e-09, + "loss": 0.0002, + "step": 116920 + }, + { + "epoch": 1.9761202605984298, + "grad_norm": 0.0075894673354923725, + "learning_rate": 4.336007885461513e-09, + "loss": 0.0013, + "step": 116930 + }, + { + "epoch": 1.9762892608772804, + "grad_norm": 0.03782112896442413, + "learning_rate": 4.274817545844579e-09, + "loss": 0.0007, + "step": 116940 + }, + { + "epoch": 1.976458261156131, + "grad_norm": 0.008285623043775558, + "learning_rate": 4.214061858523466e-09, + "loss": 0.0002, + "step": 116950 + }, + { 
+ "epoch": 1.9766272614349814, + "grad_norm": 0.03550859913229942, + "learning_rate": 4.153740828783948e-09, + "loss": 0.0003, + "step": 116960 + }, + { + "epoch": 1.9767962617138317, + "grad_norm": 0.0036612313706427813, + "learning_rate": 4.093854461874602e-09, + "loss": 0.0006, + "step": 116970 + }, + { + "epoch": 1.9769652619926823, + "grad_norm": 0.03043430857360363, + "learning_rate": 4.034402763005707e-09, + "loss": 0.0007, + "step": 116980 + }, + { + "epoch": 1.9771342622715329, + "grad_norm": 0.06967907398939133, + "learning_rate": 3.975385737349791e-09, + "loss": 0.0012, + "step": 116990 + }, + { + "epoch": 1.9773032625503832, + "grad_norm": 0.04638737067580223, + "learning_rate": 3.916803390041079e-09, + "loss": 0.0008, + "step": 117000 + }, + { + "epoch": 1.9774722628292336, + "grad_norm": 0.0009102841722778976, + "learning_rate": 3.858655726177718e-09, + "loss": 0.0004, + "step": 117010 + }, + { + "epoch": 1.977641263108084, + "grad_norm": 0.03580011427402496, + "learning_rate": 3.800942750817882e-09, + "loss": 0.0015, + "step": 117020 + }, + { + "epoch": 1.9778102633869346, + "grad_norm": 0.013440757989883423, + "learning_rate": 3.74366446898311e-09, + "loss": 0.0004, + "step": 117030 + }, + { + "epoch": 1.9779792636657851, + "grad_norm": 0.11920007318258286, + "learning_rate": 3.686820885656639e-09, + "loss": 0.0018, + "step": 117040 + }, + { + "epoch": 1.9781482639446355, + "grad_norm": 0.0707445740699768, + "learning_rate": 3.6304120057850666e-09, + "loss": 0.0006, + "step": 117050 + }, + { + "epoch": 1.9783172642234859, + "grad_norm": 0.030176930129528046, + "learning_rate": 3.574437834275024e-09, + "loss": 0.0003, + "step": 117060 + }, + { + "epoch": 1.9784862645023364, + "grad_norm": 0.0009361015981994569, + "learning_rate": 3.5188983759976148e-09, + "loss": 0.001, + "step": 117070 + }, + { + "epoch": 1.978655264781187, + "grad_norm": 0.006318709347397089, + "learning_rate": 3.4637936357845294e-09, + "loss": 0.0006, + "step": 117080 + }, + { + "epoch": 1.9788242650600374, + "grad_norm": 0.009384984150528908, + "learning_rate": 3.4091236184297104e-09, + "loss": 0.0003, + "step": 117090 + }, + { + "epoch": 1.9789932653388878, + "grad_norm": 0.0048936703242361546, + "learning_rate": 3.3548883286904643e-09, + "loss": 0.0006, + "step": 117100 + }, + { + "epoch": 1.9791622656177381, + "grad_norm": 0.2528400421142578, + "learning_rate": 3.3010877712857935e-09, + "loss": 0.0007, + "step": 117110 + }, + { + "epoch": 1.9793312658965887, + "grad_norm": 0.0070115034468472, + "learning_rate": 3.2477219508952884e-09, + "loss": 0.0003, + "step": 117120 + }, + { + "epoch": 1.9795002661754393, + "grad_norm": 0.06373371928930283, + "learning_rate": 3.1947908721630118e-09, + "loss": 0.0007, + "step": 117130 + }, + { + "epoch": 1.9796692664542896, + "grad_norm": 0.012733974494040012, + "learning_rate": 3.1422945396936134e-09, + "loss": 0.0006, + "step": 117140 + }, + { + "epoch": 1.97983826673314, + "grad_norm": 0.015270589850842953, + "learning_rate": 3.090232958055661e-09, + "loss": 0.0006, + "step": 117150 + }, + { + "epoch": 1.9800072670119906, + "grad_norm": 0.04006693512201309, + "learning_rate": 3.0386061317777547e-09, + "loss": 0.0003, + "step": 117160 + }, + { + "epoch": 1.9801762672908412, + "grad_norm": 0.04597114399075508, + "learning_rate": 2.987414065351857e-09, + "loss": 0.0008, + "step": 117170 + }, + { + "epoch": 1.9803452675696915, + "grad_norm": 0.05991408973932266, + "learning_rate": 2.936656763232182e-09, + "loss": 0.0009, + "step": 117180 + }, + { + "epoch": 
1.980514267848542, + "grad_norm": 0.0410887710750103, + "learning_rate": 2.886334229834087e-09, + "loss": 0.0006, + "step": 117190 + }, + { + "epoch": 1.9806832681273923, + "grad_norm": 0.02065216936171055, + "learning_rate": 2.8364464695374016e-09, + "loss": 0.0005, + "step": 117200 + }, + { + "epoch": 1.9808522684062428, + "grad_norm": 0.028431864455342293, + "learning_rate": 2.786993486680878e-09, + "loss": 0.0005, + "step": 117210 + }, + { + "epoch": 1.9810212686850934, + "grad_norm": 0.0024671663995832205, + "learning_rate": 2.73797528556885e-09, + "loss": 0.0003, + "step": 117220 + }, + { + "epoch": 1.9811902689639438, + "grad_norm": 0.08679312467575073, + "learning_rate": 2.689391870464575e-09, + "loss": 0.0005, + "step": 117230 + }, + { + "epoch": 1.9813592692427942, + "grad_norm": 0.015164055861532688, + "learning_rate": 2.6412432455957815e-09, + "loss": 0.0007, + "step": 117240 + }, + { + "epoch": 1.9815282695216447, + "grad_norm": 0.09785941988229752, + "learning_rate": 2.5935294151518962e-09, + "loss": 0.0004, + "step": 117250 + }, + { + "epoch": 1.9816972698004953, + "grad_norm": 0.008396162651479244, + "learning_rate": 2.5462503832834885e-09, + "loss": 0.0006, + "step": 117260 + }, + { + "epoch": 1.9818662700793457, + "grad_norm": 0.06484334170818329, + "learning_rate": 2.499406154105044e-09, + "loss": 0.0008, + "step": 117270 + }, + { + "epoch": 1.982035270358196, + "grad_norm": 0.00378989614546299, + "learning_rate": 2.452996731691637e-09, + "loss": 0.0008, + "step": 117280 + }, + { + "epoch": 1.9822042706370464, + "grad_norm": 0.02313057705760002, + "learning_rate": 2.407022120080593e-09, + "loss": 0.0003, + "step": 117290 + }, + { + "epoch": 1.982373270915897, + "grad_norm": 0.019154351204633713, + "learning_rate": 2.3614823232731566e-09, + "loss": 0.001, + "step": 117300 + }, + { + "epoch": 1.9825422711947476, + "grad_norm": 0.00604259455576539, + "learning_rate": 2.3163773452306025e-09, + "loss": 0.0003, + "step": 117310 + }, + { + "epoch": 1.982711271473598, + "grad_norm": 0.002542471280321479, + "learning_rate": 2.27170718987757e-09, + "loss": 0.0006, + "step": 117320 + }, + { + "epoch": 1.9828802717524483, + "grad_norm": 0.023002471774816513, + "learning_rate": 2.2274718611009492e-09, + "loss": 0.0002, + "step": 117330 + }, + { + "epoch": 1.9830492720312989, + "grad_norm": 0.013058079406619072, + "learning_rate": 2.1836713627487737e-09, + "loss": 0.0009, + "step": 117340 + }, + { + "epoch": 1.9832182723101495, + "grad_norm": 0.002443569479510188, + "learning_rate": 2.1403056986318837e-09, + "loss": 0.0005, + "step": 117350 + }, + { + "epoch": 1.9833872725889998, + "grad_norm": 0.0016154100885614753, + "learning_rate": 2.0973748725239276e-09, + "loss": 0.0003, + "step": 117360 + }, + { + "epoch": 1.9835562728678502, + "grad_norm": 0.1025247722864151, + "learning_rate": 2.054878888159695e-09, + "loss": 0.001, + "step": 117370 + }, + { + "epoch": 1.9837252731467006, + "grad_norm": 0.03784005716443062, + "learning_rate": 2.0128177492362288e-09, + "loss": 0.0006, + "step": 117380 + }, + { + "epoch": 1.9838942734255511, + "grad_norm": 0.0560203418135643, + "learning_rate": 1.9711914594139347e-09, + "loss": 0.0005, + "step": 117390 + }, + { + "epoch": 1.9840632737044017, + "grad_norm": 0.08814750611782074, + "learning_rate": 1.9300000223138047e-09, + "loss": 0.0011, + "step": 117400 + }, + { + "epoch": 1.984232273983252, + "grad_norm": 0.030366649851202965, + "learning_rate": 1.889243441519639e-09, + "loss": 0.0005, + "step": 117410 + }, + { + "epoch": 1.9844012742621024, + 
"grad_norm": 0.010514308698475361, + "learning_rate": 1.8489217205780453e-09, + "loss": 0.0004, + "step": 117420 + }, + { + "epoch": 1.984570274540953, + "grad_norm": 0.02470279298722744, + "learning_rate": 1.8090348629967724e-09, + "loss": 0.0006, + "step": 117430 + }, + { + "epoch": 1.9847392748198036, + "grad_norm": 0.033508431166410446, + "learning_rate": 1.769582872245823e-09, + "loss": 0.0005, + "step": 117440 + }, + { + "epoch": 1.984908275098654, + "grad_norm": 0.030686290934681892, + "learning_rate": 1.7305657517585616e-09, + "loss": 0.0003, + "step": 117450 + }, + { + "epoch": 1.9850772753775043, + "grad_norm": 0.051201045513153076, + "learning_rate": 1.6919835049294952e-09, + "loss": 0.0007, + "step": 117460 + }, + { + "epoch": 1.9852462756563547, + "grad_norm": 0.07473219931125641, + "learning_rate": 1.653836135114828e-09, + "loss": 0.0004, + "step": 117470 + }, + { + "epoch": 1.9854152759352053, + "grad_norm": 0.08911773562431335, + "learning_rate": 1.6161236456341268e-09, + "loss": 0.0006, + "step": 117480 + }, + { + "epoch": 1.9855842762140559, + "grad_norm": 0.03183027356863022, + "learning_rate": 1.5788460397686555e-09, + "loss": 0.0005, + "step": 117490 + }, + { + "epoch": 1.9857532764929062, + "grad_norm": 0.01687448099255562, + "learning_rate": 1.542003320760821e-09, + "loss": 0.0007, + "step": 117500 + }, + { + "epoch": 1.9859222767717566, + "grad_norm": 0.0014734736178070307, + "learning_rate": 1.505595491817502e-09, + "loss": 0.0007, + "step": 117510 + }, + { + "epoch": 1.9860912770506072, + "grad_norm": 0.011509898118674755, + "learning_rate": 1.4696225561050548e-09, + "loss": 0.0004, + "step": 117520 + }, + { + "epoch": 1.9862602773294578, + "grad_norm": 0.19320784509181976, + "learning_rate": 1.4340845167543082e-09, + "loss": 0.002, + "step": 117530 + }, + { + "epoch": 1.9864292776083081, + "grad_norm": 0.09815225750207901, + "learning_rate": 1.3989813768566785e-09, + "loss": 0.0003, + "step": 117540 + }, + { + "epoch": 1.9865982778871585, + "grad_norm": 0.002282196655869484, + "learning_rate": 1.3643131394663888e-09, + "loss": 0.0001, + "step": 117550 + }, + { + "epoch": 1.9867672781660088, + "grad_norm": 0.0062440186738967896, + "learning_rate": 1.3300798075993604e-09, + "loss": 0.0007, + "step": 117560 + }, + { + "epoch": 1.9869362784448594, + "grad_norm": 0.08370395749807358, + "learning_rate": 1.2962813842354316e-09, + "loss": 0.001, + "step": 117570 + }, + { + "epoch": 1.98710527872371, + "grad_norm": 0.014962395653128624, + "learning_rate": 1.2629178723133628e-09, + "loss": 0.0003, + "step": 117580 + }, + { + "epoch": 1.9872742790025604, + "grad_norm": 0.021809512749314308, + "learning_rate": 1.2299892747374975e-09, + "loss": 0.0004, + "step": 117590 + }, + { + "epoch": 1.9874432792814107, + "grad_norm": 0.06284677982330322, + "learning_rate": 1.197495594372211e-09, + "loss": 0.0003, + "step": 117600 + }, + { + "epoch": 1.9876122795602613, + "grad_norm": 0.16132612526416779, + "learning_rate": 1.1654368340441313e-09, + "loss": 0.0007, + "step": 117610 + }, + { + "epoch": 1.9877812798391117, + "grad_norm": 0.00846133567392826, + "learning_rate": 1.1338129965432488e-09, + "loss": 0.0003, + "step": 117620 + }, + { + "epoch": 1.9879502801179623, + "grad_norm": 0.07285846769809723, + "learning_rate": 1.1026240846206959e-09, + "loss": 0.0005, + "step": 117630 + }, + { + "epoch": 1.9881192803968126, + "grad_norm": 0.03198720142245293, + "learning_rate": 1.071870100989858e-09, + "loss": 0.0005, + "step": 117640 + }, + { + "epoch": 1.988288280675663, + "grad_norm": 
0.013117395341396332, + "learning_rate": 1.0415510483269276e-09, + "loss": 0.0002, + "step": 117650 + }, + { + "epoch": 1.9884572809545136, + "grad_norm": 0.0321161225438118, + "learning_rate": 1.0116669292692393e-09, + "loss": 0.0019, + "step": 117660 + }, + { + "epoch": 1.9886262812333642, + "grad_norm": 0.002161074196919799, + "learning_rate": 9.82217746417491e-10, + "loss": 0.0009, + "step": 117670 + }, + { + "epoch": 1.9887952815122145, + "grad_norm": 0.025001564994454384, + "learning_rate": 9.532035023335219e-10, + "loss": 0.0024, + "step": 117680 + }, + { + "epoch": 1.9889642817910649, + "grad_norm": 0.04951312392950058, + "learning_rate": 9.246241995419791e-10, + "loss": 0.0003, + "step": 117690 + }, + { + "epoch": 1.9891332820699155, + "grad_norm": 0.05861971899867058, + "learning_rate": 8.964798405292074e-10, + "loss": 0.0005, + "step": 117700 + }, + { + "epoch": 1.9893022823487658, + "grad_norm": 0.04036771133542061, + "learning_rate": 8.687704277432485e-10, + "loss": 0.0007, + "step": 117710 + }, + { + "epoch": 1.9894712826276164, + "grad_norm": 0.0003445586480665952, + "learning_rate": 8.414959635960618e-10, + "loss": 0.0006, + "step": 117720 + }, + { + "epoch": 1.9896402829064668, + "grad_norm": 0.09235519915819168, + "learning_rate": 8.146564504601939e-10, + "loss": 0.0007, + "step": 117730 + }, + { + "epoch": 1.9898092831853171, + "grad_norm": 0.07501112669706345, + "learning_rate": 7.882518906709991e-10, + "loss": 0.0006, + "step": 117740 + }, + { + "epoch": 1.9899782834641677, + "grad_norm": 0.009308923967182636, + "learning_rate": 7.622822865255286e-10, + "loss": 0.0012, + "step": 117750 + }, + { + "epoch": 1.9901472837430183, + "grad_norm": 0.059830378741025925, + "learning_rate": 7.367476402830864e-10, + "loss": 0.0009, + "step": 117760 + }, + { + "epoch": 1.9903162840218687, + "grad_norm": 0.015614648349583149, + "learning_rate": 7.116479541657839e-10, + "loss": 0.0003, + "step": 117770 + }, + { + "epoch": 1.990485284300719, + "grad_norm": 0.06734458357095718, + "learning_rate": 6.869832303574298e-10, + "loss": 0.0006, + "step": 117780 + }, + { + "epoch": 1.9906542845795696, + "grad_norm": 0.006938898470252752, + "learning_rate": 6.627534710035299e-10, + "loss": 0.0024, + "step": 117790 + }, + { + "epoch": 1.99082328485842, + "grad_norm": 0.036555707454681396, + "learning_rate": 6.38958678212398e-10, + "loss": 0.0009, + "step": 117800 + }, + { + "epoch": 1.9909922851372706, + "grad_norm": 0.07078488916158676, + "learning_rate": 6.155988540545999e-10, + "loss": 0.0009, + "step": 117810 + }, + { + "epoch": 1.991161285416121, + "grad_norm": 0.004905702080577612, + "learning_rate": 5.926740005618436e-10, + "loss": 0.001, + "step": 117820 + }, + { + "epoch": 1.9913302856949713, + "grad_norm": 0.11632024496793747, + "learning_rate": 5.701841197297553e-10, + "loss": 0.0013, + "step": 117830 + }, + { + "epoch": 1.9914992859738219, + "grad_norm": 0.010307352989912033, + "learning_rate": 5.481292135139926e-10, + "loss": 0.0004, + "step": 117840 + }, + { + "epoch": 1.9916682862526724, + "grad_norm": 0.1011987254023552, + "learning_rate": 5.265092838335762e-10, + "loss": 0.0007, + "step": 117850 + }, + { + "epoch": 1.9918372865315228, + "grad_norm": 0.00017056192154996097, + "learning_rate": 5.053243325703339e-10, + "loss": 0.0006, + "step": 117860 + }, + { + "epoch": 1.9920062868103732, + "grad_norm": 0.00032645714236423373, + "learning_rate": 4.84574361567236e-10, + "loss": 0.0003, + "step": 117870 + }, + { + "epoch": 1.9921752870892235, + "grad_norm": 0.006950442213565111, + 
"learning_rate": 4.6425937262895017e-10, + "loss": 0.0008, + "step": 117880 + }, + { + "epoch": 1.9923442873680741, + "grad_norm": 0.019113315269351006, + "learning_rate": 4.443793675235064e-10, + "loss": 0.0005, + "step": 117890 + }, + { + "epoch": 1.9925132876469247, + "grad_norm": 0.01378389447927475, + "learning_rate": 4.2493434798007715e-10, + "loss": 0.0006, + "step": 117900 + }, + { + "epoch": 1.992682287925775, + "grad_norm": 0.03983868658542633, + "learning_rate": 4.059243156911974e-10, + "loss": 0.0005, + "step": 117910 + }, + { + "epoch": 1.9928512882046254, + "grad_norm": 0.017251068726181984, + "learning_rate": 3.873492723105443e-10, + "loss": 0.0003, + "step": 117920 + }, + { + "epoch": 1.993020288483476, + "grad_norm": 0.0006921543390490115, + "learning_rate": 3.692092194540475e-10, + "loss": 0.0006, + "step": 117930 + }, + { + "epoch": 1.9931892887623266, + "grad_norm": 0.08097910135984421, + "learning_rate": 3.515041587004442e-10, + "loss": 0.0007, + "step": 117940 + }, + { + "epoch": 1.993358289041177, + "grad_norm": 0.01713497005403042, + "learning_rate": 3.342340915896136e-10, + "loss": 0.0005, + "step": 117950 + }, + { + "epoch": 1.9935272893200273, + "grad_norm": 0.018909644335508347, + "learning_rate": 3.173990196242427e-10, + "loss": 0.0011, + "step": 117960 + }, + { + "epoch": 1.9936962895988777, + "grad_norm": 0.0007610021275468171, + "learning_rate": 3.0099894426927066e-10, + "loss": 0.0013, + "step": 117970 + }, + { + "epoch": 1.9938652898777283, + "grad_norm": 0.0057690683752298355, + "learning_rate": 2.850338669518893e-10, + "loss": 0.0005, + "step": 117980 + }, + { + "epoch": 1.9940342901565788, + "grad_norm": 0.0039121732115745544, + "learning_rate": 2.695037890604324e-10, + "loss": 0.0004, + "step": 117990 + }, + { + "epoch": 1.9942032904354292, + "grad_norm": 0.05107155814766884, + "learning_rate": 2.5440871194604144e-10, + "loss": 0.0006, + "step": 118000 + }, + { + "epoch": 1.9943722907142796, + "grad_norm": 0.058372240513563156, + "learning_rate": 2.3974863692266537e-10, + "loss": 0.0009, + "step": 118010 + }, + { + "epoch": 1.9945412909931302, + "grad_norm": 0.03363621607422829, + "learning_rate": 2.255235652653953e-10, + "loss": 0.0005, + "step": 118020 + }, + { + "epoch": 1.9947102912719807, + "grad_norm": 0.015248659998178482, + "learning_rate": 2.1173349821268508e-10, + "loss": 0.0005, + "step": 118030 + }, + { + "epoch": 1.994879291550831, + "grad_norm": 0.10280773043632507, + "learning_rate": 1.9837843696302039e-10, + "loss": 0.002, + "step": 118040 + }, + { + "epoch": 1.9950482918296815, + "grad_norm": 0.07708601653575897, + "learning_rate": 1.854583826793599e-10, + "loss": 0.0013, + "step": 118050 + }, + { + "epoch": 1.9952172921085318, + "grad_norm": 0.02261054702103138, + "learning_rate": 1.7297333648524928e-10, + "loss": 0.0032, + "step": 118060 + }, + { + "epoch": 1.9953862923873824, + "grad_norm": 0.026327621191740036, + "learning_rate": 1.6092329946704178e-10, + "loss": 0.0008, + "step": 118070 + }, + { + "epoch": 1.995555292666233, + "grad_norm": 0.019617615267634392, + "learning_rate": 1.4930827267389814e-10, + "loss": 0.0003, + "step": 118080 + }, + { + "epoch": 1.9957242929450834, + "grad_norm": 0.11332027614116669, + "learning_rate": 1.3812825711556622e-10, + "loss": 0.0008, + "step": 118090 + }, + { + "epoch": 1.9958932932239337, + "grad_norm": 0.01800505258142948, + "learning_rate": 1.2738325376460136e-10, + "loss": 0.0004, + "step": 118100 + }, + { + "epoch": 1.9960622935027843, + "grad_norm": 0.0007165533606894314, + 
"learning_rate": 1.170732635563665e-10, + "loss": 0.0007, + "step": 118110 + }, + { + "epoch": 1.9962312937816349, + "grad_norm": 0.19624696671962738, + "learning_rate": 1.0719828738792182e-10, + "loss": 0.0006, + "step": 118120 + }, + { + "epoch": 1.9964002940604852, + "grad_norm": 0.03249282389879227, + "learning_rate": 9.775832611802483e-11, + "loss": 0.0006, + "step": 118130 + }, + { + "epoch": 1.9965692943393356, + "grad_norm": 0.04424285143613815, + "learning_rate": 8.875338056824057e-11, + "loss": 0.0003, + "step": 118140 + }, + { + "epoch": 1.996738294618186, + "grad_norm": 0.0007160080131143332, + "learning_rate": 8.018345152238649e-11, + "loss": 0.0008, + "step": 118150 + }, + { + "epoch": 1.9969072948970366, + "grad_norm": 0.08032648265361786, + "learning_rate": 7.204853972542225e-11, + "loss": 0.0005, + "step": 118160 + }, + { + "epoch": 1.9970762951758871, + "grad_norm": 0.014756135642528534, + "learning_rate": 6.434864588567013e-11, + "loss": 0.0007, + "step": 118170 + }, + { + "epoch": 1.9972452954547375, + "grad_norm": 0.05486934632062912, + "learning_rate": 5.708377067259463e-11, + "loss": 0.0013, + "step": 118180 + }, + { + "epoch": 1.9974142957335879, + "grad_norm": 0.07138478010892868, + "learning_rate": 5.0253914718467746e-11, + "loss": 0.0018, + "step": 118190 + }, + { + "epoch": 1.9975832960124384, + "grad_norm": 0.03233276307582855, + "learning_rate": 4.385907861781391e-11, + "loss": 0.0003, + "step": 118200 + }, + { + "epoch": 1.997752296291289, + "grad_norm": 0.05701679736375809, + "learning_rate": 3.789926292685486e-11, + "loss": 0.0008, + "step": 118210 + }, + { + "epoch": 1.9979212965701394, + "grad_norm": 0.15732277929782867, + "learning_rate": 3.237446816350964e-11, + "loss": 0.0004, + "step": 118220 + }, + { + "epoch": 1.9980902968489898, + "grad_norm": 0.02079329639673233, + "learning_rate": 2.7284694809615042e-11, + "loss": 0.001, + "step": 118230 + }, + { + "epoch": 1.9982592971278401, + "grad_norm": 0.04122069850564003, + "learning_rate": 2.2629943307039827e-11, + "loss": 0.001, + "step": 118240 + }, + { + "epoch": 1.9984282974066907, + "grad_norm": 0.01309211365878582, + "learning_rate": 1.8410214061015396e-11, + "loss": 0.0003, + "step": 118250 + }, + { + "epoch": 1.9985972976855413, + "grad_norm": 0.09661834686994553, + "learning_rate": 1.4625507439025577e-11, + "loss": 0.0009, + "step": 118260 + }, + { + "epoch": 1.9987662979643916, + "grad_norm": 0.16295121610164642, + "learning_rate": 1.1275823770251493e-11, + "loss": 0.0006, + "step": 118270 + }, + { + "epoch": 1.998935298243242, + "grad_norm": 0.011920265853404999, + "learning_rate": 8.361163345571577e-12, + "loss": 0.0012, + "step": 118280 + }, + { + "epoch": 1.9991042985220926, + "grad_norm": 0.009564779698848724, + "learning_rate": 5.881526419226902e-12, + "loss": 0.0012, + "step": 118290 + }, + { + "epoch": 1.9992732988009432, + "grad_norm": 0.05826535448431969, + "learning_rate": 3.836913206600734e-12, + "loss": 0.0008, + "step": 118300 + }, + { + "epoch": 1.9994422990797935, + "grad_norm": 0.009812925010919571, + "learning_rate": 2.2273238858838697e-12, + "loss": 0.0007, + "step": 118310 + }, + { + "epoch": 1.999611299358644, + "grad_norm": 0.0485992468893528, + "learning_rate": 1.0527585964092979e-12, + "loss": 0.0004, + "step": 118320 + }, + { + "epoch": 1.9997802996374943, + "grad_norm": 0.030909573659300804, + "learning_rate": 3.1321744142776e-13, + "loss": 0.0006, + "step": 118330 + }, + { + "epoch": 1.9999492999163448, + "grad_norm": 0.021162638440728188, + "learning_rate": 
8.700484777079965e-15, + "loss": 0.0006, + "step": 118340 + }, + { + "epoch": 1.9999830999721149, + "step": 118342, + "total_flos": 1.4764570273982185e+19, + "train_loss": 0.003653812557283815, + "train_runtime": 130211.2924, + "train_samples_per_second": 7.271, + "train_steps_per_second": 0.909 + } + ], + "logging_steps": 10, + "max_steps": 118342, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 62000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.4764570273982185e+19, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}