{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.983960849317231, "eval_steps": 3959, "global_step": 15836, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00012629252506117294, "grad_norm": 5.59375, "learning_rate": 2.0000000000000002e-07, "loss": 0.8454, "step": 1 }, { "epoch": 0.00012629252506117294, "eval_loss": 0.8203133344650269, "eval_runtime": 4337.3857, "eval_samples_per_second": 11.491, "eval_steps_per_second": 3.83, "step": 1 }, { "epoch": 0.0002525850501223459, "grad_norm": 5.71875, "learning_rate": 4.0000000000000003e-07, "loss": 0.9067, "step": 2 }, { "epoch": 0.0003788775751835188, "grad_norm": 6.25, "learning_rate": 6.000000000000001e-07, "loss": 0.9001, "step": 3 }, { "epoch": 0.0005051701002446918, "grad_norm": 6.46875, "learning_rate": 8.000000000000001e-07, "loss": 0.9717, "step": 4 }, { "epoch": 0.0006314626253058647, "grad_norm": 6.4375, "learning_rate": 1.0000000000000002e-06, "loss": 0.834, "step": 5 }, { "epoch": 0.0007577551503670377, "grad_norm": 6.15625, "learning_rate": 1.2000000000000002e-06, "loss": 0.8507, "step": 6 }, { "epoch": 0.0008840476754282105, "grad_norm": 5.875, "learning_rate": 1.4000000000000001e-06, "loss": 0.8883, "step": 7 }, { "epoch": 0.0010103402004893835, "grad_norm": 4.625, "learning_rate": 1.6000000000000001e-06, "loss": 0.7588, "step": 8 }, { "epoch": 0.0011366327255505565, "grad_norm": 4.65625, "learning_rate": 1.8000000000000001e-06, "loss": 0.8324, "step": 9 }, { "epoch": 0.0012629252506117293, "grad_norm": 5.59375, "learning_rate": 2.0000000000000003e-06, "loss": 0.9332, "step": 10 }, { "epoch": 0.0013892177756729023, "grad_norm": 4.84375, "learning_rate": 2.2e-06, "loss": 0.9111, "step": 11 }, { "epoch": 0.0015155103007340753, "grad_norm": 4.0625, "learning_rate": 2.4000000000000003e-06, "loss": 0.842, "step": 12 }, { "epoch": 0.0016418028257952483, "grad_norm": 3.90625, "learning_rate": 2.6e-06, "loss": 0.7434, "step": 13 }, { "epoch": 0.001768095350856421, "grad_norm": 3.265625, "learning_rate": 2.8000000000000003e-06, "loss": 0.8226, "step": 14 }, { "epoch": 0.001894387875917594, "grad_norm": 3.3125, "learning_rate": 3e-06, "loss": 0.834, "step": 15 }, { "epoch": 0.002020680400978767, "grad_norm": 3.234375, "learning_rate": 3.2000000000000003e-06, "loss": 0.8324, "step": 16 }, { "epoch": 0.00214697292603994, "grad_norm": 3.140625, "learning_rate": 3.4000000000000005e-06, "loss": 0.7871, "step": 17 }, { "epoch": 0.002273265451101113, "grad_norm": 2.953125, "learning_rate": 3.6000000000000003e-06, "loss": 0.7632, "step": 18 }, { "epoch": 0.002399557976162286, "grad_norm": 2.96875, "learning_rate": 3.8000000000000005e-06, "loss": 0.76, "step": 19 }, { "epoch": 0.0025258505012234586, "grad_norm": 3.265625, "learning_rate": 4.000000000000001e-06, "loss": 0.8381, "step": 20 }, { "epoch": 0.0026521430262846316, "grad_norm": 3.109375, "learning_rate": 4.2000000000000004e-06, "loss": 0.7496, "step": 21 }, { "epoch": 0.0027784355513458046, "grad_norm": 3.140625, "learning_rate": 4.4e-06, "loss": 0.8531, "step": 22 }, { "epoch": 0.0029047280764069776, "grad_norm": 8.875, "learning_rate": 4.600000000000001e-06, "loss": 1.3642, "step": 23 }, { "epoch": 0.0030310206014681506, "grad_norm": 2.546875, "learning_rate": 4.800000000000001e-06, "loss": 0.7662, "step": 24 }, { "epoch": 0.0031573131265293236, "grad_norm": 3.203125, "learning_rate": 5e-06, "loss": 0.8783, "step": 25 }, { "epoch": 0.0032836056515904966, "grad_norm": 3.1875, "learning_rate": 5.2e-06, "loss": 0.7804, "step": 26 }, { "epoch": 0.0034098981766516696, "grad_norm": 2.828125, "learning_rate": 5.400000000000001e-06, "loss": 0.8204, "step": 27 }, { "epoch": 0.003536190701712842, "grad_norm": 2.78125, "learning_rate": 5.600000000000001e-06, "loss": 0.7168, "step": 28 }, { "epoch": 0.003662483226774015, "grad_norm": 2.765625, "learning_rate": 5.8e-06, "loss": 0.7216, "step": 29 }, { "epoch": 0.003788775751835188, "grad_norm": 2.546875, "learning_rate": 6e-06, "loss": 0.841, "step": 30 }, { "epoch": 0.003915068276896362, "grad_norm": 2.71875, "learning_rate": 6.200000000000001e-06, "loss": 0.7497, "step": 31 }, { "epoch": 0.004041360801957534, "grad_norm": 3.078125, "learning_rate": 6.4000000000000006e-06, "loss": 0.8927, "step": 32 }, { "epoch": 0.004167653327018707, "grad_norm": 2.703125, "learning_rate": 6.600000000000001e-06, "loss": 0.7574, "step": 33 }, { "epoch": 0.00429394585207988, "grad_norm": 2.765625, "learning_rate": 6.800000000000001e-06, "loss": 0.7417, "step": 34 }, { "epoch": 0.004420238377141053, "grad_norm": 2.671875, "learning_rate": 7e-06, "loss": 0.8044, "step": 35 }, { "epoch": 0.004546530902202226, "grad_norm": 2.71875, "learning_rate": 7.2000000000000005e-06, "loss": 0.7584, "step": 36 }, { "epoch": 0.004672823427263399, "grad_norm": 2.71875, "learning_rate": 7.4e-06, "loss": 0.8108, "step": 37 }, { "epoch": 0.004799115952324572, "grad_norm": 2.46875, "learning_rate": 7.600000000000001e-06, "loss": 0.752, "step": 38 }, { "epoch": 0.004925408477385745, "grad_norm": 2.8125, "learning_rate": 7.800000000000002e-06, "loss": 0.7976, "step": 39 }, { "epoch": 0.005051701002446917, "grad_norm": 2.6875, "learning_rate": 8.000000000000001e-06, "loss": 0.7411, "step": 40 }, { "epoch": 0.005177993527508091, "grad_norm": 2.765625, "learning_rate": 8.2e-06, "loss": 0.7607, "step": 41 }, { "epoch": 0.005304286052569263, "grad_norm": 2.59375, "learning_rate": 8.400000000000001e-06, "loss": 0.7165, "step": 42 }, { "epoch": 0.005430578577630437, "grad_norm": 2.6875, "learning_rate": 8.6e-06, "loss": 0.7692, "step": 43 }, { "epoch": 0.005556871102691609, "grad_norm": 2.484375, "learning_rate": 8.8e-06, "loss": 0.6716, "step": 44 }, { "epoch": 0.005683163627752783, "grad_norm": 2.71875, "learning_rate": 9e-06, "loss": 0.763, "step": 45 }, { "epoch": 0.005809456152813955, "grad_norm": 2.703125, "learning_rate": 9.200000000000002e-06, "loss": 0.7133, "step": 46 }, { "epoch": 0.005935748677875129, "grad_norm": 2.53125, "learning_rate": 9.4e-06, "loss": 0.7162, "step": 47 }, { "epoch": 0.006062041202936301, "grad_norm": 2.5, "learning_rate": 9.600000000000001e-06, "loss": 0.7998, "step": 48 }, { "epoch": 0.006188333727997474, "grad_norm": 2.578125, "learning_rate": 9.800000000000001e-06, "loss": 0.7608, "step": 49 }, { "epoch": 0.006314626253058647, "grad_norm": 2.453125, "learning_rate": 1e-05, "loss": 0.6336, "step": 50 }, { "epoch": 0.00644091877811982, "grad_norm": 2.625, "learning_rate": 1.02e-05, "loss": 0.7345, "step": 51 }, { "epoch": 0.006567211303180993, "grad_norm": 2.484375, "learning_rate": 1.04e-05, "loss": 0.6878, "step": 52 }, { "epoch": 0.006693503828242166, "grad_norm": 2.5, "learning_rate": 1.0600000000000002e-05, "loss": 0.704, "step": 53 }, { "epoch": 0.006819796353303339, "grad_norm": 2.375, "learning_rate": 1.0800000000000002e-05, "loss": 0.7585, "step": 54 }, { "epoch": 0.006946088878364512, "grad_norm": 2.625, "learning_rate": 1.1000000000000001e-05, "loss": 0.6779, "step": 55 }, { "epoch": 0.007072381403425684, "grad_norm": 2.625, "learning_rate": 1.1200000000000001e-05, "loss": 0.692, "step": 56 }, { "epoch": 0.007198673928486858, "grad_norm": 2.84375, "learning_rate": 1.14e-05, "loss": 0.7789, "step": 57 }, { "epoch": 0.00732496645354803, "grad_norm": 2.765625, "learning_rate": 1.16e-05, "loss": 0.8273, "step": 58 }, { "epoch": 0.007451258978609204, "grad_norm": 2.546875, "learning_rate": 1.18e-05, "loss": 0.7558, "step": 59 }, { "epoch": 0.007577551503670376, "grad_norm": 2.78125, "learning_rate": 1.2e-05, "loss": 0.7932, "step": 60 }, { "epoch": 0.00770384402873155, "grad_norm": 2.40625, "learning_rate": 1.22e-05, "loss": 0.7052, "step": 61 }, { "epoch": 0.007830136553792723, "grad_norm": 2.609375, "learning_rate": 1.2400000000000002e-05, "loss": 0.7514, "step": 62 }, { "epoch": 0.007956429078853895, "grad_norm": 2.5625, "learning_rate": 1.2600000000000001e-05, "loss": 0.76, "step": 63 }, { "epoch": 0.008082721603915068, "grad_norm": 2.53125, "learning_rate": 1.2800000000000001e-05, "loss": 0.7256, "step": 64 }, { "epoch": 0.008209014128976242, "grad_norm": 2.546875, "learning_rate": 1.3000000000000001e-05, "loss": 0.7125, "step": 65 }, { "epoch": 0.008335306654037413, "grad_norm": 2.75, "learning_rate": 1.3200000000000002e-05, "loss": 0.7212, "step": 66 }, { "epoch": 0.008461599179098587, "grad_norm": 2.75, "learning_rate": 1.3400000000000002e-05, "loss": 0.8474, "step": 67 }, { "epoch": 0.00858789170415976, "grad_norm": 2.375, "learning_rate": 1.3600000000000002e-05, "loss": 0.7398, "step": 68 }, { "epoch": 0.008714184229220934, "grad_norm": 2.59375, "learning_rate": 1.38e-05, "loss": 0.7633, "step": 69 }, { "epoch": 0.008840476754282105, "grad_norm": 2.609375, "learning_rate": 1.4e-05, "loss": 0.7809, "step": 70 }, { "epoch": 0.008966769279343279, "grad_norm": 2.578125, "learning_rate": 1.4200000000000001e-05, "loss": 0.7227, "step": 71 }, { "epoch": 0.009093061804404452, "grad_norm": 2.609375, "learning_rate": 1.4400000000000001e-05, "loss": 0.6658, "step": 72 }, { "epoch": 0.009219354329465624, "grad_norm": 2.34375, "learning_rate": 1.46e-05, "loss": 0.627, "step": 73 }, { "epoch": 0.009345646854526797, "grad_norm": 2.59375, "learning_rate": 1.48e-05, "loss": 0.7184, "step": 74 }, { "epoch": 0.00947193937958797, "grad_norm": 2.65625, "learning_rate": 1.5000000000000002e-05, "loss": 0.7323, "step": 75 }, { "epoch": 0.009598231904649144, "grad_norm": 2.703125, "learning_rate": 1.5200000000000002e-05, "loss": 0.6901, "step": 76 }, { "epoch": 0.009724524429710316, "grad_norm": 2.453125, "learning_rate": 1.54e-05, "loss": 0.7059, "step": 77 }, { "epoch": 0.00985081695477149, "grad_norm": 2.6875, "learning_rate": 1.5600000000000003e-05, "loss": 0.7765, "step": 78 }, { "epoch": 0.009977109479832663, "grad_norm": 2.8125, "learning_rate": 1.58e-05, "loss": 0.8105, "step": 79 }, { "epoch": 0.010103402004893835, "grad_norm": 2.484375, "learning_rate": 1.6000000000000003e-05, "loss": 0.7544, "step": 80 }, { "epoch": 0.010229694529955008, "grad_norm": 2.46875, "learning_rate": 1.62e-05, "loss": 0.7326, "step": 81 }, { "epoch": 0.010355987055016181, "grad_norm": 2.78125, "learning_rate": 1.64e-05, "loss": 0.8053, "step": 82 }, { "epoch": 0.010482279580077355, "grad_norm": 2.546875, "learning_rate": 1.66e-05, "loss": 0.7019, "step": 83 }, { "epoch": 0.010608572105138526, "grad_norm": 2.671875, "learning_rate": 1.6800000000000002e-05, "loss": 0.7803, "step": 84 }, { "epoch": 0.0107348646301997, "grad_norm": 2.6875, "learning_rate": 1.7e-05, "loss": 0.7596, "step": 85 }, { "epoch": 0.010861157155260873, "grad_norm": 2.53125, "learning_rate": 1.72e-05, "loss": 0.7499, "step": 86 }, { "epoch": 0.010987449680322047, "grad_norm": 2.71875, "learning_rate": 1.7400000000000003e-05, "loss": 0.8082, "step": 87 }, { "epoch": 0.011113742205383218, "grad_norm": 2.59375, "learning_rate": 1.76e-05, "loss": 0.6442, "step": 88 }, { "epoch": 0.011240034730444392, "grad_norm": 2.453125, "learning_rate": 1.7800000000000002e-05, "loss": 0.7336, "step": 89 }, { "epoch": 0.011366327255505565, "grad_norm": 2.5625, "learning_rate": 1.8e-05, "loss": 0.6671, "step": 90 }, { "epoch": 0.011492619780566737, "grad_norm": 2.9375, "learning_rate": 1.8200000000000002e-05, "loss": 0.8877, "step": 91 }, { "epoch": 0.01161891230562791, "grad_norm": 2.8125, "learning_rate": 1.8400000000000003e-05, "loss": 0.7771, "step": 92 }, { "epoch": 0.011745204830689084, "grad_norm": 2.671875, "learning_rate": 1.86e-05, "loss": 0.7652, "step": 93 }, { "epoch": 0.011871497355750257, "grad_norm": 2.640625, "learning_rate": 1.88e-05, "loss": 0.7989, "step": 94 }, { "epoch": 0.011997789880811429, "grad_norm": 2.484375, "learning_rate": 1.9e-05, "loss": 0.758, "step": 95 }, { "epoch": 0.012124082405872602, "grad_norm": 2.71875, "learning_rate": 1.9200000000000003e-05, "loss": 0.7642, "step": 96 }, { "epoch": 0.012250374930933776, "grad_norm": 2.625, "learning_rate": 1.94e-05, "loss": 0.7289, "step": 97 }, { "epoch": 0.012376667455994948, "grad_norm": 2.671875, "learning_rate": 1.9600000000000002e-05, "loss": 0.6816, "step": 98 }, { "epoch": 0.012502959981056121, "grad_norm": 2.96875, "learning_rate": 1.98e-05, "loss": 0.7969, "step": 99 }, { "epoch": 0.012629252506117294, "grad_norm": 2.765625, "learning_rate": 2e-05, "loss": 0.7745, "step": 100 }, { "epoch": 0.012755545031178468, "grad_norm": 2.421875, "learning_rate": 1.9999999800712045e-05, "loss": 0.6676, "step": 101 }, { "epoch": 0.01288183755623964, "grad_norm": 2.859375, "learning_rate": 1.999999920284818e-05, "loss": 0.8382, "step": 102 }, { "epoch": 0.013008130081300813, "grad_norm": 2.6875, "learning_rate": 1.9999998206408437e-05, "loss": 0.7789, "step": 103 }, { "epoch": 0.013134422606361986, "grad_norm": 2.390625, "learning_rate": 1.999999681139285e-05, "loss": 0.6839, "step": 104 }, { "epoch": 0.013260715131423158, "grad_norm": 2.890625, "learning_rate": 1.9999995017801483e-05, "loss": 0.7338, "step": 105 }, { "epoch": 0.013387007656484332, "grad_norm": 3.03125, "learning_rate": 1.9999992825634394e-05, "loss": 0.6855, "step": 106 }, { "epoch": 0.013513300181545505, "grad_norm": 2.671875, "learning_rate": 1.9999990234891677e-05, "loss": 0.7621, "step": 107 }, { "epoch": 0.013639592706606678, "grad_norm": 2.578125, "learning_rate": 1.9999987245573438e-05, "loss": 0.7296, "step": 108 }, { "epoch": 0.01376588523166785, "grad_norm": 2.8125, "learning_rate": 1.9999983857679794e-05, "loss": 0.7212, "step": 109 }, { "epoch": 0.013892177756729024, "grad_norm": 2.921875, "learning_rate": 1.999998007121088e-05, "loss": 0.9027, "step": 110 }, { "epoch": 0.014018470281790197, "grad_norm": 2.59375, "learning_rate": 1.999997588616685e-05, "loss": 0.7912, "step": 111 }, { "epoch": 0.014144762806851369, "grad_norm": 2.578125, "learning_rate": 1.9999971302547862e-05, "loss": 0.73, "step": 112 }, { "epoch": 0.014271055331912542, "grad_norm": 2.78125, "learning_rate": 1.9999966320354104e-05, "loss": 0.7976, "step": 113 }, { "epoch": 0.014397347856973716, "grad_norm": 2.375, "learning_rate": 1.999996093958578e-05, "loss": 0.6868, "step": 114 }, { "epoch": 0.014523640382034889, "grad_norm": 2.484375, "learning_rate": 1.9999955160243094e-05, "loss": 0.8094, "step": 115 }, { "epoch": 0.01464993290709606, "grad_norm": 2.515625, "learning_rate": 1.9999948982326284e-05, "loss": 0.7773, "step": 116 }, { "epoch": 0.014776225432157234, "grad_norm": 2.578125, "learning_rate": 1.9999942405835596e-05, "loss": 0.7262, "step": 117 }, { "epoch": 0.014902517957218408, "grad_norm": 2.484375, "learning_rate": 1.9999935430771288e-05, "loss": 0.8033, "step": 118 }, { "epoch": 0.015028810482279581, "grad_norm": 2.578125, "learning_rate": 1.999992805713364e-05, "loss": 0.7948, "step": 119 }, { "epoch": 0.015155103007340753, "grad_norm": 2.328125, "learning_rate": 1.9999920284922947e-05, "loss": 0.642, "step": 120 }, { "epoch": 0.015281395532401926, "grad_norm": 2.46875, "learning_rate": 1.999991211413952e-05, "loss": 0.6889, "step": 121 }, { "epoch": 0.0154076880574631, "grad_norm": 2.8125, "learning_rate": 1.9999903544783676e-05, "loss": 0.7026, "step": 122 }, { "epoch": 0.015533980582524271, "grad_norm": 2.234375, "learning_rate": 1.9999894576855768e-05, "loss": 0.7458, "step": 123 }, { "epoch": 0.015660273107585446, "grad_norm": 2.296875, "learning_rate": 1.9999885210356147e-05, "loss": 0.6597, "step": 124 }, { "epoch": 0.015786565632646618, "grad_norm": 2.53125, "learning_rate": 1.999987544528519e-05, "loss": 0.7565, "step": 125 }, { "epoch": 0.01591285815770779, "grad_norm": 2.609375, "learning_rate": 1.999986528164328e-05, "loss": 0.7635, "step": 126 }, { "epoch": 0.016039150682768965, "grad_norm": 2.375, "learning_rate": 1.9999854719430828e-05, "loss": 0.6738, "step": 127 }, { "epoch": 0.016165443207830137, "grad_norm": 2.453125, "learning_rate": 1.9999843758648253e-05, "loss": 0.731, "step": 128 }, { "epoch": 0.01629173573289131, "grad_norm": 2.546875, "learning_rate": 1.9999832399295992e-05, "loss": 0.6943, "step": 129 }, { "epoch": 0.016418028257952483, "grad_norm": 2.328125, "learning_rate": 1.9999820641374495e-05, "loss": 0.659, "step": 130 }, { "epoch": 0.016544320783013655, "grad_norm": 2.484375, "learning_rate": 1.9999808484884237e-05, "loss": 0.7581, "step": 131 }, { "epoch": 0.016670613308074827, "grad_norm": 2.515625, "learning_rate": 1.99997959298257e-05, "loss": 0.7868, "step": 132 }, { "epoch": 0.016796905833136002, "grad_norm": 2.375, "learning_rate": 1.999978297619938e-05, "loss": 0.7118, "step": 133 }, { "epoch": 0.016923198358197174, "grad_norm": 2.5625, "learning_rate": 1.9999769624005797e-05, "loss": 0.8046, "step": 134 }, { "epoch": 0.017049490883258345, "grad_norm": 2.421875, "learning_rate": 1.9999755873245484e-05, "loss": 0.7309, "step": 135 }, { "epoch": 0.01717578340831952, "grad_norm": 2.484375, "learning_rate": 1.9999741723918985e-05, "loss": 0.7246, "step": 136 }, { "epoch": 0.017302075933380692, "grad_norm": 2.4375, "learning_rate": 1.999972717602687e-05, "loss": 0.7137, "step": 137 }, { "epoch": 0.017428368458441867, "grad_norm": 2.359375, "learning_rate": 1.9999712229569714e-05, "loss": 0.728, "step": 138 }, { "epoch": 0.01755466098350304, "grad_norm": 2.359375, "learning_rate": 1.9999696884548114e-05, "loss": 0.7035, "step": 139 }, { "epoch": 0.01768095350856421, "grad_norm": 2.328125, "learning_rate": 1.9999681140962683e-05, "loss": 0.6344, "step": 140 }, { "epoch": 0.017807246033625386, "grad_norm": 2.59375, "learning_rate": 1.9999664998814047e-05, "loss": 0.686, "step": 141 }, { "epoch": 0.017933538558686558, "grad_norm": 2.5, "learning_rate": 1.999964845810285e-05, "loss": 0.746, "step": 142 }, { "epoch": 0.01805983108374773, "grad_norm": 2.296875, "learning_rate": 1.9999631518829753e-05, "loss": 0.717, "step": 143 }, { "epoch": 0.018186123608808905, "grad_norm": 2.34375, "learning_rate": 1.9999614180995426e-05, "loss": 0.7587, "step": 144 }, { "epoch": 0.018312416133870076, "grad_norm": 2.359375, "learning_rate": 1.9999596444600567e-05, "loss": 0.7113, "step": 145 }, { "epoch": 0.018438708658931248, "grad_norm": 2.25, "learning_rate": 1.9999578309645874e-05, "loss": 0.6707, "step": 146 }, { "epoch": 0.018565001183992423, "grad_norm": 2.75, "learning_rate": 1.9999559776132077e-05, "loss": 0.7295, "step": 147 }, { "epoch": 0.018691293709053595, "grad_norm": 2.5625, "learning_rate": 1.9999540844059916e-05, "loss": 0.7483, "step": 148 }, { "epoch": 0.01881758623411477, "grad_norm": 2.515625, "learning_rate": 1.999952151343014e-05, "loss": 0.7211, "step": 149 }, { "epoch": 0.01894387875917594, "grad_norm": 2.21875, "learning_rate": 1.9999501784243523e-05, "loss": 0.6625, "step": 150 }, { "epoch": 0.019070171284237113, "grad_norm": 2.578125, "learning_rate": 1.9999481656500846e-05, "loss": 0.8249, "step": 151 }, { "epoch": 0.01919646380929829, "grad_norm": 2.390625, "learning_rate": 1.999946113020292e-05, "loss": 0.7932, "step": 152 }, { "epoch": 0.01932275633435946, "grad_norm": 2.796875, "learning_rate": 1.9999440205350558e-05, "loss": 0.8483, "step": 153 }, { "epoch": 0.019449048859420632, "grad_norm": 2.296875, "learning_rate": 1.9999418881944592e-05, "loss": 0.7314, "step": 154 }, { "epoch": 0.019575341384481807, "grad_norm": 2.390625, "learning_rate": 1.9999397159985878e-05, "loss": 0.726, "step": 155 }, { "epoch": 0.01970163390954298, "grad_norm": 2.390625, "learning_rate": 1.9999375039475278e-05, "loss": 0.7099, "step": 156 }, { "epoch": 0.01982792643460415, "grad_norm": 2.3125, "learning_rate": 1.999935252041367e-05, "loss": 0.7707, "step": 157 }, { "epoch": 0.019954218959665326, "grad_norm": 2.359375, "learning_rate": 1.9999329602801957e-05, "loss": 0.79, "step": 158 }, { "epoch": 0.020080511484726497, "grad_norm": 2.546875, "learning_rate": 1.9999306286641055e-05, "loss": 0.7337, "step": 159 }, { "epoch": 0.02020680400978767, "grad_norm": 2.46875, "learning_rate": 1.9999282571931883e-05, "loss": 0.723, "step": 160 }, { "epoch": 0.020333096534848844, "grad_norm": 2.421875, "learning_rate": 1.99992584586754e-05, "loss": 0.7435, "step": 161 }, { "epoch": 0.020459389059910016, "grad_norm": 2.40625, "learning_rate": 1.9999233946872555e-05, "loss": 0.8141, "step": 162 }, { "epoch": 0.02058568158497119, "grad_norm": 2.40625, "learning_rate": 1.9999209036524326e-05, "loss": 0.7423, "step": 163 }, { "epoch": 0.020711974110032363, "grad_norm": 2.46875, "learning_rate": 1.9999183727631714e-05, "loss": 0.6941, "step": 164 }, { "epoch": 0.020838266635093534, "grad_norm": 2.625, "learning_rate": 1.9999158020195724e-05, "loss": 0.7711, "step": 165 }, { "epoch": 0.02096455916015471, "grad_norm": 2.40625, "learning_rate": 1.9999131914217376e-05, "loss": 0.6969, "step": 166 }, { "epoch": 0.02109085168521588, "grad_norm": 2.453125, "learning_rate": 1.9999105409697717e-05, "loss": 0.7686, "step": 167 }, { "epoch": 0.021217144210277053, "grad_norm": 2.640625, "learning_rate": 1.9999078506637798e-05, "loss": 0.7083, "step": 168 }, { "epoch": 0.021343436735338228, "grad_norm": 2.1875, "learning_rate": 1.9999051205038696e-05, "loss": 0.6447, "step": 169 }, { "epoch": 0.0214697292603994, "grad_norm": 2.28125, "learning_rate": 1.99990235049015e-05, "loss": 0.727, "step": 170 }, { "epoch": 0.02159602178546057, "grad_norm": 2.234375, "learning_rate": 1.9998995406227307e-05, "loss": 0.6824, "step": 171 }, { "epoch": 0.021722314310521747, "grad_norm": 2.5625, "learning_rate": 1.9998966909017246e-05, "loss": 0.7535, "step": 172 }, { "epoch": 0.02184860683558292, "grad_norm": 2.390625, "learning_rate": 1.9998938013272444e-05, "loss": 0.7203, "step": 173 }, { "epoch": 0.021974899360644094, "grad_norm": 2.28125, "learning_rate": 1.9998908718994056e-05, "loss": 0.759, "step": 174 }, { "epoch": 0.022101191885705265, "grad_norm": 2.484375, "learning_rate": 1.9998879026183254e-05, "loss": 0.7551, "step": 175 }, { "epoch": 0.022227484410766437, "grad_norm": 2.1875, "learning_rate": 1.9998848934841214e-05, "loss": 0.6386, "step": 176 }, { "epoch": 0.022353776935827612, "grad_norm": 2.390625, "learning_rate": 1.999881844496914e-05, "loss": 0.7109, "step": 177 }, { "epoch": 0.022480069460888784, "grad_norm": 2.265625, "learning_rate": 1.999878755656825e-05, "loss": 0.7733, "step": 178 }, { "epoch": 0.022606361985949956, "grad_norm": 2.34375, "learning_rate": 1.9998756269639768e-05, "loss": 0.7062, "step": 179 }, { "epoch": 0.02273265451101113, "grad_norm": 2.359375, "learning_rate": 1.9998724584184947e-05, "loss": 0.6544, "step": 180 }, { "epoch": 0.022858947036072302, "grad_norm": 2.671875, "learning_rate": 1.9998692500205045e-05, "loss": 0.821, "step": 181 }, { "epoch": 0.022985239561133474, "grad_norm": 2.8125, "learning_rate": 1.9998660017701345e-05, "loss": 0.7186, "step": 182 }, { "epoch": 0.02311153208619465, "grad_norm": 2.46875, "learning_rate": 1.9998627136675138e-05, "loss": 0.6877, "step": 183 }, { "epoch": 0.02323782461125582, "grad_norm": 2.375, "learning_rate": 1.9998593857127736e-05, "loss": 0.7841, "step": 184 }, { "epoch": 0.023364117136316993, "grad_norm": 2.515625, "learning_rate": 1.999856017906047e-05, "loss": 0.7384, "step": 185 }, { "epoch": 0.023490409661378168, "grad_norm": 2.25, "learning_rate": 1.9998526102474676e-05, "loss": 0.7312, "step": 186 }, { "epoch": 0.02361670218643934, "grad_norm": 2.5625, "learning_rate": 1.9998491627371714e-05, "loss": 0.6914, "step": 187 }, { "epoch": 0.023742994711500515, "grad_norm": 2.328125, "learning_rate": 1.999845675375296e-05, "loss": 0.7053, "step": 188 }, { "epoch": 0.023869287236561686, "grad_norm": 2.390625, "learning_rate": 1.9998421481619802e-05, "loss": 0.7534, "step": 189 }, { "epoch": 0.023995579761622858, "grad_norm": 2.25, "learning_rate": 1.9998385810973647e-05, "loss": 0.6624, "step": 190 }, { "epoch": 0.024121872286684033, "grad_norm": 2.453125, "learning_rate": 1.9998349741815916e-05, "loss": 0.7969, "step": 191 }, { "epoch": 0.024248164811745205, "grad_norm": 2.5, "learning_rate": 1.9998313274148045e-05, "loss": 0.7236, "step": 192 }, { "epoch": 0.024374457336806377, "grad_norm": 2.390625, "learning_rate": 1.999827640797149e-05, "loss": 0.6894, "step": 193 }, { "epoch": 0.024500749861867552, "grad_norm": 2.234375, "learning_rate": 1.999823914328772e-05, "loss": 0.7169, "step": 194 }, { "epoch": 0.024627042386928723, "grad_norm": 2.3125, "learning_rate": 1.999820148009822e-05, "loss": 0.7392, "step": 195 }, { "epoch": 0.024753334911989895, "grad_norm": 2.1875, "learning_rate": 1.9998163418404494e-05, "loss": 0.7072, "step": 196 }, { "epoch": 0.02487962743705107, "grad_norm": 2.53125, "learning_rate": 1.9998124958208054e-05, "loss": 0.7762, "step": 197 }, { "epoch": 0.025005919962112242, "grad_norm": 2.28125, "learning_rate": 1.9998086099510433e-05, "loss": 0.8025, "step": 198 }, { "epoch": 0.025132212487173414, "grad_norm": 2.296875, "learning_rate": 1.9998046842313185e-05, "loss": 0.7147, "step": 199 }, { "epoch": 0.02525850501223459, "grad_norm": 2.40625, "learning_rate": 1.9998007186617872e-05, "loss": 0.7138, "step": 200 }, { "epoch": 0.02538479753729576, "grad_norm": 2.46875, "learning_rate": 1.9997967132426073e-05, "loss": 0.7639, "step": 201 }, { "epoch": 0.025511090062356936, "grad_norm": 2.546875, "learning_rate": 1.9997926679739385e-05, "loss": 0.7435, "step": 202 }, { "epoch": 0.025637382587418107, "grad_norm": 2.140625, "learning_rate": 1.999788582855942e-05, "loss": 0.693, "step": 203 }, { "epoch": 0.02576367511247928, "grad_norm": 2.515625, "learning_rate": 1.999784457888781e-05, "loss": 0.789, "step": 204 }, { "epoch": 0.025889967637540454, "grad_norm": 2.59375, "learning_rate": 1.9997802930726195e-05, "loss": 0.7907, "step": 205 }, { "epoch": 0.026016260162601626, "grad_norm": 2.265625, "learning_rate": 1.999776088407624e-05, "loss": 0.7559, "step": 206 }, { "epoch": 0.026142552687662798, "grad_norm": 2.34375, "learning_rate": 1.999771843893961e-05, "loss": 0.7378, "step": 207 }, { "epoch": 0.026268845212723973, "grad_norm": 2.375, "learning_rate": 1.999767559531801e-05, "loss": 0.7214, "step": 208 }, { "epoch": 0.026395137737785145, "grad_norm": 2.3125, "learning_rate": 1.999763235321314e-05, "loss": 0.6613, "step": 209 }, { "epoch": 0.026521430262846316, "grad_norm": 2.328125, "learning_rate": 1.999758871262672e-05, "loss": 0.6872, "step": 210 }, { "epoch": 0.02664772278790749, "grad_norm": 2.5, "learning_rate": 1.99975446735605e-05, "loss": 0.7684, "step": 211 }, { "epoch": 0.026774015312968663, "grad_norm": 2.46875, "learning_rate": 1.9997500236016233e-05, "loss": 0.8671, "step": 212 }, { "epoch": 0.02690030783802984, "grad_norm": 2.390625, "learning_rate": 1.9997455399995683e-05, "loss": 0.7641, "step": 213 }, { "epoch": 0.02702660036309101, "grad_norm": 2.46875, "learning_rate": 1.9997410165500643e-05, "loss": 0.7303, "step": 214 }, { "epoch": 0.02715289288815218, "grad_norm": 2.359375, "learning_rate": 1.999736453253291e-05, "loss": 0.6635, "step": 215 }, { "epoch": 0.027279185413213357, "grad_norm": 2.3125, "learning_rate": 1.9997318501094312e-05, "loss": 0.7382, "step": 216 }, { "epoch": 0.02740547793827453, "grad_norm": 2.5, "learning_rate": 1.9997272071186678e-05, "loss": 0.7409, "step": 217 }, { "epoch": 0.0275317704633357, "grad_norm": 2.171875, "learning_rate": 1.9997225242811854e-05, "loss": 0.6896, "step": 218 }, { "epoch": 0.027658062988396875, "grad_norm": 2.140625, "learning_rate": 1.999717801597172e-05, "loss": 0.6472, "step": 219 }, { "epoch": 0.027784355513458047, "grad_norm": 2.171875, "learning_rate": 1.9997130390668144e-05, "loss": 0.7064, "step": 220 }, { "epoch": 0.02791064803851922, "grad_norm": 2.578125, "learning_rate": 1.999708236690303e-05, "loss": 0.7241, "step": 221 }, { "epoch": 0.028036940563580394, "grad_norm": 2.34375, "learning_rate": 1.9997033944678294e-05, "loss": 0.6958, "step": 222 }, { "epoch": 0.028163233088641566, "grad_norm": 2.453125, "learning_rate": 1.9996985123995863e-05, "loss": 0.8603, "step": 223 }, { "epoch": 0.028289525613702737, "grad_norm": 2.265625, "learning_rate": 1.9996935904857685e-05, "loss": 0.6708, "step": 224 }, { "epoch": 0.028415818138763912, "grad_norm": 2.234375, "learning_rate": 1.999688628726572e-05, "loss": 0.7174, "step": 225 }, { "epoch": 0.028542110663825084, "grad_norm": 2.421875, "learning_rate": 1.999683627122195e-05, "loss": 0.7275, "step": 226 }, { "epoch": 0.02866840318888626, "grad_norm": 2.328125, "learning_rate": 1.999678585672836e-05, "loss": 0.7059, "step": 227 }, { "epoch": 0.02879469571394743, "grad_norm": 2.34375, "learning_rate": 1.999673504378697e-05, "loss": 0.7896, "step": 228 }, { "epoch": 0.028920988239008603, "grad_norm": 2.296875, "learning_rate": 1.9996683832399797e-05, "loss": 0.7732, "step": 229 }, { "epoch": 0.029047280764069778, "grad_norm": 2.453125, "learning_rate": 1.9996632222568886e-05, "loss": 0.7645, "step": 230 }, { "epoch": 0.02917357328913095, "grad_norm": 2.296875, "learning_rate": 1.999658021429629e-05, "loss": 0.7614, "step": 231 }, { "epoch": 0.02929986581419212, "grad_norm": 2.203125, "learning_rate": 1.999652780758409e-05, "loss": 0.6292, "step": 232 }, { "epoch": 0.029426158339253296, "grad_norm": 2.171875, "learning_rate": 1.9996475002434365e-05, "loss": 0.7068, "step": 233 }, { "epoch": 0.029552450864314468, "grad_norm": 2.46875, "learning_rate": 1.999642179884923e-05, "loss": 0.7761, "step": 234 }, { "epoch": 0.02967874338937564, "grad_norm": 2.171875, "learning_rate": 1.9996368196830792e-05, "loss": 0.6756, "step": 235 }, { "epoch": 0.029805035914436815, "grad_norm": 2.390625, "learning_rate": 1.9996314196381203e-05, "loss": 0.6776, "step": 236 }, { "epoch": 0.029931328439497987, "grad_norm": 2.3125, "learning_rate": 1.9996259797502602e-05, "loss": 0.7151, "step": 237 }, { "epoch": 0.030057620964559162, "grad_norm": 2.078125, "learning_rate": 1.9996205000197166e-05, "loss": 0.6866, "step": 238 }, { "epoch": 0.030183913489620334, "grad_norm": 2.171875, "learning_rate": 1.9996149804467076e-05, "loss": 0.6644, "step": 239 }, { "epoch": 0.030310206014681505, "grad_norm": 2.25, "learning_rate": 1.999609421031453e-05, "loss": 0.79, "step": 240 }, { "epoch": 0.03043649853974268, "grad_norm": 2.375, "learning_rate": 1.999603821774175e-05, "loss": 0.7643, "step": 241 }, { "epoch": 0.030562791064803852, "grad_norm": 2.15625, "learning_rate": 1.9995981826750958e-05, "loss": 0.7272, "step": 242 }, { "epoch": 0.030689083589865024, "grad_norm": 2.328125, "learning_rate": 1.9995925037344413e-05, "loss": 0.7634, "step": 243 }, { "epoch": 0.0308153761149262, "grad_norm": 2.21875, "learning_rate": 1.999586784952437e-05, "loss": 0.8602, "step": 244 }, { "epoch": 0.03094166863998737, "grad_norm": 2.1875, "learning_rate": 1.999581026329311e-05, "loss": 0.6201, "step": 245 }, { "epoch": 0.031067961165048542, "grad_norm": 2.328125, "learning_rate": 1.9995752278652933e-05, "loss": 0.7459, "step": 246 }, { "epoch": 0.031194253690109718, "grad_norm": 2.328125, "learning_rate": 1.999569389560614e-05, "loss": 0.7969, "step": 247 }, { "epoch": 0.03132054621517089, "grad_norm": 2.46875, "learning_rate": 1.999563511415507e-05, "loss": 0.7565, "step": 248 }, { "epoch": 0.03144683874023206, "grad_norm": 2.359375, "learning_rate": 1.9995575934302058e-05, "loss": 0.8018, "step": 249 }, { "epoch": 0.031573131265293236, "grad_norm": 2.375, "learning_rate": 1.9995516356049465e-05, "loss": 0.7225, "step": 250 }, { "epoch": 0.03169942379035441, "grad_norm": 2.234375, "learning_rate": 1.9995456379399665e-05, "loss": 0.7736, "step": 251 }, { "epoch": 0.03182571631541558, "grad_norm": 2.234375, "learning_rate": 1.9995396004355055e-05, "loss": 0.8157, "step": 252 }, { "epoch": 0.031952008840476755, "grad_norm": 2.296875, "learning_rate": 1.9995335230918028e-05, "loss": 0.7416, "step": 253 }, { "epoch": 0.03207830136553793, "grad_norm": 2.109375, "learning_rate": 1.9995274059091018e-05, "loss": 0.6387, "step": 254 }, { "epoch": 0.0322045938905991, "grad_norm": 2.234375, "learning_rate": 1.9995212488876457e-05, "loss": 0.6889, "step": 255 }, { "epoch": 0.03233088641566027, "grad_norm": 2.109375, "learning_rate": 1.9995150520276806e-05, "loss": 0.6923, "step": 256 }, { "epoch": 0.03245717894072145, "grad_norm": 2.3125, "learning_rate": 1.9995088153294527e-05, "loss": 0.7242, "step": 257 }, { "epoch": 0.03258347146578262, "grad_norm": 2.28125, "learning_rate": 1.9995025387932107e-05, "loss": 0.7073, "step": 258 }, { "epoch": 0.03270976399084379, "grad_norm": 2.09375, "learning_rate": 1.999496222419205e-05, "loss": 0.7363, "step": 259 }, { "epoch": 0.03283605651590497, "grad_norm": 2.234375, "learning_rate": 1.9994898662076874e-05, "loss": 0.6322, "step": 260 }, { "epoch": 0.032962349040966135, "grad_norm": 2.03125, "learning_rate": 1.9994834701589113e-05, "loss": 0.6562, "step": 261 }, { "epoch": 0.03308864156602731, "grad_norm": 2.140625, "learning_rate": 1.9994770342731314e-05, "loss": 0.66, "step": 262 }, { "epoch": 0.033214934091088485, "grad_norm": 2.359375, "learning_rate": 1.9994705585506043e-05, "loss": 0.7807, "step": 263 }, { "epoch": 0.033341226616149654, "grad_norm": 2.1875, "learning_rate": 1.9994640429915885e-05, "loss": 0.6535, "step": 264 }, { "epoch": 0.03346751914121083, "grad_norm": 2.25, "learning_rate": 1.9994574875963428e-05, "loss": 0.7781, "step": 265 }, { "epoch": 0.033593811666272004, "grad_norm": 2.078125, "learning_rate": 1.999450892365129e-05, "loss": 0.6297, "step": 266 }, { "epoch": 0.03372010419133317, "grad_norm": 2.234375, "learning_rate": 1.99944425729821e-05, "loss": 0.757, "step": 267 }, { "epoch": 0.03384639671639435, "grad_norm": 2.3125, "learning_rate": 1.9994375823958504e-05, "loss": 0.7784, "step": 268 }, { "epoch": 0.03397268924145552, "grad_norm": 2.375, "learning_rate": 1.999430867658316e-05, "loss": 0.7196, "step": 269 }, { "epoch": 0.03409898176651669, "grad_norm": 2.234375, "learning_rate": 1.999424113085874e-05, "loss": 0.818, "step": 270 }, { "epoch": 0.034225274291577866, "grad_norm": 2.5, "learning_rate": 1.9994173186787948e-05, "loss": 0.9831, "step": 271 }, { "epoch": 0.03435156681663904, "grad_norm": 2.453125, "learning_rate": 1.9994104844373484e-05, "loss": 0.7562, "step": 272 }, { "epoch": 0.034477859341700216, "grad_norm": 2.484375, "learning_rate": 1.999403610361807e-05, "loss": 0.7444, "step": 273 }, { "epoch": 0.034604151866761385, "grad_norm": 2.15625, "learning_rate": 1.9993966964524453e-05, "loss": 0.6361, "step": 274 }, { "epoch": 0.03473044439182256, "grad_norm": 2.28125, "learning_rate": 1.999389742709538e-05, "loss": 0.7049, "step": 275 }, { "epoch": 0.034856736916883735, "grad_norm": 2.21875, "learning_rate": 1.999382749133363e-05, "loss": 0.6772, "step": 276 }, { "epoch": 0.0349830294419449, "grad_norm": 2.46875, "learning_rate": 1.9993757157241987e-05, "loss": 0.6618, "step": 277 }, { "epoch": 0.03510932196700608, "grad_norm": 2.59375, "learning_rate": 1.999368642482326e-05, "loss": 0.7653, "step": 278 }, { "epoch": 0.03523561449206725, "grad_norm": 2.265625, "learning_rate": 1.9993615294080256e-05, "loss": 0.7198, "step": 279 }, { "epoch": 0.03536190701712842, "grad_norm": 2.359375, "learning_rate": 1.9993543765015823e-05, "loss": 0.7015, "step": 280 }, { "epoch": 0.0354881995421896, "grad_norm": 2.296875, "learning_rate": 1.9993471837632804e-05, "loss": 0.7198, "step": 281 }, { "epoch": 0.03561449206725077, "grad_norm": 2.40625, "learning_rate": 1.999339951193407e-05, "loss": 0.7593, "step": 282 }, { "epoch": 0.03574078459231194, "grad_norm": 2.234375, "learning_rate": 1.99933267879225e-05, "loss": 0.711, "step": 283 }, { "epoch": 0.035867077117373115, "grad_norm": 2.15625, "learning_rate": 1.9993253665600996e-05, "loss": 0.7338, "step": 284 }, { "epoch": 0.03599336964243429, "grad_norm": 2.515625, "learning_rate": 1.999318014497247e-05, "loss": 0.7034, "step": 285 }, { "epoch": 0.03611966216749546, "grad_norm": 2.234375, "learning_rate": 1.9993106226039852e-05, "loss": 0.6691, "step": 286 }, { "epoch": 0.036245954692556634, "grad_norm": 2.21875, "learning_rate": 1.9993031908806096e-05, "loss": 0.6736, "step": 287 }, { "epoch": 0.03637224721761781, "grad_norm": 2.359375, "learning_rate": 1.999295719327415e-05, "loss": 0.784, "step": 288 }, { "epoch": 0.03649853974267898, "grad_norm": 2.390625, "learning_rate": 1.999288207944701e-05, "loss": 0.7177, "step": 289 }, { "epoch": 0.03662483226774015, "grad_norm": 2.359375, "learning_rate": 1.999280656732765e-05, "loss": 0.8176, "step": 290 }, { "epoch": 0.03675112479280133, "grad_norm": 2.28125, "learning_rate": 1.999273065691909e-05, "loss": 0.8373, "step": 291 }, { "epoch": 0.036877417317862496, "grad_norm": 2.3125, "learning_rate": 1.999265434822436e-05, "loss": 0.8012, "step": 292 }, { "epoch": 0.03700370984292367, "grad_norm": 2.125, "learning_rate": 1.9992577641246495e-05, "loss": 0.6632, "step": 293 }, { "epoch": 0.037130002367984846, "grad_norm": 2.171875, "learning_rate": 1.9992500535988553e-05, "loss": 0.7092, "step": 294 }, { "epoch": 0.037256294893046014, "grad_norm": 2.5, "learning_rate": 1.9992423032453608e-05, "loss": 0.7944, "step": 295 }, { "epoch": 0.03738258741810719, "grad_norm": 2.734375, "learning_rate": 1.999234513064475e-05, "loss": 0.8456, "step": 296 }, { "epoch": 0.037508879943168365, "grad_norm": 2.375, "learning_rate": 1.999226683056508e-05, "loss": 0.722, "step": 297 }, { "epoch": 0.03763517246822954, "grad_norm": 2.3125, "learning_rate": 1.999218813221773e-05, "loss": 0.7215, "step": 298 }, { "epoch": 0.03776146499329071, "grad_norm": 2.25, "learning_rate": 1.999210903560582e-05, "loss": 0.7737, "step": 299 }, { "epoch": 0.03788775751835188, "grad_norm": 2.375, "learning_rate": 1.9992029540732515e-05, "loss": 0.681, "step": 300 }, { "epoch": 0.03801405004341306, "grad_norm": 2.296875, "learning_rate": 1.999194964760098e-05, "loss": 0.7413, "step": 301 }, { "epoch": 0.03814034256847423, "grad_norm": 2.359375, "learning_rate": 1.9991869356214396e-05, "loss": 0.6809, "step": 302 }, { "epoch": 0.0382666350935354, "grad_norm": 2.109375, "learning_rate": 1.999178866657597e-05, "loss": 0.6568, "step": 303 }, { "epoch": 0.03839292761859658, "grad_norm": 7.8125, "learning_rate": 1.999170757868891e-05, "loss": 1.4252, "step": 304 }, { "epoch": 0.038519220143657745, "grad_norm": 2.421875, "learning_rate": 1.999162609255646e-05, "loss": 0.7367, "step": 305 }, { "epoch": 0.03864551266871892, "grad_norm": 2.40625, "learning_rate": 1.9991544208181857e-05, "loss": 0.797, "step": 306 }, { "epoch": 0.038771805193780096, "grad_norm": 2.234375, "learning_rate": 1.9991461925568365e-05, "loss": 0.7008, "step": 307 }, { "epoch": 0.038898097718841264, "grad_norm": 2.296875, "learning_rate": 1.999137924471927e-05, "loss": 0.7496, "step": 308 }, { "epoch": 0.03902439024390244, "grad_norm": 2.359375, "learning_rate": 1.9991296165637864e-05, "loss": 0.8204, "step": 309 }, { "epoch": 0.039150682768963614, "grad_norm": 2.1875, "learning_rate": 1.9991212688327456e-05, "loss": 0.7592, "step": 310 }, { "epoch": 0.03927697529402478, "grad_norm": 2.03125, "learning_rate": 1.9991128812791375e-05, "loss": 0.5965, "step": 311 }, { "epoch": 0.03940326781908596, "grad_norm": 2.40625, "learning_rate": 1.999104453903297e-05, "loss": 0.7658, "step": 312 }, { "epoch": 0.03952956034414713, "grad_norm": 2.421875, "learning_rate": 1.999095986705559e-05, "loss": 0.7118, "step": 313 }, { "epoch": 0.0396558528692083, "grad_norm": 6.25, "learning_rate": 1.9990874796862614e-05, "loss": 1.1945, "step": 314 }, { "epoch": 0.039782145394269476, "grad_norm": 2.203125, "learning_rate": 1.9990789328457437e-05, "loss": 0.7177, "step": 315 }, { "epoch": 0.03990843791933065, "grad_norm": 2.171875, "learning_rate": 1.999070346184346e-05, "loss": 0.7298, "step": 316 }, { "epoch": 0.04003473044439182, "grad_norm": 6.78125, "learning_rate": 1.9990617197024103e-05, "loss": 1.2707, "step": 317 }, { "epoch": 0.040161022969452995, "grad_norm": 2.25, "learning_rate": 1.999053053400281e-05, "loss": 0.7034, "step": 318 }, { "epoch": 0.04028731549451417, "grad_norm": 2.296875, "learning_rate": 1.999044347278304e-05, "loss": 0.7256, "step": 319 }, { "epoch": 0.04041360801957534, "grad_norm": 2.203125, "learning_rate": 1.999035601336825e-05, "loss": 0.7195, "step": 320 }, { "epoch": 0.04053990054463651, "grad_norm": 2.234375, "learning_rate": 1.9990268155761935e-05, "loss": 0.7766, "step": 321 }, { "epoch": 0.04066619306969769, "grad_norm": 2.34375, "learning_rate": 1.9990179899967597e-05, "loss": 0.7847, "step": 322 }, { "epoch": 0.040792485594758864, "grad_norm": 2.265625, "learning_rate": 1.9990091245988745e-05, "loss": 0.8138, "step": 323 }, { "epoch": 0.04091877811982003, "grad_norm": 2.0625, "learning_rate": 1.9990002193828923e-05, "loss": 0.7155, "step": 324 }, { "epoch": 0.04104507064488121, "grad_norm": 2.234375, "learning_rate": 1.9989912743491676e-05, "loss": 0.7522, "step": 325 }, { "epoch": 0.04117136316994238, "grad_norm": 2.15625, "learning_rate": 1.9989822894980565e-05, "loss": 0.6571, "step": 326 }, { "epoch": 0.04129765569500355, "grad_norm": 2.15625, "learning_rate": 1.998973264829918e-05, "loss": 0.7355, "step": 327 }, { "epoch": 0.041423948220064725, "grad_norm": 2.34375, "learning_rate": 1.9989642003451115e-05, "loss": 0.7517, "step": 328 }, { "epoch": 0.0415502407451259, "grad_norm": 2.234375, "learning_rate": 1.9989550960439975e-05, "loss": 0.6738, "step": 329 }, { "epoch": 0.04167653327018707, "grad_norm": 2.203125, "learning_rate": 1.99894595192694e-05, "loss": 0.7168, "step": 330 }, { "epoch": 0.041802825795248244, "grad_norm": 2.015625, "learning_rate": 1.9989367679943025e-05, "loss": 0.6341, "step": 331 }, { "epoch": 0.04192911832030942, "grad_norm": 2.203125, "learning_rate": 1.9989275442464523e-05, "loss": 0.7491, "step": 332 }, { "epoch": 0.04205541084537059, "grad_norm": 2.28125, "learning_rate": 1.9989182806837553e-05, "loss": 0.7456, "step": 333 }, { "epoch": 0.04218170337043176, "grad_norm": 2.171875, "learning_rate": 1.9989089773065824e-05, "loss": 0.6984, "step": 334 }, { "epoch": 0.04230799589549294, "grad_norm": 2.15625, "learning_rate": 1.9988996341153033e-05, "loss": 0.7626, "step": 335 }, { "epoch": 0.042434288420554106, "grad_norm": 2.296875, "learning_rate": 1.998890251110291e-05, "loss": 0.7696, "step": 336 }, { "epoch": 0.04256058094561528, "grad_norm": 2.171875, "learning_rate": 1.9988808282919192e-05, "loss": 0.6834, "step": 337 }, { "epoch": 0.042686873470676456, "grad_norm": 2.3125, "learning_rate": 1.9988713656605635e-05, "loss": 0.7076, "step": 338 }, { "epoch": 0.042813165995737625, "grad_norm": 2.328125, "learning_rate": 1.998861863216601e-05, "loss": 0.7072, "step": 339 }, { "epoch": 0.0429394585207988, "grad_norm": 2.25, "learning_rate": 1.9988523209604106e-05, "loss": 0.7425, "step": 340 }, { "epoch": 0.043065751045859975, "grad_norm": 2.109375, "learning_rate": 1.9988427388923726e-05, "loss": 0.7537, "step": 341 }, { "epoch": 0.04319204357092114, "grad_norm": 2.21875, "learning_rate": 1.9988331170128693e-05, "loss": 0.8543, "step": 342 }, { "epoch": 0.04331833609598232, "grad_norm": 2.421875, "learning_rate": 1.9988234553222835e-05, "loss": 0.7418, "step": 343 }, { "epoch": 0.04344462862104349, "grad_norm": 2.328125, "learning_rate": 1.998813753821e-05, "loss": 0.8099, "step": 344 }, { "epoch": 0.04357092114610466, "grad_norm": 2.46875, "learning_rate": 1.998804012509407e-05, "loss": 0.8177, "step": 345 }, { "epoch": 0.04369721367116584, "grad_norm": 2.15625, "learning_rate": 1.9987942313878915e-05, "loss": 0.7098, "step": 346 }, { "epoch": 0.04382350619622701, "grad_norm": 2.53125, "learning_rate": 1.9987844104568436e-05, "loss": 0.6871, "step": 347 }, { "epoch": 0.04394979872128819, "grad_norm": 2.078125, "learning_rate": 1.9987745497166546e-05, "loss": 0.6879, "step": 348 }, { "epoch": 0.044076091246349355, "grad_norm": 2.359375, "learning_rate": 1.998764649167718e-05, "loss": 0.6808, "step": 349 }, { "epoch": 0.04420238377141053, "grad_norm": 2.359375, "learning_rate": 1.998754708810428e-05, "loss": 0.758, "step": 350 }, { "epoch": 0.044328676296471706, "grad_norm": 2.34375, "learning_rate": 1.998744728645181e-05, "loss": 0.7195, "step": 351 }, { "epoch": 0.044454968821532874, "grad_norm": 2.390625, "learning_rate": 1.998734708672375e-05, "loss": 0.729, "step": 352 }, { "epoch": 0.04458126134659405, "grad_norm": 2.09375, "learning_rate": 1.998724648892409e-05, "loss": 0.7391, "step": 353 }, { "epoch": 0.044707553871655224, "grad_norm": 2.28125, "learning_rate": 1.998714549305684e-05, "loss": 0.7561, "step": 354 }, { "epoch": 0.04483384639671639, "grad_norm": 2.234375, "learning_rate": 1.9987044099126027e-05, "loss": 0.7079, "step": 355 }, { "epoch": 0.04496013892177757, "grad_norm": 2.09375, "learning_rate": 1.998694230713569e-05, "loss": 0.7208, "step": 356 }, { "epoch": 0.04508643144683874, "grad_norm": 2.21875, "learning_rate": 1.998684011708989e-05, "loss": 0.7131, "step": 357 }, { "epoch": 0.04521272397189991, "grad_norm": 2.125, "learning_rate": 1.9986737528992693e-05, "loss": 0.6427, "step": 358 }, { "epoch": 0.045339016496961086, "grad_norm": 2.265625, "learning_rate": 1.99866345428482e-05, "loss": 0.7882, "step": 359 }, { "epoch": 0.04546530902202226, "grad_norm": 2.25, "learning_rate": 1.9986531158660503e-05, "loss": 0.7599, "step": 360 }, { "epoch": 0.04559160154708343, "grad_norm": 2.203125, "learning_rate": 1.998642737643373e-05, "loss": 0.7437, "step": 361 }, { "epoch": 0.045717894072144605, "grad_norm": 2.265625, "learning_rate": 1.9986323196172016e-05, "loss": 0.7779, "step": 362 }, { "epoch": 0.04584418659720578, "grad_norm": 2.203125, "learning_rate": 1.9986218617879508e-05, "loss": 0.6971, "step": 363 }, { "epoch": 0.04597047912226695, "grad_norm": 2.171875, "learning_rate": 1.9986113641560387e-05, "loss": 0.8063, "step": 364 }, { "epoch": 0.04609677164732812, "grad_norm": 2.1875, "learning_rate": 1.9986008267218824e-05, "loss": 0.7571, "step": 365 }, { "epoch": 0.0462230641723893, "grad_norm": 2.171875, "learning_rate": 1.9985902494859023e-05, "loss": 0.7421, "step": 366 }, { "epoch": 0.04634935669745047, "grad_norm": 2.140625, "learning_rate": 1.9985796324485207e-05, "loss": 0.7593, "step": 367 }, { "epoch": 0.04647564922251164, "grad_norm": 2.125, "learning_rate": 1.99856897561016e-05, "loss": 0.7505, "step": 368 }, { "epoch": 0.04660194174757282, "grad_norm": 2.0625, "learning_rate": 1.998558278971245e-05, "loss": 0.7646, "step": 369 }, { "epoch": 0.046728234272633985, "grad_norm": 2.21875, "learning_rate": 1.998547542532202e-05, "loss": 0.7197, "step": 370 }, { "epoch": 0.04685452679769516, "grad_norm": 2.234375, "learning_rate": 1.9985367662934595e-05, "loss": 0.6742, "step": 371 }, { "epoch": 0.046980819322756336, "grad_norm": 2.078125, "learning_rate": 1.9985259502554464e-05, "loss": 0.7536, "step": 372 }, { "epoch": 0.04710711184781751, "grad_norm": 2.234375, "learning_rate": 1.998515094418594e-05, "loss": 0.6787, "step": 373 }, { "epoch": 0.04723340437287868, "grad_norm": 2.109375, "learning_rate": 1.998504198783335e-05, "loss": 0.6641, "step": 374 }, { "epoch": 0.047359696897939854, "grad_norm": 2.046875, "learning_rate": 1.9984932633501037e-05, "loss": 0.6399, "step": 375 }, { "epoch": 0.04748598942300103, "grad_norm": 2.296875, "learning_rate": 1.9984822881193358e-05, "loss": 0.7801, "step": 376 }, { "epoch": 0.0476122819480622, "grad_norm": 2.328125, "learning_rate": 1.9984712730914693e-05, "loss": 0.7235, "step": 377 }, { "epoch": 0.04773857447312337, "grad_norm": 2.140625, "learning_rate": 1.9984602182669425e-05, "loss": 0.7733, "step": 378 }, { "epoch": 0.04786486699818455, "grad_norm": 2.296875, "learning_rate": 1.9984491236461963e-05, "loss": 0.7316, "step": 379 }, { "epoch": 0.047991159523245716, "grad_norm": 2.328125, "learning_rate": 1.998437989229673e-05, "loss": 0.7536, "step": 380 }, { "epoch": 0.04811745204830689, "grad_norm": 2.34375, "learning_rate": 1.998426815017817e-05, "loss": 0.8137, "step": 381 }, { "epoch": 0.048243744573368066, "grad_norm": 2.078125, "learning_rate": 1.998415601011072e-05, "loss": 0.6658, "step": 382 }, { "epoch": 0.048370037098429235, "grad_norm": 2.46875, "learning_rate": 1.9984043472098866e-05, "loss": 0.8377, "step": 383 }, { "epoch": 0.04849632962349041, "grad_norm": 2.234375, "learning_rate": 1.9983930536147084e-05, "loss": 0.7266, "step": 384 }, { "epoch": 0.048622622148551585, "grad_norm": 2.28125, "learning_rate": 1.998381720225988e-05, "loss": 0.7381, "step": 385 }, { "epoch": 0.04874891467361275, "grad_norm": 2.1875, "learning_rate": 1.998370347044177e-05, "loss": 0.6871, "step": 386 }, { "epoch": 0.04887520719867393, "grad_norm": 2.34375, "learning_rate": 1.9983589340697288e-05, "loss": 0.7247, "step": 387 }, { "epoch": 0.049001499723735104, "grad_norm": 2.34375, "learning_rate": 1.998347481303098e-05, "loss": 0.7243, "step": 388 }, { "epoch": 0.04912779224879627, "grad_norm": 2.28125, "learning_rate": 1.9983359887447414e-05, "loss": 0.6849, "step": 389 }, { "epoch": 0.04925408477385745, "grad_norm": 2.046875, "learning_rate": 1.998324456395117e-05, "loss": 0.6563, "step": 390 }, { "epoch": 0.04938037729891862, "grad_norm": 2.3125, "learning_rate": 1.9983128842546842e-05, "loss": 0.7879, "step": 391 }, { "epoch": 0.04950666982397979, "grad_norm": 2.234375, "learning_rate": 1.9983012723239046e-05, "loss": 0.6683, "step": 392 }, { "epoch": 0.049632962349040965, "grad_norm": 2.1875, "learning_rate": 1.9982896206032406e-05, "loss": 0.7674, "step": 393 }, { "epoch": 0.04975925487410214, "grad_norm": 2.109375, "learning_rate": 1.9982779290931572e-05, "loss": 0.7387, "step": 394 }, { "epoch": 0.04988554739916331, "grad_norm": 2.203125, "learning_rate": 1.9982661977941196e-05, "loss": 0.7239, "step": 395 }, { "epoch": 0.050011839924224484, "grad_norm": 2.21875, "learning_rate": 1.9982544267065962e-05, "loss": 0.723, "step": 396 }, { "epoch": 0.05013813244928566, "grad_norm": 2.078125, "learning_rate": 1.9982426158310554e-05, "loss": 0.7543, "step": 397 }, { "epoch": 0.05026442497434683, "grad_norm": 2.109375, "learning_rate": 1.9982307651679688e-05, "loss": 0.6956, "step": 398 }, { "epoch": 0.050390717499408, "grad_norm": 2.28125, "learning_rate": 1.998218874717808e-05, "loss": 0.7963, "step": 399 }, { "epoch": 0.05051701002446918, "grad_norm": 2.375, "learning_rate": 1.9982069444810474e-05, "loss": 0.7738, "step": 400 }, { "epoch": 0.05064330254953035, "grad_norm": 2.25, "learning_rate": 1.9981949744581622e-05, "loss": 0.7036, "step": 401 }, { "epoch": 0.05076959507459152, "grad_norm": 2.296875, "learning_rate": 1.9981829646496296e-05, "loss": 0.7841, "step": 402 }, { "epoch": 0.050895887599652696, "grad_norm": 2.109375, "learning_rate": 1.9981709150559283e-05, "loss": 0.6179, "step": 403 }, { "epoch": 0.05102218012471387, "grad_norm": 2.140625, "learning_rate": 1.9981588256775388e-05, "loss": 0.738, "step": 404 }, { "epoch": 0.05114847264977504, "grad_norm": 2.1875, "learning_rate": 1.9981466965149424e-05, "loss": 0.7237, "step": 405 }, { "epoch": 0.051274765174836215, "grad_norm": 2.390625, "learning_rate": 1.998134527568623e-05, "loss": 0.7723, "step": 406 }, { "epoch": 0.05140105769989739, "grad_norm": 2.140625, "learning_rate": 1.9981223188390655e-05, "loss": 0.6967, "step": 407 }, { "epoch": 0.05152735022495856, "grad_norm": 1.984375, "learning_rate": 1.9981100703267567e-05, "loss": 0.6734, "step": 408 }, { "epoch": 0.05165364275001973, "grad_norm": 2.21875, "learning_rate": 1.9980977820321842e-05, "loss": 0.7436, "step": 409 }, { "epoch": 0.05177993527508091, "grad_norm": 2.59375, "learning_rate": 1.9980854539558386e-05, "loss": 0.7301, "step": 410 }, { "epoch": 0.05190622780014208, "grad_norm": 2.109375, "learning_rate": 1.9980730860982105e-05, "loss": 0.7015, "step": 411 }, { "epoch": 0.05203252032520325, "grad_norm": 2.078125, "learning_rate": 1.9980606784597935e-05, "loss": 0.6546, "step": 412 }, { "epoch": 0.05215881285026443, "grad_norm": 8.0, "learning_rate": 1.9980482310410813e-05, "loss": 1.3554, "step": 413 }, { "epoch": 0.052285105375325595, "grad_norm": 2.171875, "learning_rate": 1.998035743842571e-05, "loss": 0.6723, "step": 414 }, { "epoch": 0.05241139790038677, "grad_norm": 2.078125, "learning_rate": 1.99802321686476e-05, "loss": 0.6991, "step": 415 }, { "epoch": 0.052537690425447946, "grad_norm": 2.234375, "learning_rate": 1.998010650108147e-05, "loss": 0.6755, "step": 416 }, { "epoch": 0.052663982950509114, "grad_norm": 2.09375, "learning_rate": 1.997998043573234e-05, "loss": 0.6941, "step": 417 }, { "epoch": 0.05279027547557029, "grad_norm": 2.484375, "learning_rate": 1.9979853972605222e-05, "loss": 0.7902, "step": 418 }, { "epoch": 0.052916568000631464, "grad_norm": 2.34375, "learning_rate": 1.9979727111705165e-05, "loss": 0.6776, "step": 419 }, { "epoch": 0.05304286052569263, "grad_norm": 2.328125, "learning_rate": 1.9979599853037223e-05, "loss": 0.7049, "step": 420 }, { "epoch": 0.05316915305075381, "grad_norm": 2.328125, "learning_rate": 1.9979472196606465e-05, "loss": 0.698, "step": 421 }, { "epoch": 0.05329544557581498, "grad_norm": 2.265625, "learning_rate": 1.9979344142417986e-05, "loss": 0.8863, "step": 422 }, { "epoch": 0.05342173810087615, "grad_norm": 2.34375, "learning_rate": 1.997921569047689e-05, "loss": 0.7393, "step": 423 }, { "epoch": 0.053548030625937326, "grad_norm": 2.265625, "learning_rate": 1.9979086840788287e-05, "loss": 0.7939, "step": 424 }, { "epoch": 0.0536743231509985, "grad_norm": 2.21875, "learning_rate": 1.997895759335732e-05, "loss": 0.7068, "step": 425 }, { "epoch": 0.05380061567605968, "grad_norm": 2.21875, "learning_rate": 1.997882794818914e-05, "loss": 0.744, "step": 426 }, { "epoch": 0.053926908201120845, "grad_norm": 2.296875, "learning_rate": 1.997869790528891e-05, "loss": 0.7739, "step": 427 }, { "epoch": 0.05405320072618202, "grad_norm": 2.234375, "learning_rate": 1.997856746466182e-05, "loss": 0.6854, "step": 428 }, { "epoch": 0.054179493251243195, "grad_norm": 2.328125, "learning_rate": 1.9978436626313068e-05, "loss": 0.7006, "step": 429 }, { "epoch": 0.05430578577630436, "grad_norm": 2.234375, "learning_rate": 1.9978305390247864e-05, "loss": 0.7824, "step": 430 }, { "epoch": 0.05443207830136554, "grad_norm": 2.125, "learning_rate": 1.9978173756471438e-05, "loss": 0.7416, "step": 431 }, { "epoch": 0.054558370826426714, "grad_norm": 2.125, "learning_rate": 1.9978041724989045e-05, "loss": 0.6734, "step": 432 }, { "epoch": 0.05468466335148788, "grad_norm": 2.296875, "learning_rate": 1.997790929580594e-05, "loss": 0.7486, "step": 433 }, { "epoch": 0.05481095587654906, "grad_norm": 2.390625, "learning_rate": 1.9977776468927406e-05, "loss": 0.7551, "step": 434 }, { "epoch": 0.05493724840161023, "grad_norm": 2.171875, "learning_rate": 1.9977643244358733e-05, "loss": 0.6757, "step": 435 }, { "epoch": 0.0550635409266714, "grad_norm": 2.109375, "learning_rate": 1.9977509622105233e-05, "loss": 0.6668, "step": 436 }, { "epoch": 0.055189833451732576, "grad_norm": 2.28125, "learning_rate": 1.9977375602172234e-05, "loss": 0.6845, "step": 437 }, { "epoch": 0.05531612597679375, "grad_norm": 2.5, "learning_rate": 1.9977241184565072e-05, "loss": 0.8045, "step": 438 }, { "epoch": 0.05544241850185492, "grad_norm": 2.3125, "learning_rate": 1.997710636928911e-05, "loss": 0.8058, "step": 439 }, { "epoch": 0.055568711026916094, "grad_norm": 2.234375, "learning_rate": 1.997697115634972e-05, "loss": 0.7379, "step": 440 }, { "epoch": 0.05569500355197727, "grad_norm": 2.125, "learning_rate": 1.997683554575229e-05, "loss": 0.7333, "step": 441 }, { "epoch": 0.05582129607703844, "grad_norm": 2.34375, "learning_rate": 1.9976699537502228e-05, "loss": 0.7489, "step": 442 }, { "epoch": 0.05594758860209961, "grad_norm": 2.078125, "learning_rate": 1.997656313160495e-05, "loss": 0.7331, "step": 443 }, { "epoch": 0.05607388112716079, "grad_norm": 2.3125, "learning_rate": 1.9976426328065895e-05, "loss": 0.7473, "step": 444 }, { "epoch": 0.056200173652221956, "grad_norm": 2.40625, "learning_rate": 1.9976289126890516e-05, "loss": 0.7425, "step": 445 }, { "epoch": 0.05632646617728313, "grad_norm": 2.015625, "learning_rate": 1.9976151528084286e-05, "loss": 0.655, "step": 446 }, { "epoch": 0.056452758702344306, "grad_norm": 2.171875, "learning_rate": 1.9976013531652683e-05, "loss": 0.6829, "step": 447 }, { "epoch": 0.056579051227405475, "grad_norm": 2.25, "learning_rate": 1.997587513760121e-05, "loss": 0.7692, "step": 448 }, { "epoch": 0.05670534375246665, "grad_norm": 2.046875, "learning_rate": 1.997573634593538e-05, "loss": 0.7184, "step": 449 }, { "epoch": 0.056831636277527825, "grad_norm": 2.15625, "learning_rate": 1.997559715666073e-05, "loss": 0.7313, "step": 450 }, { "epoch": 0.056957928802589, "grad_norm": 2.09375, "learning_rate": 1.9975457569782805e-05, "loss": 0.66, "step": 451 }, { "epoch": 0.05708422132765017, "grad_norm": 2.25, "learning_rate": 1.9975317585307164e-05, "loss": 0.6695, "step": 452 }, { "epoch": 0.057210513852711344, "grad_norm": 2.09375, "learning_rate": 1.9975177203239395e-05, "loss": 0.6803, "step": 453 }, { "epoch": 0.05733680637777252, "grad_norm": 2.390625, "learning_rate": 1.997503642358509e-05, "loss": 0.8562, "step": 454 }, { "epoch": 0.05746309890283369, "grad_norm": 2.203125, "learning_rate": 1.997489524634986e-05, "loss": 0.6234, "step": 455 }, { "epoch": 0.05758939142789486, "grad_norm": 2.15625, "learning_rate": 1.9974753671539327e-05, "loss": 0.7304, "step": 456 }, { "epoch": 0.05771568395295604, "grad_norm": 2.34375, "learning_rate": 1.9974611699159142e-05, "loss": 0.67, "step": 457 }, { "epoch": 0.057841976478017205, "grad_norm": 2.34375, "learning_rate": 1.997446932921496e-05, "loss": 0.7546, "step": 458 }, { "epoch": 0.05796826900307838, "grad_norm": 2.21875, "learning_rate": 1.9974326561712458e-05, "loss": 0.7566, "step": 459 }, { "epoch": 0.058094561528139556, "grad_norm": 2.28125, "learning_rate": 1.997418339665732e-05, "loss": 0.7223, "step": 460 }, { "epoch": 0.058220854053200724, "grad_norm": 2.390625, "learning_rate": 1.9974039834055256e-05, "loss": 0.7549, "step": 461 }, { "epoch": 0.0583471465782619, "grad_norm": 2.34375, "learning_rate": 1.997389587391199e-05, "loss": 0.7261, "step": 462 }, { "epoch": 0.058473439103323074, "grad_norm": 2.328125, "learning_rate": 1.9973751516233257e-05, "loss": 0.7989, "step": 463 }, { "epoch": 0.05859973162838424, "grad_norm": 2.03125, "learning_rate": 1.9973606761024813e-05, "loss": 0.6665, "step": 464 }, { "epoch": 0.05872602415344542, "grad_norm": 2.21875, "learning_rate": 1.9973461608292423e-05, "loss": 0.7178, "step": 465 }, { "epoch": 0.05885231667850659, "grad_norm": 2.1875, "learning_rate": 1.997331605804188e-05, "loss": 0.7143, "step": 466 }, { "epoch": 0.05897860920356776, "grad_norm": 2.21875, "learning_rate": 1.9973170110278982e-05, "loss": 0.7035, "step": 467 }, { "epoch": 0.059104901728628936, "grad_norm": 2.234375, "learning_rate": 1.997302376500954e-05, "loss": 0.7937, "step": 468 }, { "epoch": 0.05923119425369011, "grad_norm": 2.25, "learning_rate": 1.9972877022239396e-05, "loss": 0.7569, "step": 469 }, { "epoch": 0.05935748677875128, "grad_norm": 2.1875, "learning_rate": 1.9972729881974397e-05, "loss": 0.7099, "step": 470 }, { "epoch": 0.059483779303812455, "grad_norm": 2.25, "learning_rate": 1.99725823442204e-05, "loss": 0.6768, "step": 471 }, { "epoch": 0.05961007182887363, "grad_norm": 2.28125, "learning_rate": 1.9972434408983298e-05, "loss": 0.7346, "step": 472 }, { "epoch": 0.0597363643539348, "grad_norm": 2.125, "learning_rate": 1.9972286076268975e-05, "loss": 0.6306, "step": 473 }, { "epoch": 0.05986265687899597, "grad_norm": 2.25, "learning_rate": 1.997213734608335e-05, "loss": 0.6999, "step": 474 }, { "epoch": 0.05998894940405715, "grad_norm": 2.046875, "learning_rate": 1.997198821843235e-05, "loss": 0.7384, "step": 475 }, { "epoch": 0.060115241929118324, "grad_norm": 2.1875, "learning_rate": 1.997183869332192e-05, "loss": 0.7554, "step": 476 }, { "epoch": 0.06024153445417949, "grad_norm": 2.15625, "learning_rate": 1.9971688770758018e-05, "loss": 0.6966, "step": 477 }, { "epoch": 0.06036782697924067, "grad_norm": 2.03125, "learning_rate": 1.997153845074662e-05, "loss": 0.6586, "step": 478 }, { "epoch": 0.06049411950430184, "grad_norm": 2.421875, "learning_rate": 1.9971387733293713e-05, "loss": 0.7871, "step": 479 }, { "epoch": 0.06062041202936301, "grad_norm": 2.234375, "learning_rate": 1.9971236618405315e-05, "loss": 0.7129, "step": 480 }, { "epoch": 0.060746704554424186, "grad_norm": 2.734375, "learning_rate": 1.997108510608744e-05, "loss": 0.7559, "step": 481 }, { "epoch": 0.06087299707948536, "grad_norm": 2.21875, "learning_rate": 1.9970933196346126e-05, "loss": 0.7292, "step": 482 }, { "epoch": 0.06099928960454653, "grad_norm": 2.1875, "learning_rate": 1.9970780889187436e-05, "loss": 0.6975, "step": 483 }, { "epoch": 0.061125582129607704, "grad_norm": 2.375, "learning_rate": 1.9970628184617436e-05, "loss": 0.7757, "step": 484 }, { "epoch": 0.06125187465466888, "grad_norm": 2.421875, "learning_rate": 1.9970475082642212e-05, "loss": 0.7339, "step": 485 }, { "epoch": 0.06137816717973005, "grad_norm": 2.34375, "learning_rate": 1.9970321583267865e-05, "loss": 0.711, "step": 486 }, { "epoch": 0.06150445970479122, "grad_norm": 2.015625, "learning_rate": 1.9970167686500516e-05, "loss": 0.6698, "step": 487 }, { "epoch": 0.0616307522298524, "grad_norm": 2.0625, "learning_rate": 1.9970013392346294e-05, "loss": 0.7539, "step": 488 }, { "epoch": 0.061757044754913566, "grad_norm": 2.109375, "learning_rate": 1.996985870081136e-05, "loss": 0.711, "step": 489 }, { "epoch": 0.06188333727997474, "grad_norm": 2.109375, "learning_rate": 1.9969703611901864e-05, "loss": 0.7296, "step": 490 }, { "epoch": 0.06200962980503592, "grad_norm": 2.140625, "learning_rate": 1.9969548125624e-05, "loss": 0.6643, "step": 491 }, { "epoch": 0.062135922330097085, "grad_norm": 2.203125, "learning_rate": 1.9969392241983957e-05, "loss": 0.729, "step": 492 }, { "epoch": 0.06226221485515826, "grad_norm": 2.015625, "learning_rate": 1.9969235960987957e-05, "loss": 0.676, "step": 493 }, { "epoch": 0.062388507380219435, "grad_norm": 2.234375, "learning_rate": 1.9969079282642224e-05, "loss": 0.7491, "step": 494 }, { "epoch": 0.06251479990528061, "grad_norm": 2.09375, "learning_rate": 1.9968922206953e-05, "loss": 0.6835, "step": 495 }, { "epoch": 0.06264109243034179, "grad_norm": 1.9765625, "learning_rate": 1.996876473392655e-05, "loss": 0.6962, "step": 496 }, { "epoch": 0.06276738495540295, "grad_norm": 4.46875, "learning_rate": 1.9968606863569145e-05, "loss": 1.1527, "step": 497 }, { "epoch": 0.06289367748046412, "grad_norm": 2.3125, "learning_rate": 1.9968448595887087e-05, "loss": 0.7434, "step": 498 }, { "epoch": 0.0630199700055253, "grad_norm": 2.515625, "learning_rate": 1.9968289930886675e-05, "loss": 0.6878, "step": 499 }, { "epoch": 0.06314626253058647, "grad_norm": 2.265625, "learning_rate": 1.9968130868574237e-05, "loss": 0.7719, "step": 500 }, { "epoch": 0.06327255505564765, "grad_norm": 2.234375, "learning_rate": 1.9967971408956113e-05, "loss": 0.7088, "step": 501 }, { "epoch": 0.06339884758070882, "grad_norm": 1.8515625, "learning_rate": 1.996781155203866e-05, "loss": 0.6217, "step": 502 }, { "epoch": 0.06352514010576998, "grad_norm": 2.09375, "learning_rate": 1.9967651297828244e-05, "loss": 0.6408, "step": 503 }, { "epoch": 0.06365143263083116, "grad_norm": 2.0625, "learning_rate": 1.9967490646331256e-05, "loss": 0.6809, "step": 504 }, { "epoch": 0.06377772515589233, "grad_norm": 2.34375, "learning_rate": 1.99673295975541e-05, "loss": 0.8426, "step": 505 }, { "epoch": 0.06390401768095351, "grad_norm": 2.046875, "learning_rate": 1.9967168151503196e-05, "loss": 0.6732, "step": 506 }, { "epoch": 0.06403031020601468, "grad_norm": 2.484375, "learning_rate": 1.9967006308184973e-05, "loss": 0.7294, "step": 507 }, { "epoch": 0.06415660273107586, "grad_norm": 2.078125, "learning_rate": 1.9966844067605888e-05, "loss": 0.7808, "step": 508 }, { "epoch": 0.06428289525613702, "grad_norm": 2.25, "learning_rate": 1.9966681429772406e-05, "loss": 0.6967, "step": 509 }, { "epoch": 0.0644091877811982, "grad_norm": 2.453125, "learning_rate": 1.9966518394691006e-05, "loss": 0.7625, "step": 510 }, { "epoch": 0.06453548030625937, "grad_norm": 2.125, "learning_rate": 1.9966354962368192e-05, "loss": 0.6668, "step": 511 }, { "epoch": 0.06466177283132055, "grad_norm": 2.015625, "learning_rate": 1.9966191132810474e-05, "loss": 0.7207, "step": 512 }, { "epoch": 0.06478806535638172, "grad_norm": 2.265625, "learning_rate": 1.9966026906024377e-05, "loss": 0.6965, "step": 513 }, { "epoch": 0.0649143578814429, "grad_norm": 2.296875, "learning_rate": 1.996586228201646e-05, "loss": 0.7454, "step": 514 }, { "epoch": 0.06504065040650407, "grad_norm": 2.25, "learning_rate": 1.996569726079327e-05, "loss": 0.7493, "step": 515 }, { "epoch": 0.06516694293156523, "grad_norm": 2.09375, "learning_rate": 1.9965531842361393e-05, "loss": 0.7203, "step": 516 }, { "epoch": 0.06529323545662641, "grad_norm": 2.328125, "learning_rate": 1.9965366026727424e-05, "loss": 0.7815, "step": 517 }, { "epoch": 0.06541952798168758, "grad_norm": 2.1875, "learning_rate": 1.9965199813897968e-05, "loss": 0.7138, "step": 518 }, { "epoch": 0.06554582050674876, "grad_norm": 2.046875, "learning_rate": 1.9965033203879644e-05, "loss": 0.7117, "step": 519 }, { "epoch": 0.06567211303180993, "grad_norm": 2.015625, "learning_rate": 1.9964866196679105e-05, "loss": 0.6246, "step": 520 }, { "epoch": 0.06579840555687111, "grad_norm": 2.390625, "learning_rate": 1.9964698792302998e-05, "loss": 0.7678, "step": 521 }, { "epoch": 0.06592469808193227, "grad_norm": 2.3125, "learning_rate": 1.9964530990758e-05, "loss": 0.7493, "step": 522 }, { "epoch": 0.06605099060699345, "grad_norm": 2.328125, "learning_rate": 1.9964362792050796e-05, "loss": 0.7048, "step": 523 }, { "epoch": 0.06617728313205462, "grad_norm": 1.953125, "learning_rate": 1.9964194196188095e-05, "loss": 0.6508, "step": 524 }, { "epoch": 0.0663035756571158, "grad_norm": 2.015625, "learning_rate": 1.9964025203176613e-05, "loss": 0.7426, "step": 525 }, { "epoch": 0.06642986818217697, "grad_norm": 2.109375, "learning_rate": 1.9963855813023084e-05, "loss": 0.6896, "step": 526 }, { "epoch": 0.06655616070723815, "grad_norm": 2.09375, "learning_rate": 1.9963686025734262e-05, "loss": 0.6674, "step": 527 }, { "epoch": 0.06668245323229931, "grad_norm": 2.15625, "learning_rate": 1.9963515841316915e-05, "loss": 0.7248, "step": 528 }, { "epoch": 0.06680874575736048, "grad_norm": 2.21875, "learning_rate": 1.9963345259777824e-05, "loss": 0.7704, "step": 529 }, { "epoch": 0.06693503828242166, "grad_norm": 2.15625, "learning_rate": 1.996317428112379e-05, "loss": 0.7073, "step": 530 }, { "epoch": 0.06706133080748283, "grad_norm": 2.125, "learning_rate": 1.996300290536163e-05, "loss": 0.725, "step": 531 }, { "epoch": 0.06718762333254401, "grad_norm": 2.296875, "learning_rate": 1.9962831132498165e-05, "loss": 0.7458, "step": 532 }, { "epoch": 0.06731391585760518, "grad_norm": 2.0625, "learning_rate": 1.9962658962540255e-05, "loss": 0.7194, "step": 533 }, { "epoch": 0.06744020838266634, "grad_norm": 2.125, "learning_rate": 1.996248639549475e-05, "loss": 0.6775, "step": 534 }, { "epoch": 0.06756650090772752, "grad_norm": 2.109375, "learning_rate": 1.9962313431368536e-05, "loss": 0.6886, "step": 535 }, { "epoch": 0.0676927934327887, "grad_norm": 2.140625, "learning_rate": 1.9962140070168507e-05, "loss": 0.7439, "step": 536 }, { "epoch": 0.06781908595784987, "grad_norm": 2.15625, "learning_rate": 1.9961966311901563e-05, "loss": 0.6721, "step": 537 }, { "epoch": 0.06794537848291105, "grad_norm": 2.125, "learning_rate": 1.996179215657464e-05, "loss": 0.6745, "step": 538 }, { "epoch": 0.06807167100797222, "grad_norm": 2.140625, "learning_rate": 1.9961617604194678e-05, "loss": 0.6546, "step": 539 }, { "epoch": 0.06819796353303338, "grad_norm": 2.171875, "learning_rate": 1.996144265476863e-05, "loss": 0.7274, "step": 540 }, { "epoch": 0.06832425605809456, "grad_norm": 2.0, "learning_rate": 1.9961267308303473e-05, "loss": 0.6492, "step": 541 }, { "epoch": 0.06845054858315573, "grad_norm": 2.0625, "learning_rate": 1.9961091564806194e-05, "loss": 0.671, "step": 542 }, { "epoch": 0.06857684110821691, "grad_norm": 2.15625, "learning_rate": 1.99609154242838e-05, "loss": 0.7647, "step": 543 }, { "epoch": 0.06870313363327808, "grad_norm": 2.078125, "learning_rate": 1.9960738886743306e-05, "loss": 0.6664, "step": 544 }, { "epoch": 0.06882942615833926, "grad_norm": 2.203125, "learning_rate": 1.9960561952191756e-05, "loss": 0.6864, "step": 545 }, { "epoch": 0.06895571868340043, "grad_norm": 2.234375, "learning_rate": 1.996038462063619e-05, "loss": 0.7313, "step": 546 }, { "epoch": 0.0690820112084616, "grad_norm": 2.015625, "learning_rate": 1.9960206892083694e-05, "loss": 0.6435, "step": 547 }, { "epoch": 0.06920830373352277, "grad_norm": 2.21875, "learning_rate": 1.9960028766541336e-05, "loss": 0.8066, "step": 548 }, { "epoch": 0.06933459625858394, "grad_norm": 2.203125, "learning_rate": 1.9959850244016225e-05, "loss": 0.8039, "step": 549 }, { "epoch": 0.06946088878364512, "grad_norm": 2.203125, "learning_rate": 1.995967132451547e-05, "loss": 0.7745, "step": 550 }, { "epoch": 0.0695871813087063, "grad_norm": 2.0625, "learning_rate": 1.9959492008046206e-05, "loss": 0.6865, "step": 551 }, { "epoch": 0.06971347383376747, "grad_norm": 2.09375, "learning_rate": 1.995931229461558e-05, "loss": 0.6953, "step": 552 }, { "epoch": 0.06983976635882863, "grad_norm": 2.171875, "learning_rate": 1.9959132184230755e-05, "loss": 0.8086, "step": 553 }, { "epoch": 0.0699660588838898, "grad_norm": 2.28125, "learning_rate": 1.995895167689891e-05, "loss": 0.8571, "step": 554 }, { "epoch": 0.07009235140895098, "grad_norm": 1.96875, "learning_rate": 1.9958770772627236e-05, "loss": 0.6854, "step": 555 }, { "epoch": 0.07021864393401216, "grad_norm": 2.3125, "learning_rate": 1.995858947142295e-05, "loss": 0.7782, "step": 556 }, { "epoch": 0.07034493645907333, "grad_norm": 2.28125, "learning_rate": 1.9958407773293272e-05, "loss": 0.693, "step": 557 }, { "epoch": 0.0704712289841345, "grad_norm": 2.421875, "learning_rate": 1.9958225678245444e-05, "loss": 0.8057, "step": 558 }, { "epoch": 0.07059752150919567, "grad_norm": 2.171875, "learning_rate": 1.995804318628673e-05, "loss": 0.8207, "step": 559 }, { "epoch": 0.07072381403425684, "grad_norm": 2.125, "learning_rate": 1.99578602974244e-05, "loss": 0.6619, "step": 560 }, { "epoch": 0.07085010655931802, "grad_norm": 1.9765625, "learning_rate": 1.9957677011665742e-05, "loss": 0.7192, "step": 561 }, { "epoch": 0.0709763990843792, "grad_norm": 2.046875, "learning_rate": 1.9957493329018064e-05, "loss": 0.6674, "step": 562 }, { "epoch": 0.07110269160944037, "grad_norm": 2.15625, "learning_rate": 1.9957309249488683e-05, "loss": 0.7017, "step": 563 }, { "epoch": 0.07122898413450154, "grad_norm": 2.046875, "learning_rate": 1.9957124773084943e-05, "loss": 0.6804, "step": 564 }, { "epoch": 0.0713552766595627, "grad_norm": 2.109375, "learning_rate": 1.995693989981419e-05, "loss": 0.706, "step": 565 }, { "epoch": 0.07148156918462388, "grad_norm": 2.234375, "learning_rate": 1.9956754629683798e-05, "loss": 0.7769, "step": 566 }, { "epoch": 0.07160786170968506, "grad_norm": 2.140625, "learning_rate": 1.9956568962701145e-05, "loss": 0.7392, "step": 567 }, { "epoch": 0.07173415423474623, "grad_norm": 2.15625, "learning_rate": 1.9956382898873642e-05, "loss": 0.7735, "step": 568 }, { "epoch": 0.0718604467598074, "grad_norm": 2.015625, "learning_rate": 1.9956196438208693e-05, "loss": 0.6668, "step": 569 }, { "epoch": 0.07198673928486858, "grad_norm": 2.296875, "learning_rate": 1.9956009580713735e-05, "loss": 0.7662, "step": 570 }, { "epoch": 0.07211303180992976, "grad_norm": 2.171875, "learning_rate": 1.9955822326396218e-05, "loss": 0.6789, "step": 571 }, { "epoch": 0.07223932433499092, "grad_norm": 2.296875, "learning_rate": 1.99556346752636e-05, "loss": 0.712, "step": 572 }, { "epoch": 0.07236561686005209, "grad_norm": 2.046875, "learning_rate": 1.9955446627323366e-05, "loss": 0.7191, "step": 573 }, { "epoch": 0.07249190938511327, "grad_norm": 2.078125, "learning_rate": 1.9955258182583007e-05, "loss": 0.7081, "step": 574 }, { "epoch": 0.07261820191017444, "grad_norm": 1.984375, "learning_rate": 1.9955069341050034e-05, "loss": 0.6648, "step": 575 }, { "epoch": 0.07274449443523562, "grad_norm": 2.0625, "learning_rate": 1.995488010273198e-05, "loss": 0.6201, "step": 576 }, { "epoch": 0.0728707869602968, "grad_norm": 2.203125, "learning_rate": 1.995469046763638e-05, "loss": 0.7586, "step": 577 }, { "epoch": 0.07299707948535795, "grad_norm": 2.15625, "learning_rate": 1.9954500435770794e-05, "loss": 0.7394, "step": 578 }, { "epoch": 0.07312337201041913, "grad_norm": 2.078125, "learning_rate": 1.9954310007142802e-05, "loss": 0.7407, "step": 579 }, { "epoch": 0.0732496645354803, "grad_norm": 2.171875, "learning_rate": 1.9954119181759987e-05, "loss": 0.7967, "step": 580 }, { "epoch": 0.07337595706054148, "grad_norm": 2.046875, "learning_rate": 1.9953927959629954e-05, "loss": 0.743, "step": 581 }, { "epoch": 0.07350224958560266, "grad_norm": 1.90625, "learning_rate": 1.9953736340760335e-05, "loss": 0.6464, "step": 582 }, { "epoch": 0.07362854211066383, "grad_norm": 2.40625, "learning_rate": 1.9953544325158755e-05, "loss": 0.8058, "step": 583 }, { "epoch": 0.07375483463572499, "grad_norm": 2.109375, "learning_rate": 1.9953351912832876e-05, "loss": 0.8188, "step": 584 }, { "epoch": 0.07388112716078617, "grad_norm": 2.125, "learning_rate": 1.995315910379036e-05, "loss": 0.7158, "step": 585 }, { "epoch": 0.07400741968584734, "grad_norm": 2.15625, "learning_rate": 1.99529658980389e-05, "loss": 0.786, "step": 586 }, { "epoch": 0.07413371221090852, "grad_norm": 2.234375, "learning_rate": 1.995277229558619e-05, "loss": 0.6913, "step": 587 }, { "epoch": 0.07426000473596969, "grad_norm": 2.0625, "learning_rate": 1.995257829643995e-05, "loss": 0.6694, "step": 588 }, { "epoch": 0.07438629726103087, "grad_norm": 2.171875, "learning_rate": 1.9952383900607912e-05, "loss": 0.7494, "step": 589 }, { "epoch": 0.07451258978609203, "grad_norm": 1.9609375, "learning_rate": 1.9952189108097825e-05, "loss": 0.6947, "step": 590 }, { "epoch": 0.0746388823111532, "grad_norm": 2.109375, "learning_rate": 1.995199391891745e-05, "loss": 0.7454, "step": 591 }, { "epoch": 0.07476517483621438, "grad_norm": 2.015625, "learning_rate": 1.9951798333074567e-05, "loss": 0.6773, "step": 592 }, { "epoch": 0.07489146736127555, "grad_norm": 2.0625, "learning_rate": 1.9951602350576972e-05, "loss": 0.6757, "step": 593 }, { "epoch": 0.07501775988633673, "grad_norm": 2.1875, "learning_rate": 1.995140597143248e-05, "loss": 0.7881, "step": 594 }, { "epoch": 0.0751440524113979, "grad_norm": 2.375, "learning_rate": 1.9951209195648915e-05, "loss": 0.8284, "step": 595 }, { "epoch": 0.07527034493645908, "grad_norm": 2.03125, "learning_rate": 1.995101202323412e-05, "loss": 0.6315, "step": 596 }, { "epoch": 0.07539663746152024, "grad_norm": 2.234375, "learning_rate": 1.9950814454195953e-05, "loss": 0.7116, "step": 597 }, { "epoch": 0.07552292998658142, "grad_norm": 2.0, "learning_rate": 1.995061648854229e-05, "loss": 0.6401, "step": 598 }, { "epoch": 0.07564922251164259, "grad_norm": 4.40625, "learning_rate": 1.9950418126281022e-05, "loss": 1.0553, "step": 599 }, { "epoch": 0.07577551503670377, "grad_norm": 2.234375, "learning_rate": 1.9950219367420056e-05, "loss": 0.7088, "step": 600 }, { "epoch": 0.07590180756176494, "grad_norm": 2.203125, "learning_rate": 1.995002021196731e-05, "loss": 0.7463, "step": 601 }, { "epoch": 0.07602810008682612, "grad_norm": 2.25, "learning_rate": 1.9949820659930726e-05, "loss": 0.7722, "step": 602 }, { "epoch": 0.07615439261188728, "grad_norm": 2.09375, "learning_rate": 1.9949620711318256e-05, "loss": 0.7266, "step": 603 }, { "epoch": 0.07628068513694845, "grad_norm": 2.125, "learning_rate": 1.994942036613787e-05, "loss": 0.7349, "step": 604 }, { "epoch": 0.07640697766200963, "grad_norm": 2.015625, "learning_rate": 1.9949219624397554e-05, "loss": 0.6641, "step": 605 }, { "epoch": 0.0765332701870708, "grad_norm": 2.078125, "learning_rate": 1.9949018486105304e-05, "loss": 0.6733, "step": 606 }, { "epoch": 0.07665956271213198, "grad_norm": 2.078125, "learning_rate": 1.9948816951269145e-05, "loss": 0.6908, "step": 607 }, { "epoch": 0.07678585523719315, "grad_norm": 2.125, "learning_rate": 1.9948615019897104e-05, "loss": 0.6149, "step": 608 }, { "epoch": 0.07691214776225432, "grad_norm": 2.234375, "learning_rate": 1.994841269199723e-05, "loss": 0.7189, "step": 609 }, { "epoch": 0.07703844028731549, "grad_norm": 2.15625, "learning_rate": 1.994820996757759e-05, "loss": 0.8366, "step": 610 }, { "epoch": 0.07716473281237667, "grad_norm": 2.171875, "learning_rate": 1.9948006846646262e-05, "loss": 0.6979, "step": 611 }, { "epoch": 0.07729102533743784, "grad_norm": 2.09375, "learning_rate": 1.9947803329211343e-05, "loss": 0.8758, "step": 612 }, { "epoch": 0.07741731786249902, "grad_norm": 2.140625, "learning_rate": 1.9947599415280944e-05, "loss": 0.7267, "step": 613 }, { "epoch": 0.07754361038756019, "grad_norm": 2.15625, "learning_rate": 1.994739510486319e-05, "loss": 0.6784, "step": 614 }, { "epoch": 0.07766990291262135, "grad_norm": 2.125, "learning_rate": 1.994719039796623e-05, "loss": 0.7347, "step": 615 }, { "epoch": 0.07779619543768253, "grad_norm": 2.140625, "learning_rate": 1.9946985294598218e-05, "loss": 0.7259, "step": 616 }, { "epoch": 0.0779224879627437, "grad_norm": 2.1875, "learning_rate": 1.994677979476733e-05, "loss": 0.7462, "step": 617 }, { "epoch": 0.07804878048780488, "grad_norm": 2.171875, "learning_rate": 1.994657389848176e-05, "loss": 0.6688, "step": 618 }, { "epoch": 0.07817507301286605, "grad_norm": 2.25, "learning_rate": 1.9946367605749716e-05, "loss": 0.7392, "step": 619 }, { "epoch": 0.07830136553792723, "grad_norm": 2.09375, "learning_rate": 1.994616091657941e-05, "loss": 0.6705, "step": 620 }, { "epoch": 0.0784276580629884, "grad_norm": 2.109375, "learning_rate": 1.994595383097909e-05, "loss": 0.6907, "step": 621 }, { "epoch": 0.07855395058804956, "grad_norm": 2.015625, "learning_rate": 1.9945746348957005e-05, "loss": 0.6743, "step": 622 }, { "epoch": 0.07868024311311074, "grad_norm": 2.1875, "learning_rate": 1.9945538470521427e-05, "loss": 0.686, "step": 623 }, { "epoch": 0.07880653563817192, "grad_norm": 2.15625, "learning_rate": 1.994533019568064e-05, "loss": 0.751, "step": 624 }, { "epoch": 0.07893282816323309, "grad_norm": 2.15625, "learning_rate": 1.9945121524442947e-05, "loss": 0.7032, "step": 625 }, { "epoch": 0.07905912068829427, "grad_norm": 2.234375, "learning_rate": 1.9944912456816665e-05, "loss": 0.7141, "step": 626 }, { "epoch": 0.07918541321335544, "grad_norm": 1.984375, "learning_rate": 1.9944702992810124e-05, "loss": 0.6251, "step": 627 }, { "epoch": 0.0793117057384166, "grad_norm": 2.34375, "learning_rate": 1.994449313243168e-05, "loss": 0.7285, "step": 628 }, { "epoch": 0.07943799826347778, "grad_norm": 2.046875, "learning_rate": 1.994428287568969e-05, "loss": 0.7184, "step": 629 }, { "epoch": 0.07956429078853895, "grad_norm": 2.203125, "learning_rate": 1.9944072222592535e-05, "loss": 0.6873, "step": 630 }, { "epoch": 0.07969058331360013, "grad_norm": 2.09375, "learning_rate": 1.9943861173148612e-05, "loss": 0.725, "step": 631 }, { "epoch": 0.0798168758386613, "grad_norm": 2.171875, "learning_rate": 1.9943649727366335e-05, "loss": 0.7876, "step": 632 }, { "epoch": 0.07994316836372248, "grad_norm": 2.21875, "learning_rate": 1.9943437885254134e-05, "loss": 0.6836, "step": 633 }, { "epoch": 0.08006946088878364, "grad_norm": 2.140625, "learning_rate": 1.9943225646820448e-05, "loss": 0.6831, "step": 634 }, { "epoch": 0.08019575341384481, "grad_norm": 2.21875, "learning_rate": 1.9943013012073734e-05, "loss": 0.7147, "step": 635 }, { "epoch": 0.08032204593890599, "grad_norm": 1.9921875, "learning_rate": 1.9942799981022472e-05, "loss": 0.7293, "step": 636 }, { "epoch": 0.08044833846396716, "grad_norm": 2.0625, "learning_rate": 1.9942586553675154e-05, "loss": 0.7099, "step": 637 }, { "epoch": 0.08057463098902834, "grad_norm": 2.265625, "learning_rate": 1.9942372730040286e-05, "loss": 0.7156, "step": 638 }, { "epoch": 0.08070092351408951, "grad_norm": 2.09375, "learning_rate": 1.9942158510126384e-05, "loss": 0.7007, "step": 639 }, { "epoch": 0.08082721603915068, "grad_norm": 2.0625, "learning_rate": 1.994194389394199e-05, "loss": 0.6238, "step": 640 }, { "epoch": 0.08095350856421185, "grad_norm": 2.21875, "learning_rate": 1.9941728881495664e-05, "loss": 0.8054, "step": 641 }, { "epoch": 0.08107980108927303, "grad_norm": 2.171875, "learning_rate": 1.9941513472795967e-05, "loss": 0.7528, "step": 642 }, { "epoch": 0.0812060936143342, "grad_norm": 2.140625, "learning_rate": 1.9941297667851493e-05, "loss": 0.7053, "step": 643 }, { "epoch": 0.08133238613939538, "grad_norm": 2.078125, "learning_rate": 1.9941081466670833e-05, "loss": 0.7426, "step": 644 }, { "epoch": 0.08145867866445655, "grad_norm": 2.09375, "learning_rate": 1.9940864869262614e-05, "loss": 0.727, "step": 645 }, { "epoch": 0.08158497118951773, "grad_norm": 2.015625, "learning_rate": 1.9940647875635463e-05, "loss": 0.6998, "step": 646 }, { "epoch": 0.08171126371457889, "grad_norm": 2.109375, "learning_rate": 1.9940430485798036e-05, "loss": 0.7273, "step": 647 }, { "epoch": 0.08183755623964006, "grad_norm": 2.140625, "learning_rate": 1.994021269975899e-05, "loss": 0.8028, "step": 648 }, { "epoch": 0.08196384876470124, "grad_norm": 2.109375, "learning_rate": 1.993999451752701e-05, "loss": 0.7375, "step": 649 }, { "epoch": 0.08209014128976241, "grad_norm": 2.265625, "learning_rate": 1.9939775939110785e-05, "loss": 0.754, "step": 650 }, { "epoch": 0.08221643381482359, "grad_norm": 2.296875, "learning_rate": 1.9939556964519037e-05, "loss": 0.7128, "step": 651 }, { "epoch": 0.08234272633988476, "grad_norm": 2.15625, "learning_rate": 1.993933759376049e-05, "loss": 0.7189, "step": 652 }, { "epoch": 0.08246901886494593, "grad_norm": 2.265625, "learning_rate": 1.9939117826843883e-05, "loss": 0.8892, "step": 653 }, { "epoch": 0.0825953113900071, "grad_norm": 2.09375, "learning_rate": 1.9938897663777984e-05, "loss": 0.6966, "step": 654 }, { "epoch": 0.08272160391506828, "grad_norm": 2.171875, "learning_rate": 1.993867710457156e-05, "loss": 0.7898, "step": 655 }, { "epoch": 0.08284789644012945, "grad_norm": 2.109375, "learning_rate": 1.9938456149233407e-05, "loss": 0.7117, "step": 656 }, { "epoch": 0.08297418896519063, "grad_norm": 2.03125, "learning_rate": 1.993823479777233e-05, "loss": 0.7285, "step": 657 }, { "epoch": 0.0831004814902518, "grad_norm": 2.125, "learning_rate": 1.993801305019715e-05, "loss": 0.7262, "step": 658 }, { "epoch": 0.08322677401531296, "grad_norm": 2.28125, "learning_rate": 1.993779090651671e-05, "loss": 0.7783, "step": 659 }, { "epoch": 0.08335306654037414, "grad_norm": 2.0, "learning_rate": 1.9937568366739858e-05, "loss": 0.6617, "step": 660 }, { "epoch": 0.08347935906543531, "grad_norm": 2.03125, "learning_rate": 1.993734543087547e-05, "loss": 0.6958, "step": 661 }, { "epoch": 0.08360565159049649, "grad_norm": 2.03125, "learning_rate": 1.9937122098932428e-05, "loss": 0.6632, "step": 662 }, { "epoch": 0.08373194411555766, "grad_norm": 1.90625, "learning_rate": 1.9936898370919634e-05, "loss": 0.5806, "step": 663 }, { "epoch": 0.08385823664061884, "grad_norm": 1.9765625, "learning_rate": 1.9936674246846005e-05, "loss": 0.6403, "step": 664 }, { "epoch": 0.08398452916568, "grad_norm": 2.09375, "learning_rate": 1.9936449726720474e-05, "loss": 0.7254, "step": 665 }, { "epoch": 0.08411082169074117, "grad_norm": 2.078125, "learning_rate": 1.9936224810551993e-05, "loss": 0.8047, "step": 666 }, { "epoch": 0.08423711421580235, "grad_norm": 2.25, "learning_rate": 1.9935999498349518e-05, "loss": 0.8362, "step": 667 }, { "epoch": 0.08436340674086353, "grad_norm": 5.03125, "learning_rate": 1.9935773790122043e-05, "loss": 1.0221, "step": 668 }, { "epoch": 0.0844896992659247, "grad_norm": 2.125, "learning_rate": 1.993554768587855e-05, "loss": 0.7441, "step": 669 }, { "epoch": 0.08461599179098588, "grad_norm": 2.046875, "learning_rate": 1.993532118562806e-05, "loss": 0.7201, "step": 670 }, { "epoch": 0.08474228431604705, "grad_norm": 2.171875, "learning_rate": 1.99350942893796e-05, "loss": 0.7735, "step": 671 }, { "epoch": 0.08486857684110821, "grad_norm": 2.0625, "learning_rate": 1.9934866997142207e-05, "loss": 0.7343, "step": 672 }, { "epoch": 0.08499486936616939, "grad_norm": 1.984375, "learning_rate": 1.993463930892495e-05, "loss": 0.6834, "step": 673 }, { "epoch": 0.08512116189123056, "grad_norm": 2.234375, "learning_rate": 1.99344112247369e-05, "loss": 0.6854, "step": 674 }, { "epoch": 0.08524745441629174, "grad_norm": 2.125, "learning_rate": 1.9934182744587142e-05, "loss": 0.7284, "step": 675 }, { "epoch": 0.08537374694135291, "grad_norm": 1.9765625, "learning_rate": 1.9933953868484793e-05, "loss": 0.657, "step": 676 }, { "epoch": 0.08550003946641409, "grad_norm": 2.078125, "learning_rate": 1.993372459643897e-05, "loss": 0.722, "step": 677 }, { "epoch": 0.08562633199147525, "grad_norm": 2.15625, "learning_rate": 1.9933494928458806e-05, "loss": 0.7428, "step": 678 }, { "epoch": 0.08575262451653642, "grad_norm": 2.109375, "learning_rate": 1.9933264864553464e-05, "loss": 0.7237, "step": 679 }, { "epoch": 0.0858789170415976, "grad_norm": 1.9765625, "learning_rate": 1.993303440473211e-05, "loss": 0.7039, "step": 680 }, { "epoch": 0.08600520956665877, "grad_norm": 2.1875, "learning_rate": 1.993280354900393e-05, "loss": 0.7349, "step": 681 }, { "epoch": 0.08613150209171995, "grad_norm": 2.21875, "learning_rate": 1.9932572297378122e-05, "loss": 0.7199, "step": 682 }, { "epoch": 0.08625779461678112, "grad_norm": 2.140625, "learning_rate": 1.9932340649863913e-05, "loss": 0.7602, "step": 683 }, { "epoch": 0.08638408714184229, "grad_norm": 2.03125, "learning_rate": 1.9932108606470525e-05, "loss": 0.6826, "step": 684 }, { "epoch": 0.08651037966690346, "grad_norm": 2.03125, "learning_rate": 1.993187616720721e-05, "loss": 0.7361, "step": 685 }, { "epoch": 0.08663667219196464, "grad_norm": 2.09375, "learning_rate": 1.9931643332083236e-05, "loss": 0.6855, "step": 686 }, { "epoch": 0.08676296471702581, "grad_norm": 1.9296875, "learning_rate": 1.993141010110788e-05, "loss": 0.7215, "step": 687 }, { "epoch": 0.08688925724208699, "grad_norm": 2.046875, "learning_rate": 1.9931176474290438e-05, "loss": 0.6708, "step": 688 }, { "epoch": 0.08701554976714816, "grad_norm": 4.1875, "learning_rate": 1.993094245164022e-05, "loss": 0.9988, "step": 689 }, { "epoch": 0.08714184229220932, "grad_norm": 2.078125, "learning_rate": 1.9930708033166558e-05, "loss": 0.7076, "step": 690 }, { "epoch": 0.0872681348172705, "grad_norm": 2.0625, "learning_rate": 1.9930473218878797e-05, "loss": 0.6698, "step": 691 }, { "epoch": 0.08739442734233167, "grad_norm": 1.9296875, "learning_rate": 1.9930238008786288e-05, "loss": 0.6872, "step": 692 }, { "epoch": 0.08752071986739285, "grad_norm": 2.109375, "learning_rate": 1.9930002402898414e-05, "loss": 0.6903, "step": 693 }, { "epoch": 0.08764701239245402, "grad_norm": 1.9765625, "learning_rate": 1.992976640122456e-05, "loss": 0.6858, "step": 694 }, { "epoch": 0.0877733049175152, "grad_norm": 2.078125, "learning_rate": 1.9929530003774133e-05, "loss": 0.7246, "step": 695 }, { "epoch": 0.08789959744257637, "grad_norm": 2.1875, "learning_rate": 1.992929321055656e-05, "loss": 0.7034, "step": 696 }, { "epoch": 0.08802588996763754, "grad_norm": 2.046875, "learning_rate": 1.9929056021581273e-05, "loss": 0.7733, "step": 697 }, { "epoch": 0.08815218249269871, "grad_norm": 2.09375, "learning_rate": 1.992881843685773e-05, "loss": 0.6727, "step": 698 }, { "epoch": 0.08827847501775989, "grad_norm": 2.046875, "learning_rate": 1.9928580456395402e-05, "loss": 0.7715, "step": 699 }, { "epoch": 0.08840476754282106, "grad_norm": 1.96875, "learning_rate": 1.992834208020377e-05, "loss": 0.6122, "step": 700 }, { "epoch": 0.08853106006788224, "grad_norm": 2.046875, "learning_rate": 1.9928103308292333e-05, "loss": 0.6769, "step": 701 }, { "epoch": 0.08865735259294341, "grad_norm": 2.015625, "learning_rate": 1.9927864140670615e-05, "loss": 0.7659, "step": 702 }, { "epoch": 0.08878364511800457, "grad_norm": 2.140625, "learning_rate": 1.9927624577348144e-05, "loss": 0.6321, "step": 703 }, { "epoch": 0.08890993764306575, "grad_norm": 2.140625, "learning_rate": 1.992738461833447e-05, "loss": 0.7047, "step": 704 }, { "epoch": 0.08903623016812692, "grad_norm": 1.9921875, "learning_rate": 1.9927144263639155e-05, "loss": 0.6603, "step": 705 }, { "epoch": 0.0891625226931881, "grad_norm": 2.109375, "learning_rate": 1.9926903513271783e-05, "loss": 0.6391, "step": 706 }, { "epoch": 0.08928881521824927, "grad_norm": 6.90625, "learning_rate": 1.9926662367241945e-05, "loss": 1.1325, "step": 707 }, { "epoch": 0.08941510774331045, "grad_norm": 2.015625, "learning_rate": 1.9926420825559257e-05, "loss": 0.648, "step": 708 }, { "epoch": 0.08954140026837161, "grad_norm": 2.046875, "learning_rate": 1.9926178888233344e-05, "loss": 0.7062, "step": 709 }, { "epoch": 0.08966769279343278, "grad_norm": 2.09375, "learning_rate": 1.9925936555273848e-05, "loss": 0.7959, "step": 710 }, { "epoch": 0.08979398531849396, "grad_norm": 2.109375, "learning_rate": 1.992569382669043e-05, "loss": 0.6931, "step": 711 }, { "epoch": 0.08992027784355514, "grad_norm": 2.078125, "learning_rate": 1.9925450702492762e-05, "loss": 0.669, "step": 712 }, { "epoch": 0.09004657036861631, "grad_norm": 2.328125, "learning_rate": 1.9925207182690535e-05, "loss": 0.7815, "step": 713 }, { "epoch": 0.09017286289367749, "grad_norm": 2.359375, "learning_rate": 1.992496326729346e-05, "loss": 0.6886, "step": 714 }, { "epoch": 0.09029915541873865, "grad_norm": 2.0625, "learning_rate": 1.9924718956311255e-05, "loss": 0.6449, "step": 715 }, { "epoch": 0.09042544794379982, "grad_norm": 2.15625, "learning_rate": 1.9924474249753656e-05, "loss": 0.7387, "step": 716 }, { "epoch": 0.090551740468861, "grad_norm": 2.078125, "learning_rate": 1.9924229147630417e-05, "loss": 0.695, "step": 717 }, { "epoch": 0.09067803299392217, "grad_norm": 1.875, "learning_rate": 1.992398364995131e-05, "loss": 0.6295, "step": 718 }, { "epoch": 0.09080432551898335, "grad_norm": 2.171875, "learning_rate": 1.9923737756726114e-05, "loss": 0.6828, "step": 719 }, { "epoch": 0.09093061804404452, "grad_norm": 4.21875, "learning_rate": 1.9923491467964638e-05, "loss": 1.0567, "step": 720 }, { "epoch": 0.0910569105691057, "grad_norm": 2.09375, "learning_rate": 1.992324478367669e-05, "loss": 0.6572, "step": 721 }, { "epoch": 0.09118320309416686, "grad_norm": 2.421875, "learning_rate": 1.9922997703872115e-05, "loss": 0.6533, "step": 722 }, { "epoch": 0.09130949561922803, "grad_norm": 2.078125, "learning_rate": 1.9922750228560746e-05, "loss": 0.7235, "step": 723 }, { "epoch": 0.09143578814428921, "grad_norm": 2.0625, "learning_rate": 1.9922502357752453e-05, "loss": 0.7626, "step": 724 }, { "epoch": 0.09156208066935038, "grad_norm": 2.125, "learning_rate": 1.9922254091457117e-05, "loss": 0.6684, "step": 725 }, { "epoch": 0.09168837319441156, "grad_norm": 1.8984375, "learning_rate": 1.9922005429684636e-05, "loss": 0.6212, "step": 726 }, { "epoch": 0.09181466571947274, "grad_norm": 2.0625, "learning_rate": 1.9921756372444913e-05, "loss": 0.6477, "step": 727 }, { "epoch": 0.0919409582445339, "grad_norm": 2.109375, "learning_rate": 1.992150691974788e-05, "loss": 0.7326, "step": 728 }, { "epoch": 0.09206725076959507, "grad_norm": 2.109375, "learning_rate": 1.9921257071603477e-05, "loss": 0.7643, "step": 729 }, { "epoch": 0.09219354329465625, "grad_norm": 2.171875, "learning_rate": 1.9921006828021666e-05, "loss": 0.7828, "step": 730 }, { "epoch": 0.09231983581971742, "grad_norm": 2.078125, "learning_rate": 1.9920756189012418e-05, "loss": 0.717, "step": 731 }, { "epoch": 0.0924461283447786, "grad_norm": 2.078125, "learning_rate": 1.9920505154585726e-05, "loss": 0.7032, "step": 732 }, { "epoch": 0.09257242086983977, "grad_norm": 1.8828125, "learning_rate": 1.9920253724751593e-05, "loss": 0.6158, "step": 733 }, { "epoch": 0.09269871339490093, "grad_norm": 4.625, "learning_rate": 1.9920001899520037e-05, "loss": 1.0549, "step": 734 }, { "epoch": 0.09282500591996211, "grad_norm": 2.0625, "learning_rate": 1.9919749678901104e-05, "loss": 0.6506, "step": 735 }, { "epoch": 0.09295129844502328, "grad_norm": 2.0, "learning_rate": 1.991949706290484e-05, "loss": 0.6319, "step": 736 }, { "epoch": 0.09307759097008446, "grad_norm": 2.15625, "learning_rate": 1.9919244051541315e-05, "loss": 0.7914, "step": 737 }, { "epoch": 0.09320388349514563, "grad_norm": 2.140625, "learning_rate": 1.991899064482062e-05, "loss": 0.6911, "step": 738 }, { "epoch": 0.09333017602020681, "grad_norm": 2.359375, "learning_rate": 1.991873684275284e-05, "loss": 0.7564, "step": 739 }, { "epoch": 0.09345646854526797, "grad_norm": 2.0, "learning_rate": 1.9918482645348104e-05, "loss": 0.6898, "step": 740 }, { "epoch": 0.09358276107032915, "grad_norm": 2.0, "learning_rate": 1.991822805261654e-05, "loss": 0.6714, "step": 741 }, { "epoch": 0.09370905359539032, "grad_norm": 2.0, "learning_rate": 1.9917973064568296e-05, "loss": 0.7603, "step": 742 }, { "epoch": 0.0938353461204515, "grad_norm": 2.109375, "learning_rate": 1.9917717681213536e-05, "loss": 0.7242, "step": 743 }, { "epoch": 0.09396163864551267, "grad_norm": 4.15625, "learning_rate": 1.9917461902562435e-05, "loss": 0.8519, "step": 744 }, { "epoch": 0.09408793117057385, "grad_norm": 2.09375, "learning_rate": 1.9917205728625192e-05, "loss": 0.7826, "step": 745 }, { "epoch": 0.09421422369563502, "grad_norm": 2.015625, "learning_rate": 1.9916949159412017e-05, "loss": 0.72, "step": 746 }, { "epoch": 0.09434051622069618, "grad_norm": 2.109375, "learning_rate": 1.991669219493313e-05, "loss": 0.6984, "step": 747 }, { "epoch": 0.09446680874575736, "grad_norm": 2.1875, "learning_rate": 1.991643483519878e-05, "loss": 0.7436, "step": 748 }, { "epoch": 0.09459310127081853, "grad_norm": 2.203125, "learning_rate": 1.9916177080219222e-05, "loss": 0.7409, "step": 749 }, { "epoch": 0.09471939379587971, "grad_norm": 2.375, "learning_rate": 1.9915918930004733e-05, "loss": 0.6859, "step": 750 }, { "epoch": 0.09484568632094088, "grad_norm": 2.015625, "learning_rate": 1.99156603845656e-05, "loss": 0.6513, "step": 751 }, { "epoch": 0.09497197884600206, "grad_norm": 2.421875, "learning_rate": 1.9915401443912124e-05, "loss": 0.7539, "step": 752 }, { "epoch": 0.09509827137106322, "grad_norm": 1.8984375, "learning_rate": 1.991514210805463e-05, "loss": 0.6746, "step": 753 }, { "epoch": 0.0952245638961244, "grad_norm": 2.0, "learning_rate": 1.9914882377003457e-05, "loss": 0.7686, "step": 754 }, { "epoch": 0.09535085642118557, "grad_norm": 2.203125, "learning_rate": 1.991462225076895e-05, "loss": 0.8293, "step": 755 }, { "epoch": 0.09547714894624675, "grad_norm": 2.09375, "learning_rate": 1.9914361729361484e-05, "loss": 0.7157, "step": 756 }, { "epoch": 0.09560344147130792, "grad_norm": 2.09375, "learning_rate": 1.9914100812791437e-05, "loss": 0.6677, "step": 757 }, { "epoch": 0.0957297339963691, "grad_norm": 1.9453125, "learning_rate": 1.9913839501069213e-05, "loss": 0.6431, "step": 758 }, { "epoch": 0.09585602652143026, "grad_norm": 2.03125, "learning_rate": 1.9913577794205222e-05, "loss": 0.75, "step": 759 }, { "epoch": 0.09598231904649143, "grad_norm": 2.171875, "learning_rate": 1.9913315692209906e-05, "loss": 0.7288, "step": 760 }, { "epoch": 0.09610861157155261, "grad_norm": 2.0, "learning_rate": 1.99130531950937e-05, "loss": 0.6654, "step": 761 }, { "epoch": 0.09623490409661378, "grad_norm": 2.171875, "learning_rate": 1.9912790302867065e-05, "loss": 0.7615, "step": 762 }, { "epoch": 0.09636119662167496, "grad_norm": 2.15625, "learning_rate": 1.9912527015540493e-05, "loss": 0.7643, "step": 763 }, { "epoch": 0.09648748914673613, "grad_norm": 2.640625, "learning_rate": 1.991226333312447e-05, "loss": 0.8258, "step": 764 }, { "epoch": 0.0966137816717973, "grad_norm": 2.21875, "learning_rate": 1.99119992556295e-05, "loss": 0.6747, "step": 765 }, { "epoch": 0.09674007419685847, "grad_norm": 2.234375, "learning_rate": 1.9911734783066118e-05, "loss": 0.8067, "step": 766 }, { "epoch": 0.09686636672191964, "grad_norm": 3.390625, "learning_rate": 1.991146991544486e-05, "loss": 0.9087, "step": 767 }, { "epoch": 0.09699265924698082, "grad_norm": 2.1875, "learning_rate": 1.9911204652776286e-05, "loss": 0.6505, "step": 768 }, { "epoch": 0.097118951772042, "grad_norm": 2.28125, "learning_rate": 1.9910938995070966e-05, "loss": 0.7147, "step": 769 }, { "epoch": 0.09724524429710317, "grad_norm": 1.9609375, "learning_rate": 1.991067294233949e-05, "loss": 0.6731, "step": 770 }, { "epoch": 0.09737153682216433, "grad_norm": 1.96875, "learning_rate": 1.991040649459246e-05, "loss": 0.6812, "step": 771 }, { "epoch": 0.0974978293472255, "grad_norm": 2.078125, "learning_rate": 1.9910139651840497e-05, "loss": 0.6889, "step": 772 }, { "epoch": 0.09762412187228668, "grad_norm": 2.140625, "learning_rate": 1.9909872414094242e-05, "loss": 0.6696, "step": 773 }, { "epoch": 0.09775041439734786, "grad_norm": 2.296875, "learning_rate": 1.990960478136434e-05, "loss": 0.6925, "step": 774 }, { "epoch": 0.09787670692240903, "grad_norm": 2.09375, "learning_rate": 1.9909336753661464e-05, "loss": 0.7083, "step": 775 }, { "epoch": 0.09800299944747021, "grad_norm": 2.1875, "learning_rate": 1.990906833099629e-05, "loss": 0.6513, "step": 776 }, { "epoch": 0.09812929197253138, "grad_norm": 2.0625, "learning_rate": 1.990879951337952e-05, "loss": 0.7388, "step": 777 }, { "epoch": 0.09825558449759254, "grad_norm": 2.109375, "learning_rate": 1.9908530300821864e-05, "loss": 0.7309, "step": 778 }, { "epoch": 0.09838187702265372, "grad_norm": 2.0, "learning_rate": 1.990826069333406e-05, "loss": 0.7743, "step": 779 }, { "epoch": 0.0985081695477149, "grad_norm": 2.171875, "learning_rate": 1.990799069092685e-05, "loss": 0.6707, "step": 780 }, { "epoch": 0.09863446207277607, "grad_norm": 2.109375, "learning_rate": 1.9907720293611e-05, "loss": 0.6448, "step": 781 }, { "epoch": 0.09876075459783724, "grad_norm": 2.09375, "learning_rate": 1.9907449501397276e-05, "loss": 0.6906, "step": 782 }, { "epoch": 0.09888704712289842, "grad_norm": 2.21875, "learning_rate": 1.990717831429648e-05, "loss": 0.751, "step": 783 }, { "epoch": 0.09901333964795958, "grad_norm": 1.96875, "learning_rate": 1.990690673231942e-05, "loss": 0.6448, "step": 784 }, { "epoch": 0.09913963217302076, "grad_norm": 2.078125, "learning_rate": 1.9906634755476925e-05, "loss": 0.6904, "step": 785 }, { "epoch": 0.09926592469808193, "grad_norm": 1.9609375, "learning_rate": 1.9906362383779826e-05, "loss": 0.6595, "step": 786 }, { "epoch": 0.0993922172231431, "grad_norm": 2.015625, "learning_rate": 1.9906089617238983e-05, "loss": 0.7797, "step": 787 }, { "epoch": 0.09951850974820428, "grad_norm": 2.28125, "learning_rate": 1.990581645586527e-05, "loss": 0.7875, "step": 788 }, { "epoch": 0.09964480227326546, "grad_norm": 1.96875, "learning_rate": 1.9905542899669573e-05, "loss": 0.6152, "step": 789 }, { "epoch": 0.09977109479832662, "grad_norm": 2.15625, "learning_rate": 1.9905268948662793e-05, "loss": 0.6735, "step": 790 }, { "epoch": 0.09989738732338779, "grad_norm": 2.296875, "learning_rate": 1.9904994602855853e-05, "loss": 0.7408, "step": 791 }, { "epoch": 0.10002367984844897, "grad_norm": 2.015625, "learning_rate": 1.9904719862259684e-05, "loss": 0.6697, "step": 792 }, { "epoch": 0.10014997237351014, "grad_norm": 2.046875, "learning_rate": 1.9904444726885236e-05, "loss": 0.6959, "step": 793 }, { "epoch": 0.10027626489857132, "grad_norm": 2.15625, "learning_rate": 1.990416919674348e-05, "loss": 0.7553, "step": 794 }, { "epoch": 0.1004025574236325, "grad_norm": 2.015625, "learning_rate": 1.9903893271845397e-05, "loss": 0.6619, "step": 795 }, { "epoch": 0.10052884994869365, "grad_norm": 1.9453125, "learning_rate": 1.9903616952201982e-05, "loss": 0.6736, "step": 796 }, { "epoch": 0.10065514247375483, "grad_norm": 5.96875, "learning_rate": 1.990334023782425e-05, "loss": 1.068, "step": 797 }, { "epoch": 0.100781434998816, "grad_norm": 2.1875, "learning_rate": 1.990306312872323e-05, "loss": 0.6905, "step": 798 }, { "epoch": 0.10090772752387718, "grad_norm": 2.171875, "learning_rate": 1.9902785624909966e-05, "loss": 0.7028, "step": 799 }, { "epoch": 0.10103402004893836, "grad_norm": 1.9765625, "learning_rate": 1.990250772639552e-05, "loss": 0.6417, "step": 800 }, { "epoch": 0.10116031257399953, "grad_norm": 2.015625, "learning_rate": 1.9902229433190965e-05, "loss": 0.7293, "step": 801 }, { "epoch": 0.1012866050990607, "grad_norm": 1.9375, "learning_rate": 1.99019507453074e-05, "loss": 0.673, "step": 802 }, { "epoch": 0.10141289762412187, "grad_norm": 2.125, "learning_rate": 1.9901671662755926e-05, "loss": 0.7653, "step": 803 }, { "epoch": 0.10153919014918304, "grad_norm": 2.171875, "learning_rate": 1.990139218554767e-05, "loss": 0.6954, "step": 804 }, { "epoch": 0.10166548267424422, "grad_norm": 2.0625, "learning_rate": 1.9901112313693774e-05, "loss": 0.7341, "step": 805 }, { "epoch": 0.10179177519930539, "grad_norm": 2.109375, "learning_rate": 1.9900832047205382e-05, "loss": 0.6582, "step": 806 }, { "epoch": 0.10191806772436657, "grad_norm": 1.9921875, "learning_rate": 1.9900551386093677e-05, "loss": 0.6857, "step": 807 }, { "epoch": 0.10204436024942774, "grad_norm": 1.953125, "learning_rate": 1.990027033036984e-05, "loss": 0.7804, "step": 808 }, { "epoch": 0.1021706527744889, "grad_norm": 4.34375, "learning_rate": 1.9899988880045074e-05, "loss": 1.0326, "step": 809 }, { "epoch": 0.10229694529955008, "grad_norm": 2.0, "learning_rate": 1.9899707035130598e-05, "loss": 0.6679, "step": 810 }, { "epoch": 0.10242323782461125, "grad_norm": 2.1875, "learning_rate": 1.9899424795637645e-05, "loss": 0.744, "step": 811 }, { "epoch": 0.10254953034967243, "grad_norm": 2.34375, "learning_rate": 1.989914216157746e-05, "loss": 0.8327, "step": 812 }, { "epoch": 0.1026758228747336, "grad_norm": 2.140625, "learning_rate": 1.9898859132961314e-05, "loss": 0.7174, "step": 813 }, { "epoch": 0.10280211539979478, "grad_norm": 2.34375, "learning_rate": 1.9898575709800485e-05, "loss": 0.7995, "step": 814 }, { "epoch": 0.10292840792485594, "grad_norm": 2.109375, "learning_rate": 1.989829189210627e-05, "loss": 0.715, "step": 815 }, { "epoch": 0.10305470044991712, "grad_norm": 1.9765625, "learning_rate": 1.9898007679889986e-05, "loss": 0.6717, "step": 816 }, { "epoch": 0.10318099297497829, "grad_norm": 2.125, "learning_rate": 1.9897723073162956e-05, "loss": 0.6967, "step": 817 }, { "epoch": 0.10330728550003947, "grad_norm": 2.015625, "learning_rate": 1.989743807193652e-05, "loss": 0.6305, "step": 818 }, { "epoch": 0.10343357802510064, "grad_norm": 2.0, "learning_rate": 1.9897152676222043e-05, "loss": 0.721, "step": 819 }, { "epoch": 0.10355987055016182, "grad_norm": 2.21875, "learning_rate": 1.9896866886030903e-05, "loss": 0.7296, "step": 820 }, { "epoch": 0.10368616307522298, "grad_norm": 2.078125, "learning_rate": 1.9896580701374482e-05, "loss": 0.6576, "step": 821 }, { "epoch": 0.10381245560028415, "grad_norm": 2.15625, "learning_rate": 1.9896294122264196e-05, "loss": 0.7623, "step": 822 }, { "epoch": 0.10393874812534533, "grad_norm": 1.984375, "learning_rate": 1.989600714871146e-05, "loss": 0.7018, "step": 823 }, { "epoch": 0.1040650406504065, "grad_norm": 2.15625, "learning_rate": 1.9895719780727715e-05, "loss": 0.7408, "step": 824 }, { "epoch": 0.10419133317546768, "grad_norm": 2.25, "learning_rate": 1.989543201832442e-05, "loss": 0.6641, "step": 825 }, { "epoch": 0.10431762570052885, "grad_norm": 4.0, "learning_rate": 1.9895143861513033e-05, "loss": 1.0414, "step": 826 }, { "epoch": 0.10444391822559003, "grad_norm": 2.015625, "learning_rate": 1.9894855310305047e-05, "loss": 0.7387, "step": 827 }, { "epoch": 0.10457021075065119, "grad_norm": 2.046875, "learning_rate": 1.9894566364711965e-05, "loss": 0.6545, "step": 828 }, { "epoch": 0.10469650327571237, "grad_norm": 2.015625, "learning_rate": 1.9894277024745296e-05, "loss": 0.7008, "step": 829 }, { "epoch": 0.10482279580077354, "grad_norm": 1.9375, "learning_rate": 1.989398729041658e-05, "loss": 0.7092, "step": 830 }, { "epoch": 0.10494908832583472, "grad_norm": 2.046875, "learning_rate": 1.989369716173736e-05, "loss": 0.7647, "step": 831 }, { "epoch": 0.10507538085089589, "grad_norm": 2.078125, "learning_rate": 1.9893406638719202e-05, "loss": 0.6589, "step": 832 }, { "epoch": 0.10520167337595707, "grad_norm": 2.1875, "learning_rate": 1.9893115721373687e-05, "loss": 0.7316, "step": 833 }, { "epoch": 0.10532796590101823, "grad_norm": 2.203125, "learning_rate": 1.9892824409712404e-05, "loss": 0.6965, "step": 834 }, { "epoch": 0.1054542584260794, "grad_norm": 2.109375, "learning_rate": 1.989253270374697e-05, "loss": 0.711, "step": 835 }, { "epoch": 0.10558055095114058, "grad_norm": 1.9921875, "learning_rate": 1.989224060348901e-05, "loss": 0.7258, "step": 836 }, { "epoch": 0.10570684347620175, "grad_norm": 2.078125, "learning_rate": 1.9891948108950168e-05, "loss": 0.6587, "step": 837 }, { "epoch": 0.10583313600126293, "grad_norm": 2.25, "learning_rate": 1.98916552201421e-05, "loss": 0.722, "step": 838 }, { "epoch": 0.1059594285263241, "grad_norm": 2.0625, "learning_rate": 1.989136193707648e-05, "loss": 0.7793, "step": 839 }, { "epoch": 0.10608572105138526, "grad_norm": 2.140625, "learning_rate": 1.9891068259765e-05, "loss": 0.7263, "step": 840 }, { "epoch": 0.10621201357644644, "grad_norm": 2.0625, "learning_rate": 1.9890774188219362e-05, "loss": 0.7437, "step": 841 }, { "epoch": 0.10633830610150762, "grad_norm": 2.03125, "learning_rate": 1.989047972245129e-05, "loss": 0.6914, "step": 842 }, { "epoch": 0.10646459862656879, "grad_norm": 3.4375, "learning_rate": 1.9890184862472516e-05, "loss": 0.9435, "step": 843 }, { "epoch": 0.10659089115162997, "grad_norm": 2.03125, "learning_rate": 1.9889889608294797e-05, "loss": 0.6568, "step": 844 }, { "epoch": 0.10671718367669114, "grad_norm": 2.203125, "learning_rate": 1.9889593959929897e-05, "loss": 0.8084, "step": 845 }, { "epoch": 0.1068434762017523, "grad_norm": 1.890625, "learning_rate": 1.9889297917389606e-05, "loss": 0.6682, "step": 846 }, { "epoch": 0.10696976872681348, "grad_norm": 2.0, "learning_rate": 1.988900148068572e-05, "loss": 0.6955, "step": 847 }, { "epoch": 0.10709606125187465, "grad_norm": 3.703125, "learning_rate": 1.988870464983005e-05, "loss": 0.9726, "step": 848 }, { "epoch": 0.10722235377693583, "grad_norm": 2.203125, "learning_rate": 1.9888407424834437e-05, "loss": 0.736, "step": 849 }, { "epoch": 0.107348646301997, "grad_norm": 2.09375, "learning_rate": 1.9888109805710717e-05, "loss": 0.6282, "step": 850 }, { "epoch": 0.10747493882705818, "grad_norm": 2.203125, "learning_rate": 1.9887811792470757e-05, "loss": 0.7604, "step": 851 }, { "epoch": 0.10760123135211935, "grad_norm": 1.96875, "learning_rate": 1.9887513385126434e-05, "loss": 0.7725, "step": 852 }, { "epoch": 0.10772752387718051, "grad_norm": 1.921875, "learning_rate": 1.9887214583689648e-05, "loss": 0.6554, "step": 853 }, { "epoch": 0.10785381640224169, "grad_norm": 1.9453125, "learning_rate": 1.98869153881723e-05, "loss": 0.6578, "step": 854 }, { "epoch": 0.10798010892730286, "grad_norm": 2.015625, "learning_rate": 1.9886615798586323e-05, "loss": 0.6473, "step": 855 }, { "epoch": 0.10810640145236404, "grad_norm": 2.03125, "learning_rate": 1.988631581494365e-05, "loss": 0.7702, "step": 856 }, { "epoch": 0.10823269397742522, "grad_norm": 2.046875, "learning_rate": 1.988601543725624e-05, "loss": 0.7171, "step": 857 }, { "epoch": 0.10835898650248639, "grad_norm": 2.78125, "learning_rate": 1.988571466553607e-05, "loss": 0.6992, "step": 858 }, { "epoch": 0.10848527902754755, "grad_norm": 2.078125, "learning_rate": 1.9885413499795124e-05, "loss": 0.6902, "step": 859 }, { "epoch": 0.10861157155260873, "grad_norm": 2.125, "learning_rate": 1.9885111940045405e-05, "loss": 0.6969, "step": 860 }, { "epoch": 0.1087378640776699, "grad_norm": 2.078125, "learning_rate": 1.9884809986298934e-05, "loss": 0.7804, "step": 861 }, { "epoch": 0.10886415660273108, "grad_norm": 2.046875, "learning_rate": 1.9884507638567746e-05, "loss": 0.7252, "step": 862 }, { "epoch": 0.10899044912779225, "grad_norm": 2.1875, "learning_rate": 1.988420489686389e-05, "loss": 0.7175, "step": 863 }, { "epoch": 0.10911674165285343, "grad_norm": 2.109375, "learning_rate": 1.988390176119944e-05, "loss": 0.7449, "step": 864 }, { "epoch": 0.10924303417791459, "grad_norm": 2.078125, "learning_rate": 1.988359823158647e-05, "loss": 0.7455, "step": 865 }, { "epoch": 0.10936932670297576, "grad_norm": 2.03125, "learning_rate": 1.9883294308037077e-05, "loss": 0.6593, "step": 866 }, { "epoch": 0.10949561922803694, "grad_norm": 1.9765625, "learning_rate": 1.9882989990563384e-05, "loss": 0.6701, "step": 867 }, { "epoch": 0.10962191175309811, "grad_norm": 2.140625, "learning_rate": 1.988268527917751e-05, "loss": 0.7196, "step": 868 }, { "epoch": 0.10974820427815929, "grad_norm": 1.84375, "learning_rate": 1.988238017389161e-05, "loss": 0.6026, "step": 869 }, { "epoch": 0.10987449680322046, "grad_norm": 2.234375, "learning_rate": 1.9882074674717836e-05, "loss": 0.8043, "step": 870 }, { "epoch": 0.11000078932828163, "grad_norm": 2.0, "learning_rate": 1.9881768781668366e-05, "loss": 0.6671, "step": 871 }, { "epoch": 0.1101270818533428, "grad_norm": 1.96875, "learning_rate": 1.9881462494755396e-05, "loss": 0.7173, "step": 872 }, { "epoch": 0.11025337437840398, "grad_norm": 2.0625, "learning_rate": 1.9881155813991133e-05, "loss": 0.6912, "step": 873 }, { "epoch": 0.11037966690346515, "grad_norm": 1.96875, "learning_rate": 1.9880848739387798e-05, "loss": 0.6902, "step": 874 }, { "epoch": 0.11050595942852633, "grad_norm": 1.8671875, "learning_rate": 1.9880541270957633e-05, "loss": 0.6363, "step": 875 }, { "epoch": 0.1106322519535875, "grad_norm": 1.9140625, "learning_rate": 1.9880233408712892e-05, "loss": 0.7375, "step": 876 }, { "epoch": 0.11075854447864868, "grad_norm": 2.515625, "learning_rate": 1.9879925152665845e-05, "loss": 0.7131, "step": 877 }, { "epoch": 0.11088483700370984, "grad_norm": 1.953125, "learning_rate": 1.987961650282878e-05, "loss": 0.7198, "step": 878 }, { "epoch": 0.11101112952877101, "grad_norm": 1.984375, "learning_rate": 1.9879307459213993e-05, "loss": 0.6695, "step": 879 }, { "epoch": 0.11113742205383219, "grad_norm": 6.15625, "learning_rate": 1.9878998021833807e-05, "loss": 1.0791, "step": 880 }, { "epoch": 0.11126371457889336, "grad_norm": 2.015625, "learning_rate": 1.987868819070056e-05, "loss": 0.6833, "step": 881 }, { "epoch": 0.11139000710395454, "grad_norm": 2.015625, "learning_rate": 1.9878377965826594e-05, "loss": 0.7125, "step": 882 }, { "epoch": 0.11151629962901571, "grad_norm": 2.28125, "learning_rate": 1.9878067347224275e-05, "loss": 0.8048, "step": 883 }, { "epoch": 0.11164259215407688, "grad_norm": 1.96875, "learning_rate": 1.9877756334905983e-05, "loss": 0.6873, "step": 884 }, { "epoch": 0.11176888467913805, "grad_norm": 2.1875, "learning_rate": 1.9877444928884117e-05, "loss": 0.7378, "step": 885 }, { "epoch": 0.11189517720419923, "grad_norm": 2.109375, "learning_rate": 1.9877133129171088e-05, "loss": 0.6469, "step": 886 }, { "epoch": 0.1120214697292604, "grad_norm": 1.984375, "learning_rate": 1.9876820935779324e-05, "loss": 0.7576, "step": 887 }, { "epoch": 0.11214776225432158, "grad_norm": 2.046875, "learning_rate": 1.9876508348721266e-05, "loss": 0.7449, "step": 888 }, { "epoch": 0.11227405477938275, "grad_norm": 1.921875, "learning_rate": 1.9876195368009375e-05, "loss": 0.685, "step": 889 }, { "epoch": 0.11240034730444391, "grad_norm": 2.0, "learning_rate": 1.9875881993656124e-05, "loss": 0.7376, "step": 890 }, { "epoch": 0.11252663982950509, "grad_norm": 2.140625, "learning_rate": 1.9875568225674e-05, "loss": 0.7027, "step": 891 }, { "epoch": 0.11265293235456626, "grad_norm": 1.875, "learning_rate": 1.9875254064075522e-05, "loss": 0.6445, "step": 892 }, { "epoch": 0.11277922487962744, "grad_norm": 2.078125, "learning_rate": 1.98749395088732e-05, "loss": 0.6448, "step": 893 }, { "epoch": 0.11290551740468861, "grad_norm": 1.8828125, "learning_rate": 1.987462456007957e-05, "loss": 0.6059, "step": 894 }, { "epoch": 0.11303180992974979, "grad_norm": 2.109375, "learning_rate": 1.9874309217707195e-05, "loss": 0.696, "step": 895 }, { "epoch": 0.11315810245481095, "grad_norm": 2.0625, "learning_rate": 1.9873993481768638e-05, "loss": 0.7343, "step": 896 }, { "epoch": 0.11328439497987212, "grad_norm": 1.859375, "learning_rate": 1.9873677352276483e-05, "loss": 0.659, "step": 897 }, { "epoch": 0.1134106875049333, "grad_norm": 1.90625, "learning_rate": 1.9873360829243327e-05, "loss": 0.6735, "step": 898 }, { "epoch": 0.11353698002999447, "grad_norm": 1.921875, "learning_rate": 1.9873043912681794e-05, "loss": 0.6699, "step": 899 }, { "epoch": 0.11366327255505565, "grad_norm": 2.03125, "learning_rate": 1.987272660260451e-05, "loss": 0.6572, "step": 900 }, { "epoch": 0.11378956508011683, "grad_norm": 2.078125, "learning_rate": 1.9872408899024123e-05, "loss": 0.7455, "step": 901 }, { "epoch": 0.113915857605178, "grad_norm": 1.9375, "learning_rate": 1.9872090801953296e-05, "loss": 0.6522, "step": 902 }, { "epoch": 0.11404215013023916, "grad_norm": 2.0625, "learning_rate": 1.9871772311404712e-05, "loss": 0.7468, "step": 903 }, { "epoch": 0.11416844265530034, "grad_norm": 2.15625, "learning_rate": 1.9871453427391056e-05, "loss": 0.7735, "step": 904 }, { "epoch": 0.11429473518036151, "grad_norm": 2.15625, "learning_rate": 1.987113414992505e-05, "loss": 0.725, "step": 905 }, { "epoch": 0.11442102770542269, "grad_norm": 2.109375, "learning_rate": 1.9870814479019403e-05, "loss": 0.8012, "step": 906 }, { "epoch": 0.11454732023048386, "grad_norm": 2.03125, "learning_rate": 1.9870494414686873e-05, "loss": 0.7079, "step": 907 }, { "epoch": 0.11467361275554504, "grad_norm": 1.921875, "learning_rate": 1.987017395694021e-05, "loss": 0.707, "step": 908 }, { "epoch": 0.1147999052806062, "grad_norm": 2.171875, "learning_rate": 1.9869853105792188e-05, "loss": 0.7529, "step": 909 }, { "epoch": 0.11492619780566737, "grad_norm": 2.078125, "learning_rate": 1.9869531861255594e-05, "loss": 0.7041, "step": 910 }, { "epoch": 0.11505249033072855, "grad_norm": 2.171875, "learning_rate": 1.986921022334323e-05, "loss": 0.7506, "step": 911 }, { "epoch": 0.11517878285578972, "grad_norm": 2.015625, "learning_rate": 1.9868888192067915e-05, "loss": 0.7101, "step": 912 }, { "epoch": 0.1153050753808509, "grad_norm": 1.96875, "learning_rate": 1.9868565767442495e-05, "loss": 0.6114, "step": 913 }, { "epoch": 0.11543136790591207, "grad_norm": 2.0, "learning_rate": 1.986824294947981e-05, "loss": 0.7597, "step": 914 }, { "epoch": 0.11555766043097324, "grad_norm": 2.0, "learning_rate": 1.9867919738192726e-05, "loss": 0.7125, "step": 915 }, { "epoch": 0.11568395295603441, "grad_norm": 2.046875, "learning_rate": 1.9867596133594133e-05, "loss": 0.7556, "step": 916 }, { "epoch": 0.11581024548109559, "grad_norm": 1.9140625, "learning_rate": 1.9867272135696927e-05, "loss": 0.6842, "step": 917 }, { "epoch": 0.11593653800615676, "grad_norm": 1.875, "learning_rate": 1.986694774451402e-05, "loss": 0.5913, "step": 918 }, { "epoch": 0.11606283053121794, "grad_norm": 2.140625, "learning_rate": 1.986662296005834e-05, "loss": 0.7903, "step": 919 }, { "epoch": 0.11618912305627911, "grad_norm": 1.8984375, "learning_rate": 1.9866297782342837e-05, "loss": 0.6521, "step": 920 }, { "epoch": 0.11631541558134027, "grad_norm": 2.046875, "learning_rate": 1.9865972211380466e-05, "loss": 0.7236, "step": 921 }, { "epoch": 0.11644170810640145, "grad_norm": 1.9609375, "learning_rate": 1.9865646247184208e-05, "loss": 0.6951, "step": 922 }, { "epoch": 0.11656800063146262, "grad_norm": 2.03125, "learning_rate": 1.9865319889767053e-05, "loss": 0.7575, "step": 923 }, { "epoch": 0.1166942931565238, "grad_norm": 2.03125, "learning_rate": 1.986499313914201e-05, "loss": 0.7069, "step": 924 }, { "epoch": 0.11682058568158497, "grad_norm": 2.109375, "learning_rate": 1.98646659953221e-05, "loss": 0.7183, "step": 925 }, { "epoch": 0.11694687820664615, "grad_norm": 2.546875, "learning_rate": 1.9864338458320366e-05, "loss": 0.7752, "step": 926 }, { "epoch": 0.11707317073170732, "grad_norm": 2.046875, "learning_rate": 1.986401052814986e-05, "loss": 0.7261, "step": 927 }, { "epoch": 0.11719946325676849, "grad_norm": 2.140625, "learning_rate": 1.9863682204823654e-05, "loss": 0.6994, "step": 928 }, { "epoch": 0.11732575578182966, "grad_norm": 1.8515625, "learning_rate": 1.986335348835483e-05, "loss": 0.674, "step": 929 }, { "epoch": 0.11745204830689084, "grad_norm": 2.46875, "learning_rate": 1.98630243787565e-05, "loss": 0.7322, "step": 930 }, { "epoch": 0.11757834083195201, "grad_norm": 2.171875, "learning_rate": 1.9862694876041768e-05, "loss": 0.6391, "step": 931 }, { "epoch": 0.11770463335701319, "grad_norm": 2.015625, "learning_rate": 1.986236498022378e-05, "loss": 0.6225, "step": 932 }, { "epoch": 0.11783092588207436, "grad_norm": 2.140625, "learning_rate": 1.986203469131567e-05, "loss": 0.7441, "step": 933 }, { "epoch": 0.11795721840713552, "grad_norm": 2.25, "learning_rate": 1.986170400933062e-05, "loss": 0.7425, "step": 934 }, { "epoch": 0.1180835109321967, "grad_norm": 1.96875, "learning_rate": 1.98613729342818e-05, "loss": 0.705, "step": 935 }, { "epoch": 0.11820980345725787, "grad_norm": 2.125, "learning_rate": 1.9861041466182408e-05, "loss": 0.6751, "step": 936 }, { "epoch": 0.11833609598231905, "grad_norm": 2.0625, "learning_rate": 1.9860709605045652e-05, "loss": 0.742, "step": 937 }, { "epoch": 0.11846238850738022, "grad_norm": 2.09375, "learning_rate": 1.9860377350884765e-05, "loss": 0.7308, "step": 938 }, { "epoch": 0.1185886810324414, "grad_norm": 2.109375, "learning_rate": 1.9860044703712984e-05, "loss": 0.7414, "step": 939 }, { "epoch": 0.11871497355750256, "grad_norm": 2.375, "learning_rate": 1.9859711663543573e-05, "loss": 0.7779, "step": 940 }, { "epoch": 0.11884126608256373, "grad_norm": 1.9609375, "learning_rate": 1.9859378230389802e-05, "loss": 0.6649, "step": 941 }, { "epoch": 0.11896755860762491, "grad_norm": 2.25, "learning_rate": 1.985904440426496e-05, "loss": 0.7293, "step": 942 }, { "epoch": 0.11909385113268608, "grad_norm": 2.125, "learning_rate": 1.985871018518236e-05, "loss": 0.7046, "step": 943 }, { "epoch": 0.11922014365774726, "grad_norm": 1.875, "learning_rate": 1.9858375573155318e-05, "loss": 0.6557, "step": 944 }, { "epoch": 0.11934643618280844, "grad_norm": 1.90625, "learning_rate": 1.9858040568197166e-05, "loss": 0.6376, "step": 945 }, { "epoch": 0.1194727287078696, "grad_norm": 1.9453125, "learning_rate": 1.9857705170321268e-05, "loss": 0.6268, "step": 946 }, { "epoch": 0.11959902123293077, "grad_norm": 2.0, "learning_rate": 1.9857369379540982e-05, "loss": 0.6938, "step": 947 }, { "epoch": 0.11972531375799195, "grad_norm": 1.9140625, "learning_rate": 1.9857033195869694e-05, "loss": 0.6253, "step": 948 }, { "epoch": 0.11985160628305312, "grad_norm": 1.9921875, "learning_rate": 1.985669661932081e-05, "loss": 0.6799, "step": 949 }, { "epoch": 0.1199778988081143, "grad_norm": 1.8671875, "learning_rate": 1.9856359649907736e-05, "loss": 0.7098, "step": 950 }, { "epoch": 0.12010419133317547, "grad_norm": 2.0, "learning_rate": 1.985602228764391e-05, "loss": 0.749, "step": 951 }, { "epoch": 0.12023048385823665, "grad_norm": 1.84375, "learning_rate": 1.985568453254277e-05, "loss": 0.5856, "step": 952 }, { "epoch": 0.12035677638329781, "grad_norm": 2.1875, "learning_rate": 1.985534638461779e-05, "loss": 0.7542, "step": 953 }, { "epoch": 0.12048306890835898, "grad_norm": 2.046875, "learning_rate": 1.9855007843882437e-05, "loss": 0.7006, "step": 954 }, { "epoch": 0.12060936143342016, "grad_norm": 1.9921875, "learning_rate": 1.985466891035021e-05, "loss": 0.6093, "step": 955 }, { "epoch": 0.12073565395848133, "grad_norm": 2.046875, "learning_rate": 1.9854329584034614e-05, "loss": 0.7075, "step": 956 }, { "epoch": 0.12086194648354251, "grad_norm": 2.015625, "learning_rate": 1.985398986494918e-05, "loss": 0.7435, "step": 957 }, { "epoch": 0.12098823900860368, "grad_norm": 2.21875, "learning_rate": 1.9853649753107446e-05, "loss": 0.7341, "step": 958 }, { "epoch": 0.12111453153366485, "grad_norm": 2.140625, "learning_rate": 1.9853309248522963e-05, "loss": 0.8189, "step": 959 }, { "epoch": 0.12124082405872602, "grad_norm": 2.96875, "learning_rate": 1.9852968351209306e-05, "loss": 0.8741, "step": 960 }, { "epoch": 0.1213671165837872, "grad_norm": 2.3125, "learning_rate": 1.9852627061180066e-05, "loss": 0.7681, "step": 961 }, { "epoch": 0.12149340910884837, "grad_norm": 2.03125, "learning_rate": 1.985228537844884e-05, "loss": 0.7089, "step": 962 }, { "epoch": 0.12161970163390955, "grad_norm": 2.109375, "learning_rate": 1.9851943303029255e-05, "loss": 0.713, "step": 963 }, { "epoch": 0.12174599415897072, "grad_norm": 2.046875, "learning_rate": 1.9851600834934936e-05, "loss": 0.71, "step": 964 }, { "epoch": 0.12187228668403188, "grad_norm": 2.234375, "learning_rate": 1.9851257974179535e-05, "loss": 0.8148, "step": 965 }, { "epoch": 0.12199857920909306, "grad_norm": 1.8671875, "learning_rate": 1.9850914720776725e-05, "loss": 0.6793, "step": 966 }, { "epoch": 0.12212487173415423, "grad_norm": 2.171875, "learning_rate": 1.9850571074740175e-05, "loss": 0.7873, "step": 967 }, { "epoch": 0.12225116425921541, "grad_norm": 1.984375, "learning_rate": 1.9850227036083592e-05, "loss": 0.7454, "step": 968 }, { "epoch": 0.12237745678427658, "grad_norm": 2.046875, "learning_rate": 1.9849882604820686e-05, "loss": 0.6829, "step": 969 }, { "epoch": 0.12250374930933776, "grad_norm": 2.109375, "learning_rate": 1.984953778096518e-05, "loss": 0.6985, "step": 970 }, { "epoch": 0.12263004183439892, "grad_norm": 3.828125, "learning_rate": 1.9849192564530825e-05, "loss": 0.805, "step": 971 }, { "epoch": 0.1227563343594601, "grad_norm": 2.3125, "learning_rate": 1.984884695553138e-05, "loss": 0.7206, "step": 972 }, { "epoch": 0.12288262688452127, "grad_norm": 2.296875, "learning_rate": 1.9848500953980613e-05, "loss": 0.7447, "step": 973 }, { "epoch": 0.12300891940958245, "grad_norm": 1.9765625, "learning_rate": 1.9848154559892324e-05, "loss": 0.6822, "step": 974 }, { "epoch": 0.12313521193464362, "grad_norm": 2.140625, "learning_rate": 1.984780777328031e-05, "loss": 0.7859, "step": 975 }, { "epoch": 0.1232615044597048, "grad_norm": 2.109375, "learning_rate": 1.9847460594158403e-05, "loss": 0.6944, "step": 976 }, { "epoch": 0.12338779698476596, "grad_norm": 2.046875, "learning_rate": 1.9847113022540435e-05, "loss": 0.6679, "step": 977 }, { "epoch": 0.12351408950982713, "grad_norm": 2.125, "learning_rate": 1.9846765058440258e-05, "loss": 0.6688, "step": 978 }, { "epoch": 0.12364038203488831, "grad_norm": 1.9140625, "learning_rate": 1.9846416701871748e-05, "loss": 0.6828, "step": 979 }, { "epoch": 0.12376667455994948, "grad_norm": 1.9375, "learning_rate": 1.984606795284878e-05, "loss": 0.7157, "step": 980 }, { "epoch": 0.12389296708501066, "grad_norm": 2.03125, "learning_rate": 1.984571881138526e-05, "loss": 0.6749, "step": 981 }, { "epoch": 0.12401925961007183, "grad_norm": 2.1875, "learning_rate": 1.9845369277495102e-05, "loss": 0.7282, "step": 982 }, { "epoch": 0.12414555213513301, "grad_norm": 2.125, "learning_rate": 1.984501935119224e-05, "loss": 0.7703, "step": 983 }, { "epoch": 0.12427184466019417, "grad_norm": 2.140625, "learning_rate": 1.984466903249062e-05, "loss": 0.664, "step": 984 }, { "epoch": 0.12439813718525534, "grad_norm": 2.0625, "learning_rate": 1.9844318321404207e-05, "loss": 0.7266, "step": 985 }, { "epoch": 0.12452442971031652, "grad_norm": 1.96875, "learning_rate": 1.9843967217946973e-05, "loss": 0.6954, "step": 986 }, { "epoch": 0.1246507222353777, "grad_norm": 1.96875, "learning_rate": 1.984361572213292e-05, "loss": 0.7336, "step": 987 }, { "epoch": 0.12477701476043887, "grad_norm": 2.0625, "learning_rate": 1.9843263833976052e-05, "loss": 0.6883, "step": 988 }, { "epoch": 0.12490330728550005, "grad_norm": 1.9375, "learning_rate": 1.9842911553490396e-05, "loss": 0.7903, "step": 989 }, { "epoch": 0.12502959981056122, "grad_norm": 2.265625, "learning_rate": 1.9842558880689995e-05, "loss": 0.7429, "step": 990 }, { "epoch": 0.12515589233562238, "grad_norm": 1.90625, "learning_rate": 1.98422058155889e-05, "loss": 0.66, "step": 991 }, { "epoch": 0.12528218486068357, "grad_norm": 1.9453125, "learning_rate": 1.9841852358201193e-05, "loss": 0.6579, "step": 992 }, { "epoch": 0.12540847738574473, "grad_norm": 1.921875, "learning_rate": 1.9841498508540957e-05, "loss": 0.6765, "step": 993 }, { "epoch": 0.1255347699108059, "grad_norm": 4.125, "learning_rate": 1.984114426662229e-05, "loss": 0.7504, "step": 994 }, { "epoch": 0.12566106243586708, "grad_norm": 1.9609375, "learning_rate": 1.9840789632459323e-05, "loss": 0.6313, "step": 995 }, { "epoch": 0.12578735496092824, "grad_norm": 2.078125, "learning_rate": 1.9840434606066182e-05, "loss": 0.7418, "step": 996 }, { "epoch": 0.12591364748598943, "grad_norm": 2.21875, "learning_rate": 1.984007918745702e-05, "loss": 0.8049, "step": 997 }, { "epoch": 0.1260399400110506, "grad_norm": 2.03125, "learning_rate": 1.9839723376646e-05, "loss": 0.7275, "step": 998 }, { "epoch": 0.12616623253611176, "grad_norm": 1.9609375, "learning_rate": 1.983936717364731e-05, "loss": 0.6694, "step": 999 }, { "epoch": 0.12629252506117294, "grad_norm": 2.09375, "learning_rate": 1.983901057847514e-05, "loss": 0.8236, "step": 1000 }, { "epoch": 0.1264188175862341, "grad_norm": 1.9453125, "learning_rate": 1.983865359114371e-05, "loss": 0.6468, "step": 1001 }, { "epoch": 0.1265451101112953, "grad_norm": 1.9296875, "learning_rate": 1.9838296211667244e-05, "loss": 0.7125, "step": 1002 }, { "epoch": 0.12667140263635646, "grad_norm": 2.03125, "learning_rate": 1.983793844005999e-05, "loss": 0.7395, "step": 1003 }, { "epoch": 0.12679769516141765, "grad_norm": 1.96875, "learning_rate": 1.9837580276336205e-05, "loss": 0.6801, "step": 1004 }, { "epoch": 0.1269239876864788, "grad_norm": 2.109375, "learning_rate": 1.9837221720510167e-05, "loss": 0.6374, "step": 1005 }, { "epoch": 0.12705028021153997, "grad_norm": 2.265625, "learning_rate": 1.9836862772596165e-05, "loss": 0.7059, "step": 1006 }, { "epoch": 0.12717657273660116, "grad_norm": 1.859375, "learning_rate": 1.9836503432608505e-05, "loss": 0.6331, "step": 1007 }, { "epoch": 0.12730286526166232, "grad_norm": 2.296875, "learning_rate": 1.9836143700561512e-05, "loss": 0.8428, "step": 1008 }, { "epoch": 0.1274291577867235, "grad_norm": 2.0625, "learning_rate": 1.9835783576469524e-05, "loss": 0.6796, "step": 1009 }, { "epoch": 0.12755545031178467, "grad_norm": 2.140625, "learning_rate": 1.983542306034689e-05, "loss": 0.7279, "step": 1010 }, { "epoch": 0.12768174283684586, "grad_norm": 1.9296875, "learning_rate": 1.9835062152207984e-05, "loss": 0.6807, "step": 1011 }, { "epoch": 0.12780803536190702, "grad_norm": 3.453125, "learning_rate": 1.983470085206719e-05, "loss": 0.9074, "step": 1012 }, { "epoch": 0.12793432788696818, "grad_norm": 1.96875, "learning_rate": 1.9834339159938913e-05, "loss": 0.6577, "step": 1013 }, { "epoch": 0.12806062041202937, "grad_norm": 1.90625, "learning_rate": 1.983397707583756e-05, "loss": 0.6493, "step": 1014 }, { "epoch": 0.12818691293709053, "grad_norm": 1.9609375, "learning_rate": 1.983361459977757e-05, "loss": 0.7011, "step": 1015 }, { "epoch": 0.12831320546215172, "grad_norm": 2.109375, "learning_rate": 1.9833251731773384e-05, "loss": 0.6943, "step": 1016 }, { "epoch": 0.12843949798721288, "grad_norm": 2.1875, "learning_rate": 1.983288847183947e-05, "loss": 0.7181, "step": 1017 }, { "epoch": 0.12856579051227404, "grad_norm": 2.109375, "learning_rate": 1.983252481999031e-05, "loss": 0.7531, "step": 1018 }, { "epoch": 0.12869208303733523, "grad_norm": 1.921875, "learning_rate": 1.983216077624039e-05, "loss": 0.6706, "step": 1019 }, { "epoch": 0.1288183755623964, "grad_norm": 2.125, "learning_rate": 1.9831796340604224e-05, "loss": 0.8069, "step": 1020 }, { "epoch": 0.12894466808745758, "grad_norm": 2.125, "learning_rate": 1.9831431513096336e-05, "loss": 0.7785, "step": 1021 }, { "epoch": 0.12907096061251874, "grad_norm": 1.90625, "learning_rate": 1.983106629373127e-05, "loss": 0.6849, "step": 1022 }, { "epoch": 0.12919725313757993, "grad_norm": 1.9609375, "learning_rate": 1.983070068252358e-05, "loss": 0.6797, "step": 1023 }, { "epoch": 0.1293235456626411, "grad_norm": 1.8125, "learning_rate": 1.983033467948784e-05, "loss": 0.6294, "step": 1024 }, { "epoch": 0.12944983818770225, "grad_norm": 1.9609375, "learning_rate": 1.982996828463864e-05, "loss": 0.7381, "step": 1025 }, { "epoch": 0.12957613071276344, "grad_norm": 1.9296875, "learning_rate": 1.9829601497990575e-05, "loss": 0.6417, "step": 1026 }, { "epoch": 0.1297024232378246, "grad_norm": 1.9609375, "learning_rate": 1.9829234319558276e-05, "loss": 0.7311, "step": 1027 }, { "epoch": 0.1298287157628858, "grad_norm": 2.125, "learning_rate": 1.982886674935637e-05, "loss": 0.7089, "step": 1028 }, { "epoch": 0.12995500828794695, "grad_norm": 1.984375, "learning_rate": 1.982849878739951e-05, "loss": 0.7318, "step": 1029 }, { "epoch": 0.13008130081300814, "grad_norm": 1.9921875, "learning_rate": 1.9828130433702363e-05, "loss": 0.6691, "step": 1030 }, { "epoch": 0.1302075933380693, "grad_norm": 1.9765625, "learning_rate": 1.982776168827961e-05, "loss": 0.6985, "step": 1031 }, { "epoch": 0.13033388586313047, "grad_norm": 2.109375, "learning_rate": 1.9827392551145945e-05, "loss": 0.6894, "step": 1032 }, { "epoch": 0.13046017838819166, "grad_norm": 2.109375, "learning_rate": 1.9827023022316084e-05, "loss": 0.7271, "step": 1033 }, { "epoch": 0.13058647091325282, "grad_norm": 2.0625, "learning_rate": 1.9826653101804757e-05, "loss": 0.6402, "step": 1034 }, { "epoch": 0.130712763438314, "grad_norm": 1.9921875, "learning_rate": 1.9826282789626706e-05, "loss": 0.7238, "step": 1035 }, { "epoch": 0.13083905596337517, "grad_norm": 2.15625, "learning_rate": 1.982591208579669e-05, "loss": 0.709, "step": 1036 }, { "epoch": 0.13096534848843633, "grad_norm": 1.9921875, "learning_rate": 1.9825540990329486e-05, "loss": 0.7277, "step": 1037 }, { "epoch": 0.13109164101349752, "grad_norm": 2.046875, "learning_rate": 1.9825169503239885e-05, "loss": 0.6353, "step": 1038 }, { "epoch": 0.13121793353855868, "grad_norm": 2.0625, "learning_rate": 1.982479762454269e-05, "loss": 0.7581, "step": 1039 }, { "epoch": 0.13134422606361987, "grad_norm": 1.984375, "learning_rate": 1.982442535425273e-05, "loss": 0.7404, "step": 1040 }, { "epoch": 0.13147051858868103, "grad_norm": 1.9921875, "learning_rate": 1.9824052692384837e-05, "loss": 0.7824, "step": 1041 }, { "epoch": 0.13159681111374222, "grad_norm": 1.921875, "learning_rate": 1.9823679638953866e-05, "loss": 0.7028, "step": 1042 }, { "epoch": 0.13172310363880338, "grad_norm": 1.890625, "learning_rate": 1.982330619397469e-05, "loss": 0.7334, "step": 1043 }, { "epoch": 0.13184939616386454, "grad_norm": 2.03125, "learning_rate": 1.9822932357462185e-05, "loss": 0.7182, "step": 1044 }, { "epoch": 0.13197568868892573, "grad_norm": 1.9375, "learning_rate": 1.982255812943126e-05, "loss": 0.7047, "step": 1045 }, { "epoch": 0.1321019812139869, "grad_norm": 2.203125, "learning_rate": 1.9822183509896824e-05, "loss": 0.6726, "step": 1046 }, { "epoch": 0.13222827373904808, "grad_norm": 2.21875, "learning_rate": 1.9821808498873816e-05, "loss": 0.7979, "step": 1047 }, { "epoch": 0.13235456626410924, "grad_norm": 2.0625, "learning_rate": 1.9821433096377174e-05, "loss": 0.7941, "step": 1048 }, { "epoch": 0.1324808587891704, "grad_norm": 2.09375, "learning_rate": 1.9821057302421867e-05, "loss": 0.7063, "step": 1049 }, { "epoch": 0.1326071513142316, "grad_norm": 1.90625, "learning_rate": 1.982068111702287e-05, "loss": 0.686, "step": 1050 }, { "epoch": 0.13273344383929275, "grad_norm": 1.9140625, "learning_rate": 1.9820304540195182e-05, "loss": 0.6614, "step": 1051 }, { "epoch": 0.13285973636435394, "grad_norm": 1.90625, "learning_rate": 1.9819927571953804e-05, "loss": 0.6622, "step": 1052 }, { "epoch": 0.1329860288894151, "grad_norm": 2.046875, "learning_rate": 1.981955021231377e-05, "loss": 0.8471, "step": 1053 }, { "epoch": 0.1331123214144763, "grad_norm": 1.859375, "learning_rate": 1.9819172461290113e-05, "loss": 0.7653, "step": 1054 }, { "epoch": 0.13323861393953745, "grad_norm": 2.125, "learning_rate": 1.9818794318897897e-05, "loss": 0.7306, "step": 1055 }, { "epoch": 0.13336490646459861, "grad_norm": 2.203125, "learning_rate": 1.9818415785152184e-05, "loss": 0.8027, "step": 1056 }, { "epoch": 0.1334911989896598, "grad_norm": 2.0, "learning_rate": 1.981803686006807e-05, "loss": 0.7777, "step": 1057 }, { "epoch": 0.13361749151472097, "grad_norm": 1.9296875, "learning_rate": 1.9817657543660654e-05, "loss": 0.7151, "step": 1058 }, { "epoch": 0.13374378403978215, "grad_norm": 1.8828125, "learning_rate": 1.9817277835945057e-05, "loss": 0.6325, "step": 1059 }, { "epoch": 0.13387007656484332, "grad_norm": 5.59375, "learning_rate": 1.9816897736936413e-05, "loss": 0.8847, "step": 1060 }, { "epoch": 0.1339963690899045, "grad_norm": 1.921875, "learning_rate": 1.9816517246649867e-05, "loss": 0.7208, "step": 1061 }, { "epoch": 0.13412266161496567, "grad_norm": 2.046875, "learning_rate": 1.981613636510059e-05, "loss": 0.7457, "step": 1062 }, { "epoch": 0.13424895414002683, "grad_norm": 1.921875, "learning_rate": 1.981575509230376e-05, "loss": 0.6171, "step": 1063 }, { "epoch": 0.13437524666508802, "grad_norm": 1.875, "learning_rate": 1.9815373428274576e-05, "loss": 0.5879, "step": 1064 }, { "epoch": 0.13450153919014918, "grad_norm": 2.0625, "learning_rate": 1.981499137302825e-05, "loss": 0.7052, "step": 1065 }, { "epoch": 0.13462783171521037, "grad_norm": 1.9453125, "learning_rate": 1.9814608926580007e-05, "loss": 0.6376, "step": 1066 }, { "epoch": 0.13475412424027153, "grad_norm": 2.015625, "learning_rate": 1.9814226088945092e-05, "loss": 0.7154, "step": 1067 }, { "epoch": 0.1348804167653327, "grad_norm": 1.9765625, "learning_rate": 1.9813842860138764e-05, "loss": 0.6755, "step": 1068 }, { "epoch": 0.13500670929039388, "grad_norm": 2.203125, "learning_rate": 1.9813459240176298e-05, "loss": 0.7664, "step": 1069 }, { "epoch": 0.13513300181545504, "grad_norm": 2.171875, "learning_rate": 1.9813075229072986e-05, "loss": 0.7397, "step": 1070 }, { "epoch": 0.13525929434051623, "grad_norm": 2.0625, "learning_rate": 1.981269082684413e-05, "loss": 0.7431, "step": 1071 }, { "epoch": 0.1353855868655774, "grad_norm": 2.015625, "learning_rate": 1.9812306033505052e-05, "loss": 0.7415, "step": 1072 }, { "epoch": 0.13551187939063858, "grad_norm": 1.9296875, "learning_rate": 1.9811920849071092e-05, "loss": 0.7149, "step": 1073 }, { "epoch": 0.13563817191569974, "grad_norm": 2.15625, "learning_rate": 1.9811535273557602e-05, "loss": 0.7878, "step": 1074 }, { "epoch": 0.1357644644407609, "grad_norm": 1.953125, "learning_rate": 1.981114930697994e-05, "loss": 0.7627, "step": 1075 }, { "epoch": 0.1358907569658221, "grad_norm": 1.9453125, "learning_rate": 1.9810762949353507e-05, "loss": 0.691, "step": 1076 }, { "epoch": 0.13601704949088325, "grad_norm": 1.9375, "learning_rate": 1.981037620069369e-05, "loss": 0.6695, "step": 1077 }, { "epoch": 0.13614334201594444, "grad_norm": 1.8828125, "learning_rate": 1.9809989061015908e-05, "loss": 0.7105, "step": 1078 }, { "epoch": 0.1362696345410056, "grad_norm": 2.84375, "learning_rate": 1.9809601530335592e-05, "loss": 0.76, "step": 1079 }, { "epoch": 0.13639592706606676, "grad_norm": 1.828125, "learning_rate": 1.9809213608668188e-05, "loss": 0.5984, "step": 1080 }, { "epoch": 0.13652221959112795, "grad_norm": 1.953125, "learning_rate": 1.9808825296029153e-05, "loss": 0.586, "step": 1081 }, { "epoch": 0.1366485121161891, "grad_norm": 1.96875, "learning_rate": 1.980843659243397e-05, "loss": 0.7059, "step": 1082 }, { "epoch": 0.1367748046412503, "grad_norm": 1.9921875, "learning_rate": 1.9808047497898126e-05, "loss": 0.6645, "step": 1083 }, { "epoch": 0.13690109716631146, "grad_norm": 2.0, "learning_rate": 1.9807658012437136e-05, "loss": 0.6755, "step": 1084 }, { "epoch": 0.13702738969137265, "grad_norm": 2.046875, "learning_rate": 1.980726813606652e-05, "loss": 0.7246, "step": 1085 }, { "epoch": 0.13715368221643381, "grad_norm": 2.015625, "learning_rate": 1.980687786880182e-05, "loss": 0.6654, "step": 1086 }, { "epoch": 0.13727997474149498, "grad_norm": 2.09375, "learning_rate": 1.980648721065859e-05, "loss": 0.6668, "step": 1087 }, { "epoch": 0.13740626726655616, "grad_norm": 1.8046875, "learning_rate": 1.9806096161652398e-05, "loss": 0.6487, "step": 1088 }, { "epoch": 0.13753255979161733, "grad_norm": 1.9296875, "learning_rate": 1.980570472179883e-05, "loss": 0.6467, "step": 1089 }, { "epoch": 0.13765885231667851, "grad_norm": 1.921875, "learning_rate": 1.9805312891113495e-05, "loss": 0.6796, "step": 1090 }, { "epoch": 0.13778514484173968, "grad_norm": 2.265625, "learning_rate": 1.9804920669612005e-05, "loss": 0.6955, "step": 1091 }, { "epoch": 0.13791143736680087, "grad_norm": 2.140625, "learning_rate": 1.980452805730999e-05, "loss": 0.8692, "step": 1092 }, { "epoch": 0.13803772989186203, "grad_norm": 2.046875, "learning_rate": 1.9804135054223107e-05, "loss": 0.6926, "step": 1093 }, { "epoch": 0.1381640224169232, "grad_norm": 1.765625, "learning_rate": 1.980374166036701e-05, "loss": 0.6379, "step": 1094 }, { "epoch": 0.13829031494198438, "grad_norm": 1.8671875, "learning_rate": 1.980334787575739e-05, "loss": 0.7289, "step": 1095 }, { "epoch": 0.13841660746704554, "grad_norm": 2.046875, "learning_rate": 1.9802953700409936e-05, "loss": 0.7301, "step": 1096 }, { "epoch": 0.13854289999210673, "grad_norm": 1.96875, "learning_rate": 1.9802559134340357e-05, "loss": 0.6982, "step": 1097 }, { "epoch": 0.1386691925171679, "grad_norm": 1.859375, "learning_rate": 1.9802164177564384e-05, "loss": 0.7004, "step": 1098 }, { "epoch": 0.13879548504222905, "grad_norm": 1.9765625, "learning_rate": 1.9801768830097753e-05, "loss": 0.7412, "step": 1099 }, { "epoch": 0.13892177756729024, "grad_norm": 2.140625, "learning_rate": 1.980137309195623e-05, "loss": 0.8136, "step": 1100 }, { "epoch": 0.1390480700923514, "grad_norm": 2.1875, "learning_rate": 1.980097696315558e-05, "loss": 0.7113, "step": 1101 }, { "epoch": 0.1391743626174126, "grad_norm": 1.90625, "learning_rate": 1.98005804437116e-05, "loss": 0.6812, "step": 1102 }, { "epoch": 0.13930065514247375, "grad_norm": 2.015625, "learning_rate": 1.9800183533640085e-05, "loss": 0.6786, "step": 1103 }, { "epoch": 0.13942694766753494, "grad_norm": 4.53125, "learning_rate": 1.979978623295686e-05, "loss": 0.9208, "step": 1104 }, { "epoch": 0.1395532401925961, "grad_norm": 2.09375, "learning_rate": 1.9799388541677762e-05, "loss": 0.6154, "step": 1105 }, { "epoch": 0.13967953271765726, "grad_norm": 1.9140625, "learning_rate": 1.979899045981864e-05, "loss": 0.5347, "step": 1106 }, { "epoch": 0.13980582524271845, "grad_norm": 2.3125, "learning_rate": 1.979859198739536e-05, "loss": 0.7419, "step": 1107 }, { "epoch": 0.1399321177677796, "grad_norm": 2.1875, "learning_rate": 1.9798193124423804e-05, "loss": 0.7603, "step": 1108 }, { "epoch": 0.1400584102928408, "grad_norm": 1.84375, "learning_rate": 1.9797793870919872e-05, "loss": 0.6941, "step": 1109 }, { "epoch": 0.14018470281790196, "grad_norm": 1.9296875, "learning_rate": 1.9797394226899474e-05, "loss": 0.6964, "step": 1110 }, { "epoch": 0.14031099534296315, "grad_norm": 2.046875, "learning_rate": 1.979699419237854e-05, "loss": 0.7035, "step": 1111 }, { "epoch": 0.1404372878680243, "grad_norm": 3.9375, "learning_rate": 1.979659376737302e-05, "loss": 0.9527, "step": 1112 }, { "epoch": 0.14056358039308547, "grad_norm": 2.015625, "learning_rate": 1.9796192951898865e-05, "loss": 0.7166, "step": 1113 }, { "epoch": 0.14068987291814666, "grad_norm": 2.046875, "learning_rate": 1.9795791745972054e-05, "loss": 0.7668, "step": 1114 }, { "epoch": 0.14081616544320782, "grad_norm": 2.171875, "learning_rate": 1.979539014960858e-05, "loss": 0.7027, "step": 1115 }, { "epoch": 0.140942457968269, "grad_norm": 2.125, "learning_rate": 1.979498816282445e-05, "loss": 0.8007, "step": 1116 }, { "epoch": 0.14106875049333018, "grad_norm": 2.234375, "learning_rate": 1.979458578563568e-05, "loss": 0.6584, "step": 1117 }, { "epoch": 0.14119504301839134, "grad_norm": 2.15625, "learning_rate": 1.9794183018058313e-05, "loss": 0.7458, "step": 1118 }, { "epoch": 0.14132133554345253, "grad_norm": 2.21875, "learning_rate": 1.9793779860108408e-05, "loss": 0.7136, "step": 1119 }, { "epoch": 0.1414476280685137, "grad_norm": 2.0, "learning_rate": 1.979337631180202e-05, "loss": 0.7033, "step": 1120 }, { "epoch": 0.14157392059357488, "grad_norm": 1.921875, "learning_rate": 1.9792972373155247e-05, "loss": 0.669, "step": 1121 }, { "epoch": 0.14170021311863604, "grad_norm": 2.1875, "learning_rate": 1.9792568044184176e-05, "loss": 0.6209, "step": 1122 }, { "epoch": 0.14182650564369723, "grad_norm": 1.84375, "learning_rate": 1.9792163324904937e-05, "loss": 0.6922, "step": 1123 }, { "epoch": 0.1419527981687584, "grad_norm": 2.0, "learning_rate": 1.979175821533365e-05, "loss": 0.6241, "step": 1124 }, { "epoch": 0.14207909069381955, "grad_norm": 2.015625, "learning_rate": 1.9791352715486468e-05, "loss": 0.7167, "step": 1125 }, { "epoch": 0.14220538321888074, "grad_norm": 2.03125, "learning_rate": 1.9790946825379546e-05, "loss": 0.6832, "step": 1126 }, { "epoch": 0.1423316757439419, "grad_norm": 2.125, "learning_rate": 1.9790540545029068e-05, "loss": 0.7196, "step": 1127 }, { "epoch": 0.1424579682690031, "grad_norm": 1.8984375, "learning_rate": 1.979013387445123e-05, "loss": 0.6412, "step": 1128 }, { "epoch": 0.14258426079406425, "grad_norm": 2.09375, "learning_rate": 1.9789726813662233e-05, "loss": 0.6829, "step": 1129 }, { "epoch": 0.1427105533191254, "grad_norm": 2.015625, "learning_rate": 1.9789319362678308e-05, "loss": 0.748, "step": 1130 }, { "epoch": 0.1428368458441866, "grad_norm": 2.125, "learning_rate": 1.9788911521515688e-05, "loss": 0.6936, "step": 1131 }, { "epoch": 0.14296313836924776, "grad_norm": 2.125, "learning_rate": 1.978850329019064e-05, "loss": 0.8595, "step": 1132 }, { "epoch": 0.14308943089430895, "grad_norm": 2.171875, "learning_rate": 1.9788094668719418e-05, "loss": 0.7464, "step": 1133 }, { "epoch": 0.1432157234193701, "grad_norm": 1.90625, "learning_rate": 1.9787685657118324e-05, "loss": 0.6369, "step": 1134 }, { "epoch": 0.1433420159444313, "grad_norm": 2.15625, "learning_rate": 1.9787276255403656e-05, "loss": 0.8037, "step": 1135 }, { "epoch": 0.14346830846949246, "grad_norm": 2.140625, "learning_rate": 1.978686646359173e-05, "loss": 0.7311, "step": 1136 }, { "epoch": 0.14359460099455362, "grad_norm": 2.078125, "learning_rate": 1.978645628169888e-05, "loss": 0.6117, "step": 1137 }, { "epoch": 0.1437208935196148, "grad_norm": 1.9765625, "learning_rate": 1.9786045709741452e-05, "loss": 0.6734, "step": 1138 }, { "epoch": 0.14384718604467597, "grad_norm": 1.8359375, "learning_rate": 1.9785634747735816e-05, "loss": 0.6278, "step": 1139 }, { "epoch": 0.14397347856973716, "grad_norm": 2.109375, "learning_rate": 1.978522339569835e-05, "loss": 0.7674, "step": 1140 }, { "epoch": 0.14409977109479832, "grad_norm": 1.9140625, "learning_rate": 1.9784811653645443e-05, "loss": 0.7196, "step": 1141 }, { "epoch": 0.1442260636198595, "grad_norm": 1.90625, "learning_rate": 1.9784399521593518e-05, "loss": 0.6637, "step": 1142 }, { "epoch": 0.14435235614492067, "grad_norm": 1.921875, "learning_rate": 1.9783986999558994e-05, "loss": 0.7337, "step": 1143 }, { "epoch": 0.14447864866998184, "grad_norm": 2.09375, "learning_rate": 1.978357408755831e-05, "loss": 0.6744, "step": 1144 }, { "epoch": 0.14460494119504302, "grad_norm": 2.140625, "learning_rate": 1.9783160785607932e-05, "loss": 0.6799, "step": 1145 }, { "epoch": 0.14473123372010419, "grad_norm": 1.859375, "learning_rate": 1.978274709372433e-05, "loss": 0.7194, "step": 1146 }, { "epoch": 0.14485752624516537, "grad_norm": 1.875, "learning_rate": 1.978233301192399e-05, "loss": 0.5306, "step": 1147 }, { "epoch": 0.14498381877022654, "grad_norm": 2.203125, "learning_rate": 1.978191854022342e-05, "loss": 0.7634, "step": 1148 }, { "epoch": 0.1451101112952877, "grad_norm": 1.9921875, "learning_rate": 1.9781503678639137e-05, "loss": 0.7024, "step": 1149 }, { "epoch": 0.14523640382034889, "grad_norm": 1.9765625, "learning_rate": 1.9781088427187677e-05, "loss": 0.7334, "step": 1150 }, { "epoch": 0.14536269634541005, "grad_norm": 1.96875, "learning_rate": 1.9780672785885593e-05, "loss": 0.6625, "step": 1151 }, { "epoch": 0.14548898887047124, "grad_norm": 2.015625, "learning_rate": 1.978025675474945e-05, "loss": 0.6839, "step": 1152 }, { "epoch": 0.1456152813955324, "grad_norm": 2.0, "learning_rate": 1.977984033379583e-05, "loss": 0.7055, "step": 1153 }, { "epoch": 0.1457415739205936, "grad_norm": 1.8125, "learning_rate": 1.977942352304133e-05, "loss": 0.649, "step": 1154 }, { "epoch": 0.14586786644565475, "grad_norm": 2.0, "learning_rate": 1.9779006322502565e-05, "loss": 0.7103, "step": 1155 }, { "epoch": 0.1459941589707159, "grad_norm": 2.09375, "learning_rate": 1.977858873219616e-05, "loss": 0.7412, "step": 1156 }, { "epoch": 0.1461204514957771, "grad_norm": 2.109375, "learning_rate": 1.977817075213876e-05, "loss": 0.6832, "step": 1157 }, { "epoch": 0.14624674402083826, "grad_norm": 1.9140625, "learning_rate": 1.9777752382347028e-05, "loss": 0.7472, "step": 1158 }, { "epoch": 0.14637303654589945, "grad_norm": 1.953125, "learning_rate": 1.977733362283764e-05, "loss": 0.7699, "step": 1159 }, { "epoch": 0.1464993290709606, "grad_norm": 1.984375, "learning_rate": 1.977691447362728e-05, "loss": 0.8532, "step": 1160 }, { "epoch": 0.1466256215960218, "grad_norm": 4.375, "learning_rate": 1.9776494934732658e-05, "loss": 0.9304, "step": 1161 }, { "epoch": 0.14675191412108296, "grad_norm": 1.90625, "learning_rate": 1.97760750061705e-05, "loss": 0.6723, "step": 1162 }, { "epoch": 0.14687820664614412, "grad_norm": 2.28125, "learning_rate": 1.9775654687957536e-05, "loss": 0.8653, "step": 1163 }, { "epoch": 0.1470044991712053, "grad_norm": 1.9765625, "learning_rate": 1.9775233980110524e-05, "loss": 0.7554, "step": 1164 }, { "epoch": 0.14713079169626647, "grad_norm": 1.9609375, "learning_rate": 1.977481288264623e-05, "loss": 0.7941, "step": 1165 }, { "epoch": 0.14725708422132766, "grad_norm": 1.9453125, "learning_rate": 1.977439139558144e-05, "loss": 0.7083, "step": 1166 }, { "epoch": 0.14738337674638882, "grad_norm": 1.828125, "learning_rate": 1.977396951893295e-05, "loss": 0.7125, "step": 1167 }, { "epoch": 0.14750966927144998, "grad_norm": 1.9375, "learning_rate": 1.9773547252717577e-05, "loss": 0.6822, "step": 1168 }, { "epoch": 0.14763596179651117, "grad_norm": 2.03125, "learning_rate": 1.9773124596952153e-05, "loss": 0.7045, "step": 1169 }, { "epoch": 0.14776225432157233, "grad_norm": 1.9765625, "learning_rate": 1.977270155165352e-05, "loss": 0.6098, "step": 1170 }, { "epoch": 0.14788854684663352, "grad_norm": 1.9765625, "learning_rate": 1.9772278116838546e-05, "loss": 0.6887, "step": 1171 }, { "epoch": 0.14801483937169468, "grad_norm": 1.9765625, "learning_rate": 1.9771854292524103e-05, "loss": 0.7095, "step": 1172 }, { "epoch": 0.14814113189675587, "grad_norm": 1.9765625, "learning_rate": 1.977143007872708e-05, "loss": 0.6767, "step": 1173 }, { "epoch": 0.14826742442181703, "grad_norm": 1.9375, "learning_rate": 1.9771005475464396e-05, "loss": 0.6853, "step": 1174 }, { "epoch": 0.1483937169468782, "grad_norm": 2.3125, "learning_rate": 1.977058048275297e-05, "loss": 0.7596, "step": 1175 }, { "epoch": 0.14852000947193938, "grad_norm": 2.0625, "learning_rate": 1.9770155100609733e-05, "loss": 0.7544, "step": 1176 }, { "epoch": 0.14864630199700055, "grad_norm": 1.9296875, "learning_rate": 1.9769729329051654e-05, "loss": 0.662, "step": 1177 }, { "epoch": 0.14877259452206174, "grad_norm": 1.7890625, "learning_rate": 1.976930316809569e-05, "loss": 0.6079, "step": 1178 }, { "epoch": 0.1488988870471229, "grad_norm": 2.078125, "learning_rate": 1.9768876617758836e-05, "loss": 0.7579, "step": 1179 }, { "epoch": 0.14902517957218406, "grad_norm": 2.0625, "learning_rate": 1.9768449678058086e-05, "loss": 0.7503, "step": 1180 }, { "epoch": 0.14915147209724525, "grad_norm": 3.609375, "learning_rate": 1.9768022349010464e-05, "loss": 0.7496, "step": 1181 }, { "epoch": 0.1492777646223064, "grad_norm": 1.8984375, "learning_rate": 1.9767594630632998e-05, "loss": 0.6106, "step": 1182 }, { "epoch": 0.1494040571473676, "grad_norm": 1.9453125, "learning_rate": 1.976716652294274e-05, "loss": 0.6913, "step": 1183 }, { "epoch": 0.14953034967242876, "grad_norm": 1.8828125, "learning_rate": 1.9766738025956744e-05, "loss": 0.7556, "step": 1184 }, { "epoch": 0.14965664219748995, "grad_norm": 1.9609375, "learning_rate": 1.97663091396921e-05, "loss": 0.6873, "step": 1185 }, { "epoch": 0.1497829347225511, "grad_norm": 2.125, "learning_rate": 1.9765879864165892e-05, "loss": 0.847, "step": 1186 }, { "epoch": 0.14990922724761227, "grad_norm": 2.0, "learning_rate": 1.976545019939524e-05, "loss": 0.6388, "step": 1187 }, { "epoch": 0.15003551977267346, "grad_norm": 1.921875, "learning_rate": 1.9765020145397264e-05, "loss": 0.6039, "step": 1188 }, { "epoch": 0.15016181229773462, "grad_norm": 1.9375, "learning_rate": 1.9764589702189104e-05, "loss": 0.6836, "step": 1189 }, { "epoch": 0.1502881048227958, "grad_norm": 2.265625, "learning_rate": 1.9764158869787916e-05, "loss": 0.9019, "step": 1190 }, { "epoch": 0.15041439734785697, "grad_norm": 2.03125, "learning_rate": 1.976372764821088e-05, "loss": 0.6542, "step": 1191 }, { "epoch": 0.15054068987291816, "grad_norm": 1.8984375, "learning_rate": 1.9763296037475174e-05, "loss": 0.6114, "step": 1192 }, { "epoch": 0.15066698239797932, "grad_norm": 2.265625, "learning_rate": 1.9762864037598005e-05, "loss": 0.7644, "step": 1193 }, { "epoch": 0.15079327492304048, "grad_norm": 2.03125, "learning_rate": 1.9762431648596588e-05, "loss": 0.6675, "step": 1194 }, { "epoch": 0.15091956744810167, "grad_norm": 2.109375, "learning_rate": 1.9761998870488163e-05, "loss": 0.7385, "step": 1195 }, { "epoch": 0.15104585997316283, "grad_norm": 2.09375, "learning_rate": 1.9761565703289977e-05, "loss": 0.7262, "step": 1196 }, { "epoch": 0.15117215249822402, "grad_norm": 2.296875, "learning_rate": 1.9761132147019292e-05, "loss": 0.6593, "step": 1197 }, { "epoch": 0.15129844502328518, "grad_norm": 1.9609375, "learning_rate": 1.9760698201693388e-05, "loss": 0.7455, "step": 1198 }, { "epoch": 0.15142473754834634, "grad_norm": 1.9921875, "learning_rate": 1.9760263867329568e-05, "loss": 0.7043, "step": 1199 }, { "epoch": 0.15155103007340753, "grad_norm": 1.9375, "learning_rate": 1.9759829143945138e-05, "loss": 0.6994, "step": 1200 }, { "epoch": 0.1516773225984687, "grad_norm": 1.8984375, "learning_rate": 1.9759394031557425e-05, "loss": 0.6, "step": 1201 }, { "epoch": 0.15180361512352988, "grad_norm": 2.375, "learning_rate": 1.9758958530183773e-05, "loss": 0.6431, "step": 1202 }, { "epoch": 0.15192990764859104, "grad_norm": 1.9453125, "learning_rate": 1.9758522639841544e-05, "loss": 0.7562, "step": 1203 }, { "epoch": 0.15205620017365223, "grad_norm": 2.09375, "learning_rate": 1.9758086360548107e-05, "loss": 0.6591, "step": 1204 }, { "epoch": 0.1521824926987134, "grad_norm": 1.9765625, "learning_rate": 1.9757649692320847e-05, "loss": 0.7019, "step": 1205 }, { "epoch": 0.15230878522377456, "grad_norm": 2.125, "learning_rate": 1.9757212635177177e-05, "loss": 0.7903, "step": 1206 }, { "epoch": 0.15243507774883575, "grad_norm": 1.9765625, "learning_rate": 1.975677518913451e-05, "loss": 0.6598, "step": 1207 }, { "epoch": 0.1525613702738969, "grad_norm": 2.203125, "learning_rate": 1.9756337354210283e-05, "loss": 0.7352, "step": 1208 }, { "epoch": 0.1526876627989581, "grad_norm": 1.921875, "learning_rate": 1.9755899130421955e-05, "loss": 0.7021, "step": 1209 }, { "epoch": 0.15281395532401926, "grad_norm": 1.96875, "learning_rate": 1.9755460517786983e-05, "loss": 0.6947, "step": 1210 }, { "epoch": 0.15294024784908045, "grad_norm": 1.9765625, "learning_rate": 1.9755021516322852e-05, "loss": 0.618, "step": 1211 }, { "epoch": 0.1530665403741416, "grad_norm": 2.015625, "learning_rate": 1.975458212604706e-05, "loss": 0.798, "step": 1212 }, { "epoch": 0.15319283289920277, "grad_norm": 2.0, "learning_rate": 1.975414234697712e-05, "loss": 0.6695, "step": 1213 }, { "epoch": 0.15331912542426396, "grad_norm": 1.90625, "learning_rate": 1.975370217913056e-05, "loss": 0.6721, "step": 1214 }, { "epoch": 0.15344541794932512, "grad_norm": 1.984375, "learning_rate": 1.9753261622524923e-05, "loss": 0.6868, "step": 1215 }, { "epoch": 0.1535717104743863, "grad_norm": 1.8046875, "learning_rate": 1.9752820677177772e-05, "loss": 0.6419, "step": 1216 }, { "epoch": 0.15369800299944747, "grad_norm": 1.859375, "learning_rate": 1.975237934310668e-05, "loss": 0.629, "step": 1217 }, { "epoch": 0.15382429552450863, "grad_norm": 1.90625, "learning_rate": 1.9751937620329235e-05, "loss": 0.7072, "step": 1218 }, { "epoch": 0.15395058804956982, "grad_norm": 2.109375, "learning_rate": 1.9751495508863048e-05, "loss": 0.6855, "step": 1219 }, { "epoch": 0.15407688057463098, "grad_norm": 1.921875, "learning_rate": 1.9751053008725736e-05, "loss": 0.6509, "step": 1220 }, { "epoch": 0.15420317309969217, "grad_norm": 1.8203125, "learning_rate": 1.975061011993494e-05, "loss": 0.6497, "step": 1221 }, { "epoch": 0.15432946562475333, "grad_norm": 1.8671875, "learning_rate": 1.975016684250831e-05, "loss": 0.66, "step": 1222 }, { "epoch": 0.15445575814981452, "grad_norm": 1.9453125, "learning_rate": 1.9749723176463517e-05, "loss": 0.6882, "step": 1223 }, { "epoch": 0.15458205067487568, "grad_norm": 2.0625, "learning_rate": 1.9749279121818235e-05, "loss": 0.772, "step": 1224 }, { "epoch": 0.15470834319993684, "grad_norm": 2.125, "learning_rate": 1.9748834678590176e-05, "loss": 0.6819, "step": 1225 }, { "epoch": 0.15483463572499803, "grad_norm": 1.8359375, "learning_rate": 1.9748389846797047e-05, "loss": 0.7338, "step": 1226 }, { "epoch": 0.1549609282500592, "grad_norm": 1.96875, "learning_rate": 1.9747944626456577e-05, "loss": 0.6634, "step": 1227 }, { "epoch": 0.15508722077512038, "grad_norm": 1.8515625, "learning_rate": 1.9747499017586516e-05, "loss": 0.6568, "step": 1228 }, { "epoch": 0.15521351330018154, "grad_norm": 1.9453125, "learning_rate": 1.974705302020462e-05, "loss": 0.6032, "step": 1229 }, { "epoch": 0.1553398058252427, "grad_norm": 1.9765625, "learning_rate": 1.974660663432867e-05, "loss": 0.7783, "step": 1230 }, { "epoch": 0.1554660983503039, "grad_norm": 1.921875, "learning_rate": 1.9746159859976456e-05, "loss": 0.7937, "step": 1231 }, { "epoch": 0.15559239087536506, "grad_norm": 1.890625, "learning_rate": 1.974571269716578e-05, "loss": 0.6993, "step": 1232 }, { "epoch": 0.15571868340042624, "grad_norm": 1.9765625, "learning_rate": 1.974526514591448e-05, "loss": 0.6578, "step": 1233 }, { "epoch": 0.1558449759254874, "grad_norm": 1.984375, "learning_rate": 1.9744817206240374e-05, "loss": 0.7487, "step": 1234 }, { "epoch": 0.1559712684505486, "grad_norm": 2.015625, "learning_rate": 1.974436887816133e-05, "loss": 0.691, "step": 1235 }, { "epoch": 0.15609756097560976, "grad_norm": 1.984375, "learning_rate": 1.9743920161695212e-05, "loss": 0.7284, "step": 1236 }, { "epoch": 0.15622385350067092, "grad_norm": 2.15625, "learning_rate": 1.974347105685991e-05, "loss": 0.6933, "step": 1237 }, { "epoch": 0.1563501460257321, "grad_norm": 1.9375, "learning_rate": 1.974302156367332e-05, "loss": 0.6975, "step": 1238 }, { "epoch": 0.15647643855079327, "grad_norm": 2.046875, "learning_rate": 1.9742571682153353e-05, "loss": 0.7004, "step": 1239 }, { "epoch": 0.15660273107585446, "grad_norm": 2.09375, "learning_rate": 1.974212141231795e-05, "loss": 0.7435, "step": 1240 }, { "epoch": 0.15672902360091562, "grad_norm": 2.109375, "learning_rate": 1.974167075418505e-05, "loss": 0.6579, "step": 1241 }, { "epoch": 0.1568553161259768, "grad_norm": 1.9375, "learning_rate": 1.9741219707772618e-05, "loss": 0.6838, "step": 1242 }, { "epoch": 0.15698160865103797, "grad_norm": 2.0, "learning_rate": 1.9740768273098633e-05, "loss": 0.7748, "step": 1243 }, { "epoch": 0.15710790117609913, "grad_norm": 2.171875, "learning_rate": 1.9740316450181086e-05, "loss": 0.7258, "step": 1244 }, { "epoch": 0.15723419370116032, "grad_norm": 1.96875, "learning_rate": 1.973986423903799e-05, "loss": 0.6603, "step": 1245 }, { "epoch": 0.15736048622622148, "grad_norm": 1.9609375, "learning_rate": 1.9739411639687362e-05, "loss": 0.6866, "step": 1246 }, { "epoch": 0.15748677875128267, "grad_norm": 2.0625, "learning_rate": 1.973895865214724e-05, "loss": 0.6285, "step": 1247 }, { "epoch": 0.15761307127634383, "grad_norm": 2.078125, "learning_rate": 1.9738505276435692e-05, "loss": 0.6867, "step": 1248 }, { "epoch": 0.157739363801405, "grad_norm": 1.9765625, "learning_rate": 1.9738051512570776e-05, "loss": 0.6978, "step": 1249 }, { "epoch": 0.15786565632646618, "grad_norm": 2.03125, "learning_rate": 1.9737597360570583e-05, "loss": 0.6983, "step": 1250 }, { "epoch": 0.15799194885152734, "grad_norm": 1.9765625, "learning_rate": 1.9737142820453215e-05, "loss": 0.7221, "step": 1251 }, { "epoch": 0.15811824137658853, "grad_norm": 1.9140625, "learning_rate": 1.9736687892236786e-05, "loss": 0.7292, "step": 1252 }, { "epoch": 0.1582445339016497, "grad_norm": 2.0, "learning_rate": 1.9736232575939433e-05, "loss": 0.7674, "step": 1253 }, { "epoch": 0.15837082642671088, "grad_norm": 2.265625, "learning_rate": 1.9735776871579294e-05, "loss": 0.7491, "step": 1254 }, { "epoch": 0.15849711895177204, "grad_norm": 1.875, "learning_rate": 1.9735320779174545e-05, "loss": 0.644, "step": 1255 }, { "epoch": 0.1586234114768332, "grad_norm": 1.9296875, "learning_rate": 1.9734864298743358e-05, "loss": 0.6897, "step": 1256 }, { "epoch": 0.1587497040018944, "grad_norm": 2.125, "learning_rate": 1.9734407430303926e-05, "loss": 0.7978, "step": 1257 }, { "epoch": 0.15887599652695555, "grad_norm": 2.03125, "learning_rate": 1.9733950173874462e-05, "loss": 0.7074, "step": 1258 }, { "epoch": 0.15900228905201674, "grad_norm": 2.28125, "learning_rate": 1.9733492529473192e-05, "loss": 0.7445, "step": 1259 }, { "epoch": 0.1591285815770779, "grad_norm": 2.734375, "learning_rate": 1.973303449711835e-05, "loss": 0.8628, "step": 1260 }, { "epoch": 0.1592548741021391, "grad_norm": 1.9375, "learning_rate": 1.9732576076828195e-05, "loss": 0.6423, "step": 1261 }, { "epoch": 0.15938116662720025, "grad_norm": 2.078125, "learning_rate": 1.9732117268621005e-05, "loss": 0.7005, "step": 1262 }, { "epoch": 0.15950745915226142, "grad_norm": 2.203125, "learning_rate": 1.973165807251506e-05, "loss": 0.8088, "step": 1263 }, { "epoch": 0.1596337516773226, "grad_norm": 2.015625, "learning_rate": 1.9731198488528664e-05, "loss": 0.7094, "step": 1264 }, { "epoch": 0.15976004420238377, "grad_norm": 2.03125, "learning_rate": 1.9730738516680138e-05, "loss": 0.8307, "step": 1265 }, { "epoch": 0.15988633672744496, "grad_norm": 2.0625, "learning_rate": 1.973027815698781e-05, "loss": 0.6923, "step": 1266 }, { "epoch": 0.16001262925250612, "grad_norm": 1.9453125, "learning_rate": 1.972981740947003e-05, "loss": 0.6729, "step": 1267 }, { "epoch": 0.16013892177756728, "grad_norm": 1.96875, "learning_rate": 1.9729356274145165e-05, "loss": 0.6483, "step": 1268 }, { "epoch": 0.16026521430262847, "grad_norm": 1.9140625, "learning_rate": 1.9728894751031595e-05, "loss": 0.7095, "step": 1269 }, { "epoch": 0.16039150682768963, "grad_norm": 2.15625, "learning_rate": 1.9728432840147714e-05, "loss": 0.8751, "step": 1270 }, { "epoch": 0.16051779935275082, "grad_norm": 1.875, "learning_rate": 1.972797054151193e-05, "loss": 0.6766, "step": 1271 }, { "epoch": 0.16064409187781198, "grad_norm": 4.34375, "learning_rate": 1.9727507855142674e-05, "loss": 1.0146, "step": 1272 }, { "epoch": 0.16077038440287317, "grad_norm": 1.8828125, "learning_rate": 1.9727044781058384e-05, "loss": 0.6816, "step": 1273 }, { "epoch": 0.16089667692793433, "grad_norm": 2.03125, "learning_rate": 1.972658131927752e-05, "loss": 0.7423, "step": 1274 }, { "epoch": 0.1610229694529955, "grad_norm": 1.9609375, "learning_rate": 1.9726117469818548e-05, "loss": 0.7232, "step": 1275 }, { "epoch": 0.16114926197805668, "grad_norm": 2.171875, "learning_rate": 1.9725653232699962e-05, "loss": 0.7535, "step": 1276 }, { "epoch": 0.16127555450311784, "grad_norm": 2.0625, "learning_rate": 1.9725188607940265e-05, "loss": 0.6601, "step": 1277 }, { "epoch": 0.16140184702817903, "grad_norm": 2.171875, "learning_rate": 1.9724723595557977e-05, "loss": 0.874, "step": 1278 }, { "epoch": 0.1615281395532402, "grad_norm": 1.9296875, "learning_rate": 1.9724258195571626e-05, "loss": 0.6716, "step": 1279 }, { "epoch": 0.16165443207830135, "grad_norm": 1.8203125, "learning_rate": 1.9723792407999766e-05, "loss": 0.6169, "step": 1280 }, { "epoch": 0.16178072460336254, "grad_norm": 1.8125, "learning_rate": 1.9723326232860964e-05, "loss": 0.6922, "step": 1281 }, { "epoch": 0.1619070171284237, "grad_norm": 1.9375, "learning_rate": 1.9722859670173798e-05, "loss": 0.7662, "step": 1282 }, { "epoch": 0.1620333096534849, "grad_norm": 2.15625, "learning_rate": 1.972239271995686e-05, "loss": 0.797, "step": 1283 }, { "epoch": 0.16215960217854605, "grad_norm": 1.8671875, "learning_rate": 1.9721925382228774e-05, "loss": 0.6652, "step": 1284 }, { "epoch": 0.16228589470360724, "grad_norm": 2.140625, "learning_rate": 1.9721457657008156e-05, "loss": 0.7759, "step": 1285 }, { "epoch": 0.1624121872286684, "grad_norm": 1.9765625, "learning_rate": 1.9720989544313648e-05, "loss": 0.6827, "step": 1286 }, { "epoch": 0.16253847975372956, "grad_norm": 2.140625, "learning_rate": 1.9720521044163914e-05, "loss": 0.7163, "step": 1287 }, { "epoch": 0.16266477227879075, "grad_norm": 2.0625, "learning_rate": 1.9720052156577625e-05, "loss": 0.7068, "step": 1288 }, { "epoch": 0.16279106480385191, "grad_norm": 2.015625, "learning_rate": 1.9719582881573468e-05, "loss": 0.6875, "step": 1289 }, { "epoch": 0.1629173573289131, "grad_norm": 1.8515625, "learning_rate": 1.971911321917015e-05, "loss": 0.645, "step": 1290 }, { "epoch": 0.16304364985397427, "grad_norm": 2.140625, "learning_rate": 1.9718643169386388e-05, "loss": 0.7212, "step": 1291 }, { "epoch": 0.16316994237903545, "grad_norm": 2.015625, "learning_rate": 1.971817273224092e-05, "loss": 0.7323, "step": 1292 }, { "epoch": 0.16329623490409662, "grad_norm": 1.921875, "learning_rate": 1.971770190775249e-05, "loss": 0.7055, "step": 1293 }, { "epoch": 0.16342252742915778, "grad_norm": 1.984375, "learning_rate": 1.971723069593987e-05, "loss": 0.745, "step": 1294 }, { "epoch": 0.16354881995421897, "grad_norm": 2.15625, "learning_rate": 1.971675909682184e-05, "loss": 0.7101, "step": 1295 }, { "epoch": 0.16367511247928013, "grad_norm": 2.0625, "learning_rate": 1.97162871104172e-05, "loss": 0.6528, "step": 1296 }, { "epoch": 0.16380140500434132, "grad_norm": 2.140625, "learning_rate": 1.9715814736744755e-05, "loss": 0.6722, "step": 1297 }, { "epoch": 0.16392769752940248, "grad_norm": 2.203125, "learning_rate": 1.9715341975823337e-05, "loss": 0.8479, "step": 1298 }, { "epoch": 0.16405399005446364, "grad_norm": 2.265625, "learning_rate": 1.9714868827671792e-05, "loss": 0.6909, "step": 1299 }, { "epoch": 0.16418028257952483, "grad_norm": 2.0625, "learning_rate": 1.971439529230897e-05, "loss": 0.736, "step": 1300 }, { "epoch": 0.164306575104586, "grad_norm": 2.03125, "learning_rate": 1.9713921369753756e-05, "loss": 0.8484, "step": 1301 }, { "epoch": 0.16443286762964718, "grad_norm": 1.875, "learning_rate": 1.971344706002503e-05, "loss": 0.6809, "step": 1302 }, { "epoch": 0.16455916015470834, "grad_norm": 2.09375, "learning_rate": 1.9712972363141702e-05, "loss": 0.7371, "step": 1303 }, { "epoch": 0.16468545267976953, "grad_norm": 1.8046875, "learning_rate": 1.9712497279122692e-05, "loss": 0.6526, "step": 1304 }, { "epoch": 0.1648117452048307, "grad_norm": 1.9453125, "learning_rate": 1.971202180798693e-05, "loss": 0.6686, "step": 1305 }, { "epoch": 0.16493803772989185, "grad_norm": 2.03125, "learning_rate": 1.9711545949753374e-05, "loss": 0.7174, "step": 1306 }, { "epoch": 0.16506433025495304, "grad_norm": 2.171875, "learning_rate": 1.971106970444099e-05, "loss": 0.7646, "step": 1307 }, { "epoch": 0.1651906227800142, "grad_norm": 2.0625, "learning_rate": 1.9710593072068756e-05, "loss": 0.6885, "step": 1308 }, { "epoch": 0.1653169153050754, "grad_norm": 1.828125, "learning_rate": 1.971011605265567e-05, "loss": 0.6432, "step": 1309 }, { "epoch": 0.16544320783013655, "grad_norm": 1.9296875, "learning_rate": 1.9709638646220754e-05, "loss": 0.6415, "step": 1310 }, { "epoch": 0.1655695003551977, "grad_norm": 2.0625, "learning_rate": 1.9709160852783022e-05, "loss": 0.7898, "step": 1311 }, { "epoch": 0.1656957928802589, "grad_norm": 2.0625, "learning_rate": 1.9708682672361524e-05, "loss": 0.7725, "step": 1312 }, { "epoch": 0.16582208540532006, "grad_norm": 1.9921875, "learning_rate": 1.970820410497532e-05, "loss": 0.7429, "step": 1313 }, { "epoch": 0.16594837793038125, "grad_norm": 1.890625, "learning_rate": 1.9707725150643488e-05, "loss": 0.7302, "step": 1314 }, { "epoch": 0.1660746704554424, "grad_norm": 1.9765625, "learning_rate": 1.970724580938511e-05, "loss": 0.6547, "step": 1315 }, { "epoch": 0.1662009629805036, "grad_norm": 1.8671875, "learning_rate": 1.97067660812193e-05, "loss": 0.646, "step": 1316 }, { "epoch": 0.16632725550556476, "grad_norm": 2.265625, "learning_rate": 1.9706285966165167e-05, "loss": 0.6671, "step": 1317 }, { "epoch": 0.16645354803062593, "grad_norm": 1.90625, "learning_rate": 1.970580546424186e-05, "loss": 0.6609, "step": 1318 }, { "epoch": 0.16657984055568711, "grad_norm": 2.046875, "learning_rate": 1.9705324575468524e-05, "loss": 0.7374, "step": 1319 }, { "epoch": 0.16670613308074828, "grad_norm": 1.8359375, "learning_rate": 1.9704843299864326e-05, "loss": 0.6842, "step": 1320 }, { "epoch": 0.16683242560580946, "grad_norm": 2.046875, "learning_rate": 1.9704361637448448e-05, "loss": 0.7602, "step": 1321 }, { "epoch": 0.16695871813087063, "grad_norm": 2.015625, "learning_rate": 1.9703879588240094e-05, "loss": 0.7353, "step": 1322 }, { "epoch": 0.16708501065593181, "grad_norm": 2.1875, "learning_rate": 1.970339715225847e-05, "loss": 0.7512, "step": 1323 }, { "epoch": 0.16721130318099298, "grad_norm": 1.9140625, "learning_rate": 1.970291432952281e-05, "loss": 0.6972, "step": 1324 }, { "epoch": 0.16733759570605414, "grad_norm": 1.90625, "learning_rate": 1.9702431120052352e-05, "loss": 0.6802, "step": 1325 }, { "epoch": 0.16746388823111533, "grad_norm": 2.0625, "learning_rate": 1.970194752386636e-05, "loss": 0.7445, "step": 1326 }, { "epoch": 0.1675901807561765, "grad_norm": 1.96875, "learning_rate": 1.970146354098411e-05, "loss": 0.6058, "step": 1327 }, { "epoch": 0.16771647328123768, "grad_norm": 1.9609375, "learning_rate": 1.9700979171424892e-05, "loss": 0.6565, "step": 1328 }, { "epoch": 0.16784276580629884, "grad_norm": 1.9453125, "learning_rate": 1.9700494415208008e-05, "loss": 0.6881, "step": 1329 }, { "epoch": 0.16796905833136, "grad_norm": 1.8515625, "learning_rate": 1.9700009272352784e-05, "loss": 0.6322, "step": 1330 }, { "epoch": 0.1680953508564212, "grad_norm": 1.8125, "learning_rate": 1.9699523742878554e-05, "loss": 0.6376, "step": 1331 }, { "epoch": 0.16822164338148235, "grad_norm": 2.0, "learning_rate": 1.969903782680467e-05, "loss": 0.6994, "step": 1332 }, { "epoch": 0.16834793590654354, "grad_norm": 1.96875, "learning_rate": 1.9698551524150497e-05, "loss": 0.7716, "step": 1333 }, { "epoch": 0.1684742284316047, "grad_norm": 2.0625, "learning_rate": 1.969806483493542e-05, "loss": 0.6442, "step": 1334 }, { "epoch": 0.1686005209566659, "grad_norm": 1.8828125, "learning_rate": 1.9697577759178842e-05, "loss": 0.6826, "step": 1335 }, { "epoch": 0.16872681348172705, "grad_norm": 2.125, "learning_rate": 1.9697090296900173e-05, "loss": 0.8199, "step": 1336 }, { "epoch": 0.1688531060067882, "grad_norm": 2.0, "learning_rate": 1.969660244811884e-05, "loss": 0.754, "step": 1337 }, { "epoch": 0.1689793985318494, "grad_norm": 2.09375, "learning_rate": 1.9696114212854288e-05, "loss": 0.7378, "step": 1338 }, { "epoch": 0.16910569105691056, "grad_norm": 1.921875, "learning_rate": 1.969562559112598e-05, "loss": 0.7788, "step": 1339 }, { "epoch": 0.16923198358197175, "grad_norm": 1.9453125, "learning_rate": 1.969513658295339e-05, "loss": 0.7135, "step": 1340 }, { "epoch": 0.1693582761070329, "grad_norm": 2.078125, "learning_rate": 1.9694647188356005e-05, "loss": 0.7705, "step": 1341 }, { "epoch": 0.1694845686320941, "grad_norm": 1.828125, "learning_rate": 1.9694157407353334e-05, "loss": 0.6271, "step": 1342 }, { "epoch": 0.16961086115715526, "grad_norm": 2.09375, "learning_rate": 1.9693667239964902e-05, "loss": 0.7109, "step": 1343 }, { "epoch": 0.16973715368221642, "grad_norm": 1.96875, "learning_rate": 1.969317668621024e-05, "loss": 0.6416, "step": 1344 }, { "epoch": 0.1698634462072776, "grad_norm": 1.9921875, "learning_rate": 1.9692685746108905e-05, "loss": 0.6637, "step": 1345 }, { "epoch": 0.16998973873233877, "grad_norm": 2.03125, "learning_rate": 1.969219441968046e-05, "loss": 0.69, "step": 1346 }, { "epoch": 0.17011603125739996, "grad_norm": 1.859375, "learning_rate": 1.969170270694449e-05, "loss": 0.6247, "step": 1347 }, { "epoch": 0.17024232378246112, "grad_norm": 2.015625, "learning_rate": 1.9691210607920597e-05, "loss": 0.7365, "step": 1348 }, { "epoch": 0.17036861630752229, "grad_norm": 1.859375, "learning_rate": 1.969071812262839e-05, "loss": 0.6864, "step": 1349 }, { "epoch": 0.17049490883258347, "grad_norm": 1.953125, "learning_rate": 1.96902252510875e-05, "loss": 0.6804, "step": 1350 }, { "epoch": 0.17062120135764464, "grad_norm": 1.984375, "learning_rate": 1.9689731993317574e-05, "loss": 0.7258, "step": 1351 }, { "epoch": 0.17074749388270583, "grad_norm": 1.984375, "learning_rate": 1.968923834933827e-05, "loss": 0.6005, "step": 1352 }, { "epoch": 0.170873786407767, "grad_norm": 1.953125, "learning_rate": 1.968874431916926e-05, "loss": 0.7166, "step": 1353 }, { "epoch": 0.17100007893282818, "grad_norm": 2.234375, "learning_rate": 1.968824990283024e-05, "loss": 0.7903, "step": 1354 }, { "epoch": 0.17112637145788934, "grad_norm": 2.15625, "learning_rate": 1.9687755100340914e-05, "loss": 0.7772, "step": 1355 }, { "epoch": 0.1712526639829505, "grad_norm": 1.9375, "learning_rate": 1.9687259911721004e-05, "loss": 0.6933, "step": 1356 }, { "epoch": 0.1713789565080117, "grad_norm": 1.8359375, "learning_rate": 1.9686764336990246e-05, "loss": 0.6903, "step": 1357 }, { "epoch": 0.17150524903307285, "grad_norm": 1.9609375, "learning_rate": 1.9686268376168396e-05, "loss": 0.6867, "step": 1358 }, { "epoch": 0.17163154155813404, "grad_norm": 2.046875, "learning_rate": 1.9685772029275218e-05, "loss": 0.7156, "step": 1359 }, { "epoch": 0.1717578340831952, "grad_norm": 2.09375, "learning_rate": 1.96852752963305e-05, "loss": 0.7571, "step": 1360 }, { "epoch": 0.17188412660825636, "grad_norm": 1.8671875, "learning_rate": 1.9684778177354033e-05, "loss": 0.6592, "step": 1361 }, { "epoch": 0.17201041913331755, "grad_norm": 2.0, "learning_rate": 1.968428067236564e-05, "loss": 0.737, "step": 1362 }, { "epoch": 0.1721367116583787, "grad_norm": 2.03125, "learning_rate": 1.968378278138514e-05, "loss": 0.7028, "step": 1363 }, { "epoch": 0.1722630041834399, "grad_norm": 2.03125, "learning_rate": 1.9683284504432384e-05, "loss": 0.7295, "step": 1364 }, { "epoch": 0.17238929670850106, "grad_norm": 1.921875, "learning_rate": 1.9682785841527233e-05, "loss": 0.7687, "step": 1365 }, { "epoch": 0.17251558923356225, "grad_norm": 1.9375, "learning_rate": 1.968228679268956e-05, "loss": 0.6907, "step": 1366 }, { "epoch": 0.1726418817586234, "grad_norm": 1.953125, "learning_rate": 1.9681787357939257e-05, "loss": 0.7267, "step": 1367 }, { "epoch": 0.17276817428368457, "grad_norm": 1.90625, "learning_rate": 1.968128753729623e-05, "loss": 0.7194, "step": 1368 }, { "epoch": 0.17289446680874576, "grad_norm": 1.8671875, "learning_rate": 1.96807873307804e-05, "loss": 0.6824, "step": 1369 }, { "epoch": 0.17302075933380692, "grad_norm": 1.921875, "learning_rate": 1.9680286738411706e-05, "loss": 0.6855, "step": 1370 }, { "epoch": 0.1731470518588681, "grad_norm": 1.9453125, "learning_rate": 1.9679785760210098e-05, "loss": 0.7361, "step": 1371 }, { "epoch": 0.17327334438392927, "grad_norm": 1.890625, "learning_rate": 1.9679284396195544e-05, "loss": 0.6092, "step": 1372 }, { "epoch": 0.17339963690899046, "grad_norm": 1.9296875, "learning_rate": 1.967878264638803e-05, "loss": 0.6283, "step": 1373 }, { "epoch": 0.17352592943405162, "grad_norm": 1.984375, "learning_rate": 1.9678280510807552e-05, "loss": 0.7087, "step": 1374 }, { "epoch": 0.17365222195911278, "grad_norm": 1.9140625, "learning_rate": 1.967777798947412e-05, "loss": 0.6893, "step": 1375 }, { "epoch": 0.17377851448417397, "grad_norm": 1.875, "learning_rate": 1.9677275082407775e-05, "loss": 0.6584, "step": 1376 }, { "epoch": 0.17390480700923514, "grad_norm": 1.7421875, "learning_rate": 1.967677178962855e-05, "loss": 0.6702, "step": 1377 }, { "epoch": 0.17403109953429632, "grad_norm": 2.09375, "learning_rate": 1.9676268111156512e-05, "loss": 0.6734, "step": 1378 }, { "epoch": 0.17415739205935749, "grad_norm": 1.984375, "learning_rate": 1.967576404701173e-05, "loss": 0.8123, "step": 1379 }, { "epoch": 0.17428368458441865, "grad_norm": 1.9453125, "learning_rate": 1.9675259597214304e-05, "loss": 0.7394, "step": 1380 }, { "epoch": 0.17440997710947984, "grad_norm": 2.171875, "learning_rate": 1.967475476178433e-05, "loss": 0.7479, "step": 1381 }, { "epoch": 0.174536269634541, "grad_norm": 1.859375, "learning_rate": 1.9674249540741937e-05, "loss": 0.5543, "step": 1382 }, { "epoch": 0.17466256215960219, "grad_norm": 1.8984375, "learning_rate": 1.9673743934107255e-05, "loss": 0.705, "step": 1383 }, { "epoch": 0.17478885468466335, "grad_norm": 1.90625, "learning_rate": 1.9673237941900445e-05, "loss": 0.643, "step": 1384 }, { "epoch": 0.17491514720972454, "grad_norm": 1.828125, "learning_rate": 1.967273156414167e-05, "loss": 0.7249, "step": 1385 }, { "epoch": 0.1750414397347857, "grad_norm": 1.9765625, "learning_rate": 1.9672224800851112e-05, "loss": 0.6659, "step": 1386 }, { "epoch": 0.17516773225984686, "grad_norm": 1.8984375, "learning_rate": 1.9671717652048968e-05, "loss": 0.6971, "step": 1387 }, { "epoch": 0.17529402478490805, "grad_norm": 2.0, "learning_rate": 1.967121011775546e-05, "loss": 0.7784, "step": 1388 }, { "epoch": 0.1754203173099692, "grad_norm": 2.03125, "learning_rate": 1.9670702197990804e-05, "loss": 0.7777, "step": 1389 }, { "epoch": 0.1755466098350304, "grad_norm": 1.9453125, "learning_rate": 1.967019389277526e-05, "loss": 0.6571, "step": 1390 }, { "epoch": 0.17567290236009156, "grad_norm": 2.0, "learning_rate": 1.9669685202129073e-05, "loss": 0.7194, "step": 1391 }, { "epoch": 0.17579919488515275, "grad_norm": 2.078125, "learning_rate": 1.9669176126072525e-05, "loss": 0.7824, "step": 1392 }, { "epoch": 0.1759254874102139, "grad_norm": 2.0, "learning_rate": 1.966866666462591e-05, "loss": 0.6291, "step": 1393 }, { "epoch": 0.17605177993527507, "grad_norm": 1.8125, "learning_rate": 1.9668156817809527e-05, "loss": 0.63, "step": 1394 }, { "epoch": 0.17617807246033626, "grad_norm": 1.921875, "learning_rate": 1.9667646585643703e-05, "loss": 0.7346, "step": 1395 }, { "epoch": 0.17630436498539742, "grad_norm": 1.921875, "learning_rate": 1.966713596814877e-05, "loss": 0.714, "step": 1396 }, { "epoch": 0.1764306575104586, "grad_norm": 2.15625, "learning_rate": 1.966662496534508e-05, "loss": 0.7632, "step": 1397 }, { "epoch": 0.17655695003551977, "grad_norm": 4.125, "learning_rate": 1.966611357725301e-05, "loss": 1.0532, "step": 1398 }, { "epoch": 0.17668324256058093, "grad_norm": 2.015625, "learning_rate": 1.966560180389293e-05, "loss": 0.8192, "step": 1399 }, { "epoch": 0.17680953508564212, "grad_norm": 1.9296875, "learning_rate": 1.9665089645285243e-05, "loss": 0.6647, "step": 1400 }, { "epoch": 0.17693582761070328, "grad_norm": 1.984375, "learning_rate": 1.9664577101450364e-05, "loss": 0.7275, "step": 1401 }, { "epoch": 0.17706212013576447, "grad_norm": 2.015625, "learning_rate": 1.966406417240872e-05, "loss": 0.6201, "step": 1402 }, { "epoch": 0.17718841266082563, "grad_norm": 1.8828125, "learning_rate": 1.9663550858180754e-05, "loss": 0.6908, "step": 1403 }, { "epoch": 0.17731470518588682, "grad_norm": 1.953125, "learning_rate": 1.966303715878693e-05, "loss": 0.7487, "step": 1404 }, { "epoch": 0.17744099771094798, "grad_norm": 1.9375, "learning_rate": 1.9662523074247716e-05, "loss": 0.6947, "step": 1405 }, { "epoch": 0.17756729023600915, "grad_norm": 1.8984375, "learning_rate": 1.9662008604583608e-05, "loss": 0.6959, "step": 1406 }, { "epoch": 0.17769358276107033, "grad_norm": 1.9375, "learning_rate": 1.9661493749815106e-05, "loss": 0.7004, "step": 1407 }, { "epoch": 0.1778198752861315, "grad_norm": 1.8671875, "learning_rate": 1.966097850996274e-05, "loss": 0.6496, "step": 1408 }, { "epoch": 0.17794616781119268, "grad_norm": 1.8828125, "learning_rate": 1.9660462885047036e-05, "loss": 0.6556, "step": 1409 }, { "epoch": 0.17807246033625385, "grad_norm": 2.109375, "learning_rate": 1.9659946875088553e-05, "loss": 0.6837, "step": 1410 }, { "epoch": 0.178198752861315, "grad_norm": 2.046875, "learning_rate": 1.965943048010785e-05, "loss": 0.79, "step": 1411 }, { "epoch": 0.1783250453863762, "grad_norm": 2.015625, "learning_rate": 1.965891370012552e-05, "loss": 0.7637, "step": 1412 }, { "epoch": 0.17845133791143736, "grad_norm": 2.0, "learning_rate": 1.9658396535162155e-05, "loss": 0.6221, "step": 1413 }, { "epoch": 0.17857763043649855, "grad_norm": 1.9453125, "learning_rate": 1.9657878985238367e-05, "loss": 0.7423, "step": 1414 }, { "epoch": 0.1787039229615597, "grad_norm": 1.890625, "learning_rate": 1.965736105037478e-05, "loss": 0.7081, "step": 1415 }, { "epoch": 0.1788302154866209, "grad_norm": 1.8125, "learning_rate": 1.9656842730592046e-05, "loss": 0.7366, "step": 1416 }, { "epoch": 0.17895650801168206, "grad_norm": 1.9296875, "learning_rate": 1.9656324025910822e-05, "loss": 0.7052, "step": 1417 }, { "epoch": 0.17908280053674322, "grad_norm": 2.015625, "learning_rate": 1.9655804936351782e-05, "loss": 0.6757, "step": 1418 }, { "epoch": 0.1792090930618044, "grad_norm": 1.9375, "learning_rate": 1.965528546193561e-05, "loss": 0.713, "step": 1419 }, { "epoch": 0.17933538558686557, "grad_norm": 2.109375, "learning_rate": 1.9654765602683022e-05, "loss": 0.723, "step": 1420 }, { "epoch": 0.17946167811192676, "grad_norm": 1.9453125, "learning_rate": 1.9654245358614728e-05, "loss": 0.5994, "step": 1421 }, { "epoch": 0.17958797063698792, "grad_norm": 1.9375, "learning_rate": 1.9653724729751468e-05, "loss": 0.7009, "step": 1422 }, { "epoch": 0.1797142631620491, "grad_norm": 1.8828125, "learning_rate": 1.965320371611399e-05, "loss": 0.7518, "step": 1423 }, { "epoch": 0.17984055568711027, "grad_norm": 1.8828125, "learning_rate": 1.965268231772307e-05, "loss": 0.6131, "step": 1424 }, { "epoch": 0.17996684821217143, "grad_norm": 1.859375, "learning_rate": 1.965216053459948e-05, "loss": 0.6895, "step": 1425 }, { "epoch": 0.18009314073723262, "grad_norm": 1.8046875, "learning_rate": 1.965163836676402e-05, "loss": 0.6428, "step": 1426 }, { "epoch": 0.18021943326229378, "grad_norm": 1.984375, "learning_rate": 1.9651115814237502e-05, "loss": 0.7329, "step": 1427 }, { "epoch": 0.18034572578735497, "grad_norm": 2.125, "learning_rate": 1.965059287704075e-05, "loss": 0.7062, "step": 1428 }, { "epoch": 0.18047201831241613, "grad_norm": 1.828125, "learning_rate": 1.965006955519461e-05, "loss": 0.6859, "step": 1429 }, { "epoch": 0.1805983108374773, "grad_norm": 2.0, "learning_rate": 1.9649545848719947e-05, "loss": 0.7149, "step": 1430 }, { "epoch": 0.18072460336253848, "grad_norm": 1.9375, "learning_rate": 1.9649021757637627e-05, "loss": 0.678, "step": 1431 }, { "epoch": 0.18085089588759964, "grad_norm": 1.8203125, "learning_rate": 1.9648497281968542e-05, "loss": 0.6735, "step": 1432 }, { "epoch": 0.18097718841266083, "grad_norm": 1.9765625, "learning_rate": 1.9647972421733594e-05, "loss": 0.7246, "step": 1433 }, { "epoch": 0.181103480937722, "grad_norm": 1.78125, "learning_rate": 1.9647447176953703e-05, "loss": 0.5271, "step": 1434 }, { "epoch": 0.18122977346278318, "grad_norm": 2.40625, "learning_rate": 1.9646921547649803e-05, "loss": 0.6424, "step": 1435 }, { "epoch": 0.18135606598784434, "grad_norm": 1.8359375, "learning_rate": 1.9646395533842852e-05, "loss": 0.689, "step": 1436 }, { "epoch": 0.1814823585129055, "grad_norm": 1.8203125, "learning_rate": 1.9645869135553806e-05, "loss": 0.6387, "step": 1437 }, { "epoch": 0.1816086510379667, "grad_norm": 1.8203125, "learning_rate": 1.964534235280365e-05, "loss": 0.6568, "step": 1438 }, { "epoch": 0.18173494356302786, "grad_norm": 2.078125, "learning_rate": 1.9644815185613382e-05, "loss": 0.6615, "step": 1439 }, { "epoch": 0.18186123608808905, "grad_norm": 1.828125, "learning_rate": 1.964428763400401e-05, "loss": 0.5808, "step": 1440 }, { "epoch": 0.1819875286131502, "grad_norm": 2.03125, "learning_rate": 1.9643759697996563e-05, "loss": 0.6983, "step": 1441 }, { "epoch": 0.1821138211382114, "grad_norm": 2.015625, "learning_rate": 1.9643231377612082e-05, "loss": 0.6833, "step": 1442 }, { "epoch": 0.18224011366327256, "grad_norm": 1.9453125, "learning_rate": 1.9642702672871626e-05, "loss": 0.724, "step": 1443 }, { "epoch": 0.18236640618833372, "grad_norm": 2.015625, "learning_rate": 1.9642173583796265e-05, "loss": 0.7172, "step": 1444 }, { "epoch": 0.1824926987133949, "grad_norm": 1.921875, "learning_rate": 1.9641644110407094e-05, "loss": 0.7197, "step": 1445 }, { "epoch": 0.18261899123845607, "grad_norm": 1.9921875, "learning_rate": 1.9641114252725208e-05, "loss": 0.6997, "step": 1446 }, { "epoch": 0.18274528376351726, "grad_norm": 1.828125, "learning_rate": 1.9640584010771734e-05, "loss": 0.6503, "step": 1447 }, { "epoch": 0.18287157628857842, "grad_norm": 1.796875, "learning_rate": 1.9640053384567798e-05, "loss": 0.6175, "step": 1448 }, { "epoch": 0.18299786881363958, "grad_norm": 2.03125, "learning_rate": 1.9639522374134555e-05, "loss": 0.7536, "step": 1449 }, { "epoch": 0.18312416133870077, "grad_norm": 2.109375, "learning_rate": 1.963899097949317e-05, "loss": 0.7215, "step": 1450 }, { "epoch": 0.18325045386376193, "grad_norm": 2.109375, "learning_rate": 1.9638459200664822e-05, "loss": 0.7316, "step": 1451 }, { "epoch": 0.18337674638882312, "grad_norm": 1.90625, "learning_rate": 1.9637927037670704e-05, "loss": 0.6908, "step": 1452 }, { "epoch": 0.18350303891388428, "grad_norm": 1.9609375, "learning_rate": 1.9637394490532033e-05, "loss": 0.6708, "step": 1453 }, { "epoch": 0.18362933143894547, "grad_norm": 1.875, "learning_rate": 1.9636861559270026e-05, "loss": 0.6422, "step": 1454 }, { "epoch": 0.18375562396400663, "grad_norm": 2.171875, "learning_rate": 1.963632824390593e-05, "loss": 0.761, "step": 1455 }, { "epoch": 0.1838819164890678, "grad_norm": 1.90625, "learning_rate": 1.9635794544460997e-05, "loss": 0.6266, "step": 1456 }, { "epoch": 0.18400820901412898, "grad_norm": 1.96875, "learning_rate": 1.9635260460956508e-05, "loss": 0.6762, "step": 1457 }, { "epoch": 0.18413450153919014, "grad_norm": 1.984375, "learning_rate": 1.9634725993413744e-05, "loss": 0.6743, "step": 1458 }, { "epoch": 0.18426079406425133, "grad_norm": 1.8828125, "learning_rate": 1.9634191141854008e-05, "loss": 0.6599, "step": 1459 }, { "epoch": 0.1843870865893125, "grad_norm": 2.109375, "learning_rate": 1.963365590629862e-05, "loss": 0.7398, "step": 1460 }, { "epoch": 0.18451337911437365, "grad_norm": 1.8515625, "learning_rate": 1.963312028676891e-05, "loss": 0.6725, "step": 1461 }, { "epoch": 0.18463967163943484, "grad_norm": 1.9921875, "learning_rate": 1.9632584283286227e-05, "loss": 0.6861, "step": 1462 }, { "epoch": 0.184765964164496, "grad_norm": 2.03125, "learning_rate": 1.9632047895871938e-05, "loss": 0.7332, "step": 1463 }, { "epoch": 0.1848922566895572, "grad_norm": 2.03125, "learning_rate": 1.963151112454742e-05, "loss": 0.6623, "step": 1464 }, { "epoch": 0.18501854921461836, "grad_norm": 1.890625, "learning_rate": 1.9630973969334068e-05, "loss": 0.7627, "step": 1465 }, { "epoch": 0.18514484173967954, "grad_norm": 1.8125, "learning_rate": 1.963043643025329e-05, "loss": 0.6374, "step": 1466 }, { "epoch": 0.1852711342647407, "grad_norm": 2.0625, "learning_rate": 1.9629898507326516e-05, "loss": 0.8067, "step": 1467 }, { "epoch": 0.18539742678980187, "grad_norm": 1.9609375, "learning_rate": 1.9629360200575175e-05, "loss": 0.6704, "step": 1468 }, { "epoch": 0.18552371931486306, "grad_norm": 1.984375, "learning_rate": 1.9628821510020738e-05, "loss": 0.6379, "step": 1469 }, { "epoch": 0.18565001183992422, "grad_norm": 1.859375, "learning_rate": 1.962828243568466e-05, "loss": 0.7775, "step": 1470 }, { "epoch": 0.1857763043649854, "grad_norm": 1.9609375, "learning_rate": 1.9627742977588443e-05, "loss": 0.6976, "step": 1471 }, { "epoch": 0.18590259689004657, "grad_norm": 1.78125, "learning_rate": 1.962720313575358e-05, "loss": 0.6383, "step": 1472 }, { "epoch": 0.18602888941510776, "grad_norm": 3.1875, "learning_rate": 1.9626662910201584e-05, "loss": 0.8336, "step": 1473 }, { "epoch": 0.18615518194016892, "grad_norm": 2.015625, "learning_rate": 1.9626122300953993e-05, "loss": 0.7391, "step": 1474 }, { "epoch": 0.18628147446523008, "grad_norm": 2.8125, "learning_rate": 1.9625581308032356e-05, "loss": 0.9466, "step": 1475 }, { "epoch": 0.18640776699029127, "grad_norm": 2.0, "learning_rate": 1.9625039931458228e-05, "loss": 0.7281, "step": 1476 }, { "epoch": 0.18653405951535243, "grad_norm": 1.90625, "learning_rate": 1.9624498171253195e-05, "loss": 0.6441, "step": 1477 }, { "epoch": 0.18666035204041362, "grad_norm": 1.9921875, "learning_rate": 1.9623956027438846e-05, "loss": 0.6876, "step": 1478 }, { "epoch": 0.18678664456547478, "grad_norm": 2.015625, "learning_rate": 1.962341350003679e-05, "loss": 0.6758, "step": 1479 }, { "epoch": 0.18691293709053594, "grad_norm": 2.125, "learning_rate": 1.9622870589068653e-05, "loss": 0.691, "step": 1480 }, { "epoch": 0.18703922961559713, "grad_norm": 2.015625, "learning_rate": 1.9622327294556074e-05, "loss": 0.7218, "step": 1481 }, { "epoch": 0.1871655221406583, "grad_norm": 2.0625, "learning_rate": 1.9621783616520702e-05, "loss": 0.6575, "step": 1482 }, { "epoch": 0.18729181466571948, "grad_norm": 1.9765625, "learning_rate": 1.962123955498421e-05, "loss": 0.6995, "step": 1483 }, { "epoch": 0.18741810719078064, "grad_norm": 2.09375, "learning_rate": 1.9620695109968285e-05, "loss": 0.803, "step": 1484 }, { "epoch": 0.18754439971584183, "grad_norm": 1.953125, "learning_rate": 1.962015028149463e-05, "loss": 0.7642, "step": 1485 }, { "epoch": 0.187670692240903, "grad_norm": 1.8828125, "learning_rate": 1.9619605069584954e-05, "loss": 0.6809, "step": 1486 }, { "epoch": 0.18779698476596415, "grad_norm": 1.9765625, "learning_rate": 1.9619059474260985e-05, "loss": 0.7128, "step": 1487 }, { "epoch": 0.18792327729102534, "grad_norm": 2.15625, "learning_rate": 1.961851349554448e-05, "loss": 0.7985, "step": 1488 }, { "epoch": 0.1880495698160865, "grad_norm": 1.9609375, "learning_rate": 1.961796713345719e-05, "loss": 0.7125, "step": 1489 }, { "epoch": 0.1881758623411477, "grad_norm": 2.03125, "learning_rate": 1.96174203880209e-05, "loss": 0.7112, "step": 1490 }, { "epoch": 0.18830215486620885, "grad_norm": 2.09375, "learning_rate": 1.96168732592574e-05, "loss": 0.7176, "step": 1491 }, { "epoch": 0.18842844739127004, "grad_norm": 1.9609375, "learning_rate": 1.9616325747188494e-05, "loss": 0.6002, "step": 1492 }, { "epoch": 0.1885547399163312, "grad_norm": 1.765625, "learning_rate": 1.9615777851836003e-05, "loss": 0.6967, "step": 1493 }, { "epoch": 0.18868103244139237, "grad_norm": 1.890625, "learning_rate": 1.9615229573221772e-05, "loss": 0.6106, "step": 1494 }, { "epoch": 0.18880732496645355, "grad_norm": 1.8203125, "learning_rate": 1.961468091136765e-05, "loss": 0.6633, "step": 1495 }, { "epoch": 0.18893361749151472, "grad_norm": 1.921875, "learning_rate": 1.96141318662955e-05, "loss": 0.7808, "step": 1496 }, { "epoch": 0.1890599100165759, "grad_norm": 2.03125, "learning_rate": 1.9613582438027217e-05, "loss": 0.7431, "step": 1497 }, { "epoch": 0.18918620254163707, "grad_norm": 2.03125, "learning_rate": 1.961303262658469e-05, "loss": 0.7368, "step": 1498 }, { "epoch": 0.18931249506669823, "grad_norm": 1.9609375, "learning_rate": 1.961248243198984e-05, "loss": 0.6836, "step": 1499 }, { "epoch": 0.18943878759175942, "grad_norm": 2.0625, "learning_rate": 1.961193185426459e-05, "loss": 0.8188, "step": 1500 }, { "epoch": 0.18956508011682058, "grad_norm": 1.9921875, "learning_rate": 1.961138089343089e-05, "loss": 0.7043, "step": 1501 }, { "epoch": 0.18969137264188177, "grad_norm": 1.921875, "learning_rate": 1.9610829549510698e-05, "loss": 0.6575, "step": 1502 }, { "epoch": 0.18981766516694293, "grad_norm": 3.21875, "learning_rate": 1.961027782252599e-05, "loss": 0.9413, "step": 1503 }, { "epoch": 0.18994395769200412, "grad_norm": 2.265625, "learning_rate": 1.9609725712498753e-05, "loss": 0.751, "step": 1504 }, { "epoch": 0.19007025021706528, "grad_norm": 1.9296875, "learning_rate": 1.9609173219450998e-05, "loss": 0.7017, "step": 1505 }, { "epoch": 0.19019654274212644, "grad_norm": 1.921875, "learning_rate": 1.9608620343404743e-05, "loss": 0.7014, "step": 1506 }, { "epoch": 0.19032283526718763, "grad_norm": 2.0625, "learning_rate": 1.9608067084382025e-05, "loss": 0.7571, "step": 1507 }, { "epoch": 0.1904491277922488, "grad_norm": 3.03125, "learning_rate": 1.9607513442404892e-05, "loss": 0.8513, "step": 1508 }, { "epoch": 0.19057542031730998, "grad_norm": 1.8203125, "learning_rate": 1.960695941749542e-05, "loss": 0.6171, "step": 1509 }, { "epoch": 0.19070171284237114, "grad_norm": 2.046875, "learning_rate": 1.960640500967568e-05, "loss": 0.7295, "step": 1510 }, { "epoch": 0.1908280053674323, "grad_norm": 1.90625, "learning_rate": 1.9605850218967783e-05, "loss": 0.6803, "step": 1511 }, { "epoch": 0.1909542978924935, "grad_norm": 1.9375, "learning_rate": 1.9605295045393827e-05, "loss": 0.7975, "step": 1512 }, { "epoch": 0.19108059041755465, "grad_norm": 2.046875, "learning_rate": 1.9604739488975948e-05, "loss": 0.7561, "step": 1513 }, { "epoch": 0.19120688294261584, "grad_norm": 2.046875, "learning_rate": 1.9604183549736287e-05, "loss": 0.7064, "step": 1514 }, { "epoch": 0.191333175467677, "grad_norm": 2.34375, "learning_rate": 1.9603627227697002e-05, "loss": 0.73, "step": 1515 }, { "epoch": 0.1914594679927382, "grad_norm": 1.8671875, "learning_rate": 1.960307052288027e-05, "loss": 0.6533, "step": 1516 }, { "epoch": 0.19158576051779935, "grad_norm": 1.9140625, "learning_rate": 1.9602513435308273e-05, "loss": 0.6412, "step": 1517 }, { "epoch": 0.19171205304286051, "grad_norm": 1.9609375, "learning_rate": 1.9601955965003224e-05, "loss": 0.6652, "step": 1518 }, { "epoch": 0.1918383455679217, "grad_norm": 3.71875, "learning_rate": 1.9601398111987337e-05, "loss": 0.8008, "step": 1519 }, { "epoch": 0.19196463809298286, "grad_norm": 1.9453125, "learning_rate": 1.9600839876282845e-05, "loss": 0.6974, "step": 1520 }, { "epoch": 0.19209093061804405, "grad_norm": 1.9765625, "learning_rate": 1.9600281257912002e-05, "loss": 0.7218, "step": 1521 }, { "epoch": 0.19221722314310521, "grad_norm": 1.859375, "learning_rate": 1.9599722256897075e-05, "loss": 0.6901, "step": 1522 }, { "epoch": 0.1923435156681664, "grad_norm": 1.78125, "learning_rate": 1.9599162873260336e-05, "loss": 0.6337, "step": 1523 }, { "epoch": 0.19246980819322757, "grad_norm": 1.9921875, "learning_rate": 1.959860310702409e-05, "loss": 0.6963, "step": 1524 }, { "epoch": 0.19259610071828873, "grad_norm": 1.9140625, "learning_rate": 1.9598042958210642e-05, "loss": 0.7243, "step": 1525 }, { "epoch": 0.19272239324334992, "grad_norm": 1.96875, "learning_rate": 1.959748242684232e-05, "loss": 0.6815, "step": 1526 }, { "epoch": 0.19284868576841108, "grad_norm": 1.984375, "learning_rate": 1.9596921512941468e-05, "loss": 0.7111, "step": 1527 }, { "epoch": 0.19297497829347227, "grad_norm": 1.9140625, "learning_rate": 1.9596360216530436e-05, "loss": 0.7343, "step": 1528 }, { "epoch": 0.19310127081853343, "grad_norm": 2.078125, "learning_rate": 1.95957985376316e-05, "loss": 0.6957, "step": 1529 }, { "epoch": 0.1932275633435946, "grad_norm": 1.859375, "learning_rate": 1.959523647626735e-05, "loss": 0.6343, "step": 1530 }, { "epoch": 0.19335385586865578, "grad_norm": 1.9609375, "learning_rate": 1.9594674032460088e-05, "loss": 0.6348, "step": 1531 }, { "epoch": 0.19348014839371694, "grad_norm": 1.9375, "learning_rate": 1.9594111206232224e-05, "loss": 0.6314, "step": 1532 }, { "epoch": 0.19360644091877813, "grad_norm": 1.96875, "learning_rate": 1.95935479976062e-05, "loss": 0.6835, "step": 1533 }, { "epoch": 0.1937327334438393, "grad_norm": 2.3125, "learning_rate": 1.9592984406604458e-05, "loss": 0.7107, "step": 1534 }, { "epoch": 0.19385902596890048, "grad_norm": 1.75, "learning_rate": 1.9592420433249465e-05, "loss": 0.6321, "step": 1535 }, { "epoch": 0.19398531849396164, "grad_norm": 1.90625, "learning_rate": 1.9591856077563696e-05, "loss": 0.7038, "step": 1536 }, { "epoch": 0.1941116110190228, "grad_norm": 1.8359375, "learning_rate": 1.959129133956965e-05, "loss": 0.576, "step": 1537 }, { "epoch": 0.194237903544084, "grad_norm": 1.8671875, "learning_rate": 1.959072621928983e-05, "loss": 0.7311, "step": 1538 }, { "epoch": 0.19436419606914515, "grad_norm": 1.8359375, "learning_rate": 1.9590160716746766e-05, "loss": 0.6289, "step": 1539 }, { "epoch": 0.19449048859420634, "grad_norm": 1.9921875, "learning_rate": 1.9589594831962998e-05, "loss": 0.728, "step": 1540 }, { "epoch": 0.1946167811192675, "grad_norm": 1.7890625, "learning_rate": 1.9589028564961074e-05, "loss": 0.704, "step": 1541 }, { "epoch": 0.19474307364432866, "grad_norm": 1.921875, "learning_rate": 1.958846191576357e-05, "loss": 0.6918, "step": 1542 }, { "epoch": 0.19486936616938985, "grad_norm": 1.9375, "learning_rate": 1.9587894884393066e-05, "loss": 0.7534, "step": 1543 }, { "epoch": 0.194995658694451, "grad_norm": 1.9375, "learning_rate": 1.958732747087217e-05, "loss": 0.7189, "step": 1544 }, { "epoch": 0.1951219512195122, "grad_norm": 1.9296875, "learning_rate": 1.958675967522349e-05, "loss": 0.6883, "step": 1545 }, { "epoch": 0.19524824374457336, "grad_norm": 1.9140625, "learning_rate": 1.958619149746966e-05, "loss": 0.7438, "step": 1546 }, { "epoch": 0.19537453626963455, "grad_norm": 2.0625, "learning_rate": 1.958562293763333e-05, "loss": 0.7625, "step": 1547 }, { "epoch": 0.1955008287946957, "grad_norm": 1.90625, "learning_rate": 1.958505399573716e-05, "loss": 0.692, "step": 1548 }, { "epoch": 0.19562712131975687, "grad_norm": 2.21875, "learning_rate": 1.9584484671803818e-05, "loss": 0.7127, "step": 1549 }, { "epoch": 0.19575341384481806, "grad_norm": 1.921875, "learning_rate": 1.958391496585601e-05, "loss": 0.681, "step": 1550 }, { "epoch": 0.19587970636987923, "grad_norm": 2.078125, "learning_rate": 1.958334487791643e-05, "loss": 0.7466, "step": 1551 }, { "epoch": 0.19600599889494041, "grad_norm": 2.140625, "learning_rate": 1.958277440800781e-05, "loss": 0.7205, "step": 1552 }, { "epoch": 0.19613229142000158, "grad_norm": 1.96875, "learning_rate": 1.958220355615288e-05, "loss": 0.7547, "step": 1553 }, { "epoch": 0.19625858394506276, "grad_norm": 1.84375, "learning_rate": 1.95816323223744e-05, "loss": 0.7152, "step": 1554 }, { "epoch": 0.19638487647012393, "grad_norm": 1.84375, "learning_rate": 1.958106070669513e-05, "loss": 0.7251, "step": 1555 }, { "epoch": 0.1965111689951851, "grad_norm": 2.015625, "learning_rate": 1.958048870913786e-05, "loss": 0.6854, "step": 1556 }, { "epoch": 0.19663746152024628, "grad_norm": 2.0, "learning_rate": 1.957991632972539e-05, "loss": 0.7306, "step": 1557 }, { "epoch": 0.19676375404530744, "grad_norm": 2.015625, "learning_rate": 1.9579343568480523e-05, "loss": 0.7369, "step": 1558 }, { "epoch": 0.19689004657036863, "grad_norm": 1.921875, "learning_rate": 1.95787704254261e-05, "loss": 0.6884, "step": 1559 }, { "epoch": 0.1970163390954298, "grad_norm": 1.859375, "learning_rate": 1.9578196900584956e-05, "loss": 0.6548, "step": 1560 }, { "epoch": 0.19714263162049095, "grad_norm": 1.96875, "learning_rate": 1.9577622993979957e-05, "loss": 0.7125, "step": 1561 }, { "epoch": 0.19726892414555214, "grad_norm": 2.09375, "learning_rate": 1.9577048705633974e-05, "loss": 0.784, "step": 1562 }, { "epoch": 0.1973952166706133, "grad_norm": 1.984375, "learning_rate": 1.9576474035569895e-05, "loss": 0.692, "step": 1563 }, { "epoch": 0.1975215091956745, "grad_norm": 1.9140625, "learning_rate": 1.957589898381063e-05, "loss": 0.7103, "step": 1564 }, { "epoch": 0.19764780172073565, "grad_norm": 2.046875, "learning_rate": 1.9575323550379094e-05, "loss": 0.6927, "step": 1565 }, { "epoch": 0.19777409424579684, "grad_norm": 2.0, "learning_rate": 1.957474773529823e-05, "loss": 0.6661, "step": 1566 }, { "epoch": 0.197900386770858, "grad_norm": 1.9140625, "learning_rate": 1.957417153859098e-05, "loss": 0.7417, "step": 1567 }, { "epoch": 0.19802667929591916, "grad_norm": 1.8671875, "learning_rate": 1.9573594960280312e-05, "loss": 0.622, "step": 1568 }, { "epoch": 0.19815297182098035, "grad_norm": 2.125, "learning_rate": 1.9573018000389208e-05, "loss": 0.7425, "step": 1569 }, { "epoch": 0.1982792643460415, "grad_norm": 1.8515625, "learning_rate": 1.9572440658940667e-05, "loss": 0.6473, "step": 1570 }, { "epoch": 0.1984055568711027, "grad_norm": 1.9609375, "learning_rate": 1.9571862935957696e-05, "loss": 0.8063, "step": 1571 }, { "epoch": 0.19853184939616386, "grad_norm": 1.90625, "learning_rate": 1.9571284831463326e-05, "loss": 0.6739, "step": 1572 }, { "epoch": 0.19865814192122505, "grad_norm": 1.921875, "learning_rate": 1.9570706345480594e-05, "loss": 0.7231, "step": 1573 }, { "epoch": 0.1987844344462862, "grad_norm": 2.078125, "learning_rate": 1.9570127478032562e-05, "loss": 0.7792, "step": 1574 }, { "epoch": 0.19891072697134737, "grad_norm": 2.03125, "learning_rate": 1.9569548229142298e-05, "loss": 0.6816, "step": 1575 }, { "epoch": 0.19903701949640856, "grad_norm": 1.9375, "learning_rate": 1.9568968598832894e-05, "loss": 0.7011, "step": 1576 }, { "epoch": 0.19916331202146972, "grad_norm": 1.9375, "learning_rate": 1.9568388587127448e-05, "loss": 0.6573, "step": 1577 }, { "epoch": 0.1992896045465309, "grad_norm": 1.8671875, "learning_rate": 1.956780819404908e-05, "loss": 0.6668, "step": 1578 }, { "epoch": 0.19941589707159207, "grad_norm": 1.9296875, "learning_rate": 1.9567227419620924e-05, "loss": 0.5531, "step": 1579 }, { "epoch": 0.19954218959665324, "grad_norm": 1.8515625, "learning_rate": 1.9566646263866124e-05, "loss": 0.6532, "step": 1580 }, { "epoch": 0.19966848212171442, "grad_norm": 1.953125, "learning_rate": 1.9566064726807846e-05, "loss": 0.6939, "step": 1581 }, { "epoch": 0.19979477464677559, "grad_norm": 1.9375, "learning_rate": 1.9565482808469274e-05, "loss": 0.6805, "step": 1582 }, { "epoch": 0.19992106717183677, "grad_norm": 1.9453125, "learning_rate": 1.9564900508873593e-05, "loss": 0.7577, "step": 1583 }, { "epoch": 0.20004735969689794, "grad_norm": 2.125, "learning_rate": 1.9564317828044022e-05, "loss": 0.7413, "step": 1584 }, { "epoch": 0.20017365222195913, "grad_norm": 1.859375, "learning_rate": 1.9563734766003773e-05, "loss": 0.6362, "step": 1585 }, { "epoch": 0.2002999447470203, "grad_norm": 2.078125, "learning_rate": 1.9563151322776096e-05, "loss": 0.8043, "step": 1586 }, { "epoch": 0.20042623727208145, "grad_norm": 1.8828125, "learning_rate": 1.956256749838424e-05, "loss": 0.7344, "step": 1587 }, { "epoch": 0.20055252979714264, "grad_norm": 1.9765625, "learning_rate": 1.9561983292851477e-05, "loss": 0.6371, "step": 1588 }, { "epoch": 0.2006788223222038, "grad_norm": 1.9609375, "learning_rate": 1.956139870620109e-05, "loss": 0.7318, "step": 1589 }, { "epoch": 0.200805114847265, "grad_norm": 1.875, "learning_rate": 1.9560813738456384e-05, "loss": 0.6983, "step": 1590 }, { "epoch": 0.20093140737232615, "grad_norm": 2.046875, "learning_rate": 1.9560228389640668e-05, "loss": 0.7067, "step": 1591 }, { "epoch": 0.2010576998973873, "grad_norm": 1.859375, "learning_rate": 1.9559642659777273e-05, "loss": 0.6734, "step": 1592 }, { "epoch": 0.2011839924224485, "grad_norm": 2.03125, "learning_rate": 1.9559056548889554e-05, "loss": 0.6451, "step": 1593 }, { "epoch": 0.20131028494750966, "grad_norm": 1.7734375, "learning_rate": 1.9558470057000862e-05, "loss": 0.6322, "step": 1594 }, { "epoch": 0.20143657747257085, "grad_norm": 1.9140625, "learning_rate": 1.9557883184134576e-05, "loss": 0.6193, "step": 1595 }, { "epoch": 0.201562869997632, "grad_norm": 1.96875, "learning_rate": 1.955729593031409e-05, "loss": 0.678, "step": 1596 }, { "epoch": 0.2016891625226932, "grad_norm": 2.015625, "learning_rate": 1.9556708295562806e-05, "loss": 0.6949, "step": 1597 }, { "epoch": 0.20181545504775436, "grad_norm": 2.0625, "learning_rate": 1.955612027990415e-05, "loss": 0.7563, "step": 1598 }, { "epoch": 0.20194174757281552, "grad_norm": 2.015625, "learning_rate": 1.9555531883361554e-05, "loss": 0.772, "step": 1599 }, { "epoch": 0.2020680400978767, "grad_norm": 1.9140625, "learning_rate": 1.9554943105958478e-05, "loss": 0.6728, "step": 1600 }, { "epoch": 0.20219433262293787, "grad_norm": 2.0, "learning_rate": 1.955435394771838e-05, "loss": 0.6943, "step": 1601 }, { "epoch": 0.20232062514799906, "grad_norm": 2.0, "learning_rate": 1.9553764408664748e-05, "loss": 0.7155, "step": 1602 }, { "epoch": 0.20244691767306022, "grad_norm": 2.140625, "learning_rate": 1.9553174488821077e-05, "loss": 0.7556, "step": 1603 }, { "epoch": 0.2025732101981214, "grad_norm": 2.046875, "learning_rate": 1.9552584188210883e-05, "loss": 0.6527, "step": 1604 }, { "epoch": 0.20269950272318257, "grad_norm": 1.921875, "learning_rate": 1.9551993506857695e-05, "loss": 0.6452, "step": 1605 }, { "epoch": 0.20282579524824373, "grad_norm": 1.953125, "learning_rate": 1.9551402444785047e-05, "loss": 0.6536, "step": 1606 }, { "epoch": 0.20295208777330492, "grad_norm": 1.9921875, "learning_rate": 1.9550811002016507e-05, "loss": 0.6364, "step": 1607 }, { "epoch": 0.20307838029836608, "grad_norm": 1.921875, "learning_rate": 1.9550219178575643e-05, "loss": 0.6531, "step": 1608 }, { "epoch": 0.20320467282342727, "grad_norm": 2.234375, "learning_rate": 1.954962697448605e-05, "loss": 0.7802, "step": 1609 }, { "epoch": 0.20333096534848843, "grad_norm": 1.984375, "learning_rate": 1.9549034389771323e-05, "loss": 0.6627, "step": 1610 }, { "epoch": 0.2034572578735496, "grad_norm": 1.921875, "learning_rate": 1.954844142445509e-05, "loss": 0.6748, "step": 1611 }, { "epoch": 0.20358355039861079, "grad_norm": 2.125, "learning_rate": 1.9547848078560975e-05, "loss": 0.9004, "step": 1612 }, { "epoch": 0.20370984292367195, "grad_norm": 2.0625, "learning_rate": 1.954725435211264e-05, "loss": 0.7115, "step": 1613 }, { "epoch": 0.20383613544873314, "grad_norm": 1.8828125, "learning_rate": 1.9546660245133738e-05, "loss": 0.6674, "step": 1614 }, { "epoch": 0.2039624279737943, "grad_norm": 1.8046875, "learning_rate": 1.9546065757647952e-05, "loss": 0.6051, "step": 1615 }, { "epoch": 0.20408872049885549, "grad_norm": 1.7890625, "learning_rate": 1.9545470889678978e-05, "loss": 0.7145, "step": 1616 }, { "epoch": 0.20421501302391665, "grad_norm": 1.796875, "learning_rate": 1.954487564125053e-05, "loss": 0.6224, "step": 1617 }, { "epoch": 0.2043413055489778, "grad_norm": 1.9609375, "learning_rate": 1.9544280012386326e-05, "loss": 0.7477, "step": 1618 }, { "epoch": 0.204467598074039, "grad_norm": 1.8515625, "learning_rate": 1.9543684003110112e-05, "loss": 0.7142, "step": 1619 }, { "epoch": 0.20459389059910016, "grad_norm": 1.9921875, "learning_rate": 1.954308761344564e-05, "loss": 0.832, "step": 1620 }, { "epoch": 0.20472018312416135, "grad_norm": 1.9765625, "learning_rate": 1.954249084341668e-05, "loss": 0.6974, "step": 1621 }, { "epoch": 0.2048464756492225, "grad_norm": 1.9921875, "learning_rate": 1.954189369304702e-05, "loss": 0.7517, "step": 1622 }, { "epoch": 0.2049727681742837, "grad_norm": 1.9453125, "learning_rate": 1.9541296162360462e-05, "loss": 0.7276, "step": 1623 }, { "epoch": 0.20509906069934486, "grad_norm": 2.0625, "learning_rate": 1.954069825138082e-05, "loss": 0.6471, "step": 1624 }, { "epoch": 0.20522535322440602, "grad_norm": 1.9296875, "learning_rate": 1.9540099960131927e-05, "loss": 0.6523, "step": 1625 }, { "epoch": 0.2053516457494672, "grad_norm": 2.21875, "learning_rate": 1.9539501288637628e-05, "loss": 0.7515, "step": 1626 }, { "epoch": 0.20547793827452837, "grad_norm": 2.0, "learning_rate": 1.9538902236921782e-05, "loss": 0.74, "step": 1627 }, { "epoch": 0.20560423079958956, "grad_norm": 2.0, "learning_rate": 1.9538302805008273e-05, "loss": 0.7184, "step": 1628 }, { "epoch": 0.20573052332465072, "grad_norm": 1.96875, "learning_rate": 1.9537702992920986e-05, "loss": 0.7312, "step": 1629 }, { "epoch": 0.20585681584971188, "grad_norm": 1.90625, "learning_rate": 1.953710280068383e-05, "loss": 0.6476, "step": 1630 }, { "epoch": 0.20598310837477307, "grad_norm": 1.9765625, "learning_rate": 1.953650222832073e-05, "loss": 0.6971, "step": 1631 }, { "epoch": 0.20610940089983423, "grad_norm": 1.9140625, "learning_rate": 1.953590127585562e-05, "loss": 0.6697, "step": 1632 }, { "epoch": 0.20623569342489542, "grad_norm": 1.8671875, "learning_rate": 1.9535299943312455e-05, "loss": 0.6662, "step": 1633 }, { "epoch": 0.20636198594995658, "grad_norm": 1.859375, "learning_rate": 1.9534698230715203e-05, "loss": 0.6149, "step": 1634 }, { "epoch": 0.20648827847501777, "grad_norm": 1.859375, "learning_rate": 1.953409613808784e-05, "loss": 0.7087, "step": 1635 }, { "epoch": 0.20661457100007893, "grad_norm": 1.9921875, "learning_rate": 1.9533493665454372e-05, "loss": 0.6571, "step": 1636 }, { "epoch": 0.2067408635251401, "grad_norm": 1.9140625, "learning_rate": 1.953289081283881e-05, "loss": 0.7127, "step": 1637 }, { "epoch": 0.20686715605020128, "grad_norm": 2.015625, "learning_rate": 1.9532287580265183e-05, "loss": 0.7742, "step": 1638 }, { "epoch": 0.20699344857526245, "grad_norm": 4.5, "learning_rate": 1.9531683967757533e-05, "loss": 0.8626, "step": 1639 }, { "epoch": 0.20711974110032363, "grad_norm": 2.0625, "learning_rate": 1.9531079975339912e-05, "loss": 0.7415, "step": 1640 }, { "epoch": 0.2072460336253848, "grad_norm": 2.046875, "learning_rate": 1.9530475603036406e-05, "loss": 0.7502, "step": 1641 }, { "epoch": 0.20737232615044596, "grad_norm": 1.953125, "learning_rate": 1.9529870850871096e-05, "loss": 0.7355, "step": 1642 }, { "epoch": 0.20749861867550715, "grad_norm": 1.9140625, "learning_rate": 1.952926571886809e-05, "loss": 0.6511, "step": 1643 }, { "epoch": 0.2076249112005683, "grad_norm": 2.015625, "learning_rate": 1.9528660207051507e-05, "loss": 0.6684, "step": 1644 }, { "epoch": 0.2077512037256295, "grad_norm": 1.9453125, "learning_rate": 1.9528054315445476e-05, "loss": 0.7199, "step": 1645 }, { "epoch": 0.20787749625069066, "grad_norm": 1.90625, "learning_rate": 1.952744804407415e-05, "loss": 0.7403, "step": 1646 }, { "epoch": 0.20800378877575185, "grad_norm": 2.015625, "learning_rate": 1.9526841392961694e-05, "loss": 0.6734, "step": 1647 }, { "epoch": 0.208130081300813, "grad_norm": 1.9453125, "learning_rate": 1.9526234362132286e-05, "loss": 0.7133, "step": 1648 }, { "epoch": 0.20825637382587417, "grad_norm": 2.15625, "learning_rate": 1.952562695161012e-05, "loss": 0.6877, "step": 1649 }, { "epoch": 0.20838266635093536, "grad_norm": 1.890625, "learning_rate": 1.9525019161419413e-05, "loss": 0.6622, "step": 1650 }, { "epoch": 0.20850895887599652, "grad_norm": 1.8671875, "learning_rate": 1.9524410991584378e-05, "loss": 0.6927, "step": 1651 }, { "epoch": 0.2086352514010577, "grad_norm": 2.0, "learning_rate": 1.9523802442129267e-05, "loss": 0.7129, "step": 1652 }, { "epoch": 0.20876154392611887, "grad_norm": 3.03125, "learning_rate": 1.952319351307833e-05, "loss": 0.8018, "step": 1653 }, { "epoch": 0.20888783645118006, "grad_norm": 1.8515625, "learning_rate": 1.9522584204455835e-05, "loss": 0.6366, "step": 1654 }, { "epoch": 0.20901412897624122, "grad_norm": 1.9609375, "learning_rate": 1.952197451628607e-05, "loss": 0.6708, "step": 1655 }, { "epoch": 0.20914042150130238, "grad_norm": 1.6953125, "learning_rate": 1.952136444859334e-05, "loss": 0.6492, "step": 1656 }, { "epoch": 0.20926671402636357, "grad_norm": 1.9453125, "learning_rate": 1.9520754001401953e-05, "loss": 0.7325, "step": 1657 }, { "epoch": 0.20939300655142473, "grad_norm": 1.9375, "learning_rate": 1.9520143174736247e-05, "loss": 0.6853, "step": 1658 }, { "epoch": 0.20951929907648592, "grad_norm": 1.8984375, "learning_rate": 1.951953196862056e-05, "loss": 0.7015, "step": 1659 }, { "epoch": 0.20964559160154708, "grad_norm": 2.09375, "learning_rate": 1.951892038307926e-05, "loss": 0.7197, "step": 1660 }, { "epoch": 0.20977188412660824, "grad_norm": 2.109375, "learning_rate": 1.951830841813672e-05, "loss": 0.8331, "step": 1661 }, { "epoch": 0.20989817665166943, "grad_norm": 1.953125, "learning_rate": 1.9517696073817336e-05, "loss": 0.6979, "step": 1662 }, { "epoch": 0.2100244691767306, "grad_norm": 1.796875, "learning_rate": 1.951708335014551e-05, "loss": 0.5992, "step": 1663 }, { "epoch": 0.21015076170179178, "grad_norm": 1.8203125, "learning_rate": 1.9516470247145664e-05, "loss": 0.6397, "step": 1664 }, { "epoch": 0.21027705422685294, "grad_norm": 2.078125, "learning_rate": 1.9515856764842237e-05, "loss": 0.6812, "step": 1665 }, { "epoch": 0.21040334675191413, "grad_norm": 1.7734375, "learning_rate": 1.951524290325968e-05, "loss": 0.7119, "step": 1666 }, { "epoch": 0.2105296392769753, "grad_norm": 2.125, "learning_rate": 1.951462866242246e-05, "loss": 0.7501, "step": 1667 }, { "epoch": 0.21065593180203646, "grad_norm": 1.9609375, "learning_rate": 1.9514014042355057e-05, "loss": 0.7025, "step": 1668 }, { "epoch": 0.21078222432709764, "grad_norm": 1.8125, "learning_rate": 1.9513399043081972e-05, "loss": 0.6162, "step": 1669 }, { "epoch": 0.2109085168521588, "grad_norm": 1.921875, "learning_rate": 1.9512783664627713e-05, "loss": 0.7038, "step": 1670 }, { "epoch": 0.21103480937722, "grad_norm": 1.9921875, "learning_rate": 1.951216790701681e-05, "loss": 0.6835, "step": 1671 }, { "epoch": 0.21116110190228116, "grad_norm": 1.84375, "learning_rate": 1.951155177027381e-05, "loss": 0.644, "step": 1672 }, { "epoch": 0.21128739442734235, "grad_norm": 1.9765625, "learning_rate": 1.9510935254423265e-05, "loss": 0.7027, "step": 1673 }, { "epoch": 0.2114136869524035, "grad_norm": 1.9765625, "learning_rate": 1.9510318359489746e-05, "loss": 0.6816, "step": 1674 }, { "epoch": 0.21153997947746467, "grad_norm": 2.0625, "learning_rate": 1.950970108549785e-05, "loss": 0.6944, "step": 1675 }, { "epoch": 0.21166627200252586, "grad_norm": 2.09375, "learning_rate": 1.950908343247217e-05, "loss": 0.7214, "step": 1676 }, { "epoch": 0.21179256452758702, "grad_norm": 1.9765625, "learning_rate": 1.950846540043733e-05, "loss": 0.7416, "step": 1677 }, { "epoch": 0.2119188570526482, "grad_norm": 1.8984375, "learning_rate": 1.9507846989417958e-05, "loss": 0.6465, "step": 1678 }, { "epoch": 0.21204514957770937, "grad_norm": 1.9375, "learning_rate": 1.950722819943871e-05, "loss": 0.7909, "step": 1679 }, { "epoch": 0.21217144210277053, "grad_norm": 1.8828125, "learning_rate": 1.9506609030524247e-05, "loss": 0.7671, "step": 1680 }, { "epoch": 0.21229773462783172, "grad_norm": 2.03125, "learning_rate": 1.950598948269924e-05, "loss": 0.6877, "step": 1681 }, { "epoch": 0.21242402715289288, "grad_norm": 1.8984375, "learning_rate": 1.9505369555988395e-05, "loss": 0.7328, "step": 1682 }, { "epoch": 0.21255031967795407, "grad_norm": 1.8203125, "learning_rate": 1.950474925041641e-05, "loss": 0.6761, "step": 1683 }, { "epoch": 0.21267661220301523, "grad_norm": 1.8203125, "learning_rate": 1.9504128566008017e-05, "loss": 0.6866, "step": 1684 }, { "epoch": 0.21280290472807642, "grad_norm": 1.7734375, "learning_rate": 1.950350750278795e-05, "loss": 0.6481, "step": 1685 }, { "epoch": 0.21292919725313758, "grad_norm": 2.09375, "learning_rate": 1.9502886060780964e-05, "loss": 0.7048, "step": 1686 }, { "epoch": 0.21305548977819874, "grad_norm": 1.9609375, "learning_rate": 1.950226424001183e-05, "loss": 0.7023, "step": 1687 }, { "epoch": 0.21318178230325993, "grad_norm": 1.8984375, "learning_rate": 1.950164204050533e-05, "loss": 0.6011, "step": 1688 }, { "epoch": 0.2133080748283211, "grad_norm": 1.765625, "learning_rate": 1.9501019462286266e-05, "loss": 0.6481, "step": 1689 }, { "epoch": 0.21343436735338228, "grad_norm": 1.8359375, "learning_rate": 1.950039650537945e-05, "loss": 0.7307, "step": 1690 }, { "epoch": 0.21356065987844344, "grad_norm": 1.96875, "learning_rate": 1.949977316980971e-05, "loss": 0.661, "step": 1691 }, { "epoch": 0.2136869524035046, "grad_norm": 2.0625, "learning_rate": 1.9499149455601897e-05, "loss": 0.713, "step": 1692 }, { "epoch": 0.2138132449285658, "grad_norm": 2.15625, "learning_rate": 1.9498525362780865e-05, "loss": 0.7365, "step": 1693 }, { "epoch": 0.21393953745362695, "grad_norm": 1.984375, "learning_rate": 1.9497900891371492e-05, "loss": 0.6333, "step": 1694 }, { "epoch": 0.21406582997868814, "grad_norm": 1.9140625, "learning_rate": 1.9497276041398666e-05, "loss": 0.6639, "step": 1695 }, { "epoch": 0.2141921225037493, "grad_norm": 2.078125, "learning_rate": 1.9496650812887293e-05, "loss": 0.6826, "step": 1696 }, { "epoch": 0.2143184150288105, "grad_norm": 1.8828125, "learning_rate": 1.949602520586229e-05, "loss": 0.7359, "step": 1697 }, { "epoch": 0.21444470755387166, "grad_norm": 1.765625, "learning_rate": 1.9495399220348598e-05, "loss": 0.6572, "step": 1698 }, { "epoch": 0.21457100007893282, "grad_norm": 4.0, "learning_rate": 1.9494772856371162e-05, "loss": 0.7964, "step": 1699 }, { "epoch": 0.214697292603994, "grad_norm": 1.9453125, "learning_rate": 1.949414611395495e-05, "loss": 0.7196, "step": 1700 }, { "epoch": 0.21482358512905517, "grad_norm": 1.6875, "learning_rate": 1.9493518993124943e-05, "loss": 0.6624, "step": 1701 }, { "epoch": 0.21494987765411636, "grad_norm": 2.015625, "learning_rate": 1.9492891493906134e-05, "loss": 0.7043, "step": 1702 }, { "epoch": 0.21507617017917752, "grad_norm": 1.8359375, "learning_rate": 1.9492263616323536e-05, "loss": 0.7104, "step": 1703 }, { "epoch": 0.2152024627042387, "grad_norm": 1.84375, "learning_rate": 1.9491635360402176e-05, "loss": 0.6678, "step": 1704 }, { "epoch": 0.21532875522929987, "grad_norm": 1.875, "learning_rate": 1.949100672616709e-05, "loss": 0.7154, "step": 1705 }, { "epoch": 0.21545504775436103, "grad_norm": 1.9375, "learning_rate": 1.9490377713643336e-05, "loss": 0.7448, "step": 1706 }, { "epoch": 0.21558134027942222, "grad_norm": 1.875, "learning_rate": 1.9489748322855985e-05, "loss": 0.7021, "step": 1707 }, { "epoch": 0.21570763280448338, "grad_norm": 1.890625, "learning_rate": 1.9489118553830123e-05, "loss": 0.6932, "step": 1708 }, { "epoch": 0.21583392532954457, "grad_norm": 1.765625, "learning_rate": 1.9488488406590857e-05, "loss": 0.6331, "step": 1709 }, { "epoch": 0.21596021785460573, "grad_norm": 2.21875, "learning_rate": 1.9487857881163292e-05, "loss": 0.696, "step": 1710 }, { "epoch": 0.2160865103796669, "grad_norm": 2.421875, "learning_rate": 1.9487226977572566e-05, "loss": 0.7657, "step": 1711 }, { "epoch": 0.21621280290472808, "grad_norm": 1.8671875, "learning_rate": 1.9486595695843828e-05, "loss": 0.7432, "step": 1712 }, { "epoch": 0.21633909542978924, "grad_norm": 4.1875, "learning_rate": 1.948596403600223e-05, "loss": 0.7255, "step": 1713 }, { "epoch": 0.21646538795485043, "grad_norm": 1.7734375, "learning_rate": 1.9485331998072955e-05, "loss": 0.7314, "step": 1714 }, { "epoch": 0.2165916804799116, "grad_norm": 1.953125, "learning_rate": 1.9484699582081194e-05, "loss": 0.7431, "step": 1715 }, { "epoch": 0.21671797300497278, "grad_norm": 1.828125, "learning_rate": 1.948406678805215e-05, "loss": 0.5945, "step": 1716 }, { "epoch": 0.21684426553003394, "grad_norm": 1.9140625, "learning_rate": 1.9483433616011054e-05, "loss": 0.7192, "step": 1717 }, { "epoch": 0.2169705580550951, "grad_norm": 1.921875, "learning_rate": 1.948280006598313e-05, "loss": 0.7052, "step": 1718 }, { "epoch": 0.2170968505801563, "grad_norm": 2.34375, "learning_rate": 1.9482166137993642e-05, "loss": 0.7193, "step": 1719 }, { "epoch": 0.21722314310521745, "grad_norm": 1.8046875, "learning_rate": 1.9481531832067848e-05, "loss": 0.5951, "step": 1720 }, { "epoch": 0.21734943563027864, "grad_norm": 1.9375, "learning_rate": 1.9480897148231034e-05, "loss": 0.6429, "step": 1721 }, { "epoch": 0.2174757281553398, "grad_norm": 1.8515625, "learning_rate": 1.9480262086508495e-05, "loss": 0.6898, "step": 1722 }, { "epoch": 0.21760202068040096, "grad_norm": 2.0, "learning_rate": 1.9479626646925544e-05, "loss": 0.7756, "step": 1723 }, { "epoch": 0.21772831320546215, "grad_norm": 1.9609375, "learning_rate": 1.9478990829507507e-05, "loss": 0.7748, "step": 1724 }, { "epoch": 0.21785460573052332, "grad_norm": 1.859375, "learning_rate": 1.947835463427973e-05, "loss": 0.6974, "step": 1725 }, { "epoch": 0.2179808982555845, "grad_norm": 2.015625, "learning_rate": 1.9477718061267564e-05, "loss": 0.7636, "step": 1726 }, { "epoch": 0.21810719078064567, "grad_norm": 2.015625, "learning_rate": 1.9477081110496385e-05, "loss": 0.6806, "step": 1727 }, { "epoch": 0.21823348330570685, "grad_norm": 1.9453125, "learning_rate": 1.947644378199158e-05, "loss": 0.7705, "step": 1728 }, { "epoch": 0.21835977583076802, "grad_norm": 2.015625, "learning_rate": 1.9475806075778553e-05, "loss": 0.6481, "step": 1729 }, { "epoch": 0.21848606835582918, "grad_norm": 1.9453125, "learning_rate": 1.947516799188272e-05, "loss": 0.7608, "step": 1730 }, { "epoch": 0.21861236088089037, "grad_norm": 2.578125, "learning_rate": 1.947452953032951e-05, "loss": 0.7907, "step": 1731 }, { "epoch": 0.21873865340595153, "grad_norm": 2.15625, "learning_rate": 1.9473890691144376e-05, "loss": 0.7738, "step": 1732 }, { "epoch": 0.21886494593101272, "grad_norm": 2.0, "learning_rate": 1.947325147435278e-05, "loss": 0.7222, "step": 1733 }, { "epoch": 0.21899123845607388, "grad_norm": 1.7421875, "learning_rate": 1.9472611879980195e-05, "loss": 0.603, "step": 1734 }, { "epoch": 0.21911753098113507, "grad_norm": 1.9140625, "learning_rate": 1.947197190805212e-05, "loss": 0.7066, "step": 1735 }, { "epoch": 0.21924382350619623, "grad_norm": 1.953125, "learning_rate": 1.9471331558594057e-05, "loss": 0.6799, "step": 1736 }, { "epoch": 0.2193701160312574, "grad_norm": 1.921875, "learning_rate": 1.947069083163153e-05, "loss": 0.6703, "step": 1737 }, { "epoch": 0.21949640855631858, "grad_norm": 1.8828125, "learning_rate": 1.947004972719008e-05, "loss": 0.6637, "step": 1738 }, { "epoch": 0.21962270108137974, "grad_norm": 2.09375, "learning_rate": 1.9469408245295255e-05, "loss": 0.8082, "step": 1739 }, { "epoch": 0.21974899360644093, "grad_norm": 1.984375, "learning_rate": 1.946876638597263e-05, "loss": 0.6854, "step": 1740 }, { "epoch": 0.2198752861315021, "grad_norm": 2.109375, "learning_rate": 1.946812414924778e-05, "loss": 0.7812, "step": 1741 }, { "epoch": 0.22000157865656325, "grad_norm": 2.078125, "learning_rate": 1.946748153514631e-05, "loss": 0.6541, "step": 1742 }, { "epoch": 0.22012787118162444, "grad_norm": 1.859375, "learning_rate": 1.946683854369383e-05, "loss": 0.6751, "step": 1743 }, { "epoch": 0.2202541637066856, "grad_norm": 1.6640625, "learning_rate": 1.9466195174915966e-05, "loss": 0.6268, "step": 1744 }, { "epoch": 0.2203804562317468, "grad_norm": 1.8984375, "learning_rate": 1.9465551428838363e-05, "loss": 0.6795, "step": 1745 }, { "epoch": 0.22050674875680795, "grad_norm": 1.96875, "learning_rate": 1.946490730548668e-05, "loss": 0.7505, "step": 1746 }, { "epoch": 0.22063304128186914, "grad_norm": 1.8984375, "learning_rate": 1.946426280488659e-05, "loss": 0.6708, "step": 1747 }, { "epoch": 0.2207593338069303, "grad_norm": 1.890625, "learning_rate": 1.946361792706378e-05, "loss": 0.6512, "step": 1748 }, { "epoch": 0.22088562633199146, "grad_norm": 1.8828125, "learning_rate": 1.9462972672043953e-05, "loss": 0.6344, "step": 1749 }, { "epoch": 0.22101191885705265, "grad_norm": 1.890625, "learning_rate": 1.946232703985283e-05, "loss": 0.5895, "step": 1750 }, { "epoch": 0.22113821138211381, "grad_norm": 1.7734375, "learning_rate": 1.9461681030516142e-05, "loss": 0.6203, "step": 1751 }, { "epoch": 0.221264503907175, "grad_norm": 1.7734375, "learning_rate": 1.946103464405964e-05, "loss": 0.6786, "step": 1752 }, { "epoch": 0.22139079643223616, "grad_norm": 1.8125, "learning_rate": 1.946038788050908e-05, "loss": 0.7095, "step": 1753 }, { "epoch": 0.22151708895729735, "grad_norm": 1.9375, "learning_rate": 1.945974073989025e-05, "loss": 0.7686, "step": 1754 }, { "epoch": 0.22164338148235851, "grad_norm": 2.109375, "learning_rate": 1.945909322222894e-05, "loss": 0.7688, "step": 1755 }, { "epoch": 0.22176967400741968, "grad_norm": 1.9609375, "learning_rate": 1.9458445327550957e-05, "loss": 0.8099, "step": 1756 }, { "epoch": 0.22189596653248086, "grad_norm": 1.921875, "learning_rate": 1.9457797055882125e-05, "loss": 0.622, "step": 1757 }, { "epoch": 0.22202225905754203, "grad_norm": 1.6875, "learning_rate": 1.9457148407248283e-05, "loss": 0.6133, "step": 1758 }, { "epoch": 0.22214855158260322, "grad_norm": 1.9375, "learning_rate": 1.9456499381675282e-05, "loss": 0.7274, "step": 1759 }, { "epoch": 0.22227484410766438, "grad_norm": 1.8125, "learning_rate": 1.9455849979188998e-05, "loss": 0.6254, "step": 1760 }, { "epoch": 0.22240113663272554, "grad_norm": 1.890625, "learning_rate": 1.9455200199815306e-05, "loss": 0.6628, "step": 1761 }, { "epoch": 0.22252742915778673, "grad_norm": 1.921875, "learning_rate": 1.9454550043580108e-05, "loss": 0.5693, "step": 1762 }, { "epoch": 0.2226537216828479, "grad_norm": 1.8515625, "learning_rate": 1.945389951050932e-05, "loss": 0.6822, "step": 1763 }, { "epoch": 0.22278001420790908, "grad_norm": 2.109375, "learning_rate": 1.9453248600628867e-05, "loss": 0.6809, "step": 1764 }, { "epoch": 0.22290630673297024, "grad_norm": 1.953125, "learning_rate": 1.9452597313964697e-05, "loss": 0.6722, "step": 1765 }, { "epoch": 0.22303259925803143, "grad_norm": 1.9296875, "learning_rate": 1.9451945650542763e-05, "loss": 0.7631, "step": 1766 }, { "epoch": 0.2231588917830926, "grad_norm": 2.375, "learning_rate": 1.9451293610389044e-05, "loss": 0.7899, "step": 1767 }, { "epoch": 0.22328518430815375, "grad_norm": 1.96875, "learning_rate": 1.9450641193529524e-05, "loss": 0.6983, "step": 1768 }, { "epoch": 0.22341147683321494, "grad_norm": 1.859375, "learning_rate": 1.944998839999021e-05, "loss": 0.6607, "step": 1769 }, { "epoch": 0.2235377693582761, "grad_norm": 1.8828125, "learning_rate": 1.9449335229797124e-05, "loss": 0.6859, "step": 1770 }, { "epoch": 0.2236640618833373, "grad_norm": 1.984375, "learning_rate": 1.9448681682976293e-05, "loss": 0.7057, "step": 1771 }, { "epoch": 0.22379035440839845, "grad_norm": 1.953125, "learning_rate": 1.9448027759553768e-05, "loss": 0.6345, "step": 1772 }, { "epoch": 0.2239166469334596, "grad_norm": 1.9140625, "learning_rate": 1.9447373459555614e-05, "loss": 0.6418, "step": 1773 }, { "epoch": 0.2240429394585208, "grad_norm": 1.8046875, "learning_rate": 1.944671878300791e-05, "loss": 0.7032, "step": 1774 }, { "epoch": 0.22416923198358196, "grad_norm": 2.0, "learning_rate": 1.944606372993675e-05, "loss": 0.7087, "step": 1775 }, { "epoch": 0.22429552450864315, "grad_norm": 2.078125, "learning_rate": 1.9445408300368238e-05, "loss": 0.833, "step": 1776 }, { "epoch": 0.2244218170337043, "grad_norm": 1.921875, "learning_rate": 1.9444752494328505e-05, "loss": 0.7643, "step": 1777 }, { "epoch": 0.2245481095587655, "grad_norm": 1.7890625, "learning_rate": 1.944409631184369e-05, "loss": 0.6534, "step": 1778 }, { "epoch": 0.22467440208382666, "grad_norm": 1.9765625, "learning_rate": 1.9443439752939937e-05, "loss": 0.6842, "step": 1779 }, { "epoch": 0.22480069460888782, "grad_norm": 1.9140625, "learning_rate": 1.9442782817643425e-05, "loss": 0.6623, "step": 1780 }, { "epoch": 0.224926987133949, "grad_norm": 1.8671875, "learning_rate": 1.944212550598033e-05, "loss": 0.6258, "step": 1781 }, { "epoch": 0.22505327965901017, "grad_norm": 1.8671875, "learning_rate": 1.944146781797686e-05, "loss": 0.534, "step": 1782 }, { "epoch": 0.22517957218407136, "grad_norm": 2.15625, "learning_rate": 1.9440809753659223e-05, "loss": 0.7285, "step": 1783 }, { "epoch": 0.22530586470913253, "grad_norm": 1.984375, "learning_rate": 1.9440151313053647e-05, "loss": 0.6785, "step": 1784 }, { "epoch": 0.22543215723419371, "grad_norm": 2.140625, "learning_rate": 1.943949249618638e-05, "loss": 0.6113, "step": 1785 }, { "epoch": 0.22555844975925488, "grad_norm": 1.78125, "learning_rate": 1.9438833303083677e-05, "loss": 0.6757, "step": 1786 }, { "epoch": 0.22568474228431604, "grad_norm": 1.859375, "learning_rate": 1.9438173733771814e-05, "loss": 0.6251, "step": 1787 }, { "epoch": 0.22581103480937723, "grad_norm": 1.8671875, "learning_rate": 1.9437513788277077e-05, "loss": 0.6151, "step": 1788 }, { "epoch": 0.2259373273344384, "grad_norm": 1.9140625, "learning_rate": 1.9436853466625777e-05, "loss": 0.673, "step": 1789 }, { "epoch": 0.22606361985949958, "grad_norm": 1.96875, "learning_rate": 1.9436192768844224e-05, "loss": 0.8474, "step": 1790 }, { "epoch": 0.22618991238456074, "grad_norm": 3.609375, "learning_rate": 1.9435531694958758e-05, "loss": 0.7854, "step": 1791 }, { "epoch": 0.2263162049096219, "grad_norm": 1.8125, "learning_rate": 1.9434870244995725e-05, "loss": 0.7091, "step": 1792 }, { "epoch": 0.2264424974346831, "grad_norm": 2.234375, "learning_rate": 1.943420841898149e-05, "loss": 0.6268, "step": 1793 }, { "epoch": 0.22656878995974425, "grad_norm": 1.8203125, "learning_rate": 1.9433546216942426e-05, "loss": 0.6574, "step": 1794 }, { "epoch": 0.22669508248480544, "grad_norm": 1.90625, "learning_rate": 1.943288363890494e-05, "loss": 0.6776, "step": 1795 }, { "epoch": 0.2268213750098666, "grad_norm": 1.953125, "learning_rate": 1.9432220684895427e-05, "loss": 0.6968, "step": 1796 }, { "epoch": 0.2269476675349278, "grad_norm": 1.875, "learning_rate": 1.9431557354940315e-05, "loss": 0.703, "step": 1797 }, { "epoch": 0.22707396005998895, "grad_norm": 3.71875, "learning_rate": 1.9430893649066048e-05, "loss": 0.8401, "step": 1798 }, { "epoch": 0.2272002525850501, "grad_norm": 1.8671875, "learning_rate": 1.9430229567299075e-05, "loss": 0.6546, "step": 1799 }, { "epoch": 0.2273265451101113, "grad_norm": 1.921875, "learning_rate": 1.9429565109665866e-05, "loss": 0.6767, "step": 1800 }, { "epoch": 0.22745283763517246, "grad_norm": 1.7734375, "learning_rate": 1.9428900276192903e-05, "loss": 0.6716, "step": 1801 }, { "epoch": 0.22757913016023365, "grad_norm": 1.96875, "learning_rate": 1.942823506690669e-05, "loss": 0.7005, "step": 1802 }, { "epoch": 0.2277054226852948, "grad_norm": 2.03125, "learning_rate": 1.942756948183373e-05, "loss": 0.6694, "step": 1803 }, { "epoch": 0.227831715210356, "grad_norm": 1.7890625, "learning_rate": 1.942690352100056e-05, "loss": 0.6213, "step": 1804 }, { "epoch": 0.22795800773541716, "grad_norm": 2.0625, "learning_rate": 1.9426237184433728e-05, "loss": 0.6728, "step": 1805 }, { "epoch": 0.22808430026047832, "grad_norm": 1.9375, "learning_rate": 1.9425570472159777e-05, "loss": 0.7457, "step": 1806 }, { "epoch": 0.2282105927855395, "grad_norm": 1.9609375, "learning_rate": 1.9424903384205298e-05, "loss": 0.6624, "step": 1807 }, { "epoch": 0.22833688531060067, "grad_norm": 2.203125, "learning_rate": 1.9424235920596866e-05, "loss": 0.7864, "step": 1808 }, { "epoch": 0.22846317783566186, "grad_norm": 1.8203125, "learning_rate": 1.9423568081361093e-05, "loss": 0.6589, "step": 1809 }, { "epoch": 0.22858947036072302, "grad_norm": 1.9765625, "learning_rate": 1.9422899866524592e-05, "loss": 0.6943, "step": 1810 }, { "epoch": 0.22871576288578419, "grad_norm": 1.859375, "learning_rate": 1.9422231276114e-05, "loss": 0.6688, "step": 1811 }, { "epoch": 0.22884205541084537, "grad_norm": 2.28125, "learning_rate": 1.9421562310155964e-05, "loss": 0.7166, "step": 1812 }, { "epoch": 0.22896834793590654, "grad_norm": 1.8671875, "learning_rate": 1.942089296867715e-05, "loss": 0.6768, "step": 1813 }, { "epoch": 0.22909464046096772, "grad_norm": 1.859375, "learning_rate": 1.942022325170423e-05, "loss": 0.7112, "step": 1814 }, { "epoch": 0.22922093298602889, "grad_norm": 2.015625, "learning_rate": 1.9419553159263903e-05, "loss": 0.743, "step": 1815 }, { "epoch": 0.22934722551109007, "grad_norm": 1.9453125, "learning_rate": 1.9418882691382873e-05, "loss": 0.7138, "step": 1816 }, { "epoch": 0.22947351803615124, "grad_norm": 1.8984375, "learning_rate": 1.9418211848087873e-05, "loss": 0.7003, "step": 1817 }, { "epoch": 0.2295998105612124, "grad_norm": 1.859375, "learning_rate": 1.9417540629405627e-05, "loss": 0.6989, "step": 1818 }, { "epoch": 0.2297261030862736, "grad_norm": 1.890625, "learning_rate": 1.9416869035362896e-05, "loss": 0.6522, "step": 1819 }, { "epoch": 0.22985239561133475, "grad_norm": 1.96875, "learning_rate": 1.941619706598645e-05, "loss": 0.7388, "step": 1820 }, { "epoch": 0.22997868813639594, "grad_norm": 1.8828125, "learning_rate": 1.9415524721303066e-05, "loss": 0.6963, "step": 1821 }, { "epoch": 0.2301049806614571, "grad_norm": 1.9140625, "learning_rate": 1.9414852001339547e-05, "loss": 0.7291, "step": 1822 }, { "epoch": 0.23023127318651826, "grad_norm": 2.65625, "learning_rate": 1.9414178906122705e-05, "loss": 0.7738, "step": 1823 }, { "epoch": 0.23035756571157945, "grad_norm": 1.9296875, "learning_rate": 1.9413505435679365e-05, "loss": 0.6907, "step": 1824 }, { "epoch": 0.2304838582366406, "grad_norm": 1.9453125, "learning_rate": 1.9412831590036374e-05, "loss": 0.6645, "step": 1825 }, { "epoch": 0.2306101507617018, "grad_norm": 1.8046875, "learning_rate": 1.9412157369220587e-05, "loss": 0.6339, "step": 1826 }, { "epoch": 0.23073644328676296, "grad_norm": 2.078125, "learning_rate": 1.9411482773258877e-05, "loss": 0.7141, "step": 1827 }, { "epoch": 0.23086273581182415, "grad_norm": 1.78125, "learning_rate": 1.9410807802178138e-05, "loss": 0.6098, "step": 1828 }, { "epoch": 0.2309890283368853, "grad_norm": 1.84375, "learning_rate": 1.9410132456005262e-05, "loss": 0.655, "step": 1829 }, { "epoch": 0.23111532086194647, "grad_norm": 1.859375, "learning_rate": 1.9409456734767174e-05, "loss": 0.6758, "step": 1830 }, { "epoch": 0.23124161338700766, "grad_norm": 1.90625, "learning_rate": 1.9408780638490805e-05, "loss": 0.6803, "step": 1831 }, { "epoch": 0.23136790591206882, "grad_norm": 2.0625, "learning_rate": 1.94081041672031e-05, "loss": 0.649, "step": 1832 }, { "epoch": 0.23149419843713, "grad_norm": 1.8203125, "learning_rate": 1.9407427320931026e-05, "loss": 0.6635, "step": 1833 }, { "epoch": 0.23162049096219117, "grad_norm": 2.09375, "learning_rate": 1.9406750099701557e-05, "loss": 0.7249, "step": 1834 }, { "epoch": 0.23174678348725236, "grad_norm": 1.9453125, "learning_rate": 1.9406072503541687e-05, "loss": 0.6566, "step": 1835 }, { "epoch": 0.23187307601231352, "grad_norm": 1.8984375, "learning_rate": 1.9405394532478422e-05, "loss": 0.7111, "step": 1836 }, { "epoch": 0.23199936853737468, "grad_norm": 1.8671875, "learning_rate": 1.9404716186538787e-05, "loss": 0.6726, "step": 1837 }, { "epoch": 0.23212566106243587, "grad_norm": 1.8671875, "learning_rate": 1.9404037465749818e-05, "loss": 0.5967, "step": 1838 }, { "epoch": 0.23225195358749703, "grad_norm": 1.8984375, "learning_rate": 1.9403358370138564e-05, "loss": 0.6642, "step": 1839 }, { "epoch": 0.23237824611255822, "grad_norm": 1.828125, "learning_rate": 1.9402678899732098e-05, "loss": 0.6741, "step": 1840 }, { "epoch": 0.23250453863761938, "grad_norm": 1.8203125, "learning_rate": 1.9401999054557496e-05, "loss": 0.6281, "step": 1841 }, { "epoch": 0.23263083116268055, "grad_norm": 1.984375, "learning_rate": 1.940131883464186e-05, "loss": 0.6823, "step": 1842 }, { "epoch": 0.23275712368774173, "grad_norm": 1.8125, "learning_rate": 1.9400638240012297e-05, "loss": 0.5899, "step": 1843 }, { "epoch": 0.2328834162128029, "grad_norm": 1.8671875, "learning_rate": 1.9399957270695937e-05, "loss": 0.6591, "step": 1844 }, { "epoch": 0.23300970873786409, "grad_norm": 2.09375, "learning_rate": 1.9399275926719924e-05, "loss": 0.7717, "step": 1845 }, { "epoch": 0.23313600126292525, "grad_norm": 1.8515625, "learning_rate": 1.939859420811141e-05, "loss": 0.6793, "step": 1846 }, { "epoch": 0.23326229378798644, "grad_norm": 1.890625, "learning_rate": 1.939791211489757e-05, "loss": 0.747, "step": 1847 }, { "epoch": 0.2333885863130476, "grad_norm": 1.875, "learning_rate": 1.939722964710559e-05, "loss": 0.618, "step": 1848 }, { "epoch": 0.23351487883810876, "grad_norm": 1.875, "learning_rate": 1.9396546804762668e-05, "loss": 0.7303, "step": 1849 }, { "epoch": 0.23364117136316995, "grad_norm": 1.8828125, "learning_rate": 1.9395863587896025e-05, "loss": 0.7494, "step": 1850 }, { "epoch": 0.2337674638882311, "grad_norm": 1.953125, "learning_rate": 1.939517999653289e-05, "loss": 0.7253, "step": 1851 }, { "epoch": 0.2338937564132923, "grad_norm": 1.9140625, "learning_rate": 1.939449603070051e-05, "loss": 0.7147, "step": 1852 }, { "epoch": 0.23402004893835346, "grad_norm": 2.109375, "learning_rate": 1.939381169042615e-05, "loss": 0.6956, "step": 1853 }, { "epoch": 0.23414634146341465, "grad_norm": 1.90625, "learning_rate": 1.939312697573708e-05, "loss": 0.7424, "step": 1854 }, { "epoch": 0.2342726339884758, "grad_norm": 2.046875, "learning_rate": 1.9392441886660594e-05, "loss": 0.6874, "step": 1855 }, { "epoch": 0.23439892651353697, "grad_norm": 1.9609375, "learning_rate": 1.9391756423223996e-05, "loss": 0.7446, "step": 1856 }, { "epoch": 0.23452521903859816, "grad_norm": 1.90625, "learning_rate": 1.9391070585454607e-05, "loss": 0.7414, "step": 1857 }, { "epoch": 0.23465151156365932, "grad_norm": 1.9765625, "learning_rate": 1.9390384373379766e-05, "loss": 0.6605, "step": 1858 }, { "epoch": 0.2347778040887205, "grad_norm": 1.8203125, "learning_rate": 1.9389697787026823e-05, "loss": 0.6278, "step": 1859 }, { "epoch": 0.23490409661378167, "grad_norm": 1.921875, "learning_rate": 1.9389010826423142e-05, "loss": 0.7003, "step": 1860 }, { "epoch": 0.23503038913884283, "grad_norm": 1.8984375, "learning_rate": 1.9388323491596105e-05, "loss": 0.6615, "step": 1861 }, { "epoch": 0.23515668166390402, "grad_norm": 1.859375, "learning_rate": 1.9387635782573106e-05, "loss": 0.7614, "step": 1862 }, { "epoch": 0.23528297418896518, "grad_norm": 1.9453125, "learning_rate": 1.9386947699381556e-05, "loss": 0.6119, "step": 1863 }, { "epoch": 0.23540926671402637, "grad_norm": 1.90625, "learning_rate": 1.9386259242048883e-05, "loss": 0.754, "step": 1864 }, { "epoch": 0.23553555923908753, "grad_norm": 1.9296875, "learning_rate": 1.9385570410602522e-05, "loss": 0.7206, "step": 1865 }, { "epoch": 0.23566185176414872, "grad_norm": 1.921875, "learning_rate": 1.9384881205069932e-05, "loss": 0.6579, "step": 1866 }, { "epoch": 0.23578814428920988, "grad_norm": 1.8828125, "learning_rate": 1.938419162547858e-05, "loss": 0.6857, "step": 1867 }, { "epoch": 0.23591443681427104, "grad_norm": 1.8984375, "learning_rate": 1.9383501671855957e-05, "loss": 0.6826, "step": 1868 }, { "epoch": 0.23604072933933223, "grad_norm": 1.8828125, "learning_rate": 1.9382811344229555e-05, "loss": 0.6667, "step": 1869 }, { "epoch": 0.2361670218643934, "grad_norm": 1.953125, "learning_rate": 1.9382120642626895e-05, "loss": 0.6806, "step": 1870 }, { "epoch": 0.23629331438945458, "grad_norm": 2.0625, "learning_rate": 1.9381429567075507e-05, "loss": 0.7394, "step": 1871 }, { "epoch": 0.23641960691451575, "grad_norm": 1.8359375, "learning_rate": 1.9380738117602928e-05, "loss": 0.657, "step": 1872 }, { "epoch": 0.2365458994395769, "grad_norm": 1.96875, "learning_rate": 1.9380046294236725e-05, "loss": 0.6634, "step": 1873 }, { "epoch": 0.2366721919646381, "grad_norm": 1.984375, "learning_rate": 1.937935409700447e-05, "loss": 0.7471, "step": 1874 }, { "epoch": 0.23679848448969926, "grad_norm": 1.6796875, "learning_rate": 1.937866152593375e-05, "loss": 0.6317, "step": 1875 }, { "epoch": 0.23692477701476045, "grad_norm": 1.9140625, "learning_rate": 1.9377968581052176e-05, "loss": 0.6672, "step": 1876 }, { "epoch": 0.2370510695398216, "grad_norm": 1.9453125, "learning_rate": 1.937727526238736e-05, "loss": 0.733, "step": 1877 }, { "epoch": 0.2371773620648828, "grad_norm": 1.828125, "learning_rate": 1.937658156996694e-05, "loss": 0.6408, "step": 1878 }, { "epoch": 0.23730365458994396, "grad_norm": 1.765625, "learning_rate": 1.937588750381856e-05, "loss": 0.6432, "step": 1879 }, { "epoch": 0.23742994711500512, "grad_norm": 2.03125, "learning_rate": 1.937519306396989e-05, "loss": 0.7291, "step": 1880 }, { "epoch": 0.2375562396400663, "grad_norm": 2.0, "learning_rate": 1.9374498250448605e-05, "loss": 0.7115, "step": 1881 }, { "epoch": 0.23768253216512747, "grad_norm": 2.21875, "learning_rate": 1.9373803063282403e-05, "loss": 0.7653, "step": 1882 }, { "epoch": 0.23780882469018866, "grad_norm": 1.9765625, "learning_rate": 1.9373107502498984e-05, "loss": 0.6929, "step": 1883 }, { "epoch": 0.23793511721524982, "grad_norm": 1.8984375, "learning_rate": 1.937241156812608e-05, "loss": 0.7264, "step": 1884 }, { "epoch": 0.238061409740311, "grad_norm": 1.953125, "learning_rate": 1.9371715260191425e-05, "loss": 0.7789, "step": 1885 }, { "epoch": 0.23818770226537217, "grad_norm": 1.8203125, "learning_rate": 1.937101857872277e-05, "loss": 0.6671, "step": 1886 }, { "epoch": 0.23831399479043333, "grad_norm": 1.8828125, "learning_rate": 1.937032152374789e-05, "loss": 0.7721, "step": 1887 }, { "epoch": 0.23844028731549452, "grad_norm": 1.875, "learning_rate": 1.936962409529456e-05, "loss": 0.6743, "step": 1888 }, { "epoch": 0.23856657984055568, "grad_norm": 1.8359375, "learning_rate": 1.9368926293390588e-05, "loss": 0.6997, "step": 1889 }, { "epoch": 0.23869287236561687, "grad_norm": 1.84375, "learning_rate": 1.9368228118063777e-05, "loss": 0.6733, "step": 1890 }, { "epoch": 0.23881916489067803, "grad_norm": 2.015625, "learning_rate": 1.9367529569341957e-05, "loss": 0.7335, "step": 1891 }, { "epoch": 0.2389454574157392, "grad_norm": 1.9140625, "learning_rate": 1.9366830647252974e-05, "loss": 0.6877, "step": 1892 }, { "epoch": 0.23907174994080038, "grad_norm": 1.9296875, "learning_rate": 1.936613135182468e-05, "loss": 0.7766, "step": 1893 }, { "epoch": 0.23919804246586154, "grad_norm": 1.9140625, "learning_rate": 1.9365431683084947e-05, "loss": 0.7081, "step": 1894 }, { "epoch": 0.23932433499092273, "grad_norm": 2.953125, "learning_rate": 1.9364731641061673e-05, "loss": 0.8252, "step": 1895 }, { "epoch": 0.2394506275159839, "grad_norm": 1.9140625, "learning_rate": 1.9364031225782745e-05, "loss": 0.7284, "step": 1896 }, { "epoch": 0.23957692004104508, "grad_norm": 2.03125, "learning_rate": 1.936333043727609e-05, "loss": 0.6776, "step": 1897 }, { "epoch": 0.23970321256610624, "grad_norm": 1.890625, "learning_rate": 1.936262927556964e-05, "loss": 0.7006, "step": 1898 }, { "epoch": 0.2398295050911674, "grad_norm": 1.890625, "learning_rate": 1.9361927740691334e-05, "loss": 0.6487, "step": 1899 }, { "epoch": 0.2399557976162286, "grad_norm": 1.8125, "learning_rate": 1.936122583266914e-05, "loss": 0.7112, "step": 1900 }, { "epoch": 0.24008209014128976, "grad_norm": 1.8984375, "learning_rate": 1.936052355153103e-05, "loss": 0.6112, "step": 1901 }, { "epoch": 0.24020838266635094, "grad_norm": 2.03125, "learning_rate": 1.9359820897305002e-05, "loss": 0.7523, "step": 1902 }, { "epoch": 0.2403346751914121, "grad_norm": 1.78125, "learning_rate": 1.9359117870019052e-05, "loss": 0.6936, "step": 1903 }, { "epoch": 0.2404609677164733, "grad_norm": 1.9296875, "learning_rate": 1.935841446970121e-05, "loss": 0.7176, "step": 1904 }, { "epoch": 0.24058726024153446, "grad_norm": 1.9375, "learning_rate": 1.9357710696379508e-05, "loss": 0.7688, "step": 1905 }, { "epoch": 0.24071355276659562, "grad_norm": 1.8671875, "learning_rate": 1.9357006550081994e-05, "loss": 0.6299, "step": 1906 }, { "epoch": 0.2408398452916568, "grad_norm": 1.8046875, "learning_rate": 1.935630203083674e-05, "loss": 0.6971, "step": 1907 }, { "epoch": 0.24096613781671797, "grad_norm": 2.03125, "learning_rate": 1.9355597138671826e-05, "loss": 0.7565, "step": 1908 }, { "epoch": 0.24109243034177916, "grad_norm": 1.859375, "learning_rate": 1.935489187361534e-05, "loss": 0.66, "step": 1909 }, { "epoch": 0.24121872286684032, "grad_norm": 2.171875, "learning_rate": 1.9354186235695395e-05, "loss": 0.706, "step": 1910 }, { "epoch": 0.24134501539190148, "grad_norm": 2.015625, "learning_rate": 1.935348022494012e-05, "loss": 0.7326, "step": 1911 }, { "epoch": 0.24147130791696267, "grad_norm": 3.078125, "learning_rate": 1.9352773841377655e-05, "loss": 0.8503, "step": 1912 }, { "epoch": 0.24159760044202383, "grad_norm": 1.828125, "learning_rate": 1.935206708503615e-05, "loss": 0.6688, "step": 1913 }, { "epoch": 0.24172389296708502, "grad_norm": 1.90625, "learning_rate": 1.9351359955943776e-05, "loss": 0.7316, "step": 1914 }, { "epoch": 0.24185018549214618, "grad_norm": 1.890625, "learning_rate": 1.9350652454128718e-05, "loss": 0.7116, "step": 1915 }, { "epoch": 0.24197647801720737, "grad_norm": 1.84375, "learning_rate": 1.9349944579619177e-05, "loss": 0.7143, "step": 1916 }, { "epoch": 0.24210277054226853, "grad_norm": 2.296875, "learning_rate": 1.9349236332443365e-05, "loss": 0.8093, "step": 1917 }, { "epoch": 0.2422290630673297, "grad_norm": 2.78125, "learning_rate": 1.9348527712629513e-05, "loss": 0.8406, "step": 1918 }, { "epoch": 0.24235535559239088, "grad_norm": 1.90625, "learning_rate": 1.934781872020586e-05, "loss": 0.515, "step": 1919 }, { "epoch": 0.24248164811745204, "grad_norm": 1.9296875, "learning_rate": 1.9347109355200672e-05, "loss": 0.7743, "step": 1920 }, { "epoch": 0.24260794064251323, "grad_norm": 2.203125, "learning_rate": 1.934639961764222e-05, "loss": 0.7346, "step": 1921 }, { "epoch": 0.2427342331675744, "grad_norm": 1.8984375, "learning_rate": 1.934568950755879e-05, "loss": 0.628, "step": 1922 }, { "epoch": 0.24286052569263555, "grad_norm": 1.96875, "learning_rate": 1.9344979024978688e-05, "loss": 0.6162, "step": 1923 }, { "epoch": 0.24298681821769674, "grad_norm": 1.8984375, "learning_rate": 1.9344268169930228e-05, "loss": 0.6623, "step": 1924 }, { "epoch": 0.2431131107427579, "grad_norm": 1.7421875, "learning_rate": 1.9343556942441748e-05, "loss": 0.5877, "step": 1925 }, { "epoch": 0.2432394032678191, "grad_norm": 1.8359375, "learning_rate": 1.934284534254159e-05, "loss": 0.6761, "step": 1926 }, { "epoch": 0.24336569579288025, "grad_norm": 1.9609375, "learning_rate": 1.9342133370258124e-05, "loss": 0.6891, "step": 1927 }, { "epoch": 0.24349198831794144, "grad_norm": 1.8515625, "learning_rate": 1.9341421025619723e-05, "loss": 0.6388, "step": 1928 }, { "epoch": 0.2436182808430026, "grad_norm": 1.7578125, "learning_rate": 1.9340708308654782e-05, "loss": 0.5923, "step": 1929 }, { "epoch": 0.24374457336806377, "grad_norm": 1.921875, "learning_rate": 1.9339995219391705e-05, "loss": 0.6626, "step": 1930 }, { "epoch": 0.24387086589312496, "grad_norm": 1.8359375, "learning_rate": 1.9339281757858915e-05, "loss": 0.6315, "step": 1931 }, { "epoch": 0.24399715841818612, "grad_norm": 1.84375, "learning_rate": 1.933856792408485e-05, "loss": 0.6565, "step": 1932 }, { "epoch": 0.2441234509432473, "grad_norm": 1.8515625, "learning_rate": 1.933785371809796e-05, "loss": 0.6254, "step": 1933 }, { "epoch": 0.24424974346830847, "grad_norm": 1.90625, "learning_rate": 1.933713913992671e-05, "loss": 0.718, "step": 1934 }, { "epoch": 0.24437603599336966, "grad_norm": 1.9375, "learning_rate": 1.9336424189599586e-05, "loss": 0.6496, "step": 1935 }, { "epoch": 0.24450232851843082, "grad_norm": 1.8125, "learning_rate": 1.9335708867145084e-05, "loss": 0.6931, "step": 1936 }, { "epoch": 0.24462862104349198, "grad_norm": 1.8984375, "learning_rate": 1.933499317259171e-05, "loss": 0.6934, "step": 1937 }, { "epoch": 0.24475491356855317, "grad_norm": 2.109375, "learning_rate": 1.9334277105967992e-05, "loss": 0.6877, "step": 1938 }, { "epoch": 0.24488120609361433, "grad_norm": 1.8671875, "learning_rate": 1.9333560667302474e-05, "loss": 0.7579, "step": 1939 }, { "epoch": 0.24500749861867552, "grad_norm": 1.9296875, "learning_rate": 1.933284385662371e-05, "loss": 0.6889, "step": 1940 }, { "epoch": 0.24513379114373668, "grad_norm": 2.015625, "learning_rate": 1.933212667396027e-05, "loss": 0.7183, "step": 1941 }, { "epoch": 0.24526008366879784, "grad_norm": 1.84375, "learning_rate": 1.9331409119340736e-05, "loss": 0.6563, "step": 1942 }, { "epoch": 0.24538637619385903, "grad_norm": 1.96875, "learning_rate": 1.933069119279371e-05, "loss": 0.7704, "step": 1943 }, { "epoch": 0.2455126687189202, "grad_norm": 1.8359375, "learning_rate": 1.932997289434781e-05, "loss": 0.71, "step": 1944 }, { "epoch": 0.24563896124398138, "grad_norm": 2.03125, "learning_rate": 1.9329254224031665e-05, "loss": 0.673, "step": 1945 }, { "epoch": 0.24576525376904254, "grad_norm": 1.9375, "learning_rate": 1.932853518187391e-05, "loss": 0.7106, "step": 1946 }, { "epoch": 0.24589154629410373, "grad_norm": 2.03125, "learning_rate": 1.9327815767903217e-05, "loss": 0.7053, "step": 1947 }, { "epoch": 0.2460178388191649, "grad_norm": 1.9140625, "learning_rate": 1.9327095982148258e-05, "loss": 0.7068, "step": 1948 }, { "epoch": 0.24614413134422605, "grad_norm": 1.90625, "learning_rate": 1.9326375824637715e-05, "loss": 0.6741, "step": 1949 }, { "epoch": 0.24627042386928724, "grad_norm": 1.7890625, "learning_rate": 1.9325655295400295e-05, "loss": 0.6641, "step": 1950 }, { "epoch": 0.2463967163943484, "grad_norm": 1.890625, "learning_rate": 1.9324934394464716e-05, "loss": 0.6395, "step": 1951 }, { "epoch": 0.2465230089194096, "grad_norm": 1.8203125, "learning_rate": 1.9324213121859716e-05, "loss": 0.7235, "step": 1952 }, { "epoch": 0.24664930144447075, "grad_norm": 1.828125, "learning_rate": 1.9323491477614036e-05, "loss": 0.6817, "step": 1953 }, { "epoch": 0.24677559396953191, "grad_norm": 1.953125, "learning_rate": 1.9322769461756446e-05, "loss": 0.7402, "step": 1954 }, { "epoch": 0.2469018864945931, "grad_norm": 2.015625, "learning_rate": 1.932204707431572e-05, "loss": 0.7375, "step": 1955 }, { "epoch": 0.24702817901965426, "grad_norm": 2.171875, "learning_rate": 1.9321324315320652e-05, "loss": 0.7486, "step": 1956 }, { "epoch": 0.24715447154471545, "grad_norm": 2.078125, "learning_rate": 1.9320601184800047e-05, "loss": 0.7014, "step": 1957 }, { "epoch": 0.24728076406977662, "grad_norm": 1.6953125, "learning_rate": 1.9319877682782727e-05, "loss": 0.6134, "step": 1958 }, { "epoch": 0.2474070565948378, "grad_norm": 1.6796875, "learning_rate": 1.9319153809297532e-05, "loss": 0.5224, "step": 1959 }, { "epoch": 0.24753334911989897, "grad_norm": 1.859375, "learning_rate": 1.9318429564373317e-05, "loss": 0.6827, "step": 1960 }, { "epoch": 0.24765964164496013, "grad_norm": 1.9453125, "learning_rate": 1.931770494803894e-05, "loss": 0.739, "step": 1961 }, { "epoch": 0.24778593417002132, "grad_norm": 1.890625, "learning_rate": 1.9316979960323286e-05, "loss": 0.6959, "step": 1962 }, { "epoch": 0.24791222669508248, "grad_norm": 2.0, "learning_rate": 1.931625460125526e-05, "loss": 0.6925, "step": 1963 }, { "epoch": 0.24803851922014367, "grad_norm": 1.8515625, "learning_rate": 1.9315528870863757e-05, "loss": 0.6188, "step": 1964 }, { "epoch": 0.24816481174520483, "grad_norm": 1.90625, "learning_rate": 1.9314802769177713e-05, "loss": 0.7073, "step": 1965 }, { "epoch": 0.24829110427026602, "grad_norm": 1.96875, "learning_rate": 1.9314076296226066e-05, "loss": 0.7319, "step": 1966 }, { "epoch": 0.24841739679532718, "grad_norm": 1.984375, "learning_rate": 1.931334945203777e-05, "loss": 0.7099, "step": 1967 }, { "epoch": 0.24854368932038834, "grad_norm": 1.96875, "learning_rate": 1.9312622236641804e-05, "loss": 0.6977, "step": 1968 }, { "epoch": 0.24866998184544953, "grad_norm": 1.8984375, "learning_rate": 1.9311894650067146e-05, "loss": 0.6329, "step": 1969 }, { "epoch": 0.2487962743705107, "grad_norm": 3.0625, "learning_rate": 1.9311166692342792e-05, "loss": 0.7625, "step": 1970 }, { "epoch": 0.24892256689557188, "grad_norm": 1.875, "learning_rate": 1.931043836349776e-05, "loss": 0.7145, "step": 1971 }, { "epoch": 0.24904885942063304, "grad_norm": 1.9609375, "learning_rate": 1.9309709663561085e-05, "loss": 0.686, "step": 1972 }, { "epoch": 0.2491751519456942, "grad_norm": 1.9140625, "learning_rate": 1.9308980592561806e-05, "loss": 0.6823, "step": 1973 }, { "epoch": 0.2493014444707554, "grad_norm": 1.921875, "learning_rate": 1.9308251150528982e-05, "loss": 0.6919, "step": 1974 }, { "epoch": 0.24942773699581655, "grad_norm": 1.9609375, "learning_rate": 1.9307521337491687e-05, "loss": 0.7004, "step": 1975 }, { "epoch": 0.24955402952087774, "grad_norm": 1.8203125, "learning_rate": 1.9306791153479007e-05, "loss": 0.686, "step": 1976 }, { "epoch": 0.2496803220459389, "grad_norm": 1.8046875, "learning_rate": 1.9306060598520052e-05, "loss": 0.6621, "step": 1977 }, { "epoch": 0.2498066145710001, "grad_norm": 2.078125, "learning_rate": 1.9305329672643936e-05, "loss": 0.7447, "step": 1978 }, { "epoch": 0.24993290709606125, "grad_norm": 1.84375, "learning_rate": 1.9304598375879794e-05, "loss": 0.596, "step": 1979 }, { "epoch": 0.25005919962112244, "grad_norm": 1.8515625, "learning_rate": 1.930386670825677e-05, "loss": 0.72, "step": 1980 }, { "epoch": 0.2501854921461836, "grad_norm": 1.921875, "learning_rate": 1.930313466980403e-05, "loss": 0.6676, "step": 1981 }, { "epoch": 0.25031178467124476, "grad_norm": 2.015625, "learning_rate": 1.930240226055075e-05, "loss": 0.6692, "step": 1982 }, { "epoch": 0.25043807719630595, "grad_norm": 1.9140625, "learning_rate": 1.9301669480526118e-05, "loss": 0.7114, "step": 1983 }, { "epoch": 0.25056436972136714, "grad_norm": 2.203125, "learning_rate": 1.930093632975935e-05, "loss": 0.8184, "step": 1984 }, { "epoch": 0.2506906622464283, "grad_norm": 2.03125, "learning_rate": 1.9300202808279663e-05, "loss": 0.6925, "step": 1985 }, { "epoch": 0.25081695477148946, "grad_norm": 1.84375, "learning_rate": 1.9299468916116293e-05, "loss": 0.6863, "step": 1986 }, { "epoch": 0.25094324729655065, "grad_norm": 1.9921875, "learning_rate": 1.929873465329849e-05, "loss": 0.7256, "step": 1987 }, { "epoch": 0.2510695398216118, "grad_norm": 1.8984375, "learning_rate": 1.9298000019855517e-05, "loss": 0.7099, "step": 1988 }, { "epoch": 0.251195832346673, "grad_norm": 1.890625, "learning_rate": 1.9297265015816663e-05, "loss": 0.6577, "step": 1989 }, { "epoch": 0.25132212487173416, "grad_norm": 1.9296875, "learning_rate": 1.9296529641211222e-05, "loss": 0.7024, "step": 1990 }, { "epoch": 0.25144841739679535, "grad_norm": 1.8046875, "learning_rate": 1.9295793896068494e-05, "loss": 0.7588, "step": 1991 }, { "epoch": 0.2515747099218565, "grad_norm": 1.9765625, "learning_rate": 1.9295057780417818e-05, "loss": 0.7677, "step": 1992 }, { "epoch": 0.2517010024469177, "grad_norm": 1.7421875, "learning_rate": 1.9294321294288526e-05, "loss": 0.666, "step": 1993 }, { "epoch": 0.25182729497197887, "grad_norm": 1.8828125, "learning_rate": 1.9293584437709975e-05, "loss": 0.6795, "step": 1994 }, { "epoch": 0.25195358749704, "grad_norm": 2.015625, "learning_rate": 1.9292847210711532e-05, "loss": 0.6878, "step": 1995 }, { "epoch": 0.2520798800221012, "grad_norm": 1.8828125, "learning_rate": 1.929210961332258e-05, "loss": 0.6542, "step": 1996 }, { "epoch": 0.2522061725471624, "grad_norm": 1.875, "learning_rate": 1.9291371645572524e-05, "loss": 0.7138, "step": 1997 }, { "epoch": 0.2523324650722235, "grad_norm": 1.7734375, "learning_rate": 1.9290633307490773e-05, "loss": 0.6421, "step": 1998 }, { "epoch": 0.2524587575972847, "grad_norm": 1.90625, "learning_rate": 1.9289894599106756e-05, "loss": 0.6631, "step": 1999 }, { "epoch": 0.2525850501223459, "grad_norm": 1.890625, "learning_rate": 1.9289155520449916e-05, "loss": 0.686, "step": 2000 }, { "epoch": 0.2527113426474071, "grad_norm": 1.765625, "learning_rate": 1.928841607154971e-05, "loss": 0.6023, "step": 2001 }, { "epoch": 0.2528376351724682, "grad_norm": 1.8203125, "learning_rate": 1.928767625243561e-05, "loss": 0.5695, "step": 2002 }, { "epoch": 0.2529639276975294, "grad_norm": 1.796875, "learning_rate": 1.9286936063137113e-05, "loss": 0.6699, "step": 2003 }, { "epoch": 0.2530902202225906, "grad_norm": 1.7578125, "learning_rate": 1.928619550368371e-05, "loss": 0.6432, "step": 2004 }, { "epoch": 0.2532165127476517, "grad_norm": 1.9140625, "learning_rate": 1.928545457410492e-05, "loss": 0.6712, "step": 2005 }, { "epoch": 0.2533428052727129, "grad_norm": 1.9140625, "learning_rate": 1.9284713274430277e-05, "loss": 0.67, "step": 2006 }, { "epoch": 0.2534690977977741, "grad_norm": 1.7265625, "learning_rate": 1.9283971604689324e-05, "loss": 0.631, "step": 2007 }, { "epoch": 0.2535953903228353, "grad_norm": 1.796875, "learning_rate": 1.9283229564911624e-05, "loss": 0.6206, "step": 2008 }, { "epoch": 0.2537216828478964, "grad_norm": 2.0625, "learning_rate": 1.9282487155126757e-05, "loss": 0.7277, "step": 2009 }, { "epoch": 0.2538479753729576, "grad_norm": 1.703125, "learning_rate": 1.928174437536431e-05, "loss": 0.5913, "step": 2010 }, { "epoch": 0.2539742678980188, "grad_norm": 1.9921875, "learning_rate": 1.928100122565389e-05, "loss": 0.6396, "step": 2011 }, { "epoch": 0.25410056042307994, "grad_norm": 2.046875, "learning_rate": 1.9280257706025114e-05, "loss": 0.6404, "step": 2012 }, { "epoch": 0.2542268529481411, "grad_norm": 1.8359375, "learning_rate": 1.927951381650762e-05, "loss": 0.6754, "step": 2013 }, { "epoch": 0.2543531454732023, "grad_norm": 1.9140625, "learning_rate": 1.9278769557131057e-05, "loss": 0.6128, "step": 2014 }, { "epoch": 0.2544794379982635, "grad_norm": 1.8046875, "learning_rate": 1.9278024927925085e-05, "loss": 0.6936, "step": 2015 }, { "epoch": 0.25460573052332464, "grad_norm": 1.859375, "learning_rate": 1.927727992891939e-05, "loss": 0.6869, "step": 2016 }, { "epoch": 0.2547320230483858, "grad_norm": 2.0, "learning_rate": 1.927653456014366e-05, "loss": 0.7588, "step": 2017 }, { "epoch": 0.254858315573447, "grad_norm": 1.9765625, "learning_rate": 1.9275788821627607e-05, "loss": 0.7513, "step": 2018 }, { "epoch": 0.25498460809850815, "grad_norm": 1.8828125, "learning_rate": 1.9275042713400956e-05, "loss": 0.6649, "step": 2019 }, { "epoch": 0.25511090062356934, "grad_norm": 1.890625, "learning_rate": 1.9274296235493444e-05, "loss": 0.6916, "step": 2020 }, { "epoch": 0.2552371931486305, "grad_norm": 1.96875, "learning_rate": 1.927354938793482e-05, "loss": 0.6371, "step": 2021 }, { "epoch": 0.2553634856736917, "grad_norm": 2.09375, "learning_rate": 1.9272802170754854e-05, "loss": 0.8568, "step": 2022 }, { "epoch": 0.25548977819875285, "grad_norm": 1.90625, "learning_rate": 1.9272054583983327e-05, "loss": 0.7126, "step": 2023 }, { "epoch": 0.25561607072381404, "grad_norm": 1.9140625, "learning_rate": 1.9271306627650038e-05, "loss": 0.6194, "step": 2024 }, { "epoch": 0.2557423632488752, "grad_norm": 2.015625, "learning_rate": 1.92705583017848e-05, "loss": 0.7524, "step": 2025 }, { "epoch": 0.25586865577393636, "grad_norm": 1.9296875, "learning_rate": 1.9269809606417437e-05, "loss": 0.7419, "step": 2026 }, { "epoch": 0.25599494829899755, "grad_norm": 1.9140625, "learning_rate": 1.926906054157779e-05, "loss": 0.7362, "step": 2027 }, { "epoch": 0.25612124082405874, "grad_norm": 2.0625, "learning_rate": 1.926831110729572e-05, "loss": 0.7311, "step": 2028 }, { "epoch": 0.25624753334911987, "grad_norm": 1.8359375, "learning_rate": 1.926756130360109e-05, "loss": 0.7824, "step": 2029 }, { "epoch": 0.25637382587418106, "grad_norm": 1.734375, "learning_rate": 1.9266811130523786e-05, "loss": 0.6223, "step": 2030 }, { "epoch": 0.25650011839924225, "grad_norm": 1.953125, "learning_rate": 1.9266060588093714e-05, "loss": 0.6495, "step": 2031 }, { "epoch": 0.25662641092430344, "grad_norm": 1.859375, "learning_rate": 1.9265309676340787e-05, "loss": 0.6539, "step": 2032 }, { "epoch": 0.25675270344936457, "grad_norm": 2.046875, "learning_rate": 1.926455839529493e-05, "loss": 0.778, "step": 2033 }, { "epoch": 0.25687899597442576, "grad_norm": 1.875, "learning_rate": 1.926380674498609e-05, "loss": 0.6866, "step": 2034 }, { "epoch": 0.25700528849948695, "grad_norm": 1.8046875, "learning_rate": 1.926305472544423e-05, "loss": 0.7071, "step": 2035 }, { "epoch": 0.2571315810245481, "grad_norm": 9.625, "learning_rate": 1.926230233669932e-05, "loss": 0.6848, "step": 2036 }, { "epoch": 0.2572578735496093, "grad_norm": 1.875, "learning_rate": 1.9261549578781346e-05, "loss": 0.6628, "step": 2037 }, { "epoch": 0.25738416607467046, "grad_norm": 1.8671875, "learning_rate": 1.9260796451720315e-05, "loss": 0.6486, "step": 2038 }, { "epoch": 0.25751045859973165, "grad_norm": 1.984375, "learning_rate": 1.9260042955546243e-05, "loss": 0.8355, "step": 2039 }, { "epoch": 0.2576367511247928, "grad_norm": 1.7578125, "learning_rate": 1.9259289090289162e-05, "loss": 0.7335, "step": 2040 }, { "epoch": 0.257763043649854, "grad_norm": 1.8125, "learning_rate": 1.9258534855979125e-05, "loss": 0.6978, "step": 2041 }, { "epoch": 0.25788933617491516, "grad_norm": 1.8515625, "learning_rate": 1.9257780252646183e-05, "loss": 0.711, "step": 2042 }, { "epoch": 0.2580156286999763, "grad_norm": 1.9921875, "learning_rate": 1.925702528032042e-05, "loss": 0.6413, "step": 2043 }, { "epoch": 0.2581419212250375, "grad_norm": 1.9140625, "learning_rate": 1.925626993903193e-05, "loss": 0.7141, "step": 2044 }, { "epoch": 0.2582682137500987, "grad_norm": 1.921875, "learning_rate": 1.9255514228810813e-05, "loss": 0.6547, "step": 2045 }, { "epoch": 0.25839450627515986, "grad_norm": 1.9453125, "learning_rate": 1.925475814968719e-05, "loss": 0.6969, "step": 2046 }, { "epoch": 0.258520798800221, "grad_norm": 1.75, "learning_rate": 1.92540017016912e-05, "loss": 0.6497, "step": 2047 }, { "epoch": 0.2586470913252822, "grad_norm": 1.765625, "learning_rate": 1.9253244884852993e-05, "loss": 0.6713, "step": 2048 }, { "epoch": 0.2587733838503434, "grad_norm": 1.8203125, "learning_rate": 1.9252487699202733e-05, "loss": 0.6025, "step": 2049 }, { "epoch": 0.2588996763754045, "grad_norm": 1.953125, "learning_rate": 1.92517301447706e-05, "loss": 0.6856, "step": 2050 }, { "epoch": 0.2590259689004657, "grad_norm": 1.875, "learning_rate": 1.9250972221586786e-05, "loss": 0.6633, "step": 2051 }, { "epoch": 0.2591522614255269, "grad_norm": 2.125, "learning_rate": 1.9250213929681502e-05, "loss": 0.721, "step": 2052 }, { "epoch": 0.2592785539505881, "grad_norm": 1.953125, "learning_rate": 1.9249455269084972e-05, "loss": 0.6644, "step": 2053 }, { "epoch": 0.2594048464756492, "grad_norm": 1.796875, "learning_rate": 1.924869623982743e-05, "loss": 0.633, "step": 2054 }, { "epoch": 0.2595311390007104, "grad_norm": 1.8359375, "learning_rate": 1.924793684193914e-05, "loss": 0.6523, "step": 2055 }, { "epoch": 0.2596574315257716, "grad_norm": 1.8203125, "learning_rate": 1.9247177075450356e-05, "loss": 0.6946, "step": 2056 }, { "epoch": 0.2597837240508327, "grad_norm": 1.9609375, "learning_rate": 1.9246416940391367e-05, "loss": 0.7018, "step": 2057 }, { "epoch": 0.2599100165758939, "grad_norm": 1.953125, "learning_rate": 1.9245656436792472e-05, "loss": 0.6674, "step": 2058 }, { "epoch": 0.2600363091009551, "grad_norm": 1.921875, "learning_rate": 1.9244895564683983e-05, "loss": 0.724, "step": 2059 }, { "epoch": 0.2601626016260163, "grad_norm": 2.546875, "learning_rate": 1.9244134324096223e-05, "loss": 0.8144, "step": 2060 }, { "epoch": 0.2602888941510774, "grad_norm": 1.828125, "learning_rate": 1.9243372715059535e-05, "loss": 0.6897, "step": 2061 }, { "epoch": 0.2604151866761386, "grad_norm": 1.84375, "learning_rate": 1.924261073760427e-05, "loss": 0.6977, "step": 2062 }, { "epoch": 0.2605414792011998, "grad_norm": 1.90625, "learning_rate": 1.9241848391760813e-05, "loss": 0.6636, "step": 2063 }, { "epoch": 0.26066777172626093, "grad_norm": 2.03125, "learning_rate": 1.9241085677559534e-05, "loss": 0.6478, "step": 2064 }, { "epoch": 0.2607940642513221, "grad_norm": 1.8984375, "learning_rate": 1.9240322595030836e-05, "loss": 0.7753, "step": 2065 }, { "epoch": 0.2609203567763833, "grad_norm": 1.890625, "learning_rate": 1.923955914420514e-05, "loss": 0.6818, "step": 2066 }, { "epoch": 0.26104664930144444, "grad_norm": 1.9609375, "learning_rate": 1.9238795325112867e-05, "loss": 0.6757, "step": 2067 }, { "epoch": 0.26117294182650563, "grad_norm": 2.109375, "learning_rate": 1.923803113778447e-05, "loss": 0.7946, "step": 2068 }, { "epoch": 0.2612992343515668, "grad_norm": 2.015625, "learning_rate": 1.9237266582250404e-05, "loss": 0.7708, "step": 2069 }, { "epoch": 0.261425526876628, "grad_norm": 1.9453125, "learning_rate": 1.9236501658541134e-05, "loss": 0.7206, "step": 2070 }, { "epoch": 0.26155181940168915, "grad_norm": 1.859375, "learning_rate": 1.9235736366687164e-05, "loss": 0.666, "step": 2071 }, { "epoch": 0.26167811192675033, "grad_norm": 1.9140625, "learning_rate": 1.9234970706718984e-05, "loss": 0.7599, "step": 2072 }, { "epoch": 0.2618044044518115, "grad_norm": 1.953125, "learning_rate": 1.9234204678667115e-05, "loss": 0.7654, "step": 2073 }, { "epoch": 0.26193069697687266, "grad_norm": 1.859375, "learning_rate": 1.923343828256209e-05, "loss": 0.7976, "step": 2074 }, { "epoch": 0.26205698950193385, "grad_norm": 1.84375, "learning_rate": 1.9232671518434458e-05, "loss": 0.7161, "step": 2075 }, { "epoch": 0.26218328202699503, "grad_norm": 1.828125, "learning_rate": 1.9231904386314772e-05, "loss": 0.6912, "step": 2076 }, { "epoch": 0.2623095745520562, "grad_norm": 2.0625, "learning_rate": 1.9231136886233613e-05, "loss": 0.7387, "step": 2077 }, { "epoch": 0.26243586707711736, "grad_norm": 1.6875, "learning_rate": 1.9230369018221576e-05, "loss": 0.5442, "step": 2078 }, { "epoch": 0.26256215960217855, "grad_norm": 1.8515625, "learning_rate": 1.9229600782309265e-05, "loss": 0.6489, "step": 2079 }, { "epoch": 0.26268845212723974, "grad_norm": 1.796875, "learning_rate": 1.9228832178527293e-05, "loss": 0.6991, "step": 2080 }, { "epoch": 0.26281474465230087, "grad_norm": 1.9375, "learning_rate": 1.9228063206906302e-05, "loss": 0.7567, "step": 2081 }, { "epoch": 0.26294103717736206, "grad_norm": 1.8671875, "learning_rate": 1.922729386747694e-05, "loss": 0.6351, "step": 2082 }, { "epoch": 0.26306732970242325, "grad_norm": 1.875, "learning_rate": 1.9226524160269867e-05, "loss": 0.6881, "step": 2083 }, { "epoch": 0.26319362222748444, "grad_norm": 1.953125, "learning_rate": 1.9225754085315766e-05, "loss": 0.7853, "step": 2084 }, { "epoch": 0.26331991475254557, "grad_norm": 1.78125, "learning_rate": 1.922498364264533e-05, "loss": 0.6516, "step": 2085 }, { "epoch": 0.26344620727760676, "grad_norm": 1.8828125, "learning_rate": 1.9224212832289262e-05, "loss": 0.6191, "step": 2086 }, { "epoch": 0.26357249980266795, "grad_norm": 2.015625, "learning_rate": 1.9223441654278293e-05, "loss": 0.7047, "step": 2087 }, { "epoch": 0.2636987923277291, "grad_norm": 1.78125, "learning_rate": 1.9222670108643152e-05, "loss": 0.5962, "step": 2088 }, { "epoch": 0.26382508485279027, "grad_norm": 1.9921875, "learning_rate": 1.9221898195414595e-05, "loss": 0.7687, "step": 2089 }, { "epoch": 0.26395137737785146, "grad_norm": 2.203125, "learning_rate": 1.9221125914623388e-05, "loss": 0.6525, "step": 2090 }, { "epoch": 0.26407766990291265, "grad_norm": 1.9921875, "learning_rate": 1.9220353266300312e-05, "loss": 0.6772, "step": 2091 }, { "epoch": 0.2642039624279738, "grad_norm": 1.890625, "learning_rate": 1.9219580250476167e-05, "loss": 0.6808, "step": 2092 }, { "epoch": 0.26433025495303497, "grad_norm": 1.734375, "learning_rate": 1.921880686718176e-05, "loss": 0.5972, "step": 2093 }, { "epoch": 0.26445654747809616, "grad_norm": 1.9765625, "learning_rate": 1.9218033116447913e-05, "loss": 0.6447, "step": 2094 }, { "epoch": 0.2645828400031573, "grad_norm": 2.015625, "learning_rate": 1.921725899830547e-05, "loss": 0.7224, "step": 2095 }, { "epoch": 0.2647091325282185, "grad_norm": 2.0, "learning_rate": 1.9216484512785284e-05, "loss": 0.7763, "step": 2096 }, { "epoch": 0.26483542505327967, "grad_norm": 1.9765625, "learning_rate": 1.9215709659918227e-05, "loss": 0.7551, "step": 2097 }, { "epoch": 0.2649617175783408, "grad_norm": 1.84375, "learning_rate": 1.9214934439735177e-05, "loss": 0.6613, "step": 2098 }, { "epoch": 0.265088010103402, "grad_norm": 1.9609375, "learning_rate": 1.9214158852267036e-05, "loss": 0.7273, "step": 2099 }, { "epoch": 0.2652143026284632, "grad_norm": 1.890625, "learning_rate": 1.9213382897544722e-05, "loss": 0.6309, "step": 2100 }, { "epoch": 0.2653405951535244, "grad_norm": 1.9140625, "learning_rate": 1.921260657559915e-05, "loss": 0.7317, "step": 2101 }, { "epoch": 0.2654668876785855, "grad_norm": 1.9765625, "learning_rate": 1.9211829886461274e-05, "loss": 0.7176, "step": 2102 }, { "epoch": 0.2655931802036467, "grad_norm": 1.8359375, "learning_rate": 1.921105283016205e-05, "loss": 0.62, "step": 2103 }, { "epoch": 0.2657194727287079, "grad_norm": 1.984375, "learning_rate": 1.921027540673244e-05, "loss": 0.7216, "step": 2104 }, { "epoch": 0.265845765253769, "grad_norm": 2.03125, "learning_rate": 1.920949761620344e-05, "loss": 0.6676, "step": 2105 }, { "epoch": 0.2659720577788302, "grad_norm": 1.8046875, "learning_rate": 1.9208719458606047e-05, "loss": 0.6442, "step": 2106 }, { "epoch": 0.2660983503038914, "grad_norm": 1.6796875, "learning_rate": 1.920794093397128e-05, "loss": 0.652, "step": 2107 }, { "epoch": 0.2662246428289526, "grad_norm": 1.953125, "learning_rate": 1.920716204233016e-05, "loss": 0.7452, "step": 2108 }, { "epoch": 0.2663509353540137, "grad_norm": 1.8203125, "learning_rate": 1.9206382783713738e-05, "loss": 0.6419, "step": 2109 }, { "epoch": 0.2664772278790749, "grad_norm": 1.828125, "learning_rate": 1.920560315815308e-05, "loss": 0.72, "step": 2110 }, { "epoch": 0.2666035204041361, "grad_norm": 1.953125, "learning_rate": 1.9204823165679247e-05, "loss": 0.7193, "step": 2111 }, { "epoch": 0.26672981292919723, "grad_norm": 1.8828125, "learning_rate": 1.9204042806323337e-05, "loss": 0.6936, "step": 2112 }, { "epoch": 0.2668561054542584, "grad_norm": 1.78125, "learning_rate": 1.9203262080116448e-05, "loss": 0.6592, "step": 2113 }, { "epoch": 0.2669823979793196, "grad_norm": 2.125, "learning_rate": 1.9202480987089703e-05, "loss": 0.8478, "step": 2114 }, { "epoch": 0.2671086905043808, "grad_norm": 2.21875, "learning_rate": 1.920169952727423e-05, "loss": 0.7219, "step": 2115 }, { "epoch": 0.26723498302944193, "grad_norm": 1.96875, "learning_rate": 1.9200917700701176e-05, "loss": 0.7626, "step": 2116 }, { "epoch": 0.2673612755545031, "grad_norm": 1.78125, "learning_rate": 1.9200135507401706e-05, "loss": 0.6784, "step": 2117 }, { "epoch": 0.2674875680795643, "grad_norm": 1.7421875, "learning_rate": 1.9199352947406992e-05, "loss": 0.6543, "step": 2118 }, { "epoch": 0.26761386060462544, "grad_norm": 1.8046875, "learning_rate": 1.9198570020748232e-05, "loss": 0.7048, "step": 2119 }, { "epoch": 0.26774015312968663, "grad_norm": 1.9609375, "learning_rate": 1.9197786727456625e-05, "loss": 0.662, "step": 2120 }, { "epoch": 0.2678664456547478, "grad_norm": 1.859375, "learning_rate": 1.919700306756339e-05, "loss": 0.7205, "step": 2121 }, { "epoch": 0.267992738179809, "grad_norm": 1.8203125, "learning_rate": 1.919621904109977e-05, "loss": 0.6973, "step": 2122 }, { "epoch": 0.26811903070487014, "grad_norm": 1.921875, "learning_rate": 1.9195434648097006e-05, "loss": 0.7033, "step": 2123 }, { "epoch": 0.26824532322993133, "grad_norm": 1.921875, "learning_rate": 1.9194649888586366e-05, "loss": 0.6122, "step": 2124 }, { "epoch": 0.2683716157549925, "grad_norm": 1.875, "learning_rate": 1.9193864762599132e-05, "loss": 0.7593, "step": 2125 }, { "epoch": 0.26849790828005365, "grad_norm": 1.921875, "learning_rate": 1.919307927016659e-05, "loss": 0.6776, "step": 2126 }, { "epoch": 0.26862420080511484, "grad_norm": 1.8125, "learning_rate": 1.919229341132005e-05, "loss": 0.6252, "step": 2127 }, { "epoch": 0.26875049333017603, "grad_norm": 1.796875, "learning_rate": 1.9191507186090838e-05, "loss": 0.6798, "step": 2128 }, { "epoch": 0.26887678585523717, "grad_norm": 1.8046875, "learning_rate": 1.9190720594510288e-05, "loss": 0.6144, "step": 2129 }, { "epoch": 0.26900307838029835, "grad_norm": 1.9140625, "learning_rate": 1.918993363660975e-05, "loss": 0.669, "step": 2130 }, { "epoch": 0.26912937090535954, "grad_norm": 1.8046875, "learning_rate": 1.9189146312420596e-05, "loss": 0.718, "step": 2131 }, { "epoch": 0.26925566343042073, "grad_norm": 1.921875, "learning_rate": 1.9188358621974202e-05, "loss": 0.7311, "step": 2132 }, { "epoch": 0.26938195595548187, "grad_norm": 2.03125, "learning_rate": 1.9187570565301966e-05, "loss": 0.7737, "step": 2133 }, { "epoch": 0.26950824848054306, "grad_norm": 2.109375, "learning_rate": 1.9186782142435297e-05, "loss": 0.7035, "step": 2134 }, { "epoch": 0.26963454100560424, "grad_norm": 2.09375, "learning_rate": 1.918599335340562e-05, "loss": 0.7193, "step": 2135 }, { "epoch": 0.2697608335306654, "grad_norm": 1.84375, "learning_rate": 1.9185204198244373e-05, "loss": 0.7114, "step": 2136 }, { "epoch": 0.26988712605572657, "grad_norm": 1.765625, "learning_rate": 1.9184414676983006e-05, "loss": 0.6978, "step": 2137 }, { "epoch": 0.27001341858078776, "grad_norm": 1.96875, "learning_rate": 1.9183624789653e-05, "loss": 0.7365, "step": 2138 }, { "epoch": 0.27013971110584895, "grad_norm": 1.734375, "learning_rate": 1.9182834536285827e-05, "loss": 0.6053, "step": 2139 }, { "epoch": 0.2702660036309101, "grad_norm": 1.8671875, "learning_rate": 1.9182043916912984e-05, "loss": 0.6665, "step": 2140 }, { "epoch": 0.27039229615597127, "grad_norm": 1.9609375, "learning_rate": 1.918125293156599e-05, "loss": 0.8241, "step": 2141 }, { "epoch": 0.27051858868103246, "grad_norm": 1.8359375, "learning_rate": 1.9180461580276367e-05, "loss": 0.7666, "step": 2142 }, { "epoch": 0.2706448812060936, "grad_norm": 1.7890625, "learning_rate": 1.917966986307566e-05, "loss": 0.6587, "step": 2143 }, { "epoch": 0.2707711737311548, "grad_norm": 1.90625, "learning_rate": 1.9178877779995423e-05, "loss": 0.7589, "step": 2144 }, { "epoch": 0.27089746625621597, "grad_norm": 1.8359375, "learning_rate": 1.9178085331067223e-05, "loss": 0.6779, "step": 2145 }, { "epoch": 0.27102375878127716, "grad_norm": 1.8984375, "learning_rate": 1.9177292516322653e-05, "loss": 0.6211, "step": 2146 }, { "epoch": 0.2711500513063383, "grad_norm": 1.796875, "learning_rate": 1.9176499335793306e-05, "loss": 0.6212, "step": 2147 }, { "epoch": 0.2712763438313995, "grad_norm": 1.9140625, "learning_rate": 1.9175705789510798e-05, "loss": 0.6825, "step": 2148 }, { "epoch": 0.27140263635646067, "grad_norm": 2.09375, "learning_rate": 1.9174911877506762e-05, "loss": 0.7454, "step": 2149 }, { "epoch": 0.2715289288815218, "grad_norm": 1.8984375, "learning_rate": 1.9174117599812832e-05, "loss": 0.6687, "step": 2150 }, { "epoch": 0.271655221406583, "grad_norm": 1.890625, "learning_rate": 1.9173322956460675e-05, "loss": 0.6339, "step": 2151 }, { "epoch": 0.2717815139316442, "grad_norm": 1.9921875, "learning_rate": 1.9172527947481958e-05, "loss": 0.7023, "step": 2152 }, { "epoch": 0.27190780645670537, "grad_norm": 1.921875, "learning_rate": 1.917173257290837e-05, "loss": 0.7301, "step": 2153 }, { "epoch": 0.2720340989817665, "grad_norm": 1.8515625, "learning_rate": 1.917093683277162e-05, "loss": 0.6257, "step": 2154 }, { "epoch": 0.2721603915068277, "grad_norm": 1.8828125, "learning_rate": 1.917014072710341e-05, "loss": 0.7509, "step": 2155 }, { "epoch": 0.2722866840318889, "grad_norm": 2.046875, "learning_rate": 1.9169344255935484e-05, "loss": 0.7269, "step": 2156 }, { "epoch": 0.27241297655695, "grad_norm": 1.859375, "learning_rate": 1.9168547419299577e-05, "loss": 0.6279, "step": 2157 }, { "epoch": 0.2725392690820112, "grad_norm": 1.7890625, "learning_rate": 1.9167750217227454e-05, "loss": 0.7433, "step": 2158 }, { "epoch": 0.2726655616070724, "grad_norm": 1.9453125, "learning_rate": 1.9166952649750893e-05, "loss": 0.702, "step": 2159 }, { "epoch": 0.2727918541321335, "grad_norm": 1.90625, "learning_rate": 1.9166154716901677e-05, "loss": 0.6621, "step": 2160 }, { "epoch": 0.2729181466571947, "grad_norm": 2.296875, "learning_rate": 1.9165356418711612e-05, "loss": 0.71, "step": 2161 }, { "epoch": 0.2730444391822559, "grad_norm": 1.859375, "learning_rate": 1.9164557755212518e-05, "loss": 0.6392, "step": 2162 }, { "epoch": 0.2731707317073171, "grad_norm": 1.9609375, "learning_rate": 1.9163758726436224e-05, "loss": 0.6781, "step": 2163 }, { "epoch": 0.2732970242323782, "grad_norm": 1.8359375, "learning_rate": 1.916295933241458e-05, "loss": 0.735, "step": 2164 }, { "epoch": 0.2734233167574394, "grad_norm": 1.96875, "learning_rate": 1.9162159573179446e-05, "loss": 0.6299, "step": 2165 }, { "epoch": 0.2735496092825006, "grad_norm": 1.8046875, "learning_rate": 1.91613594487627e-05, "loss": 0.6927, "step": 2166 }, { "epoch": 0.27367590180756174, "grad_norm": 2.078125, "learning_rate": 1.9160558959196236e-05, "loss": 0.7436, "step": 2167 }, { "epoch": 0.27380219433262293, "grad_norm": 2.296875, "learning_rate": 1.915975810451195e-05, "loss": 0.7445, "step": 2168 }, { "epoch": 0.2739284868576841, "grad_norm": 1.8828125, "learning_rate": 1.9158956884741777e-05, "loss": 0.6954, "step": 2169 }, { "epoch": 0.2740547793827453, "grad_norm": 1.7890625, "learning_rate": 1.915815529991764e-05, "loss": 0.6452, "step": 2170 }, { "epoch": 0.27418107190780644, "grad_norm": 1.8203125, "learning_rate": 1.915735335007149e-05, "loss": 0.6086, "step": 2171 }, { "epoch": 0.27430736443286763, "grad_norm": 1.9765625, "learning_rate": 1.915655103523529e-05, "loss": 0.7186, "step": 2172 }, { "epoch": 0.2744336569579288, "grad_norm": 1.875, "learning_rate": 1.9155748355441027e-05, "loss": 0.6548, "step": 2173 }, { "epoch": 0.27455994948298995, "grad_norm": 1.890625, "learning_rate": 1.9154945310720686e-05, "loss": 0.6791, "step": 2174 }, { "epoch": 0.27468624200805114, "grad_norm": 1.875, "learning_rate": 1.9154141901106273e-05, "loss": 0.6807, "step": 2175 }, { "epoch": 0.27481253453311233, "grad_norm": 2.015625, "learning_rate": 1.9153338126629815e-05, "loss": 0.6811, "step": 2176 }, { "epoch": 0.2749388270581735, "grad_norm": 1.84375, "learning_rate": 1.9152533987323345e-05, "loss": 0.6039, "step": 2177 }, { "epoch": 0.27506511958323465, "grad_norm": 1.8359375, "learning_rate": 1.9151729483218914e-05, "loss": 0.6616, "step": 2178 }, { "epoch": 0.27519141210829584, "grad_norm": 1.9375, "learning_rate": 1.9150924614348594e-05, "loss": 0.8376, "step": 2179 }, { "epoch": 0.27531770463335703, "grad_norm": 1.828125, "learning_rate": 1.9150119380744458e-05, "loss": 0.6582, "step": 2180 }, { "epoch": 0.27544399715841816, "grad_norm": 1.9296875, "learning_rate": 1.91493137824386e-05, "loss": 0.6701, "step": 2181 }, { "epoch": 0.27557028968347935, "grad_norm": 1.8828125, "learning_rate": 1.9148507819463137e-05, "loss": 0.6946, "step": 2182 }, { "epoch": 0.27569658220854054, "grad_norm": 1.7265625, "learning_rate": 1.9147701491850185e-05, "loss": 0.6516, "step": 2183 }, { "epoch": 0.27582287473360173, "grad_norm": 1.84375, "learning_rate": 1.9146894799631887e-05, "loss": 0.7328, "step": 2184 }, { "epoch": 0.27594916725866286, "grad_norm": 1.75, "learning_rate": 1.914608774284039e-05, "loss": 0.6663, "step": 2185 }, { "epoch": 0.27607545978372405, "grad_norm": 2.03125, "learning_rate": 1.9145280321507872e-05, "loss": 0.7319, "step": 2186 }, { "epoch": 0.27620175230878524, "grad_norm": 1.8828125, "learning_rate": 1.9144472535666505e-05, "loss": 0.7027, "step": 2187 }, { "epoch": 0.2763280448338464, "grad_norm": 1.9609375, "learning_rate": 1.9143664385348487e-05, "loss": 0.6364, "step": 2188 }, { "epoch": 0.27645433735890756, "grad_norm": 1.953125, "learning_rate": 1.9142855870586033e-05, "loss": 0.6596, "step": 2189 }, { "epoch": 0.27658062988396875, "grad_norm": 1.9765625, "learning_rate": 1.9142046991411363e-05, "loss": 0.7185, "step": 2190 }, { "epoch": 0.27670692240902994, "grad_norm": 1.8203125, "learning_rate": 1.914123774785672e-05, "loss": 0.5749, "step": 2191 }, { "epoch": 0.2768332149340911, "grad_norm": 1.734375, "learning_rate": 1.914042813995436e-05, "loss": 0.581, "step": 2192 }, { "epoch": 0.27695950745915227, "grad_norm": 1.8671875, "learning_rate": 1.913961816773655e-05, "loss": 0.6614, "step": 2193 }, { "epoch": 0.27708579998421345, "grad_norm": 3.53125, "learning_rate": 1.9138807831235576e-05, "loss": 0.9069, "step": 2194 }, { "epoch": 0.2772120925092746, "grad_norm": 1.9296875, "learning_rate": 1.913799713048373e-05, "loss": 0.6344, "step": 2195 }, { "epoch": 0.2773383850343358, "grad_norm": 1.8984375, "learning_rate": 1.9137186065513332e-05, "loss": 0.7037, "step": 2196 }, { "epoch": 0.27746467755939697, "grad_norm": 1.8515625, "learning_rate": 1.9136374636356704e-05, "loss": 0.6921, "step": 2197 }, { "epoch": 0.2775909700844581, "grad_norm": 1.78125, "learning_rate": 1.913556284304619e-05, "loss": 0.7023, "step": 2198 }, { "epoch": 0.2777172626095193, "grad_norm": 1.8125, "learning_rate": 1.9134750685614145e-05, "loss": 0.6493, "step": 2199 }, { "epoch": 0.2778435551345805, "grad_norm": 1.859375, "learning_rate": 1.9133938164092942e-05, "loss": 0.7074, "step": 2200 }, { "epoch": 0.27796984765964167, "grad_norm": 1.8671875, "learning_rate": 1.913312527851496e-05, "loss": 0.667, "step": 2201 }, { "epoch": 0.2780961401847028, "grad_norm": 1.8203125, "learning_rate": 1.913231202891261e-05, "loss": 0.6664, "step": 2202 }, { "epoch": 0.278222432709764, "grad_norm": 2.0, "learning_rate": 1.913149841531829e-05, "loss": 0.7169, "step": 2203 }, { "epoch": 0.2783487252348252, "grad_norm": 2.03125, "learning_rate": 1.9130684437764445e-05, "loss": 0.6598, "step": 2204 }, { "epoch": 0.2784750177598863, "grad_norm": 1.8671875, "learning_rate": 1.9129870096283508e-05, "loss": 0.7447, "step": 2205 }, { "epoch": 0.2786013102849475, "grad_norm": 1.84375, "learning_rate": 1.9129055390907938e-05, "loss": 0.6999, "step": 2206 }, { "epoch": 0.2787276028100087, "grad_norm": 1.7578125, "learning_rate": 1.912824032167021e-05, "loss": 0.6469, "step": 2207 }, { "epoch": 0.2788538953350699, "grad_norm": 1.734375, "learning_rate": 1.9127424888602812e-05, "loss": 0.6285, "step": 2208 }, { "epoch": 0.278980187860131, "grad_norm": 1.7265625, "learning_rate": 1.912660909173824e-05, "loss": 0.6375, "step": 2209 }, { "epoch": 0.2791064803851922, "grad_norm": 1.8671875, "learning_rate": 1.9125792931109015e-05, "loss": 0.6136, "step": 2210 }, { "epoch": 0.2792327729102534, "grad_norm": 1.84375, "learning_rate": 1.9124976406747662e-05, "loss": 0.7604, "step": 2211 }, { "epoch": 0.2793590654353145, "grad_norm": 1.9140625, "learning_rate": 1.912415951868673e-05, "loss": 0.7728, "step": 2212 }, { "epoch": 0.2794853579603757, "grad_norm": 1.875, "learning_rate": 1.9123342266958776e-05, "loss": 0.7083, "step": 2213 }, { "epoch": 0.2796116504854369, "grad_norm": 1.890625, "learning_rate": 1.9122524651596376e-05, "loss": 0.6894, "step": 2214 }, { "epoch": 0.2797379430104981, "grad_norm": 1.7265625, "learning_rate": 1.9121706672632113e-05, "loss": 0.6043, "step": 2215 }, { "epoch": 0.2798642355355592, "grad_norm": 1.765625, "learning_rate": 1.9120888330098595e-05, "loss": 0.6122, "step": 2216 }, { "epoch": 0.2799905280606204, "grad_norm": 1.9140625, "learning_rate": 1.9120069624028436e-05, "loss": 0.6693, "step": 2217 }, { "epoch": 0.2801168205856816, "grad_norm": 1.78125, "learning_rate": 1.911925055445427e-05, "loss": 0.7549, "step": 2218 }, { "epoch": 0.28024311311074274, "grad_norm": 1.78125, "learning_rate": 1.9118431121408744e-05, "loss": 0.6751, "step": 2219 }, { "epoch": 0.2803694056358039, "grad_norm": 1.859375, "learning_rate": 1.9117611324924513e-05, "loss": 0.6833, "step": 2220 }, { "epoch": 0.2804956981608651, "grad_norm": 1.6796875, "learning_rate": 1.9116791165034258e-05, "loss": 0.5904, "step": 2221 }, { "epoch": 0.2806219906859263, "grad_norm": 1.8515625, "learning_rate": 1.9115970641770666e-05, "loss": 0.7374, "step": 2222 }, { "epoch": 0.28074828321098744, "grad_norm": 1.78125, "learning_rate": 1.9115149755166437e-05, "loss": 0.6557, "step": 2223 }, { "epoch": 0.2808745757360486, "grad_norm": 3.484375, "learning_rate": 1.9114328505254297e-05, "loss": 0.738, "step": 2224 }, { "epoch": 0.2810008682611098, "grad_norm": 1.7734375, "learning_rate": 1.9113506892066977e-05, "loss": 0.6814, "step": 2225 }, { "epoch": 0.28112716078617095, "grad_norm": 1.796875, "learning_rate": 1.9112684915637226e-05, "loss": 0.6955, "step": 2226 }, { "epoch": 0.28125345331123214, "grad_norm": 1.8046875, "learning_rate": 1.9111862575997798e-05, "loss": 0.6402, "step": 2227 }, { "epoch": 0.2813797458362933, "grad_norm": 1.875, "learning_rate": 1.9111039873181478e-05, "loss": 0.7178, "step": 2228 }, { "epoch": 0.28150603836135446, "grad_norm": 1.890625, "learning_rate": 1.9110216807221053e-05, "loss": 0.7759, "step": 2229 }, { "epoch": 0.28163233088641565, "grad_norm": 1.8125, "learning_rate": 1.910939337814933e-05, "loss": 0.726, "step": 2230 }, { "epoch": 0.28175862341147684, "grad_norm": 1.9765625, "learning_rate": 1.910856958599913e-05, "loss": 0.7534, "step": 2231 }, { "epoch": 0.281884915936538, "grad_norm": 2.09375, "learning_rate": 1.9107745430803284e-05, "loss": 0.7024, "step": 2232 }, { "epoch": 0.28201120846159916, "grad_norm": 1.9140625, "learning_rate": 1.9106920912594642e-05, "loss": 0.6916, "step": 2233 }, { "epoch": 0.28213750098666035, "grad_norm": 2.015625, "learning_rate": 1.910609603140607e-05, "loss": 0.6556, "step": 2234 }, { "epoch": 0.28226379351172154, "grad_norm": 1.921875, "learning_rate": 1.9105270787270442e-05, "loss": 0.7633, "step": 2235 }, { "epoch": 0.2823900860367827, "grad_norm": 1.7734375, "learning_rate": 1.9104445180220648e-05, "loss": 0.7094, "step": 2236 }, { "epoch": 0.28251637856184386, "grad_norm": 1.953125, "learning_rate": 1.9103619210289607e-05, "loss": 0.9181, "step": 2237 }, { "epoch": 0.28264267108690505, "grad_norm": 1.7265625, "learning_rate": 1.9102792877510227e-05, "loss": 0.6113, "step": 2238 }, { "epoch": 0.28276896361196624, "grad_norm": 1.921875, "learning_rate": 1.9101966181915448e-05, "loss": 0.638, "step": 2239 }, { "epoch": 0.2828952561370274, "grad_norm": 1.8125, "learning_rate": 1.910113912353822e-05, "loss": 0.6178, "step": 2240 }, { "epoch": 0.28302154866208856, "grad_norm": 1.8125, "learning_rate": 1.9100311702411508e-05, "loss": 0.6274, "step": 2241 }, { "epoch": 0.28314784118714975, "grad_norm": 1.828125, "learning_rate": 1.9099483918568294e-05, "loss": 0.6315, "step": 2242 }, { "epoch": 0.2832741337122109, "grad_norm": 1.9140625, "learning_rate": 1.9098655772041567e-05, "loss": 0.7468, "step": 2243 }, { "epoch": 0.2834004262372721, "grad_norm": 2.171875, "learning_rate": 1.909782726286433e-05, "loss": 0.8214, "step": 2244 }, { "epoch": 0.28352671876233326, "grad_norm": 1.8828125, "learning_rate": 1.909699839106962e-05, "loss": 0.6481, "step": 2245 }, { "epoch": 0.28365301128739445, "grad_norm": 1.890625, "learning_rate": 1.9096169156690464e-05, "loss": 0.717, "step": 2246 }, { "epoch": 0.2837793038124556, "grad_norm": 1.8828125, "learning_rate": 1.9095339559759917e-05, "loss": 0.6557, "step": 2247 }, { "epoch": 0.2839055963375168, "grad_norm": 2.046875, "learning_rate": 1.9094509600311036e-05, "loss": 0.6974, "step": 2248 }, { "epoch": 0.28403188886257796, "grad_norm": 1.921875, "learning_rate": 1.9093679278376913e-05, "loss": 0.6532, "step": 2249 }, { "epoch": 0.2841581813876391, "grad_norm": 1.734375, "learning_rate": 1.9092848593990635e-05, "loss": 0.6885, "step": 2250 }, { "epoch": 0.2842844739127003, "grad_norm": 1.953125, "learning_rate": 1.9092017547185316e-05, "loss": 0.6731, "step": 2251 }, { "epoch": 0.2844107664377615, "grad_norm": 1.8046875, "learning_rate": 1.9091186137994074e-05, "loss": 0.6346, "step": 2252 }, { "epoch": 0.28453705896282266, "grad_norm": 1.8984375, "learning_rate": 1.9090354366450055e-05, "loss": 0.6919, "step": 2253 }, { "epoch": 0.2846633514878838, "grad_norm": 1.796875, "learning_rate": 1.9089522232586402e-05, "loss": 0.6403, "step": 2254 }, { "epoch": 0.284789644012945, "grad_norm": 1.7734375, "learning_rate": 1.9088689736436285e-05, "loss": 0.5776, "step": 2255 }, { "epoch": 0.2849159365380062, "grad_norm": 2.078125, "learning_rate": 1.908785687803289e-05, "loss": 0.8276, "step": 2256 }, { "epoch": 0.2850422290630673, "grad_norm": 1.90625, "learning_rate": 1.908702365740941e-05, "loss": 0.7122, "step": 2257 }, { "epoch": 0.2851685215881285, "grad_norm": 1.8984375, "learning_rate": 1.9086190074599052e-05, "loss": 0.7302, "step": 2258 }, { "epoch": 0.2852948141131897, "grad_norm": 1.9375, "learning_rate": 1.9085356129635043e-05, "loss": 0.7666, "step": 2259 }, { "epoch": 0.2854211066382508, "grad_norm": 1.9921875, "learning_rate": 1.9084521822550623e-05, "loss": 0.8225, "step": 2260 }, { "epoch": 0.285547399163312, "grad_norm": 1.8203125, "learning_rate": 1.908368715337905e-05, "loss": 0.6851, "step": 2261 }, { "epoch": 0.2856736916883732, "grad_norm": 1.9765625, "learning_rate": 1.9082852122153578e-05, "loss": 0.7931, "step": 2262 }, { "epoch": 0.2857999842134344, "grad_norm": 1.890625, "learning_rate": 1.90820167289075e-05, "loss": 0.7655, "step": 2263 }, { "epoch": 0.2859262767384955, "grad_norm": 2.0, "learning_rate": 1.9081180973674112e-05, "loss": 0.7643, "step": 2264 }, { "epoch": 0.2860525692635567, "grad_norm": 2.03125, "learning_rate": 1.9080344856486724e-05, "loss": 0.829, "step": 2265 }, { "epoch": 0.2861788617886179, "grad_norm": 1.890625, "learning_rate": 1.9079508377378658e-05, "loss": 0.7313, "step": 2266 }, { "epoch": 0.28630515431367903, "grad_norm": 1.7265625, "learning_rate": 1.9078671536383263e-05, "loss": 0.5992, "step": 2267 }, { "epoch": 0.2864314468387402, "grad_norm": 1.8203125, "learning_rate": 1.9077834333533885e-05, "loss": 0.6577, "step": 2268 }, { "epoch": 0.2865577393638014, "grad_norm": 1.875, "learning_rate": 1.9076996768863895e-05, "loss": 0.7208, "step": 2269 }, { "epoch": 0.2866840318888626, "grad_norm": 1.8359375, "learning_rate": 1.9076158842406677e-05, "loss": 0.6398, "step": 2270 }, { "epoch": 0.28681032441392373, "grad_norm": 1.8671875, "learning_rate": 1.907532055419563e-05, "loss": 0.6376, "step": 2271 }, { "epoch": 0.2869366169389849, "grad_norm": 2.0625, "learning_rate": 1.9074481904264165e-05, "loss": 0.7647, "step": 2272 }, { "epoch": 0.2870629094640461, "grad_norm": 2.0, "learning_rate": 1.907364289264571e-05, "loss": 0.6662, "step": 2273 }, { "epoch": 0.28718920198910725, "grad_norm": 1.8984375, "learning_rate": 1.90728035193737e-05, "loss": 0.764, "step": 2274 }, { "epoch": 0.28731549451416843, "grad_norm": 1.7734375, "learning_rate": 1.90719637844816e-05, "loss": 0.7433, "step": 2275 }, { "epoch": 0.2874417870392296, "grad_norm": 1.7578125, "learning_rate": 1.9071123688002875e-05, "loss": 0.6902, "step": 2276 }, { "epoch": 0.2875680795642908, "grad_norm": 1.8671875, "learning_rate": 1.9070283229971007e-05, "loss": 0.6966, "step": 2277 }, { "epoch": 0.28769437208935195, "grad_norm": 1.8828125, "learning_rate": 1.9069442410419493e-05, "loss": 0.6775, "step": 2278 }, { "epoch": 0.28782066461441314, "grad_norm": 1.7890625, "learning_rate": 1.9068601229381857e-05, "loss": 0.6678, "step": 2279 }, { "epoch": 0.2879469571394743, "grad_norm": 2.046875, "learning_rate": 1.9067759686891613e-05, "loss": 0.7176, "step": 2280 }, { "epoch": 0.28807324966453546, "grad_norm": 2.015625, "learning_rate": 1.906691778298231e-05, "loss": 0.6977, "step": 2281 }, { "epoch": 0.28819954218959665, "grad_norm": 1.7265625, "learning_rate": 1.9066075517687505e-05, "loss": 0.6176, "step": 2282 }, { "epoch": 0.28832583471465784, "grad_norm": 1.78125, "learning_rate": 1.9065232891040767e-05, "loss": 0.6577, "step": 2283 }, { "epoch": 0.288452127239719, "grad_norm": 1.7265625, "learning_rate": 1.9064389903075676e-05, "loss": 0.6118, "step": 2284 }, { "epoch": 0.28857841976478016, "grad_norm": 1.8046875, "learning_rate": 1.9063546553825842e-05, "loss": 0.6159, "step": 2285 }, { "epoch": 0.28870471228984135, "grad_norm": 1.921875, "learning_rate": 1.9062702843324873e-05, "loss": 0.745, "step": 2286 }, { "epoch": 0.28883100481490254, "grad_norm": 1.8046875, "learning_rate": 1.9061858771606398e-05, "loss": 0.6545, "step": 2287 }, { "epoch": 0.28895729733996367, "grad_norm": 1.984375, "learning_rate": 1.9061014338704055e-05, "loss": 0.7789, "step": 2288 }, { "epoch": 0.28908358986502486, "grad_norm": 2.0, "learning_rate": 1.906016954465151e-05, "loss": 0.7407, "step": 2289 }, { "epoch": 0.28920988239008605, "grad_norm": 1.671875, "learning_rate": 1.9059324389482427e-05, "loss": 0.6477, "step": 2290 }, { "epoch": 0.28933617491514724, "grad_norm": 1.8203125, "learning_rate": 1.9058478873230494e-05, "loss": 0.6241, "step": 2291 }, { "epoch": 0.28946246744020837, "grad_norm": 1.7265625, "learning_rate": 1.9057632995929413e-05, "loss": 0.6633, "step": 2292 }, { "epoch": 0.28958875996526956, "grad_norm": 1.96875, "learning_rate": 1.9056786757612897e-05, "loss": 0.7505, "step": 2293 }, { "epoch": 0.28971505249033075, "grad_norm": 1.7578125, "learning_rate": 1.9055940158314677e-05, "loss": 0.6263, "step": 2294 }, { "epoch": 0.2898413450153919, "grad_norm": 1.859375, "learning_rate": 1.905509319806849e-05, "loss": 0.6576, "step": 2295 }, { "epoch": 0.28996763754045307, "grad_norm": 1.921875, "learning_rate": 1.9054245876908105e-05, "loss": 0.6594, "step": 2296 }, { "epoch": 0.29009393006551426, "grad_norm": 1.734375, "learning_rate": 1.9053398194867286e-05, "loss": 0.7044, "step": 2297 }, { "epoch": 0.2902202225905754, "grad_norm": 2.140625, "learning_rate": 1.905255015197982e-05, "loss": 0.6841, "step": 2298 }, { "epoch": 0.2903465151156366, "grad_norm": 1.8203125, "learning_rate": 1.905170174827951e-05, "loss": 0.6972, "step": 2299 }, { "epoch": 0.29047280764069777, "grad_norm": 1.7421875, "learning_rate": 1.905085298380017e-05, "loss": 0.6463, "step": 2300 }, { "epoch": 0.29059910016575896, "grad_norm": 1.7578125, "learning_rate": 1.905000385857563e-05, "loss": 0.6515, "step": 2301 }, { "epoch": 0.2907253926908201, "grad_norm": 1.8828125, "learning_rate": 1.9049154372639736e-05, "loss": 0.6698, "step": 2302 }, { "epoch": 0.2908516852158813, "grad_norm": 1.859375, "learning_rate": 1.9048304526026345e-05, "loss": 0.6688, "step": 2303 }, { "epoch": 0.2909779777409425, "grad_norm": 1.8125, "learning_rate": 1.904745431876933e-05, "loss": 0.6603, "step": 2304 }, { "epoch": 0.2911042702660036, "grad_norm": 1.796875, "learning_rate": 1.9046603750902578e-05, "loss": 0.6514, "step": 2305 }, { "epoch": 0.2912305627910648, "grad_norm": 1.78125, "learning_rate": 1.904575282245999e-05, "loss": 0.5818, "step": 2306 }, { "epoch": 0.291356855316126, "grad_norm": 1.9765625, "learning_rate": 1.9044901533475483e-05, "loss": 0.7804, "step": 2307 }, { "epoch": 0.2914831478411872, "grad_norm": 1.8515625, "learning_rate": 1.9044049883982984e-05, "loss": 0.6572, "step": 2308 }, { "epoch": 0.2916094403662483, "grad_norm": 1.7578125, "learning_rate": 1.9043197874016445e-05, "loss": 0.7339, "step": 2309 }, { "epoch": 0.2917357328913095, "grad_norm": 1.9453125, "learning_rate": 1.9042345503609817e-05, "loss": 0.7164, "step": 2310 }, { "epoch": 0.2918620254163707, "grad_norm": 1.796875, "learning_rate": 1.904149277279708e-05, "loss": 0.804, "step": 2311 }, { "epoch": 0.2919883179414318, "grad_norm": 1.8046875, "learning_rate": 1.904063968161222e-05, "loss": 0.6359, "step": 2312 }, { "epoch": 0.292114610466493, "grad_norm": 1.78125, "learning_rate": 1.903978623008923e-05, "loss": 0.6845, "step": 2313 }, { "epoch": 0.2922409029915542, "grad_norm": 1.7578125, "learning_rate": 1.9038932418262144e-05, "loss": 0.6692, "step": 2314 }, { "epoch": 0.2923671955166154, "grad_norm": 1.84375, "learning_rate": 1.903807824616498e-05, "loss": 0.6721, "step": 2315 }, { "epoch": 0.2924934880416765, "grad_norm": 1.71875, "learning_rate": 1.9037223713831786e-05, "loss": 0.6126, "step": 2316 }, { "epoch": 0.2926197805667377, "grad_norm": 1.859375, "learning_rate": 1.9036368821296622e-05, "loss": 0.6226, "step": 2317 }, { "epoch": 0.2927460730917989, "grad_norm": 1.796875, "learning_rate": 1.9035513568593563e-05, "loss": 0.67, "step": 2318 }, { "epoch": 0.29287236561686003, "grad_norm": 1.8984375, "learning_rate": 1.9034657955756695e-05, "loss": 0.7039, "step": 2319 }, { "epoch": 0.2929986581419212, "grad_norm": 1.96875, "learning_rate": 1.9033801982820125e-05, "loss": 0.8023, "step": 2320 }, { "epoch": 0.2931249506669824, "grad_norm": 1.8828125, "learning_rate": 1.9032945649817966e-05, "loss": 0.6957, "step": 2321 }, { "epoch": 0.2932512431920436, "grad_norm": 1.90625, "learning_rate": 1.9032088956784352e-05, "loss": 0.7434, "step": 2322 }, { "epoch": 0.29337753571710473, "grad_norm": 1.8125, "learning_rate": 1.9031231903753424e-05, "loss": 0.6697, "step": 2323 }, { "epoch": 0.2935038282421659, "grad_norm": 1.875, "learning_rate": 1.903037449075935e-05, "loss": 0.7104, "step": 2324 }, { "epoch": 0.2936301207672271, "grad_norm": 1.78125, "learning_rate": 1.9029516717836298e-05, "loss": 0.6881, "step": 2325 }, { "epoch": 0.29375641329228824, "grad_norm": 1.8828125, "learning_rate": 1.9028658585018455e-05, "loss": 0.7258, "step": 2326 }, { "epoch": 0.29388270581734943, "grad_norm": 1.84375, "learning_rate": 1.902780009234003e-05, "loss": 0.6828, "step": 2327 }, { "epoch": 0.2940089983424106, "grad_norm": 2.0625, "learning_rate": 1.902694123983524e-05, "loss": 0.8866, "step": 2328 }, { "epoch": 0.29413529086747175, "grad_norm": 1.8046875, "learning_rate": 1.9026082027538313e-05, "loss": 0.6405, "step": 2329 }, { "epoch": 0.29426158339253294, "grad_norm": 1.7890625, "learning_rate": 1.9025222455483496e-05, "loss": 0.6598, "step": 2330 }, { "epoch": 0.29438787591759413, "grad_norm": 1.90625, "learning_rate": 1.9024362523705055e-05, "loss": 0.7552, "step": 2331 }, { "epoch": 0.2945141684426553, "grad_norm": 1.8203125, "learning_rate": 1.9023502232237258e-05, "loss": 0.6352, "step": 2332 }, { "epoch": 0.29464046096771646, "grad_norm": 1.9765625, "learning_rate": 1.9022641581114392e-05, "loss": 0.7683, "step": 2333 }, { "epoch": 0.29476675349277764, "grad_norm": 1.9609375, "learning_rate": 1.902178057037077e-05, "loss": 0.6595, "step": 2334 }, { "epoch": 0.29489304601783883, "grad_norm": 1.8046875, "learning_rate": 1.9020919200040703e-05, "loss": 0.6284, "step": 2335 }, { "epoch": 0.29501933854289997, "grad_norm": 1.7734375, "learning_rate": 1.9020057470158526e-05, "loss": 0.7049, "step": 2336 }, { "epoch": 0.29514563106796116, "grad_norm": 1.796875, "learning_rate": 1.9019195380758585e-05, "loss": 0.7181, "step": 2337 }, { "epoch": 0.29527192359302235, "grad_norm": 2.140625, "learning_rate": 1.9018332931875238e-05, "loss": 0.8395, "step": 2338 }, { "epoch": 0.29539821611808353, "grad_norm": 1.8046875, "learning_rate": 1.9017470123542866e-05, "loss": 0.701, "step": 2339 }, { "epoch": 0.29552450864314467, "grad_norm": 1.75, "learning_rate": 1.901660695579585e-05, "loss": 0.6733, "step": 2340 }, { "epoch": 0.29565080116820586, "grad_norm": 1.859375, "learning_rate": 1.90157434286686e-05, "loss": 0.7018, "step": 2341 }, { "epoch": 0.29577709369326705, "grad_norm": 1.953125, "learning_rate": 1.9014879542195536e-05, "loss": 0.7155, "step": 2342 }, { "epoch": 0.2959033862183282, "grad_norm": 1.8125, "learning_rate": 1.9014015296411085e-05, "loss": 0.66, "step": 2343 }, { "epoch": 0.29602967874338937, "grad_norm": 1.8671875, "learning_rate": 1.9013150691349694e-05, "loss": 0.709, "step": 2344 }, { "epoch": 0.29615597126845056, "grad_norm": 1.78125, "learning_rate": 1.9012285727045828e-05, "loss": 0.7117, "step": 2345 }, { "epoch": 0.29628226379351175, "grad_norm": 1.90625, "learning_rate": 1.901142040353396e-05, "loss": 0.6878, "step": 2346 }, { "epoch": 0.2964085563185729, "grad_norm": 1.828125, "learning_rate": 1.901055472084858e-05, "loss": 0.7318, "step": 2347 }, { "epoch": 0.29653484884363407, "grad_norm": 1.8671875, "learning_rate": 1.900968867902419e-05, "loss": 0.6368, "step": 2348 }, { "epoch": 0.29666114136869526, "grad_norm": 1.8671875, "learning_rate": 1.9008822278095316e-05, "loss": 0.6437, "step": 2349 }, { "epoch": 0.2967874338937564, "grad_norm": 1.703125, "learning_rate": 1.9007955518096483e-05, "loss": 0.6028, "step": 2350 }, { "epoch": 0.2969137264188176, "grad_norm": 1.8828125, "learning_rate": 1.9007088399062236e-05, "loss": 0.6813, "step": 2351 }, { "epoch": 0.29704001894387877, "grad_norm": 1.7421875, "learning_rate": 1.9006220921027143e-05, "loss": 0.6109, "step": 2352 }, { "epoch": 0.29716631146893996, "grad_norm": 1.921875, "learning_rate": 1.900535308402578e-05, "loss": 0.7278, "step": 2353 }, { "epoch": 0.2972926039940011, "grad_norm": 1.734375, "learning_rate": 1.900448488809273e-05, "loss": 0.6501, "step": 2354 }, { "epoch": 0.2974188965190623, "grad_norm": 1.8515625, "learning_rate": 1.9003616333262602e-05, "loss": 0.7604, "step": 2355 }, { "epoch": 0.29754518904412347, "grad_norm": 1.828125, "learning_rate": 1.9002747419570014e-05, "loss": 0.682, "step": 2356 }, { "epoch": 0.2976714815691846, "grad_norm": 1.7578125, "learning_rate": 1.9001878147049597e-05, "loss": 0.656, "step": 2357 }, { "epoch": 0.2977977740942458, "grad_norm": 1.8984375, "learning_rate": 1.9001008515735998e-05, "loss": 0.715, "step": 2358 }, { "epoch": 0.297924066619307, "grad_norm": 1.8203125, "learning_rate": 1.9000138525663885e-05, "loss": 0.6118, "step": 2359 }, { "epoch": 0.2980503591443681, "grad_norm": 1.9921875, "learning_rate": 1.8999268176867925e-05, "loss": 0.6197, "step": 2360 }, { "epoch": 0.2981766516694293, "grad_norm": 1.9765625, "learning_rate": 1.8998397469382812e-05, "loss": 0.7219, "step": 2361 }, { "epoch": 0.2983029441944905, "grad_norm": 1.7890625, "learning_rate": 1.8997526403243252e-05, "loss": 0.6691, "step": 2362 }, { "epoch": 0.2984292367195517, "grad_norm": 2.078125, "learning_rate": 1.8996654978483958e-05, "loss": 0.8093, "step": 2363 }, { "epoch": 0.2985555292446128, "grad_norm": 1.8203125, "learning_rate": 1.899578319513967e-05, "loss": 0.7298, "step": 2364 }, { "epoch": 0.298681821769674, "grad_norm": 1.953125, "learning_rate": 1.8994911053245125e-05, "loss": 0.6861, "step": 2365 }, { "epoch": 0.2988081142947352, "grad_norm": 1.6953125, "learning_rate": 1.89940385528351e-05, "loss": 0.5868, "step": 2366 }, { "epoch": 0.29893440681979633, "grad_norm": 2.046875, "learning_rate": 1.8993165693944357e-05, "loss": 0.725, "step": 2367 }, { "epoch": 0.2990606993448575, "grad_norm": 2.0, "learning_rate": 1.899229247660769e-05, "loss": 0.6771, "step": 2368 }, { "epoch": 0.2991869918699187, "grad_norm": 1.8984375, "learning_rate": 1.8991418900859907e-05, "loss": 0.6547, "step": 2369 }, { "epoch": 0.2993132843949799, "grad_norm": 1.859375, "learning_rate": 1.899054496673582e-05, "loss": 0.7032, "step": 2370 }, { "epoch": 0.29943957692004103, "grad_norm": 1.9296875, "learning_rate": 1.8989670674270268e-05, "loss": 0.7293, "step": 2371 }, { "epoch": 0.2995658694451022, "grad_norm": 2.0625, "learning_rate": 1.8988796023498096e-05, "loss": 0.6703, "step": 2372 }, { "epoch": 0.2996921619701634, "grad_norm": 1.8046875, "learning_rate": 1.8987921014454166e-05, "loss": 0.6342, "step": 2373 }, { "epoch": 0.29981845449522454, "grad_norm": 1.7578125, "learning_rate": 1.898704564717335e-05, "loss": 0.6152, "step": 2374 }, { "epoch": 0.29994474702028573, "grad_norm": 1.7421875, "learning_rate": 1.8986169921690546e-05, "loss": 0.6033, "step": 2375 }, { "epoch": 0.3000710395453469, "grad_norm": 1.984375, "learning_rate": 1.898529383804065e-05, "loss": 0.7064, "step": 2376 }, { "epoch": 0.3001973320704081, "grad_norm": 1.8984375, "learning_rate": 1.8984417396258585e-05, "loss": 0.6792, "step": 2377 }, { "epoch": 0.30032362459546924, "grad_norm": 1.78125, "learning_rate": 1.8983540596379284e-05, "loss": 0.6741, "step": 2378 }, { "epoch": 0.30044991712053043, "grad_norm": 1.8515625, "learning_rate": 1.8982663438437693e-05, "loss": 0.7275, "step": 2379 }, { "epoch": 0.3005762096455916, "grad_norm": 1.875, "learning_rate": 1.898178592246877e-05, "loss": 0.7303, "step": 2380 }, { "epoch": 0.30070250217065275, "grad_norm": 1.953125, "learning_rate": 1.8980908048507495e-05, "loss": 0.7319, "step": 2381 }, { "epoch": 0.30082879469571394, "grad_norm": 2.015625, "learning_rate": 1.898002981658886e-05, "loss": 0.7523, "step": 2382 }, { "epoch": 0.30095508722077513, "grad_norm": 1.796875, "learning_rate": 1.8979151226747866e-05, "loss": 0.7126, "step": 2383 }, { "epoch": 0.3010813797458363, "grad_norm": 1.921875, "learning_rate": 1.897827227901953e-05, "loss": 0.7768, "step": 2384 }, { "epoch": 0.30120767227089745, "grad_norm": 1.8046875, "learning_rate": 1.897739297343889e-05, "loss": 0.7169, "step": 2385 }, { "epoch": 0.30133396479595864, "grad_norm": 1.828125, "learning_rate": 1.8976513310040983e-05, "loss": 0.6911, "step": 2386 }, { "epoch": 0.30146025732101983, "grad_norm": 1.9296875, "learning_rate": 1.8975633288860883e-05, "loss": 0.6157, "step": 2387 }, { "epoch": 0.30158654984608096, "grad_norm": 1.8515625, "learning_rate": 1.8974752909933653e-05, "loss": 0.6242, "step": 2388 }, { "epoch": 0.30171284237114215, "grad_norm": 2.03125, "learning_rate": 1.8973872173294394e-05, "loss": 0.7376, "step": 2389 }, { "epoch": 0.30183913489620334, "grad_norm": 1.8515625, "learning_rate": 1.8972991078978204e-05, "loss": 0.6809, "step": 2390 }, { "epoch": 0.3019654274212645, "grad_norm": 1.8515625, "learning_rate": 1.89721096270202e-05, "loss": 0.7163, "step": 2391 }, { "epoch": 0.30209171994632567, "grad_norm": 1.9375, "learning_rate": 1.897122781745552e-05, "loss": 0.6675, "step": 2392 }, { "epoch": 0.30221801247138685, "grad_norm": 1.90625, "learning_rate": 1.897034565031931e-05, "loss": 0.6396, "step": 2393 }, { "epoch": 0.30234430499644804, "grad_norm": 1.90625, "learning_rate": 1.8969463125646723e-05, "loss": 0.7548, "step": 2394 }, { "epoch": 0.3024705975215092, "grad_norm": 1.8125, "learning_rate": 1.896858024347294e-05, "loss": 0.6887, "step": 2395 }, { "epoch": 0.30259689004657037, "grad_norm": 1.8046875, "learning_rate": 1.8967697003833156e-05, "loss": 0.7433, "step": 2396 }, { "epoch": 0.30272318257163155, "grad_norm": 1.8125, "learning_rate": 1.8966813406762567e-05, "loss": 0.6685, "step": 2397 }, { "epoch": 0.3028494750966927, "grad_norm": 1.921875, "learning_rate": 1.8965929452296393e-05, "loss": 0.8016, "step": 2398 }, { "epoch": 0.3029757676217539, "grad_norm": 1.8203125, "learning_rate": 1.8965045140469866e-05, "loss": 0.6688, "step": 2399 }, { "epoch": 0.30310206014681507, "grad_norm": 1.8359375, "learning_rate": 1.8964160471318237e-05, "loss": 0.7379, "step": 2400 }, { "epoch": 0.30322835267187626, "grad_norm": 2.0, "learning_rate": 1.8963275444876758e-05, "loss": 0.6573, "step": 2401 }, { "epoch": 0.3033546451969374, "grad_norm": 1.796875, "learning_rate": 1.8962390061180715e-05, "loss": 0.6631, "step": 2402 }, { "epoch": 0.3034809377219986, "grad_norm": 1.8828125, "learning_rate": 1.8961504320265386e-05, "loss": 0.7557, "step": 2403 }, { "epoch": 0.30360723024705977, "grad_norm": 1.875, "learning_rate": 1.896061822216608e-05, "loss": 0.6807, "step": 2404 }, { "epoch": 0.3037335227721209, "grad_norm": 1.8359375, "learning_rate": 1.895973176691812e-05, "loss": 0.6914, "step": 2405 }, { "epoch": 0.3038598152971821, "grad_norm": 1.8359375, "learning_rate": 1.895884495455683e-05, "loss": 0.6924, "step": 2406 }, { "epoch": 0.3039861078222433, "grad_norm": 1.8046875, "learning_rate": 1.895795778511756e-05, "loss": 0.5955, "step": 2407 }, { "epoch": 0.30411240034730447, "grad_norm": 1.9375, "learning_rate": 1.8957070258635666e-05, "loss": 0.7849, "step": 2408 }, { "epoch": 0.3042386928723656, "grad_norm": 1.828125, "learning_rate": 1.895618237514653e-05, "loss": 0.6365, "step": 2409 }, { "epoch": 0.3043649853974268, "grad_norm": 1.8671875, "learning_rate": 1.895529413468553e-05, "loss": 0.6719, "step": 2410 }, { "epoch": 0.304491277922488, "grad_norm": 1.8515625, "learning_rate": 1.8954405537288082e-05, "loss": 0.6327, "step": 2411 }, { "epoch": 0.3046175704475491, "grad_norm": 1.875, "learning_rate": 1.8953516582989597e-05, "loss": 0.6876, "step": 2412 }, { "epoch": 0.3047438629726103, "grad_norm": 1.953125, "learning_rate": 1.8952627271825506e-05, "loss": 0.7158, "step": 2413 }, { "epoch": 0.3048701554976715, "grad_norm": 2.09375, "learning_rate": 1.8951737603831256e-05, "loss": 0.78, "step": 2414 }, { "epoch": 0.3049964480227327, "grad_norm": 3.15625, "learning_rate": 1.8950847579042304e-05, "loss": 0.8176, "step": 2415 }, { "epoch": 0.3051227405477938, "grad_norm": 2.171875, "learning_rate": 1.894995719749413e-05, "loss": 0.7066, "step": 2416 }, { "epoch": 0.305249033072855, "grad_norm": 2.578125, "learning_rate": 1.8949066459222217e-05, "loss": 0.7136, "step": 2417 }, { "epoch": 0.3053753255979162, "grad_norm": 2.0, "learning_rate": 1.8948175364262075e-05, "loss": 0.7853, "step": 2418 }, { "epoch": 0.3055016181229773, "grad_norm": 1.96875, "learning_rate": 1.894728391264921e-05, "loss": 0.6694, "step": 2419 }, { "epoch": 0.3056279106480385, "grad_norm": 1.8046875, "learning_rate": 1.8946392104419162e-05, "loss": 0.6423, "step": 2420 }, { "epoch": 0.3057542031730997, "grad_norm": 1.8125, "learning_rate": 1.8945499939607473e-05, "loss": 0.6326, "step": 2421 }, { "epoch": 0.3058804956981609, "grad_norm": 1.84375, "learning_rate": 1.8944607418249705e-05, "loss": 0.6565, "step": 2422 }, { "epoch": 0.306006788223222, "grad_norm": 1.796875, "learning_rate": 1.8943714540381428e-05, "loss": 0.7105, "step": 2423 }, { "epoch": 0.3061330807482832, "grad_norm": 1.9140625, "learning_rate": 1.894282130603823e-05, "loss": 0.6068, "step": 2424 }, { "epoch": 0.3062593732733444, "grad_norm": 1.8359375, "learning_rate": 1.8941927715255718e-05, "loss": 0.6782, "step": 2425 }, { "epoch": 0.30638566579840554, "grad_norm": 1.8515625, "learning_rate": 1.89410337680695e-05, "loss": 0.669, "step": 2426 }, { "epoch": 0.3065119583234667, "grad_norm": 1.9140625, "learning_rate": 1.8940139464515218e-05, "loss": 0.7546, "step": 2427 }, { "epoch": 0.3066382508485279, "grad_norm": 1.8515625, "learning_rate": 1.893924480462851e-05, "loss": 0.7004, "step": 2428 }, { "epoch": 0.30676454337358905, "grad_norm": 1.8984375, "learning_rate": 1.8938349788445036e-05, "loss": 0.7105, "step": 2429 }, { "epoch": 0.30689083589865024, "grad_norm": 1.9765625, "learning_rate": 1.8937454416000466e-05, "loss": 0.7081, "step": 2430 }, { "epoch": 0.3070171284237114, "grad_norm": 1.7734375, "learning_rate": 1.893655868733049e-05, "loss": 0.6423, "step": 2431 }, { "epoch": 0.3071434209487726, "grad_norm": 1.859375, "learning_rate": 1.8935662602470807e-05, "loss": 0.5937, "step": 2432 }, { "epoch": 0.30726971347383375, "grad_norm": 1.78125, "learning_rate": 1.8934766161457142e-05, "loss": 0.6764, "step": 2433 }, { "epoch": 0.30739600599889494, "grad_norm": 1.8984375, "learning_rate": 1.8933869364325215e-05, "loss": 0.7503, "step": 2434 }, { "epoch": 0.30752229852395613, "grad_norm": 1.8984375, "learning_rate": 1.8932972211110775e-05, "loss": 0.7307, "step": 2435 }, { "epoch": 0.30764859104901726, "grad_norm": 1.9453125, "learning_rate": 1.8932074701849576e-05, "loss": 0.6507, "step": 2436 }, { "epoch": 0.30777488357407845, "grad_norm": 1.859375, "learning_rate": 1.8931176836577396e-05, "loss": 0.6515, "step": 2437 }, { "epoch": 0.30790117609913964, "grad_norm": 2.140625, "learning_rate": 1.893027861533002e-05, "loss": 0.6533, "step": 2438 }, { "epoch": 0.30802746862420083, "grad_norm": 1.8671875, "learning_rate": 1.892938003814325e-05, "loss": 0.7942, "step": 2439 }, { "epoch": 0.30815376114926196, "grad_norm": 2.03125, "learning_rate": 1.8928481105052897e-05, "loss": 0.722, "step": 2440 }, { "epoch": 0.30828005367432315, "grad_norm": 1.734375, "learning_rate": 1.8927581816094794e-05, "loss": 0.6697, "step": 2441 }, { "epoch": 0.30840634619938434, "grad_norm": 2.140625, "learning_rate": 1.892668217130478e-05, "loss": 0.6718, "step": 2442 }, { "epoch": 0.3085326387244455, "grad_norm": 1.9140625, "learning_rate": 1.892578217071872e-05, "loss": 0.7645, "step": 2443 }, { "epoch": 0.30865893124950666, "grad_norm": 1.96875, "learning_rate": 1.892488181437248e-05, "loss": 0.6941, "step": 2444 }, { "epoch": 0.30878522377456785, "grad_norm": 1.65625, "learning_rate": 1.8923981102301944e-05, "loss": 0.5996, "step": 2445 }, { "epoch": 0.30891151629962904, "grad_norm": 1.96875, "learning_rate": 1.892308003454302e-05, "loss": 0.7695, "step": 2446 }, { "epoch": 0.3090378088246902, "grad_norm": 1.796875, "learning_rate": 1.892217861113162e-05, "loss": 0.6615, "step": 2447 }, { "epoch": 0.30916410134975136, "grad_norm": 1.953125, "learning_rate": 1.892127683210367e-05, "loss": 0.8085, "step": 2448 }, { "epoch": 0.30929039387481255, "grad_norm": 1.8671875, "learning_rate": 1.8920374697495113e-05, "loss": 0.6423, "step": 2449 }, { "epoch": 0.3094166863998737, "grad_norm": 1.9296875, "learning_rate": 1.8919472207341906e-05, "loss": 0.6719, "step": 2450 }, { "epoch": 0.3095429789249349, "grad_norm": 1.8359375, "learning_rate": 1.891856936168002e-05, "loss": 0.6301, "step": 2451 }, { "epoch": 0.30966927144999606, "grad_norm": 1.8359375, "learning_rate": 1.8917666160545446e-05, "loss": 0.661, "step": 2452 }, { "epoch": 0.30979556397505725, "grad_norm": 1.890625, "learning_rate": 1.891676260397417e-05, "loss": 0.7044, "step": 2453 }, { "epoch": 0.3099218565001184, "grad_norm": 1.8203125, "learning_rate": 1.891585869200222e-05, "loss": 0.6562, "step": 2454 }, { "epoch": 0.3100481490251796, "grad_norm": 2.015625, "learning_rate": 1.8914954424665618e-05, "loss": 0.696, "step": 2455 }, { "epoch": 0.31017444155024076, "grad_norm": 1.796875, "learning_rate": 1.8914049802000403e-05, "loss": 0.6278, "step": 2456 }, { "epoch": 0.3103007340753019, "grad_norm": 2.140625, "learning_rate": 1.8913144824042634e-05, "loss": 0.8216, "step": 2457 }, { "epoch": 0.3104270266003631, "grad_norm": 1.75, "learning_rate": 1.8912239490828385e-05, "loss": 0.6734, "step": 2458 }, { "epoch": 0.3105533191254243, "grad_norm": 1.8671875, "learning_rate": 1.891133380239373e-05, "loss": 0.7569, "step": 2459 }, { "epoch": 0.3106796116504854, "grad_norm": 1.7421875, "learning_rate": 1.8910427758774775e-05, "loss": 0.621, "step": 2460 }, { "epoch": 0.3108059041755466, "grad_norm": 1.8203125, "learning_rate": 1.8909521360007637e-05, "loss": 0.7345, "step": 2461 }, { "epoch": 0.3109321967006078, "grad_norm": 1.9375, "learning_rate": 1.8908614606128434e-05, "loss": 0.6585, "step": 2462 }, { "epoch": 0.311058489225669, "grad_norm": 1.84375, "learning_rate": 1.8907707497173308e-05, "loss": 0.7474, "step": 2463 }, { "epoch": 0.3111847817507301, "grad_norm": 1.8671875, "learning_rate": 1.890680003317842e-05, "loss": 0.7181, "step": 2464 }, { "epoch": 0.3113110742757913, "grad_norm": 1.671875, "learning_rate": 1.8905892214179937e-05, "loss": 0.5813, "step": 2465 }, { "epoch": 0.3114373668008525, "grad_norm": 1.796875, "learning_rate": 1.890498404021404e-05, "loss": 0.6316, "step": 2466 }, { "epoch": 0.3115636593259136, "grad_norm": 1.796875, "learning_rate": 1.8904075511316926e-05, "loss": 0.6286, "step": 2467 }, { "epoch": 0.3116899518509748, "grad_norm": 1.90625, "learning_rate": 1.8903166627524812e-05, "loss": 0.7041, "step": 2468 }, { "epoch": 0.311816244376036, "grad_norm": 1.90625, "learning_rate": 1.890225738887392e-05, "loss": 0.6676, "step": 2469 }, { "epoch": 0.3119425369010972, "grad_norm": 1.921875, "learning_rate": 1.8901347795400492e-05, "loss": 0.7077, "step": 2470 }, { "epoch": 0.3120688294261583, "grad_norm": 1.8203125, "learning_rate": 1.890043784714078e-05, "loss": 0.701, "step": 2471 }, { "epoch": 0.3121951219512195, "grad_norm": 2.15625, "learning_rate": 1.889952754413105e-05, "loss": 0.786, "step": 2472 }, { "epoch": 0.3123214144762807, "grad_norm": 1.796875, "learning_rate": 1.8898616886407595e-05, "loss": 0.6881, "step": 2473 }, { "epoch": 0.31244770700134183, "grad_norm": 1.7890625, "learning_rate": 1.88977058740067e-05, "loss": 0.6992, "step": 2474 }, { "epoch": 0.312573999526403, "grad_norm": 2.109375, "learning_rate": 1.889679450696468e-05, "loss": 0.7469, "step": 2475 }, { "epoch": 0.3127002920514642, "grad_norm": 2.015625, "learning_rate": 1.889588278531786e-05, "loss": 0.674, "step": 2476 }, { "epoch": 0.3128265845765254, "grad_norm": 1.8515625, "learning_rate": 1.8894970709102582e-05, "loss": 0.7376, "step": 2477 }, { "epoch": 0.31295287710158654, "grad_norm": 1.890625, "learning_rate": 1.8894058278355194e-05, "loss": 0.6776, "step": 2478 }, { "epoch": 0.3130791696266477, "grad_norm": 1.8828125, "learning_rate": 1.889314549311207e-05, "loss": 0.6786, "step": 2479 }, { "epoch": 0.3132054621517089, "grad_norm": 2.078125, "learning_rate": 1.8892232353409582e-05, "loss": 0.6293, "step": 2480 }, { "epoch": 0.31333175467677005, "grad_norm": 2.015625, "learning_rate": 1.8891318859284133e-05, "loss": 0.7562, "step": 2481 }, { "epoch": 0.31345804720183124, "grad_norm": 1.7578125, "learning_rate": 1.889040501077213e-05, "loss": 0.7107, "step": 2482 }, { "epoch": 0.3135843397268924, "grad_norm": 1.9375, "learning_rate": 1.8889490807909995e-05, "loss": 0.7278, "step": 2483 }, { "epoch": 0.3137106322519536, "grad_norm": 1.90625, "learning_rate": 1.888857625073417e-05, "loss": 0.663, "step": 2484 }, { "epoch": 0.31383692477701475, "grad_norm": 1.8671875, "learning_rate": 1.8887661339281106e-05, "loss": 0.6902, "step": 2485 }, { "epoch": 0.31396321730207594, "grad_norm": 1.859375, "learning_rate": 1.888674607358727e-05, "loss": 0.7613, "step": 2486 }, { "epoch": 0.3140895098271371, "grad_norm": 2.046875, "learning_rate": 1.888583045368914e-05, "loss": 0.7326, "step": 2487 }, { "epoch": 0.31421580235219826, "grad_norm": 1.7578125, "learning_rate": 1.8884914479623205e-05, "loss": 0.767, "step": 2488 }, { "epoch": 0.31434209487725945, "grad_norm": 1.8359375, "learning_rate": 1.8883998151425986e-05, "loss": 0.6869, "step": 2489 }, { "epoch": 0.31446838740232064, "grad_norm": 1.7421875, "learning_rate": 1.8883081469133996e-05, "loss": 0.6232, "step": 2490 }, { "epoch": 0.31459467992738177, "grad_norm": 1.921875, "learning_rate": 1.8882164432783775e-05, "loss": 0.6838, "step": 2491 }, { "epoch": 0.31472097245244296, "grad_norm": 1.890625, "learning_rate": 1.8881247042411878e-05, "loss": 0.6672, "step": 2492 }, { "epoch": 0.31484726497750415, "grad_norm": 1.703125, "learning_rate": 1.888032929805486e-05, "loss": 0.562, "step": 2493 }, { "epoch": 0.31497355750256534, "grad_norm": 1.8046875, "learning_rate": 1.8879411199749306e-05, "loss": 0.6581, "step": 2494 }, { "epoch": 0.31509985002762647, "grad_norm": 2.046875, "learning_rate": 1.887849274753181e-05, "loss": 0.7221, "step": 2495 }, { "epoch": 0.31522614255268766, "grad_norm": 1.8515625, "learning_rate": 1.8877573941438984e-05, "loss": 0.6955, "step": 2496 }, { "epoch": 0.31535243507774885, "grad_norm": 1.7421875, "learning_rate": 1.8876654781507436e-05, "loss": 0.6414, "step": 2497 }, { "epoch": 0.31547872760281, "grad_norm": 1.9140625, "learning_rate": 1.8875735267773812e-05, "loss": 0.7282, "step": 2498 }, { "epoch": 0.31560502012787117, "grad_norm": 1.75, "learning_rate": 1.887481540027476e-05, "loss": 0.6415, "step": 2499 }, { "epoch": 0.31573131265293236, "grad_norm": 1.921875, "learning_rate": 1.887389517904694e-05, "loss": 0.7386, "step": 2500 }, { "epoch": 0.31585760517799355, "grad_norm": 1.9921875, "learning_rate": 1.8872974604127035e-05, "loss": 0.7292, "step": 2501 }, { "epoch": 0.3159838977030547, "grad_norm": 1.96875, "learning_rate": 1.887205367555173e-05, "loss": 0.6814, "step": 2502 }, { "epoch": 0.3161101902281159, "grad_norm": 1.9609375, "learning_rate": 1.8871132393357737e-05, "loss": 0.6851, "step": 2503 }, { "epoch": 0.31623648275317706, "grad_norm": 1.8046875, "learning_rate": 1.8870210757581777e-05, "loss": 0.61, "step": 2504 }, { "epoch": 0.3163627752782382, "grad_norm": 1.828125, "learning_rate": 1.886928876826058e-05, "loss": 0.6468, "step": 2505 }, { "epoch": 0.3164890678032994, "grad_norm": 1.7578125, "learning_rate": 1.8868366425430898e-05, "loss": 0.6218, "step": 2506 }, { "epoch": 0.3166153603283606, "grad_norm": 1.8125, "learning_rate": 1.8867443729129484e-05, "loss": 0.6718, "step": 2507 }, { "epoch": 0.31674165285342176, "grad_norm": 3.34375, "learning_rate": 1.8866520679393127e-05, "loss": 0.9055, "step": 2508 }, { "epoch": 0.3168679453784829, "grad_norm": 2.03125, "learning_rate": 1.8865597276258613e-05, "loss": 0.7492, "step": 2509 }, { "epoch": 0.3169942379035441, "grad_norm": 1.828125, "learning_rate": 1.8864673519762742e-05, "loss": 0.7454, "step": 2510 }, { "epoch": 0.3171205304286053, "grad_norm": 1.9140625, "learning_rate": 1.8863749409942338e-05, "loss": 0.8017, "step": 2511 }, { "epoch": 0.3172468229536664, "grad_norm": 1.6796875, "learning_rate": 1.8862824946834232e-05, "loss": 0.5583, "step": 2512 }, { "epoch": 0.3173731154787276, "grad_norm": 1.7109375, "learning_rate": 1.886190013047527e-05, "loss": 0.6416, "step": 2513 }, { "epoch": 0.3174994080037888, "grad_norm": 1.78125, "learning_rate": 1.8860974960902317e-05, "loss": 0.6862, "step": 2514 }, { "epoch": 0.31762570052885, "grad_norm": 1.9140625, "learning_rate": 1.8860049438152247e-05, "loss": 0.703, "step": 2515 }, { "epoch": 0.3177519930539111, "grad_norm": 1.8984375, "learning_rate": 1.885912356226194e-05, "loss": 0.6803, "step": 2516 }, { "epoch": 0.3178782855789723, "grad_norm": 1.84375, "learning_rate": 1.8858197333268312e-05, "loss": 0.6778, "step": 2517 }, { "epoch": 0.3180045781040335, "grad_norm": 1.796875, "learning_rate": 1.885727075120827e-05, "loss": 0.7075, "step": 2518 }, { "epoch": 0.3181308706290946, "grad_norm": 2.0, "learning_rate": 1.8856343816118754e-05, "loss": 0.6839, "step": 2519 }, { "epoch": 0.3182571631541558, "grad_norm": 1.8125, "learning_rate": 1.8855416528036703e-05, "loss": 0.7439, "step": 2520 }, { "epoch": 0.318383455679217, "grad_norm": 1.84375, "learning_rate": 1.885448888699908e-05, "loss": 0.7206, "step": 2521 }, { "epoch": 0.3185097482042782, "grad_norm": 1.9375, "learning_rate": 1.8853560893042858e-05, "loss": 0.6246, "step": 2522 }, { "epoch": 0.3186360407293393, "grad_norm": 1.9375, "learning_rate": 1.8852632546205022e-05, "loss": 0.691, "step": 2523 }, { "epoch": 0.3187623332544005, "grad_norm": 2.078125, "learning_rate": 1.885170384652258e-05, "loss": 0.7293, "step": 2524 }, { "epoch": 0.3188886257794617, "grad_norm": 1.703125, "learning_rate": 1.8850774794032535e-05, "loss": 0.5933, "step": 2525 }, { "epoch": 0.31901491830452283, "grad_norm": 1.890625, "learning_rate": 1.884984538877193e-05, "loss": 0.709, "step": 2526 }, { "epoch": 0.319141210829584, "grad_norm": 1.9140625, "learning_rate": 1.8848915630777805e-05, "loss": 0.7544, "step": 2527 }, { "epoch": 0.3192675033546452, "grad_norm": 1.9140625, "learning_rate": 1.8847985520087217e-05, "loss": 0.7012, "step": 2528 }, { "epoch": 0.31939379587970634, "grad_norm": 2.046875, "learning_rate": 1.8847055056737236e-05, "loss": 0.7617, "step": 2529 }, { "epoch": 0.31952008840476753, "grad_norm": 1.7265625, "learning_rate": 1.8846124240764953e-05, "loss": 0.5975, "step": 2530 }, { "epoch": 0.3196463809298287, "grad_norm": 1.796875, "learning_rate": 1.8845193072207463e-05, "loss": 0.6682, "step": 2531 }, { "epoch": 0.3197726734548899, "grad_norm": 1.828125, "learning_rate": 1.884426155110188e-05, "loss": 0.6249, "step": 2532 }, { "epoch": 0.31989896597995104, "grad_norm": 1.9453125, "learning_rate": 1.8843329677485336e-05, "loss": 0.6911, "step": 2533 }, { "epoch": 0.32002525850501223, "grad_norm": 2.03125, "learning_rate": 1.8842397451394974e-05, "loss": 0.7277, "step": 2534 }, { "epoch": 0.3201515510300734, "grad_norm": 1.859375, "learning_rate": 1.8841464872867944e-05, "loss": 0.6554, "step": 2535 }, { "epoch": 0.32027784355513456, "grad_norm": 2.046875, "learning_rate": 1.884053194194142e-05, "loss": 0.7708, "step": 2536 }, { "epoch": 0.32040413608019574, "grad_norm": 1.7265625, "learning_rate": 1.883959865865259e-05, "loss": 0.6207, "step": 2537 }, { "epoch": 0.32053042860525693, "grad_norm": 2.078125, "learning_rate": 1.8838665023038646e-05, "loss": 0.7268, "step": 2538 }, { "epoch": 0.3206567211303181, "grad_norm": 1.828125, "learning_rate": 1.8837731035136806e-05, "loss": 0.7454, "step": 2539 }, { "epoch": 0.32078301365537926, "grad_norm": 1.8515625, "learning_rate": 1.883679669498429e-05, "loss": 0.6608, "step": 2540 }, { "epoch": 0.32090930618044045, "grad_norm": 2.0625, "learning_rate": 1.8835862002618345e-05, "loss": 0.6764, "step": 2541 }, { "epoch": 0.32103559870550163, "grad_norm": 1.921875, "learning_rate": 1.8834926958076225e-05, "loss": 0.657, "step": 2542 }, { "epoch": 0.32116189123056277, "grad_norm": 1.828125, "learning_rate": 1.8833991561395194e-05, "loss": 0.7425, "step": 2543 }, { "epoch": 0.32128818375562396, "grad_norm": 1.9140625, "learning_rate": 1.8833055812612538e-05, "loss": 0.6593, "step": 2544 }, { "epoch": 0.32141447628068515, "grad_norm": 1.9453125, "learning_rate": 1.8832119711765554e-05, "loss": 0.6745, "step": 2545 }, { "epoch": 0.32154076880574634, "grad_norm": 1.78125, "learning_rate": 1.883118325889155e-05, "loss": 0.6839, "step": 2546 }, { "epoch": 0.32166706133080747, "grad_norm": 1.6875, "learning_rate": 1.8830246454027855e-05, "loss": 0.6712, "step": 2547 }, { "epoch": 0.32179335385586866, "grad_norm": 1.875, "learning_rate": 1.8829309297211804e-05, "loss": 0.6127, "step": 2548 }, { "epoch": 0.32191964638092985, "grad_norm": 1.765625, "learning_rate": 1.882837178848075e-05, "loss": 0.6877, "step": 2549 }, { "epoch": 0.322045938905991, "grad_norm": 1.8671875, "learning_rate": 1.8827433927872066e-05, "loss": 0.6291, "step": 2550 }, { "epoch": 0.32217223143105217, "grad_norm": 1.890625, "learning_rate": 1.8826495715423122e-05, "loss": 0.6385, "step": 2551 }, { "epoch": 0.32229852395611336, "grad_norm": 1.828125, "learning_rate": 1.882555715117132e-05, "loss": 0.7184, "step": 2552 }, { "epoch": 0.32242481648117455, "grad_norm": 1.78125, "learning_rate": 1.8824618235154068e-05, "loss": 0.6447, "step": 2553 }, { "epoch": 0.3225511090062357, "grad_norm": 1.9765625, "learning_rate": 1.8823678967408788e-05, "loss": 0.7539, "step": 2554 }, { "epoch": 0.32267740153129687, "grad_norm": 1.921875, "learning_rate": 1.882273934797292e-05, "loss": 0.6627, "step": 2555 }, { "epoch": 0.32280369405635806, "grad_norm": 1.8984375, "learning_rate": 1.8821799376883912e-05, "loss": 0.6334, "step": 2556 }, { "epoch": 0.3229299865814192, "grad_norm": 1.9140625, "learning_rate": 1.8820859054179225e-05, "loss": 0.6517, "step": 2557 }, { "epoch": 0.3230562791064804, "grad_norm": 1.8046875, "learning_rate": 1.881991837989635e-05, "loss": 0.7186, "step": 2558 }, { "epoch": 0.32318257163154157, "grad_norm": 1.8984375, "learning_rate": 1.8818977354072766e-05, "loss": 0.7117, "step": 2559 }, { "epoch": 0.3233088641566027, "grad_norm": 2.03125, "learning_rate": 1.881803597674599e-05, "loss": 0.7639, "step": 2560 }, { "epoch": 0.3234351566816639, "grad_norm": 1.8828125, "learning_rate": 1.8817094247953537e-05, "loss": 0.6807, "step": 2561 }, { "epoch": 0.3235614492067251, "grad_norm": 1.890625, "learning_rate": 1.8816152167732947e-05, "loss": 0.686, "step": 2562 }, { "epoch": 0.32368774173178627, "grad_norm": 1.8515625, "learning_rate": 1.8815209736121766e-05, "loss": 0.5551, "step": 2563 }, { "epoch": 0.3238140342568474, "grad_norm": 2.078125, "learning_rate": 1.8814266953157557e-05, "loss": 0.6909, "step": 2564 }, { "epoch": 0.3239403267819086, "grad_norm": 1.859375, "learning_rate": 1.88133238188779e-05, "loss": 0.7212, "step": 2565 }, { "epoch": 0.3240666193069698, "grad_norm": 1.8125, "learning_rate": 1.881238033332038e-05, "loss": 0.6773, "step": 2566 }, { "epoch": 0.3241929118320309, "grad_norm": 1.8984375, "learning_rate": 1.881143649652261e-05, "loss": 0.7077, "step": 2567 }, { "epoch": 0.3243192043570921, "grad_norm": 1.7734375, "learning_rate": 1.88104923085222e-05, "loss": 0.6926, "step": 2568 }, { "epoch": 0.3244454968821533, "grad_norm": 1.8671875, "learning_rate": 1.8809547769356792e-05, "loss": 0.6616, "step": 2569 }, { "epoch": 0.3245717894072145, "grad_norm": 1.8984375, "learning_rate": 1.8808602879064027e-05, "loss": 0.72, "step": 2570 }, { "epoch": 0.3246980819322756, "grad_norm": 1.7734375, "learning_rate": 1.8807657637681567e-05, "loss": 0.6275, "step": 2571 }, { "epoch": 0.3248243744573368, "grad_norm": 2.5625, "learning_rate": 1.880671204524709e-05, "loss": 0.7686, "step": 2572 }, { "epoch": 0.324950666982398, "grad_norm": 1.8671875, "learning_rate": 1.8805766101798286e-05, "loss": 0.7238, "step": 2573 }, { "epoch": 0.32507695950745913, "grad_norm": 1.953125, "learning_rate": 1.880481980737285e-05, "loss": 0.7292, "step": 2574 }, { "epoch": 0.3252032520325203, "grad_norm": 1.953125, "learning_rate": 1.8803873162008506e-05, "loss": 0.7296, "step": 2575 }, { "epoch": 0.3253295445575815, "grad_norm": 1.953125, "learning_rate": 1.8802926165742983e-05, "loss": 0.72, "step": 2576 }, { "epoch": 0.3254558370826427, "grad_norm": 1.7890625, "learning_rate": 1.8801978818614027e-05, "loss": 0.7234, "step": 2577 }, { "epoch": 0.32558212960770383, "grad_norm": 1.734375, "learning_rate": 1.8801031120659396e-05, "loss": 0.7283, "step": 2578 }, { "epoch": 0.325708422132765, "grad_norm": 1.875, "learning_rate": 1.8800083071916865e-05, "loss": 0.6433, "step": 2579 }, { "epoch": 0.3258347146578262, "grad_norm": 1.7890625, "learning_rate": 1.8799134672424214e-05, "loss": 0.6807, "step": 2580 }, { "epoch": 0.32596100718288734, "grad_norm": 1.703125, "learning_rate": 1.8798185922219256e-05, "loss": 0.5982, "step": 2581 }, { "epoch": 0.32608729970794853, "grad_norm": 1.7109375, "learning_rate": 1.8797236821339794e-05, "loss": 0.6464, "step": 2582 }, { "epoch": 0.3262135922330097, "grad_norm": 1.9296875, "learning_rate": 1.879628736982366e-05, "loss": 0.6887, "step": 2583 }, { "epoch": 0.3263398847580709, "grad_norm": 1.7265625, "learning_rate": 1.8795337567708706e-05, "loss": 0.6129, "step": 2584 }, { "epoch": 0.32646617728313204, "grad_norm": 1.796875, "learning_rate": 1.8794387415032777e-05, "loss": 0.6551, "step": 2585 }, { "epoch": 0.32659246980819323, "grad_norm": 2.125, "learning_rate": 1.8793436911833746e-05, "loss": 0.7895, "step": 2586 }, { "epoch": 0.3267187623332544, "grad_norm": 1.8671875, "learning_rate": 1.8792486058149505e-05, "loss": 0.6606, "step": 2587 }, { "epoch": 0.32684505485831555, "grad_norm": 1.8046875, "learning_rate": 1.879153485401794e-05, "loss": 0.6759, "step": 2588 }, { "epoch": 0.32697134738337674, "grad_norm": 1.7265625, "learning_rate": 1.879058329947698e-05, "loss": 0.6688, "step": 2589 }, { "epoch": 0.32709763990843793, "grad_norm": 1.828125, "learning_rate": 1.8789631394564537e-05, "loss": 0.6363, "step": 2590 }, { "epoch": 0.32722393243349907, "grad_norm": 1.765625, "learning_rate": 1.878867913931856e-05, "loss": 0.6981, "step": 2591 }, { "epoch": 0.32735022495856025, "grad_norm": 2.015625, "learning_rate": 1.8787726533777003e-05, "loss": 0.6411, "step": 2592 }, { "epoch": 0.32747651748362144, "grad_norm": 1.8671875, "learning_rate": 1.878677357797783e-05, "loss": 0.694, "step": 2593 }, { "epoch": 0.32760281000868263, "grad_norm": 1.796875, "learning_rate": 1.8785820271959034e-05, "loss": 0.6662, "step": 2594 }, { "epoch": 0.32772910253374377, "grad_norm": 2.125, "learning_rate": 1.8784866615758594e-05, "loss": 0.7299, "step": 2595 }, { "epoch": 0.32785539505880495, "grad_norm": 1.9375, "learning_rate": 1.8783912609414535e-05, "loss": 0.6738, "step": 2596 }, { "epoch": 0.32798168758386614, "grad_norm": 1.9921875, "learning_rate": 1.8782958252964877e-05, "loss": 0.6714, "step": 2597 }, { "epoch": 0.3281079801089273, "grad_norm": 2.34375, "learning_rate": 1.878200354644766e-05, "loss": 0.7773, "step": 2598 }, { "epoch": 0.32823427263398847, "grad_norm": 1.9765625, "learning_rate": 1.878104848990093e-05, "loss": 0.8334, "step": 2599 }, { "epoch": 0.32836056515904966, "grad_norm": 1.8828125, "learning_rate": 1.8780093083362758e-05, "loss": 0.6863, "step": 2600 }, { "epoch": 0.32848685768411084, "grad_norm": 1.765625, "learning_rate": 1.8779137326871226e-05, "loss": 0.6475, "step": 2601 }, { "epoch": 0.328613150209172, "grad_norm": 2.0, "learning_rate": 1.8778181220464425e-05, "loss": 0.9498, "step": 2602 }, { "epoch": 0.32873944273423317, "grad_norm": 2.5625, "learning_rate": 1.8777224764180463e-05, "loss": 0.8101, "step": 2603 }, { "epoch": 0.32886573525929436, "grad_norm": 2.015625, "learning_rate": 1.8776267958057467e-05, "loss": 0.7036, "step": 2604 }, { "epoch": 0.3289920277843555, "grad_norm": 1.890625, "learning_rate": 1.8775310802133567e-05, "loss": 0.6442, "step": 2605 }, { "epoch": 0.3291183203094167, "grad_norm": 1.8515625, "learning_rate": 1.8774353296446914e-05, "loss": 0.688, "step": 2606 }, { "epoch": 0.32924461283447787, "grad_norm": 1.8359375, "learning_rate": 1.8773395441035675e-05, "loss": 0.7445, "step": 2607 }, { "epoch": 0.32937090535953906, "grad_norm": 1.9765625, "learning_rate": 1.8772437235938023e-05, "loss": 0.7612, "step": 2608 }, { "epoch": 0.3294971978846002, "grad_norm": 1.796875, "learning_rate": 1.8771478681192154e-05, "loss": 0.6325, "step": 2609 }, { "epoch": 0.3296234904096614, "grad_norm": 1.7734375, "learning_rate": 1.8770519776836272e-05, "loss": 0.6856, "step": 2610 }, { "epoch": 0.32974978293472257, "grad_norm": 1.8828125, "learning_rate": 1.8769560522908595e-05, "loss": 0.6941, "step": 2611 }, { "epoch": 0.3298760754597837, "grad_norm": 1.90625, "learning_rate": 1.876860091944736e-05, "loss": 0.6085, "step": 2612 }, { "epoch": 0.3300023679848449, "grad_norm": 1.8046875, "learning_rate": 1.8767640966490816e-05, "loss": 0.6052, "step": 2613 }, { "epoch": 0.3301286605099061, "grad_norm": 1.765625, "learning_rate": 1.876668066407722e-05, "loss": 0.7188, "step": 2614 }, { "epoch": 0.33025495303496727, "grad_norm": 1.796875, "learning_rate": 1.8765720012244844e-05, "loss": 0.6805, "step": 2615 }, { "epoch": 0.3303812455600284, "grad_norm": 1.7265625, "learning_rate": 1.876475901103199e-05, "loss": 0.6025, "step": 2616 }, { "epoch": 0.3305075380850896, "grad_norm": 1.9375, "learning_rate": 1.8763797660476946e-05, "loss": 0.7116, "step": 2617 }, { "epoch": 0.3306338306101508, "grad_norm": 2.0625, "learning_rate": 1.8762835960618038e-05, "loss": 0.6879, "step": 2618 }, { "epoch": 0.3307601231352119, "grad_norm": 1.828125, "learning_rate": 1.8761873911493595e-05, "loss": 0.7085, "step": 2619 }, { "epoch": 0.3308864156602731, "grad_norm": 1.7421875, "learning_rate": 1.876091151314196e-05, "loss": 0.6131, "step": 2620 }, { "epoch": 0.3310127081853343, "grad_norm": 1.78125, "learning_rate": 1.8759948765601498e-05, "loss": 0.5845, "step": 2621 }, { "epoch": 0.3311390007103954, "grad_norm": 1.890625, "learning_rate": 1.875898566891058e-05, "loss": 0.6803, "step": 2622 }, { "epoch": 0.3312652932354566, "grad_norm": 1.7734375, "learning_rate": 1.8758022223107586e-05, "loss": 0.6327, "step": 2623 }, { "epoch": 0.3313915857605178, "grad_norm": 1.9140625, "learning_rate": 1.8757058428230922e-05, "loss": 0.8157, "step": 2624 }, { "epoch": 0.331517878285579, "grad_norm": 1.9609375, "learning_rate": 1.8756094284319e-05, "loss": 0.7642, "step": 2625 }, { "epoch": 0.3316441708106401, "grad_norm": 1.875, "learning_rate": 1.8755129791410254e-05, "loss": 0.7538, "step": 2626 }, { "epoch": 0.3317704633357013, "grad_norm": 1.8359375, "learning_rate": 1.8754164949543123e-05, "loss": 0.7263, "step": 2627 }, { "epoch": 0.3318967558607625, "grad_norm": 1.9453125, "learning_rate": 1.875319975875606e-05, "loss": 0.683, "step": 2628 }, { "epoch": 0.33202304838582364, "grad_norm": 1.7734375, "learning_rate": 1.8752234219087538e-05, "loss": 0.6486, "step": 2629 }, { "epoch": 0.3321493409108848, "grad_norm": 1.84375, "learning_rate": 1.875126833057604e-05, "loss": 0.712, "step": 2630 }, { "epoch": 0.332275633435946, "grad_norm": 1.96875, "learning_rate": 1.875030209326007e-05, "loss": 0.6274, "step": 2631 }, { "epoch": 0.3324019259610072, "grad_norm": 1.734375, "learning_rate": 1.8749335507178134e-05, "loss": 0.6641, "step": 2632 }, { "epoch": 0.33252821848606834, "grad_norm": 1.7578125, "learning_rate": 1.8748368572368756e-05, "loss": 0.7847, "step": 2633 }, { "epoch": 0.33265451101112953, "grad_norm": 1.7734375, "learning_rate": 1.874740128887048e-05, "loss": 0.6735, "step": 2634 }, { "epoch": 0.3327808035361907, "grad_norm": 1.8828125, "learning_rate": 1.8746433656721853e-05, "loss": 0.682, "step": 2635 }, { "epoch": 0.33290709606125185, "grad_norm": 2.203125, "learning_rate": 1.8745465675961453e-05, "loss": 0.7008, "step": 2636 }, { "epoch": 0.33303338858631304, "grad_norm": 2.09375, "learning_rate": 1.8744497346627856e-05, "loss": 0.6523, "step": 2637 }, { "epoch": 0.33315968111137423, "grad_norm": 1.90625, "learning_rate": 1.8743528668759655e-05, "loss": 0.6099, "step": 2638 }, { "epoch": 0.3332859736364354, "grad_norm": 1.953125, "learning_rate": 1.8742559642395463e-05, "loss": 0.692, "step": 2639 }, { "epoch": 0.33341226616149655, "grad_norm": 2.34375, "learning_rate": 1.8741590267573903e-05, "loss": 0.752, "step": 2640 }, { "epoch": 0.33353855868655774, "grad_norm": 1.8984375, "learning_rate": 1.8740620544333604e-05, "loss": 0.6813, "step": 2641 }, { "epoch": 0.33366485121161893, "grad_norm": 1.90625, "learning_rate": 1.873965047271323e-05, "loss": 0.7115, "step": 2642 }, { "epoch": 0.33379114373668006, "grad_norm": 1.828125, "learning_rate": 1.8738680052751436e-05, "loss": 0.6801, "step": 2643 }, { "epoch": 0.33391743626174125, "grad_norm": 1.9453125, "learning_rate": 1.8737709284486905e-05, "loss": 0.7469, "step": 2644 }, { "epoch": 0.33404372878680244, "grad_norm": 1.6796875, "learning_rate": 1.8736738167958326e-05, "loss": 0.7135, "step": 2645 }, { "epoch": 0.33417002131186363, "grad_norm": 1.8359375, "learning_rate": 1.8735766703204406e-05, "loss": 0.6369, "step": 2646 }, { "epoch": 0.33429631383692476, "grad_norm": 1.828125, "learning_rate": 1.873479489026387e-05, "loss": 0.6123, "step": 2647 }, { "epoch": 0.33442260636198595, "grad_norm": 1.8671875, "learning_rate": 1.8733822729175452e-05, "loss": 0.6975, "step": 2648 }, { "epoch": 0.33454889888704714, "grad_norm": 1.984375, "learning_rate": 1.8732850219977887e-05, "loss": 0.7316, "step": 2649 }, { "epoch": 0.3346751914121083, "grad_norm": 1.828125, "learning_rate": 1.8731877362709953e-05, "loss": 0.7194, "step": 2650 }, { "epoch": 0.33480148393716946, "grad_norm": 2.015625, "learning_rate": 1.873090415741042e-05, "loss": 0.7664, "step": 2651 }, { "epoch": 0.33492777646223065, "grad_norm": 1.8515625, "learning_rate": 1.8729930604118074e-05, "loss": 0.7368, "step": 2652 }, { "epoch": 0.33505406898729184, "grad_norm": 1.8046875, "learning_rate": 1.8728956702871724e-05, "loss": 0.6798, "step": 2653 }, { "epoch": 0.335180361512353, "grad_norm": 1.9609375, "learning_rate": 1.8727982453710184e-05, "loss": 0.748, "step": 2654 }, { "epoch": 0.33530665403741416, "grad_norm": 1.859375, "learning_rate": 1.8727007856672285e-05, "loss": 0.6263, "step": 2655 }, { "epoch": 0.33543294656247535, "grad_norm": 1.8203125, "learning_rate": 1.8726032911796874e-05, "loss": 0.6823, "step": 2656 }, { "epoch": 0.3355592390875365, "grad_norm": 1.859375, "learning_rate": 1.872505761912281e-05, "loss": 0.6946, "step": 2657 }, { "epoch": 0.3356855316125977, "grad_norm": 2.8125, "learning_rate": 1.8724081978688963e-05, "loss": 0.8246, "step": 2658 }, { "epoch": 0.33581182413765887, "grad_norm": 1.8203125, "learning_rate": 1.8723105990534224e-05, "loss": 0.7046, "step": 2659 }, { "epoch": 0.33593811666272, "grad_norm": 1.8046875, "learning_rate": 1.872212965469749e-05, "loss": 0.8193, "step": 2660 }, { "epoch": 0.3360644091877812, "grad_norm": 1.984375, "learning_rate": 1.8721152971217672e-05, "loss": 0.6915, "step": 2661 }, { "epoch": 0.3361907017128424, "grad_norm": 1.765625, "learning_rate": 1.872017594013371e-05, "loss": 0.6879, "step": 2662 }, { "epoch": 0.33631699423790357, "grad_norm": 1.7421875, "learning_rate": 1.8719198561484534e-05, "loss": 0.5998, "step": 2663 }, { "epoch": 0.3364432867629647, "grad_norm": 1.78125, "learning_rate": 1.8718220835309108e-05, "loss": 0.6809, "step": 2664 }, { "epoch": 0.3365695792880259, "grad_norm": 1.828125, "learning_rate": 1.8717242761646398e-05, "loss": 0.7067, "step": 2665 }, { "epoch": 0.3366958718130871, "grad_norm": 1.71875, "learning_rate": 1.8716264340535387e-05, "loss": 0.6367, "step": 2666 }, { "epoch": 0.3368221643381482, "grad_norm": 1.734375, "learning_rate": 1.8715285572015074e-05, "loss": 0.6446, "step": 2667 }, { "epoch": 0.3369484568632094, "grad_norm": 1.796875, "learning_rate": 1.871430645612447e-05, "loss": 0.6263, "step": 2668 }, { "epoch": 0.3370747493882706, "grad_norm": 1.9921875, "learning_rate": 1.8713326992902602e-05, "loss": 0.6181, "step": 2669 }, { "epoch": 0.3372010419133318, "grad_norm": 1.75, "learning_rate": 1.8712347182388507e-05, "loss": 0.724, "step": 2670 }, { "epoch": 0.3373273344383929, "grad_norm": 1.953125, "learning_rate": 1.8711367024621237e-05, "loss": 0.7164, "step": 2671 }, { "epoch": 0.3374536269634541, "grad_norm": 1.8046875, "learning_rate": 1.871038651963986e-05, "loss": 0.6775, "step": 2672 }, { "epoch": 0.3375799194885153, "grad_norm": 1.7421875, "learning_rate": 1.8709405667483458e-05, "loss": 0.6134, "step": 2673 }, { "epoch": 0.3377062120135764, "grad_norm": 1.828125, "learning_rate": 1.8708424468191123e-05, "loss": 0.6456, "step": 2674 }, { "epoch": 0.3378325045386376, "grad_norm": 1.7734375, "learning_rate": 1.8707442921801963e-05, "loss": 0.6507, "step": 2675 }, { "epoch": 0.3379587970636988, "grad_norm": 1.8125, "learning_rate": 1.8706461028355107e-05, "loss": 0.7622, "step": 2676 }, { "epoch": 0.33808508958876, "grad_norm": 1.890625, "learning_rate": 1.870547878788968e-05, "loss": 0.6405, "step": 2677 }, { "epoch": 0.3382113821138211, "grad_norm": 1.796875, "learning_rate": 1.8704496200444837e-05, "loss": 0.6882, "step": 2678 }, { "epoch": 0.3383376746388823, "grad_norm": 1.8359375, "learning_rate": 1.8703513266059744e-05, "loss": 0.742, "step": 2679 }, { "epoch": 0.3384639671639435, "grad_norm": 1.8359375, "learning_rate": 1.8702529984773574e-05, "loss": 0.7113, "step": 2680 }, { "epoch": 0.33859025968900464, "grad_norm": 1.8359375, "learning_rate": 1.870154635662552e-05, "loss": 0.7018, "step": 2681 }, { "epoch": 0.3387165522140658, "grad_norm": 1.765625, "learning_rate": 1.870056238165479e-05, "loss": 0.6245, "step": 2682 }, { "epoch": 0.338842844739127, "grad_norm": 1.8515625, "learning_rate": 1.8699578059900597e-05, "loss": 0.6465, "step": 2683 }, { "epoch": 0.3389691372641882, "grad_norm": 1.796875, "learning_rate": 1.8698593391402177e-05, "loss": 0.5814, "step": 2684 }, { "epoch": 0.33909542978924934, "grad_norm": 1.9609375, "learning_rate": 1.8697608376198778e-05, "loss": 0.7123, "step": 2685 }, { "epoch": 0.3392217223143105, "grad_norm": 1.8203125, "learning_rate": 1.869662301432966e-05, "loss": 0.6214, "step": 2686 }, { "epoch": 0.3393480148393717, "grad_norm": 2.0625, "learning_rate": 1.8695637305834092e-05, "loss": 0.7586, "step": 2687 }, { "epoch": 0.33947430736443285, "grad_norm": 1.796875, "learning_rate": 1.8694651250751366e-05, "loss": 0.6526, "step": 2688 }, { "epoch": 0.33960059988949404, "grad_norm": 1.7734375, "learning_rate": 1.8693664849120784e-05, "loss": 0.6319, "step": 2689 }, { "epoch": 0.3397268924145552, "grad_norm": 1.8046875, "learning_rate": 1.8692678100981663e-05, "loss": 0.7404, "step": 2690 }, { "epoch": 0.33985318493961636, "grad_norm": 1.7578125, "learning_rate": 1.869169100637333e-05, "loss": 0.6795, "step": 2691 }, { "epoch": 0.33997947746467755, "grad_norm": 1.8515625, "learning_rate": 1.8690703565335127e-05, "loss": 0.7604, "step": 2692 }, { "epoch": 0.34010576998973874, "grad_norm": 1.75, "learning_rate": 1.8689715777906415e-05, "loss": 0.6684, "step": 2693 }, { "epoch": 0.3402320625147999, "grad_norm": 1.859375, "learning_rate": 1.8688727644126564e-05, "loss": 0.6782, "step": 2694 }, { "epoch": 0.34035835503986106, "grad_norm": 1.9453125, "learning_rate": 1.8687739164034955e-05, "loss": 0.7047, "step": 2695 }, { "epoch": 0.34048464756492225, "grad_norm": 1.9453125, "learning_rate": 1.8686750337670987e-05, "loss": 0.691, "step": 2696 }, { "epoch": 0.34061094008998344, "grad_norm": 1.859375, "learning_rate": 1.8685761165074073e-05, "loss": 0.6652, "step": 2697 }, { "epoch": 0.34073723261504457, "grad_norm": 1.921875, "learning_rate": 1.8684771646283643e-05, "loss": 0.6908, "step": 2698 }, { "epoch": 0.34086352514010576, "grad_norm": 1.734375, "learning_rate": 1.8683781781339136e-05, "loss": 0.6744, "step": 2699 }, { "epoch": 0.34098981766516695, "grad_norm": 1.9375, "learning_rate": 1.8682791570279998e-05, "loss": 0.7408, "step": 2700 }, { "epoch": 0.34111611019022814, "grad_norm": 1.9375, "learning_rate": 1.8681801013145705e-05, "loss": 0.6575, "step": 2701 }, { "epoch": 0.3412424027152893, "grad_norm": 1.9375, "learning_rate": 1.8680810109975735e-05, "loss": 0.6947, "step": 2702 }, { "epoch": 0.34136869524035046, "grad_norm": 1.8828125, "learning_rate": 1.8679818860809585e-05, "loss": 0.6756, "step": 2703 }, { "epoch": 0.34149498776541165, "grad_norm": 1.8828125, "learning_rate": 1.867882726568676e-05, "loss": 0.6358, "step": 2704 }, { "epoch": 0.3416212802904728, "grad_norm": 1.7109375, "learning_rate": 1.867783532464678e-05, "loss": 0.5523, "step": 2705 }, { "epoch": 0.341747572815534, "grad_norm": 1.8828125, "learning_rate": 1.867684303772919e-05, "loss": 0.6454, "step": 2706 }, { "epoch": 0.34187386534059516, "grad_norm": 1.8984375, "learning_rate": 1.8675850404973535e-05, "loss": 0.7147, "step": 2707 }, { "epoch": 0.34200015786565635, "grad_norm": 1.9296875, "learning_rate": 1.8674857426419383e-05, "loss": 0.6699, "step": 2708 }, { "epoch": 0.3421264503907175, "grad_norm": 1.9296875, "learning_rate": 1.8673864102106304e-05, "loss": 0.6801, "step": 2709 }, { "epoch": 0.3422527429157787, "grad_norm": 1.78125, "learning_rate": 1.86728704320739e-05, "loss": 0.6946, "step": 2710 }, { "epoch": 0.34237903544083986, "grad_norm": 1.7265625, "learning_rate": 1.8671876416361763e-05, "loss": 0.5759, "step": 2711 }, { "epoch": 0.342505327965901, "grad_norm": 1.75, "learning_rate": 1.8670882055009523e-05, "loss": 0.6439, "step": 2712 }, { "epoch": 0.3426316204909622, "grad_norm": 1.796875, "learning_rate": 1.8669887348056812e-05, "loss": 0.7346, "step": 2713 }, { "epoch": 0.3427579130160234, "grad_norm": 1.875, "learning_rate": 1.866889229554327e-05, "loss": 0.6718, "step": 2714 }, { "epoch": 0.34288420554108456, "grad_norm": 1.890625, "learning_rate": 1.8667896897508562e-05, "loss": 0.6626, "step": 2715 }, { "epoch": 0.3430104980661457, "grad_norm": 2.0, "learning_rate": 1.8666901153992367e-05, "loss": 0.8091, "step": 2716 }, { "epoch": 0.3431367905912069, "grad_norm": 1.90625, "learning_rate": 1.8665905065034364e-05, "loss": 0.6793, "step": 2717 }, { "epoch": 0.3432630831162681, "grad_norm": 1.875, "learning_rate": 1.8664908630674258e-05, "loss": 0.7305, "step": 2718 }, { "epoch": 0.3433893756413292, "grad_norm": 1.90625, "learning_rate": 1.8663911850951765e-05, "loss": 0.8334, "step": 2719 }, { "epoch": 0.3435156681663904, "grad_norm": 1.8671875, "learning_rate": 1.866291472590661e-05, "loss": 0.6986, "step": 2720 }, { "epoch": 0.3436419606914516, "grad_norm": 1.875, "learning_rate": 1.8661917255578546e-05, "loss": 0.6234, "step": 2721 }, { "epoch": 0.3437682532165127, "grad_norm": 1.8125, "learning_rate": 1.866091944000732e-05, "loss": 0.6823, "step": 2722 }, { "epoch": 0.3438945457415739, "grad_norm": 1.8125, "learning_rate": 1.8659921279232707e-05, "loss": 0.6431, "step": 2723 }, { "epoch": 0.3440208382666351, "grad_norm": 1.78125, "learning_rate": 1.865892277329449e-05, "loss": 0.6623, "step": 2724 }, { "epoch": 0.3441471307916963, "grad_norm": 1.90625, "learning_rate": 1.865792392223247e-05, "loss": 0.6426, "step": 2725 }, { "epoch": 0.3442734233167574, "grad_norm": 1.90625, "learning_rate": 1.8656924726086454e-05, "loss": 0.647, "step": 2726 }, { "epoch": 0.3443997158418186, "grad_norm": 1.8125, "learning_rate": 1.8655925184896275e-05, "loss": 0.6636, "step": 2727 }, { "epoch": 0.3445260083668798, "grad_norm": 1.9765625, "learning_rate": 1.865492529870176e-05, "loss": 0.7808, "step": 2728 }, { "epoch": 0.34465230089194093, "grad_norm": 1.7890625, "learning_rate": 1.8653925067542773e-05, "loss": 0.7222, "step": 2729 }, { "epoch": 0.3447785934170021, "grad_norm": 1.796875, "learning_rate": 1.8652924491459176e-05, "loss": 0.6397, "step": 2730 }, { "epoch": 0.3449048859420633, "grad_norm": 1.7734375, "learning_rate": 1.8651923570490852e-05, "loss": 0.6199, "step": 2731 }, { "epoch": 0.3450311784671245, "grad_norm": 1.8515625, "learning_rate": 1.865092230467769e-05, "loss": 0.7105, "step": 2732 }, { "epoch": 0.34515747099218563, "grad_norm": 1.765625, "learning_rate": 1.8649920694059607e-05, "loss": 0.6508, "step": 2733 }, { "epoch": 0.3452837635172468, "grad_norm": 1.6640625, "learning_rate": 1.8648918738676518e-05, "loss": 0.5704, "step": 2734 }, { "epoch": 0.345410056042308, "grad_norm": 1.9296875, "learning_rate": 1.864791643856836e-05, "loss": 0.7145, "step": 2735 }, { "epoch": 0.34553634856736914, "grad_norm": 1.90625, "learning_rate": 1.8646913793775085e-05, "loss": 0.7311, "step": 2736 }, { "epoch": 0.34566264109243033, "grad_norm": 1.9765625, "learning_rate": 1.864591080433665e-05, "loss": 0.7133, "step": 2737 }, { "epoch": 0.3457889336174915, "grad_norm": 1.984375, "learning_rate": 1.8644907470293035e-05, "loss": 0.7293, "step": 2738 }, { "epoch": 0.3459152261425527, "grad_norm": 2.03125, "learning_rate": 1.864390379168423e-05, "loss": 0.721, "step": 2739 }, { "epoch": 0.34604151866761385, "grad_norm": 1.796875, "learning_rate": 1.8642899768550243e-05, "loss": 0.7166, "step": 2740 }, { "epoch": 0.34616781119267503, "grad_norm": 1.90625, "learning_rate": 1.864189540093109e-05, "loss": 0.7171, "step": 2741 }, { "epoch": 0.3462941037177362, "grad_norm": 1.71875, "learning_rate": 1.86408906888668e-05, "loss": 0.5706, "step": 2742 }, { "epoch": 0.34642039624279736, "grad_norm": 1.7578125, "learning_rate": 1.863988563239742e-05, "loss": 0.6067, "step": 2743 }, { "epoch": 0.34654668876785855, "grad_norm": 1.890625, "learning_rate": 1.863888023156301e-05, "loss": 0.75, "step": 2744 }, { "epoch": 0.34667298129291974, "grad_norm": 1.765625, "learning_rate": 1.863787448640364e-05, "loss": 0.6717, "step": 2745 }, { "epoch": 0.3467992738179809, "grad_norm": 1.8984375, "learning_rate": 1.86368683969594e-05, "loss": 0.7717, "step": 2746 }, { "epoch": 0.34692556634304206, "grad_norm": 1.6796875, "learning_rate": 1.863586196327039e-05, "loss": 0.6199, "step": 2747 }, { "epoch": 0.34705185886810325, "grad_norm": 2.25, "learning_rate": 1.8634855185376722e-05, "loss": 0.7968, "step": 2748 }, { "epoch": 0.34717815139316444, "grad_norm": 1.7421875, "learning_rate": 1.8633848063318524e-05, "loss": 0.5903, "step": 2749 }, { "epoch": 0.34730444391822557, "grad_norm": 3.15625, "learning_rate": 1.8632840597135936e-05, "loss": 0.8563, "step": 2750 }, { "epoch": 0.34743073644328676, "grad_norm": 1.7109375, "learning_rate": 1.8631832786869116e-05, "loss": 0.6303, "step": 2751 }, { "epoch": 0.34755702896834795, "grad_norm": 1.8515625, "learning_rate": 1.8630824632558235e-05, "loss": 0.69, "step": 2752 }, { "epoch": 0.34768332149340914, "grad_norm": 1.8046875, "learning_rate": 1.8629816134243473e-05, "loss": 0.6848, "step": 2753 }, { "epoch": 0.34780961401847027, "grad_norm": 1.7890625, "learning_rate": 1.8628807291965026e-05, "loss": 0.6594, "step": 2754 }, { "epoch": 0.34793590654353146, "grad_norm": 1.8671875, "learning_rate": 1.86277981057631e-05, "loss": 0.7584, "step": 2755 }, { "epoch": 0.34806219906859265, "grad_norm": 1.9375, "learning_rate": 1.8626788575677926e-05, "loss": 0.7798, "step": 2756 }, { "epoch": 0.3481884915936538, "grad_norm": 1.84375, "learning_rate": 1.8625778701749738e-05, "loss": 0.6855, "step": 2757 }, { "epoch": 0.34831478411871497, "grad_norm": 1.8828125, "learning_rate": 1.862476848401879e-05, "loss": 0.655, "step": 2758 }, { "epoch": 0.34844107664377616, "grad_norm": 1.84375, "learning_rate": 1.862375792252534e-05, "loss": 0.6723, "step": 2759 }, { "epoch": 0.3485673691688373, "grad_norm": 1.8203125, "learning_rate": 1.8622747017309676e-05, "loss": 0.7221, "step": 2760 }, { "epoch": 0.3486936616938985, "grad_norm": 1.734375, "learning_rate": 1.8621735768412078e-05, "loss": 0.5541, "step": 2761 }, { "epoch": 0.34881995421895967, "grad_norm": 1.921875, "learning_rate": 1.8620724175872864e-05, "loss": 0.7883, "step": 2762 }, { "epoch": 0.34894624674402086, "grad_norm": 1.9453125, "learning_rate": 1.8619712239732347e-05, "loss": 0.7406, "step": 2763 }, { "epoch": 0.349072539269082, "grad_norm": 1.9375, "learning_rate": 1.8618699960030865e-05, "loss": 0.7467, "step": 2764 }, { "epoch": 0.3491988317941432, "grad_norm": 2.109375, "learning_rate": 1.8617687336808758e-05, "loss": 0.7982, "step": 2765 }, { "epoch": 0.34932512431920437, "grad_norm": 2.921875, "learning_rate": 1.861667437010639e-05, "loss": 0.8085, "step": 2766 }, { "epoch": 0.3494514168442655, "grad_norm": 1.8515625, "learning_rate": 1.861566105996414e-05, "loss": 0.6201, "step": 2767 }, { "epoch": 0.3495777093693267, "grad_norm": 1.875, "learning_rate": 1.861464740642239e-05, "loss": 0.7301, "step": 2768 }, { "epoch": 0.3497040018943879, "grad_norm": 1.96875, "learning_rate": 1.8613633409521546e-05, "loss": 0.7469, "step": 2769 }, { "epoch": 0.3498302944194491, "grad_norm": 1.9609375, "learning_rate": 1.861261906930202e-05, "loss": 0.7176, "step": 2770 }, { "epoch": 0.3499565869445102, "grad_norm": 1.921875, "learning_rate": 1.8611604385804243e-05, "loss": 0.7285, "step": 2771 }, { "epoch": 0.3500828794695714, "grad_norm": 1.984375, "learning_rate": 1.8610589359068658e-05, "loss": 0.6804, "step": 2772 }, { "epoch": 0.3502091719946326, "grad_norm": 1.9375, "learning_rate": 1.860957398913572e-05, "loss": 0.7361, "step": 2773 }, { "epoch": 0.3503354645196937, "grad_norm": 1.8359375, "learning_rate": 1.86085582760459e-05, "loss": 0.6737, "step": 2774 }, { "epoch": 0.3504617570447549, "grad_norm": 1.8984375, "learning_rate": 1.8607542219839682e-05, "loss": 0.6874, "step": 2775 }, { "epoch": 0.3505880495698161, "grad_norm": 1.828125, "learning_rate": 1.8606525820557568e-05, "loss": 0.6757, "step": 2776 }, { "epoch": 0.3507143420948773, "grad_norm": 1.8984375, "learning_rate": 1.860550907824006e-05, "loss": 0.7, "step": 2777 }, { "epoch": 0.3508406346199384, "grad_norm": 1.8984375, "learning_rate": 1.8604491992927688e-05, "loss": 0.6581, "step": 2778 }, { "epoch": 0.3509669271449996, "grad_norm": 1.7578125, "learning_rate": 1.8603474564660993e-05, "loss": 0.7271, "step": 2779 }, { "epoch": 0.3510932196700608, "grad_norm": 1.984375, "learning_rate": 1.8602456793480522e-05, "loss": 0.7107, "step": 2780 }, { "epoch": 0.35121951219512193, "grad_norm": 1.8359375, "learning_rate": 1.8601438679426843e-05, "loss": 0.717, "step": 2781 }, { "epoch": 0.3513458047201831, "grad_norm": 1.8359375, "learning_rate": 1.860042022254054e-05, "loss": 0.691, "step": 2782 }, { "epoch": 0.3514720972452443, "grad_norm": 1.7734375, "learning_rate": 1.85994014228622e-05, "loss": 0.6452, "step": 2783 }, { "epoch": 0.3515983897703055, "grad_norm": 1.8125, "learning_rate": 1.8598382280432435e-05, "loss": 0.6838, "step": 2784 }, { "epoch": 0.35172468229536663, "grad_norm": 2.171875, "learning_rate": 1.8597362795291857e-05, "loss": 0.7407, "step": 2785 }, { "epoch": 0.3518509748204278, "grad_norm": 1.8046875, "learning_rate": 1.859634296748111e-05, "loss": 0.6152, "step": 2786 }, { "epoch": 0.351977267345489, "grad_norm": 1.8828125, "learning_rate": 1.8595322797040837e-05, "loss": 0.7051, "step": 2787 }, { "epoch": 0.35210355987055014, "grad_norm": 1.8125, "learning_rate": 1.8594302284011704e-05, "loss": 0.6317, "step": 2788 }, { "epoch": 0.35222985239561133, "grad_norm": 2.15625, "learning_rate": 1.859328142843438e-05, "loss": 0.7233, "step": 2789 }, { "epoch": 0.3523561449206725, "grad_norm": 1.8984375, "learning_rate": 1.859226023034955e-05, "loss": 0.6203, "step": 2790 }, { "epoch": 0.35248243744573365, "grad_norm": 1.75, "learning_rate": 1.859123868979793e-05, "loss": 0.624, "step": 2791 }, { "epoch": 0.35260872997079484, "grad_norm": 1.90625, "learning_rate": 1.8590216806820228e-05, "loss": 0.6773, "step": 2792 }, { "epoch": 0.35273502249585603, "grad_norm": 1.7578125, "learning_rate": 1.8589194581457176e-05, "loss": 0.6733, "step": 2793 }, { "epoch": 0.3528613150209172, "grad_norm": 1.9296875, "learning_rate": 1.8588172013749516e-05, "loss": 0.6305, "step": 2794 }, { "epoch": 0.35298760754597835, "grad_norm": 1.921875, "learning_rate": 1.8587149103738003e-05, "loss": 0.7181, "step": 2795 }, { "epoch": 0.35311390007103954, "grad_norm": 1.828125, "learning_rate": 1.8586125851463414e-05, "loss": 0.6785, "step": 2796 }, { "epoch": 0.35324019259610073, "grad_norm": 1.8828125, "learning_rate": 1.8585102256966525e-05, "loss": 0.6741, "step": 2797 }, { "epoch": 0.35336648512116187, "grad_norm": 2.0625, "learning_rate": 1.8584078320288142e-05, "loss": 0.7382, "step": 2798 }, { "epoch": 0.35349277764622306, "grad_norm": 1.875, "learning_rate": 1.8583054041469072e-05, "loss": 0.6739, "step": 2799 }, { "epoch": 0.35361907017128424, "grad_norm": 1.703125, "learning_rate": 1.858202942055014e-05, "loss": 0.5901, "step": 2800 }, { "epoch": 0.35374536269634543, "grad_norm": 1.78125, "learning_rate": 1.8581004457572187e-05, "loss": 0.683, "step": 2801 }, { "epoch": 0.35387165522140657, "grad_norm": 1.8828125, "learning_rate": 1.857997915257607e-05, "loss": 0.6871, "step": 2802 }, { "epoch": 0.35399794774646776, "grad_norm": 1.78125, "learning_rate": 1.8578953505602644e-05, "loss": 0.684, "step": 2803 }, { "epoch": 0.35412424027152895, "grad_norm": 1.6328125, "learning_rate": 1.85779275166928e-05, "loss": 0.5672, "step": 2804 }, { "epoch": 0.3542505327965901, "grad_norm": 1.890625, "learning_rate": 1.857690118588742e-05, "loss": 0.7139, "step": 2805 }, { "epoch": 0.35437682532165127, "grad_norm": 1.84375, "learning_rate": 1.8575874513227422e-05, "loss": 0.5848, "step": 2806 }, { "epoch": 0.35450311784671246, "grad_norm": 1.9375, "learning_rate": 1.857484749875372e-05, "loss": 0.7162, "step": 2807 }, { "epoch": 0.35462941037177365, "grad_norm": 1.7421875, "learning_rate": 1.8573820142507252e-05, "loss": 0.558, "step": 2808 }, { "epoch": 0.3547557028968348, "grad_norm": 1.8671875, "learning_rate": 1.8572792444528963e-05, "loss": 0.7299, "step": 2809 }, { "epoch": 0.35488199542189597, "grad_norm": 1.8671875, "learning_rate": 1.8571764404859816e-05, "loss": 0.6948, "step": 2810 }, { "epoch": 0.35500828794695716, "grad_norm": 1.78125, "learning_rate": 1.857073602354079e-05, "loss": 0.7036, "step": 2811 }, { "epoch": 0.3551345804720183, "grad_norm": 1.8203125, "learning_rate": 1.8569707300612866e-05, "loss": 0.6705, "step": 2812 }, { "epoch": 0.3552608729970795, "grad_norm": 1.8203125, "learning_rate": 1.856867823611705e-05, "loss": 0.6875, "step": 2813 }, { "epoch": 0.35538716552214067, "grad_norm": 1.8125, "learning_rate": 1.856764883009436e-05, "loss": 0.6369, "step": 2814 }, { "epoch": 0.35551345804720186, "grad_norm": 1.75, "learning_rate": 1.8566619082585826e-05, "loss": 0.5601, "step": 2815 }, { "epoch": 0.355639750572263, "grad_norm": 1.9296875, "learning_rate": 1.8565588993632488e-05, "loss": 0.6652, "step": 2816 }, { "epoch": 0.3557660430973242, "grad_norm": 1.8359375, "learning_rate": 1.8564558563275406e-05, "loss": 0.6919, "step": 2817 }, { "epoch": 0.35589233562238537, "grad_norm": 1.765625, "learning_rate": 1.8563527791555643e-05, "loss": 0.6711, "step": 2818 }, { "epoch": 0.3560186281474465, "grad_norm": 1.7578125, "learning_rate": 1.8562496678514295e-05, "loss": 0.7128, "step": 2819 }, { "epoch": 0.3561449206725077, "grad_norm": 1.875, "learning_rate": 1.8561465224192453e-05, "loss": 0.6278, "step": 2820 }, { "epoch": 0.3562712131975689, "grad_norm": 1.8203125, "learning_rate": 1.8560433428631228e-05, "loss": 0.6472, "step": 2821 }, { "epoch": 0.35639750572263, "grad_norm": 1.84375, "learning_rate": 1.8559401291871738e-05, "loss": 0.6282, "step": 2822 }, { "epoch": 0.3565237982476912, "grad_norm": 1.9375, "learning_rate": 1.8558368813955136e-05, "loss": 0.6339, "step": 2823 }, { "epoch": 0.3566500907727524, "grad_norm": 1.8984375, "learning_rate": 1.8557335994922567e-05, "loss": 0.66, "step": 2824 }, { "epoch": 0.3567763832978136, "grad_norm": 1.8828125, "learning_rate": 1.8556302834815198e-05, "loss": 0.6855, "step": 2825 }, { "epoch": 0.3569026758228747, "grad_norm": 1.6640625, "learning_rate": 1.8555269333674207e-05, "loss": 0.6518, "step": 2826 }, { "epoch": 0.3570289683479359, "grad_norm": 1.84375, "learning_rate": 1.8554235491540783e-05, "loss": 0.6976, "step": 2827 }, { "epoch": 0.3571552608729971, "grad_norm": 1.703125, "learning_rate": 1.8553201308456138e-05, "loss": 0.5453, "step": 2828 }, { "epoch": 0.3572815533980582, "grad_norm": 1.9296875, "learning_rate": 1.8552166784461494e-05, "loss": 0.6754, "step": 2829 }, { "epoch": 0.3574078459231194, "grad_norm": 1.8515625, "learning_rate": 1.855113191959808e-05, "loss": 0.6389, "step": 2830 }, { "epoch": 0.3575341384481806, "grad_norm": 1.8046875, "learning_rate": 1.8550096713907143e-05, "loss": 0.6931, "step": 2831 }, { "epoch": 0.3576604309732418, "grad_norm": 1.796875, "learning_rate": 1.8549061167429945e-05, "loss": 0.6694, "step": 2832 }, { "epoch": 0.3577867234983029, "grad_norm": 1.828125, "learning_rate": 1.854802528020776e-05, "loss": 0.6924, "step": 2833 }, { "epoch": 0.3579130160233641, "grad_norm": 1.8671875, "learning_rate": 1.854698905228188e-05, "loss": 0.7184, "step": 2834 }, { "epoch": 0.3580393085484253, "grad_norm": 1.828125, "learning_rate": 1.8545952483693598e-05, "loss": 0.7279, "step": 2835 }, { "epoch": 0.35816560107348644, "grad_norm": 1.890625, "learning_rate": 1.854491557448424e-05, "loss": 0.682, "step": 2836 }, { "epoch": 0.35829189359854763, "grad_norm": 1.828125, "learning_rate": 1.8543878324695122e-05, "loss": 0.6139, "step": 2837 }, { "epoch": 0.3584181861236088, "grad_norm": 1.9296875, "learning_rate": 1.8542840734367598e-05, "loss": 0.7196, "step": 2838 }, { "epoch": 0.35854447864867, "grad_norm": 1.921875, "learning_rate": 1.8541802803543018e-05, "loss": 0.736, "step": 2839 }, { "epoch": 0.35867077117373114, "grad_norm": 1.6640625, "learning_rate": 1.8540764532262753e-05, "loss": 0.6162, "step": 2840 }, { "epoch": 0.35879706369879233, "grad_norm": 1.78125, "learning_rate": 1.8539725920568183e-05, "loss": 0.6024, "step": 2841 }, { "epoch": 0.3589233562238535, "grad_norm": 1.875, "learning_rate": 1.853868696850071e-05, "loss": 0.7538, "step": 2842 }, { "epoch": 0.35904964874891465, "grad_norm": 1.6875, "learning_rate": 1.853764767610174e-05, "loss": 0.6109, "step": 2843 }, { "epoch": 0.35917594127397584, "grad_norm": 1.8828125, "learning_rate": 1.85366080434127e-05, "loss": 0.6429, "step": 2844 }, { "epoch": 0.35930223379903703, "grad_norm": 2.0625, "learning_rate": 1.853556807047502e-05, "loss": 0.7564, "step": 2845 }, { "epoch": 0.3594285263240982, "grad_norm": 1.953125, "learning_rate": 1.853452775733016e-05, "loss": 0.7045, "step": 2846 }, { "epoch": 0.35955481884915935, "grad_norm": 1.7421875, "learning_rate": 1.853348710401958e-05, "loss": 0.7096, "step": 2847 }, { "epoch": 0.35968111137422054, "grad_norm": 1.78125, "learning_rate": 1.8532446110584757e-05, "loss": 0.7479, "step": 2848 }, { "epoch": 0.35980740389928173, "grad_norm": 1.8046875, "learning_rate": 1.8531404777067187e-05, "loss": 0.7492, "step": 2849 }, { "epoch": 0.35993369642434286, "grad_norm": 1.78125, "learning_rate": 1.8530363103508365e-05, "loss": 0.7677, "step": 2850 }, { "epoch": 0.36005998894940405, "grad_norm": 1.84375, "learning_rate": 1.852932108994982e-05, "loss": 0.7362, "step": 2851 }, { "epoch": 0.36018628147446524, "grad_norm": 1.765625, "learning_rate": 1.852827873643308e-05, "loss": 0.7086, "step": 2852 }, { "epoch": 0.3603125739995264, "grad_norm": 1.703125, "learning_rate": 1.8527236042999695e-05, "loss": 0.6293, "step": 2853 }, { "epoch": 0.36043886652458756, "grad_norm": 1.9296875, "learning_rate": 1.8526193009691215e-05, "loss": 0.748, "step": 2854 }, { "epoch": 0.36056515904964875, "grad_norm": 1.7578125, "learning_rate": 1.852514963654922e-05, "loss": 0.6978, "step": 2855 }, { "epoch": 0.36069145157470994, "grad_norm": 1.75, "learning_rate": 1.8524105923615298e-05, "loss": 0.6748, "step": 2856 }, { "epoch": 0.3608177440997711, "grad_norm": 1.7265625, "learning_rate": 1.8523061870931042e-05, "loss": 0.6776, "step": 2857 }, { "epoch": 0.36094403662483227, "grad_norm": 1.640625, "learning_rate": 1.852201747853807e-05, "loss": 0.602, "step": 2858 }, { "epoch": 0.36107032914989345, "grad_norm": 1.953125, "learning_rate": 1.8520972746478008e-05, "loss": 0.7347, "step": 2859 }, { "epoch": 0.3611966216749546, "grad_norm": 1.875, "learning_rate": 1.8519927674792496e-05, "loss": 0.7005, "step": 2860 }, { "epoch": 0.3613229142000158, "grad_norm": 1.984375, "learning_rate": 1.851888226352319e-05, "loss": 0.748, "step": 2861 }, { "epoch": 0.36144920672507697, "grad_norm": 1.8671875, "learning_rate": 1.851783651271175e-05, "loss": 0.6903, "step": 2862 }, { "epoch": 0.36157549925013815, "grad_norm": 1.875, "learning_rate": 1.8516790422399872e-05, "loss": 0.6474, "step": 2863 }, { "epoch": 0.3617017917751993, "grad_norm": 1.7734375, "learning_rate": 1.8515743992629234e-05, "loss": 0.603, "step": 2864 }, { "epoch": 0.3618280843002605, "grad_norm": 1.671875, "learning_rate": 1.8514697223441555e-05, "loss": 0.6013, "step": 2865 }, { "epoch": 0.36195437682532167, "grad_norm": 1.8984375, "learning_rate": 1.8513650114878557e-05, "loss": 0.695, "step": 2866 }, { "epoch": 0.3620806693503828, "grad_norm": 1.953125, "learning_rate": 1.8512602666981968e-05, "loss": 0.6259, "step": 2867 }, { "epoch": 0.362206961875444, "grad_norm": 1.84375, "learning_rate": 1.8511554879793537e-05, "loss": 0.691, "step": 2868 }, { "epoch": 0.3623332544005052, "grad_norm": 1.765625, "learning_rate": 1.8510506753355035e-05, "loss": 0.645, "step": 2869 }, { "epoch": 0.36245954692556637, "grad_norm": 2.140625, "learning_rate": 1.8509458287708233e-05, "loss": 0.8221, "step": 2870 }, { "epoch": 0.3625858394506275, "grad_norm": 1.984375, "learning_rate": 1.8508409482894918e-05, "loss": 0.7203, "step": 2871 }, { "epoch": 0.3627121319756887, "grad_norm": 1.7578125, "learning_rate": 1.8507360338956896e-05, "loss": 0.7222, "step": 2872 }, { "epoch": 0.3628384245007499, "grad_norm": 1.921875, "learning_rate": 1.8506310855935982e-05, "loss": 0.678, "step": 2873 }, { "epoch": 0.362964717025811, "grad_norm": 1.828125, "learning_rate": 1.8505261033874003e-05, "loss": 0.6374, "step": 2874 }, { "epoch": 0.3630910095508722, "grad_norm": 2.59375, "learning_rate": 1.850421087281281e-05, "loss": 0.8042, "step": 2875 }, { "epoch": 0.3632173020759334, "grad_norm": 1.84375, "learning_rate": 1.8503160372794253e-05, "loss": 0.622, "step": 2876 }, { "epoch": 0.3633435946009946, "grad_norm": 1.9140625, "learning_rate": 1.8502109533860206e-05, "loss": 0.6364, "step": 2877 }, { "epoch": 0.3634698871260557, "grad_norm": 1.921875, "learning_rate": 1.850105835605255e-05, "loss": 0.7162, "step": 2878 }, { "epoch": 0.3635961796511169, "grad_norm": 1.8515625, "learning_rate": 1.8500006839413183e-05, "loss": 0.6977, "step": 2879 }, { "epoch": 0.3637224721761781, "grad_norm": 1.8984375, "learning_rate": 1.849895498398402e-05, "loss": 0.7015, "step": 2880 }, { "epoch": 0.3638487647012392, "grad_norm": 1.8984375, "learning_rate": 1.849790278980698e-05, "loss": 0.7217, "step": 2881 }, { "epoch": 0.3639750572263004, "grad_norm": 1.8203125, "learning_rate": 1.8496850256924004e-05, "loss": 0.6935, "step": 2882 }, { "epoch": 0.3641013497513616, "grad_norm": 1.9140625, "learning_rate": 1.8495797385377045e-05, "loss": 0.6968, "step": 2883 }, { "epoch": 0.3642276422764228, "grad_norm": 1.8203125, "learning_rate": 1.8494744175208065e-05, "loss": 0.6499, "step": 2884 }, { "epoch": 0.3643539348014839, "grad_norm": 1.859375, "learning_rate": 1.849369062645904e-05, "loss": 0.759, "step": 2885 }, { "epoch": 0.3644802273265451, "grad_norm": 1.9375, "learning_rate": 1.8492636739171966e-05, "loss": 0.7384, "step": 2886 }, { "epoch": 0.3646065198516063, "grad_norm": 1.7109375, "learning_rate": 1.8491582513388846e-05, "loss": 0.6248, "step": 2887 }, { "epoch": 0.36473281237666744, "grad_norm": 1.796875, "learning_rate": 1.8490527949151703e-05, "loss": 0.6035, "step": 2888 }, { "epoch": 0.3648591049017286, "grad_norm": 2.03125, "learning_rate": 1.8489473046502565e-05, "loss": 0.6024, "step": 2889 }, { "epoch": 0.3649853974267898, "grad_norm": 1.765625, "learning_rate": 1.848841780548348e-05, "loss": 0.6423, "step": 2890 }, { "epoch": 0.36511168995185095, "grad_norm": 1.9921875, "learning_rate": 1.848736222613651e-05, "loss": 0.6559, "step": 2891 }, { "epoch": 0.36523798247691214, "grad_norm": 1.765625, "learning_rate": 1.848630630850372e-05, "loss": 0.6737, "step": 2892 }, { "epoch": 0.3653642750019733, "grad_norm": 1.8828125, "learning_rate": 1.8485250052627205e-05, "loss": 0.6416, "step": 2893 }, { "epoch": 0.3654905675270345, "grad_norm": 2.0, "learning_rate": 1.8484193458549058e-05, "loss": 0.6152, "step": 2894 }, { "epoch": 0.36561686005209565, "grad_norm": 1.859375, "learning_rate": 1.8483136526311397e-05, "loss": 0.6654, "step": 2895 }, { "epoch": 0.36574315257715684, "grad_norm": 1.84375, "learning_rate": 1.8482079255956345e-05, "loss": 0.7153, "step": 2896 }, { "epoch": 0.365869445102218, "grad_norm": 2.046875, "learning_rate": 1.8481021647526045e-05, "loss": 0.7259, "step": 2897 }, { "epoch": 0.36599573762727916, "grad_norm": 2.0, "learning_rate": 1.847996370106265e-05, "loss": 0.7048, "step": 2898 }, { "epoch": 0.36612203015234035, "grad_norm": 1.8203125, "learning_rate": 1.8478905416608328e-05, "loss": 0.7523, "step": 2899 }, { "epoch": 0.36624832267740154, "grad_norm": 1.75, "learning_rate": 1.8477846794205258e-05, "loss": 0.6639, "step": 2900 }, { "epoch": 0.36637461520246273, "grad_norm": 2.046875, "learning_rate": 1.8476787833895636e-05, "loss": 0.713, "step": 2901 }, { "epoch": 0.36650090772752386, "grad_norm": 1.7734375, "learning_rate": 1.8475728535721668e-05, "loss": 0.6154, "step": 2902 }, { "epoch": 0.36662720025258505, "grad_norm": 1.8515625, "learning_rate": 1.8474668899725576e-05, "loss": 0.6393, "step": 2903 }, { "epoch": 0.36675349277764624, "grad_norm": 1.8828125, "learning_rate": 1.8473608925949595e-05, "loss": 0.7317, "step": 2904 }, { "epoch": 0.3668797853027074, "grad_norm": 1.8046875, "learning_rate": 1.847254861443597e-05, "loss": 0.6719, "step": 2905 }, { "epoch": 0.36700607782776856, "grad_norm": 1.8984375, "learning_rate": 1.8471487965226968e-05, "loss": 0.6088, "step": 2906 }, { "epoch": 0.36713237035282975, "grad_norm": 1.8671875, "learning_rate": 1.8470426978364857e-05, "loss": 0.7564, "step": 2907 }, { "epoch": 0.36725866287789094, "grad_norm": 1.875, "learning_rate": 1.846936565389193e-05, "loss": 0.6824, "step": 2908 }, { "epoch": 0.3673849554029521, "grad_norm": 1.9609375, "learning_rate": 1.8468303991850484e-05, "loss": 0.6986, "step": 2909 }, { "epoch": 0.36751124792801326, "grad_norm": 1.78125, "learning_rate": 1.8467241992282842e-05, "loss": 0.722, "step": 2910 }, { "epoch": 0.36763754045307445, "grad_norm": 1.90625, "learning_rate": 1.8466179655231327e-05, "loss": 0.6817, "step": 2911 }, { "epoch": 0.3677638329781356, "grad_norm": 1.9296875, "learning_rate": 1.8465116980738288e-05, "loss": 0.7879, "step": 2912 }, { "epoch": 0.3678901255031968, "grad_norm": 1.765625, "learning_rate": 1.8464053968846073e-05, "loss": 0.6379, "step": 2913 }, { "epoch": 0.36801641802825796, "grad_norm": 1.765625, "learning_rate": 1.846299061959705e-05, "loss": 0.68, "step": 2914 }, { "epoch": 0.36814271055331915, "grad_norm": 1.78125, "learning_rate": 1.846192693303361e-05, "loss": 0.6217, "step": 2915 }, { "epoch": 0.3682690030783803, "grad_norm": 1.875, "learning_rate": 1.8460862909198143e-05, "loss": 0.6246, "step": 2916 }, { "epoch": 0.3683952956034415, "grad_norm": 1.90625, "learning_rate": 1.845979854813306e-05, "loss": 0.7232, "step": 2917 }, { "epoch": 0.36852158812850266, "grad_norm": 1.7109375, "learning_rate": 1.845873384988078e-05, "loss": 0.5561, "step": 2918 }, { "epoch": 0.3686478806535638, "grad_norm": 1.8046875, "learning_rate": 1.8457668814483748e-05, "loss": 0.6455, "step": 2919 }, { "epoch": 0.368774173178625, "grad_norm": 2.046875, "learning_rate": 1.8456603441984408e-05, "loss": 0.7495, "step": 2920 }, { "epoch": 0.3689004657036862, "grad_norm": 1.8828125, "learning_rate": 1.8455537732425223e-05, "loss": 0.755, "step": 2921 }, { "epoch": 0.3690267582287473, "grad_norm": 1.8515625, "learning_rate": 1.8454471685848667e-05, "loss": 0.6953, "step": 2922 }, { "epoch": 0.3691530507538085, "grad_norm": 2.015625, "learning_rate": 1.8453405302297235e-05, "loss": 0.7428, "step": 2923 }, { "epoch": 0.3692793432788697, "grad_norm": 1.796875, "learning_rate": 1.8452338581813433e-05, "loss": 0.696, "step": 2924 }, { "epoch": 0.3694056358039309, "grad_norm": 1.8515625, "learning_rate": 1.845127152443977e-05, "loss": 0.6829, "step": 2925 }, { "epoch": 0.369531928328992, "grad_norm": 1.7421875, "learning_rate": 1.8450204130218782e-05, "loss": 0.7387, "step": 2926 }, { "epoch": 0.3696582208540532, "grad_norm": 1.7578125, "learning_rate": 1.844913639919301e-05, "loss": 0.6464, "step": 2927 }, { "epoch": 0.3697845133791144, "grad_norm": 1.78125, "learning_rate": 1.844806833140501e-05, "loss": 0.7507, "step": 2928 }, { "epoch": 0.3699108059041755, "grad_norm": 1.703125, "learning_rate": 1.8446999926897358e-05, "loss": 0.6355, "step": 2929 }, { "epoch": 0.3700370984292367, "grad_norm": 1.7890625, "learning_rate": 1.8445931185712635e-05, "loss": 0.7065, "step": 2930 }, { "epoch": 0.3701633909542979, "grad_norm": 1.7265625, "learning_rate": 1.8444862107893437e-05, "loss": 0.6707, "step": 2931 }, { "epoch": 0.3702896834793591, "grad_norm": 1.71875, "learning_rate": 1.8443792693482377e-05, "loss": 0.5957, "step": 2932 }, { "epoch": 0.3704159760044202, "grad_norm": 1.9375, "learning_rate": 1.8442722942522077e-05, "loss": 0.6808, "step": 2933 }, { "epoch": 0.3705422685294814, "grad_norm": 1.9921875, "learning_rate": 1.8441652855055178e-05, "loss": 0.7216, "step": 2934 }, { "epoch": 0.3706685610545426, "grad_norm": 1.8125, "learning_rate": 1.8440582431124325e-05, "loss": 0.6088, "step": 2935 }, { "epoch": 0.37079485357960373, "grad_norm": 2.390625, "learning_rate": 1.843951167077219e-05, "loss": 0.7253, "step": 2936 }, { "epoch": 0.3709211461046649, "grad_norm": 1.7890625, "learning_rate": 1.8438440574041447e-05, "loss": 0.6295, "step": 2937 }, { "epoch": 0.3710474386297261, "grad_norm": 2.265625, "learning_rate": 1.843736914097479e-05, "loss": 0.6229, "step": 2938 }, { "epoch": 0.3711737311547873, "grad_norm": 1.796875, "learning_rate": 1.843629737161492e-05, "loss": 0.7255, "step": 2939 }, { "epoch": 0.37130002367984843, "grad_norm": 1.828125, "learning_rate": 1.8435225266004555e-05, "loss": 0.7042, "step": 2940 }, { "epoch": 0.3714263162049096, "grad_norm": 1.890625, "learning_rate": 1.843415282418643e-05, "loss": 0.6544, "step": 2941 }, { "epoch": 0.3715526087299708, "grad_norm": 1.8984375, "learning_rate": 1.8433080046203293e-05, "loss": 0.7113, "step": 2942 }, { "epoch": 0.37167890125503195, "grad_norm": 1.7578125, "learning_rate": 1.843200693209789e-05, "loss": 0.6807, "step": 2943 }, { "epoch": 0.37180519378009314, "grad_norm": 2.046875, "learning_rate": 1.8430933481913006e-05, "loss": 0.6923, "step": 2944 }, { "epoch": 0.3719314863051543, "grad_norm": 1.8671875, "learning_rate": 1.8429859695691418e-05, "loss": 0.6762, "step": 2945 }, { "epoch": 0.3720577788302155, "grad_norm": 1.6640625, "learning_rate": 1.8428785573475926e-05, "loss": 0.6244, "step": 2946 }, { "epoch": 0.37218407135527665, "grad_norm": 1.65625, "learning_rate": 1.8427711115309343e-05, "loss": 0.6201, "step": 2947 }, { "epoch": 0.37231036388033784, "grad_norm": 1.6796875, "learning_rate": 1.8426636321234497e-05, "loss": 0.6184, "step": 2948 }, { "epoch": 0.372436656405399, "grad_norm": 1.875, "learning_rate": 1.842556119129422e-05, "loss": 0.7103, "step": 2949 }, { "epoch": 0.37256294893046016, "grad_norm": 1.703125, "learning_rate": 1.8424485725531375e-05, "loss": 0.6405, "step": 2950 }, { "epoch": 0.37268924145552135, "grad_norm": 1.75, "learning_rate": 1.8423409923988816e-05, "loss": 0.6005, "step": 2951 }, { "epoch": 0.37281553398058254, "grad_norm": 1.8046875, "learning_rate": 1.8422333786709424e-05, "loss": 0.6743, "step": 2952 }, { "epoch": 0.37294182650564367, "grad_norm": 1.796875, "learning_rate": 1.84212573137361e-05, "loss": 0.6803, "step": 2953 }, { "epoch": 0.37306811903070486, "grad_norm": 1.9609375, "learning_rate": 1.842018050511174e-05, "loss": 0.7497, "step": 2954 }, { "epoch": 0.37319441155576605, "grad_norm": 1.5625, "learning_rate": 1.8419103360879264e-05, "loss": 0.5847, "step": 2955 }, { "epoch": 0.37332070408082724, "grad_norm": 1.828125, "learning_rate": 1.8418025881081612e-05, "loss": 0.6765, "step": 2956 }, { "epoch": 0.37344699660588837, "grad_norm": 1.7734375, "learning_rate": 1.841694806576172e-05, "loss": 0.6622, "step": 2957 }, { "epoch": 0.37357328913094956, "grad_norm": 1.953125, "learning_rate": 1.8415869914962553e-05, "loss": 0.6801, "step": 2958 }, { "epoch": 0.37369958165601075, "grad_norm": 1.78125, "learning_rate": 1.8414791428727082e-05, "loss": 0.6015, "step": 2959 }, { "epoch": 0.3738258741810719, "grad_norm": 1.6484375, "learning_rate": 1.8413712607098294e-05, "loss": 0.5804, "step": 2960 }, { "epoch": 0.37395216670613307, "grad_norm": 1.9140625, "learning_rate": 1.8412633450119187e-05, "loss": 0.8055, "step": 2961 }, { "epoch": 0.37407845923119426, "grad_norm": 1.7890625, "learning_rate": 1.841155395783277e-05, "loss": 0.698, "step": 2962 }, { "epoch": 0.37420475175625545, "grad_norm": 1.8046875, "learning_rate": 1.8410474130282078e-05, "loss": 0.6345, "step": 2963 }, { "epoch": 0.3743310442813166, "grad_norm": 1.703125, "learning_rate": 1.8409393967510144e-05, "loss": 0.6317, "step": 2964 }, { "epoch": 0.37445733680637777, "grad_norm": 1.8671875, "learning_rate": 1.8408313469560023e-05, "loss": 0.7001, "step": 2965 }, { "epoch": 0.37458362933143896, "grad_norm": 1.859375, "learning_rate": 1.8407232636474773e-05, "loss": 0.6665, "step": 2966 }, { "epoch": 0.3747099218565001, "grad_norm": 2.046875, "learning_rate": 1.840615146829749e-05, "loss": 0.6901, "step": 2967 }, { "epoch": 0.3748362143815613, "grad_norm": 1.7265625, "learning_rate": 1.840506996507125e-05, "loss": 0.6382, "step": 2968 }, { "epoch": 0.3749625069066225, "grad_norm": 1.90625, "learning_rate": 1.8403988126839168e-05, "loss": 0.6827, "step": 2969 }, { "epoch": 0.37508879943168366, "grad_norm": 1.6875, "learning_rate": 1.840290595364436e-05, "loss": 0.662, "step": 2970 }, { "epoch": 0.3752150919567448, "grad_norm": 1.9140625, "learning_rate": 1.8401823445529962e-05, "loss": 0.6344, "step": 2971 }, { "epoch": 0.375341384481806, "grad_norm": 1.8125, "learning_rate": 1.8400740602539117e-05, "loss": 0.713, "step": 2972 }, { "epoch": 0.3754676770068672, "grad_norm": 1.6640625, "learning_rate": 1.839965742471499e-05, "loss": 0.6104, "step": 2973 }, { "epoch": 0.3755939695319283, "grad_norm": 1.8203125, "learning_rate": 1.8398573912100744e-05, "loss": 0.6715, "step": 2974 }, { "epoch": 0.3757202620569895, "grad_norm": 1.765625, "learning_rate": 1.839749006473958e-05, "loss": 0.6506, "step": 2975 }, { "epoch": 0.3758465545820507, "grad_norm": 1.8125, "learning_rate": 1.839640588267468e-05, "loss": 0.6548, "step": 2976 }, { "epoch": 0.3759728471071119, "grad_norm": 1.703125, "learning_rate": 1.839532136594927e-05, "loss": 0.5981, "step": 2977 }, { "epoch": 0.376099139632173, "grad_norm": 1.9296875, "learning_rate": 1.8394236514606565e-05, "loss": 0.6793, "step": 2978 }, { "epoch": 0.3762254321572342, "grad_norm": 1.8671875, "learning_rate": 1.839315132868982e-05, "loss": 0.656, "step": 2979 }, { "epoch": 0.3763517246822954, "grad_norm": 1.734375, "learning_rate": 1.8392065808242278e-05, "loss": 0.6401, "step": 2980 }, { "epoch": 0.3764780172073565, "grad_norm": 1.90625, "learning_rate": 1.8390979953307203e-05, "loss": 0.6605, "step": 2981 }, { "epoch": 0.3766043097324177, "grad_norm": 2.078125, "learning_rate": 1.838989376392788e-05, "loss": 0.6953, "step": 2982 }, { "epoch": 0.3767306022574789, "grad_norm": 1.8828125, "learning_rate": 1.8388807240147598e-05, "loss": 0.7142, "step": 2983 }, { "epoch": 0.3768568947825401, "grad_norm": 1.8125, "learning_rate": 1.8387720382009665e-05, "loss": 0.6522, "step": 2984 }, { "epoch": 0.3769831873076012, "grad_norm": 1.84375, "learning_rate": 1.83866331895574e-05, "loss": 0.6378, "step": 2985 }, { "epoch": 0.3771094798326624, "grad_norm": 1.9140625, "learning_rate": 1.8385545662834142e-05, "loss": 0.7669, "step": 2986 }, { "epoch": 0.3772357723577236, "grad_norm": 1.953125, "learning_rate": 1.8384457801883227e-05, "loss": 0.7206, "step": 2987 }, { "epoch": 0.37736206488278473, "grad_norm": 1.84375, "learning_rate": 1.838336960674802e-05, "loss": 0.7821, "step": 2988 }, { "epoch": 0.3774883574078459, "grad_norm": 1.78125, "learning_rate": 1.8382281077471896e-05, "loss": 0.6495, "step": 2989 }, { "epoch": 0.3776146499329071, "grad_norm": 1.625, "learning_rate": 1.8381192214098234e-05, "loss": 0.5772, "step": 2990 }, { "epoch": 0.37774094245796824, "grad_norm": 2.0, "learning_rate": 1.838010301667044e-05, "loss": 0.6575, "step": 2991 }, { "epoch": 0.37786723498302943, "grad_norm": 1.9140625, "learning_rate": 1.837901348523192e-05, "loss": 0.6817, "step": 2992 }, { "epoch": 0.3779935275080906, "grad_norm": 1.796875, "learning_rate": 1.8377923619826112e-05, "loss": 0.6499, "step": 2993 }, { "epoch": 0.3781198200331518, "grad_norm": 2.046875, "learning_rate": 1.8376833420496446e-05, "loss": 0.7505, "step": 2994 }, { "epoch": 0.37824611255821294, "grad_norm": 1.828125, "learning_rate": 1.837574288728637e-05, "loss": 0.7018, "step": 2995 }, { "epoch": 0.37837240508327413, "grad_norm": 1.734375, "learning_rate": 1.8374652020239365e-05, "loss": 0.6871, "step": 2996 }, { "epoch": 0.3784986976083353, "grad_norm": 1.859375, "learning_rate": 1.8373560819398897e-05, "loss": 0.6402, "step": 2997 }, { "epoch": 0.37862499013339646, "grad_norm": 1.8984375, "learning_rate": 1.8372469284808468e-05, "loss": 0.6732, "step": 2998 }, { "epoch": 0.37875128265845764, "grad_norm": 1.953125, "learning_rate": 1.8371377416511578e-05, "loss": 0.6054, "step": 2999 }, { "epoch": 0.37887757518351883, "grad_norm": 1.7109375, "learning_rate": 1.837028521455175e-05, "loss": 0.6433, "step": 3000 }, { "epoch": 0.37900386770858, "grad_norm": 1.7421875, "learning_rate": 1.836919267897251e-05, "loss": 0.6334, "step": 3001 }, { "epoch": 0.37913016023364116, "grad_norm": 1.8828125, "learning_rate": 1.836809980981741e-05, "loss": 0.7407, "step": 3002 }, { "epoch": 0.37925645275870234, "grad_norm": 1.8515625, "learning_rate": 1.836700660713001e-05, "loss": 0.6192, "step": 3003 }, { "epoch": 0.37938274528376353, "grad_norm": 1.8828125, "learning_rate": 1.8365913070953875e-05, "loss": 0.6202, "step": 3004 }, { "epoch": 0.37950903780882467, "grad_norm": 1.8046875, "learning_rate": 1.83648192013326e-05, "loss": 0.6447, "step": 3005 }, { "epoch": 0.37963533033388586, "grad_norm": 2.140625, "learning_rate": 1.8363724998309776e-05, "loss": 0.6789, "step": 3006 }, { "epoch": 0.37976162285894705, "grad_norm": 2.0, "learning_rate": 1.836263046192902e-05, "loss": 0.7014, "step": 3007 }, { "epoch": 0.37988791538400823, "grad_norm": 1.875, "learning_rate": 1.8361535592233957e-05, "loss": 0.6632, "step": 3008 }, { "epoch": 0.38001420790906937, "grad_norm": 1.5703125, "learning_rate": 1.8360440389268225e-05, "loss": 0.6173, "step": 3009 }, { "epoch": 0.38014050043413056, "grad_norm": 1.84375, "learning_rate": 1.8359344853075476e-05, "loss": 0.7059, "step": 3010 }, { "epoch": 0.38026679295919175, "grad_norm": 1.8046875, "learning_rate": 1.8358248983699377e-05, "loss": 0.6539, "step": 3011 }, { "epoch": 0.3803930854842529, "grad_norm": 1.875, "learning_rate": 1.8357152781183606e-05, "loss": 0.6725, "step": 3012 }, { "epoch": 0.38051937800931407, "grad_norm": 1.765625, "learning_rate": 1.8356056245571855e-05, "loss": 0.6586, "step": 3013 }, { "epoch": 0.38064567053437526, "grad_norm": 1.9453125, "learning_rate": 1.8354959376907822e-05, "loss": 0.7409, "step": 3014 }, { "epoch": 0.38077196305943645, "grad_norm": 1.9296875, "learning_rate": 1.835386217523524e-05, "loss": 0.6692, "step": 3015 }, { "epoch": 0.3808982555844976, "grad_norm": 1.7265625, "learning_rate": 1.8352764640597834e-05, "loss": 0.69, "step": 3016 }, { "epoch": 0.38102454810955877, "grad_norm": 1.734375, "learning_rate": 1.8351666773039343e-05, "loss": 0.6125, "step": 3017 }, { "epoch": 0.38115084063461996, "grad_norm": 1.6796875, "learning_rate": 1.8350568572603535e-05, "loss": 0.5597, "step": 3018 }, { "epoch": 0.3812771331596811, "grad_norm": 1.875, "learning_rate": 1.8349470039334173e-05, "loss": 0.6475, "step": 3019 }, { "epoch": 0.3814034256847423, "grad_norm": 1.828125, "learning_rate": 1.834837117327505e-05, "loss": 0.6202, "step": 3020 }, { "epoch": 0.38152971820980347, "grad_norm": 2.015625, "learning_rate": 1.834727197446996e-05, "loss": 0.6669, "step": 3021 }, { "epoch": 0.3816560107348646, "grad_norm": 1.8203125, "learning_rate": 1.8346172442962714e-05, "loss": 0.6329, "step": 3022 }, { "epoch": 0.3817823032599258, "grad_norm": 1.9140625, "learning_rate": 1.8345072578797137e-05, "loss": 0.6572, "step": 3023 }, { "epoch": 0.381908595784987, "grad_norm": 1.875, "learning_rate": 1.834397238201707e-05, "loss": 0.6144, "step": 3024 }, { "epoch": 0.38203488831004817, "grad_norm": 1.8125, "learning_rate": 1.8342871852666358e-05, "loss": 0.7154, "step": 3025 }, { "epoch": 0.3821611808351093, "grad_norm": 1.8125, "learning_rate": 1.834177099078887e-05, "loss": 0.6707, "step": 3026 }, { "epoch": 0.3822874733601705, "grad_norm": 2.015625, "learning_rate": 1.8340669796428482e-05, "loss": 0.7488, "step": 3027 }, { "epoch": 0.3824137658852317, "grad_norm": 2.046875, "learning_rate": 1.8339568269629085e-05, "loss": 0.7535, "step": 3028 }, { "epoch": 0.3825400584102928, "grad_norm": 1.765625, "learning_rate": 1.8338466410434586e-05, "loss": 0.6418, "step": 3029 }, { "epoch": 0.382666350935354, "grad_norm": 1.7734375, "learning_rate": 1.8337364218888902e-05, "loss": 0.6269, "step": 3030 }, { "epoch": 0.3827926434604152, "grad_norm": 1.859375, "learning_rate": 1.833626169503596e-05, "loss": 0.7415, "step": 3031 }, { "epoch": 0.3829189359854764, "grad_norm": 2.28125, "learning_rate": 1.8335158838919705e-05, "loss": 0.7441, "step": 3032 }, { "epoch": 0.3830452285105375, "grad_norm": 1.953125, "learning_rate": 1.8334055650584094e-05, "loss": 0.7152, "step": 3033 }, { "epoch": 0.3831715210355987, "grad_norm": 1.890625, "learning_rate": 1.8332952130073104e-05, "loss": 0.7562, "step": 3034 }, { "epoch": 0.3832978135606599, "grad_norm": 1.9375, "learning_rate": 1.8331848277430708e-05, "loss": 0.7716, "step": 3035 }, { "epoch": 0.38342410608572103, "grad_norm": 1.9765625, "learning_rate": 1.8330744092700913e-05, "loss": 0.7029, "step": 3036 }, { "epoch": 0.3835503986107822, "grad_norm": 1.765625, "learning_rate": 1.8329639575927718e-05, "loss": 0.6959, "step": 3037 }, { "epoch": 0.3836766911358434, "grad_norm": 1.8515625, "learning_rate": 1.8328534727155158e-05, "loss": 0.6421, "step": 3038 }, { "epoch": 0.3838029836609046, "grad_norm": 1.9609375, "learning_rate": 1.8327429546427262e-05, "loss": 0.6582, "step": 3039 }, { "epoch": 0.38392927618596573, "grad_norm": 1.734375, "learning_rate": 1.832632403378808e-05, "loss": 0.6875, "step": 3040 }, { "epoch": 0.3840555687110269, "grad_norm": 1.875, "learning_rate": 1.8325218189281683e-05, "loss": 0.7148, "step": 3041 }, { "epoch": 0.3841818612360881, "grad_norm": 1.9140625, "learning_rate": 1.832411201295214e-05, "loss": 0.7137, "step": 3042 }, { "epoch": 0.38430815376114924, "grad_norm": 2.015625, "learning_rate": 1.832300550484354e-05, "loss": 0.7309, "step": 3043 }, { "epoch": 0.38443444628621043, "grad_norm": 1.96875, "learning_rate": 1.8321898664999986e-05, "loss": 0.7672, "step": 3044 }, { "epoch": 0.3845607388112716, "grad_norm": 1.9609375, "learning_rate": 1.83207914934656e-05, "loss": 0.7525, "step": 3045 }, { "epoch": 0.3846870313363328, "grad_norm": 1.890625, "learning_rate": 1.83196839902845e-05, "loss": 0.6909, "step": 3046 }, { "epoch": 0.38481332386139394, "grad_norm": 1.8125, "learning_rate": 1.831857615550084e-05, "loss": 0.7379, "step": 3047 }, { "epoch": 0.38493961638645513, "grad_norm": 1.6796875, "learning_rate": 1.8317467989158776e-05, "loss": 0.5984, "step": 3048 }, { "epoch": 0.3850659089115163, "grad_norm": 1.8203125, "learning_rate": 1.8316359491302465e-05, "loss": 0.6946, "step": 3049 }, { "epoch": 0.38519220143657745, "grad_norm": 1.8203125, "learning_rate": 1.83152506619761e-05, "loss": 0.6609, "step": 3050 }, { "epoch": 0.38531849396163864, "grad_norm": 1.8203125, "learning_rate": 1.831414150122387e-05, "loss": 0.6412, "step": 3051 }, { "epoch": 0.38544478648669983, "grad_norm": 1.734375, "learning_rate": 1.8313032009089985e-05, "loss": 0.5941, "step": 3052 }, { "epoch": 0.38557107901176096, "grad_norm": 1.8515625, "learning_rate": 1.8311922185618668e-05, "loss": 0.6789, "step": 3053 }, { "epoch": 0.38569737153682215, "grad_norm": 1.8828125, "learning_rate": 1.831081203085415e-05, "loss": 0.7657, "step": 3054 }, { "epoch": 0.38582366406188334, "grad_norm": 1.84375, "learning_rate": 1.830970154484069e-05, "loss": 0.7274, "step": 3055 }, { "epoch": 0.38594995658694453, "grad_norm": 2.0625, "learning_rate": 1.8308590727622534e-05, "loss": 0.7802, "step": 3056 }, { "epoch": 0.38607624911200566, "grad_norm": 1.9765625, "learning_rate": 1.830747957924397e-05, "loss": 0.7077, "step": 3057 }, { "epoch": 0.38620254163706685, "grad_norm": 1.875, "learning_rate": 1.8306368099749274e-05, "loss": 0.693, "step": 3058 }, { "epoch": 0.38632883416212804, "grad_norm": 1.828125, "learning_rate": 1.830525628918276e-05, "loss": 0.7237, "step": 3059 }, { "epoch": 0.3864551266871892, "grad_norm": 1.9375, "learning_rate": 1.830414414758873e-05, "loss": 0.7189, "step": 3060 }, { "epoch": 0.38658141921225037, "grad_norm": 1.796875, "learning_rate": 1.8303031675011515e-05, "loss": 0.6652, "step": 3061 }, { "epoch": 0.38670771173731155, "grad_norm": 1.796875, "learning_rate": 1.8301918871495456e-05, "loss": 0.6843, "step": 3062 }, { "epoch": 0.38683400426237274, "grad_norm": 2.59375, "learning_rate": 1.8300805737084913e-05, "loss": 0.7386, "step": 3063 }, { "epoch": 0.3869602967874339, "grad_norm": 1.7265625, "learning_rate": 1.8299692271824242e-05, "loss": 0.6808, "step": 3064 }, { "epoch": 0.38708658931249507, "grad_norm": 1.859375, "learning_rate": 1.8298578475757828e-05, "loss": 0.7426, "step": 3065 }, { "epoch": 0.38721288183755626, "grad_norm": 1.7734375, "learning_rate": 1.8297464348930066e-05, "loss": 0.7234, "step": 3066 }, { "epoch": 0.3873391743626174, "grad_norm": 1.8046875, "learning_rate": 1.829634989138536e-05, "loss": 0.6247, "step": 3067 }, { "epoch": 0.3874654668876786, "grad_norm": 2.015625, "learning_rate": 1.829523510316813e-05, "loss": 0.7572, "step": 3068 }, { "epoch": 0.38759175941273977, "grad_norm": 1.7578125, "learning_rate": 1.8294119984322812e-05, "loss": 0.6035, "step": 3069 }, { "epoch": 0.38771805193780096, "grad_norm": 1.9765625, "learning_rate": 1.829300453489385e-05, "loss": 0.9326, "step": 3070 }, { "epoch": 0.3878443444628621, "grad_norm": 1.828125, "learning_rate": 1.8291888754925694e-05, "loss": 0.7107, "step": 3071 }, { "epoch": 0.3879706369879233, "grad_norm": 1.765625, "learning_rate": 1.829077264446283e-05, "loss": 0.6724, "step": 3072 }, { "epoch": 0.38809692951298447, "grad_norm": 1.8046875, "learning_rate": 1.828965620354974e-05, "loss": 0.7112, "step": 3073 }, { "epoch": 0.3882232220380456, "grad_norm": 1.828125, "learning_rate": 1.8288539432230917e-05, "loss": 0.6772, "step": 3074 }, { "epoch": 0.3883495145631068, "grad_norm": 1.78125, "learning_rate": 1.8287422330550878e-05, "loss": 0.6986, "step": 3075 }, { "epoch": 0.388475807088168, "grad_norm": 1.859375, "learning_rate": 1.8286304898554145e-05, "loss": 0.7311, "step": 3076 }, { "epoch": 0.38860209961322917, "grad_norm": 1.75, "learning_rate": 1.828518713628526e-05, "loss": 0.7073, "step": 3077 }, { "epoch": 0.3887283921382903, "grad_norm": 1.8203125, "learning_rate": 1.828406904378877e-05, "loss": 0.6065, "step": 3078 }, { "epoch": 0.3888546846633515, "grad_norm": 1.9453125, "learning_rate": 1.8282950621109244e-05, "loss": 0.7872, "step": 3079 }, { "epoch": 0.3889809771884127, "grad_norm": 1.734375, "learning_rate": 1.8281831868291255e-05, "loss": 0.6846, "step": 3080 }, { "epoch": 0.3891072697134738, "grad_norm": 1.8046875, "learning_rate": 1.8280712785379396e-05, "loss": 0.6754, "step": 3081 }, { "epoch": 0.389233562238535, "grad_norm": 1.7109375, "learning_rate": 1.827959337241827e-05, "loss": 0.6565, "step": 3082 }, { "epoch": 0.3893598547635962, "grad_norm": 1.7265625, "learning_rate": 1.8278473629452495e-05, "loss": 0.5989, "step": 3083 }, { "epoch": 0.3894861472886573, "grad_norm": 1.828125, "learning_rate": 1.82773535565267e-05, "loss": 0.6845, "step": 3084 }, { "epoch": 0.3896124398137185, "grad_norm": 1.921875, "learning_rate": 1.8276233153685532e-05, "loss": 0.7592, "step": 3085 }, { "epoch": 0.3897387323387797, "grad_norm": 1.734375, "learning_rate": 1.8275112420973644e-05, "loss": 0.6645, "step": 3086 }, { "epoch": 0.3898650248638409, "grad_norm": 1.8046875, "learning_rate": 1.8273991358435707e-05, "loss": 0.7302, "step": 3087 }, { "epoch": 0.389991317388902, "grad_norm": 1.7109375, "learning_rate": 1.8272869966116406e-05, "loss": 0.6059, "step": 3088 }, { "epoch": 0.3901176099139632, "grad_norm": 1.7421875, "learning_rate": 1.827174824406043e-05, "loss": 0.6522, "step": 3089 }, { "epoch": 0.3902439024390244, "grad_norm": 1.8125, "learning_rate": 1.8270626192312494e-05, "loss": 0.6585, "step": 3090 }, { "epoch": 0.39037019496408554, "grad_norm": 1.8046875, "learning_rate": 1.826950381091732e-05, "loss": 0.688, "step": 3091 }, { "epoch": 0.3904964874891467, "grad_norm": 2.0625, "learning_rate": 1.826838109991964e-05, "loss": 0.7439, "step": 3092 }, { "epoch": 0.3906227800142079, "grad_norm": 1.6953125, "learning_rate": 1.826725805936421e-05, "loss": 0.6206, "step": 3093 }, { "epoch": 0.3907490725392691, "grad_norm": 1.828125, "learning_rate": 1.8266134689295782e-05, "loss": 0.658, "step": 3094 }, { "epoch": 0.39087536506433024, "grad_norm": 1.96875, "learning_rate": 1.8265010989759137e-05, "loss": 0.7179, "step": 3095 }, { "epoch": 0.3910016575893914, "grad_norm": 2.25, "learning_rate": 1.8263886960799062e-05, "loss": 0.6956, "step": 3096 }, { "epoch": 0.3911279501144526, "grad_norm": 2.03125, "learning_rate": 1.826276260246036e-05, "loss": 0.7051, "step": 3097 }, { "epoch": 0.39125424263951375, "grad_norm": 1.9765625, "learning_rate": 1.8261637914787837e-05, "loss": 0.7704, "step": 3098 }, { "epoch": 0.39138053516457494, "grad_norm": 1.8828125, "learning_rate": 1.826051289782633e-05, "loss": 0.6643, "step": 3099 }, { "epoch": 0.3915068276896361, "grad_norm": 1.6875, "learning_rate": 1.8259387551620674e-05, "loss": 0.6676, "step": 3100 }, { "epoch": 0.3916331202146973, "grad_norm": 1.7890625, "learning_rate": 1.8258261876215727e-05, "loss": 0.5972, "step": 3101 }, { "epoch": 0.39175941273975845, "grad_norm": 1.8125, "learning_rate": 1.825713587165635e-05, "loss": 0.6303, "step": 3102 }, { "epoch": 0.39188570526481964, "grad_norm": 1.7421875, "learning_rate": 1.8256009537987424e-05, "loss": 0.6465, "step": 3103 }, { "epoch": 0.39201199778988083, "grad_norm": 1.640625, "learning_rate": 1.825488287525385e-05, "loss": 0.6012, "step": 3104 }, { "epoch": 0.39213829031494196, "grad_norm": 1.953125, "learning_rate": 1.8253755883500523e-05, "loss": 0.67, "step": 3105 }, { "epoch": 0.39226458284000315, "grad_norm": 1.78125, "learning_rate": 1.8252628562772368e-05, "loss": 0.6529, "step": 3106 }, { "epoch": 0.39239087536506434, "grad_norm": 1.765625, "learning_rate": 1.8251500913114314e-05, "loss": 0.643, "step": 3107 }, { "epoch": 0.39251716789012553, "grad_norm": 1.8984375, "learning_rate": 1.8250372934571313e-05, "loss": 0.6967, "step": 3108 }, { "epoch": 0.39264346041518666, "grad_norm": 1.6640625, "learning_rate": 1.8249244627188316e-05, "loss": 0.6371, "step": 3109 }, { "epoch": 0.39276975294024785, "grad_norm": 1.75, "learning_rate": 1.8248115991010296e-05, "loss": 0.7003, "step": 3110 }, { "epoch": 0.39289604546530904, "grad_norm": 1.6796875, "learning_rate": 1.8246987026082243e-05, "loss": 0.6583, "step": 3111 }, { "epoch": 0.3930223379903702, "grad_norm": 1.875, "learning_rate": 1.8245857732449152e-05, "loss": 0.7247, "step": 3112 }, { "epoch": 0.39314863051543136, "grad_norm": 1.796875, "learning_rate": 1.824472811015603e-05, "loss": 0.6791, "step": 3113 }, { "epoch": 0.39327492304049255, "grad_norm": 1.953125, "learning_rate": 1.8243598159247907e-05, "loss": 0.7334, "step": 3114 }, { "epoch": 0.39340121556555374, "grad_norm": 1.7890625, "learning_rate": 1.8242467879769815e-05, "loss": 0.6486, "step": 3115 }, { "epoch": 0.3935275080906149, "grad_norm": 1.8125, "learning_rate": 1.8241337271766807e-05, "loss": 0.769, "step": 3116 }, { "epoch": 0.39365380061567606, "grad_norm": 1.8125, "learning_rate": 1.8240206335283947e-05, "loss": 0.6221, "step": 3117 }, { "epoch": 0.39378009314073725, "grad_norm": 1.859375, "learning_rate": 1.823907507036631e-05, "loss": 0.7078, "step": 3118 }, { "epoch": 0.3939063856657984, "grad_norm": 1.8359375, "learning_rate": 1.8237943477058983e-05, "loss": 0.6536, "step": 3119 }, { "epoch": 0.3940326781908596, "grad_norm": 1.84375, "learning_rate": 1.8236811555407077e-05, "loss": 0.7494, "step": 3120 }, { "epoch": 0.39415897071592076, "grad_norm": 1.765625, "learning_rate": 1.8235679305455698e-05, "loss": 0.6665, "step": 3121 }, { "epoch": 0.3942852632409819, "grad_norm": 1.7578125, "learning_rate": 1.823454672724998e-05, "loss": 0.5914, "step": 3122 }, { "epoch": 0.3944115557660431, "grad_norm": 1.90625, "learning_rate": 1.823341382083506e-05, "loss": 0.6857, "step": 3123 }, { "epoch": 0.3945378482911043, "grad_norm": 2.078125, "learning_rate": 1.82322805862561e-05, "loss": 0.6585, "step": 3124 }, { "epoch": 0.39466414081616547, "grad_norm": 1.9296875, "learning_rate": 1.8231147023558264e-05, "loss": 0.6958, "step": 3125 }, { "epoch": 0.3947904333412266, "grad_norm": 2.015625, "learning_rate": 1.8230013132786735e-05, "loss": 0.6904, "step": 3126 }, { "epoch": 0.3949167258662878, "grad_norm": 1.796875, "learning_rate": 1.82288789139867e-05, "loss": 0.6426, "step": 3127 }, { "epoch": 0.395043018391349, "grad_norm": 1.96875, "learning_rate": 1.822774436720338e-05, "loss": 0.7117, "step": 3128 }, { "epoch": 0.3951693109164101, "grad_norm": 1.8046875, "learning_rate": 1.8226609492481985e-05, "loss": 0.7147, "step": 3129 }, { "epoch": 0.3952956034414713, "grad_norm": 1.796875, "learning_rate": 1.822547428986775e-05, "loss": 0.7091, "step": 3130 }, { "epoch": 0.3954218959665325, "grad_norm": 1.8359375, "learning_rate": 1.822433875940592e-05, "loss": 0.7322, "step": 3131 }, { "epoch": 0.3955481884915937, "grad_norm": 1.8203125, "learning_rate": 1.8223202901141758e-05, "loss": 0.7086, "step": 3132 }, { "epoch": 0.3956744810166548, "grad_norm": 1.8203125, "learning_rate": 1.8222066715120535e-05, "loss": 0.6042, "step": 3133 }, { "epoch": 0.395800773541716, "grad_norm": 1.8203125, "learning_rate": 1.8220930201387537e-05, "loss": 0.6749, "step": 3134 }, { "epoch": 0.3959270660667772, "grad_norm": 1.7734375, "learning_rate": 1.8219793359988063e-05, "loss": 0.683, "step": 3135 }, { "epoch": 0.3960533585918383, "grad_norm": 1.8125, "learning_rate": 1.8218656190967422e-05, "loss": 0.7521, "step": 3136 }, { "epoch": 0.3961796511168995, "grad_norm": 1.640625, "learning_rate": 1.8217518694370944e-05, "loss": 0.6489, "step": 3137 }, { "epoch": 0.3963059436419607, "grad_norm": 1.8515625, "learning_rate": 1.8216380870243963e-05, "loss": 0.6879, "step": 3138 }, { "epoch": 0.3964322361670219, "grad_norm": 1.984375, "learning_rate": 1.8215242718631832e-05, "loss": 0.8043, "step": 3139 }, { "epoch": 0.396558528692083, "grad_norm": 1.78125, "learning_rate": 1.821410423957991e-05, "loss": 0.7154, "step": 3140 }, { "epoch": 0.3966848212171442, "grad_norm": 1.921875, "learning_rate": 1.821296543313358e-05, "loss": 0.6863, "step": 3141 }, { "epoch": 0.3968111137422054, "grad_norm": 1.828125, "learning_rate": 1.8211826299338233e-05, "loss": 0.6628, "step": 3142 }, { "epoch": 0.39693740626726653, "grad_norm": 1.7109375, "learning_rate": 1.8210686838239268e-05, "loss": 0.6465, "step": 3143 }, { "epoch": 0.3970636987923277, "grad_norm": 1.953125, "learning_rate": 1.82095470498821e-05, "loss": 0.6433, "step": 3144 }, { "epoch": 0.3971899913173889, "grad_norm": 1.875, "learning_rate": 1.820840693431216e-05, "loss": 0.8128, "step": 3145 }, { "epoch": 0.3973162838424501, "grad_norm": 1.828125, "learning_rate": 1.8207266491574895e-05, "loss": 0.6375, "step": 3146 }, { "epoch": 0.39744257636751124, "grad_norm": 1.7734375, "learning_rate": 1.8206125721715754e-05, "loss": 0.6956, "step": 3147 }, { "epoch": 0.3975688688925724, "grad_norm": 2.015625, "learning_rate": 1.8204984624780205e-05, "loss": 0.7263, "step": 3148 }, { "epoch": 0.3976951614176336, "grad_norm": 1.8984375, "learning_rate": 1.8203843200813732e-05, "loss": 0.6736, "step": 3149 }, { "epoch": 0.39782145394269475, "grad_norm": 1.65625, "learning_rate": 1.820270144986183e-05, "loss": 0.5076, "step": 3150 }, { "epoch": 0.39794774646775594, "grad_norm": 2.59375, "learning_rate": 1.820155937197001e-05, "loss": 0.8403, "step": 3151 }, { "epoch": 0.3980740389928171, "grad_norm": 1.90625, "learning_rate": 1.8200416967183785e-05, "loss": 0.6058, "step": 3152 }, { "epoch": 0.39820033151787826, "grad_norm": 1.78125, "learning_rate": 1.819927423554869e-05, "loss": 0.6366, "step": 3153 }, { "epoch": 0.39832662404293945, "grad_norm": 1.84375, "learning_rate": 1.8198131177110275e-05, "loss": 0.6941, "step": 3154 }, { "epoch": 0.39845291656800064, "grad_norm": 1.75, "learning_rate": 1.8196987791914098e-05, "loss": 0.6176, "step": 3155 }, { "epoch": 0.3985792090930618, "grad_norm": 2.0, "learning_rate": 1.8195844080005728e-05, "loss": 0.6292, "step": 3156 }, { "epoch": 0.39870550161812296, "grad_norm": 1.828125, "learning_rate": 1.8194700041430757e-05, "loss": 0.7412, "step": 3157 }, { "epoch": 0.39883179414318415, "grad_norm": 1.8046875, "learning_rate": 1.819355567623478e-05, "loss": 0.6714, "step": 3158 }, { "epoch": 0.39895808666824534, "grad_norm": 2.046875, "learning_rate": 1.819241098446341e-05, "loss": 0.7539, "step": 3159 }, { "epoch": 0.39908437919330647, "grad_norm": 1.9296875, "learning_rate": 1.819126596616227e-05, "loss": 0.6312, "step": 3160 }, { "epoch": 0.39921067171836766, "grad_norm": 1.953125, "learning_rate": 1.8190120621377e-05, "loss": 0.7399, "step": 3161 }, { "epoch": 0.39933696424342885, "grad_norm": 1.828125, "learning_rate": 1.8188974950153246e-05, "loss": 0.7687, "step": 3162 }, { "epoch": 0.39946325676849004, "grad_norm": 1.7890625, "learning_rate": 1.8187828952536673e-05, "loss": 0.6581, "step": 3163 }, { "epoch": 0.39958954929355117, "grad_norm": 1.6640625, "learning_rate": 1.8186682628572966e-05, "loss": 0.6138, "step": 3164 }, { "epoch": 0.39971584181861236, "grad_norm": 2.0, "learning_rate": 1.8185535978307807e-05, "loss": 0.7119, "step": 3165 }, { "epoch": 0.39984213434367355, "grad_norm": 1.8125, "learning_rate": 1.8184389001786895e-05, "loss": 0.593, "step": 3166 }, { "epoch": 0.3999684268687347, "grad_norm": 1.6875, "learning_rate": 1.8183241699055957e-05, "loss": 0.6179, "step": 3167 }, { "epoch": 0.4000947193937959, "grad_norm": 1.9140625, "learning_rate": 1.818209407016071e-05, "loss": 0.6581, "step": 3168 }, { "epoch": 0.40022101191885706, "grad_norm": 1.765625, "learning_rate": 1.8180946115146906e-05, "loss": 0.6723, "step": 3169 }, { "epoch": 0.40034730444391825, "grad_norm": 1.921875, "learning_rate": 1.8179797834060292e-05, "loss": 0.6432, "step": 3170 }, { "epoch": 0.4004735969689794, "grad_norm": 1.765625, "learning_rate": 1.817864922694664e-05, "loss": 0.7182, "step": 3171 }, { "epoch": 0.4005998894940406, "grad_norm": 1.8203125, "learning_rate": 1.8177500293851728e-05, "loss": 0.6233, "step": 3172 }, { "epoch": 0.40072618201910176, "grad_norm": 1.9921875, "learning_rate": 1.8176351034821352e-05, "loss": 0.7256, "step": 3173 }, { "epoch": 0.4008524745441629, "grad_norm": 1.8203125, "learning_rate": 1.8175201449901317e-05, "loss": 0.6022, "step": 3174 }, { "epoch": 0.4009787670692241, "grad_norm": 1.828125, "learning_rate": 1.8174051539137442e-05, "loss": 0.6822, "step": 3175 }, { "epoch": 0.4011050595942853, "grad_norm": 1.78125, "learning_rate": 1.8172901302575564e-05, "loss": 0.6757, "step": 3176 }, { "epoch": 0.40123135211934646, "grad_norm": 1.8984375, "learning_rate": 1.8171750740261525e-05, "loss": 0.6607, "step": 3177 }, { "epoch": 0.4013576446444076, "grad_norm": 1.90625, "learning_rate": 1.8170599852241182e-05, "loss": 0.6748, "step": 3178 }, { "epoch": 0.4014839371694688, "grad_norm": 1.5546875, "learning_rate": 1.8169448638560413e-05, "loss": 0.5723, "step": 3179 }, { "epoch": 0.40161022969453, "grad_norm": 1.65625, "learning_rate": 1.8168297099265094e-05, "loss": 0.5617, "step": 3180 }, { "epoch": 0.4017365222195911, "grad_norm": 1.9140625, "learning_rate": 1.8167145234401132e-05, "loss": 0.7683, "step": 3181 }, { "epoch": 0.4018628147446523, "grad_norm": 1.9375, "learning_rate": 1.816599304401443e-05, "loss": 0.7756, "step": 3182 }, { "epoch": 0.4019891072697135, "grad_norm": 1.6875, "learning_rate": 1.8164840528150916e-05, "loss": 0.6337, "step": 3183 }, { "epoch": 0.4021153997947746, "grad_norm": 1.625, "learning_rate": 1.816368768685652e-05, "loss": 0.5492, "step": 3184 }, { "epoch": 0.4022416923198358, "grad_norm": 1.796875, "learning_rate": 1.81625345201772e-05, "loss": 0.6368, "step": 3185 }, { "epoch": 0.402367984844897, "grad_norm": 1.7890625, "learning_rate": 1.8161381028158916e-05, "loss": 0.5606, "step": 3186 }, { "epoch": 0.4024942773699582, "grad_norm": 1.9375, "learning_rate": 1.8160227210847642e-05, "loss": 0.6266, "step": 3187 }, { "epoch": 0.4026205698950193, "grad_norm": 1.6796875, "learning_rate": 1.8159073068289365e-05, "loss": 0.6059, "step": 3188 }, { "epoch": 0.4027468624200805, "grad_norm": 1.9921875, "learning_rate": 1.8157918600530087e-05, "loss": 0.7741, "step": 3189 }, { "epoch": 0.4028731549451417, "grad_norm": 1.7109375, "learning_rate": 1.8156763807615826e-05, "loss": 0.583, "step": 3190 }, { "epoch": 0.40299944747020283, "grad_norm": 1.8046875, "learning_rate": 1.8155608689592604e-05, "loss": 0.7383, "step": 3191 }, { "epoch": 0.403125739995264, "grad_norm": 2.0, "learning_rate": 1.8154453246506464e-05, "loss": 0.7007, "step": 3192 }, { "epoch": 0.4032520325203252, "grad_norm": 1.75, "learning_rate": 1.815329747840346e-05, "loss": 0.6558, "step": 3193 }, { "epoch": 0.4033783250453864, "grad_norm": 1.7578125, "learning_rate": 1.8152141385329654e-05, "loss": 0.6987, "step": 3194 }, { "epoch": 0.40350461757044753, "grad_norm": 1.734375, "learning_rate": 1.815098496733113e-05, "loss": 0.6216, "step": 3195 }, { "epoch": 0.4036309100955087, "grad_norm": 1.71875, "learning_rate": 1.8149828224453977e-05, "loss": 0.6939, "step": 3196 }, { "epoch": 0.4037572026205699, "grad_norm": 1.890625, "learning_rate": 1.8148671156744306e-05, "loss": 0.8348, "step": 3197 }, { "epoch": 0.40388349514563104, "grad_norm": 1.7109375, "learning_rate": 1.8147513764248224e-05, "loss": 0.662, "step": 3198 }, { "epoch": 0.40400978767069223, "grad_norm": 1.75, "learning_rate": 1.814635604701187e-05, "loss": 0.6126, "step": 3199 }, { "epoch": 0.4041360801957534, "grad_norm": 1.7890625, "learning_rate": 1.814519800508139e-05, "loss": 0.6911, "step": 3200 }, { "epoch": 0.4042623727208146, "grad_norm": 1.7890625, "learning_rate": 1.814403963850293e-05, "loss": 0.5975, "step": 3201 }, { "epoch": 0.40438866524587574, "grad_norm": 1.9296875, "learning_rate": 1.814288094732267e-05, "loss": 0.6249, "step": 3202 }, { "epoch": 0.40451495777093693, "grad_norm": 1.765625, "learning_rate": 1.8141721931586785e-05, "loss": 0.6706, "step": 3203 }, { "epoch": 0.4046412502959981, "grad_norm": 1.78125, "learning_rate": 1.8140562591341476e-05, "loss": 0.7313, "step": 3204 }, { "epoch": 0.40476754282105926, "grad_norm": 1.78125, "learning_rate": 1.8139402926632953e-05, "loss": 0.6717, "step": 3205 }, { "epoch": 0.40489383534612045, "grad_norm": 1.78125, "learning_rate": 1.813824293750743e-05, "loss": 0.606, "step": 3206 }, { "epoch": 0.40502012787118163, "grad_norm": 1.8359375, "learning_rate": 1.8137082624011148e-05, "loss": 0.6657, "step": 3207 }, { "epoch": 0.4051464203962428, "grad_norm": 2.296875, "learning_rate": 1.813592198619035e-05, "loss": 0.74, "step": 3208 }, { "epoch": 0.40527271292130396, "grad_norm": 1.6875, "learning_rate": 1.8134761024091302e-05, "loss": 0.6405, "step": 3209 }, { "epoch": 0.40539900544636515, "grad_norm": 1.8515625, "learning_rate": 1.813359973776027e-05, "loss": 0.7501, "step": 3210 }, { "epoch": 0.40552529797142634, "grad_norm": 1.84375, "learning_rate": 1.8132438127243546e-05, "loss": 0.6995, "step": 3211 }, { "epoch": 0.40565159049648747, "grad_norm": 2.0625, "learning_rate": 1.8131276192587423e-05, "loss": 0.8253, "step": 3212 }, { "epoch": 0.40577788302154866, "grad_norm": 2.109375, "learning_rate": 1.8130113933838222e-05, "loss": 0.7085, "step": 3213 }, { "epoch": 0.40590417554660985, "grad_norm": 1.8359375, "learning_rate": 1.8128951351042256e-05, "loss": 0.8077, "step": 3214 }, { "epoch": 0.40603046807167104, "grad_norm": 1.890625, "learning_rate": 1.812778844424587e-05, "loss": 0.6627, "step": 3215 }, { "epoch": 0.40615676059673217, "grad_norm": 3.390625, "learning_rate": 1.8126625213495415e-05, "loss": 0.7886, "step": 3216 }, { "epoch": 0.40628305312179336, "grad_norm": 1.9453125, "learning_rate": 1.8125461658837254e-05, "loss": 0.647, "step": 3217 }, { "epoch": 0.40640934564685455, "grad_norm": 1.859375, "learning_rate": 1.8124297780317763e-05, "loss": 0.6475, "step": 3218 }, { "epoch": 0.4065356381719157, "grad_norm": 2.046875, "learning_rate": 1.812313357798333e-05, "loss": 0.7649, "step": 3219 }, { "epoch": 0.40666193069697687, "grad_norm": 1.671875, "learning_rate": 1.812196905188036e-05, "loss": 0.6492, "step": 3220 }, { "epoch": 0.40678822322203806, "grad_norm": 1.8828125, "learning_rate": 1.8120804202055263e-05, "loss": 0.6742, "step": 3221 }, { "epoch": 0.4069145157470992, "grad_norm": 1.71875, "learning_rate": 1.8119639028554475e-05, "loss": 0.6316, "step": 3222 }, { "epoch": 0.4070408082721604, "grad_norm": 1.796875, "learning_rate": 1.811847353142443e-05, "loss": 0.7221, "step": 3223 }, { "epoch": 0.40716710079722157, "grad_norm": 1.75, "learning_rate": 1.8117307710711586e-05, "loss": 0.6098, "step": 3224 }, { "epoch": 0.40729339332228276, "grad_norm": 1.703125, "learning_rate": 1.8116141566462407e-05, "loss": 0.66, "step": 3225 }, { "epoch": 0.4074196858473439, "grad_norm": 1.84375, "learning_rate": 1.8114975098723374e-05, "loss": 0.6893, "step": 3226 }, { "epoch": 0.4075459783724051, "grad_norm": 1.828125, "learning_rate": 1.8113808307540978e-05, "loss": 0.6162, "step": 3227 }, { "epoch": 0.40767227089746627, "grad_norm": 1.78125, "learning_rate": 1.811264119296173e-05, "loss": 0.6548, "step": 3228 }, { "epoch": 0.4077985634225274, "grad_norm": 1.9140625, "learning_rate": 1.8111473755032142e-05, "loss": 0.6572, "step": 3229 }, { "epoch": 0.4079248559475886, "grad_norm": 1.8984375, "learning_rate": 1.8110305993798748e-05, "loss": 0.6963, "step": 3230 }, { "epoch": 0.4080511484726498, "grad_norm": 1.75, "learning_rate": 1.8109137909308097e-05, "loss": 0.6684, "step": 3231 }, { "epoch": 0.40817744099771097, "grad_norm": 1.6953125, "learning_rate": 1.8107969501606737e-05, "loss": 0.716, "step": 3232 }, { "epoch": 0.4083037335227721, "grad_norm": 1.8046875, "learning_rate": 1.8106800770741243e-05, "loss": 0.6512, "step": 3233 }, { "epoch": 0.4084300260478333, "grad_norm": 1.8984375, "learning_rate": 1.8105631716758192e-05, "loss": 0.6904, "step": 3234 }, { "epoch": 0.4085563185728945, "grad_norm": 1.703125, "learning_rate": 1.810446233970419e-05, "loss": 0.6495, "step": 3235 }, { "epoch": 0.4086826110979556, "grad_norm": 1.8671875, "learning_rate": 1.810329263962584e-05, "loss": 0.7585, "step": 3236 }, { "epoch": 0.4088089036230168, "grad_norm": 1.8203125, "learning_rate": 1.8102122616569757e-05, "loss": 0.6775, "step": 3237 }, { "epoch": 0.408935196148078, "grad_norm": 1.8359375, "learning_rate": 1.8100952270582585e-05, "loss": 0.7736, "step": 3238 }, { "epoch": 0.4090614886731392, "grad_norm": 1.7890625, "learning_rate": 1.809978160171097e-05, "loss": 0.748, "step": 3239 }, { "epoch": 0.4091877811982003, "grad_norm": 1.796875, "learning_rate": 1.809861061000157e-05, "loss": 0.5913, "step": 3240 }, { "epoch": 0.4093140737232615, "grad_norm": 1.9140625, "learning_rate": 1.8097439295501054e-05, "loss": 0.7717, "step": 3241 }, { "epoch": 0.4094403662483227, "grad_norm": 1.9140625, "learning_rate": 1.8096267658256118e-05, "loss": 0.6457, "step": 3242 }, { "epoch": 0.40956665877338383, "grad_norm": 1.703125, "learning_rate": 1.809509569831345e-05, "loss": 0.6185, "step": 3243 }, { "epoch": 0.409692951298445, "grad_norm": 1.875, "learning_rate": 1.8093923415719765e-05, "loss": 0.7373, "step": 3244 }, { "epoch": 0.4098192438235062, "grad_norm": 2.484375, "learning_rate": 1.809275081052179e-05, "loss": 0.7546, "step": 3245 }, { "epoch": 0.4099455363485674, "grad_norm": 1.9375, "learning_rate": 1.809157788276626e-05, "loss": 0.7218, "step": 3246 }, { "epoch": 0.41007182887362853, "grad_norm": 2.09375, "learning_rate": 1.8090404632499922e-05, "loss": 0.8336, "step": 3247 }, { "epoch": 0.4101981213986897, "grad_norm": 1.9296875, "learning_rate": 1.8089231059769546e-05, "loss": 0.6542, "step": 3248 }, { "epoch": 0.4103244139237509, "grad_norm": 1.703125, "learning_rate": 1.8088057164621907e-05, "loss": 0.563, "step": 3249 }, { "epoch": 0.41045070644881204, "grad_norm": 1.75, "learning_rate": 1.8086882947103787e-05, "loss": 0.5384, "step": 3250 }, { "epoch": 0.41057699897387323, "grad_norm": 2.015625, "learning_rate": 1.808570840726199e-05, "loss": 0.7091, "step": 3251 }, { "epoch": 0.4107032914989344, "grad_norm": 1.8046875, "learning_rate": 1.8084533545143333e-05, "loss": 0.706, "step": 3252 }, { "epoch": 0.41082958402399555, "grad_norm": 1.7578125, "learning_rate": 1.808335836079464e-05, "loss": 0.6723, "step": 3253 }, { "epoch": 0.41095587654905674, "grad_norm": 1.8671875, "learning_rate": 1.8082182854262755e-05, "loss": 0.6595, "step": 3254 }, { "epoch": 0.41108216907411793, "grad_norm": 1.703125, "learning_rate": 1.8081007025594528e-05, "loss": 0.5703, "step": 3255 }, { "epoch": 0.4112084615991791, "grad_norm": 1.7734375, "learning_rate": 1.8079830874836822e-05, "loss": 0.6548, "step": 3256 }, { "epoch": 0.41133475412424025, "grad_norm": 1.8984375, "learning_rate": 1.8078654402036526e-05, "loss": 0.7665, "step": 3257 }, { "epoch": 0.41146104664930144, "grad_norm": 1.7890625, "learning_rate": 1.807747760724052e-05, "loss": 0.7008, "step": 3258 }, { "epoch": 0.41158733917436263, "grad_norm": 1.8046875, "learning_rate": 1.8076300490495714e-05, "loss": 0.6456, "step": 3259 }, { "epoch": 0.41171363169942377, "grad_norm": 1.84375, "learning_rate": 1.807512305184902e-05, "loss": 0.6608, "step": 3260 }, { "epoch": 0.41183992422448495, "grad_norm": 1.671875, "learning_rate": 1.8073945291347373e-05, "loss": 0.5746, "step": 3261 }, { "epoch": 0.41196621674954614, "grad_norm": 1.7890625, "learning_rate": 1.8072767209037713e-05, "loss": 0.6441, "step": 3262 }, { "epoch": 0.41209250927460733, "grad_norm": 1.90625, "learning_rate": 1.8071588804967e-05, "loss": 0.6789, "step": 3263 }, { "epoch": 0.41221880179966847, "grad_norm": 1.6484375, "learning_rate": 1.8070410079182198e-05, "loss": 0.615, "step": 3264 }, { "epoch": 0.41234509432472966, "grad_norm": 1.84375, "learning_rate": 1.8069231031730286e-05, "loss": 0.7312, "step": 3265 }, { "epoch": 0.41247138684979084, "grad_norm": 1.71875, "learning_rate": 1.8068051662658262e-05, "loss": 0.651, "step": 3266 }, { "epoch": 0.412597679374852, "grad_norm": 1.8828125, "learning_rate": 1.8066871972013135e-05, "loss": 0.7455, "step": 3267 }, { "epoch": 0.41272397189991317, "grad_norm": 1.8359375, "learning_rate": 1.8065691959841918e-05, "loss": 0.6741, "step": 3268 }, { "epoch": 0.41285026442497436, "grad_norm": 1.8671875, "learning_rate": 1.8064511626191647e-05, "loss": 0.728, "step": 3269 }, { "epoch": 0.41297655695003554, "grad_norm": 1.921875, "learning_rate": 1.8063330971109368e-05, "loss": 0.6061, "step": 3270 }, { "epoch": 0.4131028494750967, "grad_norm": 1.7109375, "learning_rate": 1.806214999464214e-05, "loss": 0.6461, "step": 3271 }, { "epoch": 0.41322914200015787, "grad_norm": 1.640625, "learning_rate": 1.8060968696837032e-05, "loss": 0.6182, "step": 3272 }, { "epoch": 0.41335543452521906, "grad_norm": 1.640625, "learning_rate": 1.8059787077741125e-05, "loss": 0.5671, "step": 3273 }, { "epoch": 0.4134817270502802, "grad_norm": 1.8125, "learning_rate": 1.8058605137401518e-05, "loss": 0.6986, "step": 3274 }, { "epoch": 0.4136080195753414, "grad_norm": 1.8359375, "learning_rate": 1.8057422875865323e-05, "loss": 0.6584, "step": 3275 }, { "epoch": 0.41373431210040257, "grad_norm": 2.0, "learning_rate": 1.805624029317966e-05, "loss": 0.7141, "step": 3276 }, { "epoch": 0.41386060462546376, "grad_norm": 1.78125, "learning_rate": 1.8055057389391663e-05, "loss": 0.6272, "step": 3277 }, { "epoch": 0.4139868971505249, "grad_norm": 1.828125, "learning_rate": 1.805387416454848e-05, "loss": 0.6777, "step": 3278 }, { "epoch": 0.4141131896755861, "grad_norm": 1.71875, "learning_rate": 1.805269061869727e-05, "loss": 0.5979, "step": 3279 }, { "epoch": 0.41423948220064727, "grad_norm": 1.7265625, "learning_rate": 1.805150675188521e-05, "loss": 0.7057, "step": 3280 }, { "epoch": 0.4143657747257084, "grad_norm": 1.734375, "learning_rate": 1.8050322564159483e-05, "loss": 0.6741, "step": 3281 }, { "epoch": 0.4144920672507696, "grad_norm": 1.796875, "learning_rate": 1.804913805556729e-05, "loss": 0.61, "step": 3282 }, { "epoch": 0.4146183597758308, "grad_norm": 1.8828125, "learning_rate": 1.804795322615584e-05, "loss": 0.673, "step": 3283 }, { "epoch": 0.4147446523008919, "grad_norm": 1.78125, "learning_rate": 1.804676807597236e-05, "loss": 0.648, "step": 3284 }, { "epoch": 0.4148709448259531, "grad_norm": 1.9296875, "learning_rate": 1.804558260506409e-05, "loss": 0.6608, "step": 3285 }, { "epoch": 0.4149972373510143, "grad_norm": 1.96875, "learning_rate": 1.804439681347827e-05, "loss": 0.6825, "step": 3286 }, { "epoch": 0.4151235298760755, "grad_norm": 1.78125, "learning_rate": 1.8043210701262176e-05, "loss": 0.7134, "step": 3287 }, { "epoch": 0.4152498224011366, "grad_norm": 2.140625, "learning_rate": 1.804202426846307e-05, "loss": 0.6906, "step": 3288 }, { "epoch": 0.4153761149261978, "grad_norm": 1.7421875, "learning_rate": 1.8040837515128253e-05, "loss": 0.5979, "step": 3289 }, { "epoch": 0.415502407451259, "grad_norm": 1.59375, "learning_rate": 1.8039650441305015e-05, "loss": 0.5155, "step": 3290 }, { "epoch": 0.4156286999763201, "grad_norm": 1.8125, "learning_rate": 1.803846304704068e-05, "loss": 0.6943, "step": 3291 }, { "epoch": 0.4157549925013813, "grad_norm": 1.765625, "learning_rate": 1.803727533238257e-05, "loss": 0.6546, "step": 3292 }, { "epoch": 0.4158812850264425, "grad_norm": 1.8515625, "learning_rate": 1.803608729737802e-05, "loss": 0.6924, "step": 3293 }, { "epoch": 0.4160075775515037, "grad_norm": 1.6171875, "learning_rate": 1.8034898942074388e-05, "loss": 0.5672, "step": 3294 }, { "epoch": 0.4161338700765648, "grad_norm": 1.859375, "learning_rate": 1.8033710266519036e-05, "loss": 0.7045, "step": 3295 }, { "epoch": 0.416260162601626, "grad_norm": 1.828125, "learning_rate": 1.8032521270759345e-05, "loss": 0.7249, "step": 3296 }, { "epoch": 0.4163864551266872, "grad_norm": 1.7890625, "learning_rate": 1.8031331954842704e-05, "loss": 0.656, "step": 3297 }, { "epoch": 0.41651274765174834, "grad_norm": 1.6640625, "learning_rate": 1.8030142318816513e-05, "loss": 0.6359, "step": 3298 }, { "epoch": 0.4166390401768095, "grad_norm": 1.703125, "learning_rate": 1.8028952362728197e-05, "loss": 0.6048, "step": 3299 }, { "epoch": 0.4167653327018707, "grad_norm": 1.84375, "learning_rate": 1.8027762086625175e-05, "loss": 0.6293, "step": 3300 }, { "epoch": 0.4168916252269319, "grad_norm": 1.890625, "learning_rate": 1.8026571490554888e-05, "loss": 0.6753, "step": 3301 }, { "epoch": 0.41701791775199304, "grad_norm": 1.8203125, "learning_rate": 1.80253805745648e-05, "loss": 0.6534, "step": 3302 }, { "epoch": 0.41714421027705423, "grad_norm": 1.6875, "learning_rate": 1.8024189338702373e-05, "loss": 0.653, "step": 3303 }, { "epoch": 0.4172705028021154, "grad_norm": 1.8359375, "learning_rate": 1.8022997783015083e-05, "loss": 0.7431, "step": 3304 }, { "epoch": 0.41739679532717655, "grad_norm": 1.84375, "learning_rate": 1.802180590755043e-05, "loss": 0.6737, "step": 3305 }, { "epoch": 0.41752308785223774, "grad_norm": 1.796875, "learning_rate": 1.8020613712355915e-05, "loss": 0.6545, "step": 3306 }, { "epoch": 0.41764938037729893, "grad_norm": 1.8671875, "learning_rate": 1.8019421197479053e-05, "loss": 0.711, "step": 3307 }, { "epoch": 0.4177756729023601, "grad_norm": 1.78125, "learning_rate": 1.8018228362967377e-05, "loss": 0.7415, "step": 3308 }, { "epoch": 0.41790196542742125, "grad_norm": 1.96875, "learning_rate": 1.8017035208868438e-05, "loss": 0.6761, "step": 3309 }, { "epoch": 0.41802825795248244, "grad_norm": 1.7890625, "learning_rate": 1.801584173522978e-05, "loss": 0.6399, "step": 3310 }, { "epoch": 0.41815455047754363, "grad_norm": 1.9375, "learning_rate": 1.8014647942098982e-05, "loss": 0.6701, "step": 3311 }, { "epoch": 0.41828084300260476, "grad_norm": 1.8671875, "learning_rate": 1.801345382952362e-05, "loss": 0.6626, "step": 3312 }, { "epoch": 0.41840713552766595, "grad_norm": 1.828125, "learning_rate": 1.8012259397551287e-05, "loss": 0.7229, "step": 3313 }, { "epoch": 0.41853342805272714, "grad_norm": 1.8515625, "learning_rate": 1.8011064646229597e-05, "loss": 0.7701, "step": 3314 }, { "epoch": 0.4186597205777883, "grad_norm": 1.8125, "learning_rate": 1.8009869575606163e-05, "loss": 0.6547, "step": 3315 }, { "epoch": 0.41878601310284946, "grad_norm": 1.90625, "learning_rate": 1.800867418572862e-05, "loss": 0.6455, "step": 3316 }, { "epoch": 0.41891230562791065, "grad_norm": 1.9453125, "learning_rate": 1.8007478476644616e-05, "loss": 0.7592, "step": 3317 }, { "epoch": 0.41903859815297184, "grad_norm": 1.796875, "learning_rate": 1.8006282448401812e-05, "loss": 0.6068, "step": 3318 }, { "epoch": 0.419164890678033, "grad_norm": 1.953125, "learning_rate": 1.8005086101047866e-05, "loss": 0.6407, "step": 3319 }, { "epoch": 0.41929118320309416, "grad_norm": 1.9375, "learning_rate": 1.8003889434630473e-05, "loss": 0.7004, "step": 3320 }, { "epoch": 0.41941747572815535, "grad_norm": 1.765625, "learning_rate": 1.8002692449197328e-05, "loss": 0.692, "step": 3321 }, { "epoch": 0.4195437682532165, "grad_norm": 1.859375, "learning_rate": 1.8001495144796135e-05, "loss": 0.7063, "step": 3322 }, { "epoch": 0.4196700607782777, "grad_norm": 1.859375, "learning_rate": 1.8000297521474617e-05, "loss": 0.7444, "step": 3323 }, { "epoch": 0.41979635330333887, "grad_norm": 1.8046875, "learning_rate": 1.7999099579280512e-05, "loss": 0.631, "step": 3324 }, { "epoch": 0.41992264582840005, "grad_norm": 1.7578125, "learning_rate": 1.7997901318261563e-05, "loss": 0.6978, "step": 3325 }, { "epoch": 0.4200489383534612, "grad_norm": 1.71875, "learning_rate": 1.7996702738465535e-05, "loss": 0.672, "step": 3326 }, { "epoch": 0.4201752308785224, "grad_norm": 1.71875, "learning_rate": 1.7995503839940194e-05, "loss": 0.6168, "step": 3327 }, { "epoch": 0.42030152340358357, "grad_norm": 1.9765625, "learning_rate": 1.7994304622733326e-05, "loss": 0.7062, "step": 3328 }, { "epoch": 0.4204278159286447, "grad_norm": 1.7421875, "learning_rate": 1.799310508689274e-05, "loss": 0.6246, "step": 3329 }, { "epoch": 0.4205541084537059, "grad_norm": 1.8515625, "learning_rate": 1.799190523246623e-05, "loss": 0.7098, "step": 3330 }, { "epoch": 0.4206804009787671, "grad_norm": 1.8984375, "learning_rate": 1.7990705059501626e-05, "loss": 0.6786, "step": 3331 }, { "epoch": 0.42080669350382827, "grad_norm": 1.796875, "learning_rate": 1.798950456804677e-05, "loss": 0.6498, "step": 3332 }, { "epoch": 0.4209329860288894, "grad_norm": 1.875, "learning_rate": 1.7988303758149504e-05, "loss": 0.6836, "step": 3333 }, { "epoch": 0.4210592785539506, "grad_norm": 1.8046875, "learning_rate": 1.7987102629857692e-05, "loss": 0.5918, "step": 3334 }, { "epoch": 0.4211855710790118, "grad_norm": 1.796875, "learning_rate": 1.7985901183219206e-05, "loss": 0.6385, "step": 3335 }, { "epoch": 0.4213118636040729, "grad_norm": 1.78125, "learning_rate": 1.7984699418281937e-05, "loss": 0.7003, "step": 3336 }, { "epoch": 0.4214381561291341, "grad_norm": 1.7734375, "learning_rate": 1.798349733509378e-05, "loss": 0.6627, "step": 3337 }, { "epoch": 0.4215644486541953, "grad_norm": 1.7265625, "learning_rate": 1.798229493370265e-05, "loss": 0.6656, "step": 3338 }, { "epoch": 0.4216907411792565, "grad_norm": 1.765625, "learning_rate": 1.7981092214156466e-05, "loss": 0.68, "step": 3339 }, { "epoch": 0.4218170337043176, "grad_norm": 1.6796875, "learning_rate": 1.7979889176503174e-05, "loss": 0.6393, "step": 3340 }, { "epoch": 0.4219433262293788, "grad_norm": 1.8515625, "learning_rate": 1.7978685820790718e-05, "loss": 0.661, "step": 3341 }, { "epoch": 0.42206961875444, "grad_norm": 1.7265625, "learning_rate": 1.7977482147067064e-05, "loss": 0.6943, "step": 3342 }, { "epoch": 0.4221959112795011, "grad_norm": 1.953125, "learning_rate": 1.7976278155380187e-05, "loss": 0.7244, "step": 3343 }, { "epoch": 0.4223222038045623, "grad_norm": 1.671875, "learning_rate": 1.7975073845778074e-05, "loss": 0.6498, "step": 3344 }, { "epoch": 0.4224484963296235, "grad_norm": 1.8671875, "learning_rate": 1.7973869218308725e-05, "loss": 0.6654, "step": 3345 }, { "epoch": 0.4225747888546847, "grad_norm": 1.8046875, "learning_rate": 1.797266427302016e-05, "loss": 0.6865, "step": 3346 }, { "epoch": 0.4227010813797458, "grad_norm": 1.8359375, "learning_rate": 1.7971459009960396e-05, "loss": 0.6751, "step": 3347 }, { "epoch": 0.422827373904807, "grad_norm": 1.71875, "learning_rate": 1.7970253429177477e-05, "loss": 0.5969, "step": 3348 }, { "epoch": 0.4229536664298682, "grad_norm": 1.7890625, "learning_rate": 1.7969047530719456e-05, "loss": 0.6017, "step": 3349 }, { "epoch": 0.42307995895492934, "grad_norm": 1.9140625, "learning_rate": 1.7967841314634396e-05, "loss": 0.6229, "step": 3350 }, { "epoch": 0.4232062514799905, "grad_norm": 2.390625, "learning_rate": 1.796663478097037e-05, "loss": 0.8477, "step": 3351 }, { "epoch": 0.4233325440050517, "grad_norm": 1.859375, "learning_rate": 1.796542792977547e-05, "loss": 0.6392, "step": 3352 }, { "epoch": 0.42345883653011285, "grad_norm": 1.8125, "learning_rate": 1.79642207610978e-05, "loss": 0.6916, "step": 3353 }, { "epoch": 0.42358512905517404, "grad_norm": 1.9140625, "learning_rate": 1.7963013274985475e-05, "loss": 0.7838, "step": 3354 }, { "epoch": 0.4237114215802352, "grad_norm": 1.703125, "learning_rate": 1.796180547148662e-05, "loss": 0.6074, "step": 3355 }, { "epoch": 0.4238377141052964, "grad_norm": 1.7109375, "learning_rate": 1.7960597350649377e-05, "loss": 0.7007, "step": 3356 }, { "epoch": 0.42396400663035755, "grad_norm": 1.7421875, "learning_rate": 1.7959388912521896e-05, "loss": 0.6151, "step": 3357 }, { "epoch": 0.42409029915541874, "grad_norm": 1.7265625, "learning_rate": 1.7958180157152342e-05, "loss": 0.6544, "step": 3358 }, { "epoch": 0.4242165916804799, "grad_norm": 1.828125, "learning_rate": 1.79569710845889e-05, "loss": 0.7683, "step": 3359 }, { "epoch": 0.42434288420554106, "grad_norm": 1.8046875, "learning_rate": 1.7955761694879753e-05, "loss": 0.6248, "step": 3360 }, { "epoch": 0.42446917673060225, "grad_norm": 1.7109375, "learning_rate": 1.795455198807311e-05, "loss": 0.605, "step": 3361 }, { "epoch": 0.42459546925566344, "grad_norm": 1.8515625, "learning_rate": 1.7953341964217183e-05, "loss": 0.7613, "step": 3362 }, { "epoch": 0.4247217617807246, "grad_norm": 1.7265625, "learning_rate": 1.79521316233602e-05, "loss": 0.5746, "step": 3363 }, { "epoch": 0.42484805430578576, "grad_norm": 1.7890625, "learning_rate": 1.795092096555041e-05, "loss": 0.72, "step": 3364 }, { "epoch": 0.42497434683084695, "grad_norm": 1.703125, "learning_rate": 1.7949709990836058e-05, "loss": 0.7223, "step": 3365 }, { "epoch": 0.42510063935590814, "grad_norm": 1.9140625, "learning_rate": 1.7948498699265414e-05, "loss": 0.673, "step": 3366 }, { "epoch": 0.42522693188096927, "grad_norm": 1.8671875, "learning_rate": 1.7947287090886755e-05, "loss": 0.6604, "step": 3367 }, { "epoch": 0.42535322440603046, "grad_norm": 1.90625, "learning_rate": 1.794607516574838e-05, "loss": 0.701, "step": 3368 }, { "epoch": 0.42547951693109165, "grad_norm": 1.8828125, "learning_rate": 1.7944862923898583e-05, "loss": 0.7006, "step": 3369 }, { "epoch": 0.42560580945615284, "grad_norm": 1.828125, "learning_rate": 1.794365036538569e-05, "loss": 0.6706, "step": 3370 }, { "epoch": 0.425732101981214, "grad_norm": 1.953125, "learning_rate": 1.7942437490258025e-05, "loss": 0.6971, "step": 3371 }, { "epoch": 0.42585839450627516, "grad_norm": 1.9609375, "learning_rate": 1.7941224298563934e-05, "loss": 0.6835, "step": 3372 }, { "epoch": 0.42598468703133635, "grad_norm": 1.828125, "learning_rate": 1.7940010790351767e-05, "loss": 0.6539, "step": 3373 }, { "epoch": 0.4261109795563975, "grad_norm": 1.875, "learning_rate": 1.7938796965669897e-05, "loss": 0.7524, "step": 3374 }, { "epoch": 0.4262372720814587, "grad_norm": 1.828125, "learning_rate": 1.7937582824566703e-05, "loss": 0.6834, "step": 3375 }, { "epoch": 0.42636356460651986, "grad_norm": 1.8984375, "learning_rate": 1.793636836709057e-05, "loss": 0.7311, "step": 3376 }, { "epoch": 0.42648985713158105, "grad_norm": 1.8515625, "learning_rate": 1.793515359328992e-05, "loss": 0.7205, "step": 3377 }, { "epoch": 0.4266161496566422, "grad_norm": 1.6875, "learning_rate": 1.793393850321315e-05, "loss": 0.6069, "step": 3378 }, { "epoch": 0.4267424421817034, "grad_norm": 1.96875, "learning_rate": 1.793272309690871e-05, "loss": 0.723, "step": 3379 }, { "epoch": 0.42686873470676456, "grad_norm": 1.84375, "learning_rate": 1.7931507374425028e-05, "loss": 0.6596, "step": 3380 }, { "epoch": 0.4269950272318257, "grad_norm": 1.953125, "learning_rate": 1.793029133581057e-05, "loss": 0.6958, "step": 3381 }, { "epoch": 0.4271213197568869, "grad_norm": 1.921875, "learning_rate": 1.79290749811138e-05, "loss": 0.7242, "step": 3382 }, { "epoch": 0.4272476122819481, "grad_norm": 1.8046875, "learning_rate": 1.79278583103832e-05, "loss": 0.7487, "step": 3383 }, { "epoch": 0.4273739048070092, "grad_norm": 1.78125, "learning_rate": 1.7926641323667262e-05, "loss": 0.5955, "step": 3384 }, { "epoch": 0.4275001973320704, "grad_norm": 2.0, "learning_rate": 1.7925424021014493e-05, "loss": 0.7165, "step": 3385 }, { "epoch": 0.4276264898571316, "grad_norm": 1.6640625, "learning_rate": 1.7924206402473414e-05, "loss": 0.6362, "step": 3386 }, { "epoch": 0.4277527823821928, "grad_norm": 1.765625, "learning_rate": 1.7922988468092553e-05, "loss": 0.7182, "step": 3387 }, { "epoch": 0.4278790749072539, "grad_norm": 1.9296875, "learning_rate": 1.792177021792046e-05, "loss": 0.6608, "step": 3388 }, { "epoch": 0.4280053674323151, "grad_norm": 2.328125, "learning_rate": 1.792055165200568e-05, "loss": 0.7325, "step": 3389 }, { "epoch": 0.4281316599573763, "grad_norm": 1.90625, "learning_rate": 1.791933277039679e-05, "loss": 0.7293, "step": 3390 }, { "epoch": 0.4282579524824374, "grad_norm": 1.96875, "learning_rate": 1.7918113573142377e-05, "loss": 0.7198, "step": 3391 }, { "epoch": 0.4283842450074986, "grad_norm": 1.8359375, "learning_rate": 1.791689406029102e-05, "loss": 0.7349, "step": 3392 }, { "epoch": 0.4285105375325598, "grad_norm": 1.7265625, "learning_rate": 1.791567423189134e-05, "loss": 0.6274, "step": 3393 }, { "epoch": 0.428636830057621, "grad_norm": 1.8359375, "learning_rate": 1.791445408799195e-05, "loss": 0.6658, "step": 3394 }, { "epoch": 0.4287631225826821, "grad_norm": 1.7578125, "learning_rate": 1.7913233628641484e-05, "loss": 0.5788, "step": 3395 }, { "epoch": 0.4288894151077433, "grad_norm": 2.015625, "learning_rate": 1.7912012853888585e-05, "loss": 0.7105, "step": 3396 }, { "epoch": 0.4290157076328045, "grad_norm": 1.6796875, "learning_rate": 1.791079176378191e-05, "loss": 0.6158, "step": 3397 }, { "epoch": 0.42914200015786563, "grad_norm": 1.828125, "learning_rate": 1.7909570358370133e-05, "loss": 0.6325, "step": 3398 }, { "epoch": 0.4292682926829268, "grad_norm": 1.703125, "learning_rate": 1.790834863770193e-05, "loss": 0.5928, "step": 3399 }, { "epoch": 0.429394585207988, "grad_norm": 1.875, "learning_rate": 1.7907126601826e-05, "loss": 0.6383, "step": 3400 }, { "epoch": 0.4295208777330492, "grad_norm": 1.9140625, "learning_rate": 1.7905904250791048e-05, "loss": 0.7857, "step": 3401 }, { "epoch": 0.42964717025811033, "grad_norm": 1.7734375, "learning_rate": 1.79046815846458e-05, "loss": 0.7223, "step": 3402 }, { "epoch": 0.4297734627831715, "grad_norm": 1.7265625, "learning_rate": 1.7903458603438978e-05, "loss": 0.6124, "step": 3403 }, { "epoch": 0.4298997553082327, "grad_norm": 1.734375, "learning_rate": 1.7902235307219333e-05, "loss": 0.5957, "step": 3404 }, { "epoch": 0.43002604783329385, "grad_norm": 1.6953125, "learning_rate": 1.7901011696035623e-05, "loss": 0.6117, "step": 3405 }, { "epoch": 0.43015234035835503, "grad_norm": 1.6796875, "learning_rate": 1.789978776993662e-05, "loss": 0.6695, "step": 3406 }, { "epoch": 0.4302786328834162, "grad_norm": 1.8046875, "learning_rate": 1.7898563528971104e-05, "loss": 0.7226, "step": 3407 }, { "epoch": 0.4304049254084774, "grad_norm": 1.703125, "learning_rate": 1.789733897318787e-05, "loss": 0.5899, "step": 3408 }, { "epoch": 0.43053121793353855, "grad_norm": 1.6875, "learning_rate": 1.7896114102635726e-05, "loss": 0.6883, "step": 3409 }, { "epoch": 0.43065751045859973, "grad_norm": 1.859375, "learning_rate": 1.789488891736349e-05, "loss": 0.6682, "step": 3410 }, { "epoch": 0.4307838029836609, "grad_norm": 1.7578125, "learning_rate": 1.789366341742e-05, "loss": 0.6847, "step": 3411 }, { "epoch": 0.43091009550872206, "grad_norm": 1.796875, "learning_rate": 1.78924376028541e-05, "loss": 0.684, "step": 3412 }, { "epoch": 0.43103638803378325, "grad_norm": 1.8515625, "learning_rate": 1.7891211473714647e-05, "loss": 0.5998, "step": 3413 }, { "epoch": 0.43116268055884444, "grad_norm": 1.7734375, "learning_rate": 1.7889985030050512e-05, "loss": 0.6686, "step": 3414 }, { "epoch": 0.43128897308390557, "grad_norm": 1.8671875, "learning_rate": 1.7888758271910575e-05, "loss": 0.7937, "step": 3415 }, { "epoch": 0.43141526560896676, "grad_norm": 1.8359375, "learning_rate": 1.7887531199343738e-05, "loss": 0.6461, "step": 3416 }, { "epoch": 0.43154155813402795, "grad_norm": 2.140625, "learning_rate": 1.7886303812398905e-05, "loss": 0.7155, "step": 3417 }, { "epoch": 0.43166785065908914, "grad_norm": 1.8984375, "learning_rate": 1.7885076111125e-05, "loss": 0.6951, "step": 3418 }, { "epoch": 0.43179414318415027, "grad_norm": 1.84375, "learning_rate": 1.788384809557095e-05, "loss": 0.6792, "step": 3419 }, { "epoch": 0.43192043570921146, "grad_norm": 1.8515625, "learning_rate": 1.7882619765785705e-05, "loss": 0.7226, "step": 3420 }, { "epoch": 0.43204672823427265, "grad_norm": 1.8125, "learning_rate": 1.7881391121818222e-05, "loss": 0.6473, "step": 3421 }, { "epoch": 0.4321730207593338, "grad_norm": 1.71875, "learning_rate": 1.7880162163717477e-05, "loss": 0.6054, "step": 3422 }, { "epoch": 0.43229931328439497, "grad_norm": 1.7734375, "learning_rate": 1.787893289153244e-05, "loss": 0.6482, "step": 3423 }, { "epoch": 0.43242560580945616, "grad_norm": 1.6953125, "learning_rate": 1.7877703305312123e-05, "loss": 0.6132, "step": 3424 }, { "epoch": 0.43255189833451735, "grad_norm": 1.9765625, "learning_rate": 1.7876473405105525e-05, "loss": 0.7058, "step": 3425 }, { "epoch": 0.4326781908595785, "grad_norm": 1.921875, "learning_rate": 1.787524319096167e-05, "loss": 0.6381, "step": 3426 }, { "epoch": 0.43280448338463967, "grad_norm": 1.7109375, "learning_rate": 1.7874012662929585e-05, "loss": 0.5988, "step": 3427 }, { "epoch": 0.43293077590970086, "grad_norm": 1.8125, "learning_rate": 1.7872781821058324e-05, "loss": 0.6327, "step": 3428 }, { "epoch": 0.433057068434762, "grad_norm": 1.84375, "learning_rate": 1.7871550665396944e-05, "loss": 0.6277, "step": 3429 }, { "epoch": 0.4331833609598232, "grad_norm": 1.7265625, "learning_rate": 1.7870319195994512e-05, "loss": 0.6439, "step": 3430 }, { "epoch": 0.43330965348488437, "grad_norm": 2.015625, "learning_rate": 1.7869087412900118e-05, "loss": 0.6939, "step": 3431 }, { "epoch": 0.43343594600994556, "grad_norm": 1.9296875, "learning_rate": 1.7867855316162846e-05, "loss": 0.6305, "step": 3432 }, { "epoch": 0.4335622385350067, "grad_norm": 1.7421875, "learning_rate": 1.786662290583182e-05, "loss": 0.6541, "step": 3433 }, { "epoch": 0.4336885310600679, "grad_norm": 1.8984375, "learning_rate": 1.7865390181956148e-05, "loss": 0.7295, "step": 3434 }, { "epoch": 0.4338148235851291, "grad_norm": 2.03125, "learning_rate": 1.786415714458497e-05, "loss": 0.6972, "step": 3435 }, { "epoch": 0.4339411161101902, "grad_norm": 1.9140625, "learning_rate": 1.786292379376743e-05, "loss": 0.7032, "step": 3436 }, { "epoch": 0.4340674086352514, "grad_norm": 1.8203125, "learning_rate": 1.7861690129552684e-05, "loss": 0.7105, "step": 3437 }, { "epoch": 0.4341937011603126, "grad_norm": 1.7734375, "learning_rate": 1.7860456151989907e-05, "loss": 0.7025, "step": 3438 }, { "epoch": 0.4343199936853738, "grad_norm": 1.7890625, "learning_rate": 1.7859221861128284e-05, "loss": 0.6493, "step": 3439 }, { "epoch": 0.4344462862104349, "grad_norm": 1.9375, "learning_rate": 1.7857987257017005e-05, "loss": 0.7385, "step": 3440 }, { "epoch": 0.4345725787354961, "grad_norm": 1.7421875, "learning_rate": 1.7856752339705284e-05, "loss": 0.6685, "step": 3441 }, { "epoch": 0.4346988712605573, "grad_norm": 1.765625, "learning_rate": 1.7855517109242332e-05, "loss": 0.6918, "step": 3442 }, { "epoch": 0.4348251637856184, "grad_norm": 1.90625, "learning_rate": 1.7854281565677395e-05, "loss": 0.6201, "step": 3443 }, { "epoch": 0.4349514563106796, "grad_norm": 1.828125, "learning_rate": 1.7853045709059713e-05, "loss": 0.6912, "step": 3444 }, { "epoch": 0.4350777488357408, "grad_norm": 1.8359375, "learning_rate": 1.7851809539438542e-05, "loss": 0.6419, "step": 3445 }, { "epoch": 0.43520404136080193, "grad_norm": 1.6875, "learning_rate": 1.7850573056863156e-05, "loss": 0.6866, "step": 3446 }, { "epoch": 0.4353303338858631, "grad_norm": 1.921875, "learning_rate": 1.7849336261382838e-05, "loss": 0.733, "step": 3447 }, { "epoch": 0.4354566264109243, "grad_norm": 1.765625, "learning_rate": 1.7848099153046883e-05, "loss": 0.7065, "step": 3448 }, { "epoch": 0.4355829189359855, "grad_norm": 1.7734375, "learning_rate": 1.7846861731904595e-05, "loss": 0.6288, "step": 3449 }, { "epoch": 0.43570921146104663, "grad_norm": 1.890625, "learning_rate": 1.7845623998005305e-05, "loss": 0.7693, "step": 3450 }, { "epoch": 0.4358355039861078, "grad_norm": 1.7890625, "learning_rate": 1.7844385951398337e-05, "loss": 0.5647, "step": 3451 }, { "epoch": 0.435961796511169, "grad_norm": 1.90625, "learning_rate": 1.784314759213304e-05, "loss": 0.6267, "step": 3452 }, { "epoch": 0.43608808903623014, "grad_norm": 1.8125, "learning_rate": 1.784190892025877e-05, "loss": 0.671, "step": 3453 }, { "epoch": 0.43621438156129133, "grad_norm": 1.828125, "learning_rate": 1.78406699358249e-05, "loss": 0.7025, "step": 3454 }, { "epoch": 0.4363406740863525, "grad_norm": 1.8515625, "learning_rate": 1.783943063888081e-05, "loss": 0.6766, "step": 3455 }, { "epoch": 0.4364669666114137, "grad_norm": 1.828125, "learning_rate": 1.78381910294759e-05, "loss": 0.7079, "step": 3456 }, { "epoch": 0.43659325913647484, "grad_norm": 1.65625, "learning_rate": 1.7836951107659573e-05, "loss": 0.574, "step": 3457 }, { "epoch": 0.43671955166153603, "grad_norm": 1.703125, "learning_rate": 1.783571087348125e-05, "loss": 0.6445, "step": 3458 }, { "epoch": 0.4368458441865972, "grad_norm": 1.78125, "learning_rate": 1.783447032699037e-05, "loss": 0.6349, "step": 3459 }, { "epoch": 0.43697213671165835, "grad_norm": 1.8359375, "learning_rate": 1.7833229468236367e-05, "loss": 0.685, "step": 3460 }, { "epoch": 0.43709842923671954, "grad_norm": 1.734375, "learning_rate": 1.783198829726871e-05, "loss": 0.647, "step": 3461 }, { "epoch": 0.43722472176178073, "grad_norm": 1.8515625, "learning_rate": 1.7830746814136862e-05, "loss": 0.7105, "step": 3462 }, { "epoch": 0.4373510142868419, "grad_norm": 1.71875, "learning_rate": 1.7829505018890308e-05, "loss": 0.69, "step": 3463 }, { "epoch": 0.43747730681190306, "grad_norm": 1.96875, "learning_rate": 1.782826291157854e-05, "loss": 0.6799, "step": 3464 }, { "epoch": 0.43760359933696424, "grad_norm": 1.75, "learning_rate": 1.782702049225107e-05, "loss": 0.6816, "step": 3465 }, { "epoch": 0.43772989186202543, "grad_norm": 1.71875, "learning_rate": 1.7825777760957417e-05, "loss": 0.6077, "step": 3466 }, { "epoch": 0.43785618438708657, "grad_norm": 1.65625, "learning_rate": 1.782453471774711e-05, "loss": 0.6253, "step": 3467 }, { "epoch": 0.43798247691214776, "grad_norm": 1.7890625, "learning_rate": 1.7823291362669697e-05, "loss": 0.7038, "step": 3468 }, { "epoch": 0.43810876943720894, "grad_norm": 1.7734375, "learning_rate": 1.7822047695774735e-05, "loss": 0.6767, "step": 3469 }, { "epoch": 0.43823506196227013, "grad_norm": 1.7734375, "learning_rate": 1.782080371711179e-05, "loss": 0.6034, "step": 3470 }, { "epoch": 0.43836135448733127, "grad_norm": 1.8203125, "learning_rate": 1.7819559426730448e-05, "loss": 0.6711, "step": 3471 }, { "epoch": 0.43848764701239246, "grad_norm": 1.78125, "learning_rate": 1.78183148246803e-05, "loss": 0.6341, "step": 3472 }, { "epoch": 0.43861393953745365, "grad_norm": 1.8828125, "learning_rate": 1.7817069911010953e-05, "loss": 0.664, "step": 3473 }, { "epoch": 0.4387402320625148, "grad_norm": 1.9296875, "learning_rate": 1.7815824685772032e-05, "loss": 0.6668, "step": 3474 }, { "epoch": 0.43886652458757597, "grad_norm": 1.8046875, "learning_rate": 1.7814579149013163e-05, "loss": 0.6364, "step": 3475 }, { "epoch": 0.43899281711263716, "grad_norm": 1.9609375, "learning_rate": 1.7813333300783996e-05, "loss": 0.7714, "step": 3476 }, { "epoch": 0.43911910963769835, "grad_norm": 1.765625, "learning_rate": 1.7812087141134174e-05, "loss": 0.6424, "step": 3477 }, { "epoch": 0.4392454021627595, "grad_norm": 1.703125, "learning_rate": 1.7810840670113384e-05, "loss": 0.6453, "step": 3478 }, { "epoch": 0.43937169468782067, "grad_norm": 1.7109375, "learning_rate": 1.7809593887771296e-05, "loss": 0.6571, "step": 3479 }, { "epoch": 0.43949798721288186, "grad_norm": 1.7578125, "learning_rate": 1.7808346794157603e-05, "loss": 0.6606, "step": 3480 }, { "epoch": 0.439624279737943, "grad_norm": 1.734375, "learning_rate": 1.780709938932202e-05, "loss": 0.6594, "step": 3481 }, { "epoch": 0.4397505722630042, "grad_norm": 1.78125, "learning_rate": 1.7805851673314253e-05, "loss": 0.7069, "step": 3482 }, { "epoch": 0.43987686478806537, "grad_norm": 1.7109375, "learning_rate": 1.780460364618404e-05, "loss": 0.5923, "step": 3483 }, { "epoch": 0.4400031573131265, "grad_norm": 1.6875, "learning_rate": 1.7803355307981132e-05, "loss": 0.6416, "step": 3484 }, { "epoch": 0.4401294498381877, "grad_norm": 2.0, "learning_rate": 1.780210665875527e-05, "loss": 0.7723, "step": 3485 }, { "epoch": 0.4402557423632489, "grad_norm": 1.7578125, "learning_rate": 1.780085769855623e-05, "loss": 0.6705, "step": 3486 }, { "epoch": 0.44038203488831007, "grad_norm": 1.8359375, "learning_rate": 1.7799608427433798e-05, "loss": 0.7419, "step": 3487 }, { "epoch": 0.4405083274133712, "grad_norm": 1.8046875, "learning_rate": 1.7798358845437754e-05, "loss": 0.6661, "step": 3488 }, { "epoch": 0.4406346199384324, "grad_norm": 2.046875, "learning_rate": 1.7797108952617913e-05, "loss": 0.7871, "step": 3489 }, { "epoch": 0.4407609124634936, "grad_norm": 1.75, "learning_rate": 1.7795858749024085e-05, "loss": 0.7257, "step": 3490 }, { "epoch": 0.4408872049885547, "grad_norm": 1.8984375, "learning_rate": 1.779460823470611e-05, "loss": 0.6676, "step": 3491 }, { "epoch": 0.4410134975136159, "grad_norm": 1.8671875, "learning_rate": 1.779335740971382e-05, "loss": 0.7371, "step": 3492 }, { "epoch": 0.4411397900386771, "grad_norm": 1.78125, "learning_rate": 1.7792106274097082e-05, "loss": 0.6823, "step": 3493 }, { "epoch": 0.4412660825637383, "grad_norm": 1.6953125, "learning_rate": 1.7790854827905754e-05, "loss": 0.6323, "step": 3494 }, { "epoch": 0.4413923750887994, "grad_norm": 1.65625, "learning_rate": 1.7789603071189716e-05, "loss": 0.5873, "step": 3495 }, { "epoch": 0.4415186676138606, "grad_norm": 1.6328125, "learning_rate": 1.7788351003998863e-05, "loss": 0.6409, "step": 3496 }, { "epoch": 0.4416449601389218, "grad_norm": 1.9453125, "learning_rate": 1.77870986263831e-05, "loss": 0.7074, "step": 3497 }, { "epoch": 0.4417712526639829, "grad_norm": 1.859375, "learning_rate": 1.778584593839234e-05, "loss": 0.7442, "step": 3498 }, { "epoch": 0.4418975451890441, "grad_norm": 1.734375, "learning_rate": 1.7784592940076515e-05, "loss": 0.6477, "step": 3499 }, { "epoch": 0.4420238377141053, "grad_norm": 1.921875, "learning_rate": 1.7783339631485566e-05, "loss": 0.7308, "step": 3500 }, { "epoch": 0.4421501302391665, "grad_norm": 1.59375, "learning_rate": 1.7782086012669447e-05, "loss": 0.5741, "step": 3501 }, { "epoch": 0.44227642276422763, "grad_norm": 2.125, "learning_rate": 1.7780832083678122e-05, "loss": 0.7513, "step": 3502 }, { "epoch": 0.4424027152892888, "grad_norm": 1.8984375, "learning_rate": 1.777957784456157e-05, "loss": 0.7095, "step": 3503 }, { "epoch": 0.44252900781435, "grad_norm": 2.015625, "learning_rate": 1.777832329536979e-05, "loss": 0.7347, "step": 3504 }, { "epoch": 0.44265530033941114, "grad_norm": 1.8046875, "learning_rate": 1.777706843615277e-05, "loss": 0.6723, "step": 3505 }, { "epoch": 0.44278159286447233, "grad_norm": 2.0625, "learning_rate": 1.777581326696054e-05, "loss": 0.7184, "step": 3506 }, { "epoch": 0.4429078853895335, "grad_norm": 1.875, "learning_rate": 1.777455778784312e-05, "loss": 0.6872, "step": 3507 }, { "epoch": 0.4430341779145947, "grad_norm": 1.8046875, "learning_rate": 1.777330199885056e-05, "loss": 0.6748, "step": 3508 }, { "epoch": 0.44316047043965584, "grad_norm": 1.9453125, "learning_rate": 1.7772045900032898e-05, "loss": 0.6869, "step": 3509 }, { "epoch": 0.44328676296471703, "grad_norm": 1.859375, "learning_rate": 1.7770789491440208e-05, "loss": 0.6643, "step": 3510 }, { "epoch": 0.4434130554897782, "grad_norm": 1.71875, "learning_rate": 1.7769532773122567e-05, "loss": 0.6891, "step": 3511 }, { "epoch": 0.44353934801483935, "grad_norm": 1.8515625, "learning_rate": 1.776827574513006e-05, "loss": 0.7004, "step": 3512 }, { "epoch": 0.44366564053990054, "grad_norm": 1.78125, "learning_rate": 1.7767018407512796e-05, "loss": 0.7067, "step": 3513 }, { "epoch": 0.44379193306496173, "grad_norm": 1.7109375, "learning_rate": 1.7765760760320886e-05, "loss": 0.6691, "step": 3514 }, { "epoch": 0.44391822559002286, "grad_norm": 1.75, "learning_rate": 1.776450280360446e-05, "loss": 0.7176, "step": 3515 }, { "epoch": 0.44404451811508405, "grad_norm": 1.8125, "learning_rate": 1.776324453741365e-05, "loss": 0.6771, "step": 3516 }, { "epoch": 0.44417081064014524, "grad_norm": 1.765625, "learning_rate": 1.7761985961798616e-05, "loss": 0.5863, "step": 3517 }, { "epoch": 0.44429710316520643, "grad_norm": 1.8359375, "learning_rate": 1.7760727076809515e-05, "loss": 0.665, "step": 3518 }, { "epoch": 0.44442339569026756, "grad_norm": 1.7265625, "learning_rate": 1.7759467882496524e-05, "loss": 0.6724, "step": 3519 }, { "epoch": 0.44454968821532875, "grad_norm": 1.71875, "learning_rate": 1.7758208378909832e-05, "loss": 0.6335, "step": 3520 }, { "epoch": 0.44467598074038994, "grad_norm": 1.796875, "learning_rate": 1.7756948566099645e-05, "loss": 0.6753, "step": 3521 }, { "epoch": 0.4448022732654511, "grad_norm": 1.828125, "learning_rate": 1.775568844411617e-05, "loss": 0.6195, "step": 3522 }, { "epoch": 0.44492856579051226, "grad_norm": 1.8984375, "learning_rate": 1.7754428013009634e-05, "loss": 0.8101, "step": 3523 }, { "epoch": 0.44505485831557345, "grad_norm": 1.796875, "learning_rate": 1.7753167272830276e-05, "loss": 0.7056, "step": 3524 }, { "epoch": 0.44518115084063464, "grad_norm": 1.8125, "learning_rate": 1.7751906223628345e-05, "loss": 0.6857, "step": 3525 }, { "epoch": 0.4453074433656958, "grad_norm": 1.953125, "learning_rate": 1.7750644865454104e-05, "loss": 0.7499, "step": 3526 }, { "epoch": 0.44543373589075697, "grad_norm": 1.8046875, "learning_rate": 1.7749383198357827e-05, "loss": 0.7584, "step": 3527 }, { "epoch": 0.44556002841581815, "grad_norm": 1.8203125, "learning_rate": 1.77481212223898e-05, "loss": 0.7213, "step": 3528 }, { "epoch": 0.4456863209408793, "grad_norm": 1.9609375, "learning_rate": 1.7746858937600325e-05, "loss": 0.6944, "step": 3529 }, { "epoch": 0.4458126134659405, "grad_norm": 1.7421875, "learning_rate": 1.7745596344039712e-05, "loss": 0.653, "step": 3530 }, { "epoch": 0.44593890599100167, "grad_norm": 1.75, "learning_rate": 1.7744333441758286e-05, "loss": 0.7082, "step": 3531 }, { "epoch": 0.44606519851606286, "grad_norm": 1.78125, "learning_rate": 1.774307023080638e-05, "loss": 0.7283, "step": 3532 }, { "epoch": 0.446191491041124, "grad_norm": 1.7890625, "learning_rate": 1.7741806711234345e-05, "loss": 0.6876, "step": 3533 }, { "epoch": 0.4463177835661852, "grad_norm": 1.8359375, "learning_rate": 1.7740542883092545e-05, "loss": 0.6578, "step": 3534 }, { "epoch": 0.44644407609124637, "grad_norm": 1.9453125, "learning_rate": 1.7739278746431347e-05, "loss": 0.773, "step": 3535 }, { "epoch": 0.4465703686163075, "grad_norm": 1.8515625, "learning_rate": 1.773801430130114e-05, "loss": 0.7278, "step": 3536 }, { "epoch": 0.4466966611413687, "grad_norm": 1.9296875, "learning_rate": 1.773674954775232e-05, "loss": 0.726, "step": 3537 }, { "epoch": 0.4468229536664299, "grad_norm": 1.8359375, "learning_rate": 1.7735484485835303e-05, "loss": 0.619, "step": 3538 }, { "epoch": 0.44694924619149107, "grad_norm": 1.8203125, "learning_rate": 1.77342191156005e-05, "loss": 0.6791, "step": 3539 }, { "epoch": 0.4470755387165522, "grad_norm": 1.8828125, "learning_rate": 1.7732953437098358e-05, "loss": 0.7389, "step": 3540 }, { "epoch": 0.4472018312416134, "grad_norm": 1.8828125, "learning_rate": 1.7731687450379317e-05, "loss": 0.7996, "step": 3541 }, { "epoch": 0.4473281237666746, "grad_norm": 1.875, "learning_rate": 1.7730421155493832e-05, "loss": 0.7494, "step": 3542 }, { "epoch": 0.4474544162917357, "grad_norm": 1.7890625, "learning_rate": 1.7729154552492385e-05, "loss": 0.6464, "step": 3543 }, { "epoch": 0.4475807088167969, "grad_norm": 1.578125, "learning_rate": 1.772788764142545e-05, "loss": 0.5807, "step": 3544 }, { "epoch": 0.4477070013418581, "grad_norm": 1.734375, "learning_rate": 1.772662042234353e-05, "loss": 0.645, "step": 3545 }, { "epoch": 0.4478332938669192, "grad_norm": 1.6328125, "learning_rate": 1.7725352895297132e-05, "loss": 0.529, "step": 3546 }, { "epoch": 0.4479595863919804, "grad_norm": 1.7265625, "learning_rate": 1.7724085060336773e-05, "loss": 0.7107, "step": 3547 }, { "epoch": 0.4480858789170416, "grad_norm": 1.765625, "learning_rate": 1.772281691751299e-05, "loss": 0.6237, "step": 3548 }, { "epoch": 0.4482121714421028, "grad_norm": 1.8671875, "learning_rate": 1.7721548466876325e-05, "loss": 0.6538, "step": 3549 }, { "epoch": 0.4483384639671639, "grad_norm": 1.984375, "learning_rate": 1.7720279708477337e-05, "loss": 0.7628, "step": 3550 }, { "epoch": 0.4484647564922251, "grad_norm": 1.78125, "learning_rate": 1.7719010642366594e-05, "loss": 0.6209, "step": 3551 }, { "epoch": 0.4485910490172863, "grad_norm": 1.8359375, "learning_rate": 1.771774126859468e-05, "loss": 0.6593, "step": 3552 }, { "epoch": 0.44871734154234744, "grad_norm": 1.8359375, "learning_rate": 1.771647158721219e-05, "loss": 0.6739, "step": 3553 }, { "epoch": 0.4488436340674086, "grad_norm": 1.84375, "learning_rate": 1.7715201598269727e-05, "loss": 0.6587, "step": 3554 }, { "epoch": 0.4489699265924698, "grad_norm": 2.46875, "learning_rate": 1.7713931301817912e-05, "loss": 0.7692, "step": 3555 }, { "epoch": 0.449096219117531, "grad_norm": 2.09375, "learning_rate": 1.7712660697907377e-05, "loss": 0.7792, "step": 3556 }, { "epoch": 0.44922251164259214, "grad_norm": 1.9296875, "learning_rate": 1.771138978658876e-05, "loss": 0.6884, "step": 3557 }, { "epoch": 0.4493488041676533, "grad_norm": 1.6953125, "learning_rate": 1.7710118567912726e-05, "loss": 0.6111, "step": 3558 }, { "epoch": 0.4494750966927145, "grad_norm": 1.7734375, "learning_rate": 1.770884704192993e-05, "loss": 0.6347, "step": 3559 }, { "epoch": 0.44960138921777565, "grad_norm": 1.765625, "learning_rate": 1.7707575208691064e-05, "loss": 0.6812, "step": 3560 }, { "epoch": 0.44972768174283684, "grad_norm": 1.8515625, "learning_rate": 1.7706303068246812e-05, "loss": 0.7048, "step": 3561 }, { "epoch": 0.449853974267898, "grad_norm": 1.8203125, "learning_rate": 1.7705030620647884e-05, "loss": 0.7056, "step": 3562 }, { "epoch": 0.4499802667929592, "grad_norm": 1.78125, "learning_rate": 1.770375786594499e-05, "loss": 0.6217, "step": 3563 }, { "epoch": 0.45010655931802035, "grad_norm": 1.7578125, "learning_rate": 1.7702484804188867e-05, "loss": 0.6352, "step": 3564 }, { "epoch": 0.45023285184308154, "grad_norm": 1.96875, "learning_rate": 1.7701211435430252e-05, "loss": 0.6028, "step": 3565 }, { "epoch": 0.4503591443681427, "grad_norm": 1.8203125, "learning_rate": 1.7699937759719898e-05, "loss": 0.6392, "step": 3566 }, { "epoch": 0.45048543689320386, "grad_norm": 1.6328125, "learning_rate": 1.7698663777108567e-05, "loss": 0.6022, "step": 3567 }, { "epoch": 0.45061172941826505, "grad_norm": 1.6953125, "learning_rate": 1.7697389487647047e-05, "loss": 0.6174, "step": 3568 }, { "epoch": 0.45073802194332624, "grad_norm": 1.984375, "learning_rate": 1.7696114891386123e-05, "loss": 0.7183, "step": 3569 }, { "epoch": 0.45086431446838743, "grad_norm": 1.828125, "learning_rate": 1.7694839988376592e-05, "loss": 0.7188, "step": 3570 }, { "epoch": 0.45099060699344856, "grad_norm": 1.7890625, "learning_rate": 1.7693564778669278e-05, "loss": 0.7151, "step": 3571 }, { "epoch": 0.45111689951850975, "grad_norm": 1.78125, "learning_rate": 1.7692289262315e-05, "loss": 0.708, "step": 3572 }, { "epoch": 0.45124319204357094, "grad_norm": 2.0, "learning_rate": 1.76910134393646e-05, "loss": 0.6244, "step": 3573 }, { "epoch": 0.4513694845686321, "grad_norm": 1.84375, "learning_rate": 1.768973730986893e-05, "loss": 0.6913, "step": 3574 }, { "epoch": 0.45149577709369326, "grad_norm": 1.7890625, "learning_rate": 1.7688460873878856e-05, "loss": 0.6788, "step": 3575 }, { "epoch": 0.45162206961875445, "grad_norm": 1.9296875, "learning_rate": 1.7687184131445245e-05, "loss": 0.6141, "step": 3576 }, { "epoch": 0.45174836214381564, "grad_norm": 1.796875, "learning_rate": 1.7685907082618993e-05, "loss": 0.6597, "step": 3577 }, { "epoch": 0.4518746546688768, "grad_norm": 1.984375, "learning_rate": 1.7684629727450997e-05, "loss": 0.7208, "step": 3578 }, { "epoch": 0.45200094719393796, "grad_norm": 1.7890625, "learning_rate": 1.768335206599217e-05, "loss": 0.6882, "step": 3579 }, { "epoch": 0.45212723971899915, "grad_norm": 2.140625, "learning_rate": 1.7682074098293434e-05, "loss": 0.8371, "step": 3580 }, { "epoch": 0.4522535322440603, "grad_norm": 1.78125, "learning_rate": 1.768079582440573e-05, "loss": 0.6495, "step": 3581 }, { "epoch": 0.4523798247691215, "grad_norm": 1.9453125, "learning_rate": 1.7679517244380005e-05, "loss": 0.7074, "step": 3582 }, { "epoch": 0.45250611729418266, "grad_norm": 1.8515625, "learning_rate": 1.767823835826722e-05, "loss": 0.7357, "step": 3583 }, { "epoch": 0.4526324098192438, "grad_norm": 1.7578125, "learning_rate": 1.7676959166118345e-05, "loss": 0.6353, "step": 3584 }, { "epoch": 0.452758702344305, "grad_norm": 1.8046875, "learning_rate": 1.7675679667984372e-05, "loss": 0.7588, "step": 3585 }, { "epoch": 0.4528849948693662, "grad_norm": 1.921875, "learning_rate": 1.7674399863916295e-05, "loss": 0.6643, "step": 3586 }, { "epoch": 0.45301128739442736, "grad_norm": 1.71875, "learning_rate": 1.7673119753965125e-05, "loss": 0.6765, "step": 3587 }, { "epoch": 0.4531375799194885, "grad_norm": 1.7578125, "learning_rate": 1.767183933818188e-05, "loss": 0.6695, "step": 3588 }, { "epoch": 0.4532638724445497, "grad_norm": 1.921875, "learning_rate": 1.7670558616617602e-05, "loss": 0.7102, "step": 3589 }, { "epoch": 0.4533901649696109, "grad_norm": 1.71875, "learning_rate": 1.766927758932333e-05, "loss": 0.6145, "step": 3590 }, { "epoch": 0.453516457494672, "grad_norm": 1.875, "learning_rate": 1.7667996256350127e-05, "loss": 0.6724, "step": 3591 }, { "epoch": 0.4536427500197332, "grad_norm": 1.796875, "learning_rate": 1.7666714617749067e-05, "loss": 0.5601, "step": 3592 }, { "epoch": 0.4537690425447944, "grad_norm": 1.7109375, "learning_rate": 1.7665432673571224e-05, "loss": 0.665, "step": 3593 }, { "epoch": 0.4538953350698556, "grad_norm": 1.734375, "learning_rate": 1.76641504238677e-05, "loss": 0.671, "step": 3594 }, { "epoch": 0.4540216275949167, "grad_norm": 1.9375, "learning_rate": 1.7662867868689597e-05, "loss": 0.7194, "step": 3595 }, { "epoch": 0.4541479201199779, "grad_norm": 1.7265625, "learning_rate": 1.766158500808804e-05, "loss": 0.6627, "step": 3596 }, { "epoch": 0.4542742126450391, "grad_norm": 1.75, "learning_rate": 1.766030184211416e-05, "loss": 0.6407, "step": 3597 }, { "epoch": 0.4544005051701002, "grad_norm": 1.6640625, "learning_rate": 1.76590183708191e-05, "loss": 0.567, "step": 3598 }, { "epoch": 0.4545267976951614, "grad_norm": 1.6015625, "learning_rate": 1.765773459425401e-05, "loss": 0.5337, "step": 3599 }, { "epoch": 0.4546530902202226, "grad_norm": 1.859375, "learning_rate": 1.765645051247007e-05, "loss": 0.7056, "step": 3600 }, { "epoch": 0.4547793827452838, "grad_norm": 1.875, "learning_rate": 1.7655166125518452e-05, "loss": 0.7445, "step": 3601 }, { "epoch": 0.4549056752703449, "grad_norm": 1.625, "learning_rate": 1.7653881433450353e-05, "loss": 0.5743, "step": 3602 }, { "epoch": 0.4550319677954061, "grad_norm": 1.8046875, "learning_rate": 1.7652596436316975e-05, "loss": 0.719, "step": 3603 }, { "epoch": 0.4551582603204673, "grad_norm": 1.8359375, "learning_rate": 1.765131113416953e-05, "loss": 0.6855, "step": 3604 }, { "epoch": 0.45528455284552843, "grad_norm": 1.8515625, "learning_rate": 1.765002552705926e-05, "loss": 0.6819, "step": 3605 }, { "epoch": 0.4554108453705896, "grad_norm": 1.8671875, "learning_rate": 1.7648739615037395e-05, "loss": 0.7015, "step": 3606 }, { "epoch": 0.4555371378956508, "grad_norm": 2.546875, "learning_rate": 1.764745339815519e-05, "loss": 0.6986, "step": 3607 }, { "epoch": 0.455663430420712, "grad_norm": 1.796875, "learning_rate": 1.7646166876463918e-05, "loss": 0.754, "step": 3608 }, { "epoch": 0.45578972294577313, "grad_norm": 1.75, "learning_rate": 1.764488005001485e-05, "loss": 0.6328, "step": 3609 }, { "epoch": 0.4559160154708343, "grad_norm": 1.765625, "learning_rate": 1.7643592918859274e-05, "loss": 0.7767, "step": 3610 }, { "epoch": 0.4560423079958955, "grad_norm": 1.796875, "learning_rate": 1.7642305483048496e-05, "loss": 0.6889, "step": 3611 }, { "epoch": 0.45616860052095665, "grad_norm": 1.8359375, "learning_rate": 1.764101774263383e-05, "loss": 0.7032, "step": 3612 }, { "epoch": 0.45629489304601784, "grad_norm": 1.7890625, "learning_rate": 1.7639729697666597e-05, "loss": 0.6979, "step": 3613 }, { "epoch": 0.456421185571079, "grad_norm": 1.7421875, "learning_rate": 1.7638441348198147e-05, "loss": 0.6008, "step": 3614 }, { "epoch": 0.45654747809614016, "grad_norm": 1.765625, "learning_rate": 1.7637152694279818e-05, "loss": 0.646, "step": 3615 }, { "epoch": 0.45667377062120135, "grad_norm": 1.8046875, "learning_rate": 1.763586373596298e-05, "loss": 0.684, "step": 3616 }, { "epoch": 0.45680006314626254, "grad_norm": 1.75, "learning_rate": 1.7634574473299007e-05, "loss": 0.6462, "step": 3617 }, { "epoch": 0.4569263556713237, "grad_norm": 2.109375, "learning_rate": 1.7633284906339283e-05, "loss": 0.6679, "step": 3618 }, { "epoch": 0.45705264819638486, "grad_norm": 2.609375, "learning_rate": 1.7631995035135207e-05, "loss": 0.8331, "step": 3619 }, { "epoch": 0.45717894072144605, "grad_norm": 1.921875, "learning_rate": 1.7630704859738192e-05, "loss": 0.6906, "step": 3620 }, { "epoch": 0.45730523324650724, "grad_norm": 1.703125, "learning_rate": 1.7629414380199662e-05, "loss": 0.6217, "step": 3621 }, { "epoch": 0.45743152577156837, "grad_norm": 1.8515625, "learning_rate": 1.7628123596571056e-05, "loss": 0.7768, "step": 3622 }, { "epoch": 0.45755781829662956, "grad_norm": 1.7734375, "learning_rate": 1.762683250890381e-05, "loss": 0.6733, "step": 3623 }, { "epoch": 0.45768411082169075, "grad_norm": 1.8828125, "learning_rate": 1.7625541117249392e-05, "loss": 0.6607, "step": 3624 }, { "epoch": 0.45781040334675194, "grad_norm": 3.09375, "learning_rate": 1.7624249421659273e-05, "loss": 0.72, "step": 3625 }, { "epoch": 0.45793669587181307, "grad_norm": 1.765625, "learning_rate": 1.7622957422184937e-05, "loss": 0.6802, "step": 3626 }, { "epoch": 0.45806298839687426, "grad_norm": 1.796875, "learning_rate": 1.762166511887788e-05, "loss": 0.659, "step": 3627 }, { "epoch": 0.45818928092193545, "grad_norm": 1.7421875, "learning_rate": 1.7620372511789607e-05, "loss": 0.7119, "step": 3628 }, { "epoch": 0.4583155734469966, "grad_norm": 1.8046875, "learning_rate": 1.761907960097164e-05, "loss": 0.6434, "step": 3629 }, { "epoch": 0.45844186597205777, "grad_norm": 1.6796875, "learning_rate": 1.7617786386475514e-05, "loss": 0.652, "step": 3630 }, { "epoch": 0.45856815849711896, "grad_norm": 1.8359375, "learning_rate": 1.761649286835277e-05, "loss": 0.6651, "step": 3631 }, { "epoch": 0.45869445102218015, "grad_norm": 1.8984375, "learning_rate": 1.761519904665497e-05, "loss": 0.6643, "step": 3632 }, { "epoch": 0.4588207435472413, "grad_norm": 1.8203125, "learning_rate": 1.761390492143367e-05, "loss": 0.6271, "step": 3633 }, { "epoch": 0.45894703607230247, "grad_norm": 1.75, "learning_rate": 1.7612610492740464e-05, "loss": 0.5859, "step": 3634 }, { "epoch": 0.45907332859736366, "grad_norm": 1.7734375, "learning_rate": 1.761131576062694e-05, "loss": 0.6431, "step": 3635 }, { "epoch": 0.4591996211224248, "grad_norm": 1.8515625, "learning_rate": 1.76100207251447e-05, "loss": 0.7092, "step": 3636 }, { "epoch": 0.459325913647486, "grad_norm": 2.03125, "learning_rate": 1.760872538634537e-05, "loss": 0.7309, "step": 3637 }, { "epoch": 0.4594522061725472, "grad_norm": 1.65625, "learning_rate": 1.7607429744280566e-05, "loss": 0.626, "step": 3638 }, { "epoch": 0.45957849869760836, "grad_norm": 1.8046875, "learning_rate": 1.760613379900194e-05, "loss": 0.5756, "step": 3639 }, { "epoch": 0.4597047912226695, "grad_norm": 1.8671875, "learning_rate": 1.760483755056114e-05, "loss": 0.687, "step": 3640 }, { "epoch": 0.4598310837477307, "grad_norm": 2.0, "learning_rate": 1.7603540999009833e-05, "loss": 0.7443, "step": 3641 }, { "epoch": 0.4599573762727919, "grad_norm": 1.75, "learning_rate": 1.7602244144399696e-05, "loss": 0.6575, "step": 3642 }, { "epoch": 0.460083668797853, "grad_norm": 1.734375, "learning_rate": 1.760094698678242e-05, "loss": 0.6914, "step": 3643 }, { "epoch": 0.4602099613229142, "grad_norm": 1.828125, "learning_rate": 1.7599649526209702e-05, "loss": 0.6301, "step": 3644 }, { "epoch": 0.4603362538479754, "grad_norm": 1.8359375, "learning_rate": 1.759835176273326e-05, "loss": 0.705, "step": 3645 }, { "epoch": 0.4604625463730365, "grad_norm": 1.6484375, "learning_rate": 1.7597053696404824e-05, "loss": 0.6346, "step": 3646 }, { "epoch": 0.4605888388980977, "grad_norm": 1.734375, "learning_rate": 1.7595755327276118e-05, "loss": 0.6239, "step": 3647 }, { "epoch": 0.4607151314231589, "grad_norm": 1.8828125, "learning_rate": 1.7594456655398904e-05, "loss": 0.7355, "step": 3648 }, { "epoch": 0.4608414239482201, "grad_norm": 1.9140625, "learning_rate": 1.759315768082494e-05, "loss": 0.6867, "step": 3649 }, { "epoch": 0.4609677164732812, "grad_norm": 1.9296875, "learning_rate": 1.7591858403606e-05, "loss": 0.6898, "step": 3650 }, { "epoch": 0.4610940089983424, "grad_norm": 1.953125, "learning_rate": 1.7590558823793872e-05, "loss": 0.6525, "step": 3651 }, { "epoch": 0.4612203015234036, "grad_norm": 1.9140625, "learning_rate": 1.7589258941440346e-05, "loss": 0.7146, "step": 3652 }, { "epoch": 0.46134659404846473, "grad_norm": 1.78125, "learning_rate": 1.7587958756597242e-05, "loss": 0.6328, "step": 3653 }, { "epoch": 0.4614728865735259, "grad_norm": 1.6953125, "learning_rate": 1.7586658269316383e-05, "loss": 0.6421, "step": 3654 }, { "epoch": 0.4615991790985871, "grad_norm": 1.7890625, "learning_rate": 1.7585357479649593e-05, "loss": 0.7446, "step": 3655 }, { "epoch": 0.4617254716236483, "grad_norm": 1.7421875, "learning_rate": 1.7584056387648727e-05, "loss": 0.7146, "step": 3656 }, { "epoch": 0.46185176414870943, "grad_norm": 1.7109375, "learning_rate": 1.758275499336564e-05, "loss": 0.6289, "step": 3657 }, { "epoch": 0.4619780566737706, "grad_norm": 1.625, "learning_rate": 1.7581453296852206e-05, "loss": 0.6186, "step": 3658 }, { "epoch": 0.4621043491988318, "grad_norm": 1.8203125, "learning_rate": 1.75801512981603e-05, "loss": 0.6434, "step": 3659 }, { "epoch": 0.46223064172389294, "grad_norm": 1.90625, "learning_rate": 1.7578848997341823e-05, "loss": 0.7197, "step": 3660 }, { "epoch": 0.46235693424895413, "grad_norm": 1.7890625, "learning_rate": 1.7577546394448683e-05, "loss": 0.579, "step": 3661 }, { "epoch": 0.4624832267740153, "grad_norm": 1.8046875, "learning_rate": 1.7576243489532792e-05, "loss": 0.5407, "step": 3662 }, { "epoch": 0.4626095192990765, "grad_norm": 1.65625, "learning_rate": 1.7574940282646085e-05, "loss": 0.6502, "step": 3663 }, { "epoch": 0.46273581182413764, "grad_norm": 1.7265625, "learning_rate": 1.75736367738405e-05, "loss": 0.673, "step": 3664 }, { "epoch": 0.46286210434919883, "grad_norm": 1.84375, "learning_rate": 1.7572332963168e-05, "loss": 0.7499, "step": 3665 }, { "epoch": 0.46298839687426, "grad_norm": 1.59375, "learning_rate": 1.7571028850680547e-05, "loss": 0.685, "step": 3666 }, { "epoch": 0.46311468939932116, "grad_norm": 1.7578125, "learning_rate": 1.756972443643012e-05, "loss": 0.6875, "step": 3667 }, { "epoch": 0.46324098192438234, "grad_norm": 1.8046875, "learning_rate": 1.7568419720468706e-05, "loss": 0.742, "step": 3668 }, { "epoch": 0.46336727444944353, "grad_norm": 1.7265625, "learning_rate": 1.7567114702848317e-05, "loss": 0.6323, "step": 3669 }, { "epoch": 0.4634935669745047, "grad_norm": 1.7890625, "learning_rate": 1.7565809383620956e-05, "loss": 0.6954, "step": 3670 }, { "epoch": 0.46361985949956586, "grad_norm": 1.8984375, "learning_rate": 1.7564503762838664e-05, "loss": 0.7772, "step": 3671 }, { "epoch": 0.46374615202462705, "grad_norm": 1.7421875, "learning_rate": 1.7563197840553467e-05, "loss": 0.6657, "step": 3672 }, { "epoch": 0.46387244454968823, "grad_norm": 3.8125, "learning_rate": 1.7561891616817423e-05, "loss": 0.7542, "step": 3673 }, { "epoch": 0.46399873707474937, "grad_norm": 1.75, "learning_rate": 1.7560585091682594e-05, "loss": 0.6269, "step": 3674 }, { "epoch": 0.46412502959981056, "grad_norm": 1.765625, "learning_rate": 1.7559278265201052e-05, "loss": 0.6727, "step": 3675 }, { "epoch": 0.46425132212487175, "grad_norm": 1.84375, "learning_rate": 1.7557971137424886e-05, "loss": 0.6635, "step": 3676 }, { "epoch": 0.4643776146499329, "grad_norm": 1.703125, "learning_rate": 1.7556663708406196e-05, "loss": 0.6126, "step": 3677 }, { "epoch": 0.46450390717499407, "grad_norm": 1.9375, "learning_rate": 1.7555355978197092e-05, "loss": 0.6818, "step": 3678 }, { "epoch": 0.46463019970005526, "grad_norm": 2.0625, "learning_rate": 1.75540479468497e-05, "loss": 0.6322, "step": 3679 }, { "epoch": 0.46475649222511645, "grad_norm": 1.671875, "learning_rate": 1.7552739614416147e-05, "loss": 0.6347, "step": 3680 }, { "epoch": 0.4648827847501776, "grad_norm": 1.9140625, "learning_rate": 1.7551430980948587e-05, "loss": 0.7242, "step": 3681 }, { "epoch": 0.46500907727523877, "grad_norm": 1.7578125, "learning_rate": 1.7550122046499175e-05, "loss": 0.6377, "step": 3682 }, { "epoch": 0.46513536980029996, "grad_norm": 1.7890625, "learning_rate": 1.7548812811120085e-05, "loss": 0.6805, "step": 3683 }, { "epoch": 0.4652616623253611, "grad_norm": 1.734375, "learning_rate": 1.75475032748635e-05, "loss": 0.6138, "step": 3684 }, { "epoch": 0.4653879548504223, "grad_norm": 1.9375, "learning_rate": 1.7546193437781612e-05, "loss": 0.797, "step": 3685 }, { "epoch": 0.46551424737548347, "grad_norm": 2.15625, "learning_rate": 1.754488329992663e-05, "loss": 0.7564, "step": 3686 }, { "epoch": 0.46564053990054466, "grad_norm": 1.9375, "learning_rate": 1.754357286135077e-05, "loss": 0.6949, "step": 3687 }, { "epoch": 0.4657668324256058, "grad_norm": 2.421875, "learning_rate": 1.7542262122106273e-05, "loss": 0.8523, "step": 3688 }, { "epoch": 0.465893124950667, "grad_norm": 1.890625, "learning_rate": 1.754095108224537e-05, "loss": 0.725, "step": 3689 }, { "epoch": 0.46601941747572817, "grad_norm": 1.8203125, "learning_rate": 1.753963974182032e-05, "loss": 0.7269, "step": 3690 }, { "epoch": 0.4661457100007893, "grad_norm": 1.9609375, "learning_rate": 1.753832810088339e-05, "loss": 0.9092, "step": 3691 }, { "epoch": 0.4662720025258505, "grad_norm": 1.6640625, "learning_rate": 1.753701615948686e-05, "loss": 0.6804, "step": 3692 }, { "epoch": 0.4663982950509117, "grad_norm": 1.765625, "learning_rate": 1.7535703917683024e-05, "loss": 0.6345, "step": 3693 }, { "epoch": 0.46652458757597287, "grad_norm": 1.8515625, "learning_rate": 1.7534391375524174e-05, "loss": 0.6916, "step": 3694 }, { "epoch": 0.466650880101034, "grad_norm": 1.8046875, "learning_rate": 1.7533078533062635e-05, "loss": 0.6332, "step": 3695 }, { "epoch": 0.4667771726260952, "grad_norm": 1.7578125, "learning_rate": 1.7531765390350727e-05, "loss": 0.6917, "step": 3696 }, { "epoch": 0.4669034651511564, "grad_norm": 1.890625, "learning_rate": 1.7530451947440796e-05, "loss": 0.6776, "step": 3697 }, { "epoch": 0.4670297576762175, "grad_norm": 1.71875, "learning_rate": 1.7529138204385186e-05, "loss": 0.6365, "step": 3698 }, { "epoch": 0.4671560502012787, "grad_norm": 1.75, "learning_rate": 1.7527824161236263e-05, "loss": 0.6503, "step": 3699 }, { "epoch": 0.4672823427263399, "grad_norm": 1.65625, "learning_rate": 1.75265098180464e-05, "loss": 0.6931, "step": 3700 }, { "epoch": 0.4674086352514011, "grad_norm": 1.7734375, "learning_rate": 1.7525195174867986e-05, "loss": 0.585, "step": 3701 }, { "epoch": 0.4675349277764622, "grad_norm": 1.65625, "learning_rate": 1.7523880231753417e-05, "loss": 0.583, "step": 3702 }, { "epoch": 0.4676612203015234, "grad_norm": 1.84375, "learning_rate": 1.7522564988755104e-05, "loss": 0.6775, "step": 3703 }, { "epoch": 0.4677875128265846, "grad_norm": 1.6796875, "learning_rate": 1.7521249445925474e-05, "loss": 0.7039, "step": 3704 }, { "epoch": 0.46791380535164573, "grad_norm": 1.671875, "learning_rate": 1.751993360331695e-05, "loss": 0.6213, "step": 3705 }, { "epoch": 0.4680400978767069, "grad_norm": 1.6796875, "learning_rate": 1.751861746098199e-05, "loss": 0.6521, "step": 3706 }, { "epoch": 0.4681663904017681, "grad_norm": 1.78125, "learning_rate": 1.751730101897305e-05, "loss": 0.679, "step": 3707 }, { "epoch": 0.4682926829268293, "grad_norm": 1.75, "learning_rate": 1.751598427734259e-05, "loss": 0.6556, "step": 3708 }, { "epoch": 0.46841897545189043, "grad_norm": 1.8203125, "learning_rate": 1.7514667236143106e-05, "loss": 0.6578, "step": 3709 }, { "epoch": 0.4685452679769516, "grad_norm": 2.09375, "learning_rate": 1.7513349895427085e-05, "loss": 0.6985, "step": 3710 }, { "epoch": 0.4686715605020128, "grad_norm": 1.8125, "learning_rate": 1.7512032255247035e-05, "loss": 0.6399, "step": 3711 }, { "epoch": 0.46879785302707394, "grad_norm": 1.890625, "learning_rate": 1.751071431565547e-05, "loss": 0.7505, "step": 3712 }, { "epoch": 0.46892414555213513, "grad_norm": 2.203125, "learning_rate": 1.7509396076704926e-05, "loss": 0.6771, "step": 3713 }, { "epoch": 0.4690504380771963, "grad_norm": 1.90625, "learning_rate": 1.750807753844794e-05, "loss": 0.7469, "step": 3714 }, { "epoch": 0.46917673060225745, "grad_norm": 1.859375, "learning_rate": 1.7506758700937072e-05, "loss": 0.6881, "step": 3715 }, { "epoch": 0.46930302312731864, "grad_norm": 1.7734375, "learning_rate": 1.750543956422488e-05, "loss": 0.6165, "step": 3716 }, { "epoch": 0.46942931565237983, "grad_norm": 1.875, "learning_rate": 1.7504120128363945e-05, "loss": 0.6659, "step": 3717 }, { "epoch": 0.469555608177441, "grad_norm": 1.8203125, "learning_rate": 1.7502800393406856e-05, "loss": 0.7313, "step": 3718 }, { "epoch": 0.46968190070250215, "grad_norm": 1.9453125, "learning_rate": 1.7501480359406217e-05, "loss": 0.7417, "step": 3719 }, { "epoch": 0.46980819322756334, "grad_norm": 1.7890625, "learning_rate": 1.7500160026414637e-05, "loss": 0.5818, "step": 3720 }, { "epoch": 0.46993448575262453, "grad_norm": 1.875, "learning_rate": 1.7498839394484744e-05, "loss": 0.6997, "step": 3721 }, { "epoch": 0.47006077827768566, "grad_norm": 1.703125, "learning_rate": 1.7497518463669175e-05, "loss": 0.5693, "step": 3722 }, { "epoch": 0.47018707080274685, "grad_norm": 1.796875, "learning_rate": 1.749619723402058e-05, "loss": 0.7155, "step": 3723 }, { "epoch": 0.47031336332780804, "grad_norm": 1.7890625, "learning_rate": 1.7494875705591617e-05, "loss": 0.7084, "step": 3724 }, { "epoch": 0.47043965585286923, "grad_norm": 1.796875, "learning_rate": 1.7493553878434962e-05, "loss": 0.6553, "step": 3725 }, { "epoch": 0.47056594837793037, "grad_norm": 1.9140625, "learning_rate": 1.7492231752603302e-05, "loss": 0.7017, "step": 3726 }, { "epoch": 0.47069224090299155, "grad_norm": 1.8828125, "learning_rate": 1.7490909328149326e-05, "loss": 0.6976, "step": 3727 }, { "epoch": 0.47081853342805274, "grad_norm": 1.84375, "learning_rate": 1.748958660512575e-05, "loss": 0.7134, "step": 3728 }, { "epoch": 0.4709448259531139, "grad_norm": 1.7734375, "learning_rate": 1.748826358358529e-05, "loss": 0.6376, "step": 3729 }, { "epoch": 0.47107111847817507, "grad_norm": 1.90625, "learning_rate": 1.7486940263580678e-05, "loss": 0.7079, "step": 3730 }, { "epoch": 0.47119741100323626, "grad_norm": 1.6796875, "learning_rate": 1.7485616645164665e-05, "loss": 0.6152, "step": 3731 }, { "epoch": 0.47132370352829744, "grad_norm": 1.8125, "learning_rate": 1.748429272839e-05, "loss": 0.6663, "step": 3732 }, { "epoch": 0.4714499960533586, "grad_norm": 1.7890625, "learning_rate": 1.7482968513309458e-05, "loss": 0.6729, "step": 3733 }, { "epoch": 0.47157628857841977, "grad_norm": 1.890625, "learning_rate": 1.748164399997581e-05, "loss": 0.7608, "step": 3734 }, { "epoch": 0.47170258110348096, "grad_norm": 1.9453125, "learning_rate": 1.7480319188441854e-05, "loss": 0.6823, "step": 3735 }, { "epoch": 0.4718288736285421, "grad_norm": 1.65625, "learning_rate": 1.7478994078760393e-05, "loss": 0.6337, "step": 3736 }, { "epoch": 0.4719551661536033, "grad_norm": 2.03125, "learning_rate": 1.7477668670984243e-05, "loss": 0.8416, "step": 3737 }, { "epoch": 0.47208145867866447, "grad_norm": 1.625, "learning_rate": 1.7476342965166233e-05, "loss": 0.7248, "step": 3738 }, { "epoch": 0.47220775120372566, "grad_norm": 1.8984375, "learning_rate": 1.7475016961359195e-05, "loss": 0.7663, "step": 3739 }, { "epoch": 0.4723340437287868, "grad_norm": 1.8203125, "learning_rate": 1.747369065961599e-05, "loss": 0.7664, "step": 3740 }, { "epoch": 0.472460336253848, "grad_norm": 1.796875, "learning_rate": 1.7472364059989473e-05, "loss": 0.6948, "step": 3741 }, { "epoch": 0.47258662877890917, "grad_norm": 2.015625, "learning_rate": 1.7471037162532525e-05, "loss": 0.7482, "step": 3742 }, { "epoch": 0.4727129213039703, "grad_norm": 1.8046875, "learning_rate": 1.746970996729803e-05, "loss": 0.6573, "step": 3743 }, { "epoch": 0.4728392138290315, "grad_norm": 1.9296875, "learning_rate": 1.746838247433889e-05, "loss": 0.6755, "step": 3744 }, { "epoch": 0.4729655063540927, "grad_norm": 2.0625, "learning_rate": 1.746705468370801e-05, "loss": 0.6434, "step": 3745 }, { "epoch": 0.4730917988791538, "grad_norm": 1.8671875, "learning_rate": 1.746572659545832e-05, "loss": 0.667, "step": 3746 }, { "epoch": 0.473218091404215, "grad_norm": 1.90625, "learning_rate": 1.7464398209642744e-05, "loss": 0.6862, "step": 3747 }, { "epoch": 0.4733443839292762, "grad_norm": 1.7890625, "learning_rate": 1.746306952631424e-05, "loss": 0.6882, "step": 3748 }, { "epoch": 0.4734706764543374, "grad_norm": 1.734375, "learning_rate": 1.7461740545525758e-05, "loss": 0.6235, "step": 3749 }, { "epoch": 0.4735969689793985, "grad_norm": 1.796875, "learning_rate": 1.7460411267330273e-05, "loss": 0.6239, "step": 3750 }, { "epoch": 0.4737232615044597, "grad_norm": 1.75, "learning_rate": 1.745908169178076e-05, "loss": 0.6751, "step": 3751 }, { "epoch": 0.4738495540295209, "grad_norm": 1.6484375, "learning_rate": 1.745775181893022e-05, "loss": 0.6182, "step": 3752 }, { "epoch": 0.473975846554582, "grad_norm": 1.7421875, "learning_rate": 1.7456421648831658e-05, "loss": 0.6784, "step": 3753 }, { "epoch": 0.4741021390796432, "grad_norm": 1.6875, "learning_rate": 1.7455091181538087e-05, "loss": 0.6141, "step": 3754 }, { "epoch": 0.4742284316047044, "grad_norm": 1.8046875, "learning_rate": 1.745376041710254e-05, "loss": 0.7248, "step": 3755 }, { "epoch": 0.4743547241297656, "grad_norm": 1.7109375, "learning_rate": 1.7452429355578053e-05, "loss": 0.69, "step": 3756 }, { "epoch": 0.4744810166548267, "grad_norm": 1.875, "learning_rate": 1.7451097997017683e-05, "loss": 0.6893, "step": 3757 }, { "epoch": 0.4746073091798879, "grad_norm": 1.734375, "learning_rate": 1.7449766341474493e-05, "loss": 0.6849, "step": 3758 }, { "epoch": 0.4747336017049491, "grad_norm": 1.625, "learning_rate": 1.7448434389001562e-05, "loss": 0.5979, "step": 3759 }, { "epoch": 0.47485989423001024, "grad_norm": 1.9375, "learning_rate": 1.7447102139651975e-05, "loss": 0.7446, "step": 3760 }, { "epoch": 0.4749861867550714, "grad_norm": 1.796875, "learning_rate": 1.744576959347884e-05, "loss": 0.6642, "step": 3761 }, { "epoch": 0.4751124792801326, "grad_norm": 1.8359375, "learning_rate": 1.7444436750535258e-05, "loss": 0.6381, "step": 3762 }, { "epoch": 0.4752387718051938, "grad_norm": 1.875, "learning_rate": 1.744310361087436e-05, "loss": 0.643, "step": 3763 }, { "epoch": 0.47536506433025494, "grad_norm": 1.9609375, "learning_rate": 1.7441770174549276e-05, "loss": 0.6844, "step": 3764 }, { "epoch": 0.4754913568553161, "grad_norm": 1.7109375, "learning_rate": 1.744043644161316e-05, "loss": 0.6359, "step": 3765 }, { "epoch": 0.4756176493803773, "grad_norm": 1.953125, "learning_rate": 1.743910241211917e-05, "loss": 0.7762, "step": 3766 }, { "epoch": 0.47574394190543845, "grad_norm": 1.890625, "learning_rate": 1.7437768086120476e-05, "loss": 0.7451, "step": 3767 }, { "epoch": 0.47587023443049964, "grad_norm": 1.828125, "learning_rate": 1.7436433463670262e-05, "loss": 0.7328, "step": 3768 }, { "epoch": 0.47599652695556083, "grad_norm": 1.703125, "learning_rate": 1.7435098544821722e-05, "loss": 0.6292, "step": 3769 }, { "epoch": 0.476122819480622, "grad_norm": 2.0625, "learning_rate": 1.743376332962806e-05, "loss": 0.7442, "step": 3770 }, { "epoch": 0.47624911200568315, "grad_norm": 1.78125, "learning_rate": 1.7432427818142496e-05, "loss": 0.6606, "step": 3771 }, { "epoch": 0.47637540453074434, "grad_norm": 1.703125, "learning_rate": 1.7431092010418266e-05, "loss": 0.5999, "step": 3772 }, { "epoch": 0.47650169705580553, "grad_norm": 1.7734375, "learning_rate": 1.7429755906508603e-05, "loss": 0.6434, "step": 3773 }, { "epoch": 0.47662798958086666, "grad_norm": 1.765625, "learning_rate": 1.742841950646677e-05, "loss": 0.6072, "step": 3774 }, { "epoch": 0.47675428210592785, "grad_norm": 1.8125, "learning_rate": 1.7427082810346024e-05, "loss": 0.6789, "step": 3775 }, { "epoch": 0.47688057463098904, "grad_norm": 1.8515625, "learning_rate": 1.7425745818199646e-05, "loss": 0.6308, "step": 3776 }, { "epoch": 0.4770068671560502, "grad_norm": 1.8046875, "learning_rate": 1.7424408530080924e-05, "loss": 0.7054, "step": 3777 }, { "epoch": 0.47713315968111136, "grad_norm": 1.7265625, "learning_rate": 1.7423070946043168e-05, "loss": 0.5734, "step": 3778 }, { "epoch": 0.47725945220617255, "grad_norm": 1.8046875, "learning_rate": 1.742173306613968e-05, "loss": 0.6602, "step": 3779 }, { "epoch": 0.47738574473123374, "grad_norm": 1.9921875, "learning_rate": 1.7420394890423786e-05, "loss": 0.7473, "step": 3780 }, { "epoch": 0.4775120372562949, "grad_norm": 2.0, "learning_rate": 1.741905641894883e-05, "loss": 0.7144, "step": 3781 }, { "epoch": 0.47763832978135606, "grad_norm": 1.7578125, "learning_rate": 1.741771765176815e-05, "loss": 0.6333, "step": 3782 }, { "epoch": 0.47776462230641725, "grad_norm": 1.6015625, "learning_rate": 1.741637858893511e-05, "loss": 0.6056, "step": 3783 }, { "epoch": 0.4778909148314784, "grad_norm": 1.671875, "learning_rate": 1.7415039230503085e-05, "loss": 0.6661, "step": 3784 }, { "epoch": 0.4780172073565396, "grad_norm": 1.9296875, "learning_rate": 1.741369957652546e-05, "loss": 0.7061, "step": 3785 }, { "epoch": 0.47814349988160076, "grad_norm": 1.9765625, "learning_rate": 1.7412359627055625e-05, "loss": 0.7672, "step": 3786 }, { "epoch": 0.47826979240666195, "grad_norm": 1.6640625, "learning_rate": 1.7411019382146987e-05, "loss": 0.6167, "step": 3787 }, { "epoch": 0.4783960849317231, "grad_norm": 1.7265625, "learning_rate": 1.7409678841852968e-05, "loss": 0.5958, "step": 3788 }, { "epoch": 0.4785223774567843, "grad_norm": 3.671875, "learning_rate": 1.7408338006226998e-05, "loss": 0.7068, "step": 3789 }, { "epoch": 0.47864866998184546, "grad_norm": 1.765625, "learning_rate": 1.740699687532252e-05, "loss": 0.6888, "step": 3790 }, { "epoch": 0.4787749625069066, "grad_norm": 1.84375, "learning_rate": 1.7405655449192985e-05, "loss": 0.768, "step": 3791 }, { "epoch": 0.4789012550319678, "grad_norm": 2.34375, "learning_rate": 1.7404313727891865e-05, "loss": 0.7135, "step": 3792 }, { "epoch": 0.479027547557029, "grad_norm": 1.7734375, "learning_rate": 1.7402971711472632e-05, "loss": 0.6605, "step": 3793 }, { "epoch": 0.47915384008209017, "grad_norm": 1.75, "learning_rate": 1.7401629399988777e-05, "loss": 0.6256, "step": 3794 }, { "epoch": 0.4792801326071513, "grad_norm": 1.8046875, "learning_rate": 1.7400286793493802e-05, "loss": 0.6625, "step": 3795 }, { "epoch": 0.4794064251322125, "grad_norm": 1.875, "learning_rate": 1.7398943892041223e-05, "loss": 0.6647, "step": 3796 }, { "epoch": 0.4795327176572737, "grad_norm": 1.8671875, "learning_rate": 1.739760069568456e-05, "loss": 0.6483, "step": 3797 }, { "epoch": 0.4796590101823348, "grad_norm": 1.7578125, "learning_rate": 1.7396257204477352e-05, "loss": 0.7408, "step": 3798 }, { "epoch": 0.479785302707396, "grad_norm": 2.015625, "learning_rate": 1.7394913418473145e-05, "loss": 0.6282, "step": 3799 }, { "epoch": 0.4799115952324572, "grad_norm": 1.7734375, "learning_rate": 1.7393569337725505e-05, "loss": 0.595, "step": 3800 }, { "epoch": 0.4800378877575184, "grad_norm": 1.8359375, "learning_rate": 1.7392224962287997e-05, "loss": 0.6566, "step": 3801 }, { "epoch": 0.4801641802825795, "grad_norm": 1.9140625, "learning_rate": 1.739088029221421e-05, "loss": 0.668, "step": 3802 }, { "epoch": 0.4802904728076407, "grad_norm": 1.8515625, "learning_rate": 1.738953532755773e-05, "loss": 0.7367, "step": 3803 }, { "epoch": 0.4804167653327019, "grad_norm": 1.8984375, "learning_rate": 1.7388190068372174e-05, "loss": 0.6813, "step": 3804 }, { "epoch": 0.480543057857763, "grad_norm": 1.8046875, "learning_rate": 1.738684451471116e-05, "loss": 0.6313, "step": 3805 }, { "epoch": 0.4806693503828242, "grad_norm": 1.6796875, "learning_rate": 1.7385498666628312e-05, "loss": 0.6724, "step": 3806 }, { "epoch": 0.4807956429078854, "grad_norm": 1.8359375, "learning_rate": 1.7384152524177277e-05, "loss": 0.6365, "step": 3807 }, { "epoch": 0.4809219354329466, "grad_norm": 1.6484375, "learning_rate": 1.738280608741171e-05, "loss": 0.6456, "step": 3808 }, { "epoch": 0.4810482279580077, "grad_norm": 1.875, "learning_rate": 1.7381459356385275e-05, "loss": 0.6585, "step": 3809 }, { "epoch": 0.4811745204830689, "grad_norm": 1.671875, "learning_rate": 1.738011233115165e-05, "loss": 0.6347, "step": 3810 }, { "epoch": 0.4813008130081301, "grad_norm": 1.8046875, "learning_rate": 1.737876501176452e-05, "loss": 0.6841, "step": 3811 }, { "epoch": 0.48142710553319124, "grad_norm": 1.78125, "learning_rate": 1.7377417398277593e-05, "loss": 0.6433, "step": 3812 }, { "epoch": 0.4815533980582524, "grad_norm": 1.7421875, "learning_rate": 1.7376069490744577e-05, "loss": 0.702, "step": 3813 }, { "epoch": 0.4816796905833136, "grad_norm": 1.8359375, "learning_rate": 1.73747212892192e-05, "loss": 0.636, "step": 3814 }, { "epoch": 0.48180598310837475, "grad_norm": 1.828125, "learning_rate": 1.7373372793755194e-05, "loss": 0.6614, "step": 3815 }, { "epoch": 0.48193227563343594, "grad_norm": 1.8515625, "learning_rate": 1.7372024004406304e-05, "loss": 0.6291, "step": 3816 }, { "epoch": 0.4820585681584971, "grad_norm": 1.8984375, "learning_rate": 1.73706749212263e-05, "loss": 0.6536, "step": 3817 }, { "epoch": 0.4821848606835583, "grad_norm": 1.8046875, "learning_rate": 1.7369325544268943e-05, "loss": 0.7501, "step": 3818 }, { "epoch": 0.48231115320861945, "grad_norm": 1.71875, "learning_rate": 1.7367975873588022e-05, "loss": 0.605, "step": 3819 }, { "epoch": 0.48243744573368064, "grad_norm": 1.90625, "learning_rate": 1.7366625909237328e-05, "loss": 0.7628, "step": 3820 }, { "epoch": 0.4825637382587418, "grad_norm": 1.703125, "learning_rate": 1.7365275651270666e-05, "loss": 0.6074, "step": 3821 }, { "epoch": 0.48269003078380296, "grad_norm": 1.6796875, "learning_rate": 1.7363925099741867e-05, "loss": 0.5817, "step": 3822 }, { "epoch": 0.48281632330886415, "grad_norm": 1.859375, "learning_rate": 1.7362574254704743e-05, "loss": 0.7042, "step": 3823 }, { "epoch": 0.48294261583392534, "grad_norm": 1.6875, "learning_rate": 1.7361223116213143e-05, "loss": 0.663, "step": 3824 }, { "epoch": 0.4830689083589865, "grad_norm": 1.7734375, "learning_rate": 1.735987168432092e-05, "loss": 0.7305, "step": 3825 }, { "epoch": 0.48319520088404766, "grad_norm": 1.9296875, "learning_rate": 1.735851995908194e-05, "loss": 0.6678, "step": 3826 }, { "epoch": 0.48332149340910885, "grad_norm": 1.875, "learning_rate": 1.735716794055008e-05, "loss": 0.7399, "step": 3827 }, { "epoch": 0.48344778593417004, "grad_norm": 1.7890625, "learning_rate": 1.7355815628779227e-05, "loss": 0.7034, "step": 3828 }, { "epoch": 0.48357407845923117, "grad_norm": 1.7734375, "learning_rate": 1.735446302382328e-05, "loss": 0.719, "step": 3829 }, { "epoch": 0.48370037098429236, "grad_norm": 1.859375, "learning_rate": 1.735311012573615e-05, "loss": 0.6411, "step": 3830 }, { "epoch": 0.48382666350935355, "grad_norm": 1.8125, "learning_rate": 1.735175693457176e-05, "loss": 0.6976, "step": 3831 }, { "epoch": 0.48395295603441474, "grad_norm": 1.703125, "learning_rate": 1.735040345038405e-05, "loss": 0.6213, "step": 3832 }, { "epoch": 0.48407924855947587, "grad_norm": 1.7578125, "learning_rate": 1.734904967322696e-05, "loss": 0.7119, "step": 3833 }, { "epoch": 0.48420554108453706, "grad_norm": 1.7890625, "learning_rate": 1.7347695603154455e-05, "loss": 0.6316, "step": 3834 }, { "epoch": 0.48433183360959825, "grad_norm": 1.8203125, "learning_rate": 1.7346341240220498e-05, "loss": 0.6677, "step": 3835 }, { "epoch": 0.4844581261346594, "grad_norm": 1.875, "learning_rate": 1.734498658447908e-05, "loss": 0.7177, "step": 3836 }, { "epoch": 0.4845844186597206, "grad_norm": 1.78125, "learning_rate": 1.734363163598418e-05, "loss": 0.6784, "step": 3837 }, { "epoch": 0.48471071118478176, "grad_norm": 2.265625, "learning_rate": 1.7342276394789815e-05, "loss": 0.6886, "step": 3838 }, { "epoch": 0.48483700370984295, "grad_norm": 1.890625, "learning_rate": 1.7340920860949997e-05, "loss": 0.677, "step": 3839 }, { "epoch": 0.4849632962349041, "grad_norm": 1.8125, "learning_rate": 1.7339565034518754e-05, "loss": 0.6875, "step": 3840 }, { "epoch": 0.4850895887599653, "grad_norm": 1.8671875, "learning_rate": 1.733820891555013e-05, "loss": 0.6302, "step": 3841 }, { "epoch": 0.48521588128502646, "grad_norm": 1.671875, "learning_rate": 1.7336852504098172e-05, "loss": 0.5952, "step": 3842 }, { "epoch": 0.4853421738100876, "grad_norm": 1.8203125, "learning_rate": 1.733549580021695e-05, "loss": 0.6333, "step": 3843 }, { "epoch": 0.4854684663351488, "grad_norm": 1.78125, "learning_rate": 1.7334138803960527e-05, "loss": 0.7149, "step": 3844 }, { "epoch": 0.48559475886021, "grad_norm": 1.765625, "learning_rate": 1.7332781515383e-05, "loss": 0.714, "step": 3845 }, { "epoch": 0.4857210513852711, "grad_norm": 1.8671875, "learning_rate": 1.7331423934538462e-05, "loss": 0.717, "step": 3846 }, { "epoch": 0.4858473439103323, "grad_norm": 1.6640625, "learning_rate": 1.7330066061481025e-05, "loss": 0.6022, "step": 3847 }, { "epoch": 0.4859736364353935, "grad_norm": 2.0, "learning_rate": 1.7328707896264814e-05, "loss": 0.7313, "step": 3848 }, { "epoch": 0.4860999289604547, "grad_norm": 1.796875, "learning_rate": 1.7327349438943956e-05, "loss": 0.6931, "step": 3849 }, { "epoch": 0.4862262214855158, "grad_norm": 1.796875, "learning_rate": 1.7325990689572598e-05, "loss": 0.7069, "step": 3850 }, { "epoch": 0.486352514010577, "grad_norm": 1.96875, "learning_rate": 1.73246316482049e-05, "loss": 0.7458, "step": 3851 }, { "epoch": 0.4864788065356382, "grad_norm": 1.6328125, "learning_rate": 1.7323272314895022e-05, "loss": 0.6531, "step": 3852 }, { "epoch": 0.4866050990606993, "grad_norm": 1.796875, "learning_rate": 1.732191268969715e-05, "loss": 0.6533, "step": 3853 }, { "epoch": 0.4867313915857605, "grad_norm": 1.765625, "learning_rate": 1.7320552772665474e-05, "loss": 0.6887, "step": 3854 }, { "epoch": 0.4868576841108217, "grad_norm": 1.6796875, "learning_rate": 1.73191925638542e-05, "loss": 0.6298, "step": 3855 }, { "epoch": 0.4869839766358829, "grad_norm": 1.8828125, "learning_rate": 1.731783206331754e-05, "loss": 0.7281, "step": 3856 }, { "epoch": 0.487110269160944, "grad_norm": 1.828125, "learning_rate": 1.7316471271109717e-05, "loss": 0.6034, "step": 3857 }, { "epoch": 0.4872365616860052, "grad_norm": 1.859375, "learning_rate": 1.7315110187284975e-05, "loss": 0.613, "step": 3858 }, { "epoch": 0.4873628542110664, "grad_norm": 1.796875, "learning_rate": 1.7313748811897558e-05, "loss": 0.657, "step": 3859 }, { "epoch": 0.48748914673612753, "grad_norm": 1.703125, "learning_rate": 1.731238714500173e-05, "loss": 0.5907, "step": 3860 }, { "epoch": 0.4876154392611887, "grad_norm": 1.8125, "learning_rate": 1.7311025186651763e-05, "loss": 0.5882, "step": 3861 }, { "epoch": 0.4877417317862499, "grad_norm": 1.796875, "learning_rate": 1.730966293690194e-05, "loss": 0.7274, "step": 3862 }, { "epoch": 0.4878680243113111, "grad_norm": 1.6796875, "learning_rate": 1.730830039580656e-05, "loss": 0.5847, "step": 3863 }, { "epoch": 0.48799431683637223, "grad_norm": 1.9765625, "learning_rate": 1.7306937563419933e-05, "loss": 0.6761, "step": 3864 }, { "epoch": 0.4881206093614334, "grad_norm": 3.625, "learning_rate": 1.7305574439796374e-05, "loss": 0.6723, "step": 3865 }, { "epoch": 0.4882469018864946, "grad_norm": 1.84375, "learning_rate": 1.7304211024990212e-05, "loss": 0.7059, "step": 3866 }, { "epoch": 0.48837319441155574, "grad_norm": 1.7265625, "learning_rate": 1.730284731905579e-05, "loss": 0.6418, "step": 3867 }, { "epoch": 0.48849948693661693, "grad_norm": 1.8671875, "learning_rate": 1.7301483322047467e-05, "loss": 0.6511, "step": 3868 }, { "epoch": 0.4886257794616781, "grad_norm": 1.8515625, "learning_rate": 1.7300119034019607e-05, "loss": 0.7386, "step": 3869 }, { "epoch": 0.4887520719867393, "grad_norm": 1.9921875, "learning_rate": 1.7298754455026582e-05, "loss": 0.7071, "step": 3870 }, { "epoch": 0.48887836451180045, "grad_norm": 1.8984375, "learning_rate": 1.729738958512279e-05, "loss": 0.7458, "step": 3871 }, { "epoch": 0.48900465703686163, "grad_norm": 1.7578125, "learning_rate": 1.729602442436262e-05, "loss": 0.6235, "step": 3872 }, { "epoch": 0.4891309495619228, "grad_norm": 1.859375, "learning_rate": 1.729465897280049e-05, "loss": 0.7203, "step": 3873 }, { "epoch": 0.48925724208698396, "grad_norm": 1.796875, "learning_rate": 1.7293293230490828e-05, "loss": 0.7414, "step": 3874 }, { "epoch": 0.48938353461204515, "grad_norm": 1.953125, "learning_rate": 1.729192719748806e-05, "loss": 0.7091, "step": 3875 }, { "epoch": 0.48950982713710633, "grad_norm": 1.7890625, "learning_rate": 1.729056087384664e-05, "loss": 0.7418, "step": 3876 }, { "epoch": 0.48963611966216747, "grad_norm": 1.8515625, "learning_rate": 1.7289194259621023e-05, "loss": 0.6026, "step": 3877 }, { "epoch": 0.48976241218722866, "grad_norm": 1.7578125, "learning_rate": 1.7287827354865684e-05, "loss": 0.6239, "step": 3878 }, { "epoch": 0.48988870471228985, "grad_norm": 1.671875, "learning_rate": 1.7286460159635097e-05, "loss": 0.6695, "step": 3879 }, { "epoch": 0.49001499723735104, "grad_norm": 1.6875, "learning_rate": 1.728509267398376e-05, "loss": 0.5865, "step": 3880 }, { "epoch": 0.49014128976241217, "grad_norm": 1.78125, "learning_rate": 1.7283724897966174e-05, "loss": 0.6712, "step": 3881 }, { "epoch": 0.49026758228747336, "grad_norm": 1.8046875, "learning_rate": 1.728235683163686e-05, "loss": 0.662, "step": 3882 }, { "epoch": 0.49039387481253455, "grad_norm": 1.8828125, "learning_rate": 1.728098847505034e-05, "loss": 0.7016, "step": 3883 }, { "epoch": 0.4905201673375957, "grad_norm": 1.859375, "learning_rate": 1.7279619828261164e-05, "loss": 0.6789, "step": 3884 }, { "epoch": 0.49064645986265687, "grad_norm": 1.984375, "learning_rate": 1.7278250891323867e-05, "loss": 0.6744, "step": 3885 }, { "epoch": 0.49077275238771806, "grad_norm": 1.7890625, "learning_rate": 1.7276881664293024e-05, "loss": 0.7001, "step": 3886 }, { "epoch": 0.49089904491277925, "grad_norm": 1.84375, "learning_rate": 1.7275512147223207e-05, "loss": 0.6632, "step": 3887 }, { "epoch": 0.4910253374378404, "grad_norm": 1.8984375, "learning_rate": 1.7274142340168997e-05, "loss": 0.7136, "step": 3888 }, { "epoch": 0.49115162996290157, "grad_norm": 1.765625, "learning_rate": 1.7272772243184997e-05, "loss": 0.6974, "step": 3889 }, { "epoch": 0.49127792248796276, "grad_norm": 1.6875, "learning_rate": 1.727140185632581e-05, "loss": 0.6063, "step": 3890 }, { "epoch": 0.4914042150130239, "grad_norm": 1.8828125, "learning_rate": 1.727003117964606e-05, "loss": 0.7214, "step": 3891 }, { "epoch": 0.4915305075380851, "grad_norm": 1.859375, "learning_rate": 1.726866021320038e-05, "loss": 0.7652, "step": 3892 }, { "epoch": 0.49165680006314627, "grad_norm": 1.8125, "learning_rate": 1.726728895704341e-05, "loss": 0.7286, "step": 3893 }, { "epoch": 0.49178309258820746, "grad_norm": 1.921875, "learning_rate": 1.726591741122981e-05, "loss": 0.6452, "step": 3894 }, { "epoch": 0.4919093851132686, "grad_norm": 1.8828125, "learning_rate": 1.7264545575814238e-05, "loss": 0.657, "step": 3895 }, { "epoch": 0.4920356776383298, "grad_norm": 2.34375, "learning_rate": 1.726317345085138e-05, "loss": 0.6856, "step": 3896 }, { "epoch": 0.49216197016339097, "grad_norm": 1.796875, "learning_rate": 1.726180103639592e-05, "loss": 0.6176, "step": 3897 }, { "epoch": 0.4922882626884521, "grad_norm": 1.984375, "learning_rate": 1.7260428332502565e-05, "loss": 0.6293, "step": 3898 }, { "epoch": 0.4924145552135133, "grad_norm": 2.078125, "learning_rate": 1.7259055339226022e-05, "loss": 0.6784, "step": 3899 }, { "epoch": 0.4925408477385745, "grad_norm": 1.8203125, "learning_rate": 1.7257682056621022e-05, "loss": 0.6277, "step": 3900 }, { "epoch": 0.4926671402636357, "grad_norm": 1.703125, "learning_rate": 1.725630848474229e-05, "loss": 0.6295, "step": 3901 }, { "epoch": 0.4927934327886968, "grad_norm": 1.8359375, "learning_rate": 1.7254934623644585e-05, "loss": 0.7075, "step": 3902 }, { "epoch": 0.492919725313758, "grad_norm": 1.8984375, "learning_rate": 1.725356047338266e-05, "loss": 0.6918, "step": 3903 }, { "epoch": 0.4930460178388192, "grad_norm": 1.640625, "learning_rate": 1.7252186034011282e-05, "loss": 0.5975, "step": 3904 }, { "epoch": 0.4931723103638803, "grad_norm": 1.734375, "learning_rate": 1.725081130558524e-05, "loss": 0.628, "step": 3905 }, { "epoch": 0.4932986028889415, "grad_norm": 1.8515625, "learning_rate": 1.724943628815932e-05, "loss": 0.6683, "step": 3906 }, { "epoch": 0.4934248954140027, "grad_norm": 1.6796875, "learning_rate": 1.7248060981788336e-05, "loss": 0.607, "step": 3907 }, { "epoch": 0.49355118793906383, "grad_norm": 1.8125, "learning_rate": 1.7246685386527098e-05, "loss": 0.7492, "step": 3908 }, { "epoch": 0.493677480464125, "grad_norm": 1.703125, "learning_rate": 1.7245309502430434e-05, "loss": 0.66, "step": 3909 }, { "epoch": 0.4938037729891862, "grad_norm": 1.8984375, "learning_rate": 1.7243933329553187e-05, "loss": 0.6764, "step": 3910 }, { "epoch": 0.4939300655142474, "grad_norm": 1.78125, "learning_rate": 1.7242556867950202e-05, "loss": 0.6521, "step": 3911 }, { "epoch": 0.49405635803930853, "grad_norm": 1.796875, "learning_rate": 1.7241180117676348e-05, "loss": 0.7162, "step": 3912 }, { "epoch": 0.4941826505643697, "grad_norm": 1.78125, "learning_rate": 1.7239803078786494e-05, "loss": 0.6182, "step": 3913 }, { "epoch": 0.4943089430894309, "grad_norm": 1.703125, "learning_rate": 1.7238425751335527e-05, "loss": 0.6544, "step": 3914 }, { "epoch": 0.49443523561449204, "grad_norm": 1.7421875, "learning_rate": 1.7237048135378348e-05, "loss": 0.5803, "step": 3915 }, { "epoch": 0.49456152813955323, "grad_norm": 1.703125, "learning_rate": 1.723567023096986e-05, "loss": 0.6549, "step": 3916 }, { "epoch": 0.4946878206646144, "grad_norm": 1.8125, "learning_rate": 1.7234292038164984e-05, "loss": 0.6393, "step": 3917 }, { "epoch": 0.4948141131896756, "grad_norm": 1.6875, "learning_rate": 1.7232913557018652e-05, "loss": 0.6023, "step": 3918 }, { "epoch": 0.49494040571473674, "grad_norm": 1.6875, "learning_rate": 1.7231534787585806e-05, "loss": 0.5885, "step": 3919 }, { "epoch": 0.49506669823979793, "grad_norm": 1.6796875, "learning_rate": 1.7230155729921403e-05, "loss": 0.6496, "step": 3920 }, { "epoch": 0.4951929907648591, "grad_norm": 1.9296875, "learning_rate": 1.7228776384080406e-05, "loss": 0.7042, "step": 3921 }, { "epoch": 0.49531928328992025, "grad_norm": 1.7734375, "learning_rate": 1.72273967501178e-05, "loss": 0.7225, "step": 3922 }, { "epoch": 0.49544557581498144, "grad_norm": 1.859375, "learning_rate": 1.7226016828088562e-05, "loss": 0.745, "step": 3923 }, { "epoch": 0.49557186834004263, "grad_norm": 1.7578125, "learning_rate": 1.72246366180477e-05, "loss": 0.618, "step": 3924 }, { "epoch": 0.4956981608651038, "grad_norm": 1.7109375, "learning_rate": 1.7223256120050225e-05, "loss": 0.5969, "step": 3925 }, { "epoch": 0.49582445339016495, "grad_norm": 1.7109375, "learning_rate": 1.7221875334151157e-05, "loss": 0.7004, "step": 3926 }, { "epoch": 0.49595074591522614, "grad_norm": 1.8984375, "learning_rate": 1.7220494260405534e-05, "loss": 0.7129, "step": 3927 }, { "epoch": 0.49607703844028733, "grad_norm": 2.15625, "learning_rate": 1.72191128988684e-05, "loss": 0.8106, "step": 3928 }, { "epoch": 0.49620333096534847, "grad_norm": 1.75, "learning_rate": 1.7217731249594817e-05, "loss": 0.6879, "step": 3929 }, { "epoch": 0.49632962349040965, "grad_norm": 1.8203125, "learning_rate": 1.721634931263985e-05, "loss": 0.7094, "step": 3930 }, { "epoch": 0.49645591601547084, "grad_norm": 1.875, "learning_rate": 1.7214967088058582e-05, "loss": 0.674, "step": 3931 }, { "epoch": 0.49658220854053203, "grad_norm": 1.7265625, "learning_rate": 1.7213584575906104e-05, "loss": 0.6444, "step": 3932 }, { "epoch": 0.49670850106559317, "grad_norm": 2.046875, "learning_rate": 1.721220177623752e-05, "loss": 0.761, "step": 3933 }, { "epoch": 0.49683479359065436, "grad_norm": 1.65625, "learning_rate": 1.7210818689107944e-05, "loss": 0.6028, "step": 3934 }, { "epoch": 0.49696108611571554, "grad_norm": 1.71875, "learning_rate": 1.7209435314572506e-05, "loss": 0.6969, "step": 3935 }, { "epoch": 0.4970873786407767, "grad_norm": 1.7109375, "learning_rate": 1.7208051652686335e-05, "loss": 0.6234, "step": 3936 }, { "epoch": 0.49721367116583787, "grad_norm": 1.890625, "learning_rate": 1.7206667703504592e-05, "loss": 0.685, "step": 3937 }, { "epoch": 0.49733996369089906, "grad_norm": 1.84375, "learning_rate": 1.720528346708243e-05, "loss": 0.6365, "step": 3938 }, { "epoch": 0.49746625621596025, "grad_norm": 1.9375, "learning_rate": 1.720389894347503e-05, "loss": 0.669, "step": 3939 }, { "epoch": 0.4975925487410214, "grad_norm": 1.7421875, "learning_rate": 1.7202514132737566e-05, "loss": 0.6357, "step": 3940 }, { "epoch": 0.49771884126608257, "grad_norm": 1.8203125, "learning_rate": 1.720112903492523e-05, "loss": 0.6818, "step": 3941 }, { "epoch": 0.49784513379114376, "grad_norm": 1.703125, "learning_rate": 1.7199743650093245e-05, "loss": 0.6966, "step": 3942 }, { "epoch": 0.4979714263162049, "grad_norm": 1.765625, "learning_rate": 1.7198357978296817e-05, "loss": 0.7448, "step": 3943 }, { "epoch": 0.4980977188412661, "grad_norm": 1.84375, "learning_rate": 1.719697201959118e-05, "loss": 0.6852, "step": 3944 }, { "epoch": 0.49822401136632727, "grad_norm": 2.171875, "learning_rate": 1.719558577403157e-05, "loss": 0.7603, "step": 3945 }, { "epoch": 0.4983503038913884, "grad_norm": 1.8046875, "learning_rate": 1.7194199241673244e-05, "loss": 0.6369, "step": 3946 }, { "epoch": 0.4984765964164496, "grad_norm": 1.9453125, "learning_rate": 1.7192812422571465e-05, "loss": 0.5908, "step": 3947 }, { "epoch": 0.4986028889415108, "grad_norm": 2.03125, "learning_rate": 1.7191425316781508e-05, "loss": 0.7217, "step": 3948 }, { "epoch": 0.49872918146657197, "grad_norm": 1.7578125, "learning_rate": 1.7190037924358657e-05, "loss": 0.6353, "step": 3949 }, { "epoch": 0.4988554739916331, "grad_norm": 1.75, "learning_rate": 1.7188650245358215e-05, "loss": 0.7029, "step": 3950 }, { "epoch": 0.4989817665166943, "grad_norm": 1.71875, "learning_rate": 1.7187262279835488e-05, "loss": 0.6289, "step": 3951 }, { "epoch": 0.4991080590417555, "grad_norm": 1.765625, "learning_rate": 1.7185874027845798e-05, "loss": 0.6189, "step": 3952 }, { "epoch": 0.4992343515668166, "grad_norm": 1.8671875, "learning_rate": 1.718448548944448e-05, "loss": 0.6238, "step": 3953 }, { "epoch": 0.4993606440918778, "grad_norm": 1.6640625, "learning_rate": 1.7183096664686873e-05, "loss": 0.6197, "step": 3954 }, { "epoch": 0.499486936616939, "grad_norm": 1.7890625, "learning_rate": 1.7181707553628335e-05, "loss": 0.6204, "step": 3955 }, { "epoch": 0.4996132291420002, "grad_norm": 1.6796875, "learning_rate": 1.7180318156324235e-05, "loss": 0.6463, "step": 3956 }, { "epoch": 0.4997395216670613, "grad_norm": 1.890625, "learning_rate": 1.7178928472829945e-05, "loss": 0.7317, "step": 3957 }, { "epoch": 0.4998658141921225, "grad_norm": 2.03125, "learning_rate": 1.7177538503200858e-05, "loss": 0.8871, "step": 3958 }, { "epoch": 0.4999921067171837, "grad_norm": 1.8828125, "learning_rate": 1.7176148247492373e-05, "loss": 0.6657, "step": 3959 }, { "epoch": 0.4999921067171837, "eval_loss": 0.8082488775253296, "eval_runtime": 4334.8769, "eval_samples_per_second": 11.497, "eval_steps_per_second": 3.833, "step": 3959 }, { "epoch": 0.5001183992422449, "grad_norm": 1.7890625, "learning_rate": 1.7174757705759906e-05, "loss": 0.6539, "step": 3960 }, { "epoch": 0.500244691767306, "grad_norm": 1.875, "learning_rate": 1.7173366878058877e-05, "loss": 0.6692, "step": 3961 }, { "epoch": 0.5003709842923671, "grad_norm": 1.796875, "learning_rate": 1.7171975764444723e-05, "loss": 0.6833, "step": 3962 }, { "epoch": 0.5004972768174284, "grad_norm": 1.6484375, "learning_rate": 1.717058436497289e-05, "loss": 0.5914, "step": 3963 }, { "epoch": 0.5006235693424895, "grad_norm": 1.8359375, "learning_rate": 1.7169192679698837e-05, "loss": 0.6961, "step": 3964 }, { "epoch": 0.5007498618675507, "grad_norm": 1.8984375, "learning_rate": 1.716780070867803e-05, "loss": 0.6539, "step": 3965 }, { "epoch": 0.5008761543926119, "grad_norm": 1.90625, "learning_rate": 1.716640845196595e-05, "loss": 0.688, "step": 3966 }, { "epoch": 0.501002446917673, "grad_norm": 1.625, "learning_rate": 1.7165015909618094e-05, "loss": 0.6323, "step": 3967 }, { "epoch": 0.5011287394427343, "grad_norm": 1.6640625, "learning_rate": 1.716362308168996e-05, "loss": 0.6355, "step": 3968 }, { "epoch": 0.5012550319677954, "grad_norm": 1.8671875, "learning_rate": 1.7162229968237066e-05, "loss": 0.6738, "step": 3969 }, { "epoch": 0.5013813244928566, "grad_norm": 1.796875, "learning_rate": 1.7160836569314932e-05, "loss": 0.6791, "step": 3970 }, { "epoch": 0.5015076170179178, "grad_norm": 1.6875, "learning_rate": 1.7159442884979104e-05, "loss": 0.6528, "step": 3971 }, { "epoch": 0.5016339095429789, "grad_norm": 1.6484375, "learning_rate": 1.715804891528513e-05, "loss": 0.5721, "step": 3972 }, { "epoch": 0.5017602020680401, "grad_norm": 1.765625, "learning_rate": 1.7156654660288564e-05, "loss": 0.6518, "step": 3973 }, { "epoch": 0.5018864945931013, "grad_norm": 1.7421875, "learning_rate": 1.715526012004498e-05, "loss": 0.6755, "step": 3974 }, { "epoch": 0.5020127871181624, "grad_norm": 1.796875, "learning_rate": 1.7153865294609964e-05, "loss": 0.6361, "step": 3975 }, { "epoch": 0.5021390796432236, "grad_norm": 1.59375, "learning_rate": 1.715247018403911e-05, "loss": 0.6124, "step": 3976 }, { "epoch": 0.5022653721682848, "grad_norm": 1.8203125, "learning_rate": 1.715107478838802e-05, "loss": 0.5981, "step": 3977 }, { "epoch": 0.502391664693346, "grad_norm": 1.9140625, "learning_rate": 1.7149679107712313e-05, "loss": 0.7203, "step": 3978 }, { "epoch": 0.5025179572184071, "grad_norm": 1.9296875, "learning_rate": 1.714828314206762e-05, "loss": 0.7051, "step": 3979 }, { "epoch": 0.5026442497434683, "grad_norm": 1.8828125, "learning_rate": 1.714688689150958e-05, "loss": 0.6934, "step": 3980 }, { "epoch": 0.5027705422685295, "grad_norm": 2.140625, "learning_rate": 1.714549035609384e-05, "loss": 0.701, "step": 3981 }, { "epoch": 0.5028968347935907, "grad_norm": 1.703125, "learning_rate": 1.7144093535876063e-05, "loss": 0.6471, "step": 3982 }, { "epoch": 0.5030231273186518, "grad_norm": 1.765625, "learning_rate": 1.714269643091193e-05, "loss": 0.6422, "step": 3983 }, { "epoch": 0.503149419843713, "grad_norm": 1.65625, "learning_rate": 1.714129904125712e-05, "loss": 0.5958, "step": 3984 }, { "epoch": 0.5032757123687742, "grad_norm": 1.734375, "learning_rate": 1.7139901366967332e-05, "loss": 0.5806, "step": 3985 }, { "epoch": 0.5034020048938354, "grad_norm": 1.828125, "learning_rate": 1.7138503408098272e-05, "loss": 0.7086, "step": 3986 }, { "epoch": 0.5035282974188965, "grad_norm": 1.7265625, "learning_rate": 1.7137105164705658e-05, "loss": 0.6669, "step": 3987 }, { "epoch": 0.5036545899439577, "grad_norm": 1.765625, "learning_rate": 1.7135706636845224e-05, "loss": 0.6781, "step": 3988 }, { "epoch": 0.5037808824690189, "grad_norm": 1.875, "learning_rate": 1.7134307824572712e-05, "loss": 0.7076, "step": 3989 }, { "epoch": 0.50390717499408, "grad_norm": 2.0, "learning_rate": 1.7132908727943874e-05, "loss": 0.6557, "step": 3990 }, { "epoch": 0.5040334675191412, "grad_norm": 1.8359375, "learning_rate": 1.7131509347014475e-05, "loss": 0.7056, "step": 3991 }, { "epoch": 0.5041597600442024, "grad_norm": 1.8046875, "learning_rate": 1.713010968184029e-05, "loss": 0.7193, "step": 3992 }, { "epoch": 0.5042860525692635, "grad_norm": 1.8671875, "learning_rate": 1.7128709732477107e-05, "loss": 0.6975, "step": 3993 }, { "epoch": 0.5044123450943248, "grad_norm": 1.9140625, "learning_rate": 1.7127309498980725e-05, "loss": 0.7126, "step": 3994 }, { "epoch": 0.5045386376193859, "grad_norm": 1.8046875, "learning_rate": 1.7125908981406955e-05, "loss": 0.6233, "step": 3995 }, { "epoch": 0.504664930144447, "grad_norm": 1.84375, "learning_rate": 1.7124508179811617e-05, "loss": 0.5861, "step": 3996 }, { "epoch": 0.5047912226695083, "grad_norm": 2.046875, "learning_rate": 1.712310709425054e-05, "loss": 0.6352, "step": 3997 }, { "epoch": 0.5049175151945694, "grad_norm": 1.6953125, "learning_rate": 1.7121705724779577e-05, "loss": 0.7048, "step": 3998 }, { "epoch": 0.5050438077196306, "grad_norm": 1.9375, "learning_rate": 1.712030407145457e-05, "loss": 0.8434, "step": 3999 }, { "epoch": 0.5051701002446918, "grad_norm": 1.7265625, "learning_rate": 1.7118902134331397e-05, "loss": 0.7068, "step": 4000 }, { "epoch": 0.5052963927697529, "grad_norm": 1.859375, "learning_rate": 1.7117499913465935e-05, "loss": 0.6767, "step": 4001 }, { "epoch": 0.5054226852948142, "grad_norm": 1.84375, "learning_rate": 1.7116097408914065e-05, "loss": 0.6647, "step": 4002 }, { "epoch": 0.5055489778198753, "grad_norm": 2.09375, "learning_rate": 1.7114694620731694e-05, "loss": 0.7058, "step": 4003 }, { "epoch": 0.5056752703449364, "grad_norm": 1.8828125, "learning_rate": 1.7113291548974735e-05, "loss": 0.7034, "step": 4004 }, { "epoch": 0.5058015628699977, "grad_norm": 1.796875, "learning_rate": 1.7111888193699103e-05, "loss": 0.6981, "step": 4005 }, { "epoch": 0.5059278553950588, "grad_norm": 1.71875, "learning_rate": 1.711048455496074e-05, "loss": 0.5749, "step": 4006 }, { "epoch": 0.5060541479201199, "grad_norm": 1.671875, "learning_rate": 1.710908063281559e-05, "loss": 0.6042, "step": 4007 }, { "epoch": 0.5061804404451812, "grad_norm": 1.625, "learning_rate": 1.710767642731961e-05, "loss": 0.6304, "step": 4008 }, { "epoch": 0.5063067329702423, "grad_norm": 1.7734375, "learning_rate": 1.710627193852877e-05, "loss": 0.6285, "step": 4009 }, { "epoch": 0.5064330254953034, "grad_norm": 1.84375, "learning_rate": 1.710486716649904e-05, "loss": 0.5908, "step": 4010 }, { "epoch": 0.5065593180203647, "grad_norm": 2.75, "learning_rate": 1.7103462111286423e-05, "loss": 0.6999, "step": 4011 }, { "epoch": 0.5066856105454258, "grad_norm": 1.765625, "learning_rate": 1.7102056772946912e-05, "loss": 0.6138, "step": 4012 }, { "epoch": 0.5068119030704871, "grad_norm": 1.734375, "learning_rate": 1.710065115153653e-05, "loss": 0.6864, "step": 4013 }, { "epoch": 0.5069381955955482, "grad_norm": 1.78125, "learning_rate": 1.709924524711129e-05, "loss": 0.6456, "step": 4014 }, { "epoch": 0.5070644881206093, "grad_norm": 1.796875, "learning_rate": 1.7097839059727238e-05, "loss": 0.6025, "step": 4015 }, { "epoch": 0.5071907806456706, "grad_norm": 1.9609375, "learning_rate": 1.7096432589440418e-05, "loss": 0.7326, "step": 4016 }, { "epoch": 0.5073170731707317, "grad_norm": 1.796875, "learning_rate": 1.7095025836306884e-05, "loss": 0.7361, "step": 4017 }, { "epoch": 0.5074433656957928, "grad_norm": 1.78125, "learning_rate": 1.7093618800382714e-05, "loss": 0.6182, "step": 4018 }, { "epoch": 0.5075696582208541, "grad_norm": 1.859375, "learning_rate": 1.709221148172398e-05, "loss": 0.6084, "step": 4019 }, { "epoch": 0.5076959507459152, "grad_norm": 1.828125, "learning_rate": 1.7090803880386784e-05, "loss": 0.6664, "step": 4020 }, { "epoch": 0.5078222432709764, "grad_norm": 1.828125, "learning_rate": 1.7089395996427224e-05, "loss": 0.7316, "step": 4021 }, { "epoch": 0.5079485357960376, "grad_norm": 1.875, "learning_rate": 1.7087987829901412e-05, "loss": 0.6481, "step": 4022 }, { "epoch": 0.5080748283210987, "grad_norm": 1.8046875, "learning_rate": 1.7086579380865477e-05, "loss": 0.6962, "step": 4023 }, { "epoch": 0.5082011208461599, "grad_norm": 1.9921875, "learning_rate": 1.708517064937556e-05, "loss": 0.6264, "step": 4024 }, { "epoch": 0.5083274133712211, "grad_norm": 1.796875, "learning_rate": 1.7083761635487808e-05, "loss": 0.6745, "step": 4025 }, { "epoch": 0.5084537058962822, "grad_norm": 1.6875, "learning_rate": 1.7082352339258376e-05, "loss": 0.583, "step": 4026 }, { "epoch": 0.5085799984213434, "grad_norm": 1.7109375, "learning_rate": 1.708094276074344e-05, "loss": 0.6395, "step": 4027 }, { "epoch": 0.5087062909464046, "grad_norm": 1.8125, "learning_rate": 1.707953289999918e-05, "loss": 0.68, "step": 4028 }, { "epoch": 0.5088325834714658, "grad_norm": 1.765625, "learning_rate": 1.707812275708179e-05, "loss": 0.6355, "step": 4029 }, { "epoch": 0.508958875996527, "grad_norm": 1.7421875, "learning_rate": 1.707671233204748e-05, "loss": 0.7173, "step": 4030 }, { "epoch": 0.5090851685215881, "grad_norm": 1.984375, "learning_rate": 1.707530162495246e-05, "loss": 0.7386, "step": 4031 }, { "epoch": 0.5092114610466493, "grad_norm": 1.8046875, "learning_rate": 1.7073890635852958e-05, "loss": 0.7068, "step": 4032 }, { "epoch": 0.5093377535717105, "grad_norm": 1.78125, "learning_rate": 1.7072479364805214e-05, "loss": 0.7148, "step": 4033 }, { "epoch": 0.5094640460967716, "grad_norm": 1.8984375, "learning_rate": 1.7071067811865477e-05, "loss": 0.7482, "step": 4034 }, { "epoch": 0.5095903386218328, "grad_norm": 1.9609375, "learning_rate": 1.706965597709001e-05, "loss": 0.655, "step": 4035 }, { "epoch": 0.509716631146894, "grad_norm": 1.703125, "learning_rate": 1.7068243860535084e-05, "loss": 0.668, "step": 4036 }, { "epoch": 0.5098429236719552, "grad_norm": 1.75, "learning_rate": 1.7066831462256985e-05, "loss": 0.6286, "step": 4037 }, { "epoch": 0.5099692161970163, "grad_norm": 1.7265625, "learning_rate": 1.7065418782312e-05, "loss": 0.6128, "step": 4038 }, { "epoch": 0.5100955087220775, "grad_norm": 1.7734375, "learning_rate": 1.7064005820756442e-05, "loss": 0.7469, "step": 4039 }, { "epoch": 0.5102218012471387, "grad_norm": 1.7890625, "learning_rate": 1.7062592577646627e-05, "loss": 0.6421, "step": 4040 }, { "epoch": 0.5103480937721998, "grad_norm": 1.96875, "learning_rate": 1.7061179053038883e-05, "loss": 0.7869, "step": 4041 }, { "epoch": 0.510474386297261, "grad_norm": 1.9375, "learning_rate": 1.7059765246989552e-05, "loss": 0.6702, "step": 4042 }, { "epoch": 0.5106006788223222, "grad_norm": 1.8359375, "learning_rate": 1.705835115955498e-05, "loss": 0.7073, "step": 4043 }, { "epoch": 0.5107269713473834, "grad_norm": 1.9296875, "learning_rate": 1.7056936790791536e-05, "loss": 0.7081, "step": 4044 }, { "epoch": 0.5108532638724446, "grad_norm": 1.734375, "learning_rate": 1.7055522140755588e-05, "loss": 0.614, "step": 4045 }, { "epoch": 0.5109795563975057, "grad_norm": 1.7734375, "learning_rate": 1.7054107209503522e-05, "loss": 0.5973, "step": 4046 }, { "epoch": 0.5111058489225669, "grad_norm": 1.7734375, "learning_rate": 1.705269199709173e-05, "loss": 0.6831, "step": 4047 }, { "epoch": 0.5112321414476281, "grad_norm": 1.6640625, "learning_rate": 1.7051276503576623e-05, "loss": 0.6198, "step": 4048 }, { "epoch": 0.5113584339726892, "grad_norm": 1.7890625, "learning_rate": 1.704986072901462e-05, "loss": 0.6638, "step": 4049 }, { "epoch": 0.5114847264977505, "grad_norm": 1.8828125, "learning_rate": 1.704844467346215e-05, "loss": 0.7665, "step": 4050 }, { "epoch": 0.5116110190228116, "grad_norm": 1.875, "learning_rate": 1.704702833697565e-05, "loss": 0.604, "step": 4051 }, { "epoch": 0.5117373115478727, "grad_norm": 1.8046875, "learning_rate": 1.704561171961158e-05, "loss": 0.6759, "step": 4052 }, { "epoch": 0.511863604072934, "grad_norm": 1.765625, "learning_rate": 1.704419482142639e-05, "loss": 0.6898, "step": 4053 }, { "epoch": 0.5119898965979951, "grad_norm": 1.765625, "learning_rate": 1.7042777642476567e-05, "loss": 0.6768, "step": 4054 }, { "epoch": 0.5121161891230562, "grad_norm": 1.8515625, "learning_rate": 1.704136018281859e-05, "loss": 0.6998, "step": 4055 }, { "epoch": 0.5122424816481175, "grad_norm": 1.8828125, "learning_rate": 1.7039942442508956e-05, "loss": 0.6708, "step": 4056 }, { "epoch": 0.5123687741731786, "grad_norm": 1.65625, "learning_rate": 1.7038524421604167e-05, "loss": 0.6095, "step": 4057 }, { "epoch": 0.5124950666982397, "grad_norm": 1.8125, "learning_rate": 1.7037106120160754e-05, "loss": 0.6555, "step": 4058 }, { "epoch": 0.512621359223301, "grad_norm": 1.8359375, "learning_rate": 1.703568753823524e-05, "loss": 0.63, "step": 4059 }, { "epoch": 0.5127476517483621, "grad_norm": 1.796875, "learning_rate": 1.7034268675884167e-05, "loss": 0.6209, "step": 4060 }, { "epoch": 0.5128739442734234, "grad_norm": 1.703125, "learning_rate": 1.7032849533164087e-05, "loss": 0.6179, "step": 4061 }, { "epoch": 0.5130002367984845, "grad_norm": 1.765625, "learning_rate": 1.7031430110131566e-05, "loss": 0.6596, "step": 4062 }, { "epoch": 0.5131265293235456, "grad_norm": 1.78125, "learning_rate": 1.7030010406843174e-05, "loss": 0.6283, "step": 4063 }, { "epoch": 0.5132528218486069, "grad_norm": 1.78125, "learning_rate": 1.7028590423355504e-05, "loss": 0.6057, "step": 4064 }, { "epoch": 0.513379114373668, "grad_norm": 1.890625, "learning_rate": 1.702717015972515e-05, "loss": 0.6942, "step": 4065 }, { "epoch": 0.5135054068987291, "grad_norm": 1.8203125, "learning_rate": 1.702574961600871e-05, "loss": 0.662, "step": 4066 }, { "epoch": 0.5136316994237904, "grad_norm": 1.8671875, "learning_rate": 1.702432879226282e-05, "loss": 0.6476, "step": 4067 }, { "epoch": 0.5137579919488515, "grad_norm": 1.6640625, "learning_rate": 1.7022907688544106e-05, "loss": 0.6483, "step": 4068 }, { "epoch": 0.5138842844739127, "grad_norm": 1.8203125, "learning_rate": 1.7021486304909202e-05, "loss": 0.6229, "step": 4069 }, { "epoch": 0.5140105769989739, "grad_norm": 1.8203125, "learning_rate": 1.702006464141477e-05, "loss": 0.6487, "step": 4070 }, { "epoch": 0.514136869524035, "grad_norm": 1.796875, "learning_rate": 1.701864269811747e-05, "loss": 0.7385, "step": 4071 }, { "epoch": 0.5142631620490962, "grad_norm": 1.8203125, "learning_rate": 1.7017220475073975e-05, "loss": 0.6679, "step": 4072 }, { "epoch": 0.5143894545741574, "grad_norm": 1.9765625, "learning_rate": 1.7015797972340974e-05, "loss": 0.6403, "step": 4073 }, { "epoch": 0.5145157470992185, "grad_norm": 1.984375, "learning_rate": 1.7014375189975164e-05, "loss": 0.6463, "step": 4074 }, { "epoch": 0.5146420396242798, "grad_norm": 1.921875, "learning_rate": 1.701295212803326e-05, "loss": 0.6457, "step": 4075 }, { "epoch": 0.5147683321493409, "grad_norm": 1.6875, "learning_rate": 1.701152878657197e-05, "loss": 0.5806, "step": 4076 }, { "epoch": 0.5148946246744021, "grad_norm": 1.75, "learning_rate": 1.7010105165648032e-05, "loss": 0.6786, "step": 4077 }, { "epoch": 0.5150209171994633, "grad_norm": 1.7734375, "learning_rate": 1.7008681265318187e-05, "loss": 0.6718, "step": 4078 }, { "epoch": 0.5151472097245244, "grad_norm": 1.7578125, "learning_rate": 1.700725708563919e-05, "loss": 0.6453, "step": 4079 }, { "epoch": 0.5152735022495856, "grad_norm": 1.625, "learning_rate": 1.7005832626667802e-05, "loss": 0.585, "step": 4080 }, { "epoch": 0.5153997947746468, "grad_norm": 1.90625, "learning_rate": 1.7004407888460802e-05, "loss": 0.7177, "step": 4081 }, { "epoch": 0.515526087299708, "grad_norm": 1.90625, "learning_rate": 1.700298287107497e-05, "loss": 0.7419, "step": 4082 }, { "epoch": 0.5156523798247691, "grad_norm": 1.953125, "learning_rate": 1.7001557574567113e-05, "loss": 0.7833, "step": 4083 }, { "epoch": 0.5157786723498303, "grad_norm": 1.90625, "learning_rate": 1.700013199899403e-05, "loss": 0.7507, "step": 4084 }, { "epoch": 0.5159049648748915, "grad_norm": 1.8125, "learning_rate": 1.6998706144412555e-05, "loss": 0.7096, "step": 4085 }, { "epoch": 0.5160312573999526, "grad_norm": 1.6328125, "learning_rate": 1.6997280010879504e-05, "loss": 0.6247, "step": 4086 }, { "epoch": 0.5161575499250138, "grad_norm": 1.6328125, "learning_rate": 1.6995853598451728e-05, "loss": 0.5469, "step": 4087 }, { "epoch": 0.516283842450075, "grad_norm": 1.828125, "learning_rate": 1.699442690718608e-05, "loss": 0.6929, "step": 4088 }, { "epoch": 0.5164101349751361, "grad_norm": 1.8671875, "learning_rate": 1.699299993713942e-05, "loss": 0.6716, "step": 4089 }, { "epoch": 0.5165364275001973, "grad_norm": 1.875, "learning_rate": 1.6991572688368628e-05, "loss": 0.8449, "step": 4090 }, { "epoch": 0.5166627200252585, "grad_norm": 1.828125, "learning_rate": 1.6990145160930586e-05, "loss": 0.6214, "step": 4091 }, { "epoch": 0.5167890125503197, "grad_norm": 2.03125, "learning_rate": 1.6988717354882192e-05, "loss": 0.7724, "step": 4092 }, { "epoch": 0.5169153050753809, "grad_norm": 1.7578125, "learning_rate": 1.6987289270280366e-05, "loss": 0.6718, "step": 4093 }, { "epoch": 0.517041597600442, "grad_norm": 1.671875, "learning_rate": 1.6985860907182014e-05, "loss": 0.6876, "step": 4094 }, { "epoch": 0.5171678901255032, "grad_norm": 1.8046875, "learning_rate": 1.6984432265644075e-05, "loss": 0.632, "step": 4095 }, { "epoch": 0.5172941826505644, "grad_norm": 1.71875, "learning_rate": 1.698300334572349e-05, "loss": 0.646, "step": 4096 }, { "epoch": 0.5174204751756255, "grad_norm": 1.828125, "learning_rate": 1.698157414747721e-05, "loss": 0.6615, "step": 4097 }, { "epoch": 0.5175467677006867, "grad_norm": 1.8359375, "learning_rate": 1.6980144670962198e-05, "loss": 0.6521, "step": 4098 }, { "epoch": 0.5176730602257479, "grad_norm": 1.890625, "learning_rate": 1.6978714916235436e-05, "loss": 0.686, "step": 4099 }, { "epoch": 0.517799352750809, "grad_norm": 1.984375, "learning_rate": 1.69772848833539e-05, "loss": 0.6693, "step": 4100 }, { "epoch": 0.5179256452758703, "grad_norm": 1.765625, "learning_rate": 1.69758545723746e-05, "loss": 0.672, "step": 4101 }, { "epoch": 0.5180519378009314, "grad_norm": 1.6640625, "learning_rate": 1.6974423983354543e-05, "loss": 0.5453, "step": 4102 }, { "epoch": 0.5181782303259925, "grad_norm": 1.8515625, "learning_rate": 1.697299311635074e-05, "loss": 0.7187, "step": 4103 }, { "epoch": 0.5183045228510538, "grad_norm": 1.6953125, "learning_rate": 1.6971561971420225e-05, "loss": 0.6295, "step": 4104 }, { "epoch": 0.5184308153761149, "grad_norm": 1.765625, "learning_rate": 1.6970130548620046e-05, "loss": 0.7027, "step": 4105 }, { "epoch": 0.5185571079011762, "grad_norm": 1.859375, "learning_rate": 1.696869884800725e-05, "loss": 0.662, "step": 4106 }, { "epoch": 0.5186834004262373, "grad_norm": 1.796875, "learning_rate": 1.6967266869638904e-05, "loss": 0.7168, "step": 4107 }, { "epoch": 0.5188096929512984, "grad_norm": 1.7109375, "learning_rate": 1.6965834613572082e-05, "loss": 0.6017, "step": 4108 }, { "epoch": 0.5189359854763597, "grad_norm": 1.75, "learning_rate": 1.696440207986387e-05, "loss": 0.6215, "step": 4109 }, { "epoch": 0.5190622780014208, "grad_norm": 1.75, "learning_rate": 1.6962969268571364e-05, "loss": 0.6332, "step": 4110 }, { "epoch": 0.5191885705264819, "grad_norm": 1.7265625, "learning_rate": 1.6961536179751676e-05, "loss": 0.6203, "step": 4111 }, { "epoch": 0.5193148630515432, "grad_norm": 1.796875, "learning_rate": 1.6960102813461924e-05, "loss": 0.7046, "step": 4112 }, { "epoch": 0.5194411555766043, "grad_norm": 1.7578125, "learning_rate": 1.695866916975924e-05, "loss": 0.6718, "step": 4113 }, { "epoch": 0.5195674481016654, "grad_norm": 1.859375, "learning_rate": 1.6957235248700762e-05, "loss": 0.629, "step": 4114 }, { "epoch": 0.5196937406267267, "grad_norm": 1.8828125, "learning_rate": 1.6955801050343644e-05, "loss": 0.7623, "step": 4115 }, { "epoch": 0.5198200331517878, "grad_norm": 1.9296875, "learning_rate": 1.695436657474505e-05, "loss": 0.6865, "step": 4116 }, { "epoch": 0.519946325676849, "grad_norm": 1.8203125, "learning_rate": 1.695293182196216e-05, "loss": 0.6558, "step": 4117 }, { "epoch": 0.5200726182019102, "grad_norm": 1.875, "learning_rate": 1.6951496792052148e-05, "loss": 0.6358, "step": 4118 }, { "epoch": 0.5201989107269713, "grad_norm": 1.671875, "learning_rate": 1.6950061485072224e-05, "loss": 0.6042, "step": 4119 }, { "epoch": 0.5203252032520326, "grad_norm": 1.6953125, "learning_rate": 1.6948625901079583e-05, "loss": 0.6803, "step": 4120 }, { "epoch": 0.5204514957770937, "grad_norm": 1.7109375, "learning_rate": 1.6947190040131454e-05, "loss": 0.6217, "step": 4121 }, { "epoch": 0.5205777883021548, "grad_norm": 1.828125, "learning_rate": 1.6945753902285063e-05, "loss": 0.6959, "step": 4122 }, { "epoch": 0.5207040808272161, "grad_norm": 1.75, "learning_rate": 1.6944317487597653e-05, "loss": 0.5869, "step": 4123 }, { "epoch": 0.5208303733522772, "grad_norm": 1.8515625, "learning_rate": 1.6942880796126472e-05, "loss": 0.6096, "step": 4124 }, { "epoch": 0.5209566658773384, "grad_norm": 1.9375, "learning_rate": 1.6941443827928784e-05, "loss": 0.6713, "step": 4125 }, { "epoch": 0.5210829584023996, "grad_norm": 1.7578125, "learning_rate": 1.6940006583061865e-05, "loss": 0.6053, "step": 4126 }, { "epoch": 0.5212092509274607, "grad_norm": 1.859375, "learning_rate": 1.6938569061583005e-05, "loss": 0.766, "step": 4127 }, { "epoch": 0.5213355434525219, "grad_norm": 1.7109375, "learning_rate": 1.693713126354949e-05, "loss": 0.5754, "step": 4128 }, { "epoch": 0.5214618359775831, "grad_norm": 1.9140625, "learning_rate": 1.6935693189018633e-05, "loss": 0.7102, "step": 4129 }, { "epoch": 0.5215881285026442, "grad_norm": 1.8828125, "learning_rate": 1.693425483804775e-05, "loss": 0.6867, "step": 4130 }, { "epoch": 0.5217144210277054, "grad_norm": 1.828125, "learning_rate": 1.6932816210694172e-05, "loss": 0.6332, "step": 4131 }, { "epoch": 0.5218407135527666, "grad_norm": 1.6484375, "learning_rate": 1.693137730701524e-05, "loss": 0.562, "step": 4132 }, { "epoch": 0.5219670060778278, "grad_norm": 1.84375, "learning_rate": 1.69299381270683e-05, "loss": 0.69, "step": 4133 }, { "epoch": 0.5220932986028889, "grad_norm": 1.8515625, "learning_rate": 1.692849867091072e-05, "loss": 0.6202, "step": 4134 }, { "epoch": 0.5222195911279501, "grad_norm": 1.953125, "learning_rate": 1.6927058938599873e-05, "loss": 0.6875, "step": 4135 }, { "epoch": 0.5223458836530113, "grad_norm": 1.65625, "learning_rate": 1.6925618930193143e-05, "loss": 0.6239, "step": 4136 }, { "epoch": 0.5224721761780725, "grad_norm": 1.5625, "learning_rate": 1.6924178645747923e-05, "loss": 0.5939, "step": 4137 }, { "epoch": 0.5225984687031336, "grad_norm": 1.8359375, "learning_rate": 1.6922738085321616e-05, "loss": 0.6341, "step": 4138 }, { "epoch": 0.5227247612281948, "grad_norm": 1.953125, "learning_rate": 1.6921297248971645e-05, "loss": 0.683, "step": 4139 }, { "epoch": 0.522851053753256, "grad_norm": 1.78125, "learning_rate": 1.6919856136755438e-05, "loss": 0.6334, "step": 4140 }, { "epoch": 0.5229773462783172, "grad_norm": 1.875, "learning_rate": 1.6918414748730432e-05, "loss": 0.6513, "step": 4141 }, { "epoch": 0.5231036388033783, "grad_norm": 1.6484375, "learning_rate": 1.691697308495408e-05, "loss": 0.6241, "step": 4142 }, { "epoch": 0.5232299313284395, "grad_norm": 1.9140625, "learning_rate": 1.691553114548384e-05, "loss": 0.6911, "step": 4143 }, { "epoch": 0.5233562238535007, "grad_norm": 1.640625, "learning_rate": 1.6914088930377186e-05, "loss": 0.5713, "step": 4144 }, { "epoch": 0.5234825163785618, "grad_norm": 1.6484375, "learning_rate": 1.69126464396916e-05, "loss": 0.6471, "step": 4145 }, { "epoch": 0.523608808903623, "grad_norm": 1.796875, "learning_rate": 1.691120367348458e-05, "loss": 0.5794, "step": 4146 }, { "epoch": 0.5237351014286842, "grad_norm": 1.96875, "learning_rate": 1.6909760631813625e-05, "loss": 0.7366, "step": 4147 }, { "epoch": 0.5238613939537453, "grad_norm": 1.9609375, "learning_rate": 1.6908317314736256e-05, "loss": 0.6528, "step": 4148 }, { "epoch": 0.5239876864788066, "grad_norm": 1.7734375, "learning_rate": 1.6906873722309994e-05, "loss": 0.6609, "step": 4149 }, { "epoch": 0.5241139790038677, "grad_norm": 1.8125, "learning_rate": 1.690542985459239e-05, "loss": 0.6378, "step": 4150 }, { "epoch": 0.5242402715289289, "grad_norm": 1.9296875, "learning_rate": 1.690398571164098e-05, "loss": 0.7414, "step": 4151 }, { "epoch": 0.5243665640539901, "grad_norm": 1.765625, "learning_rate": 1.6902541293513327e-05, "loss": 0.7159, "step": 4152 }, { "epoch": 0.5244928565790512, "grad_norm": 1.8046875, "learning_rate": 1.6901096600267007e-05, "loss": 0.6594, "step": 4153 }, { "epoch": 0.5246191491041124, "grad_norm": 1.796875, "learning_rate": 1.68996516319596e-05, "loss": 0.6281, "step": 4154 }, { "epoch": 0.5247454416291736, "grad_norm": 1.765625, "learning_rate": 1.6898206388648694e-05, "loss": 0.6012, "step": 4155 }, { "epoch": 0.5248717341542347, "grad_norm": 1.890625, "learning_rate": 1.6896760870391898e-05, "loss": 0.704, "step": 4156 }, { "epoch": 0.524998026679296, "grad_norm": 1.734375, "learning_rate": 1.6895315077246825e-05, "loss": 0.6, "step": 4157 }, { "epoch": 0.5251243192043571, "grad_norm": 1.8125, "learning_rate": 1.6893869009271104e-05, "loss": 0.7204, "step": 4158 }, { "epoch": 0.5252506117294182, "grad_norm": 1.828125, "learning_rate": 1.6892422666522367e-05, "loss": 0.6239, "step": 4159 }, { "epoch": 0.5253769042544795, "grad_norm": 1.8671875, "learning_rate": 1.6890976049058267e-05, "loss": 0.6876, "step": 4160 }, { "epoch": 0.5255031967795406, "grad_norm": 1.71875, "learning_rate": 1.6889529156936456e-05, "loss": 0.6099, "step": 4161 }, { "epoch": 0.5256294893046017, "grad_norm": 1.6328125, "learning_rate": 1.6888081990214607e-05, "loss": 0.6202, "step": 4162 }, { "epoch": 0.525755781829663, "grad_norm": 1.921875, "learning_rate": 1.6886634548950406e-05, "loss": 0.7788, "step": 4163 }, { "epoch": 0.5258820743547241, "grad_norm": 1.9296875, "learning_rate": 1.6885186833201533e-05, "loss": 0.7949, "step": 4164 }, { "epoch": 0.5260083668797852, "grad_norm": 1.9140625, "learning_rate": 1.68837388430257e-05, "loss": 0.7605, "step": 4165 }, { "epoch": 0.5261346594048465, "grad_norm": 1.6796875, "learning_rate": 1.6882290578480618e-05, "loss": 0.5856, "step": 4166 }, { "epoch": 0.5262609519299076, "grad_norm": 1.828125, "learning_rate": 1.688084203962401e-05, "loss": 0.668, "step": 4167 }, { "epoch": 0.5263872444549689, "grad_norm": 1.9609375, "learning_rate": 1.687939322651361e-05, "loss": 0.6026, "step": 4168 }, { "epoch": 0.52651353698003, "grad_norm": 1.9296875, "learning_rate": 1.6877944139207168e-05, "loss": 0.6539, "step": 4169 }, { "epoch": 0.5266398295050911, "grad_norm": 1.828125, "learning_rate": 1.6876494777762437e-05, "loss": 0.6872, "step": 4170 }, { "epoch": 0.5267661220301524, "grad_norm": 1.9140625, "learning_rate": 1.687504514223719e-05, "loss": 0.7882, "step": 4171 }, { "epoch": 0.5268924145552135, "grad_norm": 1.9296875, "learning_rate": 1.68735952326892e-05, "loss": 0.7442, "step": 4172 }, { "epoch": 0.5270187070802747, "grad_norm": 1.703125, "learning_rate": 1.6872145049176263e-05, "loss": 0.5963, "step": 4173 }, { "epoch": 0.5271449996053359, "grad_norm": 1.78125, "learning_rate": 1.6870694591756175e-05, "loss": 0.7084, "step": 4174 }, { "epoch": 0.527271292130397, "grad_norm": 1.6640625, "learning_rate": 1.6869243860486753e-05, "loss": 0.5826, "step": 4175 }, { "epoch": 0.5273975846554582, "grad_norm": 1.859375, "learning_rate": 1.6867792855425813e-05, "loss": 0.713, "step": 4176 }, { "epoch": 0.5275238771805194, "grad_norm": 1.703125, "learning_rate": 1.6866341576631194e-05, "loss": 0.5929, "step": 4177 }, { "epoch": 0.5276501697055805, "grad_norm": 1.6640625, "learning_rate": 1.6864890024160735e-05, "loss": 0.632, "step": 4178 }, { "epoch": 0.5277764622306417, "grad_norm": 1.7421875, "learning_rate": 1.6863438198072298e-05, "loss": 0.6113, "step": 4179 }, { "epoch": 0.5279027547557029, "grad_norm": 1.6484375, "learning_rate": 1.6861986098423745e-05, "loss": 0.5519, "step": 4180 }, { "epoch": 0.528029047280764, "grad_norm": 1.7578125, "learning_rate": 1.6860533725272956e-05, "loss": 0.6674, "step": 4181 }, { "epoch": 0.5281553398058253, "grad_norm": 1.953125, "learning_rate": 1.6859081078677815e-05, "loss": 0.7202, "step": 4182 }, { "epoch": 0.5282816323308864, "grad_norm": 1.8828125, "learning_rate": 1.6857628158696223e-05, "loss": 0.6733, "step": 4183 }, { "epoch": 0.5284079248559476, "grad_norm": 1.8515625, "learning_rate": 1.685617496538609e-05, "loss": 0.6874, "step": 4184 }, { "epoch": 0.5285342173810088, "grad_norm": 1.96875, "learning_rate": 1.685472149880534e-05, "loss": 0.6834, "step": 4185 }, { "epoch": 0.5286605099060699, "grad_norm": 1.796875, "learning_rate": 1.68532677590119e-05, "loss": 0.5984, "step": 4186 }, { "epoch": 0.5287868024311311, "grad_norm": 1.8046875, "learning_rate": 1.6851813746063715e-05, "loss": 0.6461, "step": 4187 }, { "epoch": 0.5289130949561923, "grad_norm": 1.84375, "learning_rate": 1.6850359460018737e-05, "loss": 0.787, "step": 4188 }, { "epoch": 0.5290393874812535, "grad_norm": 2.0, "learning_rate": 1.6848904900934934e-05, "loss": 0.652, "step": 4189 }, { "epoch": 0.5291656800063146, "grad_norm": 1.734375, "learning_rate": 1.684745006887027e-05, "loss": 0.6424, "step": 4190 }, { "epoch": 0.5292919725313758, "grad_norm": 1.78125, "learning_rate": 1.6845994963882746e-05, "loss": 0.6112, "step": 4191 }, { "epoch": 0.529418265056437, "grad_norm": 1.828125, "learning_rate": 1.684453958603035e-05, "loss": 0.7578, "step": 4192 }, { "epoch": 0.5295445575814981, "grad_norm": 1.8203125, "learning_rate": 1.6843083935371092e-05, "loss": 0.6195, "step": 4193 }, { "epoch": 0.5296708501065593, "grad_norm": 1.9453125, "learning_rate": 1.6841628011962993e-05, "loss": 0.672, "step": 4194 }, { "epoch": 0.5297971426316205, "grad_norm": 1.75, "learning_rate": 1.6840171815864082e-05, "loss": 0.6535, "step": 4195 }, { "epoch": 0.5299234351566816, "grad_norm": 1.875, "learning_rate": 1.6838715347132394e-05, "loss": 0.6711, "step": 4196 }, { "epoch": 0.5300497276817429, "grad_norm": 1.9140625, "learning_rate": 1.6837258605825988e-05, "loss": 0.7073, "step": 4197 }, { "epoch": 0.530176020206804, "grad_norm": 1.7421875, "learning_rate": 1.683580159200292e-05, "loss": 0.5979, "step": 4198 }, { "epoch": 0.5303023127318652, "grad_norm": 1.8828125, "learning_rate": 1.6834344305721265e-05, "loss": 0.6817, "step": 4199 }, { "epoch": 0.5304286052569264, "grad_norm": 1.8984375, "learning_rate": 1.6832886747039113e-05, "loss": 0.6933, "step": 4200 }, { "epoch": 0.5305548977819875, "grad_norm": 1.7734375, "learning_rate": 1.6831428916014544e-05, "loss": 0.6441, "step": 4201 }, { "epoch": 0.5306811903070487, "grad_norm": 1.7734375, "learning_rate": 1.682997081270568e-05, "loss": 0.6791, "step": 4202 }, { "epoch": 0.5308074828321099, "grad_norm": 1.9609375, "learning_rate": 1.6828512437170627e-05, "loss": 0.6716, "step": 4203 }, { "epoch": 0.530933775357171, "grad_norm": 1.8359375, "learning_rate": 1.682705378946752e-05, "loss": 0.7126, "step": 4204 }, { "epoch": 0.5310600678822323, "grad_norm": 1.84375, "learning_rate": 1.682559486965449e-05, "loss": 0.7135, "step": 4205 }, { "epoch": 0.5311863604072934, "grad_norm": 1.796875, "learning_rate": 1.682413567778969e-05, "loss": 0.6297, "step": 4206 }, { "epoch": 0.5313126529323545, "grad_norm": 1.65625, "learning_rate": 1.6822676213931278e-05, "loss": 0.5945, "step": 4207 }, { "epoch": 0.5314389454574158, "grad_norm": 1.78125, "learning_rate": 1.682121647813742e-05, "loss": 0.7157, "step": 4208 }, { "epoch": 0.5315652379824769, "grad_norm": 1.84375, "learning_rate": 1.681975647046631e-05, "loss": 0.6906, "step": 4209 }, { "epoch": 0.531691530507538, "grad_norm": 1.7890625, "learning_rate": 1.6818296190976135e-05, "loss": 0.67, "step": 4210 }, { "epoch": 0.5318178230325993, "grad_norm": 1.734375, "learning_rate": 1.6816835639725093e-05, "loss": 0.6314, "step": 4211 }, { "epoch": 0.5319441155576604, "grad_norm": 1.6796875, "learning_rate": 1.68153748167714e-05, "loss": 0.6646, "step": 4212 }, { "epoch": 0.5320704080827217, "grad_norm": 1.8046875, "learning_rate": 1.6813913722173288e-05, "loss": 0.6677, "step": 4213 }, { "epoch": 0.5321967006077828, "grad_norm": 1.7421875, "learning_rate": 1.6812452355988985e-05, "loss": 0.5738, "step": 4214 }, { "epoch": 0.5323229931328439, "grad_norm": 1.7421875, "learning_rate": 1.6810990718276738e-05, "loss": 0.5932, "step": 4215 }, { "epoch": 0.5324492856579052, "grad_norm": 1.8359375, "learning_rate": 1.6809528809094808e-05, "loss": 0.6959, "step": 4216 }, { "epoch": 0.5325755781829663, "grad_norm": 1.828125, "learning_rate": 1.6808066628501462e-05, "loss": 0.6867, "step": 4217 }, { "epoch": 0.5327018707080274, "grad_norm": 1.7421875, "learning_rate": 1.680660417655498e-05, "loss": 0.6854, "step": 4218 }, { "epoch": 0.5328281632330887, "grad_norm": 1.7265625, "learning_rate": 1.6805141453313644e-05, "loss": 0.7077, "step": 4219 }, { "epoch": 0.5329544557581498, "grad_norm": 1.96875, "learning_rate": 1.6803678458835765e-05, "loss": 0.6878, "step": 4220 }, { "epoch": 0.533080748283211, "grad_norm": 1.7265625, "learning_rate": 1.680221519317965e-05, "loss": 0.7238, "step": 4221 }, { "epoch": 0.5332070408082722, "grad_norm": 1.5703125, "learning_rate": 1.680075165640362e-05, "loss": 0.5565, "step": 4222 }, { "epoch": 0.5333333333333333, "grad_norm": 1.8828125, "learning_rate": 1.6799287848566014e-05, "loss": 0.6019, "step": 4223 }, { "epoch": 0.5334596258583945, "grad_norm": 2.140625, "learning_rate": 1.6797823769725168e-05, "loss": 0.8383, "step": 4224 }, { "epoch": 0.5335859183834557, "grad_norm": 1.7734375, "learning_rate": 1.6796359419939442e-05, "loss": 0.6841, "step": 4225 }, { "epoch": 0.5337122109085168, "grad_norm": 1.8984375, "learning_rate": 1.67948947992672e-05, "loss": 0.7234, "step": 4226 }, { "epoch": 0.533838503433578, "grad_norm": 1.96875, "learning_rate": 1.6793429907766812e-05, "loss": 0.7225, "step": 4227 }, { "epoch": 0.5339647959586392, "grad_norm": 1.859375, "learning_rate": 1.6791964745496675e-05, "loss": 0.6295, "step": 4228 }, { "epoch": 0.5340910884837003, "grad_norm": 1.75, "learning_rate": 1.6790499312515186e-05, "loss": 0.5788, "step": 4229 }, { "epoch": 0.5342173810087616, "grad_norm": 1.7890625, "learning_rate": 1.678903360888075e-05, "loss": 0.6148, "step": 4230 }, { "epoch": 0.5343436735338227, "grad_norm": 1.6640625, "learning_rate": 1.678756763465178e-05, "loss": 0.6664, "step": 4231 }, { "epoch": 0.5344699660588839, "grad_norm": 1.796875, "learning_rate": 1.678610138988672e-05, "loss": 0.6758, "step": 4232 }, { "epoch": 0.5345962585839451, "grad_norm": 1.8046875, "learning_rate": 1.6784634874644e-05, "loss": 0.7062, "step": 4233 }, { "epoch": 0.5347225511090062, "grad_norm": 1.8125, "learning_rate": 1.6783168088982073e-05, "loss": 0.6945, "step": 4234 }, { "epoch": 0.5348488436340674, "grad_norm": 1.703125, "learning_rate": 1.678170103295941e-05, "loss": 0.6432, "step": 4235 }, { "epoch": 0.5349751361591286, "grad_norm": 1.8203125, "learning_rate": 1.6780233706634478e-05, "loss": 0.7254, "step": 4236 }, { "epoch": 0.5351014286841898, "grad_norm": 1.6953125, "learning_rate": 1.677876611006576e-05, "loss": 0.637, "step": 4237 }, { "epoch": 0.5352277212092509, "grad_norm": 1.7421875, "learning_rate": 1.677729824331175e-05, "loss": 0.6104, "step": 4238 }, { "epoch": 0.5353540137343121, "grad_norm": 1.875, "learning_rate": 1.677583010643096e-05, "loss": 0.7201, "step": 4239 }, { "epoch": 0.5354803062593733, "grad_norm": 1.859375, "learning_rate": 1.67743616994819e-05, "loss": 0.623, "step": 4240 }, { "epoch": 0.5356065987844344, "grad_norm": 1.8046875, "learning_rate": 1.67728930225231e-05, "loss": 0.6735, "step": 4241 }, { "epoch": 0.5357328913094956, "grad_norm": 1.828125, "learning_rate": 1.6771424075613102e-05, "loss": 0.6956, "step": 4242 }, { "epoch": 0.5358591838345568, "grad_norm": 1.8125, "learning_rate": 1.6769954858810447e-05, "loss": 0.7518, "step": 4243 }, { "epoch": 0.535985476359618, "grad_norm": 1.8046875, "learning_rate": 1.6768485372173696e-05, "loss": 0.6043, "step": 4244 }, { "epoch": 0.5361117688846792, "grad_norm": 1.828125, "learning_rate": 1.6767015615761428e-05, "loss": 0.823, "step": 4245 }, { "epoch": 0.5362380614097403, "grad_norm": 1.9375, "learning_rate": 1.676554558963221e-05, "loss": 0.668, "step": 4246 }, { "epoch": 0.5363643539348015, "grad_norm": 1.890625, "learning_rate": 1.6764075293844643e-05, "loss": 0.6495, "step": 4247 }, { "epoch": 0.5364906464598627, "grad_norm": 1.828125, "learning_rate": 1.6762604728457322e-05, "loss": 0.6735, "step": 4248 }, { "epoch": 0.5366169389849238, "grad_norm": 1.78125, "learning_rate": 1.676113389352887e-05, "loss": 0.6594, "step": 4249 }, { "epoch": 0.536743231509985, "grad_norm": 1.796875, "learning_rate": 1.6759662789117907e-05, "loss": 0.6883, "step": 4250 }, { "epoch": 0.5368695240350462, "grad_norm": 1.6640625, "learning_rate": 1.6758191415283066e-05, "loss": 0.5518, "step": 4251 }, { "epoch": 0.5369958165601073, "grad_norm": 1.75, "learning_rate": 1.6756719772082993e-05, "loss": 0.6166, "step": 4252 }, { "epoch": 0.5371221090851686, "grad_norm": 1.6171875, "learning_rate": 1.6755247859576347e-05, "loss": 0.6667, "step": 4253 }, { "epoch": 0.5372484016102297, "grad_norm": 1.8515625, "learning_rate": 1.675377567782179e-05, "loss": 0.8528, "step": 4254 }, { "epoch": 0.5373746941352908, "grad_norm": 1.734375, "learning_rate": 1.6752303226878002e-05, "loss": 0.7559, "step": 4255 }, { "epoch": 0.5375009866603521, "grad_norm": 1.9921875, "learning_rate": 1.675083050680367e-05, "loss": 0.7009, "step": 4256 }, { "epoch": 0.5376272791854132, "grad_norm": 2.109375, "learning_rate": 1.6749357517657496e-05, "loss": 0.8235, "step": 4257 }, { "epoch": 0.5377535717104743, "grad_norm": 2.03125, "learning_rate": 1.674788425949819e-05, "loss": 0.6825, "step": 4258 }, { "epoch": 0.5378798642355356, "grad_norm": 1.8515625, "learning_rate": 1.674641073238447e-05, "loss": 0.6972, "step": 4259 }, { "epoch": 0.5380061567605967, "grad_norm": 1.75, "learning_rate": 1.6744936936375063e-05, "loss": 0.6318, "step": 4260 }, { "epoch": 0.538132449285658, "grad_norm": 1.7265625, "learning_rate": 1.6743462871528723e-05, "loss": 0.6171, "step": 4261 }, { "epoch": 0.5382587418107191, "grad_norm": 1.7265625, "learning_rate": 1.674198853790419e-05, "loss": 0.6603, "step": 4262 }, { "epoch": 0.5383850343357802, "grad_norm": 1.6328125, "learning_rate": 1.6740513935560236e-05, "loss": 0.6297, "step": 4263 }, { "epoch": 0.5385113268608415, "grad_norm": 1.90625, "learning_rate": 1.673903906455563e-05, "loss": 0.6923, "step": 4264 }, { "epoch": 0.5386376193859026, "grad_norm": 1.859375, "learning_rate": 1.673756392494916e-05, "loss": 0.5534, "step": 4265 }, { "epoch": 0.5387639119109637, "grad_norm": 1.6953125, "learning_rate": 1.673608851679962e-05, "loss": 0.655, "step": 4266 }, { "epoch": 0.538890204436025, "grad_norm": 1.7890625, "learning_rate": 1.6734612840165818e-05, "loss": 0.6595, "step": 4267 }, { "epoch": 0.5390164969610861, "grad_norm": 1.7265625, "learning_rate": 1.673313689510657e-05, "loss": 0.6415, "step": 4268 }, { "epoch": 0.5391427894861472, "grad_norm": 1.7421875, "learning_rate": 1.67316606816807e-05, "loss": 0.6501, "step": 4269 }, { "epoch": 0.5392690820112085, "grad_norm": 1.8359375, "learning_rate": 1.673018419994705e-05, "loss": 0.7356, "step": 4270 }, { "epoch": 0.5393953745362696, "grad_norm": 1.796875, "learning_rate": 1.672870744996447e-05, "loss": 0.6575, "step": 4271 }, { "epoch": 0.5395216670613308, "grad_norm": 1.8359375, "learning_rate": 1.6727230431791816e-05, "loss": 0.7256, "step": 4272 }, { "epoch": 0.539647959586392, "grad_norm": 1.828125, "learning_rate": 1.6725753145487964e-05, "loss": 0.6185, "step": 4273 }, { "epoch": 0.5397742521114531, "grad_norm": 1.7734375, "learning_rate": 1.6724275591111788e-05, "loss": 0.6321, "step": 4274 }, { "epoch": 0.5399005446365144, "grad_norm": 1.7578125, "learning_rate": 1.6722797768722187e-05, "loss": 0.6911, "step": 4275 }, { "epoch": 0.5400268371615755, "grad_norm": 1.640625, "learning_rate": 1.672131967837806e-05, "loss": 0.6282, "step": 4276 }, { "epoch": 0.5401531296866366, "grad_norm": 1.7578125, "learning_rate": 1.6719841320138316e-05, "loss": 0.6889, "step": 4277 }, { "epoch": 0.5402794222116979, "grad_norm": 1.734375, "learning_rate": 1.6718362694061885e-05, "loss": 0.6135, "step": 4278 }, { "epoch": 0.540405714736759, "grad_norm": 1.9140625, "learning_rate": 1.6716883800207698e-05, "loss": 0.7512, "step": 4279 }, { "epoch": 0.5405320072618202, "grad_norm": 1.78125, "learning_rate": 1.6715404638634704e-05, "loss": 0.6327, "step": 4280 }, { "epoch": 0.5406582997868814, "grad_norm": 1.8359375, "learning_rate": 1.6713925209401853e-05, "loss": 0.7428, "step": 4281 }, { "epoch": 0.5407845923119425, "grad_norm": 1.8984375, "learning_rate": 1.6712445512568116e-05, "loss": 0.689, "step": 4282 }, { "epoch": 0.5409108848370037, "grad_norm": 1.7421875, "learning_rate": 1.671096554819247e-05, "loss": 0.5976, "step": 4283 }, { "epoch": 0.5410371773620649, "grad_norm": 1.7734375, "learning_rate": 1.67094853163339e-05, "loss": 0.6621, "step": 4284 }, { "epoch": 0.541163469887126, "grad_norm": 1.703125, "learning_rate": 1.6708004817051407e-05, "loss": 0.6136, "step": 4285 }, { "epoch": 0.5412897624121872, "grad_norm": 1.8828125, "learning_rate": 1.6706524050404e-05, "loss": 0.7546, "step": 4286 }, { "epoch": 0.5414160549372484, "grad_norm": 1.8515625, "learning_rate": 1.6705043016450696e-05, "loss": 0.6697, "step": 4287 }, { "epoch": 0.5415423474623096, "grad_norm": 1.78125, "learning_rate": 1.670356171525053e-05, "loss": 0.5769, "step": 4288 }, { "epoch": 0.5416686399873707, "grad_norm": 1.703125, "learning_rate": 1.670208014686254e-05, "loss": 0.6515, "step": 4289 }, { "epoch": 0.5417949325124319, "grad_norm": 1.78125, "learning_rate": 1.6700598311345776e-05, "loss": 0.6437, "step": 4290 }, { "epoch": 0.5419212250374931, "grad_norm": 1.7734375, "learning_rate": 1.6699116208759304e-05, "loss": 0.6795, "step": 4291 }, { "epoch": 0.5420475175625543, "grad_norm": 1.796875, "learning_rate": 1.6697633839162196e-05, "loss": 0.6998, "step": 4292 }, { "epoch": 0.5421738100876154, "grad_norm": 1.890625, "learning_rate": 1.6696151202613534e-05, "loss": 0.6767, "step": 4293 }, { "epoch": 0.5423001026126766, "grad_norm": 1.7109375, "learning_rate": 1.6694668299172417e-05, "loss": 0.6081, "step": 4294 }, { "epoch": 0.5424263951377378, "grad_norm": 1.7890625, "learning_rate": 1.6693185128897944e-05, "loss": 0.6976, "step": 4295 }, { "epoch": 0.542552687662799, "grad_norm": 1.75, "learning_rate": 1.6691701691849234e-05, "loss": 0.6962, "step": 4296 }, { "epoch": 0.5426789801878601, "grad_norm": 1.6796875, "learning_rate": 1.6690217988085412e-05, "loss": 0.6666, "step": 4297 }, { "epoch": 0.5428052727129213, "grad_norm": 1.7421875, "learning_rate": 1.6688734017665615e-05, "loss": 0.6298, "step": 4298 }, { "epoch": 0.5429315652379825, "grad_norm": 1.5859375, "learning_rate": 1.6687249780648992e-05, "loss": 0.5938, "step": 4299 }, { "epoch": 0.5430578577630436, "grad_norm": 1.90625, "learning_rate": 1.6685765277094702e-05, "loss": 0.7261, "step": 4300 }, { "epoch": 0.5431841502881049, "grad_norm": 1.828125, "learning_rate": 1.668428050706191e-05, "loss": 0.6651, "step": 4301 }, { "epoch": 0.543310442813166, "grad_norm": 1.8515625, "learning_rate": 1.6682795470609793e-05, "loss": 0.6497, "step": 4302 }, { "epoch": 0.5434367353382271, "grad_norm": 1.8203125, "learning_rate": 1.668131016779755e-05, "loss": 0.6815, "step": 4303 }, { "epoch": 0.5435630278632884, "grad_norm": 1.84375, "learning_rate": 1.6679824598684376e-05, "loss": 0.7005, "step": 4304 }, { "epoch": 0.5436893203883495, "grad_norm": 1.9375, "learning_rate": 1.667833876332948e-05, "loss": 0.7731, "step": 4305 }, { "epoch": 0.5438156129134107, "grad_norm": 1.765625, "learning_rate": 1.667685266179209e-05, "loss": 0.6519, "step": 4306 }, { "epoch": 0.5439419054384719, "grad_norm": 1.7890625, "learning_rate": 1.6675366294131435e-05, "loss": 0.6921, "step": 4307 }, { "epoch": 0.544068197963533, "grad_norm": 1.65625, "learning_rate": 1.6673879660406756e-05, "loss": 0.6643, "step": 4308 }, { "epoch": 0.5441944904885943, "grad_norm": 1.8359375, "learning_rate": 1.667239276067731e-05, "loss": 0.6732, "step": 4309 }, { "epoch": 0.5443207830136554, "grad_norm": 1.71875, "learning_rate": 1.667090559500236e-05, "loss": 0.6999, "step": 4310 }, { "epoch": 0.5444470755387165, "grad_norm": 1.8046875, "learning_rate": 1.6669418163441183e-05, "loss": 0.7625, "step": 4311 }, { "epoch": 0.5445733680637778, "grad_norm": 1.875, "learning_rate": 1.6667930466053058e-05, "loss": 0.7798, "step": 4312 }, { "epoch": 0.5446996605888389, "grad_norm": 2.359375, "learning_rate": 1.666644250289729e-05, "loss": 0.7623, "step": 4313 }, { "epoch": 0.5448259531139, "grad_norm": 1.8046875, "learning_rate": 1.6664954274033175e-05, "loss": 0.6866, "step": 4314 }, { "epoch": 0.5449522456389613, "grad_norm": 1.75, "learning_rate": 1.6663465779520042e-05, "loss": 0.6313, "step": 4315 }, { "epoch": 0.5450785381640224, "grad_norm": 1.8515625, "learning_rate": 1.666197701941721e-05, "loss": 0.6852, "step": 4316 }, { "epoch": 0.5452048306890835, "grad_norm": 1.8671875, "learning_rate": 1.6660487993784023e-05, "loss": 0.7314, "step": 4317 }, { "epoch": 0.5453311232141448, "grad_norm": 1.875, "learning_rate": 1.6658998702679823e-05, "loss": 0.6959, "step": 4318 }, { "epoch": 0.5454574157392059, "grad_norm": 1.796875, "learning_rate": 1.6657509146163974e-05, "loss": 0.6338, "step": 4319 }, { "epoch": 0.545583708264267, "grad_norm": 1.8203125, "learning_rate": 1.665601932429585e-05, "loss": 0.6512, "step": 4320 }, { "epoch": 0.5457100007893283, "grad_norm": 1.9140625, "learning_rate": 1.6654529237134826e-05, "loss": 0.6607, "step": 4321 }, { "epoch": 0.5458362933143894, "grad_norm": 1.75, "learning_rate": 1.6653038884740295e-05, "loss": 0.6521, "step": 4322 }, { "epoch": 0.5459625858394507, "grad_norm": 1.7421875, "learning_rate": 1.665154826717166e-05, "loss": 0.5571, "step": 4323 }, { "epoch": 0.5460888783645118, "grad_norm": 1.90625, "learning_rate": 1.665005738448833e-05, "loss": 0.6447, "step": 4324 }, { "epoch": 0.5462151708895729, "grad_norm": 1.6796875, "learning_rate": 1.6648566236749736e-05, "loss": 0.6058, "step": 4325 }, { "epoch": 0.5463414634146342, "grad_norm": 1.78125, "learning_rate": 1.66470748240153e-05, "loss": 0.705, "step": 4326 }, { "epoch": 0.5464677559396953, "grad_norm": 1.7734375, "learning_rate": 1.6645583146344473e-05, "loss": 0.7524, "step": 4327 }, { "epoch": 0.5465940484647565, "grad_norm": 1.7421875, "learning_rate": 1.6644091203796707e-05, "loss": 0.7051, "step": 4328 }, { "epoch": 0.5467203409898177, "grad_norm": 1.7734375, "learning_rate": 1.664259899643147e-05, "loss": 0.6844, "step": 4329 }, { "epoch": 0.5468466335148788, "grad_norm": 1.7421875, "learning_rate": 1.6641106524308243e-05, "loss": 0.6586, "step": 4330 }, { "epoch": 0.54697292603994, "grad_norm": 1.78125, "learning_rate": 1.66396137874865e-05, "loss": 0.6771, "step": 4331 }, { "epoch": 0.5470992185650012, "grad_norm": 1.6171875, "learning_rate": 1.6638120786025745e-05, "loss": 0.6069, "step": 4332 }, { "epoch": 0.5472255110900623, "grad_norm": 1.7890625, "learning_rate": 1.6636627519985483e-05, "loss": 0.6154, "step": 4333 }, { "epoch": 0.5473518036151235, "grad_norm": 1.7109375, "learning_rate": 1.6635133989425233e-05, "loss": 0.627, "step": 4334 }, { "epoch": 0.5474780961401847, "grad_norm": 1.7578125, "learning_rate": 1.6633640194404526e-05, "loss": 0.6986, "step": 4335 }, { "epoch": 0.5476043886652459, "grad_norm": 1.890625, "learning_rate": 1.66321461349829e-05, "loss": 0.664, "step": 4336 }, { "epoch": 0.5477306811903071, "grad_norm": 1.8828125, "learning_rate": 1.66306518112199e-05, "loss": 0.6403, "step": 4337 }, { "epoch": 0.5478569737153682, "grad_norm": 1.7265625, "learning_rate": 1.662915722317509e-05, "loss": 0.6083, "step": 4338 }, { "epoch": 0.5479832662404294, "grad_norm": 1.828125, "learning_rate": 1.6627662370908043e-05, "loss": 0.643, "step": 4339 }, { "epoch": 0.5481095587654906, "grad_norm": 1.75, "learning_rate": 1.6626167254478337e-05, "loss": 0.6251, "step": 4340 }, { "epoch": 0.5482358512905517, "grad_norm": 1.8828125, "learning_rate": 1.6624671873945567e-05, "loss": 0.6541, "step": 4341 }, { "epoch": 0.5483621438156129, "grad_norm": 1.6171875, "learning_rate": 1.662317622936933e-05, "loss": 0.6368, "step": 4342 }, { "epoch": 0.5484884363406741, "grad_norm": 1.7421875, "learning_rate": 1.6621680320809242e-05, "loss": 0.6285, "step": 4343 }, { "epoch": 0.5486147288657353, "grad_norm": 1.703125, "learning_rate": 1.6620184148324922e-05, "loss": 0.6279, "step": 4344 }, { "epoch": 0.5487410213907964, "grad_norm": 1.9140625, "learning_rate": 1.6618687711976012e-05, "loss": 0.7533, "step": 4345 }, { "epoch": 0.5488673139158576, "grad_norm": 1.8203125, "learning_rate": 1.661719101182215e-05, "loss": 0.6682, "step": 4346 }, { "epoch": 0.5489936064409188, "grad_norm": 1.9375, "learning_rate": 1.661569404792299e-05, "loss": 0.7636, "step": 4347 }, { "epoch": 0.5491198989659799, "grad_norm": 1.8984375, "learning_rate": 1.6614196820338206e-05, "loss": 0.7029, "step": 4348 }, { "epoch": 0.5492461914910411, "grad_norm": 1.5625, "learning_rate": 1.6612699329127467e-05, "loss": 0.6397, "step": 4349 }, { "epoch": 0.5493724840161023, "grad_norm": 1.7265625, "learning_rate": 1.6611201574350457e-05, "loss": 0.7047, "step": 4350 }, { "epoch": 0.5494987765411635, "grad_norm": 1.7109375, "learning_rate": 1.660970355606688e-05, "loss": 0.6904, "step": 4351 }, { "epoch": 0.5496250690662247, "grad_norm": 1.8671875, "learning_rate": 1.6608205274336436e-05, "loss": 0.6425, "step": 4352 }, { "epoch": 0.5497513615912858, "grad_norm": 1.7734375, "learning_rate": 1.660670672921885e-05, "loss": 0.6523, "step": 4353 }, { "epoch": 0.549877654116347, "grad_norm": 1.8984375, "learning_rate": 1.6605207920773847e-05, "loss": 0.6332, "step": 4354 }, { "epoch": 0.5500039466414082, "grad_norm": 1.75, "learning_rate": 1.660370884906116e-05, "loss": 0.7253, "step": 4355 }, { "epoch": 0.5501302391664693, "grad_norm": 1.7578125, "learning_rate": 1.6602209514140552e-05, "loss": 0.5874, "step": 4356 }, { "epoch": 0.5502565316915305, "grad_norm": 1.9921875, "learning_rate": 1.660070991607177e-05, "loss": 0.67, "step": 4357 }, { "epoch": 0.5503828242165917, "grad_norm": 1.8359375, "learning_rate": 1.6599210054914592e-05, "loss": 0.6635, "step": 4358 }, { "epoch": 0.5505091167416528, "grad_norm": 1.6015625, "learning_rate": 1.6597709930728795e-05, "loss": 0.6246, "step": 4359 }, { "epoch": 0.5506354092667141, "grad_norm": 1.703125, "learning_rate": 1.6596209543574175e-05, "loss": 0.6349, "step": 4360 }, { "epoch": 0.5507617017917752, "grad_norm": 1.8359375, "learning_rate": 1.6594708893510527e-05, "loss": 0.669, "step": 4361 }, { "epoch": 0.5508879943168363, "grad_norm": 1.78125, "learning_rate": 1.659320798059767e-05, "loss": 0.7156, "step": 4362 }, { "epoch": 0.5510142868418976, "grad_norm": 1.671875, "learning_rate": 1.6591706804895422e-05, "loss": 0.6539, "step": 4363 }, { "epoch": 0.5511405793669587, "grad_norm": 1.75, "learning_rate": 1.6590205366463617e-05, "loss": 0.6064, "step": 4364 }, { "epoch": 0.5512668718920198, "grad_norm": 1.6953125, "learning_rate": 1.6588703665362097e-05, "loss": 0.6617, "step": 4365 }, { "epoch": 0.5513931644170811, "grad_norm": 1.921875, "learning_rate": 1.6587201701650725e-05, "loss": 0.6443, "step": 4366 }, { "epoch": 0.5515194569421422, "grad_norm": 1.84375, "learning_rate": 1.6585699475389354e-05, "loss": 0.7565, "step": 4367 }, { "epoch": 0.5516457494672035, "grad_norm": 1.8828125, "learning_rate": 1.6584196986637867e-05, "loss": 0.6809, "step": 4368 }, { "epoch": 0.5517720419922646, "grad_norm": 1.84375, "learning_rate": 1.658269423545615e-05, "loss": 0.6233, "step": 4369 }, { "epoch": 0.5518983345173257, "grad_norm": 1.6484375, "learning_rate": 1.658119122190409e-05, "loss": 0.5712, "step": 4370 }, { "epoch": 0.552024627042387, "grad_norm": 1.84375, "learning_rate": 1.6579687946041606e-05, "loss": 0.7415, "step": 4371 }, { "epoch": 0.5521509195674481, "grad_norm": 1.8046875, "learning_rate": 1.6578184407928604e-05, "loss": 0.75, "step": 4372 }, { "epoch": 0.5522772120925092, "grad_norm": 1.828125, "learning_rate": 1.6576680607625016e-05, "loss": 0.7314, "step": 4373 }, { "epoch": 0.5524035046175705, "grad_norm": 1.8203125, "learning_rate": 1.6575176545190782e-05, "loss": 0.6558, "step": 4374 }, { "epoch": 0.5525297971426316, "grad_norm": 1.828125, "learning_rate": 1.657367222068585e-05, "loss": 0.6977, "step": 4375 }, { "epoch": 0.5526560896676928, "grad_norm": 1.796875, "learning_rate": 1.6572167634170173e-05, "loss": 0.6159, "step": 4376 }, { "epoch": 0.552782382192754, "grad_norm": 1.859375, "learning_rate": 1.6570662785703726e-05, "loss": 0.6901, "step": 4377 }, { "epoch": 0.5529086747178151, "grad_norm": 1.8515625, "learning_rate": 1.6569157675346487e-05, "loss": 0.7233, "step": 4378 }, { "epoch": 0.5530349672428763, "grad_norm": 1.796875, "learning_rate": 1.6567652303158448e-05, "loss": 0.6378, "step": 4379 }, { "epoch": 0.5531612597679375, "grad_norm": 1.671875, "learning_rate": 1.6566146669199603e-05, "loss": 0.6594, "step": 4380 }, { "epoch": 0.5532875522929986, "grad_norm": 1.7265625, "learning_rate": 1.656464077352997e-05, "loss": 0.5743, "step": 4381 }, { "epoch": 0.5534138448180599, "grad_norm": 1.921875, "learning_rate": 1.6563134616209568e-05, "loss": 0.6745, "step": 4382 }, { "epoch": 0.553540137343121, "grad_norm": 1.8125, "learning_rate": 1.6561628197298426e-05, "loss": 0.6933, "step": 4383 }, { "epoch": 0.5536664298681822, "grad_norm": 1.65625, "learning_rate": 1.656012151685659e-05, "loss": 0.6287, "step": 4384 }, { "epoch": 0.5537927223932434, "grad_norm": 1.890625, "learning_rate": 1.655861457494411e-05, "loss": 0.6825, "step": 4385 }, { "epoch": 0.5539190149183045, "grad_norm": 1.6796875, "learning_rate": 1.6557107371621052e-05, "loss": 0.6049, "step": 4386 }, { "epoch": 0.5540453074433657, "grad_norm": 1.765625, "learning_rate": 1.655559990694749e-05, "loss": 0.7005, "step": 4387 }, { "epoch": 0.5541715999684269, "grad_norm": 1.7109375, "learning_rate": 1.6554092180983503e-05, "loss": 0.6703, "step": 4388 }, { "epoch": 0.554297892493488, "grad_norm": 1.8359375, "learning_rate": 1.655258419378919e-05, "loss": 0.633, "step": 4389 }, { "epoch": 0.5544241850185492, "grad_norm": 1.78125, "learning_rate": 1.6551075945424653e-05, "loss": 0.6568, "step": 4390 }, { "epoch": 0.5545504775436104, "grad_norm": 1.75, "learning_rate": 1.6549567435950007e-05, "loss": 0.6555, "step": 4391 }, { "epoch": 0.5546767700686716, "grad_norm": 1.71875, "learning_rate": 1.654805866542538e-05, "loss": 0.5963, "step": 4392 }, { "epoch": 0.5548030625937327, "grad_norm": 1.9921875, "learning_rate": 1.6546549633910905e-05, "loss": 0.7399, "step": 4393 }, { "epoch": 0.5549293551187939, "grad_norm": 1.9140625, "learning_rate": 1.6545040341466734e-05, "loss": 0.6602, "step": 4394 }, { "epoch": 0.5550556476438551, "grad_norm": 1.890625, "learning_rate": 1.6543530788153014e-05, "loss": 0.6261, "step": 4395 }, { "epoch": 0.5551819401689162, "grad_norm": 1.765625, "learning_rate": 1.654202097402992e-05, "loss": 0.6082, "step": 4396 }, { "epoch": 0.5553082326939774, "grad_norm": 1.6796875, "learning_rate": 1.654051089915763e-05, "loss": 0.629, "step": 4397 }, { "epoch": 0.5554345252190386, "grad_norm": 1.8203125, "learning_rate": 1.6539000563596328e-05, "loss": 0.7078, "step": 4398 }, { "epoch": 0.5555608177440998, "grad_norm": 2.0625, "learning_rate": 1.6537489967406214e-05, "loss": 0.713, "step": 4399 }, { "epoch": 0.555687110269161, "grad_norm": 1.8515625, "learning_rate": 1.6535979110647495e-05, "loss": 0.6871, "step": 4400 }, { "epoch": 0.5558134027942221, "grad_norm": 1.6484375, "learning_rate": 1.653446799338039e-05, "loss": 0.6138, "step": 4401 }, { "epoch": 0.5559396953192833, "grad_norm": 1.578125, "learning_rate": 1.6532956615665134e-05, "loss": 0.5779, "step": 4402 }, { "epoch": 0.5560659878443445, "grad_norm": 1.6875, "learning_rate": 1.653144497756196e-05, "loss": 0.573, "step": 4403 }, { "epoch": 0.5561922803694056, "grad_norm": 1.6953125, "learning_rate": 1.6529933079131125e-05, "loss": 0.6316, "step": 4404 }, { "epoch": 0.5563185728944668, "grad_norm": 1.96875, "learning_rate": 1.6528420920432883e-05, "loss": 0.774, "step": 4405 }, { "epoch": 0.556444865419528, "grad_norm": 1.734375, "learning_rate": 1.6526908501527508e-05, "loss": 0.667, "step": 4406 }, { "epoch": 0.5565711579445891, "grad_norm": 1.765625, "learning_rate": 1.652539582247528e-05, "loss": 0.7189, "step": 4407 }, { "epoch": 0.5566974504696504, "grad_norm": 2.375, "learning_rate": 1.652388288333649e-05, "loss": 0.7643, "step": 4408 }, { "epoch": 0.5568237429947115, "grad_norm": 1.8515625, "learning_rate": 1.6522369684171447e-05, "loss": 0.8132, "step": 4409 }, { "epoch": 0.5569500355197726, "grad_norm": 1.8125, "learning_rate": 1.6520856225040458e-05, "loss": 0.7002, "step": 4410 }, { "epoch": 0.5570763280448339, "grad_norm": 1.8828125, "learning_rate": 1.6519342506003843e-05, "loss": 0.6627, "step": 4411 }, { "epoch": 0.557202620569895, "grad_norm": 1.7578125, "learning_rate": 1.6517828527121942e-05, "loss": 0.6548, "step": 4412 }, { "epoch": 0.5573289130949562, "grad_norm": 1.7265625, "learning_rate": 1.6516314288455093e-05, "loss": 0.7028, "step": 4413 }, { "epoch": 0.5574552056200174, "grad_norm": 2.578125, "learning_rate": 1.6514799790063653e-05, "loss": 0.7241, "step": 4414 }, { "epoch": 0.5575814981450785, "grad_norm": 1.84375, "learning_rate": 1.6513285032007985e-05, "loss": 0.5859, "step": 4415 }, { "epoch": 0.5577077906701398, "grad_norm": 1.90625, "learning_rate": 1.651177001434846e-05, "loss": 0.6987, "step": 4416 }, { "epoch": 0.5578340831952009, "grad_norm": 1.671875, "learning_rate": 1.6510254737145473e-05, "loss": 0.6244, "step": 4417 }, { "epoch": 0.557960375720262, "grad_norm": 1.75, "learning_rate": 1.650873920045941e-05, "loss": 0.6061, "step": 4418 }, { "epoch": 0.5580866682453233, "grad_norm": 1.671875, "learning_rate": 1.650722340435068e-05, "loss": 0.7289, "step": 4419 }, { "epoch": 0.5582129607703844, "grad_norm": 1.7109375, "learning_rate": 1.65057073488797e-05, "loss": 0.6765, "step": 4420 }, { "epoch": 0.5583392532954455, "grad_norm": 1.78125, "learning_rate": 1.6504191034106892e-05, "loss": 0.6168, "step": 4421 }, { "epoch": 0.5584655458205068, "grad_norm": 1.8671875, "learning_rate": 1.65026744600927e-05, "loss": 0.7038, "step": 4422 }, { "epoch": 0.5585918383455679, "grad_norm": 1.703125, "learning_rate": 1.6501157626897564e-05, "loss": 0.6506, "step": 4423 }, { "epoch": 0.558718130870629, "grad_norm": 1.828125, "learning_rate": 1.6499640534581947e-05, "loss": 0.6828, "step": 4424 }, { "epoch": 0.5588444233956903, "grad_norm": 1.859375, "learning_rate": 1.6498123183206308e-05, "loss": 0.6948, "step": 4425 }, { "epoch": 0.5589707159207514, "grad_norm": 1.8515625, "learning_rate": 1.6496605572831134e-05, "loss": 0.6971, "step": 4426 }, { "epoch": 0.5590970084458126, "grad_norm": 1.6953125, "learning_rate": 1.649508770351691e-05, "loss": 0.6951, "step": 4427 }, { "epoch": 0.5592233009708738, "grad_norm": 1.8515625, "learning_rate": 1.6493569575324137e-05, "loss": 0.7096, "step": 4428 }, { "epoch": 0.5593495934959349, "grad_norm": 1.8046875, "learning_rate": 1.6492051188313317e-05, "loss": 0.6942, "step": 4429 }, { "epoch": 0.5594758860209962, "grad_norm": 1.9921875, "learning_rate": 1.6490532542544974e-05, "loss": 0.6394, "step": 4430 }, { "epoch": 0.5596021785460573, "grad_norm": 1.8125, "learning_rate": 1.648901363807964e-05, "loss": 0.6158, "step": 4431 }, { "epoch": 0.5597284710711184, "grad_norm": 1.6328125, "learning_rate": 1.6487494474977853e-05, "loss": 0.6107, "step": 4432 }, { "epoch": 0.5598547635961797, "grad_norm": 1.6796875, "learning_rate": 1.6485975053300157e-05, "loss": 0.6138, "step": 4433 }, { "epoch": 0.5599810561212408, "grad_norm": 1.7578125, "learning_rate": 1.6484455373107122e-05, "loss": 0.645, "step": 4434 }, { "epoch": 0.560107348646302, "grad_norm": 1.71875, "learning_rate": 1.6482935434459313e-05, "loss": 0.6322, "step": 4435 }, { "epoch": 0.5602336411713632, "grad_norm": 1.6796875, "learning_rate": 1.6481415237417313e-05, "loss": 0.6465, "step": 4436 }, { "epoch": 0.5603599336964243, "grad_norm": 1.640625, "learning_rate": 1.6479894782041713e-05, "loss": 0.6567, "step": 4437 }, { "epoch": 0.5604862262214855, "grad_norm": 1.7265625, "learning_rate": 1.6478374068393116e-05, "loss": 0.5742, "step": 4438 }, { "epoch": 0.5606125187465467, "grad_norm": 1.6953125, "learning_rate": 1.6476853096532133e-05, "loss": 0.6259, "step": 4439 }, { "epoch": 0.5607388112716079, "grad_norm": 2.578125, "learning_rate": 1.6475331866519387e-05, "loss": 0.7657, "step": 4440 }, { "epoch": 0.560865103796669, "grad_norm": 1.578125, "learning_rate": 1.6473810378415505e-05, "loss": 0.5899, "step": 4441 }, { "epoch": 0.5609913963217302, "grad_norm": 1.8125, "learning_rate": 1.6472288632281135e-05, "loss": 0.674, "step": 4442 }, { "epoch": 0.5611176888467914, "grad_norm": 1.8984375, "learning_rate": 1.6470766628176935e-05, "loss": 0.7111, "step": 4443 }, { "epoch": 0.5612439813718526, "grad_norm": 1.6796875, "learning_rate": 1.646924436616356e-05, "loss": 0.6879, "step": 4444 }, { "epoch": 0.5613702738969137, "grad_norm": 1.9375, "learning_rate": 1.6467721846301686e-05, "loss": 0.6455, "step": 4445 }, { "epoch": 0.5614965664219749, "grad_norm": 1.7578125, "learning_rate": 1.6466199068652e-05, "loss": 0.6572, "step": 4446 }, { "epoch": 0.5616228589470361, "grad_norm": 1.734375, "learning_rate": 1.646467603327519e-05, "loss": 0.5965, "step": 4447 }, { "epoch": 0.5617491514720973, "grad_norm": 1.7421875, "learning_rate": 1.6463152740231966e-05, "loss": 0.6155, "step": 4448 }, { "epoch": 0.5618754439971584, "grad_norm": 1.8671875, "learning_rate": 1.646162918958304e-05, "loss": 0.7057, "step": 4449 }, { "epoch": 0.5620017365222196, "grad_norm": 1.84375, "learning_rate": 1.6460105381389143e-05, "loss": 0.6636, "step": 4450 }, { "epoch": 0.5621280290472808, "grad_norm": 1.828125, "learning_rate": 1.6458581315711002e-05, "loss": 0.6428, "step": 4451 }, { "epoch": 0.5622543215723419, "grad_norm": 1.7109375, "learning_rate": 1.6457056992609368e-05, "loss": 0.6081, "step": 4452 }, { "epoch": 0.5623806140974031, "grad_norm": 1.8203125, "learning_rate": 1.645553241214499e-05, "loss": 0.5989, "step": 4453 }, { "epoch": 0.5625069066224643, "grad_norm": 1.703125, "learning_rate": 1.6454007574378647e-05, "loss": 0.6456, "step": 4454 }, { "epoch": 0.5626331991475254, "grad_norm": 1.9609375, "learning_rate": 1.64524824793711e-05, "loss": 0.7295, "step": 4455 }, { "epoch": 0.5627594916725867, "grad_norm": 1.7890625, "learning_rate": 1.645095712718315e-05, "loss": 0.6562, "step": 4456 }, { "epoch": 0.5628857841976478, "grad_norm": 1.7109375, "learning_rate": 1.644943151787558e-05, "loss": 0.6809, "step": 4457 }, { "epoch": 0.5630120767227089, "grad_norm": 1.8828125, "learning_rate": 1.6447905651509214e-05, "loss": 0.7116, "step": 4458 }, { "epoch": 0.5631383692477702, "grad_norm": 1.875, "learning_rate": 1.644637952814485e-05, "loss": 0.8112, "step": 4459 }, { "epoch": 0.5632646617728313, "grad_norm": 1.703125, "learning_rate": 1.644485314784333e-05, "loss": 0.6764, "step": 4460 }, { "epoch": 0.5633909542978925, "grad_norm": 1.921875, "learning_rate": 1.6443326510665488e-05, "loss": 0.786, "step": 4461 }, { "epoch": 0.5635172468229537, "grad_norm": 1.71875, "learning_rate": 1.6441799616672166e-05, "loss": 0.6561, "step": 4462 }, { "epoch": 0.5636435393480148, "grad_norm": 1.8359375, "learning_rate": 1.644027246592423e-05, "loss": 0.6734, "step": 4463 }, { "epoch": 0.563769831873076, "grad_norm": 1.6953125, "learning_rate": 1.6438745058482545e-05, "loss": 0.6588, "step": 4464 }, { "epoch": 0.5638961243981372, "grad_norm": 1.7265625, "learning_rate": 1.6437217394407992e-05, "loss": 0.632, "step": 4465 }, { "epoch": 0.5640224169231983, "grad_norm": 1.875, "learning_rate": 1.6435689473761457e-05, "loss": 0.6901, "step": 4466 }, { "epoch": 0.5641487094482596, "grad_norm": 1.75, "learning_rate": 1.6434161296603843e-05, "loss": 0.6591, "step": 4467 }, { "epoch": 0.5642750019733207, "grad_norm": 2.046875, "learning_rate": 1.6432632862996056e-05, "loss": 0.7943, "step": 4468 }, { "epoch": 0.5644012944983818, "grad_norm": 1.765625, "learning_rate": 1.643110417299902e-05, "loss": 0.6606, "step": 4469 }, { "epoch": 0.5645275870234431, "grad_norm": 1.8203125, "learning_rate": 1.6429575226673658e-05, "loss": 0.6991, "step": 4470 }, { "epoch": 0.5646538795485042, "grad_norm": 1.703125, "learning_rate": 1.6428046024080915e-05, "loss": 0.5953, "step": 4471 }, { "epoch": 0.5647801720735653, "grad_norm": 1.7421875, "learning_rate": 1.6426516565281742e-05, "loss": 0.7868, "step": 4472 }, { "epoch": 0.5649064645986266, "grad_norm": 1.7734375, "learning_rate": 1.6424986850337095e-05, "loss": 0.6014, "step": 4473 }, { "epoch": 0.5650327571236877, "grad_norm": 1.8203125, "learning_rate": 1.6423456879307948e-05, "loss": 0.6617, "step": 4474 }, { "epoch": 0.565159049648749, "grad_norm": 1.8046875, "learning_rate": 1.6421926652255286e-05, "loss": 0.582, "step": 4475 }, { "epoch": 0.5652853421738101, "grad_norm": 1.8125, "learning_rate": 1.6420396169240095e-05, "loss": 0.6366, "step": 4476 }, { "epoch": 0.5654116346988712, "grad_norm": 1.9609375, "learning_rate": 1.6418865430323373e-05, "loss": 0.5727, "step": 4477 }, { "epoch": 0.5655379272239325, "grad_norm": 1.7734375, "learning_rate": 1.6417334435566136e-05, "loss": 0.6615, "step": 4478 }, { "epoch": 0.5656642197489936, "grad_norm": 1.78125, "learning_rate": 1.641580318502941e-05, "loss": 0.7802, "step": 4479 }, { "epoch": 0.5657905122740547, "grad_norm": 1.7265625, "learning_rate": 1.641427167877422e-05, "loss": 0.6562, "step": 4480 }, { "epoch": 0.565916804799116, "grad_norm": 1.765625, "learning_rate": 1.6412739916861608e-05, "loss": 0.6669, "step": 4481 }, { "epoch": 0.5660430973241771, "grad_norm": 1.734375, "learning_rate": 1.6411207899352633e-05, "loss": 0.6059, "step": 4482 }, { "epoch": 0.5661693898492383, "grad_norm": 1.8515625, "learning_rate": 1.6409675626308353e-05, "loss": 0.6438, "step": 4483 }, { "epoch": 0.5662956823742995, "grad_norm": 1.8046875, "learning_rate": 1.640814309778984e-05, "loss": 0.6692, "step": 4484 }, { "epoch": 0.5664219748993606, "grad_norm": 1.828125, "learning_rate": 1.640661031385818e-05, "loss": 0.6397, "step": 4485 }, { "epoch": 0.5665482674244218, "grad_norm": 1.6328125, "learning_rate": 1.6405077274574458e-05, "loss": 0.5274, "step": 4486 }, { "epoch": 0.566674559949483, "grad_norm": 2.328125, "learning_rate": 1.640354397999979e-05, "loss": 0.7052, "step": 4487 }, { "epoch": 0.5668008524745441, "grad_norm": 1.796875, "learning_rate": 1.640201043019528e-05, "loss": 0.5797, "step": 4488 }, { "epoch": 0.5669271449996053, "grad_norm": 1.6875, "learning_rate": 1.6400476625222057e-05, "loss": 0.5966, "step": 4489 }, { "epoch": 0.5670534375246665, "grad_norm": 1.984375, "learning_rate": 1.6398942565141248e-05, "loss": 0.6886, "step": 4490 }, { "epoch": 0.5671797300497277, "grad_norm": 1.78125, "learning_rate": 1.6397408250014004e-05, "loss": 0.7398, "step": 4491 }, { "epoch": 0.5673060225747889, "grad_norm": 1.8125, "learning_rate": 1.6395873679901474e-05, "loss": 0.7537, "step": 4492 }, { "epoch": 0.56743231509985, "grad_norm": 1.828125, "learning_rate": 1.6394338854864828e-05, "loss": 0.7298, "step": 4493 }, { "epoch": 0.5675586076249112, "grad_norm": 1.8828125, "learning_rate": 1.6392803774965238e-05, "loss": 0.765, "step": 4494 }, { "epoch": 0.5676849001499724, "grad_norm": 1.7890625, "learning_rate": 1.6391268440263886e-05, "loss": 0.7558, "step": 4495 }, { "epoch": 0.5678111926750335, "grad_norm": 1.7109375, "learning_rate": 1.6389732850821967e-05, "loss": 0.6555, "step": 4496 }, { "epoch": 0.5679374852000947, "grad_norm": 1.9609375, "learning_rate": 1.6388197006700686e-05, "loss": 0.682, "step": 4497 }, { "epoch": 0.5680637777251559, "grad_norm": 1.8515625, "learning_rate": 1.638666090796126e-05, "loss": 0.6553, "step": 4498 }, { "epoch": 0.5681900702502171, "grad_norm": 1.765625, "learning_rate": 1.638512455466491e-05, "loss": 0.6249, "step": 4499 }, { "epoch": 0.5683163627752782, "grad_norm": 1.8984375, "learning_rate": 1.6383587946872885e-05, "loss": 0.6518, "step": 4500 }, { "epoch": 0.5684426553003394, "grad_norm": 1.7421875, "learning_rate": 1.638205108464641e-05, "loss": 0.6731, "step": 4501 }, { "epoch": 0.5685689478254006, "grad_norm": 1.7109375, "learning_rate": 1.6380513968046758e-05, "loss": 0.5816, "step": 4502 }, { "epoch": 0.5686952403504617, "grad_norm": 1.796875, "learning_rate": 1.6378976597135183e-05, "loss": 0.6766, "step": 4503 }, { "epoch": 0.568821532875523, "grad_norm": 1.703125, "learning_rate": 1.637743897197297e-05, "loss": 0.6489, "step": 4504 }, { "epoch": 0.5689478254005841, "grad_norm": 1.7578125, "learning_rate": 1.63759010926214e-05, "loss": 0.6347, "step": 4505 }, { "epoch": 0.5690741179256453, "grad_norm": 1.703125, "learning_rate": 1.6374362959141766e-05, "loss": 0.6196, "step": 4506 }, { "epoch": 0.5692004104507065, "grad_norm": 1.890625, "learning_rate": 1.637282457159538e-05, "loss": 0.5883, "step": 4507 }, { "epoch": 0.5693267029757676, "grad_norm": 1.84375, "learning_rate": 1.6371285930043554e-05, "loss": 0.7533, "step": 4508 }, { "epoch": 0.5694529955008288, "grad_norm": 1.7734375, "learning_rate": 1.636974703454762e-05, "loss": 0.6339, "step": 4509 }, { "epoch": 0.56957928802589, "grad_norm": 1.7734375, "learning_rate": 1.636820788516891e-05, "loss": 0.5739, "step": 4510 }, { "epoch": 0.5697055805509511, "grad_norm": 1.9765625, "learning_rate": 1.6366668481968773e-05, "loss": 0.7098, "step": 4511 }, { "epoch": 0.5698318730760124, "grad_norm": 1.8515625, "learning_rate": 1.6365128825008566e-05, "loss": 0.6652, "step": 4512 }, { "epoch": 0.5699581656010735, "grad_norm": 1.8359375, "learning_rate": 1.636358891434966e-05, "loss": 0.6969, "step": 4513 }, { "epoch": 0.5700844581261346, "grad_norm": 1.7578125, "learning_rate": 1.636204875005342e-05, "loss": 0.614, "step": 4514 }, { "epoch": 0.5702107506511959, "grad_norm": 2.703125, "learning_rate": 1.6360508332181245e-05, "loss": 0.788, "step": 4515 }, { "epoch": 0.570337043176257, "grad_norm": 1.7109375, "learning_rate": 1.6358967660794526e-05, "loss": 0.5873, "step": 4516 }, { "epoch": 0.5704633357013181, "grad_norm": 1.90625, "learning_rate": 1.635742673595467e-05, "loss": 0.7144, "step": 4517 }, { "epoch": 0.5705896282263794, "grad_norm": 1.734375, "learning_rate": 1.6355885557723104e-05, "loss": 0.6116, "step": 4518 }, { "epoch": 0.5707159207514405, "grad_norm": 1.59375, "learning_rate": 1.6354344126161245e-05, "loss": 0.6262, "step": 4519 }, { "epoch": 0.5708422132765016, "grad_norm": 1.734375, "learning_rate": 1.6352802441330535e-05, "loss": 0.6376, "step": 4520 }, { "epoch": 0.5709685058015629, "grad_norm": 1.71875, "learning_rate": 1.635126050329242e-05, "loss": 0.6282, "step": 4521 }, { "epoch": 0.571094798326624, "grad_norm": 1.734375, "learning_rate": 1.634971831210836e-05, "loss": 0.7491, "step": 4522 }, { "epoch": 0.5712210908516853, "grad_norm": 1.796875, "learning_rate": 1.6348175867839822e-05, "loss": 0.7116, "step": 4523 }, { "epoch": 0.5713473833767464, "grad_norm": 1.7265625, "learning_rate": 1.634663317054829e-05, "loss": 0.6686, "step": 4524 }, { "epoch": 0.5714736759018075, "grad_norm": 1.765625, "learning_rate": 1.634509022029524e-05, "loss": 0.6148, "step": 4525 }, { "epoch": 0.5715999684268688, "grad_norm": 1.8359375, "learning_rate": 1.6343547017142177e-05, "loss": 0.6006, "step": 4526 }, { "epoch": 0.5717262609519299, "grad_norm": 1.6953125, "learning_rate": 1.6342003561150612e-05, "loss": 0.6545, "step": 4527 }, { "epoch": 0.571852553476991, "grad_norm": 2.0, "learning_rate": 1.634045985238206e-05, "loss": 0.6775, "step": 4528 }, { "epoch": 0.5719788460020523, "grad_norm": 1.8203125, "learning_rate": 1.633891589089805e-05, "loss": 0.7391, "step": 4529 }, { "epoch": 0.5721051385271134, "grad_norm": 1.7734375, "learning_rate": 1.6337371676760122e-05, "loss": 0.6878, "step": 4530 }, { "epoch": 0.5722314310521746, "grad_norm": 1.75, "learning_rate": 1.6335827210029823e-05, "loss": 0.652, "step": 4531 }, { "epoch": 0.5723577235772358, "grad_norm": 1.625, "learning_rate": 1.6334282490768712e-05, "loss": 0.5985, "step": 4532 }, { "epoch": 0.5724840161022969, "grad_norm": 1.8125, "learning_rate": 1.6332737519038356e-05, "loss": 0.607, "step": 4533 }, { "epoch": 0.5726103086273581, "grad_norm": 1.8984375, "learning_rate": 1.633119229490034e-05, "loss": 0.6291, "step": 4534 }, { "epoch": 0.5727366011524193, "grad_norm": 1.7734375, "learning_rate": 1.6329646818416244e-05, "loss": 0.6969, "step": 4535 }, { "epoch": 0.5728628936774804, "grad_norm": 1.8671875, "learning_rate": 1.6328101089647673e-05, "loss": 0.7126, "step": 4536 }, { "epoch": 0.5729891862025417, "grad_norm": 1.8125, "learning_rate": 1.6326555108656235e-05, "loss": 0.7064, "step": 4537 }, { "epoch": 0.5731154787276028, "grad_norm": 1.7734375, "learning_rate": 1.632500887550355e-05, "loss": 0.5938, "step": 4538 }, { "epoch": 0.573241771252664, "grad_norm": 1.9296875, "learning_rate": 1.6323462390251247e-05, "loss": 0.6921, "step": 4539 }, { "epoch": 0.5733680637777252, "grad_norm": 1.7109375, "learning_rate": 1.6321915652960963e-05, "loss": 0.626, "step": 4540 }, { "epoch": 0.5734943563027863, "grad_norm": 1.5703125, "learning_rate": 1.632036866369435e-05, "loss": 0.5904, "step": 4541 }, { "epoch": 0.5736206488278475, "grad_norm": 1.9765625, "learning_rate": 1.631882142251306e-05, "loss": 0.7987, "step": 4542 }, { "epoch": 0.5737469413529087, "grad_norm": 1.734375, "learning_rate": 1.631727392947877e-05, "loss": 0.6002, "step": 4543 }, { "epoch": 0.5738732338779698, "grad_norm": 1.8046875, "learning_rate": 1.631572618465316e-05, "loss": 0.6444, "step": 4544 }, { "epoch": 0.573999526403031, "grad_norm": 1.6796875, "learning_rate": 1.6314178188097917e-05, "loss": 0.5942, "step": 4545 }, { "epoch": 0.5741258189280922, "grad_norm": 1.796875, "learning_rate": 1.6312629939874737e-05, "loss": 0.6402, "step": 4546 }, { "epoch": 0.5742521114531534, "grad_norm": 1.9140625, "learning_rate": 1.6311081440045334e-05, "loss": 0.6483, "step": 4547 }, { "epoch": 0.5743784039782145, "grad_norm": 1.7421875, "learning_rate": 1.6309532688671428e-05, "loss": 0.6935, "step": 4548 }, { "epoch": 0.5745046965032757, "grad_norm": 1.609375, "learning_rate": 1.6307983685814744e-05, "loss": 0.5797, "step": 4549 }, { "epoch": 0.5746309890283369, "grad_norm": 1.7578125, "learning_rate": 1.6306434431537025e-05, "loss": 0.638, "step": 4550 }, { "epoch": 0.574757281553398, "grad_norm": 1.7265625, "learning_rate": 1.630488492590002e-05, "loss": 0.7261, "step": 4551 }, { "epoch": 0.5748835740784592, "grad_norm": 1.8515625, "learning_rate": 1.6303335168965484e-05, "loss": 0.6508, "step": 4552 }, { "epoch": 0.5750098666035204, "grad_norm": 1.859375, "learning_rate": 1.6301785160795194e-05, "loss": 0.659, "step": 4553 }, { "epoch": 0.5751361591285816, "grad_norm": 1.78125, "learning_rate": 1.6300234901450928e-05, "loss": 0.5787, "step": 4554 }, { "epoch": 0.5752624516536428, "grad_norm": 1.75, "learning_rate": 1.6298684390994474e-05, "loss": 0.6026, "step": 4555 }, { "epoch": 0.5753887441787039, "grad_norm": 1.703125, "learning_rate": 1.629713362948763e-05, "loss": 0.59, "step": 4556 }, { "epoch": 0.5755150367037651, "grad_norm": 1.8203125, "learning_rate": 1.6295582616992203e-05, "loss": 0.7478, "step": 4557 }, { "epoch": 0.5756413292288263, "grad_norm": 1.765625, "learning_rate": 1.6294031353570023e-05, "loss": 0.6903, "step": 4558 }, { "epoch": 0.5757676217538874, "grad_norm": 1.65625, "learning_rate": 1.629247983928291e-05, "loss": 0.6347, "step": 4559 }, { "epoch": 0.5758939142789486, "grad_norm": 1.84375, "learning_rate": 1.629092807419271e-05, "loss": 0.6148, "step": 4560 }, { "epoch": 0.5760202068040098, "grad_norm": 1.625, "learning_rate": 1.6289376058361266e-05, "loss": 0.6468, "step": 4561 }, { "epoch": 0.5761464993290709, "grad_norm": 1.875, "learning_rate": 1.628782379185044e-05, "loss": 0.7142, "step": 4562 }, { "epoch": 0.5762727918541322, "grad_norm": 1.84375, "learning_rate": 1.6286271274722107e-05, "loss": 0.7121, "step": 4563 }, { "epoch": 0.5763990843791933, "grad_norm": 1.7890625, "learning_rate": 1.6284718507038143e-05, "loss": 0.6637, "step": 4564 }, { "epoch": 0.5765253769042544, "grad_norm": 1.7578125, "learning_rate": 1.6283165488860432e-05, "loss": 0.6599, "step": 4565 }, { "epoch": 0.5766516694293157, "grad_norm": 1.859375, "learning_rate": 1.6281612220250883e-05, "loss": 0.762, "step": 4566 }, { "epoch": 0.5767779619543768, "grad_norm": 1.734375, "learning_rate": 1.6280058701271403e-05, "loss": 0.6166, "step": 4567 }, { "epoch": 0.576904254479438, "grad_norm": 1.8984375, "learning_rate": 1.6278504931983902e-05, "loss": 0.6807, "step": 4568 }, { "epoch": 0.5770305470044992, "grad_norm": 1.8515625, "learning_rate": 1.627695091245032e-05, "loss": 0.6442, "step": 4569 }, { "epoch": 0.5771568395295603, "grad_norm": 1.7109375, "learning_rate": 1.6275396642732597e-05, "loss": 0.603, "step": 4570 }, { "epoch": 0.5772831320546216, "grad_norm": 1.78125, "learning_rate": 1.6273842122892678e-05, "loss": 0.6551, "step": 4571 }, { "epoch": 0.5774094245796827, "grad_norm": 1.734375, "learning_rate": 1.6272287352992522e-05, "loss": 0.6619, "step": 4572 }, { "epoch": 0.5775357171047438, "grad_norm": 1.828125, "learning_rate": 1.6270732333094098e-05, "loss": 0.7098, "step": 4573 }, { "epoch": 0.5776620096298051, "grad_norm": 1.765625, "learning_rate": 1.6269177063259392e-05, "loss": 0.682, "step": 4574 }, { "epoch": 0.5777883021548662, "grad_norm": 1.9453125, "learning_rate": 1.6267621543550387e-05, "loss": 0.7014, "step": 4575 }, { "epoch": 0.5779145946799273, "grad_norm": 1.8203125, "learning_rate": 1.6266065774029083e-05, "loss": 0.6592, "step": 4576 }, { "epoch": 0.5780408872049886, "grad_norm": 1.890625, "learning_rate": 1.626450975475749e-05, "loss": 0.611, "step": 4577 }, { "epoch": 0.5781671797300497, "grad_norm": 1.65625, "learning_rate": 1.626295348579763e-05, "loss": 0.6303, "step": 4578 }, { "epoch": 0.5782934722551109, "grad_norm": 1.8125, "learning_rate": 1.6261396967211526e-05, "loss": 0.6636, "step": 4579 }, { "epoch": 0.5784197647801721, "grad_norm": 1.7890625, "learning_rate": 1.6259840199061215e-05, "loss": 0.5889, "step": 4580 }, { "epoch": 0.5785460573052332, "grad_norm": 1.78125, "learning_rate": 1.6258283181408762e-05, "loss": 0.7638, "step": 4581 }, { "epoch": 0.5786723498302945, "grad_norm": 1.796875, "learning_rate": 1.6256725914316208e-05, "loss": 0.7113, "step": 4582 }, { "epoch": 0.5787986423553556, "grad_norm": 1.734375, "learning_rate": 1.6255168397845635e-05, "loss": 0.7223, "step": 4583 }, { "epoch": 0.5789249348804167, "grad_norm": 1.78125, "learning_rate": 1.6253610632059113e-05, "loss": 0.7039, "step": 4584 }, { "epoch": 0.579051227405478, "grad_norm": 1.8515625, "learning_rate": 1.6252052617018733e-05, "loss": 0.734, "step": 4585 }, { "epoch": 0.5791775199305391, "grad_norm": 1.7421875, "learning_rate": 1.6250494352786597e-05, "loss": 0.6268, "step": 4586 }, { "epoch": 0.5793038124556003, "grad_norm": 1.9765625, "learning_rate": 1.6248935839424808e-05, "loss": 0.7622, "step": 4587 }, { "epoch": 0.5794301049806615, "grad_norm": 1.71875, "learning_rate": 1.624737707699549e-05, "loss": 0.5964, "step": 4588 }, { "epoch": 0.5795563975057226, "grad_norm": 1.921875, "learning_rate": 1.6245818065560772e-05, "loss": 0.6981, "step": 4589 }, { "epoch": 0.5796826900307838, "grad_norm": 1.7734375, "learning_rate": 1.6244258805182785e-05, "loss": 0.6786, "step": 4590 }, { "epoch": 0.579808982555845, "grad_norm": 1.7890625, "learning_rate": 1.6242699295923686e-05, "loss": 0.7694, "step": 4591 }, { "epoch": 0.5799352750809061, "grad_norm": 1.7578125, "learning_rate": 1.624113953784563e-05, "loss": 0.663, "step": 4592 }, { "epoch": 0.5800615676059673, "grad_norm": 1.7578125, "learning_rate": 1.6239579531010784e-05, "loss": 0.5804, "step": 4593 }, { "epoch": 0.5801878601310285, "grad_norm": 1.7734375, "learning_rate": 1.6238019275481326e-05, "loss": 0.6898, "step": 4594 }, { "epoch": 0.5803141526560897, "grad_norm": 1.71875, "learning_rate": 1.6236458771319445e-05, "loss": 0.5957, "step": 4595 }, { "epoch": 0.5804404451811508, "grad_norm": 1.796875, "learning_rate": 1.6234898018587336e-05, "loss": 0.6401, "step": 4596 }, { "epoch": 0.580566737706212, "grad_norm": 2.40625, "learning_rate": 1.623333701734721e-05, "loss": 0.7384, "step": 4597 }, { "epoch": 0.5806930302312732, "grad_norm": 1.7734375, "learning_rate": 1.623177576766129e-05, "loss": 0.6488, "step": 4598 }, { "epoch": 0.5808193227563344, "grad_norm": 1.7734375, "learning_rate": 1.6230214269591794e-05, "loss": 0.5923, "step": 4599 }, { "epoch": 0.5809456152813955, "grad_norm": 1.8125, "learning_rate": 1.6228652523200964e-05, "loss": 0.6289, "step": 4600 }, { "epoch": 0.5810719078064567, "grad_norm": 1.859375, "learning_rate": 1.6227090528551048e-05, "loss": 0.6604, "step": 4601 }, { "epoch": 0.5811982003315179, "grad_norm": 1.8203125, "learning_rate": 1.6225528285704304e-05, "loss": 0.6293, "step": 4602 }, { "epoch": 0.5813244928565791, "grad_norm": 1.765625, "learning_rate": 1.6223965794722995e-05, "loss": 0.6437, "step": 4603 }, { "epoch": 0.5814507853816402, "grad_norm": 1.9140625, "learning_rate": 1.62224030556694e-05, "loss": 0.6108, "step": 4604 }, { "epoch": 0.5815770779067014, "grad_norm": 1.90625, "learning_rate": 1.622084006860581e-05, "loss": 0.6797, "step": 4605 }, { "epoch": 0.5817033704317626, "grad_norm": 1.7578125, "learning_rate": 1.621927683359452e-05, "loss": 0.6033, "step": 4606 }, { "epoch": 0.5818296629568237, "grad_norm": 1.8828125, "learning_rate": 1.621771335069783e-05, "loss": 0.7271, "step": 4607 }, { "epoch": 0.581955955481885, "grad_norm": 1.9609375, "learning_rate": 1.6216149619978064e-05, "loss": 0.6597, "step": 4608 }, { "epoch": 0.5820822480069461, "grad_norm": 2.296875, "learning_rate": 1.6214585641497548e-05, "loss": 0.814, "step": 4609 }, { "epoch": 0.5822085405320072, "grad_norm": 1.8203125, "learning_rate": 1.6213021415318614e-05, "loss": 0.7334, "step": 4610 }, { "epoch": 0.5823348330570685, "grad_norm": 1.796875, "learning_rate": 1.6211456941503615e-05, "loss": 0.652, "step": 4611 }, { "epoch": 0.5824611255821296, "grad_norm": 1.9140625, "learning_rate": 1.62098922201149e-05, "loss": 0.6498, "step": 4612 }, { "epoch": 0.5825874181071908, "grad_norm": 1.7265625, "learning_rate": 1.6208327251214843e-05, "loss": 0.6627, "step": 4613 }, { "epoch": 0.582713710632252, "grad_norm": 1.796875, "learning_rate": 1.6206762034865816e-05, "loss": 0.726, "step": 4614 }, { "epoch": 0.5828400031573131, "grad_norm": 1.8203125, "learning_rate": 1.62051965711302e-05, "loss": 0.7617, "step": 4615 }, { "epoch": 0.5829662956823743, "grad_norm": 1.734375, "learning_rate": 1.6203630860070398e-05, "loss": 0.6569, "step": 4616 }, { "epoch": 0.5830925882074355, "grad_norm": 1.75, "learning_rate": 1.620206490174881e-05, "loss": 0.642, "step": 4617 }, { "epoch": 0.5832188807324966, "grad_norm": 1.78125, "learning_rate": 1.620049869622785e-05, "loss": 0.6194, "step": 4618 }, { "epoch": 0.5833451732575579, "grad_norm": 1.7421875, "learning_rate": 1.6198932243569954e-05, "loss": 0.6297, "step": 4619 }, { "epoch": 0.583471465782619, "grad_norm": 1.765625, "learning_rate": 1.6197365543837547e-05, "loss": 0.6944, "step": 4620 }, { "epoch": 0.5835977583076801, "grad_norm": 1.765625, "learning_rate": 1.6195798597093077e-05, "loss": 0.6194, "step": 4621 }, { "epoch": 0.5837240508327414, "grad_norm": 1.78125, "learning_rate": 1.6194231403398997e-05, "loss": 0.7237, "step": 4622 }, { "epoch": 0.5838503433578025, "grad_norm": 1.7421875, "learning_rate": 1.6192663962817774e-05, "loss": 0.6204, "step": 4623 }, { "epoch": 0.5839766358828636, "grad_norm": 1.7578125, "learning_rate": 1.6191096275411882e-05, "loss": 0.6489, "step": 4624 }, { "epoch": 0.5841029284079249, "grad_norm": 1.8828125, "learning_rate": 1.6189528341243806e-05, "loss": 0.758, "step": 4625 }, { "epoch": 0.584229220932986, "grad_norm": 1.6328125, "learning_rate": 1.6187960160376035e-05, "loss": 0.5915, "step": 4626 }, { "epoch": 0.5843555134580471, "grad_norm": 1.6953125, "learning_rate": 1.618639173287108e-05, "loss": 0.63, "step": 4627 }, { "epoch": 0.5844818059831084, "grad_norm": 1.7421875, "learning_rate": 1.6184823058791446e-05, "loss": 0.6104, "step": 4628 }, { "epoch": 0.5846080985081695, "grad_norm": 1.84375, "learning_rate": 1.618325413819967e-05, "loss": 0.7489, "step": 4629 }, { "epoch": 0.5847343910332308, "grad_norm": 1.84375, "learning_rate": 1.618168497115827e-05, "loss": 0.722, "step": 4630 }, { "epoch": 0.5848606835582919, "grad_norm": 1.796875, "learning_rate": 1.6180115557729802e-05, "loss": 0.6688, "step": 4631 }, { "epoch": 0.584986976083353, "grad_norm": 1.7734375, "learning_rate": 1.617854589797681e-05, "loss": 0.6058, "step": 4632 }, { "epoch": 0.5851132686084143, "grad_norm": 1.734375, "learning_rate": 1.6176975991961863e-05, "loss": 0.7196, "step": 4633 }, { "epoch": 0.5852395611334754, "grad_norm": 2.359375, "learning_rate": 1.617540583974753e-05, "loss": 0.6839, "step": 4634 }, { "epoch": 0.5853658536585366, "grad_norm": 1.7578125, "learning_rate": 1.6173835441396394e-05, "loss": 0.7115, "step": 4635 }, { "epoch": 0.5854921461835978, "grad_norm": 1.71875, "learning_rate": 1.617226479697105e-05, "loss": 0.6947, "step": 4636 }, { "epoch": 0.5856184387086589, "grad_norm": 1.8046875, "learning_rate": 1.6170693906534096e-05, "loss": 0.618, "step": 4637 }, { "epoch": 0.5857447312337201, "grad_norm": 1.828125, "learning_rate": 1.6169122770148144e-05, "loss": 0.6167, "step": 4638 }, { "epoch": 0.5858710237587813, "grad_norm": 1.671875, "learning_rate": 1.616755138787582e-05, "loss": 0.5846, "step": 4639 }, { "epoch": 0.5859973162838424, "grad_norm": 1.75, "learning_rate": 1.6165979759779755e-05, "loss": 0.672, "step": 4640 }, { "epoch": 0.5861236088089036, "grad_norm": 1.7421875, "learning_rate": 1.6164407885922588e-05, "loss": 0.6616, "step": 4641 }, { "epoch": 0.5862499013339648, "grad_norm": 1.78125, "learning_rate": 1.616283576636697e-05, "loss": 0.6129, "step": 4642 }, { "epoch": 0.586376193859026, "grad_norm": 1.7890625, "learning_rate": 1.616126340117556e-05, "loss": 0.6776, "step": 4643 }, { "epoch": 0.5865024863840872, "grad_norm": 1.96875, "learning_rate": 1.6159690790411034e-05, "loss": 0.6599, "step": 4644 }, { "epoch": 0.5866287789091483, "grad_norm": 2.015625, "learning_rate": 1.615811793413607e-05, "loss": 0.6819, "step": 4645 }, { "epoch": 0.5867550714342095, "grad_norm": 1.9609375, "learning_rate": 1.6156544832413356e-05, "loss": 0.6288, "step": 4646 }, { "epoch": 0.5868813639592707, "grad_norm": 1.890625, "learning_rate": 1.6154971485305594e-05, "loss": 0.7208, "step": 4647 }, { "epoch": 0.5870076564843318, "grad_norm": 1.8515625, "learning_rate": 1.6153397892875494e-05, "loss": 0.6636, "step": 4648 }, { "epoch": 0.587133949009393, "grad_norm": 1.96875, "learning_rate": 1.6151824055185774e-05, "loss": 0.7094, "step": 4649 }, { "epoch": 0.5872602415344542, "grad_norm": 1.921875, "learning_rate": 1.6150249972299167e-05, "loss": 0.7353, "step": 4650 }, { "epoch": 0.5873865340595154, "grad_norm": 1.8203125, "learning_rate": 1.6148675644278408e-05, "loss": 0.6784, "step": 4651 }, { "epoch": 0.5875128265845765, "grad_norm": 2.125, "learning_rate": 1.6147101071186254e-05, "loss": 0.7747, "step": 4652 }, { "epoch": 0.5876391191096377, "grad_norm": 1.8046875, "learning_rate": 1.614552625308545e-05, "loss": 0.5862, "step": 4653 }, { "epoch": 0.5877654116346989, "grad_norm": 1.9375, "learning_rate": 1.6143951190038773e-05, "loss": 0.6115, "step": 4654 }, { "epoch": 0.58789170415976, "grad_norm": 1.6875, "learning_rate": 1.6142375882109003e-05, "loss": 0.5758, "step": 4655 }, { "epoch": 0.5880179966848212, "grad_norm": 1.8359375, "learning_rate": 1.6140800329358922e-05, "loss": 0.6703, "step": 4656 }, { "epoch": 0.5881442892098824, "grad_norm": 1.7421875, "learning_rate": 1.6139224531851336e-05, "loss": 0.6346, "step": 4657 }, { "epoch": 0.5882705817349435, "grad_norm": 1.796875, "learning_rate": 1.6137648489649046e-05, "loss": 0.6812, "step": 4658 }, { "epoch": 0.5883968742600048, "grad_norm": 1.625, "learning_rate": 1.6136072202814866e-05, "loss": 0.6316, "step": 4659 }, { "epoch": 0.5885231667850659, "grad_norm": 1.671875, "learning_rate": 1.6134495671411632e-05, "loss": 0.6634, "step": 4660 }, { "epoch": 0.5886494593101271, "grad_norm": 1.78125, "learning_rate": 1.6132918895502173e-05, "loss": 0.6298, "step": 4661 }, { "epoch": 0.5887757518351883, "grad_norm": 1.84375, "learning_rate": 1.613134187514934e-05, "loss": 0.6649, "step": 4662 }, { "epoch": 0.5889020443602494, "grad_norm": 1.7890625, "learning_rate": 1.612976461041599e-05, "loss": 0.6786, "step": 4663 }, { "epoch": 0.5890283368853106, "grad_norm": 1.9375, "learning_rate": 1.612818710136499e-05, "loss": 0.7225, "step": 4664 }, { "epoch": 0.5891546294103718, "grad_norm": 1.8359375, "learning_rate": 1.6126609348059208e-05, "loss": 0.7322, "step": 4665 }, { "epoch": 0.5892809219354329, "grad_norm": 1.7890625, "learning_rate": 1.6125031350561533e-05, "loss": 0.6304, "step": 4666 }, { "epoch": 0.5894072144604942, "grad_norm": 1.703125, "learning_rate": 1.6123453108934865e-05, "loss": 0.7254, "step": 4667 }, { "epoch": 0.5895335069855553, "grad_norm": 1.6171875, "learning_rate": 1.6121874623242104e-05, "loss": 0.6386, "step": 4668 }, { "epoch": 0.5896597995106164, "grad_norm": 1.8515625, "learning_rate": 1.6120295893546164e-05, "loss": 0.6407, "step": 4669 }, { "epoch": 0.5897860920356777, "grad_norm": 1.7890625, "learning_rate": 1.6118716919909977e-05, "loss": 0.6601, "step": 4670 }, { "epoch": 0.5899123845607388, "grad_norm": 1.75, "learning_rate": 1.6117137702396464e-05, "loss": 0.5656, "step": 4671 }, { "epoch": 0.5900386770857999, "grad_norm": 1.75, "learning_rate": 1.611555824106858e-05, "loss": 0.6504, "step": 4672 }, { "epoch": 0.5901649696108612, "grad_norm": 1.796875, "learning_rate": 1.6113978535989274e-05, "loss": 0.6671, "step": 4673 }, { "epoch": 0.5902912621359223, "grad_norm": 1.890625, "learning_rate": 1.611239858722151e-05, "loss": 0.7053, "step": 4674 }, { "epoch": 0.5904175546609836, "grad_norm": 1.875, "learning_rate": 1.6110818394828265e-05, "loss": 0.6857, "step": 4675 }, { "epoch": 0.5905438471860447, "grad_norm": 1.8515625, "learning_rate": 1.6109237958872514e-05, "loss": 0.728, "step": 4676 }, { "epoch": 0.5906701397111058, "grad_norm": 1.703125, "learning_rate": 1.6107657279417254e-05, "loss": 0.6023, "step": 4677 }, { "epoch": 0.5907964322361671, "grad_norm": 1.75, "learning_rate": 1.6106076356525484e-05, "loss": 0.6045, "step": 4678 }, { "epoch": 0.5909227247612282, "grad_norm": 1.8125, "learning_rate": 1.610449519026022e-05, "loss": 0.7992, "step": 4679 }, { "epoch": 0.5910490172862893, "grad_norm": 1.6875, "learning_rate": 1.610291378068448e-05, "loss": 0.596, "step": 4680 }, { "epoch": 0.5911753098113506, "grad_norm": 1.8828125, "learning_rate": 1.6101332127861296e-05, "loss": 0.686, "step": 4681 }, { "epoch": 0.5913016023364117, "grad_norm": 1.8515625, "learning_rate": 1.6099750231853712e-05, "loss": 0.7102, "step": 4682 }, { "epoch": 0.5914278948614728, "grad_norm": 1.921875, "learning_rate": 1.609816809272477e-05, "loss": 0.6892, "step": 4683 }, { "epoch": 0.5915541873865341, "grad_norm": 1.984375, "learning_rate": 1.6096585710537542e-05, "loss": 0.7236, "step": 4684 }, { "epoch": 0.5916804799115952, "grad_norm": 1.90625, "learning_rate": 1.6095003085355096e-05, "loss": 0.6314, "step": 4685 }, { "epoch": 0.5918067724366564, "grad_norm": 1.7578125, "learning_rate": 1.60934202172405e-05, "loss": 0.6123, "step": 4686 }, { "epoch": 0.5919330649617176, "grad_norm": 1.796875, "learning_rate": 1.6091837106256853e-05, "loss": 0.6991, "step": 4687 }, { "epoch": 0.5920593574867787, "grad_norm": 1.8359375, "learning_rate": 1.6090253752467254e-05, "loss": 0.639, "step": 4688 }, { "epoch": 0.5921856500118399, "grad_norm": 1.8359375, "learning_rate": 1.6088670155934803e-05, "loss": 0.6429, "step": 4689 }, { "epoch": 0.5923119425369011, "grad_norm": 1.84375, "learning_rate": 1.608708631672263e-05, "loss": 0.7389, "step": 4690 }, { "epoch": 0.5924382350619622, "grad_norm": 1.8046875, "learning_rate": 1.608550223489386e-05, "loss": 0.7039, "step": 4691 }, { "epoch": 0.5925645275870235, "grad_norm": 1.765625, "learning_rate": 1.608391791051163e-05, "loss": 0.6431, "step": 4692 }, { "epoch": 0.5926908201120846, "grad_norm": 2.328125, "learning_rate": 1.6082333343639082e-05, "loss": 0.7688, "step": 4693 }, { "epoch": 0.5928171126371458, "grad_norm": 1.8125, "learning_rate": 1.6080748534339376e-05, "loss": 0.6528, "step": 4694 }, { "epoch": 0.592943405162207, "grad_norm": 1.8046875, "learning_rate": 1.6079163482675686e-05, "loss": 0.6954, "step": 4695 }, { "epoch": 0.5930696976872681, "grad_norm": 1.7421875, "learning_rate": 1.6077578188711175e-05, "loss": 0.6827, "step": 4696 }, { "epoch": 0.5931959902123293, "grad_norm": 1.7890625, "learning_rate": 1.607599265250904e-05, "loss": 0.6015, "step": 4697 }, { "epoch": 0.5933222827373905, "grad_norm": 1.71875, "learning_rate": 1.607440687413247e-05, "loss": 0.6254, "step": 4698 }, { "epoch": 0.5934485752624516, "grad_norm": 1.6328125, "learning_rate": 1.6072820853644677e-05, "loss": 0.5811, "step": 4699 }, { "epoch": 0.5935748677875128, "grad_norm": 1.7421875, "learning_rate": 1.6071234591108874e-05, "loss": 0.7207, "step": 4700 }, { "epoch": 0.593701160312574, "grad_norm": 1.8359375, "learning_rate": 1.6069648086588276e-05, "loss": 0.6805, "step": 4701 }, { "epoch": 0.5938274528376352, "grad_norm": 1.71875, "learning_rate": 1.606806134014613e-05, "loss": 0.617, "step": 4702 }, { "epoch": 0.5939537453626963, "grad_norm": 1.9453125, "learning_rate": 1.6066474351845677e-05, "loss": 0.7156, "step": 4703 }, { "epoch": 0.5940800378877575, "grad_norm": 1.6796875, "learning_rate": 1.6064887121750165e-05, "loss": 0.6694, "step": 4704 }, { "epoch": 0.5942063304128187, "grad_norm": 1.71875, "learning_rate": 1.6063299649922858e-05, "loss": 0.5572, "step": 4705 }, { "epoch": 0.5943326229378799, "grad_norm": 1.6953125, "learning_rate": 1.6061711936427035e-05, "loss": 0.6596, "step": 4706 }, { "epoch": 0.594458915462941, "grad_norm": 1.890625, "learning_rate": 1.6060123981325974e-05, "loss": 0.6973, "step": 4707 }, { "epoch": 0.5945852079880022, "grad_norm": 1.8828125, "learning_rate": 1.605853578468297e-05, "loss": 0.6908, "step": 4708 }, { "epoch": 0.5947115005130634, "grad_norm": 1.7265625, "learning_rate": 1.605694734656132e-05, "loss": 0.7186, "step": 4709 }, { "epoch": 0.5948377930381246, "grad_norm": 1.7109375, "learning_rate": 1.6055358667024335e-05, "loss": 0.5889, "step": 4710 }, { "epoch": 0.5949640855631857, "grad_norm": 1.765625, "learning_rate": 1.6053769746135342e-05, "loss": 0.6566, "step": 4711 }, { "epoch": 0.5950903780882469, "grad_norm": 1.890625, "learning_rate": 1.6052180583957667e-05, "loss": 0.6556, "step": 4712 }, { "epoch": 0.5952166706133081, "grad_norm": 1.734375, "learning_rate": 1.6050591180554655e-05, "loss": 0.5814, "step": 4713 }, { "epoch": 0.5953429631383692, "grad_norm": 1.8515625, "learning_rate": 1.604900153598965e-05, "loss": 0.6569, "step": 4714 }, { "epoch": 0.5954692556634305, "grad_norm": 1.734375, "learning_rate": 1.6047411650326013e-05, "loss": 0.5982, "step": 4715 }, { "epoch": 0.5955955481884916, "grad_norm": 1.7109375, "learning_rate": 1.6045821523627117e-05, "loss": 0.6266, "step": 4716 }, { "epoch": 0.5957218407135527, "grad_norm": 1.8125, "learning_rate": 1.6044231155956332e-05, "loss": 0.7388, "step": 4717 }, { "epoch": 0.595848133238614, "grad_norm": 1.6484375, "learning_rate": 1.6042640547377055e-05, "loss": 0.6135, "step": 4718 }, { "epoch": 0.5959744257636751, "grad_norm": 1.84375, "learning_rate": 1.604104969795268e-05, "loss": 0.634, "step": 4719 }, { "epoch": 0.5961007182887362, "grad_norm": 1.734375, "learning_rate": 1.6039458607746614e-05, "loss": 0.6592, "step": 4720 }, { "epoch": 0.5962270108137975, "grad_norm": 1.796875, "learning_rate": 1.6037867276822277e-05, "loss": 0.5963, "step": 4721 }, { "epoch": 0.5963533033388586, "grad_norm": 1.8203125, "learning_rate": 1.603627570524309e-05, "loss": 0.705, "step": 4722 }, { "epoch": 0.5964795958639199, "grad_norm": 1.6640625, "learning_rate": 1.6034683893072496e-05, "loss": 0.5995, "step": 4723 }, { "epoch": 0.596605888388981, "grad_norm": 1.7109375, "learning_rate": 1.6033091840373936e-05, "loss": 0.6632, "step": 4724 }, { "epoch": 0.5967321809140421, "grad_norm": 1.765625, "learning_rate": 1.6031499547210865e-05, "loss": 0.6595, "step": 4725 }, { "epoch": 0.5968584734391034, "grad_norm": 1.8203125, "learning_rate": 1.6029907013646754e-05, "loss": 0.6846, "step": 4726 }, { "epoch": 0.5969847659641645, "grad_norm": 2.34375, "learning_rate": 1.602831423974507e-05, "loss": 0.9322, "step": 4727 }, { "epoch": 0.5971110584892256, "grad_norm": 1.8046875, "learning_rate": 1.6026721225569303e-05, "loss": 0.6672, "step": 4728 }, { "epoch": 0.5972373510142869, "grad_norm": 1.84375, "learning_rate": 1.602512797118294e-05, "loss": 0.6505, "step": 4729 }, { "epoch": 0.597363643539348, "grad_norm": 1.8125, "learning_rate": 1.6023534476649495e-05, "loss": 0.6182, "step": 4730 }, { "epoch": 0.5974899360644091, "grad_norm": 1.7421875, "learning_rate": 1.6021940742032472e-05, "loss": 0.6102, "step": 4731 }, { "epoch": 0.5976162285894704, "grad_norm": 1.578125, "learning_rate": 1.6020346767395393e-05, "loss": 0.6056, "step": 4732 }, { "epoch": 0.5977425211145315, "grad_norm": 1.8828125, "learning_rate": 1.6018752552801794e-05, "loss": 0.6795, "step": 4733 }, { "epoch": 0.5978688136395927, "grad_norm": 1.8203125, "learning_rate": 1.6017158098315214e-05, "loss": 0.6967, "step": 4734 }, { "epoch": 0.5979951061646539, "grad_norm": 1.671875, "learning_rate": 1.6015563403999207e-05, "loss": 0.5678, "step": 4735 }, { "epoch": 0.598121398689715, "grad_norm": 1.8671875, "learning_rate": 1.601396846991733e-05, "loss": 0.8035, "step": 4736 }, { "epoch": 0.5982476912147763, "grad_norm": 1.84375, "learning_rate": 1.6012373296133163e-05, "loss": 0.6453, "step": 4737 }, { "epoch": 0.5983739837398374, "grad_norm": 1.9453125, "learning_rate": 1.601077788271027e-05, "loss": 0.6212, "step": 4738 }, { "epoch": 0.5985002762648985, "grad_norm": 1.703125, "learning_rate": 1.6009182229712252e-05, "loss": 0.6178, "step": 4739 }, { "epoch": 0.5986265687899598, "grad_norm": 1.9921875, "learning_rate": 1.6007586337202702e-05, "loss": 0.7967, "step": 4740 }, { "epoch": 0.5987528613150209, "grad_norm": 1.890625, "learning_rate": 1.6005990205245233e-05, "loss": 0.7168, "step": 4741 }, { "epoch": 0.5988791538400821, "grad_norm": 1.8359375, "learning_rate": 1.600439383390346e-05, "loss": 0.7091, "step": 4742 }, { "epoch": 0.5990054463651433, "grad_norm": 1.6171875, "learning_rate": 1.600279722324101e-05, "loss": 0.6741, "step": 4743 }, { "epoch": 0.5991317388902044, "grad_norm": 1.65625, "learning_rate": 1.600120037332152e-05, "loss": 0.6239, "step": 4744 }, { "epoch": 0.5992580314152656, "grad_norm": 1.703125, "learning_rate": 1.599960328420864e-05, "loss": 0.6948, "step": 4745 }, { "epoch": 0.5993843239403268, "grad_norm": 1.9140625, "learning_rate": 1.5998005955966028e-05, "loss": 0.7677, "step": 4746 }, { "epoch": 0.599510616465388, "grad_norm": 1.8203125, "learning_rate": 1.5996408388657344e-05, "loss": 0.6973, "step": 4747 }, { "epoch": 0.5996369089904491, "grad_norm": 1.7421875, "learning_rate": 1.599481058234626e-05, "loss": 0.6533, "step": 4748 }, { "epoch": 0.5997632015155103, "grad_norm": 1.8046875, "learning_rate": 1.599321253709647e-05, "loss": 0.6163, "step": 4749 }, { "epoch": 0.5998894940405715, "grad_norm": 1.8125, "learning_rate": 1.599161425297166e-05, "loss": 0.6945, "step": 4750 }, { "epoch": 0.6000157865656326, "grad_norm": 1.7421875, "learning_rate": 1.599001573003554e-05, "loss": 0.6459, "step": 4751 }, { "epoch": 0.6001420790906938, "grad_norm": 1.828125, "learning_rate": 1.5988416968351824e-05, "loss": 0.6698, "step": 4752 }, { "epoch": 0.600268371615755, "grad_norm": 1.7734375, "learning_rate": 1.5986817967984225e-05, "loss": 0.6276, "step": 4753 }, { "epoch": 0.6003946641408162, "grad_norm": 1.75, "learning_rate": 1.5985218728996487e-05, "loss": 0.6806, "step": 4754 }, { "epoch": 0.6005209566658773, "grad_norm": 1.7578125, "learning_rate": 1.5983619251452345e-05, "loss": 0.6362, "step": 4755 }, { "epoch": 0.6006472491909385, "grad_norm": 1.796875, "learning_rate": 1.5982019535415553e-05, "loss": 0.7477, "step": 4756 }, { "epoch": 0.6007735417159997, "grad_norm": 1.6953125, "learning_rate": 1.5980419580949872e-05, "loss": 0.5667, "step": 4757 }, { "epoch": 0.6008998342410609, "grad_norm": 1.8203125, "learning_rate": 1.5978819388119067e-05, "loss": 0.7187, "step": 4758 }, { "epoch": 0.601026126766122, "grad_norm": 1.984375, "learning_rate": 1.5977218956986927e-05, "loss": 0.7325, "step": 4759 }, { "epoch": 0.6011524192911832, "grad_norm": 1.671875, "learning_rate": 1.5975618287617233e-05, "loss": 0.6209, "step": 4760 }, { "epoch": 0.6012787118162444, "grad_norm": 1.734375, "learning_rate": 1.5974017380073787e-05, "loss": 0.6338, "step": 4761 }, { "epoch": 0.6014050043413055, "grad_norm": 1.875, "learning_rate": 1.5972416234420397e-05, "loss": 0.6686, "step": 4762 }, { "epoch": 0.6015312968663667, "grad_norm": 1.7421875, "learning_rate": 1.5970814850720886e-05, "loss": 0.6738, "step": 4763 }, { "epoch": 0.6016575893914279, "grad_norm": 1.828125, "learning_rate": 1.5969213229039073e-05, "loss": 0.656, "step": 4764 }, { "epoch": 0.601783881916489, "grad_norm": 2.0625, "learning_rate": 1.59676113694388e-05, "loss": 0.7475, "step": 4765 }, { "epoch": 0.6019101744415503, "grad_norm": 1.6484375, "learning_rate": 1.596600927198391e-05, "loss": 0.6256, "step": 4766 }, { "epoch": 0.6020364669666114, "grad_norm": 1.8046875, "learning_rate": 1.5964406936738262e-05, "loss": 0.6311, "step": 4767 }, { "epoch": 0.6021627594916726, "grad_norm": 1.96875, "learning_rate": 1.596280436376572e-05, "loss": 0.6486, "step": 4768 }, { "epoch": 0.6022890520167338, "grad_norm": 1.859375, "learning_rate": 1.5961201553130158e-05, "loss": 0.5801, "step": 4769 }, { "epoch": 0.6024153445417949, "grad_norm": 1.828125, "learning_rate": 1.595959850489546e-05, "loss": 0.6434, "step": 4770 }, { "epoch": 0.6025416370668562, "grad_norm": 1.90625, "learning_rate": 1.595799521912552e-05, "loss": 0.7557, "step": 4771 }, { "epoch": 0.6026679295919173, "grad_norm": 1.7890625, "learning_rate": 1.5956391695884242e-05, "loss": 0.6251, "step": 4772 }, { "epoch": 0.6027942221169784, "grad_norm": 1.71875, "learning_rate": 1.5954787935235535e-05, "loss": 0.6753, "step": 4773 }, { "epoch": 0.6029205146420397, "grad_norm": 1.6328125, "learning_rate": 1.5953183937243327e-05, "loss": 0.6295, "step": 4774 }, { "epoch": 0.6030468071671008, "grad_norm": 1.9921875, "learning_rate": 1.5951579701971546e-05, "loss": 0.6831, "step": 4775 }, { "epoch": 0.6031730996921619, "grad_norm": 1.6875, "learning_rate": 1.5949975229484132e-05, "loss": 0.6903, "step": 4776 }, { "epoch": 0.6032993922172232, "grad_norm": 1.8828125, "learning_rate": 1.5948370519845037e-05, "loss": 0.7804, "step": 4777 }, { "epoch": 0.6034256847422843, "grad_norm": 1.75, "learning_rate": 1.5946765573118222e-05, "loss": 0.6035, "step": 4778 }, { "epoch": 0.6035519772673454, "grad_norm": 1.734375, "learning_rate": 1.5945160389367654e-05, "loss": 0.578, "step": 4779 }, { "epoch": 0.6036782697924067, "grad_norm": 1.8125, "learning_rate": 1.5943554968657312e-05, "loss": 0.6378, "step": 4780 }, { "epoch": 0.6038045623174678, "grad_norm": 1.84375, "learning_rate": 1.5941949311051188e-05, "loss": 0.5884, "step": 4781 }, { "epoch": 0.603930854842529, "grad_norm": 1.8515625, "learning_rate": 1.5940343416613272e-05, "loss": 0.6518, "step": 4782 }, { "epoch": 0.6040571473675902, "grad_norm": 1.8515625, "learning_rate": 1.593873728540758e-05, "loss": 0.6414, "step": 4783 }, { "epoch": 0.6041834398926513, "grad_norm": 1.90625, "learning_rate": 1.5937130917498122e-05, "loss": 0.6287, "step": 4784 }, { "epoch": 0.6043097324177126, "grad_norm": 1.8671875, "learning_rate": 1.5935524312948925e-05, "loss": 0.6976, "step": 4785 }, { "epoch": 0.6044360249427737, "grad_norm": 1.8671875, "learning_rate": 1.5933917471824026e-05, "loss": 0.6558, "step": 4786 }, { "epoch": 0.6045623174678348, "grad_norm": 1.8984375, "learning_rate": 1.5932310394187474e-05, "loss": 0.66, "step": 4787 }, { "epoch": 0.6046886099928961, "grad_norm": 1.7421875, "learning_rate": 1.5930703080103314e-05, "loss": 0.5963, "step": 4788 }, { "epoch": 0.6048149025179572, "grad_norm": 1.8203125, "learning_rate": 1.5929095529635613e-05, "loss": 0.6494, "step": 4789 }, { "epoch": 0.6049411950430184, "grad_norm": 1.796875, "learning_rate": 1.5927487742848448e-05, "loss": 0.6331, "step": 4790 }, { "epoch": 0.6050674875680796, "grad_norm": 1.921875, "learning_rate": 1.59258797198059e-05, "loss": 0.7068, "step": 4791 }, { "epoch": 0.6051937800931407, "grad_norm": 1.8515625, "learning_rate": 1.5924271460572058e-05, "loss": 0.72, "step": 4792 }, { "epoch": 0.6053200726182019, "grad_norm": 1.765625, "learning_rate": 1.5922662965211023e-05, "loss": 0.6466, "step": 4793 }, { "epoch": 0.6054463651432631, "grad_norm": 1.953125, "learning_rate": 1.592105423378691e-05, "loss": 0.6076, "step": 4794 }, { "epoch": 0.6055726576683242, "grad_norm": 1.703125, "learning_rate": 1.5919445266363837e-05, "loss": 0.6946, "step": 4795 }, { "epoch": 0.6056989501933854, "grad_norm": 1.8671875, "learning_rate": 1.5917836063005933e-05, "loss": 0.7445, "step": 4796 }, { "epoch": 0.6058252427184466, "grad_norm": 1.8203125, "learning_rate": 1.591622662377734e-05, "loss": 0.6513, "step": 4797 }, { "epoch": 0.6059515352435078, "grad_norm": 1.6875, "learning_rate": 1.59146169487422e-05, "loss": 0.6055, "step": 4798 }, { "epoch": 0.606077827768569, "grad_norm": 1.875, "learning_rate": 1.5913007037964675e-05, "loss": 0.6738, "step": 4799 }, { "epoch": 0.6062041202936301, "grad_norm": 1.7890625, "learning_rate": 1.5911396891508933e-05, "loss": 0.7308, "step": 4800 }, { "epoch": 0.6063304128186913, "grad_norm": 1.75, "learning_rate": 1.590978650943915e-05, "loss": 0.6724, "step": 4801 }, { "epoch": 0.6064567053437525, "grad_norm": 1.6171875, "learning_rate": 1.5908175891819512e-05, "loss": 0.62, "step": 4802 }, { "epoch": 0.6065829978688136, "grad_norm": 1.6953125, "learning_rate": 1.5906565038714212e-05, "loss": 0.5969, "step": 4803 }, { "epoch": 0.6067092903938748, "grad_norm": 2.015625, "learning_rate": 1.5904953950187458e-05, "loss": 0.7109, "step": 4804 }, { "epoch": 0.606835582918936, "grad_norm": 1.75, "learning_rate": 1.5903342626303465e-05, "loss": 0.6706, "step": 4805 }, { "epoch": 0.6069618754439972, "grad_norm": 1.7890625, "learning_rate": 1.590173106712645e-05, "loss": 0.6807, "step": 4806 }, { "epoch": 0.6070881679690583, "grad_norm": 1.7734375, "learning_rate": 1.5900119272720647e-05, "loss": 0.6488, "step": 4807 }, { "epoch": 0.6072144604941195, "grad_norm": 1.578125, "learning_rate": 1.5898507243150304e-05, "loss": 0.5672, "step": 4808 }, { "epoch": 0.6073407530191807, "grad_norm": 1.8359375, "learning_rate": 1.589689497847967e-05, "loss": 0.6219, "step": 4809 }, { "epoch": 0.6074670455442418, "grad_norm": 1.8828125, "learning_rate": 1.5895282478773008e-05, "loss": 0.6935, "step": 4810 }, { "epoch": 0.607593338069303, "grad_norm": 1.75, "learning_rate": 1.5893669744094583e-05, "loss": 0.6779, "step": 4811 }, { "epoch": 0.6077196305943642, "grad_norm": 1.7578125, "learning_rate": 1.589205677450868e-05, "loss": 0.6512, "step": 4812 }, { "epoch": 0.6078459231194254, "grad_norm": 1.9375, "learning_rate": 1.5890443570079584e-05, "loss": 0.7025, "step": 4813 }, { "epoch": 0.6079722156444866, "grad_norm": 1.6484375, "learning_rate": 1.5888830130871596e-05, "loss": 0.6574, "step": 4814 }, { "epoch": 0.6080985081695477, "grad_norm": 1.7265625, "learning_rate": 1.5887216456949023e-05, "loss": 0.6467, "step": 4815 }, { "epoch": 0.6082248006946089, "grad_norm": 1.734375, "learning_rate": 1.5885602548376185e-05, "loss": 0.5646, "step": 4816 }, { "epoch": 0.6083510932196701, "grad_norm": 1.8515625, "learning_rate": 1.58839884052174e-05, "loss": 0.7401, "step": 4817 }, { "epoch": 0.6084773857447312, "grad_norm": 1.734375, "learning_rate": 1.5882374027537015e-05, "loss": 0.5605, "step": 4818 }, { "epoch": 0.6086036782697924, "grad_norm": 1.71875, "learning_rate": 1.5880759415399367e-05, "loss": 0.6319, "step": 4819 }, { "epoch": 0.6087299707948536, "grad_norm": 1.828125, "learning_rate": 1.5879144568868814e-05, "loss": 0.7583, "step": 4820 }, { "epoch": 0.6088562633199147, "grad_norm": 1.984375, "learning_rate": 1.5877529488009723e-05, "loss": 0.8036, "step": 4821 }, { "epoch": 0.608982555844976, "grad_norm": 1.9453125, "learning_rate": 1.5875914172886458e-05, "loss": 0.7281, "step": 4822 }, { "epoch": 0.6091088483700371, "grad_norm": 1.8046875, "learning_rate": 1.587429862356341e-05, "loss": 0.6468, "step": 4823 }, { "epoch": 0.6092351408950982, "grad_norm": 1.84375, "learning_rate": 1.5872682840104965e-05, "loss": 0.7296, "step": 4824 }, { "epoch": 0.6093614334201595, "grad_norm": 1.8125, "learning_rate": 1.587106682257553e-05, "loss": 0.6278, "step": 4825 }, { "epoch": 0.6094877259452206, "grad_norm": 1.9375, "learning_rate": 1.586945057103951e-05, "loss": 0.6844, "step": 4826 }, { "epoch": 0.6096140184702817, "grad_norm": 1.8671875, "learning_rate": 1.5867834085561333e-05, "loss": 0.7424, "step": 4827 }, { "epoch": 0.609740310995343, "grad_norm": 1.8125, "learning_rate": 1.586621736620542e-05, "loss": 0.7453, "step": 4828 }, { "epoch": 0.6098666035204041, "grad_norm": 1.75, "learning_rate": 1.586460041303621e-05, "loss": 0.6956, "step": 4829 }, { "epoch": 0.6099928960454654, "grad_norm": 1.640625, "learning_rate": 1.586298322611816e-05, "loss": 0.6821, "step": 4830 }, { "epoch": 0.6101191885705265, "grad_norm": 1.8984375, "learning_rate": 1.586136580551571e-05, "loss": 0.6517, "step": 4831 }, { "epoch": 0.6102454810955876, "grad_norm": 1.9453125, "learning_rate": 1.5859748151293347e-05, "loss": 0.7362, "step": 4832 }, { "epoch": 0.6103717736206489, "grad_norm": 1.8359375, "learning_rate": 1.585813026351553e-05, "loss": 0.6813, "step": 4833 }, { "epoch": 0.61049806614571, "grad_norm": 1.875, "learning_rate": 1.5856512142246755e-05, "loss": 0.7539, "step": 4834 }, { "epoch": 0.6106243586707711, "grad_norm": 1.71875, "learning_rate": 1.585489378755151e-05, "loss": 0.6245, "step": 4835 }, { "epoch": 0.6107506511958324, "grad_norm": 1.6640625, "learning_rate": 1.5853275199494302e-05, "loss": 0.5967, "step": 4836 }, { "epoch": 0.6108769437208935, "grad_norm": 2.15625, "learning_rate": 1.5851656378139644e-05, "loss": 0.6381, "step": 4837 }, { "epoch": 0.6110032362459547, "grad_norm": 1.78125, "learning_rate": 1.5850037323552058e-05, "loss": 0.6833, "step": 4838 }, { "epoch": 0.6111295287710159, "grad_norm": 1.7265625, "learning_rate": 1.584841803579607e-05, "loss": 0.6395, "step": 4839 }, { "epoch": 0.611255821296077, "grad_norm": 1.859375, "learning_rate": 1.584679851493623e-05, "loss": 0.666, "step": 4840 }, { "epoch": 0.6113821138211382, "grad_norm": 1.6328125, "learning_rate": 1.5845178761037086e-05, "loss": 0.557, "step": 4841 }, { "epoch": 0.6115084063461994, "grad_norm": 1.984375, "learning_rate": 1.584355877416319e-05, "loss": 0.6781, "step": 4842 }, { "epoch": 0.6116346988712605, "grad_norm": 2.203125, "learning_rate": 1.584193855437912e-05, "loss": 0.8585, "step": 4843 }, { "epoch": 0.6117609913963218, "grad_norm": 1.7421875, "learning_rate": 1.584031810174945e-05, "loss": 0.6729, "step": 4844 }, { "epoch": 0.6118872839213829, "grad_norm": 1.7578125, "learning_rate": 1.5838697416338767e-05, "loss": 0.6968, "step": 4845 }, { "epoch": 0.612013576446444, "grad_norm": 1.734375, "learning_rate": 1.583707649821167e-05, "loss": 0.6829, "step": 4846 }, { "epoch": 0.6121398689715053, "grad_norm": 1.78125, "learning_rate": 1.5835455347432764e-05, "loss": 0.6741, "step": 4847 }, { "epoch": 0.6122661614965664, "grad_norm": 1.7578125, "learning_rate": 1.5833833964066657e-05, "loss": 0.6664, "step": 4848 }, { "epoch": 0.6123924540216276, "grad_norm": 1.7578125, "learning_rate": 1.5832212348177985e-05, "loss": 0.5891, "step": 4849 }, { "epoch": 0.6125187465466888, "grad_norm": 1.703125, "learning_rate": 1.5830590499831377e-05, "loss": 0.6021, "step": 4850 }, { "epoch": 0.6126450390717499, "grad_norm": 1.9140625, "learning_rate": 1.582896841909147e-05, "loss": 0.7376, "step": 4851 }, { "epoch": 0.6127713315968111, "grad_norm": 1.65625, "learning_rate": 1.582734610602293e-05, "loss": 0.5869, "step": 4852 }, { "epoch": 0.6128976241218723, "grad_norm": 1.7109375, "learning_rate": 1.5825723560690406e-05, "loss": 0.6913, "step": 4853 }, { "epoch": 0.6130239166469335, "grad_norm": 1.7265625, "learning_rate": 1.5824100783158572e-05, "loss": 0.7203, "step": 4854 }, { "epoch": 0.6131502091719946, "grad_norm": 1.703125, "learning_rate": 1.5822477773492114e-05, "loss": 0.5844, "step": 4855 }, { "epoch": 0.6132765016970558, "grad_norm": 1.7109375, "learning_rate": 1.582085453175571e-05, "loss": 0.6863, "step": 4856 }, { "epoch": 0.613402794222117, "grad_norm": 1.6953125, "learning_rate": 1.581923105801407e-05, "loss": 0.6707, "step": 4857 }, { "epoch": 0.6135290867471781, "grad_norm": 1.671875, "learning_rate": 1.5817607352331898e-05, "loss": 0.5587, "step": 4858 }, { "epoch": 0.6136553792722393, "grad_norm": 1.859375, "learning_rate": 1.58159834147739e-05, "loss": 0.6269, "step": 4859 }, { "epoch": 0.6137816717973005, "grad_norm": 1.703125, "learning_rate": 1.5814359245404818e-05, "loss": 0.6122, "step": 4860 }, { "epoch": 0.6139079643223617, "grad_norm": 1.671875, "learning_rate": 1.5812734844289382e-05, "loss": 0.622, "step": 4861 }, { "epoch": 0.6140342568474229, "grad_norm": 1.78125, "learning_rate": 1.5811110211492335e-05, "loss": 0.6064, "step": 4862 }, { "epoch": 0.614160549372484, "grad_norm": 1.7421875, "learning_rate": 1.580948534707843e-05, "loss": 0.6553, "step": 4863 }, { "epoch": 0.6142868418975452, "grad_norm": 1.625, "learning_rate": 1.5807860251112433e-05, "loss": 0.5324, "step": 4864 }, { "epoch": 0.6144131344226064, "grad_norm": 1.7890625, "learning_rate": 1.580623492365912e-05, "loss": 0.6497, "step": 4865 }, { "epoch": 0.6145394269476675, "grad_norm": 1.7265625, "learning_rate": 1.5804609364783262e-05, "loss": 0.6252, "step": 4866 }, { "epoch": 0.6146657194727287, "grad_norm": 1.6796875, "learning_rate": 1.5802983574549656e-05, "loss": 0.5364, "step": 4867 }, { "epoch": 0.6147920119977899, "grad_norm": 1.78125, "learning_rate": 1.5801357553023105e-05, "loss": 0.6821, "step": 4868 }, { "epoch": 0.614918304522851, "grad_norm": 1.890625, "learning_rate": 1.5799731300268415e-05, "loss": 0.7386, "step": 4869 }, { "epoch": 0.6150445970479123, "grad_norm": 1.7734375, "learning_rate": 1.5798104816350407e-05, "loss": 0.7179, "step": 4870 }, { "epoch": 0.6151708895729734, "grad_norm": 1.7421875, "learning_rate": 1.5796478101333902e-05, "loss": 0.6688, "step": 4871 }, { "epoch": 0.6152971820980345, "grad_norm": 1.890625, "learning_rate": 1.5794851155283744e-05, "loss": 0.6882, "step": 4872 }, { "epoch": 0.6154234746230958, "grad_norm": 1.8203125, "learning_rate": 1.5793223978264774e-05, "loss": 0.6782, "step": 4873 }, { "epoch": 0.6155497671481569, "grad_norm": 1.8203125, "learning_rate": 1.5791596570341855e-05, "loss": 0.7498, "step": 4874 }, { "epoch": 0.6156760596732181, "grad_norm": 1.859375, "learning_rate": 1.5789968931579848e-05, "loss": 0.655, "step": 4875 }, { "epoch": 0.6158023521982793, "grad_norm": 1.6875, "learning_rate": 1.578834106204362e-05, "loss": 0.5446, "step": 4876 }, { "epoch": 0.6159286447233404, "grad_norm": 1.6796875, "learning_rate": 1.578671296179806e-05, "loss": 0.6105, "step": 4877 }, { "epoch": 0.6160549372484017, "grad_norm": 1.8046875, "learning_rate": 1.5785084630908058e-05, "loss": 0.7259, "step": 4878 }, { "epoch": 0.6161812297734628, "grad_norm": 1.8203125, "learning_rate": 1.578345606943852e-05, "loss": 0.6052, "step": 4879 }, { "epoch": 0.6163075222985239, "grad_norm": 1.890625, "learning_rate": 1.5781827277454352e-05, "loss": 0.6558, "step": 4880 }, { "epoch": 0.6164338148235852, "grad_norm": 1.7265625, "learning_rate": 1.5780198255020475e-05, "loss": 0.589, "step": 4881 }, { "epoch": 0.6165601073486463, "grad_norm": 2.0625, "learning_rate": 1.5778569002201822e-05, "loss": 0.7487, "step": 4882 }, { "epoch": 0.6166863998737074, "grad_norm": 1.796875, "learning_rate": 1.5776939519063323e-05, "loss": 0.6611, "step": 4883 }, { "epoch": 0.6168126923987687, "grad_norm": 1.7265625, "learning_rate": 1.5775309805669932e-05, "loss": 0.6953, "step": 4884 }, { "epoch": 0.6169389849238298, "grad_norm": 1.890625, "learning_rate": 1.57736798620866e-05, "loss": 0.7373, "step": 4885 }, { "epoch": 0.617065277448891, "grad_norm": 1.921875, "learning_rate": 1.5772049688378297e-05, "loss": 0.7027, "step": 4886 }, { "epoch": 0.6171915699739522, "grad_norm": 1.984375, "learning_rate": 1.5770419284609998e-05, "loss": 0.7914, "step": 4887 }, { "epoch": 0.6173178624990133, "grad_norm": 1.9609375, "learning_rate": 1.576878865084668e-05, "loss": 0.6374, "step": 4888 }, { "epoch": 0.6174441550240745, "grad_norm": 1.671875, "learning_rate": 1.5767157787153345e-05, "loss": 0.6504, "step": 4889 }, { "epoch": 0.6175704475491357, "grad_norm": 1.734375, "learning_rate": 1.5765526693594993e-05, "loss": 0.6252, "step": 4890 }, { "epoch": 0.6176967400741968, "grad_norm": 1.78125, "learning_rate": 1.5763895370236634e-05, "loss": 0.6727, "step": 4891 }, { "epoch": 0.6178230325992581, "grad_norm": 1.9140625, "learning_rate": 1.5762263817143284e-05, "loss": 0.6906, "step": 4892 }, { "epoch": 0.6179493251243192, "grad_norm": 1.7890625, "learning_rate": 1.576063203437998e-05, "loss": 0.6575, "step": 4893 }, { "epoch": 0.6180756176493803, "grad_norm": 1.8046875, "learning_rate": 1.575900002201176e-05, "loss": 0.5768, "step": 4894 }, { "epoch": 0.6182019101744416, "grad_norm": 1.7109375, "learning_rate": 1.5757367780103672e-05, "loss": 0.6342, "step": 4895 }, { "epoch": 0.6183282026995027, "grad_norm": 1.8046875, "learning_rate": 1.5755735308720766e-05, "loss": 0.5684, "step": 4896 }, { "epoch": 0.6184544952245639, "grad_norm": 1.671875, "learning_rate": 1.575410260792812e-05, "loss": 0.581, "step": 4897 }, { "epoch": 0.6185807877496251, "grad_norm": 1.921875, "learning_rate": 1.57524696777908e-05, "loss": 0.6289, "step": 4898 }, { "epoch": 0.6187070802746862, "grad_norm": 1.8359375, "learning_rate": 1.5750836518373898e-05, "loss": 0.674, "step": 4899 }, { "epoch": 0.6188333727997474, "grad_norm": 1.828125, "learning_rate": 1.57492031297425e-05, "loss": 0.5547, "step": 4900 }, { "epoch": 0.6189596653248086, "grad_norm": 1.859375, "learning_rate": 1.5747569511961716e-05, "loss": 0.7157, "step": 4901 }, { "epoch": 0.6190859578498698, "grad_norm": 1.7421875, "learning_rate": 1.5745935665096654e-05, "loss": 0.6948, "step": 4902 }, { "epoch": 0.6192122503749309, "grad_norm": 1.9921875, "learning_rate": 1.574430158921244e-05, "loss": 0.7509, "step": 4903 }, { "epoch": 0.6193385428999921, "grad_norm": 1.6640625, "learning_rate": 1.5742667284374196e-05, "loss": 0.6364, "step": 4904 }, { "epoch": 0.6194648354250533, "grad_norm": 1.8828125, "learning_rate": 1.5741032750647064e-05, "loss": 0.7909, "step": 4905 }, { "epoch": 0.6195911279501145, "grad_norm": 1.859375, "learning_rate": 1.5739397988096197e-05, "loss": 0.6127, "step": 4906 }, { "epoch": 0.6197174204751756, "grad_norm": 2.359375, "learning_rate": 1.5737762996786753e-05, "loss": 0.6332, "step": 4907 }, { "epoch": 0.6198437130002368, "grad_norm": 1.765625, "learning_rate": 1.5736127776783894e-05, "loss": 0.6821, "step": 4908 }, { "epoch": 0.619970005525298, "grad_norm": 1.828125, "learning_rate": 1.5734492328152796e-05, "loss": 0.7124, "step": 4909 }, { "epoch": 0.6200962980503592, "grad_norm": 1.90625, "learning_rate": 1.5732856650958653e-05, "loss": 0.6669, "step": 4910 }, { "epoch": 0.6202225905754203, "grad_norm": 1.6953125, "learning_rate": 1.5731220745266646e-05, "loss": 0.6131, "step": 4911 }, { "epoch": 0.6203488831004815, "grad_norm": 1.8671875, "learning_rate": 1.572958461114199e-05, "loss": 0.7004, "step": 4912 }, { "epoch": 0.6204751756255427, "grad_norm": 1.890625, "learning_rate": 1.5727948248649886e-05, "loss": 0.669, "step": 4913 }, { "epoch": 0.6206014681506038, "grad_norm": 1.7734375, "learning_rate": 1.5726311657855564e-05, "loss": 0.6545, "step": 4914 }, { "epoch": 0.620727760675665, "grad_norm": 1.921875, "learning_rate": 1.5724674838824254e-05, "loss": 0.7557, "step": 4915 }, { "epoch": 0.6208540532007262, "grad_norm": 1.6953125, "learning_rate": 1.5723037791621193e-05, "loss": 0.659, "step": 4916 }, { "epoch": 0.6209803457257873, "grad_norm": 1.8203125, "learning_rate": 1.5721400516311628e-05, "loss": 0.6264, "step": 4917 }, { "epoch": 0.6211066382508486, "grad_norm": 1.8359375, "learning_rate": 1.5719763012960817e-05, "loss": 0.6995, "step": 4918 }, { "epoch": 0.6212329307759097, "grad_norm": 1.765625, "learning_rate": 1.5718125281634036e-05, "loss": 0.5969, "step": 4919 }, { "epoch": 0.6213592233009708, "grad_norm": 1.8046875, "learning_rate": 1.571648732239655e-05, "loss": 0.7241, "step": 4920 }, { "epoch": 0.6214855158260321, "grad_norm": 1.8515625, "learning_rate": 1.571484913531365e-05, "loss": 0.7523, "step": 4921 }, { "epoch": 0.6216118083510932, "grad_norm": 1.828125, "learning_rate": 1.5713210720450627e-05, "loss": 0.7139, "step": 4922 }, { "epoch": 0.6217381008761544, "grad_norm": 1.609375, "learning_rate": 1.5711572077872784e-05, "loss": 0.6152, "step": 4923 }, { "epoch": 0.6218643934012156, "grad_norm": 1.6640625, "learning_rate": 1.570993320764544e-05, "loss": 0.6672, "step": 4924 }, { "epoch": 0.6219906859262767, "grad_norm": 1.734375, "learning_rate": 1.570829410983391e-05, "loss": 0.7783, "step": 4925 }, { "epoch": 0.622116978451338, "grad_norm": 1.7578125, "learning_rate": 1.570665478450353e-05, "loss": 0.7251, "step": 4926 }, { "epoch": 0.6222432709763991, "grad_norm": 1.765625, "learning_rate": 1.570501523171963e-05, "loss": 0.6908, "step": 4927 }, { "epoch": 0.6223695635014602, "grad_norm": 1.765625, "learning_rate": 1.5703375451547563e-05, "loss": 0.6505, "step": 4928 }, { "epoch": 0.6224958560265215, "grad_norm": 1.953125, "learning_rate": 1.570173544405269e-05, "loss": 0.8516, "step": 4929 }, { "epoch": 0.6226221485515826, "grad_norm": 1.734375, "learning_rate": 1.5700095209300376e-05, "loss": 0.6184, "step": 4930 }, { "epoch": 0.6227484410766437, "grad_norm": 1.8046875, "learning_rate": 1.5698454747355997e-05, "loss": 0.6768, "step": 4931 }, { "epoch": 0.622874733601705, "grad_norm": 1.7421875, "learning_rate": 1.5696814058284937e-05, "loss": 0.7012, "step": 4932 }, { "epoch": 0.6230010261267661, "grad_norm": 1.78125, "learning_rate": 1.569517314215259e-05, "loss": 0.6503, "step": 4933 }, { "epoch": 0.6231273186518272, "grad_norm": 1.7421875, "learning_rate": 1.569353199902436e-05, "loss": 0.5903, "step": 4934 }, { "epoch": 0.6232536111768885, "grad_norm": 1.703125, "learning_rate": 1.5691890628965657e-05, "loss": 0.6696, "step": 4935 }, { "epoch": 0.6233799037019496, "grad_norm": 1.875, "learning_rate": 1.5690249032041903e-05, "loss": 0.6507, "step": 4936 }, { "epoch": 0.6235061962270109, "grad_norm": 1.7890625, "learning_rate": 1.568860720831853e-05, "loss": 0.7061, "step": 4937 }, { "epoch": 0.623632488752072, "grad_norm": 2.078125, "learning_rate": 1.5686965157860976e-05, "loss": 0.7472, "step": 4938 }, { "epoch": 0.6237587812771331, "grad_norm": 1.8125, "learning_rate": 1.5685322880734685e-05, "loss": 0.6043, "step": 4939 }, { "epoch": 0.6238850738021944, "grad_norm": 1.71875, "learning_rate": 1.568368037700512e-05, "loss": 0.6549, "step": 4940 }, { "epoch": 0.6240113663272555, "grad_norm": 1.875, "learning_rate": 1.5682037646737747e-05, "loss": 0.6388, "step": 4941 }, { "epoch": 0.6241376588523166, "grad_norm": 1.6953125, "learning_rate": 1.568039468999804e-05, "loss": 0.5719, "step": 4942 }, { "epoch": 0.6242639513773779, "grad_norm": 1.9765625, "learning_rate": 1.567875150685148e-05, "loss": 0.6187, "step": 4943 }, { "epoch": 0.624390243902439, "grad_norm": 1.7890625, "learning_rate": 1.5677108097363565e-05, "loss": 0.6577, "step": 4944 }, { "epoch": 0.6245165364275002, "grad_norm": 1.9453125, "learning_rate": 1.5675464461599794e-05, "loss": 0.746, "step": 4945 }, { "epoch": 0.6246428289525614, "grad_norm": 1.796875, "learning_rate": 1.567382059962568e-05, "loss": 0.6493, "step": 4946 }, { "epoch": 0.6247691214776225, "grad_norm": 1.7578125, "learning_rate": 1.5672176511506744e-05, "loss": 0.6973, "step": 4947 }, { "epoch": 0.6248954140026837, "grad_norm": 1.734375, "learning_rate": 1.5670532197308515e-05, "loss": 0.7291, "step": 4948 }, { "epoch": 0.6250217065277449, "grad_norm": 1.8125, "learning_rate": 1.566888765709653e-05, "loss": 0.6667, "step": 4949 }, { "epoch": 0.625147999052806, "grad_norm": 1.7421875, "learning_rate": 1.5667242890936335e-05, "loss": 0.5806, "step": 4950 }, { "epoch": 0.6252742915778672, "grad_norm": 1.859375, "learning_rate": 1.566559789889349e-05, "loss": 0.7303, "step": 4951 }, { "epoch": 0.6254005841029284, "grad_norm": 1.75, "learning_rate": 1.566395268103356e-05, "loss": 0.649, "step": 4952 }, { "epoch": 0.6255268766279896, "grad_norm": 1.734375, "learning_rate": 1.5662307237422122e-05, "loss": 0.6221, "step": 4953 }, { "epoch": 0.6256531691530508, "grad_norm": 1.8203125, "learning_rate": 1.566066156812475e-05, "loss": 0.6773, "step": 4954 }, { "epoch": 0.6257794616781119, "grad_norm": 1.8515625, "learning_rate": 1.5659015673207046e-05, "loss": 0.6712, "step": 4955 }, { "epoch": 0.6259057542031731, "grad_norm": 1.765625, "learning_rate": 1.5657369552734608e-05, "loss": 0.6985, "step": 4956 }, { "epoch": 0.6260320467282343, "grad_norm": 1.8515625, "learning_rate": 1.5655723206773044e-05, "loss": 0.666, "step": 4957 }, { "epoch": 0.6261583392532954, "grad_norm": 1.765625, "learning_rate": 1.5654076635387976e-05, "loss": 0.6017, "step": 4958 }, { "epoch": 0.6262846317783566, "grad_norm": 1.7890625, "learning_rate": 1.5652429838645036e-05, "loss": 0.6185, "step": 4959 }, { "epoch": 0.6264109243034178, "grad_norm": 1.6640625, "learning_rate": 1.5650782816609855e-05, "loss": 0.6188, "step": 4960 }, { "epoch": 0.626537216828479, "grad_norm": 1.828125, "learning_rate": 1.564913556934808e-05, "loss": 0.6697, "step": 4961 }, { "epoch": 0.6266635093535401, "grad_norm": 1.8125, "learning_rate": 1.564748809692537e-05, "loss": 0.6061, "step": 4962 }, { "epoch": 0.6267898018786013, "grad_norm": 1.84375, "learning_rate": 1.5645840399407386e-05, "loss": 0.7303, "step": 4963 }, { "epoch": 0.6269160944036625, "grad_norm": 1.8671875, "learning_rate": 1.5644192476859803e-05, "loss": 0.7913, "step": 4964 }, { "epoch": 0.6270423869287236, "grad_norm": 1.7421875, "learning_rate": 1.5642544329348302e-05, "loss": 0.7209, "step": 4965 }, { "epoch": 0.6271686794537848, "grad_norm": 1.7109375, "learning_rate": 1.5640895956938577e-05, "loss": 0.6517, "step": 4966 }, { "epoch": 0.627294971978846, "grad_norm": 1.8671875, "learning_rate": 1.5639247359696325e-05, "loss": 0.6036, "step": 4967 }, { "epoch": 0.6274212645039072, "grad_norm": 1.71875, "learning_rate": 1.5637598537687254e-05, "loss": 0.5346, "step": 4968 }, { "epoch": 0.6275475570289684, "grad_norm": 1.9140625, "learning_rate": 1.5635949490977086e-05, "loss": 0.6307, "step": 4969 }, { "epoch": 0.6276738495540295, "grad_norm": 1.9921875, "learning_rate": 1.5634300219631547e-05, "loss": 0.66, "step": 4970 }, { "epoch": 0.6278001420790907, "grad_norm": 1.7109375, "learning_rate": 1.563265072371637e-05, "loss": 0.6317, "step": 4971 }, { "epoch": 0.6279264346041519, "grad_norm": 1.765625, "learning_rate": 1.563100100329731e-05, "loss": 0.69, "step": 4972 }, { "epoch": 0.628052727129213, "grad_norm": 1.8515625, "learning_rate": 1.5629351058440106e-05, "loss": 0.677, "step": 4973 }, { "epoch": 0.6281790196542743, "grad_norm": 1.7421875, "learning_rate": 1.562770088921053e-05, "loss": 0.6358, "step": 4974 }, { "epoch": 0.6283053121793354, "grad_norm": 1.890625, "learning_rate": 1.562605049567435e-05, "loss": 0.6866, "step": 4975 }, { "epoch": 0.6284316047043965, "grad_norm": 1.78125, "learning_rate": 1.562439987789735e-05, "loss": 0.6544, "step": 4976 }, { "epoch": 0.6285578972294578, "grad_norm": 1.6640625, "learning_rate": 1.5622749035945316e-05, "loss": 0.6217, "step": 4977 }, { "epoch": 0.6286841897545189, "grad_norm": 2.109375, "learning_rate": 1.5621097969884048e-05, "loss": 0.6982, "step": 4978 }, { "epoch": 0.62881048227958, "grad_norm": 1.6484375, "learning_rate": 1.5619446679779357e-05, "loss": 0.6199, "step": 4979 }, { "epoch": 0.6289367748046413, "grad_norm": 1.890625, "learning_rate": 1.5617795165697057e-05, "loss": 0.7249, "step": 4980 }, { "epoch": 0.6290630673297024, "grad_norm": 1.8515625, "learning_rate": 1.5616143427702973e-05, "loss": 0.6924, "step": 4981 }, { "epoch": 0.6291893598547635, "grad_norm": 1.71875, "learning_rate": 1.5614491465862937e-05, "loss": 0.6645, "step": 4982 }, { "epoch": 0.6293156523798248, "grad_norm": 1.7734375, "learning_rate": 1.5612839280242796e-05, "loss": 0.6391, "step": 4983 }, { "epoch": 0.6294419449048859, "grad_norm": 1.7265625, "learning_rate": 1.5611186870908403e-05, "loss": 0.6441, "step": 4984 }, { "epoch": 0.6295682374299472, "grad_norm": 1.7265625, "learning_rate": 1.5609534237925612e-05, "loss": 0.6279, "step": 4985 }, { "epoch": 0.6296945299550083, "grad_norm": 1.6875, "learning_rate": 1.56078813813603e-05, "loss": 0.6092, "step": 4986 }, { "epoch": 0.6298208224800694, "grad_norm": 1.734375, "learning_rate": 1.560622830127834e-05, "loss": 0.6184, "step": 4987 }, { "epoch": 0.6299471150051307, "grad_norm": 1.8828125, "learning_rate": 1.560457499774563e-05, "loss": 0.6271, "step": 4988 }, { "epoch": 0.6300734075301918, "grad_norm": 1.796875, "learning_rate": 1.5602921470828055e-05, "loss": 0.6136, "step": 4989 }, { "epoch": 0.6301997000552529, "grad_norm": 1.671875, "learning_rate": 1.560126772059153e-05, "loss": 0.6371, "step": 4990 }, { "epoch": 0.6303259925803142, "grad_norm": 1.796875, "learning_rate": 1.5599613747101963e-05, "loss": 0.6608, "step": 4991 }, { "epoch": 0.6304522851053753, "grad_norm": 1.703125, "learning_rate": 1.559795955042528e-05, "loss": 0.6395, "step": 4992 }, { "epoch": 0.6305785776304365, "grad_norm": 1.7578125, "learning_rate": 1.559630513062741e-05, "loss": 0.6201, "step": 4993 }, { "epoch": 0.6307048701554977, "grad_norm": 1.9140625, "learning_rate": 1.5594650487774302e-05, "loss": 0.6973, "step": 4994 }, { "epoch": 0.6308311626805588, "grad_norm": 1.6640625, "learning_rate": 1.5592995621931896e-05, "loss": 0.6528, "step": 4995 }, { "epoch": 0.63095745520562, "grad_norm": 1.828125, "learning_rate": 1.5591340533166156e-05, "loss": 0.7165, "step": 4996 }, { "epoch": 0.6310837477306812, "grad_norm": 1.7109375, "learning_rate": 1.5589685221543053e-05, "loss": 0.6327, "step": 4997 }, { "epoch": 0.6312100402557423, "grad_norm": 1.7265625, "learning_rate": 1.558802968712856e-05, "loss": 0.678, "step": 4998 }, { "epoch": 0.6313363327808036, "grad_norm": 1.6875, "learning_rate": 1.5586373929988664e-05, "loss": 0.6218, "step": 4999 }, { "epoch": 0.6314626253058647, "grad_norm": 1.7890625, "learning_rate": 1.558471795018936e-05, "loss": 0.6187, "step": 5000 }, { "epoch": 0.6315889178309259, "grad_norm": 1.8203125, "learning_rate": 1.5583061747796647e-05, "loss": 0.6869, "step": 5001 }, { "epoch": 0.6317152103559871, "grad_norm": 1.6484375, "learning_rate": 1.5581405322876542e-05, "loss": 0.6646, "step": 5002 }, { "epoch": 0.6318415028810482, "grad_norm": 1.859375, "learning_rate": 1.5579748675495064e-05, "loss": 0.7187, "step": 5003 }, { "epoch": 0.6319677954061094, "grad_norm": 1.8125, "learning_rate": 1.5578091805718242e-05, "loss": 0.7005, "step": 5004 }, { "epoch": 0.6320940879311706, "grad_norm": 1.796875, "learning_rate": 1.5576434713612117e-05, "loss": 0.6432, "step": 5005 }, { "epoch": 0.6322203804562317, "grad_norm": 1.8046875, "learning_rate": 1.5574777399242738e-05, "loss": 0.7055, "step": 5006 }, { "epoch": 0.6323466729812929, "grad_norm": 1.8203125, "learning_rate": 1.557311986267616e-05, "loss": 0.6668, "step": 5007 }, { "epoch": 0.6324729655063541, "grad_norm": 1.6796875, "learning_rate": 1.5571462103978445e-05, "loss": 0.5938, "step": 5008 }, { "epoch": 0.6325992580314153, "grad_norm": 1.859375, "learning_rate": 1.556980412321567e-05, "loss": 0.7609, "step": 5009 }, { "epoch": 0.6327255505564764, "grad_norm": 1.90625, "learning_rate": 1.556814592045392e-05, "loss": 0.686, "step": 5010 }, { "epoch": 0.6328518430815376, "grad_norm": 1.7734375, "learning_rate": 1.5566487495759285e-05, "loss": 0.6591, "step": 5011 }, { "epoch": 0.6329781356065988, "grad_norm": 1.703125, "learning_rate": 1.556482884919787e-05, "loss": 0.5675, "step": 5012 }, { "epoch": 0.6331044281316599, "grad_norm": 1.890625, "learning_rate": 1.5563169980835776e-05, "loss": 0.7005, "step": 5013 }, { "epoch": 0.6332307206567211, "grad_norm": 1.7421875, "learning_rate": 1.5561510890739126e-05, "loss": 0.7038, "step": 5014 }, { "epoch": 0.6333570131817823, "grad_norm": 1.8046875, "learning_rate": 1.555985157897405e-05, "loss": 0.685, "step": 5015 }, { "epoch": 0.6334833057068435, "grad_norm": 1.6796875, "learning_rate": 1.555819204560668e-05, "loss": 0.5963, "step": 5016 }, { "epoch": 0.6336095982319047, "grad_norm": 1.75, "learning_rate": 1.5556532290703164e-05, "loss": 0.6375, "step": 5017 }, { "epoch": 0.6337358907569658, "grad_norm": 1.546875, "learning_rate": 1.5554872314329657e-05, "loss": 0.6217, "step": 5018 }, { "epoch": 0.633862183282027, "grad_norm": 1.890625, "learning_rate": 1.5553212116552315e-05, "loss": 0.7013, "step": 5019 }, { "epoch": 0.6339884758070882, "grad_norm": 1.7578125, "learning_rate": 1.5551551697437318e-05, "loss": 0.6416, "step": 5020 }, { "epoch": 0.6341147683321493, "grad_norm": 1.9453125, "learning_rate": 1.5549891057050837e-05, "loss": 0.6763, "step": 5021 }, { "epoch": 0.6342410608572105, "grad_norm": 1.8125, "learning_rate": 1.554823019545907e-05, "loss": 0.6616, "step": 5022 }, { "epoch": 0.6343673533822717, "grad_norm": 1.703125, "learning_rate": 1.5546569112728206e-05, "loss": 0.6765, "step": 5023 }, { "epoch": 0.6344936459073328, "grad_norm": 1.859375, "learning_rate": 1.5544907808924464e-05, "loss": 0.6646, "step": 5024 }, { "epoch": 0.6346199384323941, "grad_norm": 1.75, "learning_rate": 1.5543246284114047e-05, "loss": 0.6717, "step": 5025 }, { "epoch": 0.6347462309574552, "grad_norm": 1.8828125, "learning_rate": 1.5541584538363187e-05, "loss": 0.6585, "step": 5026 }, { "epoch": 0.6348725234825163, "grad_norm": 1.6953125, "learning_rate": 1.553992257173812e-05, "loss": 0.6758, "step": 5027 }, { "epoch": 0.6349988160075776, "grad_norm": 1.71875, "learning_rate": 1.5538260384305076e-05, "loss": 0.6351, "step": 5028 }, { "epoch": 0.6351251085326387, "grad_norm": 1.96875, "learning_rate": 1.5536597976130314e-05, "loss": 0.7682, "step": 5029 }, { "epoch": 0.6352514010577, "grad_norm": 1.7890625, "learning_rate": 1.5534935347280095e-05, "loss": 0.6908, "step": 5030 }, { "epoch": 0.6353776935827611, "grad_norm": 1.890625, "learning_rate": 1.5533272497820683e-05, "loss": 0.6929, "step": 5031 }, { "epoch": 0.6355039861078222, "grad_norm": 1.8671875, "learning_rate": 1.5531609427818354e-05, "loss": 0.8265, "step": 5032 }, { "epoch": 0.6356302786328835, "grad_norm": 1.7109375, "learning_rate": 1.5529946137339404e-05, "loss": 0.6403, "step": 5033 }, { "epoch": 0.6357565711579446, "grad_norm": 1.875, "learning_rate": 1.5528282626450114e-05, "loss": 0.7454, "step": 5034 }, { "epoch": 0.6358828636830057, "grad_norm": 1.8203125, "learning_rate": 1.55266188952168e-05, "loss": 0.6639, "step": 5035 }, { "epoch": 0.636009156208067, "grad_norm": 1.890625, "learning_rate": 1.5524954943705765e-05, "loss": 0.664, "step": 5036 }, { "epoch": 0.6361354487331281, "grad_norm": 1.828125, "learning_rate": 1.552329077198333e-05, "loss": 0.5806, "step": 5037 }, { "epoch": 0.6362617412581892, "grad_norm": 1.78125, "learning_rate": 1.552162638011584e-05, "loss": 0.6525, "step": 5038 }, { "epoch": 0.6363880337832505, "grad_norm": 2.015625, "learning_rate": 1.551996176816961e-05, "loss": 0.7708, "step": 5039 }, { "epoch": 0.6365143263083116, "grad_norm": 1.71875, "learning_rate": 1.5518296936211004e-05, "loss": 0.6504, "step": 5040 }, { "epoch": 0.6366406188333728, "grad_norm": 1.765625, "learning_rate": 1.5516631884306376e-05, "loss": 0.6374, "step": 5041 }, { "epoch": 0.636766911358434, "grad_norm": 1.9453125, "learning_rate": 1.5514966612522084e-05, "loss": 0.6851, "step": 5042 }, { "epoch": 0.6368932038834951, "grad_norm": 2.5625, "learning_rate": 1.551330112092451e-05, "loss": 0.6607, "step": 5043 }, { "epoch": 0.6370194964085564, "grad_norm": 1.6640625, "learning_rate": 1.5511635409580032e-05, "loss": 0.6063, "step": 5044 }, { "epoch": 0.6371457889336175, "grad_norm": 1.6640625, "learning_rate": 1.5509969478555046e-05, "loss": 0.5615, "step": 5045 }, { "epoch": 0.6372720814586786, "grad_norm": 1.7578125, "learning_rate": 1.5508303327915944e-05, "loss": 0.62, "step": 5046 }, { "epoch": 0.6373983739837399, "grad_norm": 1.765625, "learning_rate": 1.550663695772914e-05, "loss": 0.6574, "step": 5047 }, { "epoch": 0.637524666508801, "grad_norm": 1.6953125, "learning_rate": 1.550497036806105e-05, "loss": 0.5888, "step": 5048 }, { "epoch": 0.6376509590338622, "grad_norm": 1.7890625, "learning_rate": 1.55033035589781e-05, "loss": 0.6774, "step": 5049 }, { "epoch": 0.6377772515589234, "grad_norm": 1.7890625, "learning_rate": 1.5501636530546726e-05, "loss": 0.6546, "step": 5050 }, { "epoch": 0.6379035440839845, "grad_norm": 1.7734375, "learning_rate": 1.549996928283337e-05, "loss": 0.6634, "step": 5051 }, { "epoch": 0.6380298366090457, "grad_norm": 1.8046875, "learning_rate": 1.549830181590449e-05, "loss": 0.6576, "step": 5052 }, { "epoch": 0.6381561291341069, "grad_norm": 1.9296875, "learning_rate": 1.549663412982654e-05, "loss": 0.6232, "step": 5053 }, { "epoch": 0.638282421659168, "grad_norm": 1.7734375, "learning_rate": 1.5494966224665996e-05, "loss": 0.6317, "step": 5054 }, { "epoch": 0.6384087141842292, "grad_norm": 3.140625, "learning_rate": 1.549329810048933e-05, "loss": 0.7755, "step": 5055 }, { "epoch": 0.6385350067092904, "grad_norm": 1.890625, "learning_rate": 1.5491629757363033e-05, "loss": 0.7139, "step": 5056 }, { "epoch": 0.6386612992343516, "grad_norm": 1.984375, "learning_rate": 1.5489961195353606e-05, "loss": 0.723, "step": 5057 }, { "epoch": 0.6387875917594127, "grad_norm": 1.6640625, "learning_rate": 1.5488292414527547e-05, "loss": 0.6112, "step": 5058 }, { "epoch": 0.6389138842844739, "grad_norm": 1.84375, "learning_rate": 1.548662341495137e-05, "loss": 0.6508, "step": 5059 }, { "epoch": 0.6390401768095351, "grad_norm": 1.640625, "learning_rate": 1.54849541966916e-05, "loss": 0.665, "step": 5060 }, { "epoch": 0.6391664693345963, "grad_norm": 1.625, "learning_rate": 1.548328475981477e-05, "loss": 0.5839, "step": 5061 }, { "epoch": 0.6392927618596574, "grad_norm": 1.7265625, "learning_rate": 1.548161510438741e-05, "loss": 0.6761, "step": 5062 }, { "epoch": 0.6394190543847186, "grad_norm": 1.9375, "learning_rate": 1.547994523047608e-05, "loss": 0.737, "step": 5063 }, { "epoch": 0.6395453469097798, "grad_norm": 1.8828125, "learning_rate": 1.547827513814733e-05, "loss": 0.562, "step": 5064 }, { "epoch": 0.639671639434841, "grad_norm": 1.640625, "learning_rate": 1.547660482746773e-05, "loss": 0.6217, "step": 5065 }, { "epoch": 0.6397979319599021, "grad_norm": 1.6640625, "learning_rate": 1.5474934298503853e-05, "loss": 0.6301, "step": 5066 }, { "epoch": 0.6399242244849633, "grad_norm": 2.0, "learning_rate": 1.5473263551322282e-05, "loss": 0.6527, "step": 5067 }, { "epoch": 0.6400505170100245, "grad_norm": 1.90625, "learning_rate": 1.5471592585989608e-05, "loss": 0.6698, "step": 5068 }, { "epoch": 0.6401768095350856, "grad_norm": 1.7578125, "learning_rate": 1.5469921402572432e-05, "loss": 0.6161, "step": 5069 }, { "epoch": 0.6403031020601468, "grad_norm": 1.9921875, "learning_rate": 1.5468250001137368e-05, "loss": 0.7264, "step": 5070 }, { "epoch": 0.640429394585208, "grad_norm": 1.625, "learning_rate": 1.5466578381751026e-05, "loss": 0.5859, "step": 5071 }, { "epoch": 0.6405556871102691, "grad_norm": 1.65625, "learning_rate": 1.5464906544480037e-05, "loss": 0.5629, "step": 5072 }, { "epoch": 0.6406819796353304, "grad_norm": 1.7265625, "learning_rate": 1.546323448939104e-05, "loss": 0.6568, "step": 5073 }, { "epoch": 0.6408082721603915, "grad_norm": 1.8203125, "learning_rate": 1.5461562216550672e-05, "loss": 0.7143, "step": 5074 }, { "epoch": 0.6409345646854527, "grad_norm": 1.671875, "learning_rate": 1.545988972602559e-05, "loss": 0.6315, "step": 5075 }, { "epoch": 0.6410608572105139, "grad_norm": 1.75, "learning_rate": 1.545821701788245e-05, "loss": 0.6569, "step": 5076 }, { "epoch": 0.641187149735575, "grad_norm": 1.7890625, "learning_rate": 1.5456544092187936e-05, "loss": 0.609, "step": 5077 }, { "epoch": 0.6413134422606362, "grad_norm": 1.6484375, "learning_rate": 1.545487094900871e-05, "loss": 0.6239, "step": 5078 }, { "epoch": 0.6414397347856974, "grad_norm": 2.078125, "learning_rate": 1.545319758841147e-05, "loss": 0.7429, "step": 5079 }, { "epoch": 0.6415660273107585, "grad_norm": 1.7109375, "learning_rate": 1.545152401046291e-05, "loss": 0.6528, "step": 5080 }, { "epoch": 0.6416923198358198, "grad_norm": 1.8828125, "learning_rate": 1.544985021522973e-05, "loss": 0.6602, "step": 5081 }, { "epoch": 0.6418186123608809, "grad_norm": 1.71875, "learning_rate": 1.5448176202778647e-05, "loss": 0.6796, "step": 5082 }, { "epoch": 0.641944904885942, "grad_norm": 1.6640625, "learning_rate": 1.5446501973176385e-05, "loss": 0.5785, "step": 5083 }, { "epoch": 0.6420711974110033, "grad_norm": 1.734375, "learning_rate": 1.5444827526489675e-05, "loss": 0.6525, "step": 5084 }, { "epoch": 0.6421974899360644, "grad_norm": 1.875, "learning_rate": 1.5443152862785253e-05, "loss": 0.7045, "step": 5085 }, { "epoch": 0.6423237824611255, "grad_norm": 1.7734375, "learning_rate": 1.5441477982129866e-05, "loss": 0.7049, "step": 5086 }, { "epoch": 0.6424500749861868, "grad_norm": 1.796875, "learning_rate": 1.543980288459028e-05, "loss": 0.6613, "step": 5087 }, { "epoch": 0.6425763675112479, "grad_norm": 1.7890625, "learning_rate": 1.5438127570233247e-05, "loss": 0.701, "step": 5088 }, { "epoch": 0.642702660036309, "grad_norm": 1.796875, "learning_rate": 1.5436452039125552e-05, "loss": 0.7312, "step": 5089 }, { "epoch": 0.6428289525613703, "grad_norm": 1.796875, "learning_rate": 1.5434776291333968e-05, "loss": 0.6089, "step": 5090 }, { "epoch": 0.6429552450864314, "grad_norm": 1.765625, "learning_rate": 1.5433100326925298e-05, "loss": 0.6822, "step": 5091 }, { "epoch": 0.6430815376114927, "grad_norm": 1.640625, "learning_rate": 1.543142414596633e-05, "loss": 0.6251, "step": 5092 }, { "epoch": 0.6432078301365538, "grad_norm": 1.828125, "learning_rate": 1.542974774852388e-05, "loss": 0.6267, "step": 5093 }, { "epoch": 0.6433341226616149, "grad_norm": 1.8203125, "learning_rate": 1.542807113466476e-05, "loss": 0.6467, "step": 5094 }, { "epoch": 0.6434604151866762, "grad_norm": 1.703125, "learning_rate": 1.5426394304455804e-05, "loss": 0.6221, "step": 5095 }, { "epoch": 0.6435867077117373, "grad_norm": 1.8515625, "learning_rate": 1.5424717257963838e-05, "loss": 0.681, "step": 5096 }, { "epoch": 0.6437130002367984, "grad_norm": 1.7265625, "learning_rate": 1.542303999525571e-05, "loss": 0.6012, "step": 5097 }, { "epoch": 0.6438392927618597, "grad_norm": 1.7734375, "learning_rate": 1.5421362516398272e-05, "loss": 0.5918, "step": 5098 }, { "epoch": 0.6439655852869208, "grad_norm": 1.765625, "learning_rate": 1.5419684821458377e-05, "loss": 0.7689, "step": 5099 }, { "epoch": 0.644091877811982, "grad_norm": 1.6953125, "learning_rate": 1.54180069105029e-05, "loss": 0.6405, "step": 5100 }, { "epoch": 0.6442181703370432, "grad_norm": 1.703125, "learning_rate": 1.5416328783598724e-05, "loss": 0.6633, "step": 5101 }, { "epoch": 0.6443444628621043, "grad_norm": 1.6484375, "learning_rate": 1.5414650440812724e-05, "loss": 0.6486, "step": 5102 }, { "epoch": 0.6444707553871655, "grad_norm": 1.71875, "learning_rate": 1.54129718822118e-05, "loss": 0.5743, "step": 5103 }, { "epoch": 0.6445970479122267, "grad_norm": 1.7890625, "learning_rate": 1.5411293107862856e-05, "loss": 0.6596, "step": 5104 }, { "epoch": 0.6447233404372879, "grad_norm": 1.6328125, "learning_rate": 1.5409614117832797e-05, "loss": 0.6449, "step": 5105 }, { "epoch": 0.6448496329623491, "grad_norm": 1.8671875, "learning_rate": 1.540793491218856e-05, "loss": 0.6884, "step": 5106 }, { "epoch": 0.6449759254874102, "grad_norm": 1.8359375, "learning_rate": 1.5406255490997053e-05, "loss": 0.7, "step": 5107 }, { "epoch": 0.6451022180124714, "grad_norm": 1.796875, "learning_rate": 1.5404575854325226e-05, "loss": 0.7174, "step": 5108 }, { "epoch": 0.6452285105375326, "grad_norm": 1.59375, "learning_rate": 1.5402896002240026e-05, "loss": 0.5287, "step": 5109 }, { "epoch": 0.6453548030625937, "grad_norm": 1.71875, "learning_rate": 1.5401215934808405e-05, "loss": 0.6475, "step": 5110 }, { "epoch": 0.6454810955876549, "grad_norm": 1.8203125, "learning_rate": 1.5399535652097326e-05, "loss": 0.5966, "step": 5111 }, { "epoch": 0.6456073881127161, "grad_norm": 1.7578125, "learning_rate": 1.539785515417376e-05, "loss": 0.6038, "step": 5112 }, { "epoch": 0.6457336806377773, "grad_norm": 1.984375, "learning_rate": 1.5396174441104688e-05, "loss": 0.7026, "step": 5113 }, { "epoch": 0.6458599731628384, "grad_norm": 1.8828125, "learning_rate": 1.5394493512957102e-05, "loss": 0.73, "step": 5114 }, { "epoch": 0.6459862656878996, "grad_norm": 1.6875, "learning_rate": 1.5392812369798e-05, "loss": 0.6093, "step": 5115 }, { "epoch": 0.6461125582129608, "grad_norm": 1.8203125, "learning_rate": 1.539113101169438e-05, "loss": 0.7042, "step": 5116 }, { "epoch": 0.6462388507380219, "grad_norm": 1.7421875, "learning_rate": 1.5389449438713274e-05, "loss": 0.6468, "step": 5117 }, { "epoch": 0.6463651432630831, "grad_norm": 1.8046875, "learning_rate": 1.5387767650921688e-05, "loss": 0.6669, "step": 5118 }, { "epoch": 0.6464914357881443, "grad_norm": 1.9765625, "learning_rate": 1.538608564838666e-05, "loss": 0.7215, "step": 5119 }, { "epoch": 0.6466177283132054, "grad_norm": 1.6484375, "learning_rate": 1.5384403431175233e-05, "loss": 0.5756, "step": 5120 }, { "epoch": 0.6467440208382667, "grad_norm": 1.828125, "learning_rate": 1.5382720999354454e-05, "loss": 0.6778, "step": 5121 }, { "epoch": 0.6468703133633278, "grad_norm": 1.703125, "learning_rate": 1.5381038352991377e-05, "loss": 0.6018, "step": 5122 }, { "epoch": 0.646996605888389, "grad_norm": 1.796875, "learning_rate": 1.5379355492153075e-05, "loss": 0.6709, "step": 5123 }, { "epoch": 0.6471228984134502, "grad_norm": 1.828125, "learning_rate": 1.5377672416906625e-05, "loss": 0.6836, "step": 5124 }, { "epoch": 0.6472491909385113, "grad_norm": 1.7890625, "learning_rate": 1.53759891273191e-05, "loss": 0.6306, "step": 5125 }, { "epoch": 0.6473754834635725, "grad_norm": 1.8515625, "learning_rate": 1.5374305623457598e-05, "loss": 0.6542, "step": 5126 }, { "epoch": 0.6475017759886337, "grad_norm": 2.0, "learning_rate": 1.537262190538922e-05, "loss": 0.6084, "step": 5127 }, { "epoch": 0.6476280685136948, "grad_norm": 1.9296875, "learning_rate": 1.5370937973181076e-05, "loss": 0.6272, "step": 5128 }, { "epoch": 0.647754361038756, "grad_norm": 1.7109375, "learning_rate": 1.5369253826900275e-05, "loss": 0.6914, "step": 5129 }, { "epoch": 0.6478806535638172, "grad_norm": 1.75, "learning_rate": 1.5367569466613954e-05, "loss": 0.615, "step": 5130 }, { "epoch": 0.6480069460888783, "grad_norm": 1.9140625, "learning_rate": 1.536588489238924e-05, "loss": 0.6906, "step": 5131 }, { "epoch": 0.6481332386139396, "grad_norm": 1.8203125, "learning_rate": 1.5364200104293277e-05, "loss": 0.6924, "step": 5132 }, { "epoch": 0.6482595311390007, "grad_norm": 1.921875, "learning_rate": 1.5362515102393224e-05, "loss": 0.6403, "step": 5133 }, { "epoch": 0.6483858236640618, "grad_norm": 1.640625, "learning_rate": 1.536082988675623e-05, "loss": 0.5331, "step": 5134 }, { "epoch": 0.6485121161891231, "grad_norm": 1.8515625, "learning_rate": 1.5359144457449474e-05, "loss": 0.6363, "step": 5135 }, { "epoch": 0.6486384087141842, "grad_norm": 2.078125, "learning_rate": 1.5357458814540125e-05, "loss": 0.7034, "step": 5136 }, { "epoch": 0.6487647012392455, "grad_norm": 1.75, "learning_rate": 1.5355772958095375e-05, "loss": 0.6831, "step": 5137 }, { "epoch": 0.6488909937643066, "grad_norm": 1.7421875, "learning_rate": 1.5354086888182412e-05, "loss": 0.5774, "step": 5138 }, { "epoch": 0.6490172862893677, "grad_norm": 1.890625, "learning_rate": 1.5352400604868442e-05, "loss": 0.5931, "step": 5139 }, { "epoch": 0.649143578814429, "grad_norm": 1.7890625, "learning_rate": 1.5350714108220673e-05, "loss": 0.5719, "step": 5140 }, { "epoch": 0.6492698713394901, "grad_norm": 1.765625, "learning_rate": 1.5349027398306336e-05, "loss": 0.6239, "step": 5141 }, { "epoch": 0.6493961638645512, "grad_norm": 1.8515625, "learning_rate": 1.5347340475192643e-05, "loss": 0.5967, "step": 5142 }, { "epoch": 0.6495224563896125, "grad_norm": 2.015625, "learning_rate": 1.5345653338946843e-05, "loss": 0.6682, "step": 5143 }, { "epoch": 0.6496487489146736, "grad_norm": 1.8203125, "learning_rate": 1.5343965989636177e-05, "loss": 0.7096, "step": 5144 }, { "epoch": 0.6497750414397347, "grad_norm": 1.6796875, "learning_rate": 1.5342278427327895e-05, "loss": 0.6187, "step": 5145 }, { "epoch": 0.649901333964796, "grad_norm": 1.703125, "learning_rate": 1.5340590652089266e-05, "loss": 0.6512, "step": 5146 }, { "epoch": 0.6500276264898571, "grad_norm": 1.71875, "learning_rate": 1.5338902663987557e-05, "loss": 0.6689, "step": 5147 }, { "epoch": 0.6501539190149183, "grad_norm": 1.8046875, "learning_rate": 1.5337214463090044e-05, "loss": 0.6298, "step": 5148 }, { "epoch": 0.6502802115399795, "grad_norm": 1.9765625, "learning_rate": 1.5335526049464017e-05, "loss": 0.787, "step": 5149 }, { "epoch": 0.6504065040650406, "grad_norm": 1.796875, "learning_rate": 1.533383742317678e-05, "loss": 0.6725, "step": 5150 }, { "epoch": 0.6505327965901018, "grad_norm": 1.71875, "learning_rate": 1.5332148584295622e-05, "loss": 0.6481, "step": 5151 }, { "epoch": 0.650659089115163, "grad_norm": 1.9765625, "learning_rate": 1.533045953288787e-05, "loss": 0.6761, "step": 5152 }, { "epoch": 0.6507853816402241, "grad_norm": 1.8203125, "learning_rate": 1.532877026902084e-05, "loss": 0.6752, "step": 5153 }, { "epoch": 0.6509116741652854, "grad_norm": 1.8984375, "learning_rate": 1.5327080792761862e-05, "loss": 0.7083, "step": 5154 }, { "epoch": 0.6510379666903465, "grad_norm": 2.046875, "learning_rate": 1.5325391104178276e-05, "loss": 0.7164, "step": 5155 }, { "epoch": 0.6511642592154077, "grad_norm": 1.75, "learning_rate": 1.5323701203337425e-05, "loss": 0.6008, "step": 5156 }, { "epoch": 0.6512905517404689, "grad_norm": 1.828125, "learning_rate": 1.532201109030667e-05, "loss": 0.6497, "step": 5157 }, { "epoch": 0.65141684426553, "grad_norm": 1.765625, "learning_rate": 1.5320320765153367e-05, "loss": 0.6992, "step": 5158 }, { "epoch": 0.6515431367905912, "grad_norm": 1.7578125, "learning_rate": 1.5318630227944896e-05, "loss": 0.6373, "step": 5159 }, { "epoch": 0.6516694293156524, "grad_norm": 1.71875, "learning_rate": 1.5316939478748635e-05, "loss": 0.6314, "step": 5160 }, { "epoch": 0.6517957218407135, "grad_norm": 1.8359375, "learning_rate": 1.5315248517631975e-05, "loss": 0.6698, "step": 5161 }, { "epoch": 0.6519220143657747, "grad_norm": 1.6875, "learning_rate": 1.5313557344662305e-05, "loss": 0.6507, "step": 5162 }, { "epoch": 0.6520483068908359, "grad_norm": 1.6953125, "learning_rate": 1.5311865959907042e-05, "loss": 0.6432, "step": 5163 }, { "epoch": 0.6521745994158971, "grad_norm": 1.984375, "learning_rate": 1.53101743634336e-05, "loss": 0.8015, "step": 5164 }, { "epoch": 0.6523008919409582, "grad_norm": 1.765625, "learning_rate": 1.5308482555309394e-05, "loss": 0.5589, "step": 5165 }, { "epoch": 0.6524271844660194, "grad_norm": 1.7734375, "learning_rate": 1.530679053560186e-05, "loss": 0.6403, "step": 5166 }, { "epoch": 0.6525534769910806, "grad_norm": 1.7734375, "learning_rate": 1.5305098304378438e-05, "loss": 0.6935, "step": 5167 }, { "epoch": 0.6526797695161418, "grad_norm": 1.875, "learning_rate": 1.5303405861706574e-05, "loss": 0.6216, "step": 5168 }, { "epoch": 0.652806062041203, "grad_norm": 1.8515625, "learning_rate": 1.5301713207653727e-05, "loss": 0.7651, "step": 5169 }, { "epoch": 0.6529323545662641, "grad_norm": 1.8984375, "learning_rate": 1.5300020342287366e-05, "loss": 0.6571, "step": 5170 }, { "epoch": 0.6530586470913253, "grad_norm": 1.625, "learning_rate": 1.5298327265674956e-05, "loss": 0.6552, "step": 5171 }, { "epoch": 0.6531849396163865, "grad_norm": 1.828125, "learning_rate": 1.5296633977883985e-05, "loss": 0.7111, "step": 5172 }, { "epoch": 0.6533112321414476, "grad_norm": 1.65625, "learning_rate": 1.5294940478981937e-05, "loss": 0.5581, "step": 5173 }, { "epoch": 0.6534375246665088, "grad_norm": 1.7109375, "learning_rate": 1.5293246769036317e-05, "loss": 0.7405, "step": 5174 }, { "epoch": 0.65356381719157, "grad_norm": 1.6484375, "learning_rate": 1.5291552848114632e-05, "loss": 0.6823, "step": 5175 }, { "epoch": 0.6536901097166311, "grad_norm": 1.7734375, "learning_rate": 1.5289858716284395e-05, "loss": 0.608, "step": 5176 }, { "epoch": 0.6538164022416924, "grad_norm": 1.7265625, "learning_rate": 1.5288164373613133e-05, "loss": 0.6179, "step": 5177 }, { "epoch": 0.6539426947667535, "grad_norm": 1.8984375, "learning_rate": 1.5286469820168374e-05, "loss": 0.8263, "step": 5178 }, { "epoch": 0.6540689872918146, "grad_norm": 1.5625, "learning_rate": 1.528477505601766e-05, "loss": 0.6252, "step": 5179 }, { "epoch": 0.6541952798168759, "grad_norm": 1.796875, "learning_rate": 1.5283080081228547e-05, "loss": 0.6019, "step": 5180 }, { "epoch": 0.654321572341937, "grad_norm": 1.78125, "learning_rate": 1.528138489586858e-05, "loss": 0.623, "step": 5181 }, { "epoch": 0.6544478648669981, "grad_norm": 1.90625, "learning_rate": 1.5279689500005336e-05, "loss": 0.7397, "step": 5182 }, { "epoch": 0.6545741573920594, "grad_norm": 1.875, "learning_rate": 1.5277993893706386e-05, "loss": 0.6549, "step": 5183 }, { "epoch": 0.6547004499171205, "grad_norm": 1.828125, "learning_rate": 1.527629807703931e-05, "loss": 0.61, "step": 5184 }, { "epoch": 0.6548267424421818, "grad_norm": 1.7265625, "learning_rate": 1.5274602050071705e-05, "loss": 0.6076, "step": 5185 }, { "epoch": 0.6549530349672429, "grad_norm": 1.6796875, "learning_rate": 1.5272905812871168e-05, "loss": 0.6409, "step": 5186 }, { "epoch": 0.655079327492304, "grad_norm": 1.8359375, "learning_rate": 1.52712093655053e-05, "loss": 0.6979, "step": 5187 }, { "epoch": 0.6552056200173653, "grad_norm": 1.8671875, "learning_rate": 1.5269512708041725e-05, "loss": 0.6812, "step": 5188 }, { "epoch": 0.6553319125424264, "grad_norm": 1.703125, "learning_rate": 1.5267815840548067e-05, "loss": 0.5728, "step": 5189 }, { "epoch": 0.6554582050674875, "grad_norm": 1.8671875, "learning_rate": 1.5266118763091954e-05, "loss": 0.5989, "step": 5190 }, { "epoch": 0.6555844975925488, "grad_norm": 1.7734375, "learning_rate": 1.5264421475741035e-05, "loss": 0.6745, "step": 5191 }, { "epoch": 0.6557107901176099, "grad_norm": 1.9609375, "learning_rate": 1.5262723978562956e-05, "loss": 0.7018, "step": 5192 }, { "epoch": 0.655837082642671, "grad_norm": 1.8359375, "learning_rate": 1.526102627162538e-05, "loss": 0.6451, "step": 5193 }, { "epoch": 0.6559633751677323, "grad_norm": 1.7421875, "learning_rate": 1.525932835499596e-05, "loss": 0.5943, "step": 5194 }, { "epoch": 0.6560896676927934, "grad_norm": 1.7265625, "learning_rate": 1.5257630228742386e-05, "loss": 0.635, "step": 5195 }, { "epoch": 0.6562159602178546, "grad_norm": 1.7890625, "learning_rate": 1.5255931892932333e-05, "loss": 0.6362, "step": 5196 }, { "epoch": 0.6563422527429158, "grad_norm": 1.78125, "learning_rate": 1.5254233347633491e-05, "loss": 0.6747, "step": 5197 }, { "epoch": 0.6564685452679769, "grad_norm": 1.671875, "learning_rate": 1.5252534592913568e-05, "loss": 0.5739, "step": 5198 }, { "epoch": 0.6565948377930382, "grad_norm": 1.75, "learning_rate": 1.5250835628840266e-05, "loss": 0.694, "step": 5199 }, { "epoch": 0.6567211303180993, "grad_norm": 1.7265625, "learning_rate": 1.5249136455481304e-05, "loss": 0.593, "step": 5200 }, { "epoch": 0.6568474228431604, "grad_norm": 1.8359375, "learning_rate": 1.5247437072904406e-05, "loss": 0.6554, "step": 5201 }, { "epoch": 0.6569737153682217, "grad_norm": 1.890625, "learning_rate": 1.5245737481177305e-05, "loss": 0.7607, "step": 5202 }, { "epoch": 0.6571000078932828, "grad_norm": 1.6328125, "learning_rate": 1.5244037680367742e-05, "loss": 0.6534, "step": 5203 }, { "epoch": 0.657226300418344, "grad_norm": 1.71875, "learning_rate": 1.524233767054347e-05, "loss": 0.6273, "step": 5204 }, { "epoch": 0.6573525929434052, "grad_norm": 1.9375, "learning_rate": 1.5240637451772245e-05, "loss": 0.7155, "step": 5205 }, { "epoch": 0.6574788854684663, "grad_norm": 1.8828125, "learning_rate": 1.5238937024121833e-05, "loss": 0.5847, "step": 5206 }, { "epoch": 0.6576051779935275, "grad_norm": 1.796875, "learning_rate": 1.5237236387660011e-05, "loss": 0.6503, "step": 5207 }, { "epoch": 0.6577314705185887, "grad_norm": 1.8828125, "learning_rate": 1.523553554245456e-05, "loss": 0.6824, "step": 5208 }, { "epoch": 0.6578577630436498, "grad_norm": 1.8359375, "learning_rate": 1.5233834488573276e-05, "loss": 0.7446, "step": 5209 }, { "epoch": 0.657984055568711, "grad_norm": 2.015625, "learning_rate": 1.5232133226083954e-05, "loss": 0.728, "step": 5210 }, { "epoch": 0.6581103480937722, "grad_norm": 1.875, "learning_rate": 1.5230431755054404e-05, "loss": 0.6415, "step": 5211 }, { "epoch": 0.6582366406188334, "grad_norm": 1.625, "learning_rate": 1.5228730075552443e-05, "loss": 0.6806, "step": 5212 }, { "epoch": 0.6583629331438945, "grad_norm": 1.8984375, "learning_rate": 1.5227028187645892e-05, "loss": 0.667, "step": 5213 }, { "epoch": 0.6584892256689557, "grad_norm": 2.125, "learning_rate": 1.5225326091402591e-05, "loss": 0.7435, "step": 5214 }, { "epoch": 0.6586155181940169, "grad_norm": 1.6875, "learning_rate": 1.5223623786890376e-05, "loss": 0.5981, "step": 5215 }, { "epoch": 0.6587418107190781, "grad_norm": 1.71875, "learning_rate": 1.52219212741771e-05, "loss": 0.6359, "step": 5216 }, { "epoch": 0.6588681032441392, "grad_norm": 1.6953125, "learning_rate": 1.5220218553330618e-05, "loss": 0.6071, "step": 5217 }, { "epoch": 0.6589943957692004, "grad_norm": 1.8046875, "learning_rate": 1.5218515624418802e-05, "loss": 0.6744, "step": 5218 }, { "epoch": 0.6591206882942616, "grad_norm": 1.7421875, "learning_rate": 1.521681248750952e-05, "loss": 0.621, "step": 5219 }, { "epoch": 0.6592469808193228, "grad_norm": 1.7890625, "learning_rate": 1.521510914267066e-05, "loss": 0.6384, "step": 5220 }, { "epoch": 0.6593732733443839, "grad_norm": 1.578125, "learning_rate": 1.5213405589970107e-05, "loss": 0.5846, "step": 5221 }, { "epoch": 0.6594995658694451, "grad_norm": 1.828125, "learning_rate": 1.5211701829475767e-05, "loss": 0.6899, "step": 5222 }, { "epoch": 0.6596258583945063, "grad_norm": 1.7734375, "learning_rate": 1.5209997861255545e-05, "loss": 0.6325, "step": 5223 }, { "epoch": 0.6597521509195674, "grad_norm": 1.7578125, "learning_rate": 1.5208293685377357e-05, "loss": 0.5827, "step": 5224 }, { "epoch": 0.6598784434446286, "grad_norm": 1.828125, "learning_rate": 1.520658930190913e-05, "loss": 0.6351, "step": 5225 }, { "epoch": 0.6600047359696898, "grad_norm": 1.7734375, "learning_rate": 1.5204884710918793e-05, "loss": 0.6006, "step": 5226 }, { "epoch": 0.6601310284947509, "grad_norm": 1.8046875, "learning_rate": 1.5203179912474285e-05, "loss": 0.69, "step": 5227 }, { "epoch": 0.6602573210198122, "grad_norm": 1.828125, "learning_rate": 1.520147490664356e-05, "loss": 0.6918, "step": 5228 }, { "epoch": 0.6603836135448733, "grad_norm": 1.7265625, "learning_rate": 1.5199769693494576e-05, "loss": 0.6836, "step": 5229 }, { "epoch": 0.6605099060699345, "grad_norm": 1.671875, "learning_rate": 1.5198064273095295e-05, "loss": 0.651, "step": 5230 }, { "epoch": 0.6606361985949957, "grad_norm": 1.75, "learning_rate": 1.5196358645513691e-05, "loss": 0.6815, "step": 5231 }, { "epoch": 0.6607624911200568, "grad_norm": 1.765625, "learning_rate": 1.519465281081775e-05, "loss": 0.5792, "step": 5232 }, { "epoch": 0.660888783645118, "grad_norm": 1.734375, "learning_rate": 1.519294676907546e-05, "loss": 0.622, "step": 5233 }, { "epoch": 0.6610150761701792, "grad_norm": 1.8359375, "learning_rate": 1.5191240520354817e-05, "loss": 0.6885, "step": 5234 }, { "epoch": 0.6611413686952403, "grad_norm": 1.90625, "learning_rate": 1.5189534064723833e-05, "loss": 0.6764, "step": 5235 }, { "epoch": 0.6612676612203016, "grad_norm": 1.71875, "learning_rate": 1.5187827402250522e-05, "loss": 0.6982, "step": 5236 }, { "epoch": 0.6613939537453627, "grad_norm": 1.984375, "learning_rate": 1.5186120533002905e-05, "loss": 0.8227, "step": 5237 }, { "epoch": 0.6615202462704238, "grad_norm": 1.7421875, "learning_rate": 1.5184413457049014e-05, "loss": 0.6588, "step": 5238 }, { "epoch": 0.6616465387954851, "grad_norm": 1.84375, "learning_rate": 1.5182706174456892e-05, "loss": 0.6347, "step": 5239 }, { "epoch": 0.6617728313205462, "grad_norm": 1.7578125, "learning_rate": 1.5180998685294581e-05, "loss": 0.6406, "step": 5240 }, { "epoch": 0.6618991238456073, "grad_norm": 1.703125, "learning_rate": 1.5179290989630146e-05, "loss": 0.5665, "step": 5241 }, { "epoch": 0.6620254163706686, "grad_norm": 1.6796875, "learning_rate": 1.5177583087531646e-05, "loss": 0.6019, "step": 5242 }, { "epoch": 0.6621517088957297, "grad_norm": 2.09375, "learning_rate": 1.5175874979067158e-05, "loss": 0.6882, "step": 5243 }, { "epoch": 0.6622780014207909, "grad_norm": 1.78125, "learning_rate": 1.5174166664304757e-05, "loss": 0.6361, "step": 5244 }, { "epoch": 0.6624042939458521, "grad_norm": 1.8359375, "learning_rate": 1.5172458143312536e-05, "loss": 0.6399, "step": 5245 }, { "epoch": 0.6625305864709132, "grad_norm": 1.6875, "learning_rate": 1.5170749416158592e-05, "loss": 0.6198, "step": 5246 }, { "epoch": 0.6626568789959745, "grad_norm": 1.7578125, "learning_rate": 1.516904048291103e-05, "loss": 0.6446, "step": 5247 }, { "epoch": 0.6627831715210356, "grad_norm": 1.6796875, "learning_rate": 1.5167331343637966e-05, "loss": 0.6385, "step": 5248 }, { "epoch": 0.6629094640460967, "grad_norm": 1.6640625, "learning_rate": 1.516562199840752e-05, "loss": 0.5489, "step": 5249 }, { "epoch": 0.663035756571158, "grad_norm": 1.6953125, "learning_rate": 1.5163912447287823e-05, "loss": 0.6119, "step": 5250 }, { "epoch": 0.6631620490962191, "grad_norm": 1.9140625, "learning_rate": 1.5162202690347015e-05, "loss": 0.7059, "step": 5251 }, { "epoch": 0.6632883416212803, "grad_norm": 1.828125, "learning_rate": 1.5160492727653241e-05, "loss": 0.5931, "step": 5252 }, { "epoch": 0.6634146341463415, "grad_norm": 1.7109375, "learning_rate": 1.515878255927466e-05, "loss": 0.6518, "step": 5253 }, { "epoch": 0.6635409266714026, "grad_norm": 1.7734375, "learning_rate": 1.5157072185279429e-05, "loss": 0.5989, "step": 5254 }, { "epoch": 0.6636672191964638, "grad_norm": 1.8125, "learning_rate": 1.5155361605735723e-05, "loss": 0.7246, "step": 5255 }, { "epoch": 0.663793511721525, "grad_norm": 1.640625, "learning_rate": 1.5153650820711719e-05, "loss": 0.6482, "step": 5256 }, { "epoch": 0.6639198042465861, "grad_norm": 1.734375, "learning_rate": 1.5151939830275608e-05, "loss": 0.6004, "step": 5257 }, { "epoch": 0.6640460967716473, "grad_norm": 1.7734375, "learning_rate": 1.5150228634495583e-05, "loss": 0.5855, "step": 5258 }, { "epoch": 0.6641723892967085, "grad_norm": 1.8359375, "learning_rate": 1.5148517233439852e-05, "loss": 0.7196, "step": 5259 }, { "epoch": 0.6642986818217697, "grad_norm": 2.328125, "learning_rate": 1.5146805627176624e-05, "loss": 0.7831, "step": 5260 }, { "epoch": 0.6644249743468309, "grad_norm": 1.71875, "learning_rate": 1.514509381577412e-05, "loss": 0.7057, "step": 5261 }, { "epoch": 0.664551266871892, "grad_norm": 1.7421875, "learning_rate": 1.5143381799300572e-05, "loss": 0.6096, "step": 5262 }, { "epoch": 0.6646775593969532, "grad_norm": 1.8046875, "learning_rate": 1.5141669577824212e-05, "loss": 0.6137, "step": 5263 }, { "epoch": 0.6648038519220144, "grad_norm": 1.765625, "learning_rate": 1.5139957151413287e-05, "loss": 0.6712, "step": 5264 }, { "epoch": 0.6649301444470755, "grad_norm": 1.734375, "learning_rate": 1.5138244520136052e-05, "loss": 0.6581, "step": 5265 }, { "epoch": 0.6650564369721367, "grad_norm": 1.71875, "learning_rate": 1.5136531684060762e-05, "loss": 0.6705, "step": 5266 }, { "epoch": 0.6651827294971979, "grad_norm": 2.109375, "learning_rate": 1.5134818643255696e-05, "loss": 0.8603, "step": 5267 }, { "epoch": 0.6653090220222591, "grad_norm": 1.875, "learning_rate": 1.5133105397789125e-05, "loss": 0.7529, "step": 5268 }, { "epoch": 0.6654353145473202, "grad_norm": 1.9140625, "learning_rate": 1.5131391947729336e-05, "loss": 0.6364, "step": 5269 }, { "epoch": 0.6655616070723814, "grad_norm": 1.7421875, "learning_rate": 1.5129678293144627e-05, "loss": 0.5611, "step": 5270 }, { "epoch": 0.6656878995974426, "grad_norm": 1.734375, "learning_rate": 1.5127964434103292e-05, "loss": 0.6166, "step": 5271 }, { "epoch": 0.6658141921225037, "grad_norm": 1.8046875, "learning_rate": 1.5126250370673652e-05, "loss": 0.6554, "step": 5272 }, { "epoch": 0.665940484647565, "grad_norm": 1.8671875, "learning_rate": 1.5124536102924014e-05, "loss": 0.7114, "step": 5273 }, { "epoch": 0.6660667771726261, "grad_norm": 1.828125, "learning_rate": 1.5122821630922712e-05, "loss": 0.7117, "step": 5274 }, { "epoch": 0.6661930696976873, "grad_norm": 1.7109375, "learning_rate": 1.5121106954738079e-05, "loss": 0.5681, "step": 5275 }, { "epoch": 0.6663193622227485, "grad_norm": 1.9609375, "learning_rate": 1.511939207443846e-05, "loss": 0.6356, "step": 5276 }, { "epoch": 0.6664456547478096, "grad_norm": 1.84375, "learning_rate": 1.5117676990092201e-05, "loss": 0.7104, "step": 5277 }, { "epoch": 0.6665719472728708, "grad_norm": 1.6875, "learning_rate": 1.5115961701767665e-05, "loss": 0.6226, "step": 5278 }, { "epoch": 0.666698239797932, "grad_norm": 1.625, "learning_rate": 1.5114246209533216e-05, "loss": 0.642, "step": 5279 }, { "epoch": 0.6668245323229931, "grad_norm": 1.75, "learning_rate": 1.5112530513457236e-05, "loss": 0.6439, "step": 5280 }, { "epoch": 0.6669508248480543, "grad_norm": 1.6015625, "learning_rate": 1.51108146136081e-05, "loss": 0.592, "step": 5281 }, { "epoch": 0.6670771173731155, "grad_norm": 1.8828125, "learning_rate": 1.5109098510054208e-05, "loss": 0.7093, "step": 5282 }, { "epoch": 0.6672034098981766, "grad_norm": 1.7734375, "learning_rate": 1.510738220286395e-05, "loss": 0.7096, "step": 5283 }, { "epoch": 0.6673297024232379, "grad_norm": 1.796875, "learning_rate": 1.5105665692105742e-05, "loss": 0.6557, "step": 5284 }, { "epoch": 0.667455994948299, "grad_norm": 1.8828125, "learning_rate": 1.5103948977847998e-05, "loss": 0.6652, "step": 5285 }, { "epoch": 0.6675822874733601, "grad_norm": 1.796875, "learning_rate": 1.510223206015914e-05, "loss": 0.6325, "step": 5286 }, { "epoch": 0.6677085799984214, "grad_norm": 1.65625, "learning_rate": 1.5100514939107603e-05, "loss": 0.5894, "step": 5287 }, { "epoch": 0.6678348725234825, "grad_norm": 1.90625, "learning_rate": 1.5098797614761824e-05, "loss": 0.6703, "step": 5288 }, { "epoch": 0.6679611650485436, "grad_norm": 1.75, "learning_rate": 1.5097080087190255e-05, "loss": 0.6251, "step": 5289 }, { "epoch": 0.6680874575736049, "grad_norm": 1.8125, "learning_rate": 1.509536235646135e-05, "loss": 0.7015, "step": 5290 }, { "epoch": 0.668213750098666, "grad_norm": 1.734375, "learning_rate": 1.5093644422643573e-05, "loss": 0.6628, "step": 5291 }, { "epoch": 0.6683400426237273, "grad_norm": 1.8046875, "learning_rate": 1.5091926285805398e-05, "loss": 0.7254, "step": 5292 }, { "epoch": 0.6684663351487884, "grad_norm": 2.0625, "learning_rate": 1.5090207946015307e-05, "loss": 0.6886, "step": 5293 }, { "epoch": 0.6685926276738495, "grad_norm": 1.7265625, "learning_rate": 1.5088489403341788e-05, "loss": 0.7049, "step": 5294 }, { "epoch": 0.6687189201989108, "grad_norm": 1.671875, "learning_rate": 1.5086770657853338e-05, "loss": 0.6294, "step": 5295 }, { "epoch": 0.6688452127239719, "grad_norm": 1.7578125, "learning_rate": 1.5085051709618463e-05, "loss": 0.6958, "step": 5296 }, { "epoch": 0.668971505249033, "grad_norm": 1.9296875, "learning_rate": 1.5083332558705674e-05, "loss": 0.6155, "step": 5297 }, { "epoch": 0.6690977977740943, "grad_norm": 1.890625, "learning_rate": 1.5081613205183491e-05, "loss": 0.6538, "step": 5298 }, { "epoch": 0.6692240902991554, "grad_norm": 1.78125, "learning_rate": 1.507989364912045e-05, "loss": 0.5942, "step": 5299 }, { "epoch": 0.6693503828242165, "grad_norm": 1.6953125, "learning_rate": 1.5078173890585081e-05, "loss": 0.6534, "step": 5300 }, { "epoch": 0.6694766753492778, "grad_norm": 1.7421875, "learning_rate": 1.5076453929645933e-05, "loss": 0.5821, "step": 5301 }, { "epoch": 0.6696029678743389, "grad_norm": 1.71875, "learning_rate": 1.5074733766371558e-05, "loss": 0.6056, "step": 5302 }, { "epoch": 0.6697292603994001, "grad_norm": 1.671875, "learning_rate": 1.5073013400830519e-05, "loss": 0.5807, "step": 5303 }, { "epoch": 0.6698555529244613, "grad_norm": 1.7734375, "learning_rate": 1.5071292833091384e-05, "loss": 0.6269, "step": 5304 }, { "epoch": 0.6699818454495224, "grad_norm": 1.6875, "learning_rate": 1.5069572063222733e-05, "loss": 0.6664, "step": 5305 }, { "epoch": 0.6701081379745837, "grad_norm": 1.7421875, "learning_rate": 1.5067851091293148e-05, "loss": 0.7071, "step": 5306 }, { "epoch": 0.6702344304996448, "grad_norm": 1.8125, "learning_rate": 1.5066129917371228e-05, "loss": 0.69, "step": 5307 }, { "epoch": 0.670360723024706, "grad_norm": 1.625, "learning_rate": 1.5064408541525573e-05, "loss": 0.63, "step": 5308 }, { "epoch": 0.6704870155497672, "grad_norm": 1.703125, "learning_rate": 1.5062686963824788e-05, "loss": 0.6737, "step": 5309 }, { "epoch": 0.6706133080748283, "grad_norm": 1.890625, "learning_rate": 1.5060965184337496e-05, "loss": 0.6552, "step": 5310 }, { "epoch": 0.6707396005998895, "grad_norm": 1.890625, "learning_rate": 1.5059243203132323e-05, "loss": 0.663, "step": 5311 }, { "epoch": 0.6708658931249507, "grad_norm": 1.78125, "learning_rate": 1.5057521020277901e-05, "loss": 0.6326, "step": 5312 }, { "epoch": 0.6709921856500118, "grad_norm": 1.7265625, "learning_rate": 1.5055798635842873e-05, "loss": 0.6064, "step": 5313 }, { "epoch": 0.671118478175073, "grad_norm": 1.6875, "learning_rate": 1.5054076049895892e-05, "loss": 0.6261, "step": 5314 }, { "epoch": 0.6712447707001342, "grad_norm": 1.7734375, "learning_rate": 1.5052353262505609e-05, "loss": 0.6906, "step": 5315 }, { "epoch": 0.6713710632251954, "grad_norm": 1.8046875, "learning_rate": 1.5050630273740696e-05, "loss": 0.5905, "step": 5316 }, { "epoch": 0.6714973557502565, "grad_norm": 1.7890625, "learning_rate": 1.5048907083669826e-05, "loss": 0.6251, "step": 5317 }, { "epoch": 0.6716236482753177, "grad_norm": 1.796875, "learning_rate": 1.5047183692361679e-05, "loss": 0.6355, "step": 5318 }, { "epoch": 0.6717499408003789, "grad_norm": 1.7578125, "learning_rate": 1.504546009988495e-05, "loss": 0.6212, "step": 5319 }, { "epoch": 0.67187623332544, "grad_norm": 1.9609375, "learning_rate": 1.504373630630833e-05, "loss": 0.7145, "step": 5320 }, { "epoch": 0.6720025258505012, "grad_norm": 1.9375, "learning_rate": 1.504201231170053e-05, "loss": 0.6616, "step": 5321 }, { "epoch": 0.6721288183755624, "grad_norm": 2.171875, "learning_rate": 1.5040288116130266e-05, "loss": 0.706, "step": 5322 }, { "epoch": 0.6722551109006236, "grad_norm": 1.7421875, "learning_rate": 1.5038563719666259e-05, "loss": 0.6384, "step": 5323 }, { "epoch": 0.6723814034256848, "grad_norm": 1.8203125, "learning_rate": 1.5036839122377236e-05, "loss": 0.5899, "step": 5324 }, { "epoch": 0.6725076959507459, "grad_norm": 1.71875, "learning_rate": 1.5035114324331939e-05, "loss": 0.722, "step": 5325 }, { "epoch": 0.6726339884758071, "grad_norm": 1.9609375, "learning_rate": 1.503338932559911e-05, "loss": 0.7549, "step": 5326 }, { "epoch": 0.6727602810008683, "grad_norm": 1.7109375, "learning_rate": 1.5031664126247512e-05, "loss": 0.6009, "step": 5327 }, { "epoch": 0.6728865735259294, "grad_norm": 1.796875, "learning_rate": 1.5029938726345896e-05, "loss": 0.6429, "step": 5328 }, { "epoch": 0.6730128660509906, "grad_norm": 1.7265625, "learning_rate": 1.5028213125963035e-05, "loss": 0.5905, "step": 5329 }, { "epoch": 0.6731391585760518, "grad_norm": 1.5859375, "learning_rate": 1.5026487325167716e-05, "loss": 0.6082, "step": 5330 }, { "epoch": 0.6732654511011129, "grad_norm": 1.9453125, "learning_rate": 1.502476132402872e-05, "loss": 0.6158, "step": 5331 }, { "epoch": 0.6733917436261742, "grad_norm": 1.71875, "learning_rate": 1.5023035122614837e-05, "loss": 0.5935, "step": 5332 }, { "epoch": 0.6735180361512353, "grad_norm": 1.84375, "learning_rate": 1.5021308720994877e-05, "loss": 0.7118, "step": 5333 }, { "epoch": 0.6736443286762964, "grad_norm": 1.7734375, "learning_rate": 1.5019582119237641e-05, "loss": 0.6554, "step": 5334 }, { "epoch": 0.6737706212013577, "grad_norm": 1.6640625, "learning_rate": 1.5017855317411954e-05, "loss": 0.6153, "step": 5335 }, { "epoch": 0.6738969137264188, "grad_norm": 1.671875, "learning_rate": 1.501612831558664e-05, "loss": 0.6364, "step": 5336 }, { "epoch": 0.67402320625148, "grad_norm": 1.734375, "learning_rate": 1.5014401113830534e-05, "loss": 0.6591, "step": 5337 }, { "epoch": 0.6741494987765412, "grad_norm": 1.953125, "learning_rate": 1.5012673712212475e-05, "loss": 0.621, "step": 5338 }, { "epoch": 0.6742757913016023, "grad_norm": 1.6328125, "learning_rate": 1.5010946110801318e-05, "loss": 0.5487, "step": 5339 }, { "epoch": 0.6744020838266636, "grad_norm": 2.3125, "learning_rate": 1.500921830966592e-05, "loss": 0.7081, "step": 5340 }, { "epoch": 0.6745283763517247, "grad_norm": 1.6328125, "learning_rate": 1.5007490308875144e-05, "loss": 0.6702, "step": 5341 }, { "epoch": 0.6746546688767858, "grad_norm": 1.984375, "learning_rate": 1.5005762108497865e-05, "loss": 0.6502, "step": 5342 }, { "epoch": 0.6747809614018471, "grad_norm": 1.7890625, "learning_rate": 1.5004033708602967e-05, "loss": 0.7, "step": 5343 }, { "epoch": 0.6749072539269082, "grad_norm": 1.75, "learning_rate": 1.5002305109259338e-05, "loss": 0.6507, "step": 5344 }, { "epoch": 0.6750335464519693, "grad_norm": 2.078125, "learning_rate": 1.5000576310535878e-05, "loss": 0.6329, "step": 5345 }, { "epoch": 0.6751598389770306, "grad_norm": 1.734375, "learning_rate": 1.4998847312501488e-05, "loss": 0.6418, "step": 5346 }, { "epoch": 0.6752861315020917, "grad_norm": 1.7890625, "learning_rate": 1.4997118115225082e-05, "loss": 0.6219, "step": 5347 }, { "epoch": 0.6754124240271528, "grad_norm": 1.7890625, "learning_rate": 1.499538871877559e-05, "loss": 0.6159, "step": 5348 }, { "epoch": 0.6755387165522141, "grad_norm": 1.984375, "learning_rate": 1.4993659123221931e-05, "loss": 0.7365, "step": 5349 }, { "epoch": 0.6756650090772752, "grad_norm": 2.0625, "learning_rate": 1.499192932863305e-05, "loss": 0.7025, "step": 5350 }, { "epoch": 0.6757913016023364, "grad_norm": 1.6875, "learning_rate": 1.499019933507789e-05, "loss": 0.582, "step": 5351 }, { "epoch": 0.6759175941273976, "grad_norm": 1.7734375, "learning_rate": 1.4988469142625404e-05, "loss": 0.5717, "step": 5352 }, { "epoch": 0.6760438866524587, "grad_norm": 1.7578125, "learning_rate": 1.4986738751344554e-05, "loss": 0.722, "step": 5353 }, { "epoch": 0.67617017917752, "grad_norm": 1.8046875, "learning_rate": 1.498500816130431e-05, "loss": 0.6455, "step": 5354 }, { "epoch": 0.6762964717025811, "grad_norm": 1.703125, "learning_rate": 1.4983277372573644e-05, "loss": 0.6588, "step": 5355 }, { "epoch": 0.6764227642276422, "grad_norm": 1.6953125, "learning_rate": 1.4981546385221545e-05, "loss": 0.6326, "step": 5356 }, { "epoch": 0.6765490567527035, "grad_norm": 1.75, "learning_rate": 1.4979815199317008e-05, "loss": 0.6147, "step": 5357 }, { "epoch": 0.6766753492777646, "grad_norm": 1.734375, "learning_rate": 1.497808381492903e-05, "loss": 0.6127, "step": 5358 }, { "epoch": 0.6768016418028258, "grad_norm": 1.7421875, "learning_rate": 1.4976352232126623e-05, "loss": 0.6851, "step": 5359 }, { "epoch": 0.676927934327887, "grad_norm": 1.8125, "learning_rate": 1.4974620450978803e-05, "loss": 0.6225, "step": 5360 }, { "epoch": 0.6770542268529481, "grad_norm": 1.6953125, "learning_rate": 1.4972888471554593e-05, "loss": 0.7191, "step": 5361 }, { "epoch": 0.6771805193780093, "grad_norm": 1.7421875, "learning_rate": 1.4971156293923026e-05, "loss": 0.6615, "step": 5362 }, { "epoch": 0.6773068119030705, "grad_norm": 1.8203125, "learning_rate": 1.4969423918153143e-05, "loss": 0.7771, "step": 5363 }, { "epoch": 0.6774331044281316, "grad_norm": 1.8828125, "learning_rate": 1.4967691344313995e-05, "loss": 0.6419, "step": 5364 }, { "epoch": 0.6775593969531928, "grad_norm": 1.8828125, "learning_rate": 1.4965958572474631e-05, "loss": 0.5839, "step": 5365 }, { "epoch": 0.677685689478254, "grad_norm": 1.6484375, "learning_rate": 1.4964225602704122e-05, "loss": 0.6467, "step": 5366 }, { "epoch": 0.6778119820033152, "grad_norm": 1.8125, "learning_rate": 1.4962492435071535e-05, "loss": 0.6328, "step": 5367 }, { "epoch": 0.6779382745283764, "grad_norm": 1.8984375, "learning_rate": 1.4960759069645956e-05, "loss": 0.7491, "step": 5368 }, { "epoch": 0.6780645670534375, "grad_norm": 1.796875, "learning_rate": 1.495902550649647e-05, "loss": 0.6173, "step": 5369 }, { "epoch": 0.6781908595784987, "grad_norm": 1.59375, "learning_rate": 1.4957291745692169e-05, "loss": 0.5332, "step": 5370 }, { "epoch": 0.6783171521035599, "grad_norm": 1.734375, "learning_rate": 1.495555778730216e-05, "loss": 0.6902, "step": 5371 }, { "epoch": 0.678443444628621, "grad_norm": 1.8359375, "learning_rate": 1.4953823631395554e-05, "loss": 0.568, "step": 5372 }, { "epoch": 0.6785697371536822, "grad_norm": 1.859375, "learning_rate": 1.495208927804147e-05, "loss": 0.6587, "step": 5373 }, { "epoch": 0.6786960296787434, "grad_norm": 1.734375, "learning_rate": 1.4950354727309034e-05, "loss": 0.7151, "step": 5374 }, { "epoch": 0.6788223222038046, "grad_norm": 1.90625, "learning_rate": 1.4948619979267385e-05, "loss": 0.6743, "step": 5375 }, { "epoch": 0.6789486147288657, "grad_norm": 1.6953125, "learning_rate": 1.494688503398566e-05, "loss": 0.5942, "step": 5376 }, { "epoch": 0.6790749072539269, "grad_norm": 1.7421875, "learning_rate": 1.4945149891533015e-05, "loss": 0.6797, "step": 5377 }, { "epoch": 0.6792011997789881, "grad_norm": 1.890625, "learning_rate": 1.4943414551978608e-05, "loss": 0.6986, "step": 5378 }, { "epoch": 0.6793274923040492, "grad_norm": 1.7265625, "learning_rate": 1.4941679015391602e-05, "loss": 0.6749, "step": 5379 }, { "epoch": 0.6794537848291105, "grad_norm": 1.8359375, "learning_rate": 1.4939943281841172e-05, "loss": 0.7369, "step": 5380 }, { "epoch": 0.6795800773541716, "grad_norm": 1.875, "learning_rate": 1.4938207351396503e-05, "loss": 0.6674, "step": 5381 }, { "epoch": 0.6797063698792327, "grad_norm": 1.9765625, "learning_rate": 1.4936471224126782e-05, "loss": 0.7188, "step": 5382 }, { "epoch": 0.679832662404294, "grad_norm": 1.6328125, "learning_rate": 1.4934734900101209e-05, "loss": 0.5708, "step": 5383 }, { "epoch": 0.6799589549293551, "grad_norm": 1.7265625, "learning_rate": 1.4932998379388988e-05, "loss": 0.6674, "step": 5384 }, { "epoch": 0.6800852474544163, "grad_norm": 1.796875, "learning_rate": 1.4931261662059336e-05, "loss": 0.619, "step": 5385 }, { "epoch": 0.6802115399794775, "grad_norm": 1.7109375, "learning_rate": 1.492952474818147e-05, "loss": 0.5704, "step": 5386 }, { "epoch": 0.6803378325045386, "grad_norm": 1.7734375, "learning_rate": 1.4927787637824623e-05, "loss": 0.6647, "step": 5387 }, { "epoch": 0.6804641250295999, "grad_norm": 1.828125, "learning_rate": 1.4926050331058029e-05, "loss": 0.6926, "step": 5388 }, { "epoch": 0.680590417554661, "grad_norm": 1.796875, "learning_rate": 1.4924312827950931e-05, "loss": 0.6738, "step": 5389 }, { "epoch": 0.6807167100797221, "grad_norm": 1.65625, "learning_rate": 1.492257512857259e-05, "loss": 0.5711, "step": 5390 }, { "epoch": 0.6808430026047834, "grad_norm": 1.7421875, "learning_rate": 1.4920837232992256e-05, "loss": 0.5849, "step": 5391 }, { "epoch": 0.6809692951298445, "grad_norm": 1.8515625, "learning_rate": 1.4919099141279203e-05, "loss": 0.6271, "step": 5392 }, { "epoch": 0.6810955876549056, "grad_norm": 1.9296875, "learning_rate": 1.491736085350271e-05, "loss": 0.6296, "step": 5393 }, { "epoch": 0.6812218801799669, "grad_norm": 1.78125, "learning_rate": 1.4915622369732057e-05, "loss": 0.7431, "step": 5394 }, { "epoch": 0.681348172705028, "grad_norm": 1.75, "learning_rate": 1.4913883690036535e-05, "loss": 0.6229, "step": 5395 }, { "epoch": 0.6814744652300891, "grad_norm": 2.0625, "learning_rate": 1.4912144814485445e-05, "loss": 0.7954, "step": 5396 }, { "epoch": 0.6816007577551504, "grad_norm": 1.765625, "learning_rate": 1.4910405743148096e-05, "loss": 0.6492, "step": 5397 }, { "epoch": 0.6817270502802115, "grad_norm": 1.71875, "learning_rate": 1.4908666476093798e-05, "loss": 0.5575, "step": 5398 }, { "epoch": 0.6818533428052728, "grad_norm": 1.796875, "learning_rate": 1.490692701339188e-05, "loss": 0.6708, "step": 5399 }, { "epoch": 0.6819796353303339, "grad_norm": 1.6796875, "learning_rate": 1.490518735511167e-05, "loss": 0.6413, "step": 5400 }, { "epoch": 0.682105927855395, "grad_norm": 1.8125, "learning_rate": 1.4903447501322505e-05, "loss": 0.7028, "step": 5401 }, { "epoch": 0.6822322203804563, "grad_norm": 1.7265625, "learning_rate": 1.4901707452093736e-05, "loss": 0.6777, "step": 5402 }, { "epoch": 0.6823585129055174, "grad_norm": 1.9375, "learning_rate": 1.4899967207494714e-05, "loss": 0.7541, "step": 5403 }, { "epoch": 0.6824848054305785, "grad_norm": 1.7734375, "learning_rate": 1.4898226767594801e-05, "loss": 0.6157, "step": 5404 }, { "epoch": 0.6826110979556398, "grad_norm": 1.6328125, "learning_rate": 1.4896486132463368e-05, "loss": 0.6607, "step": 5405 }, { "epoch": 0.6827373904807009, "grad_norm": 1.6875, "learning_rate": 1.4894745302169791e-05, "loss": 0.5828, "step": 5406 }, { "epoch": 0.6828636830057621, "grad_norm": 1.8671875, "learning_rate": 1.4893004276783456e-05, "loss": 0.6526, "step": 5407 }, { "epoch": 0.6829899755308233, "grad_norm": 1.90625, "learning_rate": 1.4891263056373757e-05, "loss": 0.6756, "step": 5408 }, { "epoch": 0.6831162680558844, "grad_norm": 3.625, "learning_rate": 1.4889521641010092e-05, "loss": 0.6778, "step": 5409 }, { "epoch": 0.6832425605809456, "grad_norm": 1.8046875, "learning_rate": 1.4887780030761869e-05, "loss": 0.6862, "step": 5410 }, { "epoch": 0.6833688531060068, "grad_norm": 1.6484375, "learning_rate": 1.4886038225698512e-05, "loss": 0.6049, "step": 5411 }, { "epoch": 0.683495145631068, "grad_norm": 1.71875, "learning_rate": 1.4884296225889438e-05, "loss": 0.6316, "step": 5412 }, { "epoch": 0.6836214381561291, "grad_norm": 1.859375, "learning_rate": 1.4882554031404082e-05, "loss": 0.6489, "step": 5413 }, { "epoch": 0.6837477306811903, "grad_norm": 1.7734375, "learning_rate": 1.4880811642311884e-05, "loss": 0.6412, "step": 5414 }, { "epoch": 0.6838740232062515, "grad_norm": 1.9921875, "learning_rate": 1.4879069058682286e-05, "loss": 0.6366, "step": 5415 }, { "epoch": 0.6840003157313127, "grad_norm": 1.859375, "learning_rate": 1.487732628058475e-05, "loss": 0.6671, "step": 5416 }, { "epoch": 0.6841266082563738, "grad_norm": 1.765625, "learning_rate": 1.4875583308088736e-05, "loss": 0.7317, "step": 5417 }, { "epoch": 0.684252900781435, "grad_norm": 1.859375, "learning_rate": 1.4873840141263716e-05, "loss": 0.7013, "step": 5418 }, { "epoch": 0.6843791933064962, "grad_norm": 1.6796875, "learning_rate": 1.4872096780179166e-05, "loss": 0.6033, "step": 5419 }, { "epoch": 0.6845054858315573, "grad_norm": 1.7109375, "learning_rate": 1.4870353224904572e-05, "loss": 0.6035, "step": 5420 }, { "epoch": 0.6846317783566185, "grad_norm": 1.7109375, "learning_rate": 1.486860947550943e-05, "loss": 0.6765, "step": 5421 }, { "epoch": 0.6847580708816797, "grad_norm": 1.8359375, "learning_rate": 1.4866865532063242e-05, "loss": 0.6767, "step": 5422 }, { "epoch": 0.6848843634067409, "grad_norm": 1.8828125, "learning_rate": 1.4865121394635518e-05, "loss": 0.6851, "step": 5423 }, { "epoch": 0.685010655931802, "grad_norm": 1.9140625, "learning_rate": 1.4863377063295771e-05, "loss": 0.5966, "step": 5424 }, { "epoch": 0.6851369484568632, "grad_norm": 1.828125, "learning_rate": 1.486163253811353e-05, "loss": 0.701, "step": 5425 }, { "epoch": 0.6852632409819244, "grad_norm": 1.9765625, "learning_rate": 1.4859887819158325e-05, "loss": 0.6621, "step": 5426 }, { "epoch": 0.6853895335069855, "grad_norm": 1.703125, "learning_rate": 1.4858142906499699e-05, "loss": 0.6261, "step": 5427 }, { "epoch": 0.6855158260320467, "grad_norm": 2.109375, "learning_rate": 1.4856397800207198e-05, "loss": 0.734, "step": 5428 }, { "epoch": 0.6856421185571079, "grad_norm": 1.8671875, "learning_rate": 1.4854652500350376e-05, "loss": 0.7496, "step": 5429 }, { "epoch": 0.6857684110821691, "grad_norm": 1.8125, "learning_rate": 1.48529070069988e-05, "loss": 0.7008, "step": 5430 }, { "epoch": 0.6858947036072303, "grad_norm": 1.6484375, "learning_rate": 1.4851161320222038e-05, "loss": 0.6345, "step": 5431 }, { "epoch": 0.6860209961322914, "grad_norm": 1.796875, "learning_rate": 1.4849415440089674e-05, "loss": 0.6549, "step": 5432 }, { "epoch": 0.6861472886573526, "grad_norm": 1.734375, "learning_rate": 1.4847669366671288e-05, "loss": 0.6259, "step": 5433 }, { "epoch": 0.6862735811824138, "grad_norm": 1.609375, "learning_rate": 1.4845923100036479e-05, "loss": 0.5423, "step": 5434 }, { "epoch": 0.6863998737074749, "grad_norm": 2.171875, "learning_rate": 1.4844176640254846e-05, "loss": 0.697, "step": 5435 }, { "epoch": 0.6865261662325362, "grad_norm": 1.765625, "learning_rate": 1.4842429987396001e-05, "loss": 0.6469, "step": 5436 }, { "epoch": 0.6866524587575973, "grad_norm": 1.765625, "learning_rate": 1.4840683141529561e-05, "loss": 0.6713, "step": 5437 }, { "epoch": 0.6867787512826584, "grad_norm": 1.859375, "learning_rate": 1.4838936102725149e-05, "loss": 0.6663, "step": 5438 }, { "epoch": 0.6869050438077197, "grad_norm": 1.703125, "learning_rate": 1.4837188871052399e-05, "loss": 0.6564, "step": 5439 }, { "epoch": 0.6870313363327808, "grad_norm": 1.6953125, "learning_rate": 1.4835441446580955e-05, "loss": 0.6332, "step": 5440 }, { "epoch": 0.6871576288578419, "grad_norm": 1.78125, "learning_rate": 1.4833693829380458e-05, "loss": 0.6395, "step": 5441 }, { "epoch": 0.6872839213829032, "grad_norm": 1.9453125, "learning_rate": 1.4831946019520569e-05, "loss": 0.7517, "step": 5442 }, { "epoch": 0.6874102139079643, "grad_norm": 1.890625, "learning_rate": 1.4830198017070952e-05, "loss": 0.6554, "step": 5443 }, { "epoch": 0.6875365064330254, "grad_norm": 1.796875, "learning_rate": 1.4828449822101274e-05, "loss": 0.6485, "step": 5444 }, { "epoch": 0.6876627989580867, "grad_norm": 1.6484375, "learning_rate": 1.4826701434681216e-05, "loss": 0.6349, "step": 5445 }, { "epoch": 0.6877890914831478, "grad_norm": 1.578125, "learning_rate": 1.4824952854880469e-05, "loss": 0.4917, "step": 5446 }, { "epoch": 0.6879153840082091, "grad_norm": 1.796875, "learning_rate": 1.4823204082768715e-05, "loss": 0.6772, "step": 5447 }, { "epoch": 0.6880416765332702, "grad_norm": 1.8203125, "learning_rate": 1.4821455118415669e-05, "loss": 0.6128, "step": 5448 }, { "epoch": 0.6881679690583313, "grad_norm": 1.7578125, "learning_rate": 1.4819705961891033e-05, "loss": 0.6044, "step": 5449 }, { "epoch": 0.6882942615833926, "grad_norm": 1.71875, "learning_rate": 1.4817956613264528e-05, "loss": 0.6541, "step": 5450 }, { "epoch": 0.6884205541084537, "grad_norm": 1.75, "learning_rate": 1.4816207072605877e-05, "loss": 0.6375, "step": 5451 }, { "epoch": 0.6885468466335148, "grad_norm": 1.828125, "learning_rate": 1.4814457339984811e-05, "loss": 0.7125, "step": 5452 }, { "epoch": 0.6886731391585761, "grad_norm": 1.6328125, "learning_rate": 1.4812707415471076e-05, "loss": 0.538, "step": 5453 }, { "epoch": 0.6887994316836372, "grad_norm": 1.7578125, "learning_rate": 1.4810957299134412e-05, "loss": 0.5814, "step": 5454 }, { "epoch": 0.6889257242086984, "grad_norm": 1.8203125, "learning_rate": 1.4809206991044578e-05, "loss": 0.7204, "step": 5455 }, { "epoch": 0.6890520167337596, "grad_norm": 1.6640625, "learning_rate": 1.4807456491271337e-05, "loss": 0.5457, "step": 5456 }, { "epoch": 0.6891783092588207, "grad_norm": 1.84375, "learning_rate": 1.480570579988446e-05, "loss": 0.6469, "step": 5457 }, { "epoch": 0.6893046017838819, "grad_norm": 1.6328125, "learning_rate": 1.4803954916953725e-05, "loss": 0.6036, "step": 5458 }, { "epoch": 0.6894308943089431, "grad_norm": 1.890625, "learning_rate": 1.480220384254892e-05, "loss": 0.7094, "step": 5459 }, { "epoch": 0.6895571868340042, "grad_norm": 1.6328125, "learning_rate": 1.4800452576739835e-05, "loss": 0.5973, "step": 5460 }, { "epoch": 0.6896834793590655, "grad_norm": 1.703125, "learning_rate": 1.4798701119596272e-05, "loss": 0.5868, "step": 5461 }, { "epoch": 0.6898097718841266, "grad_norm": 1.890625, "learning_rate": 1.4796949471188043e-05, "loss": 0.7217, "step": 5462 }, { "epoch": 0.6899360644091878, "grad_norm": 1.7578125, "learning_rate": 1.479519763158496e-05, "loss": 0.6808, "step": 5463 }, { "epoch": 0.690062356934249, "grad_norm": 2.03125, "learning_rate": 1.4793445600856853e-05, "loss": 0.7133, "step": 5464 }, { "epoch": 0.6901886494593101, "grad_norm": 1.7890625, "learning_rate": 1.4791693379073544e-05, "loss": 0.6916, "step": 5465 }, { "epoch": 0.6903149419843713, "grad_norm": 1.75, "learning_rate": 1.4789940966304883e-05, "loss": 0.661, "step": 5466 }, { "epoch": 0.6904412345094325, "grad_norm": 1.9296875, "learning_rate": 1.4788188362620713e-05, "loss": 0.7392, "step": 5467 }, { "epoch": 0.6905675270344936, "grad_norm": 1.6796875, "learning_rate": 1.4786435568090885e-05, "loss": 0.5661, "step": 5468 }, { "epoch": 0.6906938195595548, "grad_norm": 1.75, "learning_rate": 1.4784682582785266e-05, "loss": 0.5822, "step": 5469 }, { "epoch": 0.690820112084616, "grad_norm": 1.9375, "learning_rate": 1.4782929406773724e-05, "loss": 0.7081, "step": 5470 }, { "epoch": 0.6909464046096772, "grad_norm": 1.75, "learning_rate": 1.4781176040126135e-05, "loss": 0.6591, "step": 5471 }, { "epoch": 0.6910726971347383, "grad_norm": 1.734375, "learning_rate": 1.4779422482912385e-05, "loss": 0.6081, "step": 5472 }, { "epoch": 0.6911989896597995, "grad_norm": 1.6796875, "learning_rate": 1.4777668735202369e-05, "loss": 0.6968, "step": 5473 }, { "epoch": 0.6913252821848607, "grad_norm": 1.7734375, "learning_rate": 1.477591479706598e-05, "loss": 0.5837, "step": 5474 }, { "epoch": 0.6914515747099218, "grad_norm": 1.8359375, "learning_rate": 1.4774160668573135e-05, "loss": 0.6906, "step": 5475 }, { "epoch": 0.691577867234983, "grad_norm": 1.734375, "learning_rate": 1.4772406349793744e-05, "loss": 0.6642, "step": 5476 }, { "epoch": 0.6917041597600442, "grad_norm": 1.7109375, "learning_rate": 1.4770651840797731e-05, "loss": 0.6475, "step": 5477 }, { "epoch": 0.6918304522851054, "grad_norm": 1.8828125, "learning_rate": 1.4768897141655027e-05, "loss": 0.7775, "step": 5478 }, { "epoch": 0.6919567448101666, "grad_norm": 1.84375, "learning_rate": 1.476714225243557e-05, "loss": 0.6375, "step": 5479 }, { "epoch": 0.6920830373352277, "grad_norm": 1.6328125, "learning_rate": 1.4765387173209305e-05, "loss": 0.656, "step": 5480 }, { "epoch": 0.6922093298602889, "grad_norm": 1.8671875, "learning_rate": 1.4763631904046185e-05, "loss": 0.6464, "step": 5481 }, { "epoch": 0.6923356223853501, "grad_norm": 1.8515625, "learning_rate": 1.476187644501617e-05, "loss": 0.6353, "step": 5482 }, { "epoch": 0.6924619149104112, "grad_norm": 1.828125, "learning_rate": 1.476012079618923e-05, "loss": 0.5737, "step": 5483 }, { "epoch": 0.6925882074354724, "grad_norm": 1.7109375, "learning_rate": 1.475836495763534e-05, "loss": 0.6551, "step": 5484 }, { "epoch": 0.6927144999605336, "grad_norm": 1.875, "learning_rate": 1.4756608929424488e-05, "loss": 0.7162, "step": 5485 }, { "epoch": 0.6928407924855947, "grad_norm": 1.7734375, "learning_rate": 1.4754852711626657e-05, "loss": 0.6225, "step": 5486 }, { "epoch": 0.692967085010656, "grad_norm": 1.8515625, "learning_rate": 1.4753096304311852e-05, "loss": 0.7216, "step": 5487 }, { "epoch": 0.6930933775357171, "grad_norm": 1.9921875, "learning_rate": 1.4751339707550076e-05, "loss": 0.7506, "step": 5488 }, { "epoch": 0.6932196700607782, "grad_norm": 1.7265625, "learning_rate": 1.4749582921411343e-05, "loss": 0.5738, "step": 5489 }, { "epoch": 0.6933459625858395, "grad_norm": 1.75, "learning_rate": 1.4747825945965675e-05, "loss": 0.6613, "step": 5490 }, { "epoch": 0.6934722551109006, "grad_norm": 2.015625, "learning_rate": 1.47460687812831e-05, "loss": 0.6929, "step": 5491 }, { "epoch": 0.6935985476359618, "grad_norm": 1.8828125, "learning_rate": 1.4744311427433658e-05, "loss": 0.6217, "step": 5492 }, { "epoch": 0.693724840161023, "grad_norm": 1.9296875, "learning_rate": 1.4742553884487385e-05, "loss": 0.6576, "step": 5493 }, { "epoch": 0.6938511326860841, "grad_norm": 1.6875, "learning_rate": 1.474079615251434e-05, "loss": 0.5647, "step": 5494 }, { "epoch": 0.6939774252111454, "grad_norm": 1.859375, "learning_rate": 1.473903823158458e-05, "loss": 0.6583, "step": 5495 }, { "epoch": 0.6941037177362065, "grad_norm": 2.046875, "learning_rate": 1.4737280121768171e-05, "loss": 0.7576, "step": 5496 }, { "epoch": 0.6942300102612676, "grad_norm": 1.8828125, "learning_rate": 1.4735521823135189e-05, "loss": 0.7084, "step": 5497 }, { "epoch": 0.6943563027863289, "grad_norm": 1.7578125, "learning_rate": 1.4733763335755707e-05, "loss": 0.7755, "step": 5498 }, { "epoch": 0.69448259531139, "grad_norm": 1.765625, "learning_rate": 1.4732004659699823e-05, "loss": 0.6276, "step": 5499 }, { "epoch": 0.6946088878364511, "grad_norm": 1.7578125, "learning_rate": 1.4730245795037631e-05, "loss": 0.6155, "step": 5500 }, { "epoch": 0.6947351803615124, "grad_norm": 1.7265625, "learning_rate": 1.4728486741839239e-05, "loss": 0.6636, "step": 5501 }, { "epoch": 0.6948614728865735, "grad_norm": 1.7265625, "learning_rate": 1.472672750017475e-05, "loss": 0.6734, "step": 5502 }, { "epoch": 0.6949877654116347, "grad_norm": 1.90625, "learning_rate": 1.472496807011429e-05, "loss": 0.6635, "step": 5503 }, { "epoch": 0.6951140579366959, "grad_norm": 1.9453125, "learning_rate": 1.4723208451727983e-05, "loss": 0.6834, "step": 5504 }, { "epoch": 0.695240350461757, "grad_norm": 1.7734375, "learning_rate": 1.4721448645085965e-05, "loss": 0.6673, "step": 5505 }, { "epoch": 0.6953666429868183, "grad_norm": 2.0, "learning_rate": 1.4719688650258372e-05, "loss": 0.6693, "step": 5506 }, { "epoch": 0.6954929355118794, "grad_norm": 1.8984375, "learning_rate": 1.4717928467315362e-05, "loss": 0.5523, "step": 5507 }, { "epoch": 0.6956192280369405, "grad_norm": 1.765625, "learning_rate": 1.4716168096327086e-05, "loss": 0.6321, "step": 5508 }, { "epoch": 0.6957455205620018, "grad_norm": 1.6796875, "learning_rate": 1.4714407537363709e-05, "loss": 0.6406, "step": 5509 }, { "epoch": 0.6958718130870629, "grad_norm": 1.7890625, "learning_rate": 1.4712646790495403e-05, "loss": 0.6229, "step": 5510 }, { "epoch": 0.695998105612124, "grad_norm": 1.75, "learning_rate": 1.4710885855792348e-05, "loss": 0.5822, "step": 5511 }, { "epoch": 0.6961243981371853, "grad_norm": 1.8125, "learning_rate": 1.4709124733324727e-05, "loss": 0.638, "step": 5512 }, { "epoch": 0.6962506906622464, "grad_norm": 1.703125, "learning_rate": 1.4707363423162739e-05, "loss": 0.5815, "step": 5513 }, { "epoch": 0.6963769831873076, "grad_norm": 1.8203125, "learning_rate": 1.4705601925376583e-05, "loss": 0.6937, "step": 5514 }, { "epoch": 0.6965032757123688, "grad_norm": 1.9140625, "learning_rate": 1.4703840240036466e-05, "loss": 0.6955, "step": 5515 }, { "epoch": 0.6966295682374299, "grad_norm": 1.7578125, "learning_rate": 1.470207836721261e-05, "loss": 0.5989, "step": 5516 }, { "epoch": 0.6967558607624911, "grad_norm": 1.8046875, "learning_rate": 1.4700316306975236e-05, "loss": 0.691, "step": 5517 }, { "epoch": 0.6968821532875523, "grad_norm": 1.6796875, "learning_rate": 1.4698554059394575e-05, "loss": 0.6132, "step": 5518 }, { "epoch": 0.6970084458126135, "grad_norm": 1.671875, "learning_rate": 1.4696791624540865e-05, "loss": 0.5692, "step": 5519 }, { "epoch": 0.6971347383376746, "grad_norm": 1.859375, "learning_rate": 1.4695029002484353e-05, "loss": 0.6762, "step": 5520 }, { "epoch": 0.6972610308627358, "grad_norm": 1.7734375, "learning_rate": 1.4693266193295296e-05, "loss": 0.6299, "step": 5521 }, { "epoch": 0.697387323387797, "grad_norm": 1.7109375, "learning_rate": 1.469150319704395e-05, "loss": 0.5138, "step": 5522 }, { "epoch": 0.6975136159128582, "grad_norm": 1.7265625, "learning_rate": 1.4689740013800591e-05, "loss": 0.6607, "step": 5523 }, { "epoch": 0.6976399084379193, "grad_norm": 1.640625, "learning_rate": 1.4687976643635488e-05, "loss": 0.6416, "step": 5524 }, { "epoch": 0.6977662009629805, "grad_norm": 1.734375, "learning_rate": 1.468621308661893e-05, "loss": 0.6674, "step": 5525 }, { "epoch": 0.6978924934880417, "grad_norm": 1.8515625, "learning_rate": 1.4684449342821205e-05, "loss": 0.6721, "step": 5526 }, { "epoch": 0.6980187860131029, "grad_norm": 1.65625, "learning_rate": 1.4682685412312611e-05, "loss": 0.63, "step": 5527 }, { "epoch": 0.698145078538164, "grad_norm": 1.7890625, "learning_rate": 1.4680921295163456e-05, "loss": 0.581, "step": 5528 }, { "epoch": 0.6982713710632252, "grad_norm": 1.7265625, "learning_rate": 1.4679156991444055e-05, "loss": 0.6442, "step": 5529 }, { "epoch": 0.6983976635882864, "grad_norm": 1.8515625, "learning_rate": 1.4677392501224724e-05, "loss": 0.7812, "step": 5530 }, { "epoch": 0.6985239561133475, "grad_norm": 1.84375, "learning_rate": 1.4675627824575793e-05, "loss": 0.662, "step": 5531 }, { "epoch": 0.6986502486384087, "grad_norm": 1.828125, "learning_rate": 1.4673862961567602e-05, "loss": 0.6274, "step": 5532 }, { "epoch": 0.6987765411634699, "grad_norm": 1.8828125, "learning_rate": 1.4672097912270491e-05, "loss": 0.6337, "step": 5533 }, { "epoch": 0.698902833688531, "grad_norm": 1.75, "learning_rate": 1.4670332676754808e-05, "loss": 0.6939, "step": 5534 }, { "epoch": 0.6990291262135923, "grad_norm": 1.8984375, "learning_rate": 1.4668567255090918e-05, "loss": 0.6395, "step": 5535 }, { "epoch": 0.6991554187386534, "grad_norm": 1.734375, "learning_rate": 1.4666801647349178e-05, "loss": 0.6663, "step": 5536 }, { "epoch": 0.6992817112637146, "grad_norm": 1.734375, "learning_rate": 1.4665035853599967e-05, "loss": 0.5961, "step": 5537 }, { "epoch": 0.6994080037887758, "grad_norm": 1.6875, "learning_rate": 1.466326987391366e-05, "loss": 0.6896, "step": 5538 }, { "epoch": 0.6995342963138369, "grad_norm": 1.6875, "learning_rate": 1.4661503708360654e-05, "loss": 0.5603, "step": 5539 }, { "epoch": 0.6996605888388981, "grad_norm": 1.8203125, "learning_rate": 1.4659737357011334e-05, "loss": 0.6634, "step": 5540 }, { "epoch": 0.6997868813639593, "grad_norm": 1.7890625, "learning_rate": 1.4657970819936106e-05, "loss": 0.6652, "step": 5541 }, { "epoch": 0.6999131738890204, "grad_norm": 1.765625, "learning_rate": 1.4656204097205384e-05, "loss": 0.6516, "step": 5542 }, { "epoch": 0.7000394664140817, "grad_norm": 1.703125, "learning_rate": 1.4654437188889582e-05, "loss": 0.5701, "step": 5543 }, { "epoch": 0.7001657589391428, "grad_norm": 1.8515625, "learning_rate": 1.465267009505912e-05, "loss": 0.6999, "step": 5544 }, { "epoch": 0.7002920514642039, "grad_norm": 1.7109375, "learning_rate": 1.4650902815784438e-05, "loss": 0.6188, "step": 5545 }, { "epoch": 0.7004183439892652, "grad_norm": 1.7109375, "learning_rate": 1.4649135351135971e-05, "loss": 0.6294, "step": 5546 }, { "epoch": 0.7005446365143263, "grad_norm": 2.09375, "learning_rate": 1.4647367701184168e-05, "loss": 0.6768, "step": 5547 }, { "epoch": 0.7006709290393874, "grad_norm": 1.8984375, "learning_rate": 1.464559986599948e-05, "loss": 0.6416, "step": 5548 }, { "epoch": 0.7007972215644487, "grad_norm": 1.7109375, "learning_rate": 1.4643831845652373e-05, "loss": 0.635, "step": 5549 }, { "epoch": 0.7009235140895098, "grad_norm": 1.7734375, "learning_rate": 1.4642063640213316e-05, "loss": 0.6539, "step": 5550 }, { "epoch": 0.701049806614571, "grad_norm": 1.75, "learning_rate": 1.4640295249752781e-05, "loss": 0.5482, "step": 5551 }, { "epoch": 0.7011760991396322, "grad_norm": 1.765625, "learning_rate": 1.4638526674341254e-05, "loss": 0.6661, "step": 5552 }, { "epoch": 0.7013023916646933, "grad_norm": 2.03125, "learning_rate": 1.4636757914049226e-05, "loss": 0.6692, "step": 5553 }, { "epoch": 0.7014286841897546, "grad_norm": 1.6015625, "learning_rate": 1.4634988968947199e-05, "loss": 0.5763, "step": 5554 }, { "epoch": 0.7015549767148157, "grad_norm": 1.7734375, "learning_rate": 1.463321983910567e-05, "loss": 0.7649, "step": 5555 }, { "epoch": 0.7016812692398768, "grad_norm": 1.828125, "learning_rate": 1.4631450524595164e-05, "loss": 0.6452, "step": 5556 }, { "epoch": 0.7018075617649381, "grad_norm": 1.7578125, "learning_rate": 1.4629681025486191e-05, "loss": 0.6883, "step": 5557 }, { "epoch": 0.7019338542899992, "grad_norm": 1.765625, "learning_rate": 1.4627911341849285e-05, "loss": 0.6016, "step": 5558 }, { "epoch": 0.7020601468150603, "grad_norm": 1.7578125, "learning_rate": 1.4626141473754983e-05, "loss": 0.6317, "step": 5559 }, { "epoch": 0.7021864393401216, "grad_norm": 1.765625, "learning_rate": 1.4624371421273823e-05, "loss": 0.6596, "step": 5560 }, { "epoch": 0.7023127318651827, "grad_norm": 1.7109375, "learning_rate": 1.4622601184476355e-05, "loss": 0.614, "step": 5561 }, { "epoch": 0.7024390243902439, "grad_norm": 1.7265625, "learning_rate": 1.4620830763433138e-05, "loss": 0.6981, "step": 5562 }, { "epoch": 0.7025653169153051, "grad_norm": 1.78125, "learning_rate": 1.4619060158214739e-05, "loss": 0.6518, "step": 5563 }, { "epoch": 0.7026916094403662, "grad_norm": 1.7421875, "learning_rate": 1.4617289368891725e-05, "loss": 0.636, "step": 5564 }, { "epoch": 0.7028179019654274, "grad_norm": 2.03125, "learning_rate": 1.4615518395534678e-05, "loss": 0.6546, "step": 5565 }, { "epoch": 0.7029441944904886, "grad_norm": 1.6015625, "learning_rate": 1.4613747238214188e-05, "loss": 0.6245, "step": 5566 }, { "epoch": 0.7030704870155497, "grad_norm": 1.7421875, "learning_rate": 1.4611975897000845e-05, "loss": 0.6605, "step": 5567 }, { "epoch": 0.703196779540611, "grad_norm": 1.8671875, "learning_rate": 1.4610204371965251e-05, "loss": 0.6308, "step": 5568 }, { "epoch": 0.7033230720656721, "grad_norm": 1.71875, "learning_rate": 1.4608432663178013e-05, "loss": 0.5915, "step": 5569 }, { "epoch": 0.7034493645907333, "grad_norm": 1.6875, "learning_rate": 1.4606660770709754e-05, "loss": 0.6605, "step": 5570 }, { "epoch": 0.7035756571157945, "grad_norm": 1.5859375, "learning_rate": 1.460488869463109e-05, "loss": 0.6004, "step": 5571 }, { "epoch": 0.7037019496408556, "grad_norm": 1.8359375, "learning_rate": 1.4603116435012653e-05, "loss": 0.6751, "step": 5572 }, { "epoch": 0.7038282421659168, "grad_norm": 1.703125, "learning_rate": 1.4601343991925081e-05, "loss": 0.6992, "step": 5573 }, { "epoch": 0.703954534690978, "grad_norm": 1.7890625, "learning_rate": 1.4599571365439023e-05, "loss": 0.7039, "step": 5574 }, { "epoch": 0.7040808272160392, "grad_norm": 1.7890625, "learning_rate": 1.4597798555625128e-05, "loss": 0.6958, "step": 5575 }, { "epoch": 0.7042071197411003, "grad_norm": 1.90625, "learning_rate": 1.459602556255406e-05, "loss": 0.6697, "step": 5576 }, { "epoch": 0.7043334122661615, "grad_norm": 1.6796875, "learning_rate": 1.4594252386296481e-05, "loss": 0.7035, "step": 5577 }, { "epoch": 0.7044597047912227, "grad_norm": 2.203125, "learning_rate": 1.4592479026923068e-05, "loss": 0.7212, "step": 5578 }, { "epoch": 0.7045859973162838, "grad_norm": 1.84375, "learning_rate": 1.4590705484504504e-05, "loss": 0.6405, "step": 5579 }, { "epoch": 0.704712289841345, "grad_norm": 1.8125, "learning_rate": 1.4588931759111476e-05, "loss": 0.6819, "step": 5580 }, { "epoch": 0.7048385823664062, "grad_norm": 1.6328125, "learning_rate": 1.4587157850814679e-05, "loss": 0.5775, "step": 5581 }, { "epoch": 0.7049648748914673, "grad_norm": 1.8515625, "learning_rate": 1.4585383759684819e-05, "loss": 0.6639, "step": 5582 }, { "epoch": 0.7050911674165286, "grad_norm": 1.9140625, "learning_rate": 1.458360948579261e-05, "loss": 0.6452, "step": 5583 }, { "epoch": 0.7052174599415897, "grad_norm": 1.6953125, "learning_rate": 1.4581835029208766e-05, "loss": 0.6341, "step": 5584 }, { "epoch": 0.7053437524666509, "grad_norm": 1.609375, "learning_rate": 1.4580060390004013e-05, "loss": 0.5878, "step": 5585 }, { "epoch": 0.7054700449917121, "grad_norm": 1.796875, "learning_rate": 1.4578285568249084e-05, "loss": 0.6497, "step": 5586 }, { "epoch": 0.7055963375167732, "grad_norm": 1.8203125, "learning_rate": 1.4576510564014724e-05, "loss": 0.6362, "step": 5587 }, { "epoch": 0.7057226300418344, "grad_norm": 1.8828125, "learning_rate": 1.457473537737167e-05, "loss": 0.6482, "step": 5588 }, { "epoch": 0.7058489225668956, "grad_norm": 1.6484375, "learning_rate": 1.4572960008390686e-05, "loss": 0.5788, "step": 5589 }, { "epoch": 0.7059752150919567, "grad_norm": 1.7109375, "learning_rate": 1.4571184457142531e-05, "loss": 0.5838, "step": 5590 }, { "epoch": 0.706101507617018, "grad_norm": 1.6875, "learning_rate": 1.4569408723697972e-05, "loss": 0.59, "step": 5591 }, { "epoch": 0.7062278001420791, "grad_norm": 1.703125, "learning_rate": 1.4567632808127789e-05, "loss": 0.5639, "step": 5592 }, { "epoch": 0.7063540926671402, "grad_norm": 1.6875, "learning_rate": 1.4565856710502764e-05, "loss": 0.6141, "step": 5593 }, { "epoch": 0.7064803851922015, "grad_norm": 1.796875, "learning_rate": 1.4564080430893687e-05, "loss": 0.6763, "step": 5594 }, { "epoch": 0.7066066777172626, "grad_norm": 1.7734375, "learning_rate": 1.456230396937136e-05, "loss": 0.7375, "step": 5595 }, { "epoch": 0.7067329702423237, "grad_norm": 1.828125, "learning_rate": 1.4560527326006584e-05, "loss": 0.7116, "step": 5596 }, { "epoch": 0.706859262767385, "grad_norm": 1.7265625, "learning_rate": 1.4558750500870175e-05, "loss": 0.6264, "step": 5597 }, { "epoch": 0.7069855552924461, "grad_norm": 1.7734375, "learning_rate": 1.4556973494032952e-05, "loss": 0.6956, "step": 5598 }, { "epoch": 0.7071118478175074, "grad_norm": 1.671875, "learning_rate": 1.4555196305565738e-05, "loss": 0.652, "step": 5599 }, { "epoch": 0.7072381403425685, "grad_norm": 1.8359375, "learning_rate": 1.455341893553937e-05, "loss": 0.6756, "step": 5600 }, { "epoch": 0.7073644328676296, "grad_norm": 2.0, "learning_rate": 1.4551641384024694e-05, "loss": 0.7161, "step": 5601 }, { "epoch": 0.7074907253926909, "grad_norm": 1.7421875, "learning_rate": 1.4549863651092558e-05, "loss": 0.5826, "step": 5602 }, { "epoch": 0.707617017917752, "grad_norm": 1.984375, "learning_rate": 1.4548085736813811e-05, "loss": 0.6602, "step": 5603 }, { "epoch": 0.7077433104428131, "grad_norm": 1.6796875, "learning_rate": 1.4546307641259326e-05, "loss": 0.6554, "step": 5604 }, { "epoch": 0.7078696029678744, "grad_norm": 1.859375, "learning_rate": 1.4544529364499967e-05, "loss": 0.6488, "step": 5605 }, { "epoch": 0.7079958954929355, "grad_norm": 1.734375, "learning_rate": 1.4542750906606613e-05, "loss": 0.6436, "step": 5606 }, { "epoch": 0.7081221880179966, "grad_norm": 1.671875, "learning_rate": 1.4540972267650148e-05, "loss": 0.6247, "step": 5607 }, { "epoch": 0.7082484805430579, "grad_norm": 1.671875, "learning_rate": 1.4539193447701469e-05, "loss": 0.6003, "step": 5608 }, { "epoch": 0.708374773068119, "grad_norm": 1.6875, "learning_rate": 1.453741444683147e-05, "loss": 0.6274, "step": 5609 }, { "epoch": 0.7085010655931802, "grad_norm": 1.609375, "learning_rate": 1.4535635265111063e-05, "loss": 0.6875, "step": 5610 }, { "epoch": 0.7086273581182414, "grad_norm": 1.890625, "learning_rate": 1.4533855902611157e-05, "loss": 0.7056, "step": 5611 }, { "epoch": 0.7087536506433025, "grad_norm": 1.9375, "learning_rate": 1.4532076359402677e-05, "loss": 0.7515, "step": 5612 }, { "epoch": 0.7088799431683637, "grad_norm": 1.8125, "learning_rate": 1.4530296635556548e-05, "loss": 0.7058, "step": 5613 }, { "epoch": 0.7090062356934249, "grad_norm": 1.578125, "learning_rate": 1.4528516731143709e-05, "loss": 0.5597, "step": 5614 }, { "epoch": 0.709132528218486, "grad_norm": 1.6875, "learning_rate": 1.4526736646235098e-05, "loss": 0.5906, "step": 5615 }, { "epoch": 0.7092588207435473, "grad_norm": 2.140625, "learning_rate": 1.4524956380901669e-05, "loss": 0.7589, "step": 5616 }, { "epoch": 0.7093851132686084, "grad_norm": 1.75, "learning_rate": 1.4523175935214378e-05, "loss": 0.6676, "step": 5617 }, { "epoch": 0.7095114057936696, "grad_norm": 1.9140625, "learning_rate": 1.4521395309244186e-05, "loss": 0.6682, "step": 5618 }, { "epoch": 0.7096376983187308, "grad_norm": 1.96875, "learning_rate": 1.4519614503062072e-05, "loss": 0.7619, "step": 5619 }, { "epoch": 0.7097639908437919, "grad_norm": 1.6640625, "learning_rate": 1.4517833516739006e-05, "loss": 0.5964, "step": 5620 }, { "epoch": 0.7098902833688531, "grad_norm": 2.03125, "learning_rate": 1.4516052350345982e-05, "loss": 0.8153, "step": 5621 }, { "epoch": 0.7100165758939143, "grad_norm": 1.8125, "learning_rate": 1.4514271003953985e-05, "loss": 0.6718, "step": 5622 }, { "epoch": 0.7101428684189754, "grad_norm": 1.5625, "learning_rate": 1.4512489477634022e-05, "loss": 0.5028, "step": 5623 }, { "epoch": 0.7102691609440366, "grad_norm": 1.953125, "learning_rate": 1.4510707771457097e-05, "loss": 0.7223, "step": 5624 }, { "epoch": 0.7103954534690978, "grad_norm": 1.75, "learning_rate": 1.4508925885494226e-05, "loss": 0.6869, "step": 5625 }, { "epoch": 0.710521745994159, "grad_norm": 1.6796875, "learning_rate": 1.4507143819816426e-05, "loss": 0.6416, "step": 5626 }, { "epoch": 0.7106480385192201, "grad_norm": 1.859375, "learning_rate": 1.4505361574494731e-05, "loss": 0.6572, "step": 5627 }, { "epoch": 0.7107743310442813, "grad_norm": 1.84375, "learning_rate": 1.4503579149600176e-05, "loss": 0.715, "step": 5628 }, { "epoch": 0.7109006235693425, "grad_norm": 1.8203125, "learning_rate": 1.4501796545203803e-05, "loss": 0.6419, "step": 5629 }, { "epoch": 0.7110269160944037, "grad_norm": 1.875, "learning_rate": 1.4500013761376663e-05, "loss": 0.649, "step": 5630 }, { "epoch": 0.7111532086194648, "grad_norm": 1.8359375, "learning_rate": 1.4498230798189815e-05, "loss": 0.6298, "step": 5631 }, { "epoch": 0.711279501144526, "grad_norm": 1.7109375, "learning_rate": 1.4496447655714323e-05, "loss": 0.5989, "step": 5632 }, { "epoch": 0.7114057936695872, "grad_norm": 1.7734375, "learning_rate": 1.4494664334021256e-05, "loss": 0.683, "step": 5633 }, { "epoch": 0.7115320861946484, "grad_norm": 1.7734375, "learning_rate": 1.4492880833181695e-05, "loss": 0.6747, "step": 5634 }, { "epoch": 0.7116583787197095, "grad_norm": 1.765625, "learning_rate": 1.4491097153266725e-05, "loss": 0.6574, "step": 5635 }, { "epoch": 0.7117846712447707, "grad_norm": 1.859375, "learning_rate": 1.4489313294347438e-05, "loss": 0.6464, "step": 5636 }, { "epoch": 0.7119109637698319, "grad_norm": 1.7265625, "learning_rate": 1.4487529256494938e-05, "loss": 0.5806, "step": 5637 }, { "epoch": 0.712037256294893, "grad_norm": 1.6796875, "learning_rate": 1.4485745039780332e-05, "loss": 0.6782, "step": 5638 }, { "epoch": 0.7121635488199543, "grad_norm": 1.875, "learning_rate": 1.4483960644274733e-05, "loss": 0.7385, "step": 5639 }, { "epoch": 0.7122898413450154, "grad_norm": 1.8203125, "learning_rate": 1.4482176070049263e-05, "loss": 0.6806, "step": 5640 }, { "epoch": 0.7124161338700765, "grad_norm": 1.75, "learning_rate": 1.4480391317175051e-05, "loss": 0.5271, "step": 5641 }, { "epoch": 0.7125424263951378, "grad_norm": 1.828125, "learning_rate": 1.4478606385723233e-05, "loss": 0.6782, "step": 5642 }, { "epoch": 0.7126687189201989, "grad_norm": 1.859375, "learning_rate": 1.4476821275764951e-05, "loss": 0.6294, "step": 5643 }, { "epoch": 0.71279501144526, "grad_norm": 1.6875, "learning_rate": 1.4475035987371355e-05, "loss": 0.6144, "step": 5644 }, { "epoch": 0.7129213039703213, "grad_norm": 1.5859375, "learning_rate": 1.4473250520613604e-05, "loss": 0.6354, "step": 5645 }, { "epoch": 0.7130475964953824, "grad_norm": 1.8671875, "learning_rate": 1.4471464875562862e-05, "loss": 0.6424, "step": 5646 }, { "epoch": 0.7131738890204437, "grad_norm": 1.8671875, "learning_rate": 1.4469679052290299e-05, "loss": 0.6695, "step": 5647 }, { "epoch": 0.7133001815455048, "grad_norm": 1.8984375, "learning_rate": 1.4467893050867098e-05, "loss": 0.7563, "step": 5648 }, { "epoch": 0.7134264740705659, "grad_norm": 1.734375, "learning_rate": 1.446610687136444e-05, "loss": 0.6191, "step": 5649 }, { "epoch": 0.7135527665956272, "grad_norm": 1.765625, "learning_rate": 1.4464320513853521e-05, "loss": 0.6459, "step": 5650 }, { "epoch": 0.7136790591206883, "grad_norm": 1.734375, "learning_rate": 1.4462533978405538e-05, "loss": 0.6426, "step": 5651 }, { "epoch": 0.7138053516457494, "grad_norm": 1.765625, "learning_rate": 1.4460747265091699e-05, "loss": 0.6758, "step": 5652 }, { "epoch": 0.7139316441708107, "grad_norm": 1.7890625, "learning_rate": 1.4458960373983217e-05, "loss": 0.5463, "step": 5653 }, { "epoch": 0.7140579366958718, "grad_norm": 1.609375, "learning_rate": 1.4457173305151316e-05, "loss": 0.5733, "step": 5654 }, { "epoch": 0.7141842292209329, "grad_norm": 1.8828125, "learning_rate": 1.4455386058667222e-05, "loss": 0.7158, "step": 5655 }, { "epoch": 0.7143105217459942, "grad_norm": 1.78125, "learning_rate": 1.4453598634602173e-05, "loss": 0.6017, "step": 5656 }, { "epoch": 0.7144368142710553, "grad_norm": 1.875, "learning_rate": 1.445181103302741e-05, "loss": 0.6453, "step": 5657 }, { "epoch": 0.7145631067961165, "grad_norm": 1.9140625, "learning_rate": 1.445002325401418e-05, "loss": 0.7075, "step": 5658 }, { "epoch": 0.7146893993211777, "grad_norm": 1.765625, "learning_rate": 1.4448235297633741e-05, "loss": 0.5547, "step": 5659 }, { "epoch": 0.7148156918462388, "grad_norm": 1.671875, "learning_rate": 1.444644716395736e-05, "loss": 0.5829, "step": 5660 }, { "epoch": 0.7149419843713001, "grad_norm": 1.828125, "learning_rate": 1.4444658853056304e-05, "loss": 0.6038, "step": 5661 }, { "epoch": 0.7150682768963612, "grad_norm": 1.765625, "learning_rate": 1.444287036500185e-05, "loss": 0.6535, "step": 5662 }, { "epoch": 0.7151945694214223, "grad_norm": 1.828125, "learning_rate": 1.4441081699865285e-05, "loss": 0.6782, "step": 5663 }, { "epoch": 0.7153208619464836, "grad_norm": 1.8515625, "learning_rate": 1.4439292857717901e-05, "loss": 0.628, "step": 5664 }, { "epoch": 0.7154471544715447, "grad_norm": 1.7734375, "learning_rate": 1.4437503838630997e-05, "loss": 0.6022, "step": 5665 }, { "epoch": 0.7155734469966059, "grad_norm": 1.828125, "learning_rate": 1.4435714642675879e-05, "loss": 0.6553, "step": 5666 }, { "epoch": 0.7156997395216671, "grad_norm": 1.890625, "learning_rate": 1.4433925269923858e-05, "loss": 0.7516, "step": 5667 }, { "epoch": 0.7158260320467282, "grad_norm": 1.7734375, "learning_rate": 1.4432135720446256e-05, "loss": 0.6676, "step": 5668 }, { "epoch": 0.7159523245717894, "grad_norm": 1.7734375, "learning_rate": 1.44303459943144e-05, "loss": 0.6842, "step": 5669 }, { "epoch": 0.7160786170968506, "grad_norm": 1.59375, "learning_rate": 1.4428556091599624e-05, "loss": 0.579, "step": 5670 }, { "epoch": 0.7162049096219117, "grad_norm": 1.8671875, "learning_rate": 1.442676601237327e-05, "loss": 0.6923, "step": 5671 }, { "epoch": 0.7163312021469729, "grad_norm": 1.8046875, "learning_rate": 1.442497575670668e-05, "loss": 0.6177, "step": 5672 }, { "epoch": 0.7164574946720341, "grad_norm": 1.90625, "learning_rate": 1.442318532467122e-05, "loss": 0.838, "step": 5673 }, { "epoch": 0.7165837871970953, "grad_norm": 1.8515625, "learning_rate": 1.4421394716338246e-05, "loss": 0.6033, "step": 5674 }, { "epoch": 0.7167100797221564, "grad_norm": 1.6953125, "learning_rate": 1.4419603931779129e-05, "loss": 0.5862, "step": 5675 }, { "epoch": 0.7168363722472176, "grad_norm": 1.84375, "learning_rate": 1.4417812971065241e-05, "loss": 0.6206, "step": 5676 }, { "epoch": 0.7169626647722788, "grad_norm": 1.9765625, "learning_rate": 1.4416021834267972e-05, "loss": 0.6116, "step": 5677 }, { "epoch": 0.71708895729734, "grad_norm": 1.9765625, "learning_rate": 1.4414230521458709e-05, "loss": 0.7623, "step": 5678 }, { "epoch": 0.7172152498224011, "grad_norm": 1.890625, "learning_rate": 1.441243903270885e-05, "loss": 0.6634, "step": 5679 }, { "epoch": 0.7173415423474623, "grad_norm": 1.7265625, "learning_rate": 1.4410647368089798e-05, "loss": 0.7314, "step": 5680 }, { "epoch": 0.7174678348725235, "grad_norm": 1.8671875, "learning_rate": 1.4408855527672965e-05, "loss": 0.7367, "step": 5681 }, { "epoch": 0.7175941273975847, "grad_norm": 1.765625, "learning_rate": 1.440706351152977e-05, "loss": 0.6455, "step": 5682 }, { "epoch": 0.7177204199226458, "grad_norm": 1.71875, "learning_rate": 1.4405271319731639e-05, "loss": 0.622, "step": 5683 }, { "epoch": 0.717846712447707, "grad_norm": 1.71875, "learning_rate": 1.4403478952350005e-05, "loss": 0.6604, "step": 5684 }, { "epoch": 0.7179730049727682, "grad_norm": 1.640625, "learning_rate": 1.4401686409456307e-05, "loss": 0.6796, "step": 5685 }, { "epoch": 0.7180992974978293, "grad_norm": 1.9140625, "learning_rate": 1.4399893691121988e-05, "loss": 0.7007, "step": 5686 }, { "epoch": 0.7182255900228905, "grad_norm": 1.796875, "learning_rate": 1.4398100797418508e-05, "loss": 0.6583, "step": 5687 }, { "epoch": 0.7183518825479517, "grad_norm": 1.765625, "learning_rate": 1.4396307728417322e-05, "loss": 0.6419, "step": 5688 }, { "epoch": 0.7184781750730128, "grad_norm": 1.7578125, "learning_rate": 1.4394514484189898e-05, "loss": 0.6429, "step": 5689 }, { "epoch": 0.7186044675980741, "grad_norm": 1.640625, "learning_rate": 1.4392721064807712e-05, "loss": 0.6056, "step": 5690 }, { "epoch": 0.7187307601231352, "grad_norm": 1.6953125, "learning_rate": 1.4390927470342244e-05, "loss": 0.645, "step": 5691 }, { "epoch": 0.7188570526481964, "grad_norm": 1.8046875, "learning_rate": 1.4389133700864986e-05, "loss": 0.6285, "step": 5692 }, { "epoch": 0.7189833451732576, "grad_norm": 2.015625, "learning_rate": 1.438733975644743e-05, "loss": 0.653, "step": 5693 }, { "epoch": 0.7191096376983187, "grad_norm": 1.75, "learning_rate": 1.4385545637161076e-05, "loss": 0.6587, "step": 5694 }, { "epoch": 0.71923593022338, "grad_norm": 1.84375, "learning_rate": 1.438375134307744e-05, "loss": 0.7268, "step": 5695 }, { "epoch": 0.7193622227484411, "grad_norm": 1.8046875, "learning_rate": 1.438195687426803e-05, "loss": 0.5759, "step": 5696 }, { "epoch": 0.7194885152735022, "grad_norm": 1.9375, "learning_rate": 1.4380162230804377e-05, "loss": 0.6596, "step": 5697 }, { "epoch": 0.7196148077985635, "grad_norm": 1.765625, "learning_rate": 1.4378367412758007e-05, "loss": 0.6414, "step": 5698 }, { "epoch": 0.7197411003236246, "grad_norm": 1.8125, "learning_rate": 1.4376572420200457e-05, "loss": 0.6747, "step": 5699 }, { "epoch": 0.7198673928486857, "grad_norm": 1.7421875, "learning_rate": 1.4374777253203273e-05, "loss": 0.6664, "step": 5700 }, { "epoch": 0.719993685373747, "grad_norm": 1.8828125, "learning_rate": 1.4372981911838004e-05, "loss": 0.7484, "step": 5701 }, { "epoch": 0.7201199778988081, "grad_norm": 1.890625, "learning_rate": 1.437118639617621e-05, "loss": 0.7677, "step": 5702 }, { "epoch": 0.7202462704238692, "grad_norm": 1.7109375, "learning_rate": 1.4369390706289457e-05, "loss": 0.6598, "step": 5703 }, { "epoch": 0.7203725629489305, "grad_norm": 1.796875, "learning_rate": 1.436759484224931e-05, "loss": 0.6224, "step": 5704 }, { "epoch": 0.7204988554739916, "grad_norm": 1.7578125, "learning_rate": 1.4365798804127356e-05, "loss": 0.6193, "step": 5705 }, { "epoch": 0.7206251479990528, "grad_norm": 1.8203125, "learning_rate": 1.4364002591995176e-05, "loss": 0.7698, "step": 5706 }, { "epoch": 0.720751440524114, "grad_norm": 1.8125, "learning_rate": 1.4362206205924364e-05, "loss": 0.6287, "step": 5707 }, { "epoch": 0.7208777330491751, "grad_norm": 1.953125, "learning_rate": 1.436040964598652e-05, "loss": 0.6459, "step": 5708 }, { "epoch": 0.7210040255742364, "grad_norm": 2.0, "learning_rate": 1.4358612912253249e-05, "loss": 0.7742, "step": 5709 }, { "epoch": 0.7211303180992975, "grad_norm": 1.640625, "learning_rate": 1.4356816004796168e-05, "loss": 0.5873, "step": 5710 }, { "epoch": 0.7212566106243586, "grad_norm": 1.8515625, "learning_rate": 1.4355018923686895e-05, "loss": 0.6177, "step": 5711 }, { "epoch": 0.7213829031494199, "grad_norm": 1.7421875, "learning_rate": 1.4353221668997055e-05, "loss": 0.5927, "step": 5712 }, { "epoch": 0.721509195674481, "grad_norm": 1.9140625, "learning_rate": 1.4351424240798286e-05, "loss": 0.65, "step": 5713 }, { "epoch": 0.7216354881995422, "grad_norm": 1.8046875, "learning_rate": 1.434962663916223e-05, "loss": 0.6759, "step": 5714 }, { "epoch": 0.7217617807246034, "grad_norm": 1.75, "learning_rate": 1.434782886416053e-05, "loss": 0.7202, "step": 5715 }, { "epoch": 0.7218880732496645, "grad_norm": 1.828125, "learning_rate": 1.4346030915864845e-05, "loss": 0.7022, "step": 5716 }, { "epoch": 0.7220143657747257, "grad_norm": 1.921875, "learning_rate": 1.4344232794346834e-05, "loss": 0.6885, "step": 5717 }, { "epoch": 0.7221406582997869, "grad_norm": 1.6484375, "learning_rate": 1.4342434499678168e-05, "loss": 0.579, "step": 5718 }, { "epoch": 0.722266950824848, "grad_norm": 1.8984375, "learning_rate": 1.4340636031930524e-05, "loss": 0.6784, "step": 5719 }, { "epoch": 0.7223932433499092, "grad_norm": 1.828125, "learning_rate": 1.4338837391175582e-05, "loss": 0.7226, "step": 5720 }, { "epoch": 0.7225195358749704, "grad_norm": 1.8125, "learning_rate": 1.4337038577485035e-05, "loss": 0.7756, "step": 5721 }, { "epoch": 0.7226458284000316, "grad_norm": 1.7265625, "learning_rate": 1.4335239590930572e-05, "loss": 0.6143, "step": 5722 }, { "epoch": 0.7227721209250928, "grad_norm": 1.671875, "learning_rate": 1.4333440431583905e-05, "loss": 0.6306, "step": 5723 }, { "epoch": 0.7228984134501539, "grad_norm": 1.8515625, "learning_rate": 1.4331641099516738e-05, "loss": 0.6392, "step": 5724 }, { "epoch": 0.7230247059752151, "grad_norm": 1.703125, "learning_rate": 1.4329841594800791e-05, "loss": 0.5819, "step": 5725 }, { "epoch": 0.7231509985002763, "grad_norm": 1.796875, "learning_rate": 1.4328041917507784e-05, "loss": 0.6128, "step": 5726 }, { "epoch": 0.7232772910253374, "grad_norm": 1.8046875, "learning_rate": 1.4326242067709456e-05, "loss": 0.5971, "step": 5727 }, { "epoch": 0.7234035835503986, "grad_norm": 1.734375, "learning_rate": 1.4324442045477536e-05, "loss": 0.7113, "step": 5728 }, { "epoch": 0.7235298760754598, "grad_norm": 1.8203125, "learning_rate": 1.4322641850883772e-05, "loss": 0.635, "step": 5729 }, { "epoch": 0.723656168600521, "grad_norm": 1.6640625, "learning_rate": 1.4320841483999917e-05, "loss": 0.6331, "step": 5730 }, { "epoch": 0.7237824611255821, "grad_norm": 1.84375, "learning_rate": 1.4319040944897729e-05, "loss": 0.6843, "step": 5731 }, { "epoch": 0.7239087536506433, "grad_norm": 1.75, "learning_rate": 1.431724023364897e-05, "loss": 0.6382, "step": 5732 }, { "epoch": 0.7240350461757045, "grad_norm": 1.6484375, "learning_rate": 1.4315439350325416e-05, "loss": 0.6652, "step": 5733 }, { "epoch": 0.7241613387007656, "grad_norm": 1.7890625, "learning_rate": 1.4313638294998841e-05, "loss": 0.6503, "step": 5734 }, { "epoch": 0.7242876312258268, "grad_norm": 1.6953125, "learning_rate": 1.4311837067741035e-05, "loss": 0.5604, "step": 5735 }, { "epoch": 0.724413923750888, "grad_norm": 1.890625, "learning_rate": 1.431003566862379e-05, "loss": 0.6975, "step": 5736 }, { "epoch": 0.7245402162759492, "grad_norm": 1.78125, "learning_rate": 1.4308234097718902e-05, "loss": 0.5873, "step": 5737 }, { "epoch": 0.7246665088010104, "grad_norm": 1.640625, "learning_rate": 1.4306432355098183e-05, "loss": 0.6688, "step": 5738 }, { "epoch": 0.7247928013260715, "grad_norm": 1.7109375, "learning_rate": 1.4304630440833443e-05, "loss": 0.6211, "step": 5739 }, { "epoch": 0.7249190938511327, "grad_norm": 1.6875, "learning_rate": 1.43028283549965e-05, "loss": 0.6142, "step": 5740 }, { "epoch": 0.7250453863761939, "grad_norm": 1.9375, "learning_rate": 1.4301026097659186e-05, "loss": 0.744, "step": 5741 }, { "epoch": 0.725171678901255, "grad_norm": 1.7890625, "learning_rate": 1.4299223668893328e-05, "loss": 0.5848, "step": 5742 }, { "epoch": 0.7252979714263162, "grad_norm": 1.8828125, "learning_rate": 1.4297421068770771e-05, "loss": 0.6125, "step": 5743 }, { "epoch": 0.7254242639513774, "grad_norm": 1.9140625, "learning_rate": 1.4295618297363359e-05, "loss": 0.7166, "step": 5744 }, { "epoch": 0.7255505564764385, "grad_norm": 1.671875, "learning_rate": 1.4293815354742951e-05, "loss": 0.6155, "step": 5745 }, { "epoch": 0.7256768490014998, "grad_norm": 1.6484375, "learning_rate": 1.4292012240981406e-05, "loss": 0.5822, "step": 5746 }, { "epoch": 0.7258031415265609, "grad_norm": 1.8671875, "learning_rate": 1.4290208956150589e-05, "loss": 0.6744, "step": 5747 }, { "epoch": 0.725929434051622, "grad_norm": 1.8515625, "learning_rate": 1.4288405500322378e-05, "loss": 0.659, "step": 5748 }, { "epoch": 0.7260557265766833, "grad_norm": 1.75, "learning_rate": 1.428660187356865e-05, "loss": 0.7001, "step": 5749 }, { "epoch": 0.7261820191017444, "grad_norm": 1.71875, "learning_rate": 1.4284798075961298e-05, "loss": 0.6304, "step": 5750 }, { "epoch": 0.7263083116268055, "grad_norm": 1.859375, "learning_rate": 1.428299410757222e-05, "loss": 0.7479, "step": 5751 }, { "epoch": 0.7264346041518668, "grad_norm": 1.703125, "learning_rate": 1.4281189968473307e-05, "loss": 0.5699, "step": 5752 }, { "epoch": 0.7265608966769279, "grad_norm": 1.953125, "learning_rate": 1.4279385658736473e-05, "loss": 0.7467, "step": 5753 }, { "epoch": 0.7266871892019892, "grad_norm": 1.7421875, "learning_rate": 1.427758117843364e-05, "loss": 0.6292, "step": 5754 }, { "epoch": 0.7268134817270503, "grad_norm": 1.9453125, "learning_rate": 1.427577652763672e-05, "loss": 0.6135, "step": 5755 }, { "epoch": 0.7269397742521114, "grad_norm": 1.7265625, "learning_rate": 1.4273971706417648e-05, "loss": 0.6346, "step": 5756 }, { "epoch": 0.7270660667771727, "grad_norm": 2.21875, "learning_rate": 1.4272166714848359e-05, "loss": 0.7847, "step": 5757 }, { "epoch": 0.7271923593022338, "grad_norm": 1.8203125, "learning_rate": 1.4270361553000794e-05, "loss": 0.6409, "step": 5758 }, { "epoch": 0.7273186518272949, "grad_norm": 1.6875, "learning_rate": 1.4268556220946903e-05, "loss": 0.5256, "step": 5759 }, { "epoch": 0.7274449443523562, "grad_norm": 1.90625, "learning_rate": 1.4266750718758644e-05, "loss": 0.6153, "step": 5760 }, { "epoch": 0.7275712368774173, "grad_norm": 1.84375, "learning_rate": 1.4264945046507976e-05, "loss": 0.6926, "step": 5761 }, { "epoch": 0.7276975294024784, "grad_norm": 1.8984375, "learning_rate": 1.4263139204266871e-05, "loss": 0.6815, "step": 5762 }, { "epoch": 0.7278238219275397, "grad_norm": 1.734375, "learning_rate": 1.4261333192107307e-05, "loss": 0.599, "step": 5763 }, { "epoch": 0.7279501144526008, "grad_norm": 1.671875, "learning_rate": 1.4259527010101268e-05, "loss": 0.63, "step": 5764 }, { "epoch": 0.728076406977662, "grad_norm": 1.8671875, "learning_rate": 1.425772065832074e-05, "loss": 0.7508, "step": 5765 }, { "epoch": 0.7282026995027232, "grad_norm": 1.7890625, "learning_rate": 1.4255914136837722e-05, "loss": 0.7136, "step": 5766 }, { "epoch": 0.7283289920277843, "grad_norm": 1.7890625, "learning_rate": 1.425410744572422e-05, "loss": 0.7487, "step": 5767 }, { "epoch": 0.7284552845528456, "grad_norm": 1.765625, "learning_rate": 1.4252300585052239e-05, "loss": 0.6339, "step": 5768 }, { "epoch": 0.7285815770779067, "grad_norm": 1.796875, "learning_rate": 1.4250493554893799e-05, "loss": 0.673, "step": 5769 }, { "epoch": 0.7287078696029679, "grad_norm": 1.90625, "learning_rate": 1.4248686355320924e-05, "loss": 0.6833, "step": 5770 }, { "epoch": 0.7288341621280291, "grad_norm": 1.7890625, "learning_rate": 1.4246878986405643e-05, "loss": 0.6076, "step": 5771 }, { "epoch": 0.7289604546530902, "grad_norm": 1.8671875, "learning_rate": 1.4245071448219997e-05, "loss": 0.6992, "step": 5772 }, { "epoch": 0.7290867471781514, "grad_norm": 1.9921875, "learning_rate": 1.424326374083603e-05, "loss": 0.67, "step": 5773 }, { "epoch": 0.7292130397032126, "grad_norm": 1.8828125, "learning_rate": 1.4241455864325788e-05, "loss": 0.6718, "step": 5774 }, { "epoch": 0.7293393322282737, "grad_norm": 1.6640625, "learning_rate": 1.4239647818761332e-05, "loss": 0.652, "step": 5775 }, { "epoch": 0.7294656247533349, "grad_norm": 1.796875, "learning_rate": 1.4237839604214727e-05, "loss": 0.6186, "step": 5776 }, { "epoch": 0.7295919172783961, "grad_norm": 1.953125, "learning_rate": 1.4236031220758042e-05, "loss": 0.7578, "step": 5777 }, { "epoch": 0.7297182098034573, "grad_norm": 1.8125, "learning_rate": 1.4234222668463355e-05, "loss": 0.61, "step": 5778 }, { "epoch": 0.7298445023285184, "grad_norm": 1.765625, "learning_rate": 1.4232413947402752e-05, "loss": 0.6693, "step": 5779 }, { "epoch": 0.7299707948535796, "grad_norm": 1.6796875, "learning_rate": 1.4230605057648321e-05, "loss": 0.6512, "step": 5780 }, { "epoch": 0.7300970873786408, "grad_norm": 1.7890625, "learning_rate": 1.4228795999272164e-05, "loss": 0.726, "step": 5781 }, { "epoch": 0.7302233799037019, "grad_norm": 1.6796875, "learning_rate": 1.4226986772346388e-05, "loss": 0.5775, "step": 5782 }, { "epoch": 0.7303496724287631, "grad_norm": 1.625, "learning_rate": 1.4225177376943095e-05, "loss": 0.5856, "step": 5783 }, { "epoch": 0.7304759649538243, "grad_norm": 1.71875, "learning_rate": 1.4223367813134412e-05, "loss": 0.6111, "step": 5784 }, { "epoch": 0.7306022574788855, "grad_norm": 1.7734375, "learning_rate": 1.422155808099246e-05, "loss": 0.7217, "step": 5785 }, { "epoch": 0.7307285500039467, "grad_norm": 1.7265625, "learning_rate": 1.421974818058937e-05, "loss": 0.6449, "step": 5786 }, { "epoch": 0.7308548425290078, "grad_norm": 1.7578125, "learning_rate": 1.4217938111997282e-05, "loss": 0.6795, "step": 5787 }, { "epoch": 0.730981135054069, "grad_norm": 1.6796875, "learning_rate": 1.4216127875288342e-05, "loss": 0.6098, "step": 5788 }, { "epoch": 0.7311074275791302, "grad_norm": 1.6875, "learning_rate": 1.4214317470534695e-05, "loss": 0.6653, "step": 5789 }, { "epoch": 0.7312337201041913, "grad_norm": 1.8125, "learning_rate": 1.421250689780851e-05, "loss": 0.584, "step": 5790 }, { "epoch": 0.7313600126292525, "grad_norm": 1.8203125, "learning_rate": 1.4210696157181944e-05, "loss": 0.5994, "step": 5791 }, { "epoch": 0.7314863051543137, "grad_norm": 1.6328125, "learning_rate": 1.420888524872717e-05, "loss": 0.5314, "step": 5792 }, { "epoch": 0.7316125976793748, "grad_norm": 1.8046875, "learning_rate": 1.4207074172516371e-05, "loss": 0.6725, "step": 5793 }, { "epoch": 0.731738890204436, "grad_norm": 1.7734375, "learning_rate": 1.4205262928621728e-05, "loss": 0.6566, "step": 5794 }, { "epoch": 0.7318651827294972, "grad_norm": 1.9765625, "learning_rate": 1.4203451517115433e-05, "loss": 0.5617, "step": 5795 }, { "epoch": 0.7319914752545583, "grad_norm": 1.5859375, "learning_rate": 1.4201639938069687e-05, "loss": 0.5606, "step": 5796 }, { "epoch": 0.7321177677796196, "grad_norm": 1.8984375, "learning_rate": 1.4199828191556693e-05, "loss": 0.6812, "step": 5797 }, { "epoch": 0.7322440603046807, "grad_norm": 1.8359375, "learning_rate": 1.4198016277648658e-05, "loss": 0.5992, "step": 5798 }, { "epoch": 0.7323703528297419, "grad_norm": 1.78125, "learning_rate": 1.4196204196417812e-05, "loss": 0.6502, "step": 5799 }, { "epoch": 0.7324966453548031, "grad_norm": 1.71875, "learning_rate": 1.419439194793637e-05, "loss": 0.6474, "step": 5800 }, { "epoch": 0.7326229378798642, "grad_norm": 1.765625, "learning_rate": 1.4192579532276569e-05, "loss": 0.5702, "step": 5801 }, { "epoch": 0.7327492304049255, "grad_norm": 1.7734375, "learning_rate": 1.4190766949510647e-05, "loss": 0.5999, "step": 5802 }, { "epoch": 0.7328755229299866, "grad_norm": 1.671875, "learning_rate": 1.4188954199710847e-05, "loss": 0.6099, "step": 5803 }, { "epoch": 0.7330018154550477, "grad_norm": 1.6796875, "learning_rate": 1.4187141282949424e-05, "loss": 0.5528, "step": 5804 }, { "epoch": 0.733128107980109, "grad_norm": 1.84375, "learning_rate": 1.4185328199298636e-05, "loss": 0.6528, "step": 5805 }, { "epoch": 0.7332544005051701, "grad_norm": 1.7421875, "learning_rate": 1.4183514948830745e-05, "loss": 0.6622, "step": 5806 }, { "epoch": 0.7333806930302312, "grad_norm": 1.8125, "learning_rate": 1.4181701531618025e-05, "loss": 0.807, "step": 5807 }, { "epoch": 0.7335069855552925, "grad_norm": 1.7890625, "learning_rate": 1.4179887947732751e-05, "loss": 0.6868, "step": 5808 }, { "epoch": 0.7336332780803536, "grad_norm": 2.015625, "learning_rate": 1.4178074197247215e-05, "loss": 0.7381, "step": 5809 }, { "epoch": 0.7337595706054147, "grad_norm": 1.8828125, "learning_rate": 1.4176260280233707e-05, "loss": 0.7494, "step": 5810 }, { "epoch": 0.733885863130476, "grad_norm": 1.8125, "learning_rate": 1.417444619676452e-05, "loss": 0.6074, "step": 5811 }, { "epoch": 0.7340121556555371, "grad_norm": 1.75, "learning_rate": 1.4172631946911964e-05, "loss": 0.5784, "step": 5812 }, { "epoch": 0.7341384481805983, "grad_norm": 1.765625, "learning_rate": 1.4170817530748346e-05, "loss": 0.6116, "step": 5813 }, { "epoch": 0.7342647407056595, "grad_norm": 1.6875, "learning_rate": 1.416900294834599e-05, "loss": 0.6068, "step": 5814 }, { "epoch": 0.7343910332307206, "grad_norm": 1.8125, "learning_rate": 1.4167188199777218e-05, "loss": 0.6185, "step": 5815 }, { "epoch": 0.7345173257557819, "grad_norm": 1.7734375, "learning_rate": 1.416537328511436e-05, "loss": 0.6042, "step": 5816 }, { "epoch": 0.734643618280843, "grad_norm": 1.84375, "learning_rate": 1.4163558204429756e-05, "loss": 0.6186, "step": 5817 }, { "epoch": 0.7347699108059041, "grad_norm": 1.6640625, "learning_rate": 1.4161742957795755e-05, "loss": 0.647, "step": 5818 }, { "epoch": 0.7348962033309654, "grad_norm": 1.765625, "learning_rate": 1.4159927545284699e-05, "loss": 0.668, "step": 5819 }, { "epoch": 0.7350224958560265, "grad_norm": 1.703125, "learning_rate": 1.4158111966968953e-05, "loss": 0.5971, "step": 5820 }, { "epoch": 0.7351487883810877, "grad_norm": 1.8046875, "learning_rate": 1.415629622292088e-05, "loss": 0.6438, "step": 5821 }, { "epoch": 0.7352750809061489, "grad_norm": 1.953125, "learning_rate": 1.4154480313212848e-05, "loss": 0.7169, "step": 5822 }, { "epoch": 0.73540137343121, "grad_norm": 1.8828125, "learning_rate": 1.4152664237917237e-05, "loss": 0.6927, "step": 5823 }, { "epoch": 0.7355276659562712, "grad_norm": 1.96875, "learning_rate": 1.4150847997106434e-05, "loss": 0.6423, "step": 5824 }, { "epoch": 0.7356539584813324, "grad_norm": 1.75, "learning_rate": 1.414903159085283e-05, "loss": 0.5822, "step": 5825 }, { "epoch": 0.7357802510063935, "grad_norm": 1.59375, "learning_rate": 1.4147215019228817e-05, "loss": 0.5973, "step": 5826 }, { "epoch": 0.7359065435314547, "grad_norm": 1.78125, "learning_rate": 1.4145398282306804e-05, "loss": 0.6695, "step": 5827 }, { "epoch": 0.7360328360565159, "grad_norm": 1.734375, "learning_rate": 1.41435813801592e-05, "loss": 0.6593, "step": 5828 }, { "epoch": 0.7361591285815771, "grad_norm": 1.890625, "learning_rate": 1.4141764312858421e-05, "loss": 0.602, "step": 5829 }, { "epoch": 0.7362854211066383, "grad_norm": 1.859375, "learning_rate": 1.4139947080476895e-05, "loss": 0.5772, "step": 5830 }, { "epoch": 0.7364117136316994, "grad_norm": 1.859375, "learning_rate": 1.4138129683087046e-05, "loss": 0.6972, "step": 5831 }, { "epoch": 0.7365380061567606, "grad_norm": 1.8984375, "learning_rate": 1.413631212076132e-05, "loss": 0.6022, "step": 5832 }, { "epoch": 0.7366642986818218, "grad_norm": 1.8828125, "learning_rate": 1.4134494393572153e-05, "loss": 0.6614, "step": 5833 }, { "epoch": 0.736790591206883, "grad_norm": 1.8515625, "learning_rate": 1.4132676501591997e-05, "loss": 0.6429, "step": 5834 }, { "epoch": 0.7369168837319441, "grad_norm": 1.703125, "learning_rate": 1.4130858444893314e-05, "loss": 0.6049, "step": 5835 }, { "epoch": 0.7370431762570053, "grad_norm": 1.7890625, "learning_rate": 1.412904022354856e-05, "loss": 0.6257, "step": 5836 }, { "epoch": 0.7371694687820665, "grad_norm": 1.84375, "learning_rate": 1.4127221837630209e-05, "loss": 0.69, "step": 5837 }, { "epoch": 0.7372957613071276, "grad_norm": 1.8125, "learning_rate": 1.4125403287210738e-05, "loss": 0.586, "step": 5838 }, { "epoch": 0.7374220538321888, "grad_norm": 1.859375, "learning_rate": 1.4123584572362625e-05, "loss": 0.6347, "step": 5839 }, { "epoch": 0.73754834635725, "grad_norm": 1.8828125, "learning_rate": 1.4121765693158364e-05, "loss": 0.6699, "step": 5840 }, { "epoch": 0.7376746388823111, "grad_norm": 1.953125, "learning_rate": 1.4119946649670453e-05, "loss": 0.6205, "step": 5841 }, { "epoch": 0.7378009314073724, "grad_norm": 1.8828125, "learning_rate": 1.4118127441971392e-05, "loss": 0.6737, "step": 5842 }, { "epoch": 0.7379272239324335, "grad_norm": 1.921875, "learning_rate": 1.4116308070133689e-05, "loss": 0.7416, "step": 5843 }, { "epoch": 0.7380535164574946, "grad_norm": 1.765625, "learning_rate": 1.4114488534229862e-05, "loss": 0.527, "step": 5844 }, { "epoch": 0.7381798089825559, "grad_norm": 1.8671875, "learning_rate": 1.411266883433243e-05, "loss": 0.6709, "step": 5845 }, { "epoch": 0.738306101507617, "grad_norm": 1.78125, "learning_rate": 1.4110848970513925e-05, "loss": 0.651, "step": 5846 }, { "epoch": 0.7384323940326782, "grad_norm": 1.6640625, "learning_rate": 1.4109028942846883e-05, "loss": 0.5478, "step": 5847 }, { "epoch": 0.7385586865577394, "grad_norm": 1.796875, "learning_rate": 1.4107208751403842e-05, "loss": 0.6642, "step": 5848 }, { "epoch": 0.7386849790828005, "grad_norm": 1.9921875, "learning_rate": 1.4105388396257355e-05, "loss": 0.6134, "step": 5849 }, { "epoch": 0.7388112716078618, "grad_norm": 1.6484375, "learning_rate": 1.4103567877479978e-05, "loss": 0.5977, "step": 5850 }, { "epoch": 0.7389375641329229, "grad_norm": 1.75, "learning_rate": 1.4101747195144266e-05, "loss": 0.6459, "step": 5851 }, { "epoch": 0.739063856657984, "grad_norm": 1.7890625, "learning_rate": 1.409992634932279e-05, "loss": 0.7092, "step": 5852 }, { "epoch": 0.7391901491830453, "grad_norm": 1.8046875, "learning_rate": 1.4098105340088126e-05, "loss": 0.7048, "step": 5853 }, { "epoch": 0.7393164417081064, "grad_norm": 1.71875, "learning_rate": 1.4096284167512855e-05, "loss": 0.6799, "step": 5854 }, { "epoch": 0.7394427342331675, "grad_norm": 1.84375, "learning_rate": 1.4094462831669562e-05, "loss": 0.5452, "step": 5855 }, { "epoch": 0.7395690267582288, "grad_norm": 1.7265625, "learning_rate": 1.4092641332630841e-05, "loss": 0.6067, "step": 5856 }, { "epoch": 0.7396953192832899, "grad_norm": 1.90625, "learning_rate": 1.4090819670469298e-05, "loss": 0.7454, "step": 5857 }, { "epoch": 0.739821611808351, "grad_norm": 1.6875, "learning_rate": 1.4088997845257534e-05, "loss": 0.6108, "step": 5858 }, { "epoch": 0.7399479043334123, "grad_norm": 1.6875, "learning_rate": 1.4087175857068168e-05, "loss": 0.6244, "step": 5859 }, { "epoch": 0.7400741968584734, "grad_norm": 1.7890625, "learning_rate": 1.4085353705973815e-05, "loss": 0.6285, "step": 5860 }, { "epoch": 0.7402004893835347, "grad_norm": 1.7734375, "learning_rate": 1.40835313920471e-05, "loss": 0.6408, "step": 5861 }, { "epoch": 0.7403267819085958, "grad_norm": 1.6171875, "learning_rate": 1.4081708915360665e-05, "loss": 0.5517, "step": 5862 }, { "epoch": 0.7404530744336569, "grad_norm": 1.890625, "learning_rate": 1.407988627598714e-05, "loss": 0.748, "step": 5863 }, { "epoch": 0.7405793669587182, "grad_norm": 1.9453125, "learning_rate": 1.4078063473999177e-05, "loss": 0.6539, "step": 5864 }, { "epoch": 0.7407056594837793, "grad_norm": 1.84375, "learning_rate": 1.4076240509469425e-05, "loss": 0.6468, "step": 5865 }, { "epoch": 0.7408319520088404, "grad_norm": 1.640625, "learning_rate": 1.4074417382470546e-05, "loss": 0.5931, "step": 5866 }, { "epoch": 0.7409582445339017, "grad_norm": 1.9296875, "learning_rate": 1.4072594093075204e-05, "loss": 0.6613, "step": 5867 }, { "epoch": 0.7410845370589628, "grad_norm": 1.8671875, "learning_rate": 1.407077064135607e-05, "loss": 0.6376, "step": 5868 }, { "epoch": 0.741210829584024, "grad_norm": 1.6796875, "learning_rate": 1.4068947027385824e-05, "loss": 0.6353, "step": 5869 }, { "epoch": 0.7413371221090852, "grad_norm": 1.8671875, "learning_rate": 1.406712325123715e-05, "loss": 0.5618, "step": 5870 }, { "epoch": 0.7414634146341463, "grad_norm": 1.8203125, "learning_rate": 1.406529931298274e-05, "loss": 0.6975, "step": 5871 }, { "epoch": 0.7415897071592075, "grad_norm": 1.875, "learning_rate": 1.406347521269529e-05, "loss": 0.6776, "step": 5872 }, { "epoch": 0.7417159996842687, "grad_norm": 2.171875, "learning_rate": 1.4061650950447508e-05, "loss": 0.667, "step": 5873 }, { "epoch": 0.7418422922093298, "grad_norm": 1.75, "learning_rate": 1.4059826526312099e-05, "loss": 0.5794, "step": 5874 }, { "epoch": 0.741968584734391, "grad_norm": 1.765625, "learning_rate": 1.4058001940361784e-05, "loss": 0.5837, "step": 5875 }, { "epoch": 0.7420948772594522, "grad_norm": 1.7734375, "learning_rate": 1.4056177192669289e-05, "loss": 0.6811, "step": 5876 }, { "epoch": 0.7422211697845134, "grad_norm": 1.96875, "learning_rate": 1.4054352283307338e-05, "loss": 0.6904, "step": 5877 }, { "epoch": 0.7423474623095746, "grad_norm": 1.8203125, "learning_rate": 1.4052527212348672e-05, "loss": 0.6817, "step": 5878 }, { "epoch": 0.7424737548346357, "grad_norm": 1.921875, "learning_rate": 1.4050701979866033e-05, "loss": 0.6606, "step": 5879 }, { "epoch": 0.7426000473596969, "grad_norm": 1.8203125, "learning_rate": 1.4048876585932164e-05, "loss": 0.7049, "step": 5880 }, { "epoch": 0.7427263398847581, "grad_norm": 1.8203125, "learning_rate": 1.4047051030619833e-05, "loss": 0.6454, "step": 5881 }, { "epoch": 0.7428526324098192, "grad_norm": 1.6875, "learning_rate": 1.4045225314001796e-05, "loss": 0.6313, "step": 5882 }, { "epoch": 0.7429789249348804, "grad_norm": 1.796875, "learning_rate": 1.404339943615082e-05, "loss": 0.6867, "step": 5883 }, { "epoch": 0.7431052174599416, "grad_norm": 1.78125, "learning_rate": 1.404157339713968e-05, "loss": 0.6845, "step": 5884 }, { "epoch": 0.7432315099850028, "grad_norm": 1.7421875, "learning_rate": 1.403974719704116e-05, "loss": 0.6793, "step": 5885 }, { "epoch": 0.7433578025100639, "grad_norm": 1.8671875, "learning_rate": 1.403792083592805e-05, "loss": 0.616, "step": 5886 }, { "epoch": 0.7434840950351251, "grad_norm": 1.78125, "learning_rate": 1.4036094313873139e-05, "loss": 0.6567, "step": 5887 }, { "epoch": 0.7436103875601863, "grad_norm": 1.8046875, "learning_rate": 1.403426763094923e-05, "loss": 0.6472, "step": 5888 }, { "epoch": 0.7437366800852474, "grad_norm": 1.875, "learning_rate": 1.4032440787229127e-05, "loss": 0.6368, "step": 5889 }, { "epoch": 0.7438629726103086, "grad_norm": 1.7421875, "learning_rate": 1.4030613782785652e-05, "loss": 0.573, "step": 5890 }, { "epoch": 0.7439892651353698, "grad_norm": 1.8515625, "learning_rate": 1.4028786617691618e-05, "loss": 0.6623, "step": 5891 }, { "epoch": 0.744115557660431, "grad_norm": 1.7890625, "learning_rate": 1.4026959292019853e-05, "loss": 0.597, "step": 5892 }, { "epoch": 0.7442418501854922, "grad_norm": 1.7734375, "learning_rate": 1.402513180584319e-05, "loss": 0.6284, "step": 5893 }, { "epoch": 0.7443681427105533, "grad_norm": 2.046875, "learning_rate": 1.4023304159234472e-05, "loss": 0.7584, "step": 5894 }, { "epoch": 0.7444944352356145, "grad_norm": 1.6953125, "learning_rate": 1.402147635226654e-05, "loss": 0.6672, "step": 5895 }, { "epoch": 0.7446207277606757, "grad_norm": 1.734375, "learning_rate": 1.4019648385012243e-05, "loss": 0.6645, "step": 5896 }, { "epoch": 0.7447470202857368, "grad_norm": 1.953125, "learning_rate": 1.4017820257544445e-05, "loss": 0.6585, "step": 5897 }, { "epoch": 0.744873312810798, "grad_norm": 1.7109375, "learning_rate": 1.401599196993601e-05, "loss": 0.605, "step": 5898 }, { "epoch": 0.7449996053358592, "grad_norm": 1.609375, "learning_rate": 1.4014163522259808e-05, "loss": 0.6281, "step": 5899 }, { "epoch": 0.7451258978609203, "grad_norm": 1.7890625, "learning_rate": 1.4012334914588715e-05, "loss": 0.6565, "step": 5900 }, { "epoch": 0.7452521903859816, "grad_norm": 1.765625, "learning_rate": 1.4010506146995618e-05, "loss": 0.6667, "step": 5901 }, { "epoch": 0.7453784829110427, "grad_norm": 1.7734375, "learning_rate": 1.4008677219553405e-05, "loss": 0.6812, "step": 5902 }, { "epoch": 0.7455047754361038, "grad_norm": 1.7734375, "learning_rate": 1.4006848132334975e-05, "loss": 0.617, "step": 5903 }, { "epoch": 0.7456310679611651, "grad_norm": 1.875, "learning_rate": 1.4005018885413231e-05, "loss": 0.6124, "step": 5904 }, { "epoch": 0.7457573604862262, "grad_norm": 2.296875, "learning_rate": 1.400318947886108e-05, "loss": 0.7658, "step": 5905 }, { "epoch": 0.7458836530112873, "grad_norm": 1.8671875, "learning_rate": 1.4001359912751436e-05, "loss": 0.647, "step": 5906 }, { "epoch": 0.7460099455363486, "grad_norm": 1.875, "learning_rate": 1.3999530187157224e-05, "loss": 0.6246, "step": 5907 }, { "epoch": 0.7461362380614097, "grad_norm": 1.8359375, "learning_rate": 1.3997700302151374e-05, "loss": 0.6175, "step": 5908 }, { "epoch": 0.746262530586471, "grad_norm": 1.953125, "learning_rate": 1.3995870257806818e-05, "loss": 0.6934, "step": 5909 }, { "epoch": 0.7463888231115321, "grad_norm": 1.7734375, "learning_rate": 1.3994040054196498e-05, "loss": 0.6465, "step": 5910 }, { "epoch": 0.7465151156365932, "grad_norm": 1.7734375, "learning_rate": 1.3992209691393363e-05, "loss": 0.6269, "step": 5911 }, { "epoch": 0.7466414081616545, "grad_norm": 1.7734375, "learning_rate": 1.3990379169470364e-05, "loss": 0.6224, "step": 5912 }, { "epoch": 0.7467677006867156, "grad_norm": 1.78125, "learning_rate": 1.3988548488500467e-05, "loss": 0.6145, "step": 5913 }, { "epoch": 0.7468939932117767, "grad_norm": 1.7734375, "learning_rate": 1.3986717648556632e-05, "loss": 0.734, "step": 5914 }, { "epoch": 0.747020285736838, "grad_norm": 1.75, "learning_rate": 1.398488664971183e-05, "loss": 0.5946, "step": 5915 }, { "epoch": 0.7471465782618991, "grad_norm": 1.8359375, "learning_rate": 1.3983055492039049e-05, "loss": 0.7378, "step": 5916 }, { "epoch": 0.7472728707869603, "grad_norm": 1.78125, "learning_rate": 1.3981224175611268e-05, "loss": 0.6724, "step": 5917 }, { "epoch": 0.7473991633120215, "grad_norm": 1.6171875, "learning_rate": 1.3979392700501479e-05, "loss": 0.5856, "step": 5918 }, { "epoch": 0.7475254558370826, "grad_norm": 1.8828125, "learning_rate": 1.3977561066782686e-05, "loss": 0.5958, "step": 5919 }, { "epoch": 0.7476517483621438, "grad_norm": 1.84375, "learning_rate": 1.3975729274527888e-05, "loss": 0.7419, "step": 5920 }, { "epoch": 0.747778040887205, "grad_norm": 1.7265625, "learning_rate": 1.3973897323810098e-05, "loss": 0.6396, "step": 5921 }, { "epoch": 0.7479043334122661, "grad_norm": 1.6875, "learning_rate": 1.397206521470233e-05, "loss": 0.6196, "step": 5922 }, { "epoch": 0.7480306259373274, "grad_norm": 1.7109375, "learning_rate": 1.397023294727761e-05, "loss": 0.6612, "step": 5923 }, { "epoch": 0.7481569184623885, "grad_norm": 1.84375, "learning_rate": 1.3968400521608969e-05, "loss": 0.7437, "step": 5924 }, { "epoch": 0.7482832109874497, "grad_norm": 1.8046875, "learning_rate": 1.396656793776944e-05, "loss": 0.6859, "step": 5925 }, { "epoch": 0.7484095035125109, "grad_norm": 1.765625, "learning_rate": 1.3964735195832069e-05, "loss": 0.5289, "step": 5926 }, { "epoch": 0.748535796037572, "grad_norm": 1.7890625, "learning_rate": 1.3962902295869903e-05, "loss": 0.6105, "step": 5927 }, { "epoch": 0.7486620885626332, "grad_norm": 2.171875, "learning_rate": 1.3961069237955995e-05, "loss": 0.6886, "step": 5928 }, { "epoch": 0.7487883810876944, "grad_norm": 1.765625, "learning_rate": 1.395923602216341e-05, "loss": 0.5791, "step": 5929 }, { "epoch": 0.7489146736127555, "grad_norm": 1.828125, "learning_rate": 1.3957402648565213e-05, "loss": 0.6713, "step": 5930 }, { "epoch": 0.7490409661378167, "grad_norm": 1.703125, "learning_rate": 1.3955569117234477e-05, "loss": 0.6117, "step": 5931 }, { "epoch": 0.7491672586628779, "grad_norm": 1.75, "learning_rate": 1.3953735428244284e-05, "loss": 0.6255, "step": 5932 }, { "epoch": 0.749293551187939, "grad_norm": 1.7265625, "learning_rate": 1.3951901581667722e-05, "loss": 0.6363, "step": 5933 }, { "epoch": 0.7494198437130002, "grad_norm": 1.7421875, "learning_rate": 1.3950067577577877e-05, "loss": 0.6082, "step": 5934 }, { "epoch": 0.7495461362380614, "grad_norm": 1.7578125, "learning_rate": 1.3948233416047855e-05, "loss": 0.7149, "step": 5935 }, { "epoch": 0.7496724287631226, "grad_norm": 1.671875, "learning_rate": 1.3946399097150762e-05, "loss": 0.5988, "step": 5936 }, { "epoch": 0.7497987212881837, "grad_norm": 1.6953125, "learning_rate": 1.3944564620959705e-05, "loss": 0.5821, "step": 5937 }, { "epoch": 0.749925013813245, "grad_norm": 1.765625, "learning_rate": 1.3942729987547802e-05, "loss": 0.6625, "step": 5938 }, { "epoch": 0.7500513063383061, "grad_norm": 1.6796875, "learning_rate": 1.394089519698818e-05, "loss": 0.5821, "step": 5939 }, { "epoch": 0.7501775988633673, "grad_norm": 1.890625, "learning_rate": 1.3939060249353966e-05, "loss": 0.7042, "step": 5940 }, { "epoch": 0.7503038913884285, "grad_norm": 1.96875, "learning_rate": 1.3937225144718299e-05, "loss": 0.7328, "step": 5941 }, { "epoch": 0.7504301839134896, "grad_norm": 1.8984375, "learning_rate": 1.3935389883154322e-05, "loss": 0.6893, "step": 5942 }, { "epoch": 0.7505564764385508, "grad_norm": 1.9375, "learning_rate": 1.393355446473518e-05, "loss": 0.8232, "step": 5943 }, { "epoch": 0.750682768963612, "grad_norm": 1.6875, "learning_rate": 1.3931718889534033e-05, "loss": 0.6281, "step": 5944 }, { "epoch": 0.7508090614886731, "grad_norm": 1.8125, "learning_rate": 1.3929883157624043e-05, "loss": 0.6927, "step": 5945 }, { "epoch": 0.7509353540137343, "grad_norm": 1.6328125, "learning_rate": 1.3928047269078376e-05, "loss": 0.5558, "step": 5946 }, { "epoch": 0.7510616465387955, "grad_norm": 1.734375, "learning_rate": 1.3926211223970205e-05, "loss": 0.6376, "step": 5947 }, { "epoch": 0.7511879390638566, "grad_norm": 1.5546875, "learning_rate": 1.3924375022372712e-05, "loss": 0.4858, "step": 5948 }, { "epoch": 0.7513142315889179, "grad_norm": 1.921875, "learning_rate": 1.3922538664359082e-05, "loss": 0.6219, "step": 5949 }, { "epoch": 0.751440524113979, "grad_norm": 1.875, "learning_rate": 1.3920702150002509e-05, "loss": 0.7071, "step": 5950 }, { "epoch": 0.7515668166390401, "grad_norm": 2.078125, "learning_rate": 1.3918865479376193e-05, "loss": 0.6266, "step": 5951 }, { "epoch": 0.7516931091641014, "grad_norm": 1.78125, "learning_rate": 1.3917028652553338e-05, "loss": 0.6384, "step": 5952 }, { "epoch": 0.7518194016891625, "grad_norm": 1.84375, "learning_rate": 1.3915191669607155e-05, "loss": 0.6517, "step": 5953 }, { "epoch": 0.7519456942142237, "grad_norm": 1.8203125, "learning_rate": 1.3913354530610863e-05, "loss": 0.6827, "step": 5954 }, { "epoch": 0.7520719867392849, "grad_norm": 1.8671875, "learning_rate": 1.3911517235637687e-05, "loss": 0.6104, "step": 5955 }, { "epoch": 0.752198279264346, "grad_norm": 1.8359375, "learning_rate": 1.3909679784760853e-05, "loss": 0.6787, "step": 5956 }, { "epoch": 0.7523245717894073, "grad_norm": 1.9296875, "learning_rate": 1.39078421780536e-05, "loss": 0.7113, "step": 5957 }, { "epoch": 0.7524508643144684, "grad_norm": 1.75, "learning_rate": 1.3906004415589172e-05, "loss": 0.6231, "step": 5958 }, { "epoch": 0.7525771568395295, "grad_norm": 1.921875, "learning_rate": 1.3904166497440813e-05, "loss": 0.6607, "step": 5959 }, { "epoch": 0.7527034493645908, "grad_norm": 1.8828125, "learning_rate": 1.3902328423681786e-05, "loss": 0.6148, "step": 5960 }, { "epoch": 0.7528297418896519, "grad_norm": 1.7578125, "learning_rate": 1.3900490194385341e-05, "loss": 0.6742, "step": 5961 }, { "epoch": 0.752956034414713, "grad_norm": 1.75, "learning_rate": 1.3898651809624758e-05, "loss": 0.6735, "step": 5962 }, { "epoch": 0.7530823269397743, "grad_norm": 2.140625, "learning_rate": 1.3896813269473301e-05, "loss": 0.7051, "step": 5963 }, { "epoch": 0.7532086194648354, "grad_norm": 1.8046875, "learning_rate": 1.3894974574004254e-05, "loss": 0.6578, "step": 5964 }, { "epoch": 0.7533349119898965, "grad_norm": 2.09375, "learning_rate": 1.38931357232909e-05, "loss": 0.7734, "step": 5965 }, { "epoch": 0.7534612045149578, "grad_norm": 1.84375, "learning_rate": 1.3891296717406538e-05, "loss": 0.6763, "step": 5966 }, { "epoch": 0.7535874970400189, "grad_norm": 1.6484375, "learning_rate": 1.3889457556424458e-05, "loss": 0.5993, "step": 5967 }, { "epoch": 0.7537137895650802, "grad_norm": 1.765625, "learning_rate": 1.3887618240417967e-05, "loss": 0.6705, "step": 5968 }, { "epoch": 0.7538400820901413, "grad_norm": 1.84375, "learning_rate": 1.3885778769460382e-05, "loss": 0.6022, "step": 5969 }, { "epoch": 0.7539663746152024, "grad_norm": 1.8125, "learning_rate": 1.388393914362501e-05, "loss": 0.6277, "step": 5970 }, { "epoch": 0.7540926671402637, "grad_norm": 1.78125, "learning_rate": 1.388209936298518e-05, "loss": 0.6641, "step": 5971 }, { "epoch": 0.7542189596653248, "grad_norm": 1.6875, "learning_rate": 1.388025942761422e-05, "loss": 0.6214, "step": 5972 }, { "epoch": 0.754345252190386, "grad_norm": 1.78125, "learning_rate": 1.3878419337585467e-05, "loss": 0.6584, "step": 5973 }, { "epoch": 0.7544715447154472, "grad_norm": 1.78125, "learning_rate": 1.387657909297226e-05, "loss": 0.7242, "step": 5974 }, { "epoch": 0.7545978372405083, "grad_norm": 1.8359375, "learning_rate": 1.3874738693847947e-05, "loss": 0.5973, "step": 5975 }, { "epoch": 0.7547241297655695, "grad_norm": 1.8203125, "learning_rate": 1.3872898140285885e-05, "loss": 0.7091, "step": 5976 }, { "epoch": 0.7548504222906307, "grad_norm": 1.765625, "learning_rate": 1.3871057432359427e-05, "loss": 0.6335, "step": 5977 }, { "epoch": 0.7549767148156918, "grad_norm": 1.859375, "learning_rate": 1.3869216570141947e-05, "loss": 0.6292, "step": 5978 }, { "epoch": 0.755103007340753, "grad_norm": 1.8515625, "learning_rate": 1.3867375553706812e-05, "loss": 0.7327, "step": 5979 }, { "epoch": 0.7552292998658142, "grad_norm": 1.875, "learning_rate": 1.3865534383127406e-05, "loss": 0.721, "step": 5980 }, { "epoch": 0.7553555923908754, "grad_norm": 1.7421875, "learning_rate": 1.3863693058477108e-05, "loss": 0.6261, "step": 5981 }, { "epoch": 0.7554818849159365, "grad_norm": 1.6875, "learning_rate": 1.386185157982931e-05, "loss": 0.5659, "step": 5982 }, { "epoch": 0.7556081774409977, "grad_norm": 1.734375, "learning_rate": 1.3860009947257412e-05, "loss": 0.6671, "step": 5983 }, { "epoch": 0.7557344699660589, "grad_norm": 1.84375, "learning_rate": 1.3858168160834816e-05, "loss": 0.6534, "step": 5984 }, { "epoch": 0.7558607624911201, "grad_norm": 1.875, "learning_rate": 1.3856326220634926e-05, "loss": 0.6834, "step": 5985 }, { "epoch": 0.7559870550161812, "grad_norm": 1.6796875, "learning_rate": 1.3854484126731164e-05, "loss": 0.6244, "step": 5986 }, { "epoch": 0.7561133475412424, "grad_norm": 1.8828125, "learning_rate": 1.3852641879196949e-05, "loss": 0.7425, "step": 5987 }, { "epoch": 0.7562396400663036, "grad_norm": 1.7109375, "learning_rate": 1.3850799478105708e-05, "loss": 0.6442, "step": 5988 }, { "epoch": 0.7563659325913648, "grad_norm": 1.7578125, "learning_rate": 1.3848956923530873e-05, "loss": 0.622, "step": 5989 }, { "epoch": 0.7564922251164259, "grad_norm": 1.765625, "learning_rate": 1.3847114215545889e-05, "loss": 0.6456, "step": 5990 }, { "epoch": 0.7566185176414871, "grad_norm": 1.8515625, "learning_rate": 1.3845271354224196e-05, "loss": 0.7227, "step": 5991 }, { "epoch": 0.7567448101665483, "grad_norm": 1.84375, "learning_rate": 1.3843428339639251e-05, "loss": 0.6801, "step": 5992 }, { "epoch": 0.7568711026916094, "grad_norm": 1.8046875, "learning_rate": 1.3841585171864508e-05, "loss": 0.5798, "step": 5993 }, { "epoch": 0.7569973952166706, "grad_norm": 1.7421875, "learning_rate": 1.3839741850973435e-05, "loss": 0.6525, "step": 5994 }, { "epoch": 0.7571236877417318, "grad_norm": 1.6640625, "learning_rate": 1.3837898377039501e-05, "loss": 0.6026, "step": 5995 }, { "epoch": 0.7572499802667929, "grad_norm": 1.71875, "learning_rate": 1.3836054750136182e-05, "loss": 0.6123, "step": 5996 }, { "epoch": 0.7573762727918542, "grad_norm": 1.7734375, "learning_rate": 1.3834210970336954e-05, "loss": 0.6279, "step": 5997 }, { "epoch": 0.7575025653169153, "grad_norm": 1.8671875, "learning_rate": 1.3832367037715317e-05, "loss": 0.6579, "step": 5998 }, { "epoch": 0.7576288578419765, "grad_norm": 1.8203125, "learning_rate": 1.383052295234476e-05, "loss": 0.6769, "step": 5999 }, { "epoch": 0.7577551503670377, "grad_norm": 1.90625, "learning_rate": 1.3828678714298787e-05, "loss": 0.6165, "step": 6000 }, { "epoch": 0.7578814428920988, "grad_norm": 1.703125, "learning_rate": 1.3826834323650899e-05, "loss": 0.6954, "step": 6001 }, { "epoch": 0.75800773541716, "grad_norm": 1.8125, "learning_rate": 1.3824989780474613e-05, "loss": 0.7047, "step": 6002 }, { "epoch": 0.7581340279422212, "grad_norm": 1.6953125, "learning_rate": 1.382314508484345e-05, "loss": 0.6294, "step": 6003 }, { "epoch": 0.7582603204672823, "grad_norm": 1.828125, "learning_rate": 1.382130023683093e-05, "loss": 0.6809, "step": 6004 }, { "epoch": 0.7583866129923436, "grad_norm": 1.796875, "learning_rate": 1.3819455236510588e-05, "loss": 0.545, "step": 6005 }, { "epoch": 0.7585129055174047, "grad_norm": 1.8125, "learning_rate": 1.3817610083955956e-05, "loss": 0.6586, "step": 6006 }, { "epoch": 0.7586391980424658, "grad_norm": 1.84375, "learning_rate": 1.3815764779240586e-05, "loss": 0.7115, "step": 6007 }, { "epoch": 0.7587654905675271, "grad_norm": 1.75, "learning_rate": 1.3813919322438021e-05, "loss": 0.6785, "step": 6008 }, { "epoch": 0.7588917830925882, "grad_norm": 1.8125, "learning_rate": 1.3812073713621818e-05, "loss": 0.6769, "step": 6009 }, { "epoch": 0.7590180756176493, "grad_norm": 1.796875, "learning_rate": 1.3810227952865542e-05, "loss": 0.6337, "step": 6010 }, { "epoch": 0.7591443681427106, "grad_norm": 1.859375, "learning_rate": 1.3808382040242754e-05, "loss": 0.6807, "step": 6011 }, { "epoch": 0.7592706606677717, "grad_norm": 1.8515625, "learning_rate": 1.380653597582703e-05, "loss": 0.6054, "step": 6012 }, { "epoch": 0.7593969531928328, "grad_norm": 1.7734375, "learning_rate": 1.3804689759691953e-05, "loss": 0.6988, "step": 6013 }, { "epoch": 0.7595232457178941, "grad_norm": 1.9921875, "learning_rate": 1.3802843391911107e-05, "loss": 0.7549, "step": 6014 }, { "epoch": 0.7596495382429552, "grad_norm": 1.765625, "learning_rate": 1.380099687255808e-05, "loss": 0.6578, "step": 6015 }, { "epoch": 0.7597758307680165, "grad_norm": 2.28125, "learning_rate": 1.3799150201706475e-05, "loss": 0.8085, "step": 6016 }, { "epoch": 0.7599021232930776, "grad_norm": 1.8359375, "learning_rate": 1.3797303379429893e-05, "loss": 0.6561, "step": 6017 }, { "epoch": 0.7600284158181387, "grad_norm": 1.8125, "learning_rate": 1.3795456405801945e-05, "loss": 0.7191, "step": 6018 }, { "epoch": 0.7601547083432, "grad_norm": 1.7890625, "learning_rate": 1.3793609280896247e-05, "loss": 0.653, "step": 6019 }, { "epoch": 0.7602810008682611, "grad_norm": 1.8828125, "learning_rate": 1.3791762004786422e-05, "loss": 0.6766, "step": 6020 }, { "epoch": 0.7604072933933222, "grad_norm": 1.875, "learning_rate": 1.3789914577546095e-05, "loss": 0.6968, "step": 6021 }, { "epoch": 0.7605335859183835, "grad_norm": 1.5234375, "learning_rate": 1.3788066999248901e-05, "loss": 0.5664, "step": 6022 }, { "epoch": 0.7606598784434446, "grad_norm": 2.03125, "learning_rate": 1.3786219269968481e-05, "loss": 0.6311, "step": 6023 }, { "epoch": 0.7607861709685058, "grad_norm": 1.7734375, "learning_rate": 1.378437138977848e-05, "loss": 0.6329, "step": 6024 }, { "epoch": 0.760912463493567, "grad_norm": 1.6171875, "learning_rate": 1.3782523358752551e-05, "loss": 0.6556, "step": 6025 }, { "epoch": 0.7610387560186281, "grad_norm": 1.640625, "learning_rate": 1.3780675176964351e-05, "loss": 0.5346, "step": 6026 }, { "epoch": 0.7611650485436893, "grad_norm": 1.9296875, "learning_rate": 1.3778826844487544e-05, "loss": 0.7202, "step": 6027 }, { "epoch": 0.7612913410687505, "grad_norm": 1.796875, "learning_rate": 1.3776978361395803e-05, "loss": 0.6195, "step": 6028 }, { "epoch": 0.7614176335938116, "grad_norm": 1.734375, "learning_rate": 1.3775129727762801e-05, "loss": 0.7045, "step": 6029 }, { "epoch": 0.7615439261188729, "grad_norm": 1.9296875, "learning_rate": 1.3773280943662225e-05, "loss": 0.7754, "step": 6030 }, { "epoch": 0.761670218643934, "grad_norm": 1.78125, "learning_rate": 1.3771432009167754e-05, "loss": 0.6314, "step": 6031 }, { "epoch": 0.7617965111689952, "grad_norm": 1.671875, "learning_rate": 1.3769582924353088e-05, "loss": 0.5401, "step": 6032 }, { "epoch": 0.7619228036940564, "grad_norm": 1.7578125, "learning_rate": 1.3767733689291927e-05, "loss": 0.5928, "step": 6033 }, { "epoch": 0.7620490962191175, "grad_norm": 1.8359375, "learning_rate": 1.3765884304057975e-05, "loss": 0.6568, "step": 6034 }, { "epoch": 0.7621753887441787, "grad_norm": 2.0, "learning_rate": 1.3764034768724948e-05, "loss": 0.6778, "step": 6035 }, { "epoch": 0.7623016812692399, "grad_norm": 1.796875, "learning_rate": 1.3762185083366557e-05, "loss": 0.6708, "step": 6036 }, { "epoch": 0.762427973794301, "grad_norm": 1.8203125, "learning_rate": 1.3760335248056536e-05, "loss": 0.6761, "step": 6037 }, { "epoch": 0.7625542663193622, "grad_norm": 1.65625, "learning_rate": 1.3758485262868606e-05, "loss": 0.6789, "step": 6038 }, { "epoch": 0.7626805588444234, "grad_norm": 1.734375, "learning_rate": 1.3756635127876507e-05, "loss": 0.6491, "step": 6039 }, { "epoch": 0.7628068513694846, "grad_norm": 1.8046875, "learning_rate": 1.3754784843153979e-05, "loss": 0.6893, "step": 6040 }, { "epoch": 0.7629331438945457, "grad_norm": 1.5625, "learning_rate": 1.3752934408774775e-05, "loss": 0.5835, "step": 6041 }, { "epoch": 0.7630594364196069, "grad_norm": 1.75, "learning_rate": 1.3751083824812638e-05, "loss": 0.6264, "step": 6042 }, { "epoch": 0.7631857289446681, "grad_norm": 1.78125, "learning_rate": 1.374923309134134e-05, "loss": 0.6404, "step": 6043 }, { "epoch": 0.7633120214697292, "grad_norm": 1.6171875, "learning_rate": 1.3747382208434641e-05, "loss": 0.5264, "step": 6044 }, { "epoch": 0.7634383139947905, "grad_norm": 1.7734375, "learning_rate": 1.3745531176166313e-05, "loss": 0.6314, "step": 6045 }, { "epoch": 0.7635646065198516, "grad_norm": 1.7578125, "learning_rate": 1.3743679994610135e-05, "loss": 0.6629, "step": 6046 }, { "epoch": 0.7636908990449128, "grad_norm": 1.8359375, "learning_rate": 1.374182866383989e-05, "loss": 0.6585, "step": 6047 }, { "epoch": 0.763817191569974, "grad_norm": 1.75, "learning_rate": 1.3739977183929365e-05, "loss": 0.6453, "step": 6048 }, { "epoch": 0.7639434840950351, "grad_norm": 1.7890625, "learning_rate": 1.3738125554952358e-05, "loss": 0.6121, "step": 6049 }, { "epoch": 0.7640697766200963, "grad_norm": 1.8359375, "learning_rate": 1.3736273776982669e-05, "loss": 0.7544, "step": 6050 }, { "epoch": 0.7641960691451575, "grad_norm": 1.609375, "learning_rate": 1.3734421850094108e-05, "loss": 0.5836, "step": 6051 }, { "epoch": 0.7643223616702186, "grad_norm": 1.6796875, "learning_rate": 1.3732569774360486e-05, "loss": 0.6421, "step": 6052 }, { "epoch": 0.7644486541952799, "grad_norm": 1.6953125, "learning_rate": 1.3730717549855626e-05, "loss": 0.6113, "step": 6053 }, { "epoch": 0.764574946720341, "grad_norm": 1.71875, "learning_rate": 1.372886517665335e-05, "loss": 0.5726, "step": 6054 }, { "epoch": 0.7647012392454021, "grad_norm": 1.7421875, "learning_rate": 1.3727012654827487e-05, "loss": 0.5678, "step": 6055 }, { "epoch": 0.7648275317704634, "grad_norm": 1.734375, "learning_rate": 1.372515998445188e-05, "loss": 0.6054, "step": 6056 }, { "epoch": 0.7649538242955245, "grad_norm": 1.8203125, "learning_rate": 1.3723307165600366e-05, "loss": 0.5687, "step": 6057 }, { "epoch": 0.7650801168205856, "grad_norm": 1.625, "learning_rate": 1.3721454198346798e-05, "loss": 0.6063, "step": 6058 }, { "epoch": 0.7652064093456469, "grad_norm": 1.796875, "learning_rate": 1.3719601082765029e-05, "loss": 0.6695, "step": 6059 }, { "epoch": 0.765332701870708, "grad_norm": 1.96875, "learning_rate": 1.3717747818928917e-05, "loss": 0.6576, "step": 6060 }, { "epoch": 0.7654589943957693, "grad_norm": 1.8515625, "learning_rate": 1.3715894406912336e-05, "loss": 0.6232, "step": 6061 }, { "epoch": 0.7655852869208304, "grad_norm": 1.6484375, "learning_rate": 1.3714040846789151e-05, "loss": 0.6198, "step": 6062 }, { "epoch": 0.7657115794458915, "grad_norm": 1.734375, "learning_rate": 1.3712187138633249e-05, "loss": 0.6654, "step": 6063 }, { "epoch": 0.7658378719709528, "grad_norm": 1.625, "learning_rate": 1.3710333282518504e-05, "loss": 0.5571, "step": 6064 }, { "epoch": 0.7659641644960139, "grad_norm": 1.703125, "learning_rate": 1.3708479278518814e-05, "loss": 0.6524, "step": 6065 }, { "epoch": 0.766090457021075, "grad_norm": 1.8828125, "learning_rate": 1.370662512670807e-05, "loss": 0.6771, "step": 6066 }, { "epoch": 0.7662167495461363, "grad_norm": 1.8671875, "learning_rate": 1.3704770827160178e-05, "loss": 0.6814, "step": 6067 }, { "epoch": 0.7663430420711974, "grad_norm": 1.75, "learning_rate": 1.3702916379949045e-05, "loss": 0.6659, "step": 6068 }, { "epoch": 0.7664693345962585, "grad_norm": 1.828125, "learning_rate": 1.370106178514858e-05, "loss": 0.6535, "step": 6069 }, { "epoch": 0.7665956271213198, "grad_norm": 1.8671875, "learning_rate": 1.369920704283271e-05, "loss": 0.7656, "step": 6070 }, { "epoch": 0.7667219196463809, "grad_norm": 1.796875, "learning_rate": 1.3697352153075358e-05, "loss": 0.7071, "step": 6071 }, { "epoch": 0.7668482121714421, "grad_norm": 1.7578125, "learning_rate": 1.3695497115950453e-05, "loss": 0.5972, "step": 6072 }, { "epoch": 0.7669745046965033, "grad_norm": 1.703125, "learning_rate": 1.3693641931531935e-05, "loss": 0.6287, "step": 6073 }, { "epoch": 0.7671007972215644, "grad_norm": 1.875, "learning_rate": 1.3691786599893746e-05, "loss": 0.6104, "step": 6074 }, { "epoch": 0.7672270897466256, "grad_norm": 1.828125, "learning_rate": 1.3689931121109839e-05, "loss": 0.6544, "step": 6075 }, { "epoch": 0.7673533822716868, "grad_norm": 1.78125, "learning_rate": 1.368807549525416e-05, "loss": 0.6543, "step": 6076 }, { "epoch": 0.767479674796748, "grad_norm": 1.8828125, "learning_rate": 1.3686219722400678e-05, "loss": 0.5871, "step": 6077 }, { "epoch": 0.7676059673218092, "grad_norm": 1.796875, "learning_rate": 1.3684363802623353e-05, "loss": 0.6503, "step": 6078 }, { "epoch": 0.7677322598468703, "grad_norm": 1.75, "learning_rate": 1.3682507735996165e-05, "loss": 0.6352, "step": 6079 }, { "epoch": 0.7678585523719315, "grad_norm": 1.8359375, "learning_rate": 1.3680651522593088e-05, "loss": 0.617, "step": 6080 }, { "epoch": 0.7679848448969927, "grad_norm": 1.7578125, "learning_rate": 1.3678795162488106e-05, "loss": 0.549, "step": 6081 }, { "epoch": 0.7681111374220538, "grad_norm": 1.7890625, "learning_rate": 1.367693865575521e-05, "loss": 0.6425, "step": 6082 }, { "epoch": 0.768237429947115, "grad_norm": 1.8359375, "learning_rate": 1.3675082002468396e-05, "loss": 0.638, "step": 6083 }, { "epoch": 0.7683637224721762, "grad_norm": 1.8125, "learning_rate": 1.3673225202701664e-05, "loss": 0.7422, "step": 6084 }, { "epoch": 0.7684900149972373, "grad_norm": 1.765625, "learning_rate": 1.3671368256529026e-05, "loss": 0.6501, "step": 6085 }, { "epoch": 0.7686163075222985, "grad_norm": 1.78125, "learning_rate": 1.366951116402449e-05, "loss": 0.6167, "step": 6086 }, { "epoch": 0.7687426000473597, "grad_norm": 2.328125, "learning_rate": 1.3667653925262074e-05, "loss": 0.8152, "step": 6087 }, { "epoch": 0.7688688925724209, "grad_norm": 1.75, "learning_rate": 1.3665796540315811e-05, "loss": 0.6009, "step": 6088 }, { "epoch": 0.768995185097482, "grad_norm": 1.9140625, "learning_rate": 1.3663939009259728e-05, "loss": 0.7854, "step": 6089 }, { "epoch": 0.7691214776225432, "grad_norm": 1.8125, "learning_rate": 1.366208133216786e-05, "loss": 0.6063, "step": 6090 }, { "epoch": 0.7692477701476044, "grad_norm": 1.8984375, "learning_rate": 1.366022350911425e-05, "loss": 0.6642, "step": 6091 }, { "epoch": 0.7693740626726656, "grad_norm": 1.8359375, "learning_rate": 1.3658365540172948e-05, "loss": 0.6787, "step": 6092 }, { "epoch": 0.7695003551977267, "grad_norm": 1.8046875, "learning_rate": 1.3656507425418008e-05, "loss": 0.5754, "step": 6093 }, { "epoch": 0.7696266477227879, "grad_norm": 1.609375, "learning_rate": 1.365464916492349e-05, "loss": 0.5456, "step": 6094 }, { "epoch": 0.7697529402478491, "grad_norm": 1.8125, "learning_rate": 1.365279075876346e-05, "loss": 0.6587, "step": 6095 }, { "epoch": 0.7698792327729103, "grad_norm": 1.75, "learning_rate": 1.3650932207011982e-05, "loss": 0.7317, "step": 6096 }, { "epoch": 0.7700055252979714, "grad_norm": 1.78125, "learning_rate": 1.3649073509743144e-05, "loss": 0.548, "step": 6097 }, { "epoch": 0.7701318178230326, "grad_norm": 1.734375, "learning_rate": 1.3647214667031026e-05, "loss": 0.5933, "step": 6098 }, { "epoch": 0.7702581103480938, "grad_norm": 1.9296875, "learning_rate": 1.3645355678949716e-05, "loss": 0.7413, "step": 6099 }, { "epoch": 0.7703844028731549, "grad_norm": 1.8203125, "learning_rate": 1.364349654557331e-05, "loss": 0.6665, "step": 6100 }, { "epoch": 0.7705106953982161, "grad_norm": 1.8125, "learning_rate": 1.3641637266975908e-05, "loss": 0.6422, "step": 6101 }, { "epoch": 0.7706369879232773, "grad_norm": 1.8828125, "learning_rate": 1.3639777843231614e-05, "loss": 0.648, "step": 6102 }, { "epoch": 0.7707632804483384, "grad_norm": 1.625, "learning_rate": 1.363791827441454e-05, "loss": 0.6022, "step": 6103 }, { "epoch": 0.7708895729733997, "grad_norm": 1.9765625, "learning_rate": 1.363605856059881e-05, "loss": 0.683, "step": 6104 }, { "epoch": 0.7710158654984608, "grad_norm": 1.5546875, "learning_rate": 1.363419870185854e-05, "loss": 0.5054, "step": 6105 }, { "epoch": 0.7711421580235219, "grad_norm": 1.7578125, "learning_rate": 1.3632338698267869e-05, "loss": 0.6127, "step": 6106 }, { "epoch": 0.7712684505485832, "grad_norm": 1.84375, "learning_rate": 1.363047854990092e-05, "loss": 0.7165, "step": 6107 }, { "epoch": 0.7713947430736443, "grad_norm": 1.6796875, "learning_rate": 1.3628618256831848e-05, "loss": 0.5512, "step": 6108 }, { "epoch": 0.7715210355987056, "grad_norm": 1.8203125, "learning_rate": 1.3626757819134786e-05, "loss": 0.6191, "step": 6109 }, { "epoch": 0.7716473281237667, "grad_norm": 1.5859375, "learning_rate": 1.3624897236883896e-05, "loss": 0.5962, "step": 6110 }, { "epoch": 0.7717736206488278, "grad_norm": 1.765625, "learning_rate": 1.3623036510153331e-05, "loss": 0.676, "step": 6111 }, { "epoch": 0.7718999131738891, "grad_norm": 1.8046875, "learning_rate": 1.3621175639017259e-05, "loss": 0.5927, "step": 6112 }, { "epoch": 0.7720262056989502, "grad_norm": 1.6953125, "learning_rate": 1.3619314623549848e-05, "loss": 0.6194, "step": 6113 }, { "epoch": 0.7721524982240113, "grad_norm": 2.046875, "learning_rate": 1.3617453463825272e-05, "loss": 0.7469, "step": 6114 }, { "epoch": 0.7722787907490726, "grad_norm": 1.828125, "learning_rate": 1.3615592159917717e-05, "loss": 0.5741, "step": 6115 }, { "epoch": 0.7724050832741337, "grad_norm": 1.921875, "learning_rate": 1.3613730711901365e-05, "loss": 0.6966, "step": 6116 }, { "epoch": 0.7725313757991948, "grad_norm": 1.6484375, "learning_rate": 1.3611869119850413e-05, "loss": 0.6918, "step": 6117 }, { "epoch": 0.7726576683242561, "grad_norm": 1.828125, "learning_rate": 1.3610007383839056e-05, "loss": 0.6489, "step": 6118 }, { "epoch": 0.7727839608493172, "grad_norm": 1.734375, "learning_rate": 1.36081455039415e-05, "loss": 0.6196, "step": 6119 }, { "epoch": 0.7729102533743784, "grad_norm": 1.703125, "learning_rate": 1.3606283480231957e-05, "loss": 0.6431, "step": 6120 }, { "epoch": 0.7730365458994396, "grad_norm": 1.828125, "learning_rate": 1.360442131278464e-05, "loss": 0.6135, "step": 6121 }, { "epoch": 0.7731628384245007, "grad_norm": 1.875, "learning_rate": 1.360255900167377e-05, "loss": 0.7079, "step": 6122 }, { "epoch": 0.773289130949562, "grad_norm": 1.7578125, "learning_rate": 1.3600696546973575e-05, "loss": 0.601, "step": 6123 }, { "epoch": 0.7734154234746231, "grad_norm": 1.6953125, "learning_rate": 1.359883394875829e-05, "loss": 0.5576, "step": 6124 }, { "epoch": 0.7735417159996842, "grad_norm": 1.796875, "learning_rate": 1.359697120710215e-05, "loss": 0.6517, "step": 6125 }, { "epoch": 0.7736680085247455, "grad_norm": 1.9140625, "learning_rate": 1.3595108322079404e-05, "loss": 0.6775, "step": 6126 }, { "epoch": 0.7737943010498066, "grad_norm": 1.8828125, "learning_rate": 1.35932452937643e-05, "loss": 0.6693, "step": 6127 }, { "epoch": 0.7739205935748678, "grad_norm": 1.765625, "learning_rate": 1.3591382122231092e-05, "loss": 0.6149, "step": 6128 }, { "epoch": 0.774046886099929, "grad_norm": 1.75, "learning_rate": 1.3589518807554043e-05, "loss": 0.5734, "step": 6129 }, { "epoch": 0.7741731786249901, "grad_norm": 1.8203125, "learning_rate": 1.358765534980742e-05, "loss": 0.7353, "step": 6130 }, { "epoch": 0.7742994711500513, "grad_norm": 1.8125, "learning_rate": 1.3585791749065498e-05, "loss": 0.6543, "step": 6131 }, { "epoch": 0.7744257636751125, "grad_norm": 1.8203125, "learning_rate": 1.358392800540255e-05, "loss": 0.6902, "step": 6132 }, { "epoch": 0.7745520562001736, "grad_norm": 2.078125, "learning_rate": 1.3582064118892866e-05, "loss": 0.6915, "step": 6133 }, { "epoch": 0.7746783487252348, "grad_norm": 1.8515625, "learning_rate": 1.3580200089610735e-05, "loss": 0.6161, "step": 6134 }, { "epoch": 0.774804641250296, "grad_norm": 1.875, "learning_rate": 1.3578335917630452e-05, "loss": 0.6115, "step": 6135 }, { "epoch": 0.7749309337753572, "grad_norm": 1.765625, "learning_rate": 1.3576471603026318e-05, "loss": 0.6431, "step": 6136 }, { "epoch": 0.7750572263004183, "grad_norm": 2.0625, "learning_rate": 1.3574607145872641e-05, "loss": 0.6704, "step": 6137 }, { "epoch": 0.7751835188254795, "grad_norm": 1.59375, "learning_rate": 1.357274254624373e-05, "loss": 0.5881, "step": 6138 }, { "epoch": 0.7753098113505407, "grad_norm": 1.8203125, "learning_rate": 1.3570877804213912e-05, "loss": 0.6796, "step": 6139 }, { "epoch": 0.7754361038756019, "grad_norm": 1.703125, "learning_rate": 1.35690129198575e-05, "loss": 0.6224, "step": 6140 }, { "epoch": 0.775562396400663, "grad_norm": 1.703125, "learning_rate": 1.3567147893248833e-05, "loss": 0.6869, "step": 6141 }, { "epoch": 0.7756886889257242, "grad_norm": 1.828125, "learning_rate": 1.3565282724462243e-05, "loss": 0.6656, "step": 6142 }, { "epoch": 0.7758149814507854, "grad_norm": 1.875, "learning_rate": 1.3563417413572069e-05, "loss": 0.6053, "step": 6143 }, { "epoch": 0.7759412739758466, "grad_norm": 1.8046875, "learning_rate": 1.3561551960652661e-05, "loss": 0.5933, "step": 6144 }, { "epoch": 0.7760675665009077, "grad_norm": 1.6953125, "learning_rate": 1.3559686365778373e-05, "loss": 0.5864, "step": 6145 }, { "epoch": 0.7761938590259689, "grad_norm": 1.8828125, "learning_rate": 1.3557820629023557e-05, "loss": 0.6352, "step": 6146 }, { "epoch": 0.7763201515510301, "grad_norm": 1.9375, "learning_rate": 1.3555954750462581e-05, "loss": 0.6488, "step": 6147 }, { "epoch": 0.7764464440760912, "grad_norm": 1.984375, "learning_rate": 1.3554088730169814e-05, "loss": 0.6386, "step": 6148 }, { "epoch": 0.7765727366011524, "grad_norm": 1.8125, "learning_rate": 1.355222256821963e-05, "loss": 0.6843, "step": 6149 }, { "epoch": 0.7766990291262136, "grad_norm": 1.8515625, "learning_rate": 1.3550356264686413e-05, "loss": 0.6754, "step": 6150 }, { "epoch": 0.7768253216512747, "grad_norm": 2.015625, "learning_rate": 1.3548489819644544e-05, "loss": 0.6816, "step": 6151 }, { "epoch": 0.776951614176336, "grad_norm": 1.8515625, "learning_rate": 1.354662323316842e-05, "loss": 0.6595, "step": 6152 }, { "epoch": 0.7770779067013971, "grad_norm": 1.71875, "learning_rate": 1.3544756505332437e-05, "loss": 0.6836, "step": 6153 }, { "epoch": 0.7772041992264583, "grad_norm": 1.7421875, "learning_rate": 1.3542889636210996e-05, "loss": 0.6805, "step": 6154 }, { "epoch": 0.7773304917515195, "grad_norm": 1.9140625, "learning_rate": 1.3541022625878508e-05, "loss": 0.618, "step": 6155 }, { "epoch": 0.7774567842765806, "grad_norm": 1.671875, "learning_rate": 1.3539155474409387e-05, "loss": 0.6017, "step": 6156 }, { "epoch": 0.7775830768016418, "grad_norm": 1.7734375, "learning_rate": 1.3537288181878052e-05, "loss": 0.6671, "step": 6157 }, { "epoch": 0.777709369326703, "grad_norm": 1.8359375, "learning_rate": 1.3535420748358932e-05, "loss": 0.6759, "step": 6158 }, { "epoch": 0.7778356618517641, "grad_norm": 1.890625, "learning_rate": 1.3533553173926457e-05, "loss": 0.727, "step": 6159 }, { "epoch": 0.7779619543768254, "grad_norm": 1.796875, "learning_rate": 1.3531685458655063e-05, "loss": 0.6974, "step": 6160 }, { "epoch": 0.7780882469018865, "grad_norm": 1.7421875, "learning_rate": 1.3529817602619195e-05, "loss": 0.6163, "step": 6161 }, { "epoch": 0.7782145394269476, "grad_norm": 1.8984375, "learning_rate": 1.3527949605893298e-05, "loss": 0.6753, "step": 6162 }, { "epoch": 0.7783408319520089, "grad_norm": 1.6875, "learning_rate": 1.3526081468551827e-05, "loss": 0.6595, "step": 6163 }, { "epoch": 0.77846712447707, "grad_norm": 2.0, "learning_rate": 1.3524213190669238e-05, "loss": 0.5411, "step": 6164 }, { "epoch": 0.7785934170021311, "grad_norm": 1.8203125, "learning_rate": 1.3522344772320004e-05, "loss": 0.5891, "step": 6165 }, { "epoch": 0.7787197095271924, "grad_norm": 1.8125, "learning_rate": 1.3520476213578593e-05, "loss": 0.6607, "step": 6166 }, { "epoch": 0.7788460020522535, "grad_norm": 2.03125, "learning_rate": 1.3518607514519478e-05, "loss": 0.7612, "step": 6167 }, { "epoch": 0.7789722945773147, "grad_norm": 1.6796875, "learning_rate": 1.351673867521714e-05, "loss": 0.6414, "step": 6168 }, { "epoch": 0.7790985871023759, "grad_norm": 1.671875, "learning_rate": 1.3514869695746073e-05, "loss": 0.5612, "step": 6169 }, { "epoch": 0.779224879627437, "grad_norm": 1.828125, "learning_rate": 1.3513000576180763e-05, "loss": 0.6807, "step": 6170 }, { "epoch": 0.7793511721524983, "grad_norm": 1.7265625, "learning_rate": 1.3511131316595716e-05, "loss": 0.6331, "step": 6171 }, { "epoch": 0.7794774646775594, "grad_norm": 1.6484375, "learning_rate": 1.3509261917065426e-05, "loss": 0.6694, "step": 6172 }, { "epoch": 0.7796037572026205, "grad_norm": 1.9609375, "learning_rate": 1.3507392377664412e-05, "loss": 0.6104, "step": 6173 }, { "epoch": 0.7797300497276818, "grad_norm": 1.8515625, "learning_rate": 1.3505522698467183e-05, "loss": 0.7039, "step": 6174 }, { "epoch": 0.7798563422527429, "grad_norm": 1.7109375, "learning_rate": 1.3503652879548268e-05, "loss": 0.7053, "step": 6175 }, { "epoch": 0.779982634777804, "grad_norm": 1.9296875, "learning_rate": 1.3501782920982185e-05, "loss": 0.7565, "step": 6176 }, { "epoch": 0.7801089273028653, "grad_norm": 1.8359375, "learning_rate": 1.349991282284347e-05, "loss": 0.6497, "step": 6177 }, { "epoch": 0.7802352198279264, "grad_norm": 1.8046875, "learning_rate": 1.349804258520666e-05, "loss": 0.5623, "step": 6178 }, { "epoch": 0.7803615123529876, "grad_norm": 1.8359375, "learning_rate": 1.3496172208146297e-05, "loss": 0.6576, "step": 6179 }, { "epoch": 0.7804878048780488, "grad_norm": 1.65625, "learning_rate": 1.349430169173693e-05, "loss": 0.5765, "step": 6180 }, { "epoch": 0.7806140974031099, "grad_norm": 1.6015625, "learning_rate": 1.3492431036053117e-05, "loss": 0.5428, "step": 6181 }, { "epoch": 0.7807403899281711, "grad_norm": 1.765625, "learning_rate": 1.3490560241169414e-05, "loss": 0.6149, "step": 6182 }, { "epoch": 0.7808666824532323, "grad_norm": 1.8828125, "learning_rate": 1.3488689307160388e-05, "loss": 0.6673, "step": 6183 }, { "epoch": 0.7809929749782935, "grad_norm": 1.8046875, "learning_rate": 1.3486818234100606e-05, "loss": 0.634, "step": 6184 }, { "epoch": 0.7811192675033547, "grad_norm": 1.6953125, "learning_rate": 1.3484947022064651e-05, "loss": 0.5447, "step": 6185 }, { "epoch": 0.7812455600284158, "grad_norm": 1.71875, "learning_rate": 1.34830756711271e-05, "loss": 0.6211, "step": 6186 }, { "epoch": 0.781371852553477, "grad_norm": 1.734375, "learning_rate": 1.3481204181362544e-05, "loss": 0.6811, "step": 6187 }, { "epoch": 0.7814981450785382, "grad_norm": 1.8359375, "learning_rate": 1.3479332552845573e-05, "loss": 0.6362, "step": 6188 }, { "epoch": 0.7816244376035993, "grad_norm": 1.7578125, "learning_rate": 1.3477460785650787e-05, "loss": 0.6779, "step": 6189 }, { "epoch": 0.7817507301286605, "grad_norm": 1.796875, "learning_rate": 1.347558887985279e-05, "loss": 0.6048, "step": 6190 }, { "epoch": 0.7818770226537217, "grad_norm": 1.734375, "learning_rate": 1.3473716835526194e-05, "loss": 0.6176, "step": 6191 }, { "epoch": 0.7820033151787829, "grad_norm": 1.6875, "learning_rate": 1.347184465274561e-05, "loss": 0.5979, "step": 6192 }, { "epoch": 0.782129607703844, "grad_norm": 1.7109375, "learning_rate": 1.3469972331585658e-05, "loss": 0.6024, "step": 6193 }, { "epoch": 0.7822559002289052, "grad_norm": 1.7578125, "learning_rate": 1.3468099872120972e-05, "loss": 0.6824, "step": 6194 }, { "epoch": 0.7823821927539664, "grad_norm": 1.6640625, "learning_rate": 1.3466227274426177e-05, "loss": 0.5458, "step": 6195 }, { "epoch": 0.7825084852790275, "grad_norm": 1.828125, "learning_rate": 1.3464354538575908e-05, "loss": 0.6058, "step": 6196 }, { "epoch": 0.7826347778040887, "grad_norm": 1.7734375, "learning_rate": 1.3462481664644816e-05, "loss": 0.6308, "step": 6197 }, { "epoch": 0.7827610703291499, "grad_norm": 1.8984375, "learning_rate": 1.3460608652707541e-05, "loss": 0.6709, "step": 6198 }, { "epoch": 0.7828873628542111, "grad_norm": 1.8203125, "learning_rate": 1.3458735502838742e-05, "loss": 0.6519, "step": 6199 }, { "epoch": 0.7830136553792723, "grad_norm": 1.6796875, "learning_rate": 1.3456862215113078e-05, "loss": 0.6746, "step": 6200 }, { "epoch": 0.7831399479043334, "grad_norm": 1.7265625, "learning_rate": 1.3454988789605212e-05, "loss": 0.6368, "step": 6201 }, { "epoch": 0.7832662404293946, "grad_norm": 1.796875, "learning_rate": 1.3453115226389814e-05, "loss": 0.7083, "step": 6202 }, { "epoch": 0.7833925329544558, "grad_norm": 1.640625, "learning_rate": 1.3451241525541563e-05, "loss": 0.5713, "step": 6203 }, { "epoch": 0.7835188254795169, "grad_norm": 1.84375, "learning_rate": 1.3449367687135134e-05, "loss": 0.5214, "step": 6204 }, { "epoch": 0.7836451180045781, "grad_norm": 1.8125, "learning_rate": 1.3447493711245216e-05, "loss": 0.68, "step": 6205 }, { "epoch": 0.7837714105296393, "grad_norm": 1.7734375, "learning_rate": 1.3445619597946506e-05, "loss": 0.6756, "step": 6206 }, { "epoch": 0.7838977030547004, "grad_norm": 1.8359375, "learning_rate": 1.34437453473137e-05, "loss": 0.643, "step": 6207 }, { "epoch": 0.7840239955797617, "grad_norm": 1.8515625, "learning_rate": 1.3441870959421497e-05, "loss": 0.6511, "step": 6208 }, { "epoch": 0.7841502881048228, "grad_norm": 1.734375, "learning_rate": 1.3439996434344607e-05, "loss": 0.553, "step": 6209 }, { "epoch": 0.7842765806298839, "grad_norm": 1.8359375, "learning_rate": 1.3438121772157744e-05, "loss": 0.6671, "step": 6210 }, { "epoch": 0.7844028731549452, "grad_norm": 1.9609375, "learning_rate": 1.343624697293563e-05, "loss": 0.6465, "step": 6211 }, { "epoch": 0.7845291656800063, "grad_norm": 1.921875, "learning_rate": 1.343437203675299e-05, "loss": 0.6977, "step": 6212 }, { "epoch": 0.7846554582050674, "grad_norm": 1.7109375, "learning_rate": 1.343249696368455e-05, "loss": 0.6139, "step": 6213 }, { "epoch": 0.7847817507301287, "grad_norm": 2.0625, "learning_rate": 1.3430621753805049e-05, "loss": 0.8664, "step": 6214 }, { "epoch": 0.7849080432551898, "grad_norm": 1.7890625, "learning_rate": 1.3428746407189227e-05, "loss": 0.6684, "step": 6215 }, { "epoch": 0.7850343357802511, "grad_norm": 1.75, "learning_rate": 1.3426870923911834e-05, "loss": 0.5193, "step": 6216 }, { "epoch": 0.7851606283053122, "grad_norm": 1.7421875, "learning_rate": 1.342499530404762e-05, "loss": 0.6566, "step": 6217 }, { "epoch": 0.7852869208303733, "grad_norm": 1.6796875, "learning_rate": 1.3423119547671343e-05, "loss": 0.5367, "step": 6218 }, { "epoch": 0.7854132133554346, "grad_norm": 1.8359375, "learning_rate": 1.3421243654857765e-05, "loss": 0.6547, "step": 6219 }, { "epoch": 0.7855395058804957, "grad_norm": 1.78125, "learning_rate": 1.3419367625681656e-05, "loss": 0.6167, "step": 6220 }, { "epoch": 0.7856657984055568, "grad_norm": 1.875, "learning_rate": 1.3417491460217791e-05, "loss": 0.6913, "step": 6221 }, { "epoch": 0.7857920909306181, "grad_norm": 1.7734375, "learning_rate": 1.3415615158540946e-05, "loss": 0.574, "step": 6222 }, { "epoch": 0.7859183834556792, "grad_norm": 1.7578125, "learning_rate": 1.3413738720725905e-05, "loss": 0.6452, "step": 6223 }, { "epoch": 0.7860446759807403, "grad_norm": 1.8046875, "learning_rate": 1.3411862146847465e-05, "loss": 0.6382, "step": 6224 }, { "epoch": 0.7861709685058016, "grad_norm": 1.921875, "learning_rate": 1.3409985436980419e-05, "loss": 0.745, "step": 6225 }, { "epoch": 0.7862972610308627, "grad_norm": 1.796875, "learning_rate": 1.3408108591199564e-05, "loss": 0.733, "step": 6226 }, { "epoch": 0.7864235535559239, "grad_norm": 1.7578125, "learning_rate": 1.3406231609579712e-05, "loss": 0.6995, "step": 6227 }, { "epoch": 0.7865498460809851, "grad_norm": 1.8125, "learning_rate": 1.3404354492195671e-05, "loss": 0.7242, "step": 6228 }, { "epoch": 0.7866761386060462, "grad_norm": 1.7734375, "learning_rate": 1.3402477239122261e-05, "loss": 0.6322, "step": 6229 }, { "epoch": 0.7868024311311075, "grad_norm": 1.90625, "learning_rate": 1.3400599850434302e-05, "loss": 0.6135, "step": 6230 }, { "epoch": 0.7869287236561686, "grad_norm": 1.875, "learning_rate": 1.3398722326206623e-05, "loss": 0.6255, "step": 6231 }, { "epoch": 0.7870550161812297, "grad_norm": 1.890625, "learning_rate": 1.339684466651406e-05, "loss": 0.761, "step": 6232 }, { "epoch": 0.787181308706291, "grad_norm": 1.75, "learning_rate": 1.339496687143145e-05, "loss": 0.6713, "step": 6233 }, { "epoch": 0.7873076012313521, "grad_norm": 1.734375, "learning_rate": 1.339308894103364e-05, "loss": 0.5759, "step": 6234 }, { "epoch": 0.7874338937564133, "grad_norm": 1.7421875, "learning_rate": 1.3391210875395473e-05, "loss": 0.5746, "step": 6235 }, { "epoch": 0.7875601862814745, "grad_norm": 1.90625, "learning_rate": 1.3389332674591811e-05, "loss": 0.8075, "step": 6236 }, { "epoch": 0.7876864788065356, "grad_norm": 1.8125, "learning_rate": 1.3387454338697514e-05, "loss": 0.6379, "step": 6237 }, { "epoch": 0.7878127713315968, "grad_norm": 2.125, "learning_rate": 1.3385575867787444e-05, "loss": 0.7001, "step": 6238 }, { "epoch": 0.787939063856658, "grad_norm": 1.7421875, "learning_rate": 1.3383697261936477e-05, "loss": 0.641, "step": 6239 }, { "epoch": 0.7880653563817192, "grad_norm": 1.90625, "learning_rate": 1.3381818521219485e-05, "loss": 0.6003, "step": 6240 }, { "epoch": 0.7881916489067803, "grad_norm": 1.71875, "learning_rate": 1.3379939645711351e-05, "loss": 0.6474, "step": 6241 }, { "epoch": 0.7883179414318415, "grad_norm": 1.875, "learning_rate": 1.3378060635486967e-05, "loss": 0.6608, "step": 6242 }, { "epoch": 0.7884442339569027, "grad_norm": 1.7890625, "learning_rate": 1.3376181490621221e-05, "loss": 0.6495, "step": 6243 }, { "epoch": 0.7885705264819638, "grad_norm": 1.78125, "learning_rate": 1.3374302211189014e-05, "loss": 0.6957, "step": 6244 }, { "epoch": 0.788696819007025, "grad_norm": 1.96875, "learning_rate": 1.3372422797265248e-05, "loss": 0.6605, "step": 6245 }, { "epoch": 0.7888231115320862, "grad_norm": 1.8125, "learning_rate": 1.3370543248924832e-05, "loss": 0.7247, "step": 6246 }, { "epoch": 0.7889494040571474, "grad_norm": 1.765625, "learning_rate": 1.336866356624268e-05, "loss": 0.7085, "step": 6247 }, { "epoch": 0.7890756965822086, "grad_norm": 1.90625, "learning_rate": 1.3366783749293713e-05, "loss": 0.6509, "step": 6248 }, { "epoch": 0.7892019891072697, "grad_norm": 1.8359375, "learning_rate": 1.3364903798152856e-05, "loss": 0.71, "step": 6249 }, { "epoch": 0.7893282816323309, "grad_norm": 1.8984375, "learning_rate": 1.3363023712895036e-05, "loss": 0.6862, "step": 6250 }, { "epoch": 0.7894545741573921, "grad_norm": 1.6953125, "learning_rate": 1.3361143493595193e-05, "loss": 0.654, "step": 6251 }, { "epoch": 0.7895808666824532, "grad_norm": 1.6328125, "learning_rate": 1.3359263140328265e-05, "loss": 0.5988, "step": 6252 }, { "epoch": 0.7897071592075144, "grad_norm": 1.765625, "learning_rate": 1.3357382653169199e-05, "loss": 0.659, "step": 6253 }, { "epoch": 0.7898334517325756, "grad_norm": 1.78125, "learning_rate": 1.3355502032192949e-05, "loss": 0.6793, "step": 6254 }, { "epoch": 0.7899597442576367, "grad_norm": 1.8515625, "learning_rate": 1.335362127747447e-05, "loss": 0.6602, "step": 6255 }, { "epoch": 0.790086036782698, "grad_norm": 1.7734375, "learning_rate": 1.3351740389088725e-05, "loss": 0.6574, "step": 6256 }, { "epoch": 0.7902123293077591, "grad_norm": 1.6796875, "learning_rate": 1.334985936711068e-05, "loss": 0.5753, "step": 6257 }, { "epoch": 0.7903386218328202, "grad_norm": 1.7421875, "learning_rate": 1.3347978211615312e-05, "loss": 0.6191, "step": 6258 }, { "epoch": 0.7904649143578815, "grad_norm": 1.6328125, "learning_rate": 1.3346096922677592e-05, "loss": 0.5274, "step": 6259 }, { "epoch": 0.7905912068829426, "grad_norm": 1.8828125, "learning_rate": 1.334421550037251e-05, "loss": 0.7045, "step": 6260 }, { "epoch": 0.7907174994080038, "grad_norm": 1.71875, "learning_rate": 1.3342333944775055e-05, "loss": 0.6484, "step": 6261 }, { "epoch": 0.790843791933065, "grad_norm": 1.734375, "learning_rate": 1.3340452255960217e-05, "loss": 0.5979, "step": 6262 }, { "epoch": 0.7909700844581261, "grad_norm": 1.859375, "learning_rate": 1.3338570434003e-05, "loss": 0.5796, "step": 6263 }, { "epoch": 0.7910963769831874, "grad_norm": 1.765625, "learning_rate": 1.3336688478978405e-05, "loss": 0.629, "step": 6264 }, { "epoch": 0.7912226695082485, "grad_norm": 1.8203125, "learning_rate": 1.3334806390961448e-05, "loss": 0.6982, "step": 6265 }, { "epoch": 0.7913489620333096, "grad_norm": 1.7109375, "learning_rate": 1.3332924170027137e-05, "loss": 0.7003, "step": 6266 }, { "epoch": 0.7914752545583709, "grad_norm": 1.8671875, "learning_rate": 1.3331041816250498e-05, "loss": 0.6378, "step": 6267 }, { "epoch": 0.791601547083432, "grad_norm": 1.7265625, "learning_rate": 1.3329159329706554e-05, "loss": 0.6075, "step": 6268 }, { "epoch": 0.7917278396084931, "grad_norm": 1.84375, "learning_rate": 1.3327276710470338e-05, "loss": 0.7828, "step": 6269 }, { "epoch": 0.7918541321335544, "grad_norm": 1.65625, "learning_rate": 1.3325393958616887e-05, "loss": 0.6069, "step": 6270 }, { "epoch": 0.7919804246586155, "grad_norm": 1.84375, "learning_rate": 1.3323511074221246e-05, "loss": 0.6946, "step": 6271 }, { "epoch": 0.7921067171836766, "grad_norm": 1.734375, "learning_rate": 1.3321628057358455e-05, "loss": 0.5836, "step": 6272 }, { "epoch": 0.7922330097087379, "grad_norm": 1.9453125, "learning_rate": 1.3319744908103571e-05, "loss": 0.6938, "step": 6273 }, { "epoch": 0.792359302233799, "grad_norm": 2.078125, "learning_rate": 1.331786162653165e-05, "loss": 0.63, "step": 6274 }, { "epoch": 0.7924855947588602, "grad_norm": 1.8046875, "learning_rate": 1.331597821271776e-05, "loss": 0.674, "step": 6275 }, { "epoch": 0.7926118872839214, "grad_norm": 1.890625, "learning_rate": 1.331409466673696e-05, "loss": 0.7464, "step": 6276 }, { "epoch": 0.7927381798089825, "grad_norm": 1.75, "learning_rate": 1.3312210988664332e-05, "loss": 0.5432, "step": 6277 }, { "epoch": 0.7928644723340438, "grad_norm": 1.796875, "learning_rate": 1.3310327178574951e-05, "loss": 0.62, "step": 6278 }, { "epoch": 0.7929907648591049, "grad_norm": 1.78125, "learning_rate": 1.3308443236543904e-05, "loss": 0.6312, "step": 6279 }, { "epoch": 0.793117057384166, "grad_norm": 1.8125, "learning_rate": 1.3306559162646277e-05, "loss": 0.6739, "step": 6280 }, { "epoch": 0.7932433499092273, "grad_norm": 1.7890625, "learning_rate": 1.3304674956957167e-05, "loss": 0.6021, "step": 6281 }, { "epoch": 0.7933696424342884, "grad_norm": 1.734375, "learning_rate": 1.3302790619551673e-05, "loss": 0.6358, "step": 6282 }, { "epoch": 0.7934959349593496, "grad_norm": 1.7109375, "learning_rate": 1.33009061505049e-05, "loss": 0.5752, "step": 6283 }, { "epoch": 0.7936222274844108, "grad_norm": 1.8125, "learning_rate": 1.3299021549891958e-05, "loss": 0.6963, "step": 6284 }, { "epoch": 0.7937485200094719, "grad_norm": 1.6953125, "learning_rate": 1.3297136817787963e-05, "loss": 0.6738, "step": 6285 }, { "epoch": 0.7938748125345331, "grad_norm": 1.9765625, "learning_rate": 1.3295251954268036e-05, "loss": 0.6624, "step": 6286 }, { "epoch": 0.7940011050595943, "grad_norm": 1.59375, "learning_rate": 1.3293366959407303e-05, "loss": 0.6293, "step": 6287 }, { "epoch": 0.7941273975846554, "grad_norm": 1.7890625, "learning_rate": 1.3291481833280897e-05, "loss": 0.6987, "step": 6288 }, { "epoch": 0.7942536901097166, "grad_norm": 1.703125, "learning_rate": 1.3289596575963952e-05, "loss": 0.669, "step": 6289 }, { "epoch": 0.7943799826347778, "grad_norm": 1.6796875, "learning_rate": 1.328771118753161e-05, "loss": 0.5942, "step": 6290 }, { "epoch": 0.794506275159839, "grad_norm": 1.796875, "learning_rate": 1.3285825668059023e-05, "loss": 0.6152, "step": 6291 }, { "epoch": 0.7946325676849002, "grad_norm": 1.703125, "learning_rate": 1.3283940017621337e-05, "loss": 0.5926, "step": 6292 }, { "epoch": 0.7947588602099613, "grad_norm": 1.8046875, "learning_rate": 1.328205423629371e-05, "loss": 0.7719, "step": 6293 }, { "epoch": 0.7948851527350225, "grad_norm": 1.796875, "learning_rate": 1.328016832415131e-05, "loss": 0.741, "step": 6294 }, { "epoch": 0.7950114452600837, "grad_norm": 1.8671875, "learning_rate": 1.3278282281269296e-05, "loss": 0.7078, "step": 6295 }, { "epoch": 0.7951377377851448, "grad_norm": 1.8046875, "learning_rate": 1.3276396107722851e-05, "loss": 0.7876, "step": 6296 }, { "epoch": 0.795264030310206, "grad_norm": 1.7734375, "learning_rate": 1.3274509803587148e-05, "loss": 0.664, "step": 6297 }, { "epoch": 0.7953903228352672, "grad_norm": 1.6875, "learning_rate": 1.3272623368937372e-05, "loss": 0.5863, "step": 6298 }, { "epoch": 0.7955166153603284, "grad_norm": 1.7890625, "learning_rate": 1.3270736803848709e-05, "loss": 0.7191, "step": 6299 }, { "epoch": 0.7956429078853895, "grad_norm": 1.796875, "learning_rate": 1.3268850108396359e-05, "loss": 0.5925, "step": 6300 }, { "epoch": 0.7957692004104507, "grad_norm": 1.734375, "learning_rate": 1.3266963282655513e-05, "loss": 0.5865, "step": 6301 }, { "epoch": 0.7958954929355119, "grad_norm": 1.5546875, "learning_rate": 1.326507632670138e-05, "loss": 0.5589, "step": 6302 }, { "epoch": 0.796021785460573, "grad_norm": 1.6796875, "learning_rate": 1.3263189240609172e-05, "loss": 0.6112, "step": 6303 }, { "epoch": 0.7961480779856343, "grad_norm": 1.90625, "learning_rate": 1.3261302024454097e-05, "loss": 0.6387, "step": 6304 }, { "epoch": 0.7962743705106954, "grad_norm": 1.8125, "learning_rate": 1.325941467831138e-05, "loss": 0.6557, "step": 6305 }, { "epoch": 0.7964006630357565, "grad_norm": 1.75, "learning_rate": 1.3257527202256247e-05, "loss": 0.6123, "step": 6306 }, { "epoch": 0.7965269555608178, "grad_norm": 1.8203125, "learning_rate": 1.3255639596363923e-05, "loss": 0.5708, "step": 6307 }, { "epoch": 0.7966532480858789, "grad_norm": 1.8671875, "learning_rate": 1.3253751860709647e-05, "loss": 0.5854, "step": 6308 }, { "epoch": 0.7967795406109401, "grad_norm": 1.765625, "learning_rate": 1.3251863995368658e-05, "loss": 0.6101, "step": 6309 }, { "epoch": 0.7969058331360013, "grad_norm": 1.8203125, "learning_rate": 1.3249976000416208e-05, "loss": 0.6424, "step": 6310 }, { "epoch": 0.7970321256610624, "grad_norm": 1.828125, "learning_rate": 1.3248087875927537e-05, "loss": 0.6437, "step": 6311 }, { "epoch": 0.7971584181861237, "grad_norm": 1.640625, "learning_rate": 1.324619962197791e-05, "loss": 0.6178, "step": 6312 }, { "epoch": 0.7972847107111848, "grad_norm": 1.8359375, "learning_rate": 1.3244311238642583e-05, "loss": 0.6386, "step": 6313 }, { "epoch": 0.7974110032362459, "grad_norm": 1.78125, "learning_rate": 1.3242422725996824e-05, "loss": 0.6642, "step": 6314 }, { "epoch": 0.7975372957613072, "grad_norm": 1.5859375, "learning_rate": 1.3240534084115906e-05, "loss": 0.5815, "step": 6315 }, { "epoch": 0.7976635882863683, "grad_norm": 1.7265625, "learning_rate": 1.3238645313075104e-05, "loss": 0.6139, "step": 6316 }, { "epoch": 0.7977898808114294, "grad_norm": 1.7890625, "learning_rate": 1.3236756412949703e-05, "loss": 0.6594, "step": 6317 }, { "epoch": 0.7979161733364907, "grad_norm": 1.7421875, "learning_rate": 1.3234867383814986e-05, "loss": 0.5776, "step": 6318 }, { "epoch": 0.7980424658615518, "grad_norm": 1.7734375, "learning_rate": 1.3232978225746247e-05, "loss": 0.6792, "step": 6319 }, { "epoch": 0.7981687583866129, "grad_norm": 1.8203125, "learning_rate": 1.3231088938818784e-05, "loss": 0.7272, "step": 6320 }, { "epoch": 0.7982950509116742, "grad_norm": 1.7578125, "learning_rate": 1.3229199523107896e-05, "loss": 0.6316, "step": 6321 }, { "epoch": 0.7984213434367353, "grad_norm": 1.8671875, "learning_rate": 1.3227309978688894e-05, "loss": 0.6944, "step": 6322 }, { "epoch": 0.7985476359617966, "grad_norm": 1.7265625, "learning_rate": 1.3225420305637089e-05, "loss": 0.6124, "step": 6323 }, { "epoch": 0.7986739284868577, "grad_norm": 1.8046875, "learning_rate": 1.3223530504027802e-05, "loss": 0.67, "step": 6324 }, { "epoch": 0.7988002210119188, "grad_norm": 1.921875, "learning_rate": 1.3221640573936352e-05, "loss": 0.7168, "step": 6325 }, { "epoch": 0.7989265135369801, "grad_norm": 1.703125, "learning_rate": 1.3219750515438068e-05, "loss": 0.5815, "step": 6326 }, { "epoch": 0.7990528060620412, "grad_norm": 1.6953125, "learning_rate": 1.3217860328608285e-05, "loss": 0.6142, "step": 6327 }, { "epoch": 0.7991790985871023, "grad_norm": 1.9140625, "learning_rate": 1.321597001352234e-05, "loss": 0.7362, "step": 6328 }, { "epoch": 0.7993053911121636, "grad_norm": 1.671875, "learning_rate": 1.3214079570255576e-05, "loss": 0.6203, "step": 6329 }, { "epoch": 0.7994316836372247, "grad_norm": 2.046875, "learning_rate": 1.3212188998883341e-05, "loss": 0.6592, "step": 6330 }, { "epoch": 0.7995579761622859, "grad_norm": 1.78125, "learning_rate": 1.321029829948099e-05, "loss": 0.6374, "step": 6331 }, { "epoch": 0.7996842686873471, "grad_norm": 1.828125, "learning_rate": 1.320840747212388e-05, "loss": 0.6313, "step": 6332 }, { "epoch": 0.7998105612124082, "grad_norm": 1.8984375, "learning_rate": 1.320651651688738e-05, "loss": 0.6485, "step": 6333 }, { "epoch": 0.7999368537374694, "grad_norm": 1.8125, "learning_rate": 1.320462543384685e-05, "loss": 0.6012, "step": 6334 }, { "epoch": 0.8000631462625306, "grad_norm": 1.734375, "learning_rate": 1.3202734223077673e-05, "loss": 0.6599, "step": 6335 }, { "epoch": 0.8001894387875917, "grad_norm": 1.7734375, "learning_rate": 1.3200842884655222e-05, "loss": 0.7059, "step": 6336 }, { "epoch": 0.8003157313126529, "grad_norm": 1.8359375, "learning_rate": 1.3198951418654887e-05, "loss": 0.6325, "step": 6337 }, { "epoch": 0.8004420238377141, "grad_norm": 1.7734375, "learning_rate": 1.319705982515205e-05, "loss": 0.6157, "step": 6338 }, { "epoch": 0.8005683163627753, "grad_norm": 1.6484375, "learning_rate": 1.319516810422211e-05, "loss": 0.6353, "step": 6339 }, { "epoch": 0.8006946088878365, "grad_norm": 1.9296875, "learning_rate": 1.3193276255940464e-05, "loss": 0.6675, "step": 6340 }, { "epoch": 0.8008209014128976, "grad_norm": 1.859375, "learning_rate": 1.3191384280382517e-05, "loss": 0.638, "step": 6341 }, { "epoch": 0.8009471939379588, "grad_norm": 1.7109375, "learning_rate": 1.3189492177623681e-05, "loss": 0.4959, "step": 6342 }, { "epoch": 0.80107348646302, "grad_norm": 1.890625, "learning_rate": 1.318759994773937e-05, "loss": 0.7125, "step": 6343 }, { "epoch": 0.8011997789880811, "grad_norm": 1.7421875, "learning_rate": 1.3185707590805004e-05, "loss": 0.6411, "step": 6344 }, { "epoch": 0.8013260715131423, "grad_norm": 1.84375, "learning_rate": 1.3183815106896005e-05, "loss": 0.6333, "step": 6345 }, { "epoch": 0.8014523640382035, "grad_norm": 1.9453125, "learning_rate": 1.3181922496087807e-05, "loss": 0.6803, "step": 6346 }, { "epoch": 0.8015786565632647, "grad_norm": 1.875, "learning_rate": 1.3180029758455839e-05, "loss": 0.6863, "step": 6347 }, { "epoch": 0.8017049490883258, "grad_norm": 1.71875, "learning_rate": 1.3178136894075546e-05, "loss": 0.6374, "step": 6348 }, { "epoch": 0.801831241613387, "grad_norm": 1.9296875, "learning_rate": 1.3176243903022369e-05, "loss": 0.7041, "step": 6349 }, { "epoch": 0.8019575341384482, "grad_norm": 1.90625, "learning_rate": 1.3174350785371763e-05, "loss": 0.6932, "step": 6350 }, { "epoch": 0.8020838266635093, "grad_norm": 1.90625, "learning_rate": 1.317245754119918e-05, "loss": 0.6258, "step": 6351 }, { "epoch": 0.8022101191885705, "grad_norm": 1.6640625, "learning_rate": 1.3170564170580083e-05, "loss": 0.6438, "step": 6352 }, { "epoch": 0.8023364117136317, "grad_norm": 1.6796875, "learning_rate": 1.3168670673589932e-05, "loss": 0.6627, "step": 6353 }, { "epoch": 0.8024627042386929, "grad_norm": 1.7109375, "learning_rate": 1.31667770503042e-05, "loss": 0.676, "step": 6354 }, { "epoch": 0.8025889967637541, "grad_norm": 1.65625, "learning_rate": 1.3164883300798366e-05, "loss": 0.5656, "step": 6355 }, { "epoch": 0.8027152892888152, "grad_norm": 1.6640625, "learning_rate": 1.3162989425147902e-05, "loss": 0.6235, "step": 6356 }, { "epoch": 0.8028415818138764, "grad_norm": 1.6015625, "learning_rate": 1.3161095423428302e-05, "loss": 0.5381, "step": 6357 }, { "epoch": 0.8029678743389376, "grad_norm": 1.7734375, "learning_rate": 1.315920129571505e-05, "loss": 0.598, "step": 6358 }, { "epoch": 0.8030941668639987, "grad_norm": 1.7421875, "learning_rate": 1.3157307042083642e-05, "loss": 0.6124, "step": 6359 }, { "epoch": 0.80322045938906, "grad_norm": 1.7109375, "learning_rate": 1.3155412662609584e-05, "loss": 0.5744, "step": 6360 }, { "epoch": 0.8033467519141211, "grad_norm": 1.625, "learning_rate": 1.3153518157368377e-05, "loss": 0.5281, "step": 6361 }, { "epoch": 0.8034730444391822, "grad_norm": 1.703125, "learning_rate": 1.3151623526435531e-05, "loss": 0.7058, "step": 6362 }, { "epoch": 0.8035993369642435, "grad_norm": 1.8046875, "learning_rate": 1.3149728769886562e-05, "loss": 0.6104, "step": 6363 }, { "epoch": 0.8037256294893046, "grad_norm": 1.8828125, "learning_rate": 1.3147833887796992e-05, "loss": 0.7007, "step": 6364 }, { "epoch": 0.8038519220143657, "grad_norm": 1.9140625, "learning_rate": 1.3145938880242346e-05, "loss": 0.626, "step": 6365 }, { "epoch": 0.803978214539427, "grad_norm": 1.6953125, "learning_rate": 1.3144043747298152e-05, "loss": 0.6994, "step": 6366 }, { "epoch": 0.8041045070644881, "grad_norm": 1.6796875, "learning_rate": 1.3142148489039946e-05, "loss": 0.5762, "step": 6367 }, { "epoch": 0.8042307995895492, "grad_norm": 1.625, "learning_rate": 1.314025310554327e-05, "loss": 0.6106, "step": 6368 }, { "epoch": 0.8043570921146105, "grad_norm": 1.734375, "learning_rate": 1.313835759688367e-05, "loss": 0.6159, "step": 6369 }, { "epoch": 0.8044833846396716, "grad_norm": 1.671875, "learning_rate": 1.3136461963136696e-05, "loss": 0.5752, "step": 6370 }, { "epoch": 0.8046096771647329, "grad_norm": 1.7421875, "learning_rate": 1.3134566204377901e-05, "loss": 0.5854, "step": 6371 }, { "epoch": 0.804735969689794, "grad_norm": 1.984375, "learning_rate": 1.313267032068285e-05, "loss": 0.6531, "step": 6372 }, { "epoch": 0.8048622622148551, "grad_norm": 1.75, "learning_rate": 1.3130774312127103e-05, "loss": 0.5301, "step": 6373 }, { "epoch": 0.8049885547399164, "grad_norm": 1.796875, "learning_rate": 1.3128878178786235e-05, "loss": 0.7164, "step": 6374 }, { "epoch": 0.8051148472649775, "grad_norm": 1.7109375, "learning_rate": 1.3126981920735818e-05, "loss": 0.6674, "step": 6375 }, { "epoch": 0.8052411397900386, "grad_norm": 3.171875, "learning_rate": 1.3125085538051432e-05, "loss": 0.6363, "step": 6376 }, { "epoch": 0.8053674323150999, "grad_norm": 1.9140625, "learning_rate": 1.3123189030808665e-05, "loss": 0.6078, "step": 6377 }, { "epoch": 0.805493724840161, "grad_norm": 1.953125, "learning_rate": 1.3121292399083106e-05, "loss": 0.6205, "step": 6378 }, { "epoch": 0.8056200173652222, "grad_norm": 1.7421875, "learning_rate": 1.311939564295035e-05, "loss": 0.5557, "step": 6379 }, { "epoch": 0.8057463098902834, "grad_norm": 1.6640625, "learning_rate": 1.3117498762485998e-05, "loss": 0.6658, "step": 6380 }, { "epoch": 0.8058726024153445, "grad_norm": 1.90625, "learning_rate": 1.3115601757765654e-05, "loss": 0.5935, "step": 6381 }, { "epoch": 0.8059988949404057, "grad_norm": 1.8203125, "learning_rate": 1.3113704628864928e-05, "loss": 0.6557, "step": 6382 }, { "epoch": 0.8061251874654669, "grad_norm": 1.859375, "learning_rate": 1.3111807375859435e-05, "loss": 0.6602, "step": 6383 }, { "epoch": 0.806251479990528, "grad_norm": 1.7421875, "learning_rate": 1.3109909998824795e-05, "loss": 0.614, "step": 6384 }, { "epoch": 0.8063777725155893, "grad_norm": 1.6875, "learning_rate": 1.3108012497836634e-05, "loss": 0.6204, "step": 6385 }, { "epoch": 0.8065040650406504, "grad_norm": 1.828125, "learning_rate": 1.3106114872970578e-05, "loss": 0.6813, "step": 6386 }, { "epoch": 0.8066303575657116, "grad_norm": 1.6484375, "learning_rate": 1.3104217124302267e-05, "loss": 0.5789, "step": 6387 }, { "epoch": 0.8067566500907728, "grad_norm": 1.875, "learning_rate": 1.3102319251907341e-05, "loss": 0.7371, "step": 6388 }, { "epoch": 0.8068829426158339, "grad_norm": 1.6015625, "learning_rate": 1.310042125586144e-05, "loss": 0.565, "step": 6389 }, { "epoch": 0.8070092351408951, "grad_norm": 1.6953125, "learning_rate": 1.3098523136240216e-05, "loss": 0.6243, "step": 6390 }, { "epoch": 0.8071355276659563, "grad_norm": 1.8515625, "learning_rate": 1.309662489311932e-05, "loss": 0.7153, "step": 6391 }, { "epoch": 0.8072618201910174, "grad_norm": 2.0625, "learning_rate": 1.309472652657442e-05, "loss": 0.7735, "step": 6392 }, { "epoch": 0.8073881127160786, "grad_norm": 1.8515625, "learning_rate": 1.3092828036681171e-05, "loss": 0.7327, "step": 6393 }, { "epoch": 0.8075144052411398, "grad_norm": 1.6796875, "learning_rate": 1.3090929423515243e-05, "loss": 0.6483, "step": 6394 }, { "epoch": 0.807640697766201, "grad_norm": 1.8828125, "learning_rate": 1.3089030687152317e-05, "loss": 0.6894, "step": 6395 }, { "epoch": 0.8077669902912621, "grad_norm": 1.859375, "learning_rate": 1.3087131827668068e-05, "loss": 0.6286, "step": 6396 }, { "epoch": 0.8078932828163233, "grad_norm": 1.859375, "learning_rate": 1.308523284513818e-05, "loss": 0.5957, "step": 6397 }, { "epoch": 0.8080195753413845, "grad_norm": 1.953125, "learning_rate": 1.3083333739638344e-05, "loss": 0.6755, "step": 6398 }, { "epoch": 0.8081458678664456, "grad_norm": 1.7734375, "learning_rate": 1.3081434511244248e-05, "loss": 0.6657, "step": 6399 }, { "epoch": 0.8082721603915068, "grad_norm": 1.84375, "learning_rate": 1.3079535160031598e-05, "loss": 0.7091, "step": 6400 }, { "epoch": 0.808398452916568, "grad_norm": 1.8515625, "learning_rate": 1.3077635686076092e-05, "loss": 0.7269, "step": 6401 }, { "epoch": 0.8085247454416292, "grad_norm": 1.7109375, "learning_rate": 1.3075736089453441e-05, "loss": 0.5821, "step": 6402 }, { "epoch": 0.8086510379666904, "grad_norm": 1.765625, "learning_rate": 1.3073836370239357e-05, "loss": 0.5708, "step": 6403 }, { "epoch": 0.8087773304917515, "grad_norm": 1.8359375, "learning_rate": 1.307193652850956e-05, "loss": 0.6609, "step": 6404 }, { "epoch": 0.8089036230168127, "grad_norm": 1.625, "learning_rate": 1.3070036564339772e-05, "loss": 0.5393, "step": 6405 }, { "epoch": 0.8090299155418739, "grad_norm": 1.9296875, "learning_rate": 1.3068136477805722e-05, "loss": 0.7246, "step": 6406 }, { "epoch": 0.809156208066935, "grad_norm": 1.8671875, "learning_rate": 1.3066236268983141e-05, "loss": 0.7095, "step": 6407 }, { "epoch": 0.8092825005919962, "grad_norm": 1.890625, "learning_rate": 1.3064335937947767e-05, "loss": 0.737, "step": 6408 }, { "epoch": 0.8094087931170574, "grad_norm": 1.8125, "learning_rate": 1.3062435484775346e-05, "loss": 0.6992, "step": 6409 }, { "epoch": 0.8095350856421185, "grad_norm": 1.875, "learning_rate": 1.3060534909541622e-05, "loss": 0.7497, "step": 6410 }, { "epoch": 0.8096613781671798, "grad_norm": 1.8046875, "learning_rate": 1.3058634212322348e-05, "loss": 0.5949, "step": 6411 }, { "epoch": 0.8097876706922409, "grad_norm": 1.859375, "learning_rate": 1.3056733393193279e-05, "loss": 0.6457, "step": 6412 }, { "epoch": 0.809913963217302, "grad_norm": 1.8828125, "learning_rate": 1.3054832452230182e-05, "loss": 0.5911, "step": 6413 }, { "epoch": 0.8100402557423633, "grad_norm": 1.8125, "learning_rate": 1.305293138950882e-05, "loss": 0.6915, "step": 6414 }, { "epoch": 0.8101665482674244, "grad_norm": 1.7109375, "learning_rate": 1.3051030205104969e-05, "loss": 0.6842, "step": 6415 }, { "epoch": 0.8102928407924856, "grad_norm": 1.75, "learning_rate": 1.3049128899094401e-05, "loss": 0.7097, "step": 6416 }, { "epoch": 0.8104191333175468, "grad_norm": 1.9140625, "learning_rate": 1.3047227471552901e-05, "loss": 0.7674, "step": 6417 }, { "epoch": 0.8105454258426079, "grad_norm": 1.8828125, "learning_rate": 1.3045325922556252e-05, "loss": 0.735, "step": 6418 }, { "epoch": 0.8106717183676692, "grad_norm": 1.78125, "learning_rate": 1.3043424252180248e-05, "loss": 0.6558, "step": 6419 }, { "epoch": 0.8107980108927303, "grad_norm": 1.5859375, "learning_rate": 1.3041522460500684e-05, "loss": 0.5115, "step": 6420 }, { "epoch": 0.8109243034177914, "grad_norm": 1.6875, "learning_rate": 1.3039620547593357e-05, "loss": 0.6571, "step": 6421 }, { "epoch": 0.8110505959428527, "grad_norm": 1.734375, "learning_rate": 1.3037718513534078e-05, "loss": 0.6559, "step": 6422 }, { "epoch": 0.8111768884679138, "grad_norm": 1.90625, "learning_rate": 1.3035816358398658e-05, "loss": 0.6148, "step": 6423 }, { "epoch": 0.8113031809929749, "grad_norm": 1.78125, "learning_rate": 1.303391408226291e-05, "loss": 0.6, "step": 6424 }, { "epoch": 0.8114294735180362, "grad_norm": 1.8984375, "learning_rate": 1.3032011685202654e-05, "loss": 0.5936, "step": 6425 }, { "epoch": 0.8115557660430973, "grad_norm": 1.7265625, "learning_rate": 1.3030109167293717e-05, "loss": 0.6307, "step": 6426 }, { "epoch": 0.8116820585681584, "grad_norm": 1.65625, "learning_rate": 1.3028206528611924e-05, "loss": 0.6206, "step": 6427 }, { "epoch": 0.8118083510932197, "grad_norm": 2.0, "learning_rate": 1.3026303769233112e-05, "loss": 0.701, "step": 6428 }, { "epoch": 0.8119346436182808, "grad_norm": 1.8671875, "learning_rate": 1.3024400889233125e-05, "loss": 0.6937, "step": 6429 }, { "epoch": 0.8120609361433421, "grad_norm": 1.84375, "learning_rate": 1.3022497888687798e-05, "loss": 0.6049, "step": 6430 }, { "epoch": 0.8121872286684032, "grad_norm": 2.015625, "learning_rate": 1.302059476767299e-05, "loss": 0.6169, "step": 6431 }, { "epoch": 0.8123135211934643, "grad_norm": 1.703125, "learning_rate": 1.3018691526264547e-05, "loss": 0.5843, "step": 6432 }, { "epoch": 0.8124398137185256, "grad_norm": 1.6328125, "learning_rate": 1.3016788164538331e-05, "loss": 0.601, "step": 6433 }, { "epoch": 0.8125661062435867, "grad_norm": 1.6953125, "learning_rate": 1.3014884682570207e-05, "loss": 0.5404, "step": 6434 }, { "epoch": 0.8126923987686479, "grad_norm": 1.7265625, "learning_rate": 1.3012981080436043e-05, "loss": 0.5928, "step": 6435 }, { "epoch": 0.8128186912937091, "grad_norm": 1.7109375, "learning_rate": 1.3011077358211707e-05, "loss": 0.6142, "step": 6436 }, { "epoch": 0.8129449838187702, "grad_norm": 1.765625, "learning_rate": 1.3009173515973081e-05, "loss": 0.6612, "step": 6437 }, { "epoch": 0.8130712763438314, "grad_norm": 1.8671875, "learning_rate": 1.3007269553796047e-05, "loss": 0.6209, "step": 6438 }, { "epoch": 0.8131975688688926, "grad_norm": 1.6640625, "learning_rate": 1.3005365471756491e-05, "loss": 0.6218, "step": 6439 }, { "epoch": 0.8133238613939537, "grad_norm": 1.8671875, "learning_rate": 1.3003461269930307e-05, "loss": 0.6755, "step": 6440 }, { "epoch": 0.8134501539190149, "grad_norm": 1.9140625, "learning_rate": 1.300155694839339e-05, "loss": 0.769, "step": 6441 }, { "epoch": 0.8135764464440761, "grad_norm": 1.734375, "learning_rate": 1.2999652507221646e-05, "loss": 0.6283, "step": 6442 }, { "epoch": 0.8137027389691373, "grad_norm": 1.6875, "learning_rate": 1.2997747946490974e-05, "loss": 0.6152, "step": 6443 }, { "epoch": 0.8138290314941984, "grad_norm": 1.6171875, "learning_rate": 1.2995843266277293e-05, "loss": 0.5875, "step": 6444 }, { "epoch": 0.8139553240192596, "grad_norm": 1.6953125, "learning_rate": 1.2993938466656512e-05, "loss": 0.6634, "step": 6445 }, { "epoch": 0.8140816165443208, "grad_norm": 1.84375, "learning_rate": 1.2992033547704559e-05, "loss": 0.5777, "step": 6446 }, { "epoch": 0.814207909069382, "grad_norm": 1.734375, "learning_rate": 1.2990128509497353e-05, "loss": 0.5832, "step": 6447 }, { "epoch": 0.8143342015944431, "grad_norm": 1.734375, "learning_rate": 1.2988223352110825e-05, "loss": 0.5551, "step": 6448 }, { "epoch": 0.8144604941195043, "grad_norm": 1.8359375, "learning_rate": 1.2986318075620913e-05, "loss": 0.6155, "step": 6449 }, { "epoch": 0.8145867866445655, "grad_norm": 1.9140625, "learning_rate": 1.2984412680103558e-05, "loss": 0.7631, "step": 6450 }, { "epoch": 0.8147130791696267, "grad_norm": 1.8515625, "learning_rate": 1.2982507165634702e-05, "loss": 0.5798, "step": 6451 }, { "epoch": 0.8148393716946878, "grad_norm": 1.7578125, "learning_rate": 1.2980601532290292e-05, "loss": 0.623, "step": 6452 }, { "epoch": 0.814965664219749, "grad_norm": 1.9375, "learning_rate": 1.2978695780146284e-05, "loss": 0.6961, "step": 6453 }, { "epoch": 0.8150919567448102, "grad_norm": 1.859375, "learning_rate": 1.2976789909278638e-05, "loss": 0.6465, "step": 6454 }, { "epoch": 0.8152182492698713, "grad_norm": 1.8359375, "learning_rate": 1.2974883919763316e-05, "loss": 0.6097, "step": 6455 }, { "epoch": 0.8153445417949325, "grad_norm": 1.8203125, "learning_rate": 1.2972977811676286e-05, "loss": 0.7227, "step": 6456 }, { "epoch": 0.8154708343199937, "grad_norm": 1.96875, "learning_rate": 1.297107158509352e-05, "loss": 0.7178, "step": 6457 }, { "epoch": 0.8155971268450548, "grad_norm": 1.6796875, "learning_rate": 1.2969165240091e-05, "loss": 0.5586, "step": 6458 }, { "epoch": 0.815723419370116, "grad_norm": 1.8046875, "learning_rate": 1.2967258776744704e-05, "loss": 0.6236, "step": 6459 }, { "epoch": 0.8158497118951772, "grad_norm": 1.7578125, "learning_rate": 1.2965352195130621e-05, "loss": 0.6136, "step": 6460 }, { "epoch": 0.8159760044202384, "grad_norm": 1.671875, "learning_rate": 1.296344549532474e-05, "loss": 0.5865, "step": 6461 }, { "epoch": 0.8161022969452996, "grad_norm": 1.8984375, "learning_rate": 1.296153867740306e-05, "loss": 0.706, "step": 6462 }, { "epoch": 0.8162285894703607, "grad_norm": 2.046875, "learning_rate": 1.2959631741441583e-05, "loss": 0.7414, "step": 6463 }, { "epoch": 0.8163548819954219, "grad_norm": 1.703125, "learning_rate": 1.2957724687516313e-05, "loss": 0.6745, "step": 6464 }, { "epoch": 0.8164811745204831, "grad_norm": 1.8515625, "learning_rate": 1.2955817515703257e-05, "loss": 0.7427, "step": 6465 }, { "epoch": 0.8166074670455442, "grad_norm": 1.984375, "learning_rate": 1.2953910226078439e-05, "loss": 0.6692, "step": 6466 }, { "epoch": 0.8167337595706055, "grad_norm": 1.734375, "learning_rate": 1.2952002818717872e-05, "loss": 0.7483, "step": 6467 }, { "epoch": 0.8168600520956666, "grad_norm": 1.875, "learning_rate": 1.2950095293697586e-05, "loss": 0.5938, "step": 6468 }, { "epoch": 0.8169863446207277, "grad_norm": 1.7265625, "learning_rate": 1.2948187651093607e-05, "loss": 0.6073, "step": 6469 }, { "epoch": 0.817112637145789, "grad_norm": 1.734375, "learning_rate": 1.2946279890981968e-05, "loss": 0.6201, "step": 6470 }, { "epoch": 0.8172389296708501, "grad_norm": 1.6875, "learning_rate": 1.2944372013438709e-05, "loss": 0.5611, "step": 6471 }, { "epoch": 0.8173652221959112, "grad_norm": 1.9296875, "learning_rate": 1.2942464018539872e-05, "loss": 0.663, "step": 6472 }, { "epoch": 0.8174915147209725, "grad_norm": 1.9296875, "learning_rate": 1.2940555906361508e-05, "loss": 0.7023, "step": 6473 }, { "epoch": 0.8176178072460336, "grad_norm": 1.75, "learning_rate": 1.2938647676979667e-05, "loss": 0.5877, "step": 6474 }, { "epoch": 0.8177440997710947, "grad_norm": 1.6328125, "learning_rate": 1.2936739330470412e-05, "loss": 0.6365, "step": 6475 }, { "epoch": 0.817870392296156, "grad_norm": 1.9296875, "learning_rate": 1.2934830866909797e-05, "loss": 0.7662, "step": 6476 }, { "epoch": 0.8179966848212171, "grad_norm": 1.7109375, "learning_rate": 1.2932922286373894e-05, "loss": 0.5508, "step": 6477 }, { "epoch": 0.8181229773462784, "grad_norm": 1.7265625, "learning_rate": 1.2931013588938775e-05, "loss": 0.6181, "step": 6478 }, { "epoch": 0.8182492698713395, "grad_norm": 1.703125, "learning_rate": 1.292910477468051e-05, "loss": 0.5941, "step": 6479 }, { "epoch": 0.8183755623964006, "grad_norm": 1.7890625, "learning_rate": 1.2927195843675187e-05, "loss": 0.7495, "step": 6480 }, { "epoch": 0.8185018549214619, "grad_norm": 1.7890625, "learning_rate": 1.2925286795998883e-05, "loss": 0.6434, "step": 6481 }, { "epoch": 0.818628147446523, "grad_norm": 1.6640625, "learning_rate": 1.2923377631727698e-05, "loss": 0.6538, "step": 6482 }, { "epoch": 0.8187544399715841, "grad_norm": 1.8125, "learning_rate": 1.2921468350937723e-05, "loss": 0.6761, "step": 6483 }, { "epoch": 0.8188807324966454, "grad_norm": 1.859375, "learning_rate": 1.2919558953705055e-05, "loss": 0.6993, "step": 6484 }, { "epoch": 0.8190070250217065, "grad_norm": 1.7109375, "learning_rate": 1.2917649440105799e-05, "loss": 0.6054, "step": 6485 }, { "epoch": 0.8191333175467677, "grad_norm": 1.6796875, "learning_rate": 1.2915739810216063e-05, "loss": 0.5716, "step": 6486 }, { "epoch": 0.8192596100718289, "grad_norm": 1.7890625, "learning_rate": 1.2913830064111963e-05, "loss": 0.5791, "step": 6487 }, { "epoch": 0.81938590259689, "grad_norm": 1.6796875, "learning_rate": 1.2911920201869617e-05, "loss": 0.5489, "step": 6488 }, { "epoch": 0.8195121951219512, "grad_norm": 1.8203125, "learning_rate": 1.291001022356514e-05, "loss": 0.6473, "step": 6489 }, { "epoch": 0.8196384876470124, "grad_norm": 1.828125, "learning_rate": 1.2908100129274667e-05, "loss": 0.6841, "step": 6490 }, { "epoch": 0.8197647801720735, "grad_norm": 1.6796875, "learning_rate": 1.2906189919074328e-05, "loss": 0.5915, "step": 6491 }, { "epoch": 0.8198910726971348, "grad_norm": 1.6171875, "learning_rate": 1.2904279593040259e-05, "loss": 0.5528, "step": 6492 }, { "epoch": 0.8200173652221959, "grad_norm": 1.7734375, "learning_rate": 1.2902369151248602e-05, "loss": 0.7054, "step": 6493 }, { "epoch": 0.8201436577472571, "grad_norm": 1.6953125, "learning_rate": 1.2900458593775498e-05, "loss": 0.5995, "step": 6494 }, { "epoch": 0.8202699502723183, "grad_norm": 1.828125, "learning_rate": 1.2898547920697104e-05, "loss": 0.7182, "step": 6495 }, { "epoch": 0.8203962427973794, "grad_norm": 1.6484375, "learning_rate": 1.289663713208957e-05, "loss": 0.6003, "step": 6496 }, { "epoch": 0.8205225353224406, "grad_norm": 1.8515625, "learning_rate": 1.2894726228029055e-05, "loss": 0.652, "step": 6497 }, { "epoch": 0.8206488278475018, "grad_norm": 1.7421875, "learning_rate": 1.2892815208591727e-05, "loss": 0.6414, "step": 6498 }, { "epoch": 0.820775120372563, "grad_norm": 1.7578125, "learning_rate": 1.2890904073853751e-05, "loss": 0.665, "step": 6499 }, { "epoch": 0.8209014128976241, "grad_norm": 1.8515625, "learning_rate": 1.2888992823891307e-05, "loss": 0.6322, "step": 6500 }, { "epoch": 0.8210277054226853, "grad_norm": 1.8359375, "learning_rate": 1.2887081458780561e-05, "loss": 0.6448, "step": 6501 }, { "epoch": 0.8211539979477465, "grad_norm": 1.71875, "learning_rate": 1.2885169978597706e-05, "loss": 0.5934, "step": 6502 }, { "epoch": 0.8212802904728076, "grad_norm": 1.8046875, "learning_rate": 1.2883258383418924e-05, "loss": 0.6762, "step": 6503 }, { "epoch": 0.8214065829978688, "grad_norm": 1.8203125, "learning_rate": 1.2881346673320407e-05, "loss": 0.6559, "step": 6504 }, { "epoch": 0.82153287552293, "grad_norm": 1.8515625, "learning_rate": 1.2879434848378351e-05, "loss": 0.6381, "step": 6505 }, { "epoch": 0.8216591680479911, "grad_norm": 3.703125, "learning_rate": 1.287752290866896e-05, "loss": 0.7869, "step": 6506 }, { "epoch": 0.8217854605730524, "grad_norm": 1.8515625, "learning_rate": 1.2875610854268436e-05, "loss": 0.5988, "step": 6507 }, { "epoch": 0.8219117530981135, "grad_norm": 1.765625, "learning_rate": 1.287369868525299e-05, "loss": 0.7365, "step": 6508 }, { "epoch": 0.8220380456231747, "grad_norm": 1.734375, "learning_rate": 1.2871786401698834e-05, "loss": 0.575, "step": 6509 }, { "epoch": 0.8221643381482359, "grad_norm": 1.7734375, "learning_rate": 1.2869874003682192e-05, "loss": 0.6274, "step": 6510 }, { "epoch": 0.822290630673297, "grad_norm": 1.828125, "learning_rate": 1.2867961491279283e-05, "loss": 0.7067, "step": 6511 }, { "epoch": 0.8224169231983582, "grad_norm": 1.7734375, "learning_rate": 1.2866048864566338e-05, "loss": 0.527, "step": 6512 }, { "epoch": 0.8225432157234194, "grad_norm": 1.8828125, "learning_rate": 1.2864136123619586e-05, "loss": 0.7324, "step": 6513 }, { "epoch": 0.8226695082484805, "grad_norm": 1.859375, "learning_rate": 1.2862223268515268e-05, "loss": 0.6956, "step": 6514 }, { "epoch": 0.8227958007735418, "grad_norm": 1.7109375, "learning_rate": 1.2860310299329626e-05, "loss": 0.6211, "step": 6515 }, { "epoch": 0.8229220932986029, "grad_norm": 1.671875, "learning_rate": 1.2858397216138905e-05, "loss": 0.5503, "step": 6516 }, { "epoch": 0.823048385823664, "grad_norm": 1.9609375, "learning_rate": 1.2856484019019355e-05, "loss": 0.5809, "step": 6517 }, { "epoch": 0.8231746783487253, "grad_norm": 1.796875, "learning_rate": 1.2854570708047231e-05, "loss": 0.6822, "step": 6518 }, { "epoch": 0.8233009708737864, "grad_norm": 1.8203125, "learning_rate": 1.2852657283298798e-05, "loss": 0.6205, "step": 6519 }, { "epoch": 0.8234272633988475, "grad_norm": 1.7890625, "learning_rate": 1.2850743744850314e-05, "loss": 0.6392, "step": 6520 }, { "epoch": 0.8235535559239088, "grad_norm": 1.859375, "learning_rate": 1.2848830092778048e-05, "loss": 0.5838, "step": 6521 }, { "epoch": 0.8236798484489699, "grad_norm": 1.7734375, "learning_rate": 1.2846916327158281e-05, "loss": 0.6047, "step": 6522 }, { "epoch": 0.8238061409740312, "grad_norm": 1.703125, "learning_rate": 1.2845002448067283e-05, "loss": 0.546, "step": 6523 }, { "epoch": 0.8239324334990923, "grad_norm": 1.6953125, "learning_rate": 1.2843088455581342e-05, "loss": 0.6178, "step": 6524 }, { "epoch": 0.8240587260241534, "grad_norm": 1.78125, "learning_rate": 1.2841174349776744e-05, "loss": 0.6318, "step": 6525 }, { "epoch": 0.8241850185492147, "grad_norm": 1.75, "learning_rate": 1.283926013072978e-05, "loss": 0.5826, "step": 6526 }, { "epoch": 0.8243113110742758, "grad_norm": 1.984375, "learning_rate": 1.2837345798516745e-05, "loss": 0.6955, "step": 6527 }, { "epoch": 0.8244376035993369, "grad_norm": 1.859375, "learning_rate": 1.283543135321394e-05, "loss": 0.6917, "step": 6528 }, { "epoch": 0.8245638961243982, "grad_norm": 1.6640625, "learning_rate": 1.2833516794897668e-05, "loss": 0.6308, "step": 6529 }, { "epoch": 0.8246901886494593, "grad_norm": 1.75, "learning_rate": 1.2831602123644247e-05, "loss": 0.643, "step": 6530 }, { "epoch": 0.8248164811745204, "grad_norm": 1.859375, "learning_rate": 1.2829687339529984e-05, "loss": 0.6561, "step": 6531 }, { "epoch": 0.8249427736995817, "grad_norm": 1.65625, "learning_rate": 1.28277724426312e-05, "loss": 0.5944, "step": 6532 }, { "epoch": 0.8250690662246428, "grad_norm": 2.015625, "learning_rate": 1.2825857433024216e-05, "loss": 0.7598, "step": 6533 }, { "epoch": 0.825195358749704, "grad_norm": 1.671875, "learning_rate": 1.282394231078536e-05, "loss": 0.6118, "step": 6534 }, { "epoch": 0.8253216512747652, "grad_norm": 1.8203125, "learning_rate": 1.2822027075990969e-05, "loss": 0.6268, "step": 6535 }, { "epoch": 0.8254479437998263, "grad_norm": 1.6953125, "learning_rate": 1.2820111728717372e-05, "loss": 0.632, "step": 6536 }, { "epoch": 0.8255742363248875, "grad_norm": 1.7734375, "learning_rate": 1.281819626904092e-05, "loss": 0.6604, "step": 6537 }, { "epoch": 0.8257005288499487, "grad_norm": 1.6484375, "learning_rate": 1.2816280697037944e-05, "loss": 0.5395, "step": 6538 }, { "epoch": 0.8258268213750098, "grad_norm": 1.7421875, "learning_rate": 1.2814365012784808e-05, "loss": 0.6384, "step": 6539 }, { "epoch": 0.8259531139000711, "grad_norm": 2.109375, "learning_rate": 1.2812449216357863e-05, "loss": 0.8125, "step": 6540 }, { "epoch": 0.8260794064251322, "grad_norm": 1.71875, "learning_rate": 1.2810533307833465e-05, "loss": 0.6209, "step": 6541 }, { "epoch": 0.8262056989501934, "grad_norm": 1.859375, "learning_rate": 1.280861728728798e-05, "loss": 0.7124, "step": 6542 }, { "epoch": 0.8263319914752546, "grad_norm": 1.828125, "learning_rate": 1.2806701154797772e-05, "loss": 0.662, "step": 6543 }, { "epoch": 0.8264582840003157, "grad_norm": 1.6953125, "learning_rate": 1.2804784910439219e-05, "loss": 0.6272, "step": 6544 }, { "epoch": 0.8265845765253769, "grad_norm": 1.8203125, "learning_rate": 1.2802868554288694e-05, "loss": 0.6776, "step": 6545 }, { "epoch": 0.8267108690504381, "grad_norm": 1.7734375, "learning_rate": 1.280095208642258e-05, "loss": 0.6787, "step": 6546 }, { "epoch": 0.8268371615754992, "grad_norm": 1.71875, "learning_rate": 1.2799035506917258e-05, "loss": 0.6539, "step": 6547 }, { "epoch": 0.8269634541005604, "grad_norm": 1.6640625, "learning_rate": 1.2797118815849129e-05, "loss": 0.5827, "step": 6548 }, { "epoch": 0.8270897466256216, "grad_norm": 1.7734375, "learning_rate": 1.2795202013294578e-05, "loss": 0.6104, "step": 6549 }, { "epoch": 0.8272160391506828, "grad_norm": 1.96875, "learning_rate": 1.2793285099330008e-05, "loss": 0.7195, "step": 6550 }, { "epoch": 0.8273423316757439, "grad_norm": 1.7734375, "learning_rate": 1.2791368074031825e-05, "loss": 0.7366, "step": 6551 }, { "epoch": 0.8274686242008051, "grad_norm": 1.7265625, "learning_rate": 1.2789450937476428e-05, "loss": 0.6082, "step": 6552 }, { "epoch": 0.8275949167258663, "grad_norm": 1.6796875, "learning_rate": 1.2787533689740241e-05, "loss": 0.5339, "step": 6553 }, { "epoch": 0.8277212092509275, "grad_norm": 1.59375, "learning_rate": 1.2785616330899672e-05, "loss": 0.5198, "step": 6554 }, { "epoch": 0.8278475017759886, "grad_norm": 1.8203125, "learning_rate": 1.2783698861031148e-05, "loss": 0.6224, "step": 6555 }, { "epoch": 0.8279737943010498, "grad_norm": 1.8515625, "learning_rate": 1.2781781280211089e-05, "loss": 0.6309, "step": 6556 }, { "epoch": 0.828100086826111, "grad_norm": 1.7734375, "learning_rate": 1.277986358851593e-05, "loss": 0.6394, "step": 6557 }, { "epoch": 0.8282263793511722, "grad_norm": 1.7265625, "learning_rate": 1.2777945786022106e-05, "loss": 0.617, "step": 6558 }, { "epoch": 0.8283526718762333, "grad_norm": 1.796875, "learning_rate": 1.2776027872806052e-05, "loss": 0.6319, "step": 6559 }, { "epoch": 0.8284789644012945, "grad_norm": 1.6171875, "learning_rate": 1.2774109848944214e-05, "loss": 0.6206, "step": 6560 }, { "epoch": 0.8286052569263557, "grad_norm": 1.6328125, "learning_rate": 1.277219171451304e-05, "loss": 0.5624, "step": 6561 }, { "epoch": 0.8287315494514168, "grad_norm": 1.625, "learning_rate": 1.277027346958898e-05, "loss": 0.5798, "step": 6562 }, { "epoch": 0.828857841976478, "grad_norm": 1.9765625, "learning_rate": 1.2768355114248493e-05, "loss": 0.7236, "step": 6563 }, { "epoch": 0.8289841345015392, "grad_norm": 2.015625, "learning_rate": 1.2766436648568041e-05, "loss": 0.656, "step": 6564 }, { "epoch": 0.8291104270266003, "grad_norm": 1.75, "learning_rate": 1.2764518072624084e-05, "loss": 0.709, "step": 6565 }, { "epoch": 0.8292367195516616, "grad_norm": 1.8515625, "learning_rate": 1.2762599386493098e-05, "loss": 0.7, "step": 6566 }, { "epoch": 0.8293630120767227, "grad_norm": 1.7421875, "learning_rate": 1.2760680590251555e-05, "loss": 0.7366, "step": 6567 }, { "epoch": 0.8294893046017838, "grad_norm": 1.828125, "learning_rate": 1.275876168397593e-05, "loss": 0.6517, "step": 6568 }, { "epoch": 0.8296155971268451, "grad_norm": 1.71875, "learning_rate": 1.2756842667742713e-05, "loss": 0.5935, "step": 6569 }, { "epoch": 0.8297418896519062, "grad_norm": 1.671875, "learning_rate": 1.2754923541628386e-05, "loss": 0.5746, "step": 6570 }, { "epoch": 0.8298681821769675, "grad_norm": 1.7421875, "learning_rate": 1.2753004305709441e-05, "loss": 0.6765, "step": 6571 }, { "epoch": 0.8299944747020286, "grad_norm": 1.71875, "learning_rate": 1.275108496006238e-05, "loss": 0.6536, "step": 6572 }, { "epoch": 0.8301207672270897, "grad_norm": 1.75, "learning_rate": 1.2749165504763692e-05, "loss": 0.6206, "step": 6573 }, { "epoch": 0.830247059752151, "grad_norm": 1.8515625, "learning_rate": 1.2747245939889892e-05, "loss": 0.724, "step": 6574 }, { "epoch": 0.8303733522772121, "grad_norm": 1.859375, "learning_rate": 1.2745326265517486e-05, "loss": 0.7212, "step": 6575 }, { "epoch": 0.8304996448022732, "grad_norm": 1.7734375, "learning_rate": 1.2743406481722986e-05, "loss": 0.6261, "step": 6576 }, { "epoch": 0.8306259373273345, "grad_norm": 1.8515625, "learning_rate": 1.2741486588582914e-05, "loss": 0.6224, "step": 6577 }, { "epoch": 0.8307522298523956, "grad_norm": 1.734375, "learning_rate": 1.273956658617379e-05, "loss": 0.5965, "step": 6578 }, { "epoch": 0.8308785223774567, "grad_norm": 1.8671875, "learning_rate": 1.273764647457214e-05, "loss": 0.6252, "step": 6579 }, { "epoch": 0.831004814902518, "grad_norm": 1.96875, "learning_rate": 1.2735726253854494e-05, "loss": 0.7138, "step": 6580 }, { "epoch": 0.8311311074275791, "grad_norm": 1.6484375, "learning_rate": 1.273380592409739e-05, "loss": 0.6202, "step": 6581 }, { "epoch": 0.8312573999526403, "grad_norm": 1.828125, "learning_rate": 1.2731885485377368e-05, "loss": 0.6271, "step": 6582 }, { "epoch": 0.8313836924777015, "grad_norm": 1.625, "learning_rate": 1.2729964937770967e-05, "loss": 0.5476, "step": 6583 }, { "epoch": 0.8315099850027626, "grad_norm": 1.828125, "learning_rate": 1.272804428135474e-05, "loss": 0.6333, "step": 6584 }, { "epoch": 0.8316362775278239, "grad_norm": 1.8359375, "learning_rate": 1.2726123516205242e-05, "loss": 0.6061, "step": 6585 }, { "epoch": 0.831762570052885, "grad_norm": 1.7890625, "learning_rate": 1.2724202642399026e-05, "loss": 0.6356, "step": 6586 }, { "epoch": 0.8318888625779461, "grad_norm": 1.78125, "learning_rate": 1.2722281660012655e-05, "loss": 0.5842, "step": 6587 }, { "epoch": 0.8320151551030074, "grad_norm": 1.8359375, "learning_rate": 1.2720360569122693e-05, "loss": 0.7113, "step": 6588 }, { "epoch": 0.8321414476280685, "grad_norm": 1.8125, "learning_rate": 1.2718439369805711e-05, "loss": 0.6719, "step": 6589 }, { "epoch": 0.8322677401531297, "grad_norm": 1.7734375, "learning_rate": 1.2716518062138283e-05, "loss": 0.6508, "step": 6590 }, { "epoch": 0.8323940326781909, "grad_norm": 1.765625, "learning_rate": 1.2714596646196988e-05, "loss": 0.5931, "step": 6591 }, { "epoch": 0.832520325203252, "grad_norm": 1.78125, "learning_rate": 1.271267512205841e-05, "loss": 0.7578, "step": 6592 }, { "epoch": 0.8326466177283132, "grad_norm": 1.5625, "learning_rate": 1.2710753489799133e-05, "loss": 0.5808, "step": 6593 }, { "epoch": 0.8327729102533744, "grad_norm": 1.6796875, "learning_rate": 1.2708831749495754e-05, "loss": 0.5628, "step": 6594 }, { "epoch": 0.8328992027784355, "grad_norm": 1.9453125, "learning_rate": 1.2706909901224865e-05, "loss": 0.662, "step": 6595 }, { "epoch": 0.8330254953034967, "grad_norm": 1.734375, "learning_rate": 1.270498794506307e-05, "loss": 0.6826, "step": 6596 }, { "epoch": 0.8331517878285579, "grad_norm": 1.890625, "learning_rate": 1.2703065881086966e-05, "loss": 0.6575, "step": 6597 }, { "epoch": 0.833278080353619, "grad_norm": 1.875, "learning_rate": 1.2701143709373172e-05, "loss": 0.6771, "step": 6598 }, { "epoch": 0.8334043728786802, "grad_norm": 1.703125, "learning_rate": 1.2699221429998293e-05, "loss": 0.656, "step": 6599 }, { "epoch": 0.8335306654037414, "grad_norm": 1.625, "learning_rate": 1.2697299043038949e-05, "loss": 0.5529, "step": 6600 }, { "epoch": 0.8336569579288026, "grad_norm": 1.8984375, "learning_rate": 1.2695376548571762e-05, "loss": 0.6529, "step": 6601 }, { "epoch": 0.8337832504538638, "grad_norm": 1.8671875, "learning_rate": 1.2693453946673357e-05, "loss": 0.637, "step": 6602 }, { "epoch": 0.833909542978925, "grad_norm": 1.828125, "learning_rate": 1.2691531237420367e-05, "loss": 0.6541, "step": 6603 }, { "epoch": 0.8340358355039861, "grad_norm": 1.875, "learning_rate": 1.2689608420889426e-05, "loss": 0.6511, "step": 6604 }, { "epoch": 0.8341621280290473, "grad_norm": 1.890625, "learning_rate": 1.268768549715717e-05, "loss": 0.617, "step": 6605 }, { "epoch": 0.8342884205541085, "grad_norm": 1.8515625, "learning_rate": 1.2685762466300244e-05, "loss": 0.5881, "step": 6606 }, { "epoch": 0.8344147130791696, "grad_norm": 1.6171875, "learning_rate": 1.2683839328395296e-05, "loss": 0.5123, "step": 6607 }, { "epoch": 0.8345410056042308, "grad_norm": 1.765625, "learning_rate": 1.2681916083518977e-05, "loss": 0.5831, "step": 6608 }, { "epoch": 0.834667298129292, "grad_norm": 1.8828125, "learning_rate": 1.2679992731747942e-05, "loss": 0.7281, "step": 6609 }, { "epoch": 0.8347935906543531, "grad_norm": 1.8046875, "learning_rate": 1.267806927315885e-05, "loss": 0.7183, "step": 6610 }, { "epoch": 0.8349198831794143, "grad_norm": 1.8671875, "learning_rate": 1.2676145707828366e-05, "loss": 0.7142, "step": 6611 }, { "epoch": 0.8350461757044755, "grad_norm": 2.09375, "learning_rate": 1.2674222035833163e-05, "loss": 0.7029, "step": 6612 }, { "epoch": 0.8351724682295366, "grad_norm": 1.703125, "learning_rate": 1.2672298257249913e-05, "loss": 0.6182, "step": 6613 }, { "epoch": 0.8352987607545979, "grad_norm": 1.6875, "learning_rate": 1.267037437215529e-05, "loss": 0.6366, "step": 6614 }, { "epoch": 0.835425053279659, "grad_norm": 2.203125, "learning_rate": 1.2668450380625977e-05, "loss": 0.7654, "step": 6615 }, { "epoch": 0.8355513458047202, "grad_norm": 1.9296875, "learning_rate": 1.2666526282738655e-05, "loss": 0.6436, "step": 6616 }, { "epoch": 0.8356776383297814, "grad_norm": 1.7890625, "learning_rate": 1.2664602078570023e-05, "loss": 0.6617, "step": 6617 }, { "epoch": 0.8358039308548425, "grad_norm": 1.6640625, "learning_rate": 1.2662677768196767e-05, "loss": 0.5489, "step": 6618 }, { "epoch": 0.8359302233799037, "grad_norm": 1.828125, "learning_rate": 1.266075335169559e-05, "loss": 0.6841, "step": 6619 }, { "epoch": 0.8360565159049649, "grad_norm": 1.8671875, "learning_rate": 1.2658828829143191e-05, "loss": 0.7256, "step": 6620 }, { "epoch": 0.836182808430026, "grad_norm": 1.640625, "learning_rate": 1.265690420061628e-05, "loss": 0.5617, "step": 6621 }, { "epoch": 0.8363091009550873, "grad_norm": 1.859375, "learning_rate": 1.2654979466191568e-05, "loss": 0.6792, "step": 6622 }, { "epoch": 0.8364353934801484, "grad_norm": 2.015625, "learning_rate": 1.265305462594577e-05, "loss": 0.6512, "step": 6623 }, { "epoch": 0.8365616860052095, "grad_norm": 1.796875, "learning_rate": 1.2651129679955604e-05, "loss": 0.7029, "step": 6624 }, { "epoch": 0.8366879785302708, "grad_norm": 1.9375, "learning_rate": 1.2649204628297795e-05, "loss": 0.7094, "step": 6625 }, { "epoch": 0.8368142710553319, "grad_norm": 1.7421875, "learning_rate": 1.2647279471049072e-05, "loss": 0.5542, "step": 6626 }, { "epoch": 0.836940563580393, "grad_norm": 1.765625, "learning_rate": 1.2645354208286164e-05, "loss": 0.6226, "step": 6627 }, { "epoch": 0.8370668561054543, "grad_norm": 2.59375, "learning_rate": 1.264342884008581e-05, "loss": 0.7151, "step": 6628 }, { "epoch": 0.8371931486305154, "grad_norm": 1.765625, "learning_rate": 1.2641503366524748e-05, "loss": 0.6934, "step": 6629 }, { "epoch": 0.8373194411555765, "grad_norm": 1.5546875, "learning_rate": 1.2639577787679725e-05, "loss": 0.5967, "step": 6630 }, { "epoch": 0.8374457336806378, "grad_norm": 1.8203125, "learning_rate": 1.263765210362749e-05, "loss": 0.6382, "step": 6631 }, { "epoch": 0.8375720262056989, "grad_norm": 1.7890625, "learning_rate": 1.2635726314444794e-05, "loss": 0.653, "step": 6632 }, { "epoch": 0.8376983187307602, "grad_norm": 1.890625, "learning_rate": 1.2633800420208396e-05, "loss": 0.7293, "step": 6633 }, { "epoch": 0.8378246112558213, "grad_norm": 1.90625, "learning_rate": 1.2631874420995055e-05, "loss": 0.8146, "step": 6634 }, { "epoch": 0.8379509037808824, "grad_norm": 1.828125, "learning_rate": 1.2629948316881545e-05, "loss": 0.6435, "step": 6635 }, { "epoch": 0.8380771963059437, "grad_norm": 1.765625, "learning_rate": 1.2628022107944624e-05, "loss": 0.6455, "step": 6636 }, { "epoch": 0.8382034888310048, "grad_norm": 1.765625, "learning_rate": 1.2626095794261072e-05, "loss": 0.6327, "step": 6637 }, { "epoch": 0.838329781356066, "grad_norm": 2.21875, "learning_rate": 1.262416937590767e-05, "loss": 0.6426, "step": 6638 }, { "epoch": 0.8384560738811272, "grad_norm": 1.734375, "learning_rate": 1.2622242852961194e-05, "loss": 0.5867, "step": 6639 }, { "epoch": 0.8385823664061883, "grad_norm": 1.6171875, "learning_rate": 1.2620316225498437e-05, "loss": 0.5902, "step": 6640 }, { "epoch": 0.8387086589312495, "grad_norm": 1.5546875, "learning_rate": 1.2618389493596186e-05, "loss": 0.5724, "step": 6641 }, { "epoch": 0.8388349514563107, "grad_norm": 1.75, "learning_rate": 1.2616462657331236e-05, "loss": 0.6443, "step": 6642 }, { "epoch": 0.8389612439813718, "grad_norm": 1.7890625, "learning_rate": 1.2614535716780388e-05, "loss": 0.607, "step": 6643 }, { "epoch": 0.839087536506433, "grad_norm": 1.7890625, "learning_rate": 1.2612608672020442e-05, "loss": 0.6519, "step": 6644 }, { "epoch": 0.8392138290314942, "grad_norm": 1.71875, "learning_rate": 1.2610681523128208e-05, "loss": 0.6369, "step": 6645 }, { "epoch": 0.8393401215565554, "grad_norm": 1.9921875, "learning_rate": 1.2608754270180496e-05, "loss": 0.6442, "step": 6646 }, { "epoch": 0.8394664140816166, "grad_norm": 1.796875, "learning_rate": 1.2606826913254122e-05, "loss": 0.7124, "step": 6647 }, { "epoch": 0.8395927066066777, "grad_norm": 1.7421875, "learning_rate": 1.2604899452425907e-05, "loss": 0.7358, "step": 6648 }, { "epoch": 0.8397189991317389, "grad_norm": 1.703125, "learning_rate": 1.2602971887772672e-05, "loss": 0.6054, "step": 6649 }, { "epoch": 0.8398452916568001, "grad_norm": 1.8125, "learning_rate": 1.2601044219371253e-05, "loss": 0.6656, "step": 6650 }, { "epoch": 0.8399715841818612, "grad_norm": 1.9140625, "learning_rate": 1.2599116447298472e-05, "loss": 0.6693, "step": 6651 }, { "epoch": 0.8400978767069224, "grad_norm": 1.6484375, "learning_rate": 1.259718857163117e-05, "loss": 0.6157, "step": 6652 }, { "epoch": 0.8402241692319836, "grad_norm": 1.6796875, "learning_rate": 1.2595260592446189e-05, "loss": 0.5138, "step": 6653 }, { "epoch": 0.8403504617570448, "grad_norm": 1.8515625, "learning_rate": 1.259333250982037e-05, "loss": 0.7272, "step": 6654 }, { "epoch": 0.8404767542821059, "grad_norm": 1.8359375, "learning_rate": 1.2591404323830566e-05, "loss": 0.6806, "step": 6655 }, { "epoch": 0.8406030468071671, "grad_norm": 1.7578125, "learning_rate": 1.2589476034553623e-05, "loss": 0.6321, "step": 6656 }, { "epoch": 0.8407293393322283, "grad_norm": 1.859375, "learning_rate": 1.2587547642066405e-05, "loss": 0.7232, "step": 6657 }, { "epoch": 0.8408556318572894, "grad_norm": 1.7109375, "learning_rate": 1.2585619146445771e-05, "loss": 0.6452, "step": 6658 }, { "epoch": 0.8409819243823506, "grad_norm": 1.8203125, "learning_rate": 1.2583690547768586e-05, "loss": 0.6505, "step": 6659 }, { "epoch": 0.8411082169074118, "grad_norm": 1.8984375, "learning_rate": 1.2581761846111718e-05, "loss": 0.6636, "step": 6660 }, { "epoch": 0.8412345094324729, "grad_norm": 1.78125, "learning_rate": 1.2579833041552041e-05, "loss": 0.6124, "step": 6661 }, { "epoch": 0.8413608019575342, "grad_norm": 1.78125, "learning_rate": 1.2577904134166434e-05, "loss": 0.6232, "step": 6662 }, { "epoch": 0.8414870944825953, "grad_norm": 1.796875, "learning_rate": 1.2575975124031778e-05, "loss": 0.6367, "step": 6663 }, { "epoch": 0.8416133870076565, "grad_norm": 1.78125, "learning_rate": 1.2574046011224956e-05, "loss": 0.6301, "step": 6664 }, { "epoch": 0.8417396795327177, "grad_norm": 1.9296875, "learning_rate": 1.257211679582286e-05, "loss": 0.7318, "step": 6665 }, { "epoch": 0.8418659720577788, "grad_norm": 1.7421875, "learning_rate": 1.2570187477902387e-05, "loss": 0.6411, "step": 6666 }, { "epoch": 0.84199226458284, "grad_norm": 1.7890625, "learning_rate": 1.2568258057540428e-05, "loss": 0.689, "step": 6667 }, { "epoch": 0.8421185571079012, "grad_norm": 1.7421875, "learning_rate": 1.2566328534813893e-05, "loss": 0.6057, "step": 6668 }, { "epoch": 0.8422448496329623, "grad_norm": 1.9140625, "learning_rate": 1.256439890979968e-05, "loss": 0.6697, "step": 6669 }, { "epoch": 0.8423711421580236, "grad_norm": 1.90625, "learning_rate": 1.2562469182574707e-05, "loss": 0.5966, "step": 6670 }, { "epoch": 0.8424974346830847, "grad_norm": 1.828125, "learning_rate": 1.2560539353215881e-05, "loss": 0.6417, "step": 6671 }, { "epoch": 0.8426237272081458, "grad_norm": 1.8203125, "learning_rate": 1.2558609421800126e-05, "loss": 0.6274, "step": 6672 }, { "epoch": 0.8427500197332071, "grad_norm": 1.7421875, "learning_rate": 1.255667938840436e-05, "loss": 0.6303, "step": 6673 }, { "epoch": 0.8428763122582682, "grad_norm": 1.671875, "learning_rate": 1.2554749253105514e-05, "loss": 0.5643, "step": 6674 }, { "epoch": 0.8430026047833293, "grad_norm": 1.734375, "learning_rate": 1.2552819015980515e-05, "loss": 0.6373, "step": 6675 }, { "epoch": 0.8431288973083906, "grad_norm": 1.71875, "learning_rate": 1.2550888677106299e-05, "loss": 0.6252, "step": 6676 }, { "epoch": 0.8432551898334517, "grad_norm": 1.890625, "learning_rate": 1.2548958236559805e-05, "loss": 0.7004, "step": 6677 }, { "epoch": 0.843381482358513, "grad_norm": 2.03125, "learning_rate": 1.2547027694417975e-05, "loss": 0.6922, "step": 6678 }, { "epoch": 0.8435077748835741, "grad_norm": 1.8984375, "learning_rate": 1.2545097050757758e-05, "loss": 0.691, "step": 6679 }, { "epoch": 0.8436340674086352, "grad_norm": 1.6328125, "learning_rate": 1.2543166305656099e-05, "loss": 0.6177, "step": 6680 }, { "epoch": 0.8437603599336965, "grad_norm": 1.8984375, "learning_rate": 1.254123545918996e-05, "loss": 0.609, "step": 6681 }, { "epoch": 0.8438866524587576, "grad_norm": 1.59375, "learning_rate": 1.2539304511436294e-05, "loss": 0.5564, "step": 6682 }, { "epoch": 0.8440129449838187, "grad_norm": 1.7734375, "learning_rate": 1.2537373462472067e-05, "loss": 0.6603, "step": 6683 }, { "epoch": 0.84413923750888, "grad_norm": 1.8046875, "learning_rate": 1.2535442312374247e-05, "loss": 0.6882, "step": 6684 }, { "epoch": 0.8442655300339411, "grad_norm": 1.8984375, "learning_rate": 1.25335110612198e-05, "loss": 0.6366, "step": 6685 }, { "epoch": 0.8443918225590022, "grad_norm": 1.78125, "learning_rate": 1.2531579709085708e-05, "loss": 0.5821, "step": 6686 }, { "epoch": 0.8445181150840635, "grad_norm": 1.8359375, "learning_rate": 1.2529648256048945e-05, "loss": 0.6152, "step": 6687 }, { "epoch": 0.8446444076091246, "grad_norm": 1.6484375, "learning_rate": 1.2527716702186497e-05, "loss": 0.547, "step": 6688 }, { "epoch": 0.8447707001341858, "grad_norm": 1.890625, "learning_rate": 1.2525785047575347e-05, "loss": 0.6052, "step": 6689 }, { "epoch": 0.844896992659247, "grad_norm": 1.9765625, "learning_rate": 1.2523853292292492e-05, "loss": 0.6198, "step": 6690 }, { "epoch": 0.8450232851843081, "grad_norm": 1.71875, "learning_rate": 1.2521921436414924e-05, "loss": 0.6084, "step": 6691 }, { "epoch": 0.8451495777093694, "grad_norm": 1.75, "learning_rate": 1.2519989480019636e-05, "loss": 0.625, "step": 6692 }, { "epoch": 0.8452758702344305, "grad_norm": 1.796875, "learning_rate": 1.2518057423183643e-05, "loss": 0.6085, "step": 6693 }, { "epoch": 0.8454021627594916, "grad_norm": 1.9609375, "learning_rate": 1.2516125265983945e-05, "loss": 0.5975, "step": 6694 }, { "epoch": 0.8455284552845529, "grad_norm": 1.71875, "learning_rate": 1.2514193008497554e-05, "loss": 0.5623, "step": 6695 }, { "epoch": 0.845654747809614, "grad_norm": 1.8046875, "learning_rate": 1.2512260650801484e-05, "loss": 0.7135, "step": 6696 }, { "epoch": 0.8457810403346752, "grad_norm": 1.7890625, "learning_rate": 1.2510328192972758e-05, "loss": 0.7081, "step": 6697 }, { "epoch": 0.8459073328597364, "grad_norm": 1.7109375, "learning_rate": 1.2508395635088395e-05, "loss": 0.5507, "step": 6698 }, { "epoch": 0.8460336253847975, "grad_norm": 1.71875, "learning_rate": 1.2506462977225424e-05, "loss": 0.5862, "step": 6699 }, { "epoch": 0.8461599179098587, "grad_norm": 1.8125, "learning_rate": 1.2504530219460877e-05, "loss": 0.7091, "step": 6700 }, { "epoch": 0.8462862104349199, "grad_norm": 1.65625, "learning_rate": 1.2502597361871787e-05, "loss": 0.6325, "step": 6701 }, { "epoch": 0.846412502959981, "grad_norm": 1.8046875, "learning_rate": 1.2500664404535194e-05, "loss": 0.6293, "step": 6702 }, { "epoch": 0.8465387954850422, "grad_norm": 1.9375, "learning_rate": 1.2498731347528141e-05, "loss": 0.6654, "step": 6703 }, { "epoch": 0.8466650880101034, "grad_norm": 1.7421875, "learning_rate": 1.2496798190927677e-05, "loss": 0.6578, "step": 6704 }, { "epoch": 0.8467913805351646, "grad_norm": 1.6953125, "learning_rate": 1.2494864934810849e-05, "loss": 0.6217, "step": 6705 }, { "epoch": 0.8469176730602257, "grad_norm": 1.796875, "learning_rate": 1.2492931579254714e-05, "loss": 0.6235, "step": 6706 }, { "epoch": 0.8470439655852869, "grad_norm": 1.71875, "learning_rate": 1.2490998124336333e-05, "loss": 0.6103, "step": 6707 }, { "epoch": 0.8471702581103481, "grad_norm": 1.6953125, "learning_rate": 1.2489064570132764e-05, "loss": 0.5437, "step": 6708 }, { "epoch": 0.8472965506354093, "grad_norm": 1.8203125, "learning_rate": 1.248713091672108e-05, "loss": 0.6209, "step": 6709 }, { "epoch": 0.8474228431604705, "grad_norm": 1.765625, "learning_rate": 1.2485197164178342e-05, "loss": 0.6108, "step": 6710 }, { "epoch": 0.8475491356855316, "grad_norm": 1.8125, "learning_rate": 1.2483263312581637e-05, "loss": 0.6396, "step": 6711 }, { "epoch": 0.8476754282105928, "grad_norm": 1.625, "learning_rate": 1.2481329362008034e-05, "loss": 0.5847, "step": 6712 }, { "epoch": 0.847801720735654, "grad_norm": 1.8515625, "learning_rate": 1.2479395312534621e-05, "loss": 0.6941, "step": 6713 }, { "epoch": 0.8479280132607151, "grad_norm": 1.75, "learning_rate": 1.2477461164238484e-05, "loss": 0.6011, "step": 6714 }, { "epoch": 0.8480543057857763, "grad_norm": 2.171875, "learning_rate": 1.2475526917196712e-05, "loss": 0.6617, "step": 6715 }, { "epoch": 0.8481805983108375, "grad_norm": 1.8125, "learning_rate": 1.2473592571486397e-05, "loss": 0.6438, "step": 6716 }, { "epoch": 0.8483068908358986, "grad_norm": 1.890625, "learning_rate": 1.2471658127184643e-05, "loss": 0.684, "step": 6717 }, { "epoch": 0.8484331833609599, "grad_norm": 1.859375, "learning_rate": 1.246972358436855e-05, "loss": 0.7118, "step": 6718 }, { "epoch": 0.848559475886021, "grad_norm": 1.765625, "learning_rate": 1.246778894311522e-05, "loss": 0.5991, "step": 6719 }, { "epoch": 0.8486857684110821, "grad_norm": 1.8359375, "learning_rate": 1.2465854203501767e-05, "loss": 0.6691, "step": 6720 }, { "epoch": 0.8488120609361434, "grad_norm": 1.8515625, "learning_rate": 1.2463919365605308e-05, "loss": 0.6482, "step": 6721 }, { "epoch": 0.8489383534612045, "grad_norm": 1.75, "learning_rate": 1.2461984429502955e-05, "loss": 0.628, "step": 6722 }, { "epoch": 0.8490646459862657, "grad_norm": 1.8828125, "learning_rate": 1.2460049395271836e-05, "loss": 0.7187, "step": 6723 }, { "epoch": 0.8491909385113269, "grad_norm": 1.828125, "learning_rate": 1.2458114262989072e-05, "loss": 0.6411, "step": 6724 }, { "epoch": 0.849317231036388, "grad_norm": 1.75, "learning_rate": 1.2456179032731795e-05, "loss": 0.6254, "step": 6725 }, { "epoch": 0.8494435235614493, "grad_norm": 1.7109375, "learning_rate": 1.2454243704577136e-05, "loss": 0.58, "step": 6726 }, { "epoch": 0.8495698160865104, "grad_norm": 1.7265625, "learning_rate": 1.2452308278602234e-05, "loss": 0.6652, "step": 6727 }, { "epoch": 0.8496961086115715, "grad_norm": 1.734375, "learning_rate": 1.245037275488423e-05, "loss": 0.7199, "step": 6728 }, { "epoch": 0.8498224011366328, "grad_norm": 1.6796875, "learning_rate": 1.2448437133500274e-05, "loss": 0.554, "step": 6729 }, { "epoch": 0.8499486936616939, "grad_norm": 1.6328125, "learning_rate": 1.2446501414527507e-05, "loss": 0.6513, "step": 6730 }, { "epoch": 0.850074986186755, "grad_norm": 1.8125, "learning_rate": 1.244456559804309e-05, "loss": 0.6848, "step": 6731 }, { "epoch": 0.8502012787118163, "grad_norm": 1.7578125, "learning_rate": 1.2442629684124174e-05, "loss": 0.6355, "step": 6732 }, { "epoch": 0.8503275712368774, "grad_norm": 1.78125, "learning_rate": 1.2440693672847925e-05, "loss": 0.6391, "step": 6733 }, { "epoch": 0.8504538637619385, "grad_norm": 1.859375, "learning_rate": 1.2438757564291502e-05, "loss": 0.647, "step": 6734 }, { "epoch": 0.8505801562869998, "grad_norm": 1.7421875, "learning_rate": 1.2436821358532079e-05, "loss": 0.692, "step": 6735 }, { "epoch": 0.8507064488120609, "grad_norm": 1.8515625, "learning_rate": 1.2434885055646823e-05, "loss": 0.6729, "step": 6736 }, { "epoch": 0.8508327413371221, "grad_norm": 1.859375, "learning_rate": 1.2432948655712915e-05, "loss": 0.7525, "step": 6737 }, { "epoch": 0.8509590338621833, "grad_norm": 1.8203125, "learning_rate": 1.2431012158807534e-05, "loss": 0.6833, "step": 6738 }, { "epoch": 0.8510853263872444, "grad_norm": 1.6015625, "learning_rate": 1.2429075565007865e-05, "loss": 0.5817, "step": 6739 }, { "epoch": 0.8512116189123057, "grad_norm": 1.671875, "learning_rate": 1.2427138874391094e-05, "loss": 0.6212, "step": 6740 }, { "epoch": 0.8513379114373668, "grad_norm": 1.71875, "learning_rate": 1.2425202087034412e-05, "loss": 0.6912, "step": 6741 }, { "epoch": 0.851464203962428, "grad_norm": 1.765625, "learning_rate": 1.2423265203015018e-05, "loss": 0.6542, "step": 6742 }, { "epoch": 0.8515904964874892, "grad_norm": 1.7265625, "learning_rate": 1.2421328222410109e-05, "loss": 0.7144, "step": 6743 }, { "epoch": 0.8517167890125503, "grad_norm": 1.640625, "learning_rate": 1.2419391145296891e-05, "loss": 0.5583, "step": 6744 }, { "epoch": 0.8518430815376115, "grad_norm": 1.8125, "learning_rate": 1.2417453971752566e-05, "loss": 0.6446, "step": 6745 }, { "epoch": 0.8519693740626727, "grad_norm": 1.65625, "learning_rate": 1.2415516701854352e-05, "loss": 0.5622, "step": 6746 }, { "epoch": 0.8520956665877338, "grad_norm": 1.7890625, "learning_rate": 1.2413579335679457e-05, "loss": 0.661, "step": 6747 }, { "epoch": 0.852221959112795, "grad_norm": 2.046875, "learning_rate": 1.2411641873305104e-05, "loss": 0.7217, "step": 6748 }, { "epoch": 0.8523482516378562, "grad_norm": 1.734375, "learning_rate": 1.2409704314808518e-05, "loss": 0.598, "step": 6749 }, { "epoch": 0.8524745441629173, "grad_norm": 1.7109375, "learning_rate": 1.240776666026692e-05, "loss": 0.5288, "step": 6750 }, { "epoch": 0.8526008366879785, "grad_norm": 1.8125, "learning_rate": 1.240582890975754e-05, "loss": 0.674, "step": 6751 }, { "epoch": 0.8527271292130397, "grad_norm": 1.6484375, "learning_rate": 1.2403891063357618e-05, "loss": 0.5369, "step": 6752 }, { "epoch": 0.8528534217381009, "grad_norm": 1.734375, "learning_rate": 1.2401953121144387e-05, "loss": 0.6816, "step": 6753 }, { "epoch": 0.8529797142631621, "grad_norm": 1.9453125, "learning_rate": 1.240001508319509e-05, "loss": 0.6741, "step": 6754 }, { "epoch": 0.8531060067882232, "grad_norm": 1.7578125, "learning_rate": 1.2398076949586972e-05, "loss": 0.6062, "step": 6755 }, { "epoch": 0.8532322993132844, "grad_norm": 1.8984375, "learning_rate": 1.2396138720397283e-05, "loss": 0.6315, "step": 6756 }, { "epoch": 0.8533585918383456, "grad_norm": 1.7734375, "learning_rate": 1.2394200395703277e-05, "loss": 0.6837, "step": 6757 }, { "epoch": 0.8534848843634067, "grad_norm": 1.78125, "learning_rate": 1.239226197558221e-05, "loss": 0.5775, "step": 6758 }, { "epoch": 0.8536111768884679, "grad_norm": 1.7421875, "learning_rate": 1.2390323460111341e-05, "loss": 0.6869, "step": 6759 }, { "epoch": 0.8537374694135291, "grad_norm": 1.875, "learning_rate": 1.2388384849367939e-05, "loss": 0.6468, "step": 6760 }, { "epoch": 0.8538637619385903, "grad_norm": 1.703125, "learning_rate": 1.238644614342927e-05, "loss": 0.518, "step": 6761 }, { "epoch": 0.8539900544636514, "grad_norm": 1.8203125, "learning_rate": 1.2384507342372603e-05, "loss": 0.5983, "step": 6762 }, { "epoch": 0.8541163469887126, "grad_norm": 1.5390625, "learning_rate": 1.2382568446275219e-05, "loss": 0.5599, "step": 6763 }, { "epoch": 0.8542426395137738, "grad_norm": 1.859375, "learning_rate": 1.2380629455214392e-05, "loss": 0.5295, "step": 6764 }, { "epoch": 0.8543689320388349, "grad_norm": 1.953125, "learning_rate": 1.2378690369267412e-05, "loss": 0.6951, "step": 6765 }, { "epoch": 0.8544952245638961, "grad_norm": 1.6640625, "learning_rate": 1.2376751188511564e-05, "loss": 0.6318, "step": 6766 }, { "epoch": 0.8546215170889573, "grad_norm": 1.6875, "learning_rate": 1.2374811913024136e-05, "loss": 0.5465, "step": 6767 }, { "epoch": 0.8547478096140184, "grad_norm": 1.9296875, "learning_rate": 1.2372872542882428e-05, "loss": 0.6324, "step": 6768 }, { "epoch": 0.8548741021390797, "grad_norm": 1.8046875, "learning_rate": 1.2370933078163734e-05, "loss": 0.6188, "step": 6769 }, { "epoch": 0.8550003946641408, "grad_norm": 1.7890625, "learning_rate": 1.236899351894536e-05, "loss": 0.5677, "step": 6770 }, { "epoch": 0.855126687189202, "grad_norm": 1.7890625, "learning_rate": 1.2367053865304609e-05, "loss": 0.642, "step": 6771 }, { "epoch": 0.8552529797142632, "grad_norm": 1.96875, "learning_rate": 1.2365114117318793e-05, "loss": 0.7665, "step": 6772 }, { "epoch": 0.8553792722393243, "grad_norm": 2.03125, "learning_rate": 1.2363174275065221e-05, "loss": 0.6912, "step": 6773 }, { "epoch": 0.8555055647643856, "grad_norm": 1.8984375, "learning_rate": 1.236123433862122e-05, "loss": 0.6813, "step": 6774 }, { "epoch": 0.8556318572894467, "grad_norm": 1.8046875, "learning_rate": 1.2359294308064103e-05, "loss": 0.5857, "step": 6775 }, { "epoch": 0.8557581498145078, "grad_norm": 1.8046875, "learning_rate": 1.2357354183471198e-05, "loss": 0.6854, "step": 6776 }, { "epoch": 0.8558844423395691, "grad_norm": 1.8359375, "learning_rate": 1.2355413964919834e-05, "loss": 0.711, "step": 6777 }, { "epoch": 0.8560107348646302, "grad_norm": 1.7578125, "learning_rate": 1.2353473652487342e-05, "loss": 0.5881, "step": 6778 }, { "epoch": 0.8561370273896913, "grad_norm": 1.6875, "learning_rate": 1.2351533246251059e-05, "loss": 0.6243, "step": 6779 }, { "epoch": 0.8562633199147526, "grad_norm": 1.6484375, "learning_rate": 1.2349592746288327e-05, "loss": 0.5629, "step": 6780 }, { "epoch": 0.8563896124398137, "grad_norm": 1.890625, "learning_rate": 1.2347652152676483e-05, "loss": 0.7369, "step": 6781 }, { "epoch": 0.8565159049648748, "grad_norm": 1.765625, "learning_rate": 1.234571146549288e-05, "loss": 0.6076, "step": 6782 }, { "epoch": 0.8566421974899361, "grad_norm": 1.859375, "learning_rate": 1.2343770684814869e-05, "loss": 0.6862, "step": 6783 }, { "epoch": 0.8567684900149972, "grad_norm": 1.90625, "learning_rate": 1.2341829810719805e-05, "loss": 0.7052, "step": 6784 }, { "epoch": 0.8568947825400585, "grad_norm": 1.765625, "learning_rate": 1.2339888843285044e-05, "loss": 0.7039, "step": 6785 }, { "epoch": 0.8570210750651196, "grad_norm": 1.6875, "learning_rate": 1.2337947782587951e-05, "loss": 0.6781, "step": 6786 }, { "epoch": 0.8571473675901807, "grad_norm": 1.6484375, "learning_rate": 1.233600662870589e-05, "loss": 0.5757, "step": 6787 }, { "epoch": 0.857273660115242, "grad_norm": 1.71875, "learning_rate": 1.2334065381716231e-05, "loss": 0.625, "step": 6788 }, { "epoch": 0.8573999526403031, "grad_norm": 1.8046875, "learning_rate": 1.233212404169635e-05, "loss": 0.6636, "step": 6789 }, { "epoch": 0.8575262451653642, "grad_norm": 1.8359375, "learning_rate": 1.2330182608723617e-05, "loss": 0.732, "step": 6790 }, { "epoch": 0.8576525376904255, "grad_norm": 1.734375, "learning_rate": 1.232824108287542e-05, "loss": 0.5813, "step": 6791 }, { "epoch": 0.8577788302154866, "grad_norm": 1.734375, "learning_rate": 1.2326299464229143e-05, "loss": 0.6225, "step": 6792 }, { "epoch": 0.8579051227405478, "grad_norm": 1.875, "learning_rate": 1.2324357752862173e-05, "loss": 0.6309, "step": 6793 }, { "epoch": 0.858031415265609, "grad_norm": 1.75, "learning_rate": 1.23224159488519e-05, "loss": 0.6186, "step": 6794 }, { "epoch": 0.8581577077906701, "grad_norm": 1.7109375, "learning_rate": 1.2320474052275721e-05, "loss": 0.5563, "step": 6795 }, { "epoch": 0.8582840003157313, "grad_norm": 1.703125, "learning_rate": 1.2318532063211036e-05, "loss": 0.6531, "step": 6796 }, { "epoch": 0.8584102928407925, "grad_norm": 1.65625, "learning_rate": 1.2316589981735248e-05, "loss": 0.6558, "step": 6797 }, { "epoch": 0.8585365853658536, "grad_norm": 1.7421875, "learning_rate": 1.2314647807925762e-05, "loss": 0.6214, "step": 6798 }, { "epoch": 0.8586628778909148, "grad_norm": 1.703125, "learning_rate": 1.2312705541859989e-05, "loss": 0.6036, "step": 6799 }, { "epoch": 0.858789170415976, "grad_norm": 1.9921875, "learning_rate": 1.2310763183615346e-05, "loss": 0.6396, "step": 6800 }, { "epoch": 0.8589154629410372, "grad_norm": 1.828125, "learning_rate": 1.2308820733269246e-05, "loss": 0.6812, "step": 6801 }, { "epoch": 0.8590417554660984, "grad_norm": 1.8046875, "learning_rate": 1.2306878190899114e-05, "loss": 0.6719, "step": 6802 }, { "epoch": 0.8591680479911595, "grad_norm": 1.7421875, "learning_rate": 1.2304935556582375e-05, "loss": 0.6119, "step": 6803 }, { "epoch": 0.8592943405162207, "grad_norm": 1.7578125, "learning_rate": 1.2302992830396457e-05, "loss": 0.5969, "step": 6804 }, { "epoch": 0.8594206330412819, "grad_norm": 1.78125, "learning_rate": 1.2301050012418789e-05, "loss": 0.6402, "step": 6805 }, { "epoch": 0.859546925566343, "grad_norm": 1.828125, "learning_rate": 1.2299107102726809e-05, "loss": 0.6435, "step": 6806 }, { "epoch": 0.8596732180914042, "grad_norm": 1.703125, "learning_rate": 1.2297164101397958e-05, "loss": 0.6836, "step": 6807 }, { "epoch": 0.8597995106164654, "grad_norm": 1.9140625, "learning_rate": 1.229522100850968e-05, "loss": 0.7114, "step": 6808 }, { "epoch": 0.8599258031415266, "grad_norm": 1.7734375, "learning_rate": 1.2293277824139424e-05, "loss": 0.6707, "step": 6809 }, { "epoch": 0.8600520956665877, "grad_norm": 1.734375, "learning_rate": 1.2291334548364636e-05, "loss": 0.5912, "step": 6810 }, { "epoch": 0.8601783881916489, "grad_norm": 1.6796875, "learning_rate": 1.228939118126277e-05, "loss": 0.59, "step": 6811 }, { "epoch": 0.8603046807167101, "grad_norm": 1.9375, "learning_rate": 1.2287447722911288e-05, "loss": 0.7159, "step": 6812 }, { "epoch": 0.8604309732417712, "grad_norm": 1.71875, "learning_rate": 1.2285504173387648e-05, "loss": 0.6479, "step": 6813 }, { "epoch": 0.8605572657668324, "grad_norm": 1.7734375, "learning_rate": 1.2283560532769317e-05, "loss": 0.6841, "step": 6814 }, { "epoch": 0.8606835582918936, "grad_norm": 1.875, "learning_rate": 1.2281616801133761e-05, "loss": 0.6306, "step": 6815 }, { "epoch": 0.8608098508169548, "grad_norm": 1.6953125, "learning_rate": 1.2279672978558459e-05, "loss": 0.6258, "step": 6816 }, { "epoch": 0.860936143342016, "grad_norm": 1.828125, "learning_rate": 1.2277729065120883e-05, "loss": 0.6208, "step": 6817 }, { "epoch": 0.8610624358670771, "grad_norm": 1.890625, "learning_rate": 1.227578506089851e-05, "loss": 0.5869, "step": 6818 }, { "epoch": 0.8611887283921383, "grad_norm": 1.796875, "learning_rate": 1.2273840965968828e-05, "loss": 0.671, "step": 6819 }, { "epoch": 0.8613150209171995, "grad_norm": 1.6484375, "learning_rate": 1.2271896780409321e-05, "loss": 0.5763, "step": 6820 }, { "epoch": 0.8614413134422606, "grad_norm": 1.828125, "learning_rate": 1.226995250429748e-05, "loss": 0.632, "step": 6821 }, { "epoch": 0.8615676059673218, "grad_norm": 1.7109375, "learning_rate": 1.2268008137710798e-05, "loss": 0.5977, "step": 6822 }, { "epoch": 0.861693898492383, "grad_norm": 1.8203125, "learning_rate": 1.226606368072678e-05, "loss": 0.7139, "step": 6823 }, { "epoch": 0.8618201910174441, "grad_norm": 1.71875, "learning_rate": 1.2264119133422917e-05, "loss": 0.5998, "step": 6824 }, { "epoch": 0.8619464835425054, "grad_norm": 1.9140625, "learning_rate": 1.2262174495876723e-05, "loss": 0.6695, "step": 6825 }, { "epoch": 0.8620727760675665, "grad_norm": 1.75, "learning_rate": 1.2260229768165699e-05, "loss": 0.6453, "step": 6826 }, { "epoch": 0.8621990685926276, "grad_norm": 1.7578125, "learning_rate": 1.225828495036736e-05, "loss": 0.6758, "step": 6827 }, { "epoch": 0.8623253611176889, "grad_norm": 1.671875, "learning_rate": 1.2256340042559226e-05, "loss": 0.6465, "step": 6828 }, { "epoch": 0.86245165364275, "grad_norm": 1.8984375, "learning_rate": 1.2254395044818809e-05, "loss": 0.6746, "step": 6829 }, { "epoch": 0.8625779461678111, "grad_norm": 1.8671875, "learning_rate": 1.2252449957223635e-05, "loss": 0.6879, "step": 6830 }, { "epoch": 0.8627042386928724, "grad_norm": 1.6953125, "learning_rate": 1.2250504779851234e-05, "loss": 0.5575, "step": 6831 }, { "epoch": 0.8628305312179335, "grad_norm": 1.8515625, "learning_rate": 1.2248559512779133e-05, "loss": 0.6999, "step": 6832 }, { "epoch": 0.8629568237429948, "grad_norm": 1.78125, "learning_rate": 1.2246614156084864e-05, "loss": 0.6778, "step": 6833 }, { "epoch": 0.8630831162680559, "grad_norm": 2.109375, "learning_rate": 1.2244668709845969e-05, "loss": 0.7046, "step": 6834 }, { "epoch": 0.863209408793117, "grad_norm": 1.7109375, "learning_rate": 1.2242723174139982e-05, "loss": 0.5515, "step": 6835 }, { "epoch": 0.8633357013181783, "grad_norm": 1.9140625, "learning_rate": 1.2240777549044456e-05, "loss": 0.6427, "step": 6836 }, { "epoch": 0.8634619938432394, "grad_norm": 1.8046875, "learning_rate": 1.223883183463693e-05, "loss": 0.6526, "step": 6837 }, { "epoch": 0.8635882863683005, "grad_norm": 2.0, "learning_rate": 1.223688603099496e-05, "loss": 0.7043, "step": 6838 }, { "epoch": 0.8637145788933618, "grad_norm": 1.8828125, "learning_rate": 1.22349401381961e-05, "loss": 0.6949, "step": 6839 }, { "epoch": 0.8638408714184229, "grad_norm": 1.703125, "learning_rate": 1.2232994156317913e-05, "loss": 0.7059, "step": 6840 }, { "epoch": 0.863967163943484, "grad_norm": 1.8359375, "learning_rate": 1.2231048085437955e-05, "loss": 0.5298, "step": 6841 }, { "epoch": 0.8640934564685453, "grad_norm": 1.890625, "learning_rate": 1.2229101925633795e-05, "loss": 0.6736, "step": 6842 }, { "epoch": 0.8642197489936064, "grad_norm": 1.71875, "learning_rate": 1.2227155676983e-05, "loss": 0.5993, "step": 6843 }, { "epoch": 0.8643460415186676, "grad_norm": 1.7421875, "learning_rate": 1.2225209339563144e-05, "loss": 0.576, "step": 6844 }, { "epoch": 0.8644723340437288, "grad_norm": 1.7578125, "learning_rate": 1.2223262913451806e-05, "loss": 0.6434, "step": 6845 }, { "epoch": 0.8645986265687899, "grad_norm": 1.6640625, "learning_rate": 1.222131639872656e-05, "loss": 0.52, "step": 6846 }, { "epoch": 0.8647249190938512, "grad_norm": 1.75, "learning_rate": 1.2219369795464994e-05, "loss": 0.7511, "step": 6847 }, { "epoch": 0.8648512116189123, "grad_norm": 1.734375, "learning_rate": 1.2217423103744694e-05, "loss": 0.6972, "step": 6848 }, { "epoch": 0.8649775041439735, "grad_norm": 1.9375, "learning_rate": 1.2215476323643252e-05, "loss": 0.6918, "step": 6849 }, { "epoch": 0.8651037966690347, "grad_norm": 1.9375, "learning_rate": 1.2213529455238258e-05, "loss": 0.7474, "step": 6850 }, { "epoch": 0.8652300891940958, "grad_norm": 1.9296875, "learning_rate": 1.2211582498607312e-05, "loss": 0.7703, "step": 6851 }, { "epoch": 0.865356381719157, "grad_norm": 1.8203125, "learning_rate": 1.2209635453828015e-05, "loss": 0.7494, "step": 6852 }, { "epoch": 0.8654826742442182, "grad_norm": 1.8125, "learning_rate": 1.220768832097797e-05, "loss": 0.5903, "step": 6853 }, { "epoch": 0.8656089667692793, "grad_norm": 1.78125, "learning_rate": 1.2205741100134784e-05, "loss": 0.6257, "step": 6854 }, { "epoch": 0.8657352592943405, "grad_norm": 1.953125, "learning_rate": 1.2203793791376075e-05, "loss": 0.7224, "step": 6855 }, { "epoch": 0.8658615518194017, "grad_norm": 1.828125, "learning_rate": 1.2201846394779451e-05, "loss": 0.6398, "step": 6856 }, { "epoch": 0.8659878443444629, "grad_norm": 1.6640625, "learning_rate": 1.2199898910422534e-05, "loss": 0.5881, "step": 6857 }, { "epoch": 0.866114136869524, "grad_norm": 2.03125, "learning_rate": 1.2197951338382946e-05, "loss": 0.6098, "step": 6858 }, { "epoch": 0.8662404293945852, "grad_norm": 1.765625, "learning_rate": 1.219600367873831e-05, "loss": 0.641, "step": 6859 }, { "epoch": 0.8663667219196464, "grad_norm": 1.734375, "learning_rate": 1.2194055931566257e-05, "loss": 0.5858, "step": 6860 }, { "epoch": 0.8664930144447075, "grad_norm": 1.6640625, "learning_rate": 1.2192108096944422e-05, "loss": 0.584, "step": 6861 }, { "epoch": 0.8666193069697687, "grad_norm": 1.7265625, "learning_rate": 1.2190160174950437e-05, "loss": 0.6087, "step": 6862 }, { "epoch": 0.8667455994948299, "grad_norm": 1.8046875, "learning_rate": 1.2188212165661939e-05, "loss": 0.6534, "step": 6863 }, { "epoch": 0.8668718920198911, "grad_norm": 1.8125, "learning_rate": 1.218626406915658e-05, "loss": 0.6561, "step": 6864 }, { "epoch": 0.8669981845449523, "grad_norm": 1.7578125, "learning_rate": 1.2184315885511996e-05, "loss": 0.647, "step": 6865 }, { "epoch": 0.8671244770700134, "grad_norm": 1.8046875, "learning_rate": 1.2182367614805846e-05, "loss": 0.5312, "step": 6866 }, { "epoch": 0.8672507695950746, "grad_norm": 1.7734375, "learning_rate": 1.2180419257115779e-05, "loss": 0.6561, "step": 6867 }, { "epoch": 0.8673770621201358, "grad_norm": 1.90625, "learning_rate": 1.2178470812519451e-05, "loss": 0.7463, "step": 6868 }, { "epoch": 0.8675033546451969, "grad_norm": 2.140625, "learning_rate": 1.2176522281094526e-05, "loss": 0.7824, "step": 6869 }, { "epoch": 0.8676296471702581, "grad_norm": 1.7734375, "learning_rate": 1.2174573662918662e-05, "loss": 0.5426, "step": 6870 }, { "epoch": 0.8677559396953193, "grad_norm": 1.6484375, "learning_rate": 1.2172624958069532e-05, "loss": 0.6195, "step": 6871 }, { "epoch": 0.8678822322203804, "grad_norm": 1.78125, "learning_rate": 1.2170676166624802e-05, "loss": 0.6537, "step": 6872 }, { "epoch": 0.8680085247454417, "grad_norm": 2.015625, "learning_rate": 1.216872728866215e-05, "loss": 0.7531, "step": 6873 }, { "epoch": 0.8681348172705028, "grad_norm": 1.9375, "learning_rate": 1.2166778324259252e-05, "loss": 0.7243, "step": 6874 }, { "epoch": 0.8682611097955639, "grad_norm": 1.78125, "learning_rate": 1.216482927349379e-05, "loss": 0.6819, "step": 6875 }, { "epoch": 0.8683874023206252, "grad_norm": 1.8984375, "learning_rate": 1.2162880136443447e-05, "loss": 0.642, "step": 6876 }, { "epoch": 0.8685136948456863, "grad_norm": 1.7265625, "learning_rate": 1.2160930913185908e-05, "loss": 0.652, "step": 6877 }, { "epoch": 0.8686399873707475, "grad_norm": 1.6328125, "learning_rate": 1.2158981603798871e-05, "loss": 0.5908, "step": 6878 }, { "epoch": 0.8687662798958087, "grad_norm": 1.671875, "learning_rate": 1.2157032208360029e-05, "loss": 0.6558, "step": 6879 }, { "epoch": 0.8688925724208698, "grad_norm": 1.75, "learning_rate": 1.2155082726947076e-05, "loss": 0.6469, "step": 6880 }, { "epoch": 0.8690188649459311, "grad_norm": 1.9375, "learning_rate": 1.2153133159637715e-05, "loss": 0.5484, "step": 6881 }, { "epoch": 0.8691451574709922, "grad_norm": 1.8046875, "learning_rate": 1.2151183506509656e-05, "loss": 0.5148, "step": 6882 }, { "epoch": 0.8692714499960533, "grad_norm": 1.8828125, "learning_rate": 1.2149233767640602e-05, "loss": 0.6566, "step": 6883 }, { "epoch": 0.8693977425211146, "grad_norm": 1.8828125, "learning_rate": 1.2147283943108267e-05, "loss": 0.7661, "step": 6884 }, { "epoch": 0.8695240350461757, "grad_norm": 1.8046875, "learning_rate": 1.2145334032990366e-05, "loss": 0.6068, "step": 6885 }, { "epoch": 0.8696503275712368, "grad_norm": 1.6640625, "learning_rate": 1.2143384037364618e-05, "loss": 0.5205, "step": 6886 }, { "epoch": 0.8697766200962981, "grad_norm": 1.703125, "learning_rate": 1.2141433956308746e-05, "loss": 0.5617, "step": 6887 }, { "epoch": 0.8699029126213592, "grad_norm": 1.7578125, "learning_rate": 1.2139483789900473e-05, "loss": 0.5691, "step": 6888 }, { "epoch": 0.8700292051464203, "grad_norm": 1.84375, "learning_rate": 1.213753353821753e-05, "loss": 0.5992, "step": 6889 }, { "epoch": 0.8701554976714816, "grad_norm": 1.7109375, "learning_rate": 1.2135583201337646e-05, "loss": 0.6188, "step": 6890 }, { "epoch": 0.8702817901965427, "grad_norm": 1.78125, "learning_rate": 1.2133632779338561e-05, "loss": 0.6357, "step": 6891 }, { "epoch": 0.8704080827216039, "grad_norm": 1.6796875, "learning_rate": 1.2131682272298016e-05, "loss": 0.5861, "step": 6892 }, { "epoch": 0.8705343752466651, "grad_norm": 1.6875, "learning_rate": 1.2129731680293747e-05, "loss": 0.5953, "step": 6893 }, { "epoch": 0.8706606677717262, "grad_norm": 2.046875, "learning_rate": 1.2127781003403504e-05, "loss": 0.5358, "step": 6894 }, { "epoch": 0.8707869602967875, "grad_norm": 1.6796875, "learning_rate": 1.2125830241705035e-05, "loss": 0.5847, "step": 6895 }, { "epoch": 0.8709132528218486, "grad_norm": 1.8203125, "learning_rate": 1.2123879395276092e-05, "loss": 0.5655, "step": 6896 }, { "epoch": 0.8710395453469097, "grad_norm": 1.890625, "learning_rate": 1.2121928464194435e-05, "loss": 0.6804, "step": 6897 }, { "epoch": 0.871165837871971, "grad_norm": 1.796875, "learning_rate": 1.2119977448537818e-05, "loss": 0.6972, "step": 6898 }, { "epoch": 0.8712921303970321, "grad_norm": 1.7734375, "learning_rate": 1.2118026348384005e-05, "loss": 0.6506, "step": 6899 }, { "epoch": 0.8714184229220933, "grad_norm": 1.78125, "learning_rate": 1.2116075163810765e-05, "loss": 0.6509, "step": 6900 }, { "epoch": 0.8715447154471545, "grad_norm": 1.9296875, "learning_rate": 1.2114123894895866e-05, "loss": 0.7178, "step": 6901 }, { "epoch": 0.8716710079722156, "grad_norm": 2.0, "learning_rate": 1.211217254171708e-05, "loss": 0.6702, "step": 6902 }, { "epoch": 0.8717973004972768, "grad_norm": 1.78125, "learning_rate": 1.2110221104352184e-05, "loss": 0.6757, "step": 6903 }, { "epoch": 0.871923593022338, "grad_norm": 1.78125, "learning_rate": 1.2108269582878957e-05, "loss": 0.6371, "step": 6904 }, { "epoch": 0.8720498855473992, "grad_norm": 1.8203125, "learning_rate": 1.2106317977375186e-05, "loss": 0.5626, "step": 6905 }, { "epoch": 0.8721761780724603, "grad_norm": 1.765625, "learning_rate": 1.2104366287918651e-05, "loss": 0.6198, "step": 6906 }, { "epoch": 0.8723024705975215, "grad_norm": 1.9609375, "learning_rate": 1.2102414514587145e-05, "loss": 0.7458, "step": 6907 }, { "epoch": 0.8724287631225827, "grad_norm": 1.703125, "learning_rate": 1.2100462657458459e-05, "loss": 0.5936, "step": 6908 }, { "epoch": 0.8725550556476439, "grad_norm": 1.796875, "learning_rate": 1.2098510716610392e-05, "loss": 0.5895, "step": 6909 }, { "epoch": 0.872681348172705, "grad_norm": 1.7890625, "learning_rate": 1.2096558692120742e-05, "loss": 0.6959, "step": 6910 }, { "epoch": 0.8728076406977662, "grad_norm": 1.8671875, "learning_rate": 1.2094606584067312e-05, "loss": 0.6947, "step": 6911 }, { "epoch": 0.8729339332228274, "grad_norm": 1.890625, "learning_rate": 1.209265439252791e-05, "loss": 0.6777, "step": 6912 }, { "epoch": 0.8730602257478886, "grad_norm": 1.6640625, "learning_rate": 1.2090702117580345e-05, "loss": 0.5688, "step": 6913 }, { "epoch": 0.8731865182729497, "grad_norm": 1.703125, "learning_rate": 1.2088749759302427e-05, "loss": 0.6247, "step": 6914 }, { "epoch": 0.8733128107980109, "grad_norm": 1.90625, "learning_rate": 1.2086797317771977e-05, "loss": 0.6277, "step": 6915 }, { "epoch": 0.8734391033230721, "grad_norm": 1.8125, "learning_rate": 1.208484479306681e-05, "loss": 0.6786, "step": 6916 }, { "epoch": 0.8735653958481332, "grad_norm": 1.8671875, "learning_rate": 1.2082892185264751e-05, "loss": 0.7276, "step": 6917 }, { "epoch": 0.8736916883731944, "grad_norm": 1.8046875, "learning_rate": 1.208093949444363e-05, "loss": 0.6782, "step": 6918 }, { "epoch": 0.8738179808982556, "grad_norm": 1.9375, "learning_rate": 1.2078986720681269e-05, "loss": 0.5916, "step": 6919 }, { "epoch": 0.8739442734233167, "grad_norm": 1.8671875, "learning_rate": 1.2077033864055508e-05, "loss": 0.6619, "step": 6920 }, { "epoch": 0.874070565948378, "grad_norm": 1.7890625, "learning_rate": 1.2075080924644178e-05, "loss": 0.6422, "step": 6921 }, { "epoch": 0.8741968584734391, "grad_norm": 1.8515625, "learning_rate": 1.2073127902525122e-05, "loss": 0.6277, "step": 6922 }, { "epoch": 0.8743231509985003, "grad_norm": 1.765625, "learning_rate": 1.207117479777618e-05, "loss": 0.5808, "step": 6923 }, { "epoch": 0.8744494435235615, "grad_norm": 1.71875, "learning_rate": 1.20692216104752e-05, "loss": 0.6335, "step": 6924 }, { "epoch": 0.8745757360486226, "grad_norm": 1.9296875, "learning_rate": 1.206726834070003e-05, "loss": 0.6188, "step": 6925 }, { "epoch": 0.8747020285736838, "grad_norm": 1.6953125, "learning_rate": 1.2065314988528523e-05, "loss": 0.5501, "step": 6926 }, { "epoch": 0.874828321098745, "grad_norm": 1.875, "learning_rate": 1.2063361554038535e-05, "loss": 0.6333, "step": 6927 }, { "epoch": 0.8749546136238061, "grad_norm": 2.046875, "learning_rate": 1.2061408037307927e-05, "loss": 0.7747, "step": 6928 }, { "epoch": 0.8750809061488674, "grad_norm": 1.828125, "learning_rate": 1.205945443841456e-05, "loss": 0.6635, "step": 6929 }, { "epoch": 0.8752071986739285, "grad_norm": 1.8984375, "learning_rate": 1.2057500757436298e-05, "loss": 0.7161, "step": 6930 }, { "epoch": 0.8753334911989896, "grad_norm": 1.7421875, "learning_rate": 1.2055546994451013e-05, "loss": 0.6576, "step": 6931 }, { "epoch": 0.8754597837240509, "grad_norm": 1.7421875, "learning_rate": 1.2053593149536576e-05, "loss": 0.616, "step": 6932 }, { "epoch": 0.875586076249112, "grad_norm": 1.90625, "learning_rate": 1.2051639222770861e-05, "loss": 0.633, "step": 6933 }, { "epoch": 0.8757123687741731, "grad_norm": 1.65625, "learning_rate": 1.204968521423175e-05, "loss": 0.5846, "step": 6934 }, { "epoch": 0.8758386612992344, "grad_norm": 1.8828125, "learning_rate": 1.2047731123997119e-05, "loss": 0.7368, "step": 6935 }, { "epoch": 0.8759649538242955, "grad_norm": 1.921875, "learning_rate": 1.204577695214486e-05, "loss": 0.6735, "step": 6936 }, { "epoch": 0.8760912463493566, "grad_norm": 1.765625, "learning_rate": 1.2043822698752862e-05, "loss": 0.5648, "step": 6937 }, { "epoch": 0.8762175388744179, "grad_norm": 1.6640625, "learning_rate": 1.2041868363899012e-05, "loss": 0.5782, "step": 6938 }, { "epoch": 0.876343831399479, "grad_norm": 1.734375, "learning_rate": 1.2039913947661209e-05, "loss": 0.6356, "step": 6939 }, { "epoch": 0.8764701239245403, "grad_norm": 1.8359375, "learning_rate": 1.2037959450117347e-05, "loss": 0.5763, "step": 6940 }, { "epoch": 0.8765964164496014, "grad_norm": 1.828125, "learning_rate": 1.2036004871345331e-05, "loss": 0.7229, "step": 6941 }, { "epoch": 0.8767227089746625, "grad_norm": 1.734375, "learning_rate": 1.2034050211423065e-05, "loss": 0.5863, "step": 6942 }, { "epoch": 0.8768490014997238, "grad_norm": 1.8984375, "learning_rate": 1.2032095470428457e-05, "loss": 0.6729, "step": 6943 }, { "epoch": 0.8769752940247849, "grad_norm": 1.796875, "learning_rate": 1.2030140648439416e-05, "loss": 0.6118, "step": 6944 }, { "epoch": 0.877101586549846, "grad_norm": 1.8046875, "learning_rate": 1.202818574553386e-05, "loss": 0.6328, "step": 6945 }, { "epoch": 0.8772278790749073, "grad_norm": 1.75, "learning_rate": 1.2026230761789709e-05, "loss": 0.6295, "step": 6946 }, { "epoch": 0.8773541715999684, "grad_norm": 1.703125, "learning_rate": 1.2024275697284875e-05, "loss": 0.6044, "step": 6947 }, { "epoch": 0.8774804641250296, "grad_norm": 1.84375, "learning_rate": 1.2022320552097291e-05, "loss": 0.6412, "step": 6948 }, { "epoch": 0.8776067566500908, "grad_norm": 1.8671875, "learning_rate": 1.202036532630488e-05, "loss": 0.7067, "step": 6949 }, { "epoch": 0.8777330491751519, "grad_norm": 1.71875, "learning_rate": 1.2018410019985572e-05, "loss": 0.5858, "step": 6950 }, { "epoch": 0.8778593417002131, "grad_norm": 1.890625, "learning_rate": 1.2016454633217303e-05, "loss": 0.641, "step": 6951 }, { "epoch": 0.8779856342252743, "grad_norm": 1.8984375, "learning_rate": 1.2014499166078011e-05, "loss": 0.6647, "step": 6952 }, { "epoch": 0.8781119267503354, "grad_norm": 1.71875, "learning_rate": 1.2012543618645629e-05, "loss": 0.6305, "step": 6953 }, { "epoch": 0.8782382192753967, "grad_norm": 1.7578125, "learning_rate": 1.201058799099811e-05, "loss": 0.6253, "step": 6954 }, { "epoch": 0.8783645118004578, "grad_norm": 1.8125, "learning_rate": 1.2008632283213397e-05, "loss": 0.5878, "step": 6955 }, { "epoch": 0.878490804325519, "grad_norm": 1.8828125, "learning_rate": 1.200667649536944e-05, "loss": 0.6151, "step": 6956 }, { "epoch": 0.8786170968505802, "grad_norm": 1.875, "learning_rate": 1.2004720627544188e-05, "loss": 0.6697, "step": 6957 }, { "epoch": 0.8787433893756413, "grad_norm": 1.859375, "learning_rate": 1.2002764679815604e-05, "loss": 0.7142, "step": 6958 }, { "epoch": 0.8788696819007025, "grad_norm": 1.8125, "learning_rate": 1.2000808652261642e-05, "loss": 0.5926, "step": 6959 }, { "epoch": 0.8789959744257637, "grad_norm": 1.84375, "learning_rate": 1.1998852544960266e-05, "loss": 0.6545, "step": 6960 }, { "epoch": 0.8791222669508248, "grad_norm": 2.03125, "learning_rate": 1.1996896357989445e-05, "loss": 0.6586, "step": 6961 }, { "epoch": 0.879248559475886, "grad_norm": 1.7578125, "learning_rate": 1.199494009142714e-05, "loss": 0.632, "step": 6962 }, { "epoch": 0.8793748520009472, "grad_norm": 1.8359375, "learning_rate": 1.199298374535133e-05, "loss": 0.7019, "step": 6963 }, { "epoch": 0.8795011445260084, "grad_norm": 1.8203125, "learning_rate": 1.199102731983999e-05, "loss": 0.678, "step": 6964 }, { "epoch": 0.8796274370510695, "grad_norm": 1.734375, "learning_rate": 1.1989070814971095e-05, "loss": 0.5901, "step": 6965 }, { "epoch": 0.8797537295761307, "grad_norm": 1.734375, "learning_rate": 1.198711423082263e-05, "loss": 0.5613, "step": 6966 }, { "epoch": 0.8798800221011919, "grad_norm": 1.8203125, "learning_rate": 1.1985157567472577e-05, "loss": 0.6818, "step": 6967 }, { "epoch": 0.880006314626253, "grad_norm": 1.71875, "learning_rate": 1.1983200824998924e-05, "loss": 0.6341, "step": 6968 }, { "epoch": 0.8801326071513143, "grad_norm": 1.796875, "learning_rate": 1.1981244003479664e-05, "loss": 0.6171, "step": 6969 }, { "epoch": 0.8802588996763754, "grad_norm": 1.8828125, "learning_rate": 1.1979287102992791e-05, "loss": 0.6465, "step": 6970 }, { "epoch": 0.8803851922014366, "grad_norm": 1.8046875, "learning_rate": 1.19773301236163e-05, "loss": 0.6342, "step": 6971 }, { "epoch": 0.8805114847264978, "grad_norm": 1.8125, "learning_rate": 1.1975373065428193e-05, "loss": 0.7513, "step": 6972 }, { "epoch": 0.8806377772515589, "grad_norm": 1.9921875, "learning_rate": 1.1973415928506474e-05, "loss": 0.6145, "step": 6973 }, { "epoch": 0.8807640697766201, "grad_norm": 1.796875, "learning_rate": 1.197145871292915e-05, "loss": 0.6519, "step": 6974 }, { "epoch": 0.8808903623016813, "grad_norm": 1.9765625, "learning_rate": 1.196950141877423e-05, "loss": 0.616, "step": 6975 }, { "epoch": 0.8810166548267424, "grad_norm": 1.8046875, "learning_rate": 1.1967544046119725e-05, "loss": 0.6186, "step": 6976 }, { "epoch": 0.8811429473518037, "grad_norm": 1.7421875, "learning_rate": 1.1965586595043657e-05, "loss": 0.6506, "step": 6977 }, { "epoch": 0.8812692398768648, "grad_norm": 1.78125, "learning_rate": 1.1963629065624041e-05, "loss": 0.6618, "step": 6978 }, { "epoch": 0.8813955324019259, "grad_norm": 1.6953125, "learning_rate": 1.19616714579389e-05, "loss": 0.594, "step": 6979 }, { "epoch": 0.8815218249269872, "grad_norm": 1.6875, "learning_rate": 1.1959713772066258e-05, "loss": 0.6244, "step": 6980 }, { "epoch": 0.8816481174520483, "grad_norm": 1.859375, "learning_rate": 1.1957756008084144e-05, "loss": 0.617, "step": 6981 }, { "epoch": 0.8817744099771094, "grad_norm": 1.765625, "learning_rate": 1.1955798166070594e-05, "loss": 0.7051, "step": 6982 }, { "epoch": 0.8819007025021707, "grad_norm": 1.96875, "learning_rate": 1.1953840246103639e-05, "loss": 0.6504, "step": 6983 }, { "epoch": 0.8820269950272318, "grad_norm": 1.84375, "learning_rate": 1.1951882248261319e-05, "loss": 0.6553, "step": 6984 }, { "epoch": 0.882153287552293, "grad_norm": 1.6796875, "learning_rate": 1.1949924172621674e-05, "loss": 0.5721, "step": 6985 }, { "epoch": 0.8822795800773542, "grad_norm": 1.65625, "learning_rate": 1.1947966019262747e-05, "loss": 0.5525, "step": 6986 }, { "epoch": 0.8824058726024153, "grad_norm": 1.9375, "learning_rate": 1.1946007788262587e-05, "loss": 0.6909, "step": 6987 }, { "epoch": 0.8825321651274766, "grad_norm": 1.8828125, "learning_rate": 1.1944049479699244e-05, "loss": 0.6957, "step": 6988 }, { "epoch": 0.8826584576525377, "grad_norm": 1.890625, "learning_rate": 1.1942091093650772e-05, "loss": 0.6766, "step": 6989 }, { "epoch": 0.8827847501775988, "grad_norm": 1.7734375, "learning_rate": 1.1940132630195222e-05, "loss": 0.628, "step": 6990 }, { "epoch": 0.8829110427026601, "grad_norm": 1.890625, "learning_rate": 1.1938174089410664e-05, "loss": 0.6511, "step": 6991 }, { "epoch": 0.8830373352277212, "grad_norm": 1.9296875, "learning_rate": 1.1936215471375152e-05, "loss": 0.7321, "step": 6992 }, { "epoch": 0.8831636277527823, "grad_norm": 1.8046875, "learning_rate": 1.1934256776166757e-05, "loss": 0.637, "step": 6993 }, { "epoch": 0.8832899202778436, "grad_norm": 1.765625, "learning_rate": 1.1932298003863545e-05, "loss": 0.5881, "step": 6994 }, { "epoch": 0.8834162128029047, "grad_norm": 1.7578125, "learning_rate": 1.1930339154543589e-05, "loss": 0.6002, "step": 6995 }, { "epoch": 0.8835425053279659, "grad_norm": 1.71875, "learning_rate": 1.1928380228284965e-05, "loss": 0.7218, "step": 6996 }, { "epoch": 0.8836687978530271, "grad_norm": 1.9453125, "learning_rate": 1.192642122516575e-05, "loss": 0.7226, "step": 6997 }, { "epoch": 0.8837950903780882, "grad_norm": 1.7734375, "learning_rate": 1.1924462145264021e-05, "loss": 0.6181, "step": 6998 }, { "epoch": 0.8839213829031494, "grad_norm": 1.7421875, "learning_rate": 1.192250298865787e-05, "loss": 0.6486, "step": 6999 }, { "epoch": 0.8840476754282106, "grad_norm": 1.8203125, "learning_rate": 1.1920543755425378e-05, "loss": 0.6353, "step": 7000 }, { "epoch": 0.8841739679532717, "grad_norm": 1.84375, "learning_rate": 1.1918584445644641e-05, "loss": 0.758, "step": 7001 }, { "epoch": 0.884300260478333, "grad_norm": 1.8515625, "learning_rate": 1.1916625059393747e-05, "loss": 0.7313, "step": 7002 }, { "epoch": 0.8844265530033941, "grad_norm": 1.7734375, "learning_rate": 1.1914665596750794e-05, "loss": 0.6254, "step": 7003 }, { "epoch": 0.8845528455284553, "grad_norm": 1.8125, "learning_rate": 1.1912706057793883e-05, "loss": 0.6311, "step": 7004 }, { "epoch": 0.8846791380535165, "grad_norm": 1.96875, "learning_rate": 1.1910746442601117e-05, "loss": 0.6114, "step": 7005 }, { "epoch": 0.8848054305785776, "grad_norm": 1.7578125, "learning_rate": 1.19087867512506e-05, "loss": 0.6107, "step": 7006 }, { "epoch": 0.8849317231036388, "grad_norm": 1.8359375, "learning_rate": 1.1906826983820439e-05, "loss": 0.6518, "step": 7007 }, { "epoch": 0.8850580156287, "grad_norm": 1.703125, "learning_rate": 1.1904867140388747e-05, "loss": 0.6326, "step": 7008 }, { "epoch": 0.8851843081537611, "grad_norm": 1.671875, "learning_rate": 1.1902907221033639e-05, "loss": 0.5719, "step": 7009 }, { "epoch": 0.8853106006788223, "grad_norm": 1.8046875, "learning_rate": 1.1900947225833234e-05, "loss": 0.6209, "step": 7010 }, { "epoch": 0.8854368932038835, "grad_norm": 1.890625, "learning_rate": 1.1898987154865651e-05, "loss": 0.6298, "step": 7011 }, { "epoch": 0.8855631857289447, "grad_norm": 1.7890625, "learning_rate": 1.1897027008209015e-05, "loss": 0.6496, "step": 7012 }, { "epoch": 0.8856894782540058, "grad_norm": 1.8046875, "learning_rate": 1.1895066785941449e-05, "loss": 0.6773, "step": 7013 }, { "epoch": 0.885815770779067, "grad_norm": 1.734375, "learning_rate": 1.1893106488141089e-05, "loss": 0.6741, "step": 7014 }, { "epoch": 0.8859420633041282, "grad_norm": 1.6640625, "learning_rate": 1.1891146114886061e-05, "loss": 0.5431, "step": 7015 }, { "epoch": 0.8860683558291894, "grad_norm": 1.6875, "learning_rate": 1.1889185666254505e-05, "loss": 0.6418, "step": 7016 }, { "epoch": 0.8861946483542505, "grad_norm": 1.65625, "learning_rate": 1.1887225142324556e-05, "loss": 0.6184, "step": 7017 }, { "epoch": 0.8863209408793117, "grad_norm": 1.8984375, "learning_rate": 1.1885264543174364e-05, "loss": 0.5507, "step": 7018 }, { "epoch": 0.8864472334043729, "grad_norm": 1.828125, "learning_rate": 1.1883303868882066e-05, "loss": 0.6644, "step": 7019 }, { "epoch": 0.8865735259294341, "grad_norm": 1.84375, "learning_rate": 1.188134311952581e-05, "loss": 0.6931, "step": 7020 }, { "epoch": 0.8866998184544952, "grad_norm": 1.9296875, "learning_rate": 1.1879382295183753e-05, "loss": 0.6085, "step": 7021 }, { "epoch": 0.8868261109795564, "grad_norm": 1.7578125, "learning_rate": 1.1877421395934042e-05, "loss": 0.6135, "step": 7022 }, { "epoch": 0.8869524035046176, "grad_norm": 1.9296875, "learning_rate": 1.1875460421854834e-05, "loss": 0.695, "step": 7023 }, { "epoch": 0.8870786960296787, "grad_norm": 2.03125, "learning_rate": 1.1873499373024294e-05, "loss": 0.8429, "step": 7024 }, { "epoch": 0.88720498855474, "grad_norm": 1.6875, "learning_rate": 1.187153824952058e-05, "loss": 0.609, "step": 7025 }, { "epoch": 0.8873312810798011, "grad_norm": 1.734375, "learning_rate": 1.1869577051421859e-05, "loss": 0.5829, "step": 7026 }, { "epoch": 0.8874575736048622, "grad_norm": 1.6640625, "learning_rate": 1.18676157788063e-05, "loss": 0.5748, "step": 7027 }, { "epoch": 0.8875838661299235, "grad_norm": 1.84375, "learning_rate": 1.1865654431752076e-05, "loss": 0.7466, "step": 7028 }, { "epoch": 0.8877101586549846, "grad_norm": 1.8359375, "learning_rate": 1.186369301033736e-05, "loss": 0.601, "step": 7029 }, { "epoch": 0.8878364511800457, "grad_norm": 1.7890625, "learning_rate": 1.1861731514640329e-05, "loss": 0.6199, "step": 7030 }, { "epoch": 0.887962743705107, "grad_norm": 1.9296875, "learning_rate": 1.1859769944739163e-05, "loss": 0.839, "step": 7031 }, { "epoch": 0.8880890362301681, "grad_norm": 1.7890625, "learning_rate": 1.1857808300712048e-05, "loss": 0.5718, "step": 7032 }, { "epoch": 0.8882153287552293, "grad_norm": 1.84375, "learning_rate": 1.185584658263717e-05, "loss": 0.6311, "step": 7033 }, { "epoch": 0.8883416212802905, "grad_norm": 1.609375, "learning_rate": 1.1853884790592716e-05, "loss": 0.5578, "step": 7034 }, { "epoch": 0.8884679138053516, "grad_norm": 1.6328125, "learning_rate": 1.1851922924656875e-05, "loss": 0.5221, "step": 7035 }, { "epoch": 0.8885942063304129, "grad_norm": 1.9296875, "learning_rate": 1.1849960984907853e-05, "loss": 0.6162, "step": 7036 }, { "epoch": 0.888720498855474, "grad_norm": 1.75, "learning_rate": 1.184799897142384e-05, "loss": 0.6075, "step": 7037 }, { "epoch": 0.8888467913805351, "grad_norm": 1.7421875, "learning_rate": 1.184603688428304e-05, "loss": 0.6846, "step": 7038 }, { "epoch": 0.8889730839055964, "grad_norm": 1.8515625, "learning_rate": 1.1844074723563656e-05, "loss": 0.6193, "step": 7039 }, { "epoch": 0.8890993764306575, "grad_norm": 1.828125, "learning_rate": 1.1842112489343896e-05, "loss": 0.6513, "step": 7040 }, { "epoch": 0.8892256689557186, "grad_norm": 1.9375, "learning_rate": 1.1840150181701968e-05, "loss": 0.6669, "step": 7041 }, { "epoch": 0.8893519614807799, "grad_norm": 1.7734375, "learning_rate": 1.1838187800716085e-05, "loss": 0.6238, "step": 7042 }, { "epoch": 0.889478254005841, "grad_norm": 1.6796875, "learning_rate": 1.1836225346464467e-05, "loss": 0.5711, "step": 7043 }, { "epoch": 0.8896045465309022, "grad_norm": 1.8125, "learning_rate": 1.1834262819025326e-05, "loss": 0.6471, "step": 7044 }, { "epoch": 0.8897308390559634, "grad_norm": 1.75, "learning_rate": 1.1832300218476889e-05, "loss": 0.6134, "step": 7045 }, { "epoch": 0.8898571315810245, "grad_norm": 1.9140625, "learning_rate": 1.1830337544897376e-05, "loss": 0.6769, "step": 7046 }, { "epoch": 0.8899834241060858, "grad_norm": 1.7890625, "learning_rate": 1.1828374798365022e-05, "loss": 0.6082, "step": 7047 }, { "epoch": 0.8901097166311469, "grad_norm": 1.75, "learning_rate": 1.182641197895805e-05, "loss": 0.6288, "step": 7048 }, { "epoch": 0.890236009156208, "grad_norm": 1.6953125, "learning_rate": 1.1824449086754694e-05, "loss": 0.5837, "step": 7049 }, { "epoch": 0.8903623016812693, "grad_norm": 1.828125, "learning_rate": 1.1822486121833195e-05, "loss": 0.693, "step": 7050 }, { "epoch": 0.8904885942063304, "grad_norm": 1.7578125, "learning_rate": 1.1820523084271787e-05, "loss": 0.6117, "step": 7051 }, { "epoch": 0.8906148867313916, "grad_norm": 2.078125, "learning_rate": 1.1818559974148715e-05, "loss": 0.712, "step": 7052 }, { "epoch": 0.8907411792564528, "grad_norm": 1.8046875, "learning_rate": 1.181659679154222e-05, "loss": 0.6599, "step": 7053 }, { "epoch": 0.8908674717815139, "grad_norm": 1.6796875, "learning_rate": 1.1814633536530556e-05, "loss": 0.6078, "step": 7054 }, { "epoch": 0.8909937643065751, "grad_norm": 1.6328125, "learning_rate": 1.1812670209191968e-05, "loss": 0.6073, "step": 7055 }, { "epoch": 0.8911200568316363, "grad_norm": 1.671875, "learning_rate": 1.1810706809604711e-05, "loss": 0.6826, "step": 7056 }, { "epoch": 0.8912463493566974, "grad_norm": 1.921875, "learning_rate": 1.1808743337847043e-05, "loss": 0.6429, "step": 7057 }, { "epoch": 0.8913726418817586, "grad_norm": 1.75, "learning_rate": 1.1806779793997222e-05, "loss": 0.5589, "step": 7058 }, { "epoch": 0.8914989344068198, "grad_norm": 1.65625, "learning_rate": 1.1804816178133512e-05, "loss": 0.6517, "step": 7059 }, { "epoch": 0.891625226931881, "grad_norm": 1.953125, "learning_rate": 1.1802852490334173e-05, "loss": 0.6566, "step": 7060 }, { "epoch": 0.8917515194569421, "grad_norm": 1.90625, "learning_rate": 1.180088873067748e-05, "loss": 0.6608, "step": 7061 }, { "epoch": 0.8918778119820033, "grad_norm": 1.8984375, "learning_rate": 1.1798924899241695e-05, "loss": 0.5959, "step": 7062 }, { "epoch": 0.8920041045070645, "grad_norm": 1.796875, "learning_rate": 1.1796960996105101e-05, "loss": 0.5921, "step": 7063 }, { "epoch": 0.8921303970321257, "grad_norm": 1.875, "learning_rate": 1.1794997021345967e-05, "loss": 0.6181, "step": 7064 }, { "epoch": 0.8922566895571868, "grad_norm": 1.9296875, "learning_rate": 1.1793032975042578e-05, "loss": 0.6888, "step": 7065 }, { "epoch": 0.892382982082248, "grad_norm": 1.9296875, "learning_rate": 1.1791068857273212e-05, "loss": 0.7059, "step": 7066 }, { "epoch": 0.8925092746073092, "grad_norm": 1.9609375, "learning_rate": 1.1789104668116154e-05, "loss": 0.7026, "step": 7067 }, { "epoch": 0.8926355671323704, "grad_norm": 2.140625, "learning_rate": 1.1787140407649695e-05, "loss": 0.6319, "step": 7068 }, { "epoch": 0.8927618596574315, "grad_norm": 1.953125, "learning_rate": 1.1785176075952124e-05, "loss": 0.7754, "step": 7069 }, { "epoch": 0.8928881521824927, "grad_norm": 1.65625, "learning_rate": 1.1783211673101732e-05, "loss": 0.5669, "step": 7070 }, { "epoch": 0.8930144447075539, "grad_norm": 1.6875, "learning_rate": 1.178124719917682e-05, "loss": 0.6385, "step": 7071 }, { "epoch": 0.893140737232615, "grad_norm": 1.6875, "learning_rate": 1.1779282654255685e-05, "loss": 0.5861, "step": 7072 }, { "epoch": 0.8932670297576762, "grad_norm": 1.671875, "learning_rate": 1.177731803841663e-05, "loss": 0.6437, "step": 7073 }, { "epoch": 0.8933933222827374, "grad_norm": 1.828125, "learning_rate": 1.1775353351737957e-05, "loss": 0.6144, "step": 7074 }, { "epoch": 0.8935196148077985, "grad_norm": 1.78125, "learning_rate": 1.1773388594297976e-05, "loss": 0.7643, "step": 7075 }, { "epoch": 0.8936459073328598, "grad_norm": 1.734375, "learning_rate": 1.1771423766175e-05, "loss": 0.5963, "step": 7076 }, { "epoch": 0.8937721998579209, "grad_norm": 1.7734375, "learning_rate": 1.1769458867447337e-05, "loss": 0.5415, "step": 7077 }, { "epoch": 0.8938984923829821, "grad_norm": 2.046875, "learning_rate": 1.1767493898193305e-05, "loss": 0.7179, "step": 7078 }, { "epoch": 0.8940247849080433, "grad_norm": 1.859375, "learning_rate": 1.1765528858491224e-05, "loss": 0.6856, "step": 7079 }, { "epoch": 0.8941510774331044, "grad_norm": 1.8984375, "learning_rate": 1.1763563748419414e-05, "loss": 0.6423, "step": 7080 }, { "epoch": 0.8942773699581656, "grad_norm": 1.78125, "learning_rate": 1.1761598568056202e-05, "loss": 0.654, "step": 7081 }, { "epoch": 0.8944036624832268, "grad_norm": 1.71875, "learning_rate": 1.1759633317479914e-05, "loss": 0.6306, "step": 7082 }, { "epoch": 0.8945299550082879, "grad_norm": 1.8125, "learning_rate": 1.1757667996768883e-05, "loss": 0.6558, "step": 7083 }, { "epoch": 0.8946562475333492, "grad_norm": 1.71875, "learning_rate": 1.1755702606001435e-05, "loss": 0.6546, "step": 7084 }, { "epoch": 0.8947825400584103, "grad_norm": 1.8125, "learning_rate": 1.1753737145255912e-05, "loss": 0.5924, "step": 7085 }, { "epoch": 0.8949088325834714, "grad_norm": 1.90625, "learning_rate": 1.1751771614610653e-05, "loss": 0.7706, "step": 7086 }, { "epoch": 0.8950351251085327, "grad_norm": 1.7265625, "learning_rate": 1.1749806014143992e-05, "loss": 0.6019, "step": 7087 }, { "epoch": 0.8951614176335938, "grad_norm": 1.7265625, "learning_rate": 1.174784034393428e-05, "loss": 0.6182, "step": 7088 }, { "epoch": 0.8952877101586549, "grad_norm": 1.7109375, "learning_rate": 1.1745874604059862e-05, "loss": 0.5569, "step": 7089 }, { "epoch": 0.8954140026837162, "grad_norm": 1.9140625, "learning_rate": 1.1743908794599087e-05, "loss": 0.6679, "step": 7090 }, { "epoch": 0.8955402952087773, "grad_norm": 2.046875, "learning_rate": 1.1741942915630309e-05, "loss": 0.777, "step": 7091 }, { "epoch": 0.8956665877338384, "grad_norm": 1.75, "learning_rate": 1.1739976967231881e-05, "loss": 0.6469, "step": 7092 }, { "epoch": 0.8957928802588997, "grad_norm": 1.859375, "learning_rate": 1.173801094948216e-05, "loss": 0.5906, "step": 7093 }, { "epoch": 0.8959191727839608, "grad_norm": 1.7578125, "learning_rate": 1.1736044862459514e-05, "loss": 0.6112, "step": 7094 }, { "epoch": 0.8960454653090221, "grad_norm": 1.6796875, "learning_rate": 1.17340787062423e-05, "loss": 0.6044, "step": 7095 }, { "epoch": 0.8961717578340832, "grad_norm": 1.8515625, "learning_rate": 1.1732112480908884e-05, "loss": 0.7454, "step": 7096 }, { "epoch": 0.8962980503591443, "grad_norm": 1.734375, "learning_rate": 1.1730146186537637e-05, "loss": 0.6155, "step": 7097 }, { "epoch": 0.8964243428842056, "grad_norm": 1.890625, "learning_rate": 1.1728179823206928e-05, "loss": 0.6293, "step": 7098 }, { "epoch": 0.8965506354092667, "grad_norm": 1.703125, "learning_rate": 1.1726213390995135e-05, "loss": 0.5652, "step": 7099 }, { "epoch": 0.8966769279343278, "grad_norm": 1.8828125, "learning_rate": 1.1724246889980638e-05, "loss": 0.6321, "step": 7100 }, { "epoch": 0.8968032204593891, "grad_norm": 1.921875, "learning_rate": 1.1722280320241809e-05, "loss": 0.6533, "step": 7101 }, { "epoch": 0.8969295129844502, "grad_norm": 1.8046875, "learning_rate": 1.1720313681857036e-05, "loss": 0.6104, "step": 7102 }, { "epoch": 0.8970558055095114, "grad_norm": 1.7421875, "learning_rate": 1.1718346974904704e-05, "loss": 0.5897, "step": 7103 }, { "epoch": 0.8971820980345726, "grad_norm": 1.78125, "learning_rate": 1.17163801994632e-05, "loss": 0.6442, "step": 7104 }, { "epoch": 0.8973083905596337, "grad_norm": 1.7734375, "learning_rate": 1.1714413355610914e-05, "loss": 0.6219, "step": 7105 }, { "epoch": 0.8974346830846949, "grad_norm": 1.7890625, "learning_rate": 1.1712446443426245e-05, "loss": 0.6023, "step": 7106 }, { "epoch": 0.8975609756097561, "grad_norm": 1.8359375, "learning_rate": 1.1710479462987579e-05, "loss": 0.6429, "step": 7107 }, { "epoch": 0.8976872681348173, "grad_norm": 1.8203125, "learning_rate": 1.1708512414373325e-05, "loss": 0.6922, "step": 7108 }, { "epoch": 0.8978135606598785, "grad_norm": 1.765625, "learning_rate": 1.1706545297661883e-05, "loss": 0.6887, "step": 7109 }, { "epoch": 0.8979398531849396, "grad_norm": 1.8984375, "learning_rate": 1.1704578112931653e-05, "loss": 0.6017, "step": 7110 }, { "epoch": 0.8980661457100008, "grad_norm": 1.71875, "learning_rate": 1.1702610860261048e-05, "loss": 0.6386, "step": 7111 }, { "epoch": 0.898192438235062, "grad_norm": 1.859375, "learning_rate": 1.1700643539728473e-05, "loss": 0.6841, "step": 7112 }, { "epoch": 0.8983187307601231, "grad_norm": 1.7109375, "learning_rate": 1.1698676151412345e-05, "loss": 0.7307, "step": 7113 }, { "epoch": 0.8984450232851843, "grad_norm": 1.7734375, "learning_rate": 1.1696708695391074e-05, "loss": 0.6969, "step": 7114 }, { "epoch": 0.8985713158102455, "grad_norm": 1.828125, "learning_rate": 1.1694741171743085e-05, "loss": 0.6034, "step": 7115 }, { "epoch": 0.8986976083353067, "grad_norm": 1.7421875, "learning_rate": 1.169277358054679e-05, "loss": 0.6283, "step": 7116 }, { "epoch": 0.8988239008603678, "grad_norm": 1.9296875, "learning_rate": 1.169080592188062e-05, "loss": 0.6948, "step": 7117 }, { "epoch": 0.898950193385429, "grad_norm": 1.75, "learning_rate": 1.1688838195822999e-05, "loss": 0.6184, "step": 7118 }, { "epoch": 0.8990764859104902, "grad_norm": 1.8203125, "learning_rate": 1.1686870402452354e-05, "loss": 0.6962, "step": 7119 }, { "epoch": 0.8992027784355513, "grad_norm": 1.921875, "learning_rate": 1.1684902541847121e-05, "loss": 0.6074, "step": 7120 }, { "epoch": 0.8993290709606125, "grad_norm": 1.8828125, "learning_rate": 1.1682934614085728e-05, "loss": 0.5815, "step": 7121 }, { "epoch": 0.8994553634856737, "grad_norm": 1.796875, "learning_rate": 1.1680966619246617e-05, "loss": 0.6682, "step": 7122 }, { "epoch": 0.8995816560107348, "grad_norm": 1.8203125, "learning_rate": 1.1678998557408223e-05, "loss": 0.7273, "step": 7123 }, { "epoch": 0.899707948535796, "grad_norm": 1.640625, "learning_rate": 1.167703042864899e-05, "loss": 0.5645, "step": 7124 }, { "epoch": 0.8998342410608572, "grad_norm": 1.8515625, "learning_rate": 1.1675062233047365e-05, "loss": 0.6357, "step": 7125 }, { "epoch": 0.8999605335859184, "grad_norm": 1.6875, "learning_rate": 1.1673093970681795e-05, "loss": 0.6203, "step": 7126 }, { "epoch": 0.9000868261109796, "grad_norm": 1.6640625, "learning_rate": 1.167112564163073e-05, "loss": 0.5942, "step": 7127 }, { "epoch": 0.9002131186360407, "grad_norm": 1.7265625, "learning_rate": 1.166915724597262e-05, "loss": 0.6089, "step": 7128 }, { "epoch": 0.9003394111611019, "grad_norm": 1.7109375, "learning_rate": 1.1667188783785923e-05, "loss": 0.6651, "step": 7129 }, { "epoch": 0.9004657036861631, "grad_norm": 1.7890625, "learning_rate": 1.1665220255149096e-05, "loss": 0.6551, "step": 7130 }, { "epoch": 0.9005919962112242, "grad_norm": 1.78125, "learning_rate": 1.1663251660140603e-05, "loss": 0.6206, "step": 7131 }, { "epoch": 0.9007182887362855, "grad_norm": 1.9375, "learning_rate": 1.16612829988389e-05, "loss": 0.6483, "step": 7132 }, { "epoch": 0.9008445812613466, "grad_norm": 1.8359375, "learning_rate": 1.1659314271322463e-05, "loss": 0.6054, "step": 7133 }, { "epoch": 0.9009708737864077, "grad_norm": 1.796875, "learning_rate": 1.1657345477669756e-05, "loss": 0.6522, "step": 7134 }, { "epoch": 0.901097166311469, "grad_norm": 1.9921875, "learning_rate": 1.165537661795925e-05, "loss": 0.7104, "step": 7135 }, { "epoch": 0.9012234588365301, "grad_norm": 1.7890625, "learning_rate": 1.1653407692269416e-05, "loss": 0.6326, "step": 7136 }, { "epoch": 0.9013497513615912, "grad_norm": 1.640625, "learning_rate": 1.1651438700678736e-05, "loss": 0.6144, "step": 7137 }, { "epoch": 0.9014760438866525, "grad_norm": 1.7421875, "learning_rate": 1.1649469643265688e-05, "loss": 0.6162, "step": 7138 }, { "epoch": 0.9016023364117136, "grad_norm": 1.890625, "learning_rate": 1.1647500520108751e-05, "loss": 0.8098, "step": 7139 }, { "epoch": 0.9017286289367749, "grad_norm": 1.8359375, "learning_rate": 1.164553133128641e-05, "loss": 0.5972, "step": 7140 }, { "epoch": 0.901854921461836, "grad_norm": 1.796875, "learning_rate": 1.1643562076877157e-05, "loss": 0.6415, "step": 7141 }, { "epoch": 0.9019812139868971, "grad_norm": 1.828125, "learning_rate": 1.1641592756959479e-05, "loss": 0.592, "step": 7142 }, { "epoch": 0.9021075065119584, "grad_norm": 1.7109375, "learning_rate": 1.1639623371611866e-05, "loss": 0.5936, "step": 7143 }, { "epoch": 0.9022337990370195, "grad_norm": 1.7734375, "learning_rate": 1.1637653920912814e-05, "loss": 0.7562, "step": 7144 }, { "epoch": 0.9023600915620806, "grad_norm": 1.7421875, "learning_rate": 1.1635684404940823e-05, "loss": 0.5646, "step": 7145 }, { "epoch": 0.9024863840871419, "grad_norm": 1.859375, "learning_rate": 1.1633714823774392e-05, "loss": 0.6618, "step": 7146 }, { "epoch": 0.902612676612203, "grad_norm": 1.7578125, "learning_rate": 1.163174517749202e-05, "loss": 0.6384, "step": 7147 }, { "epoch": 0.9027389691372641, "grad_norm": 1.7421875, "learning_rate": 1.1629775466172214e-05, "loss": 0.5843, "step": 7148 }, { "epoch": 0.9028652616623254, "grad_norm": 1.828125, "learning_rate": 1.162780568989349e-05, "loss": 0.6428, "step": 7149 }, { "epoch": 0.9029915541873865, "grad_norm": 1.7734375, "learning_rate": 1.1625835848734349e-05, "loss": 0.6335, "step": 7150 }, { "epoch": 0.9031178467124477, "grad_norm": 1.6328125, "learning_rate": 1.1623865942773306e-05, "loss": 0.5605, "step": 7151 }, { "epoch": 0.9032441392375089, "grad_norm": 1.796875, "learning_rate": 1.1621895972088879e-05, "loss": 0.5695, "step": 7152 }, { "epoch": 0.90337043176257, "grad_norm": 1.7578125, "learning_rate": 1.1619925936759587e-05, "loss": 0.7074, "step": 7153 }, { "epoch": 0.9034967242876313, "grad_norm": 1.6875, "learning_rate": 1.1617955836863946e-05, "loss": 0.6056, "step": 7154 }, { "epoch": 0.9036230168126924, "grad_norm": 1.6875, "learning_rate": 1.1615985672480483e-05, "loss": 0.594, "step": 7155 }, { "epoch": 0.9037493093377535, "grad_norm": 1.8359375, "learning_rate": 1.1614015443687723e-05, "loss": 0.6961, "step": 7156 }, { "epoch": 0.9038756018628148, "grad_norm": 1.78125, "learning_rate": 1.1612045150564196e-05, "loss": 0.5876, "step": 7157 }, { "epoch": 0.9040018943878759, "grad_norm": 1.9140625, "learning_rate": 1.1610074793188431e-05, "loss": 0.7054, "step": 7158 }, { "epoch": 0.9041281869129371, "grad_norm": 1.8984375, "learning_rate": 1.1608104371638964e-05, "loss": 0.6693, "step": 7159 }, { "epoch": 0.9042544794379983, "grad_norm": 1.8984375, "learning_rate": 1.160613388599433e-05, "loss": 0.7235, "step": 7160 }, { "epoch": 0.9043807719630594, "grad_norm": 1.8984375, "learning_rate": 1.1604163336333067e-05, "loss": 0.6573, "step": 7161 }, { "epoch": 0.9045070644881206, "grad_norm": 1.765625, "learning_rate": 1.160219272273372e-05, "loss": 0.6747, "step": 7162 }, { "epoch": 0.9046333570131818, "grad_norm": 1.7109375, "learning_rate": 1.1600222045274827e-05, "loss": 0.5814, "step": 7163 }, { "epoch": 0.904759649538243, "grad_norm": 1.796875, "learning_rate": 1.1598251304034938e-05, "loss": 0.5931, "step": 7164 }, { "epoch": 0.9048859420633041, "grad_norm": 1.765625, "learning_rate": 1.1596280499092602e-05, "loss": 0.6011, "step": 7165 }, { "epoch": 0.9050122345883653, "grad_norm": 1.8203125, "learning_rate": 1.1594309630526372e-05, "loss": 0.6143, "step": 7166 }, { "epoch": 0.9051385271134265, "grad_norm": 1.5859375, "learning_rate": 1.1592338698414801e-05, "loss": 0.5501, "step": 7167 }, { "epoch": 0.9052648196384876, "grad_norm": 1.875, "learning_rate": 1.159036770283644e-05, "loss": 0.6487, "step": 7168 }, { "epoch": 0.9053911121635488, "grad_norm": 2.015625, "learning_rate": 1.1588396643869857e-05, "loss": 0.6598, "step": 7169 }, { "epoch": 0.90551740468861, "grad_norm": 1.7734375, "learning_rate": 1.1586425521593609e-05, "loss": 0.6319, "step": 7170 }, { "epoch": 0.9056436972136712, "grad_norm": 1.7421875, "learning_rate": 1.1584454336086261e-05, "loss": 0.5291, "step": 7171 }, { "epoch": 0.9057699897387324, "grad_norm": 1.8828125, "learning_rate": 1.1582483087426377e-05, "loss": 0.6414, "step": 7172 }, { "epoch": 0.9058962822637935, "grad_norm": 1.6953125, "learning_rate": 1.1580511775692531e-05, "loss": 0.5587, "step": 7173 }, { "epoch": 0.9060225747888547, "grad_norm": 1.6796875, "learning_rate": 1.1578540400963294e-05, "loss": 0.5698, "step": 7174 }, { "epoch": 0.9061488673139159, "grad_norm": 1.75, "learning_rate": 1.1576568963317236e-05, "loss": 0.5912, "step": 7175 }, { "epoch": 0.906275159838977, "grad_norm": 1.9453125, "learning_rate": 1.1574597462832937e-05, "loss": 0.5982, "step": 7176 }, { "epoch": 0.9064014523640382, "grad_norm": 1.8828125, "learning_rate": 1.1572625899588978e-05, "loss": 0.6783, "step": 7177 }, { "epoch": 0.9065277448890994, "grad_norm": 1.8828125, "learning_rate": 1.1570654273663938e-05, "loss": 0.621, "step": 7178 }, { "epoch": 0.9066540374141605, "grad_norm": 1.953125, "learning_rate": 1.15686825851364e-05, "loss": 0.7427, "step": 7179 }, { "epoch": 0.9067803299392218, "grad_norm": 1.9921875, "learning_rate": 1.156671083408495e-05, "loss": 0.7011, "step": 7180 }, { "epoch": 0.9069066224642829, "grad_norm": 1.984375, "learning_rate": 1.1564739020588186e-05, "loss": 0.7037, "step": 7181 }, { "epoch": 0.907032914989344, "grad_norm": 1.671875, "learning_rate": 1.156276714472469e-05, "loss": 0.5585, "step": 7182 }, { "epoch": 0.9071592075144053, "grad_norm": 1.65625, "learning_rate": 1.1560795206573061e-05, "loss": 0.6542, "step": 7183 }, { "epoch": 0.9072855000394664, "grad_norm": 1.6875, "learning_rate": 1.1558823206211894e-05, "loss": 0.6157, "step": 7184 }, { "epoch": 0.9074117925645276, "grad_norm": 1.7421875, "learning_rate": 1.1556851143719787e-05, "loss": 0.6658, "step": 7185 }, { "epoch": 0.9075380850895888, "grad_norm": 1.8984375, "learning_rate": 1.1554879019175346e-05, "loss": 0.5465, "step": 7186 }, { "epoch": 0.9076643776146499, "grad_norm": 1.8515625, "learning_rate": 1.155290683265717e-05, "loss": 0.6074, "step": 7187 }, { "epoch": 0.9077906701397112, "grad_norm": 1.8359375, "learning_rate": 1.1550934584243868e-05, "loss": 0.6292, "step": 7188 }, { "epoch": 0.9079169626647723, "grad_norm": 1.78125, "learning_rate": 1.1548962274014052e-05, "loss": 0.5681, "step": 7189 }, { "epoch": 0.9080432551898334, "grad_norm": 1.9140625, "learning_rate": 1.154698990204633e-05, "loss": 0.7098, "step": 7190 }, { "epoch": 0.9081695477148947, "grad_norm": 1.7421875, "learning_rate": 1.1545017468419315e-05, "loss": 0.649, "step": 7191 }, { "epoch": 0.9082958402399558, "grad_norm": 1.9765625, "learning_rate": 1.1543044973211628e-05, "loss": 0.7316, "step": 7192 }, { "epoch": 0.9084221327650169, "grad_norm": 1.7421875, "learning_rate": 1.1541072416501883e-05, "loss": 0.5897, "step": 7193 }, { "epoch": 0.9085484252900782, "grad_norm": 1.8359375, "learning_rate": 1.1539099798368702e-05, "loss": 0.6983, "step": 7194 }, { "epoch": 0.9086747178151393, "grad_norm": 1.8203125, "learning_rate": 1.1537127118890714e-05, "loss": 0.648, "step": 7195 }, { "epoch": 0.9088010103402004, "grad_norm": 1.671875, "learning_rate": 1.1535154378146538e-05, "loss": 0.629, "step": 7196 }, { "epoch": 0.9089273028652617, "grad_norm": 1.8046875, "learning_rate": 1.1533181576214804e-05, "loss": 0.6202, "step": 7197 }, { "epoch": 0.9090535953903228, "grad_norm": 1.8125, "learning_rate": 1.1531208713174151e-05, "loss": 0.6417, "step": 7198 }, { "epoch": 0.909179887915384, "grad_norm": 1.9140625, "learning_rate": 1.1529235789103204e-05, "loss": 0.5925, "step": 7199 }, { "epoch": 0.9093061804404452, "grad_norm": 2.015625, "learning_rate": 1.1527262804080601e-05, "loss": 0.6247, "step": 7200 }, { "epoch": 0.9094324729655063, "grad_norm": 1.7109375, "learning_rate": 1.1525289758184983e-05, "loss": 0.6226, "step": 7201 }, { "epoch": 0.9095587654905676, "grad_norm": 1.59375, "learning_rate": 1.1523316651494989e-05, "loss": 0.588, "step": 7202 }, { "epoch": 0.9096850580156287, "grad_norm": 1.8046875, "learning_rate": 1.1521343484089262e-05, "loss": 0.6244, "step": 7203 }, { "epoch": 0.9098113505406898, "grad_norm": 1.859375, "learning_rate": 1.1519370256046449e-05, "loss": 0.7009, "step": 7204 }, { "epoch": 0.9099376430657511, "grad_norm": 1.9765625, "learning_rate": 1.1517396967445196e-05, "loss": 0.6421, "step": 7205 }, { "epoch": 0.9100639355908122, "grad_norm": 1.6953125, "learning_rate": 1.1515423618364155e-05, "loss": 0.6208, "step": 7206 }, { "epoch": 0.9101902281158734, "grad_norm": 1.8203125, "learning_rate": 1.1513450208881978e-05, "loss": 0.6203, "step": 7207 }, { "epoch": 0.9103165206409346, "grad_norm": 1.765625, "learning_rate": 1.1511476739077323e-05, "loss": 0.6153, "step": 7208 }, { "epoch": 0.9104428131659957, "grad_norm": 1.75, "learning_rate": 1.1509503209028843e-05, "loss": 0.6194, "step": 7209 }, { "epoch": 0.9105691056910569, "grad_norm": 2.09375, "learning_rate": 1.1507529618815202e-05, "loss": 0.9175, "step": 7210 }, { "epoch": 0.9106953982161181, "grad_norm": 2.0, "learning_rate": 1.1505555968515064e-05, "loss": 0.751, "step": 7211 }, { "epoch": 0.9108216907411792, "grad_norm": 1.671875, "learning_rate": 1.150358225820709e-05, "loss": 0.6293, "step": 7212 }, { "epoch": 0.9109479832662404, "grad_norm": 1.75, "learning_rate": 1.1501608487969949e-05, "loss": 0.6533, "step": 7213 }, { "epoch": 0.9110742757913016, "grad_norm": 1.8828125, "learning_rate": 1.1499634657882308e-05, "loss": 0.6786, "step": 7214 }, { "epoch": 0.9112005683163628, "grad_norm": 2.0625, "learning_rate": 1.1497660768022843e-05, "loss": 0.7008, "step": 7215 }, { "epoch": 0.911326860841424, "grad_norm": 1.7265625, "learning_rate": 1.1495686818470228e-05, "loss": 0.6494, "step": 7216 }, { "epoch": 0.9114531533664851, "grad_norm": 1.671875, "learning_rate": 1.149371280930314e-05, "loss": 0.5818, "step": 7217 }, { "epoch": 0.9115794458915463, "grad_norm": 1.8828125, "learning_rate": 1.1491738740600253e-05, "loss": 0.6039, "step": 7218 }, { "epoch": 0.9117057384166075, "grad_norm": 1.7109375, "learning_rate": 1.1489764612440257e-05, "loss": 0.5133, "step": 7219 }, { "epoch": 0.9118320309416686, "grad_norm": 1.7109375, "learning_rate": 1.1487790424901833e-05, "loss": 0.6362, "step": 7220 }, { "epoch": 0.9119583234667298, "grad_norm": 1.765625, "learning_rate": 1.1485816178063662e-05, "loss": 0.6096, "step": 7221 }, { "epoch": 0.912084615991791, "grad_norm": 1.8203125, "learning_rate": 1.148384187200444e-05, "loss": 0.6833, "step": 7222 }, { "epoch": 0.9122109085168522, "grad_norm": 1.8515625, "learning_rate": 1.1481867506802852e-05, "loss": 0.7295, "step": 7223 }, { "epoch": 0.9123372010419133, "grad_norm": 1.90625, "learning_rate": 1.1479893082537597e-05, "loss": 0.6843, "step": 7224 }, { "epoch": 0.9124634935669745, "grad_norm": 1.6484375, "learning_rate": 1.1477918599287365e-05, "loss": 0.557, "step": 7225 }, { "epoch": 0.9125897860920357, "grad_norm": 1.7265625, "learning_rate": 1.1475944057130861e-05, "loss": 0.5642, "step": 7226 }, { "epoch": 0.9127160786170968, "grad_norm": 1.7265625, "learning_rate": 1.1473969456146782e-05, "loss": 0.6172, "step": 7227 }, { "epoch": 0.912842371142158, "grad_norm": 1.7265625, "learning_rate": 1.147199479641383e-05, "loss": 0.6777, "step": 7228 }, { "epoch": 0.9129686636672192, "grad_norm": 1.71875, "learning_rate": 1.1470020078010711e-05, "loss": 0.5696, "step": 7229 }, { "epoch": 0.9130949561922803, "grad_norm": 1.90625, "learning_rate": 1.1468045301016133e-05, "loss": 0.742, "step": 7230 }, { "epoch": 0.9132212487173416, "grad_norm": 1.7109375, "learning_rate": 1.1466070465508803e-05, "loss": 0.612, "step": 7231 }, { "epoch": 0.9133475412424027, "grad_norm": 1.9609375, "learning_rate": 1.1464095571567437e-05, "loss": 0.6526, "step": 7232 }, { "epoch": 0.9134738337674639, "grad_norm": 1.8125, "learning_rate": 1.1462120619270748e-05, "loss": 0.5666, "step": 7233 }, { "epoch": 0.9136001262925251, "grad_norm": 1.828125, "learning_rate": 1.1460145608697451e-05, "loss": 0.7063, "step": 7234 }, { "epoch": 0.9137264188175862, "grad_norm": 1.90625, "learning_rate": 1.145817053992627e-05, "loss": 0.6848, "step": 7235 }, { "epoch": 0.9138527113426475, "grad_norm": 1.7421875, "learning_rate": 1.1456195413035922e-05, "loss": 0.567, "step": 7236 }, { "epoch": 0.9139790038677086, "grad_norm": 1.796875, "learning_rate": 1.1454220228105133e-05, "loss": 0.6561, "step": 7237 }, { "epoch": 0.9141052963927697, "grad_norm": 1.703125, "learning_rate": 1.1452244985212628e-05, "loss": 0.5643, "step": 7238 }, { "epoch": 0.914231588917831, "grad_norm": 2.0625, "learning_rate": 1.1450269684437136e-05, "loss": 0.6789, "step": 7239 }, { "epoch": 0.9143578814428921, "grad_norm": 1.84375, "learning_rate": 1.1448294325857387e-05, "loss": 0.6691, "step": 7240 }, { "epoch": 0.9144841739679532, "grad_norm": 1.703125, "learning_rate": 1.1446318909552117e-05, "loss": 0.5591, "step": 7241 }, { "epoch": 0.9146104664930145, "grad_norm": 2.015625, "learning_rate": 1.1444343435600053e-05, "loss": 0.6233, "step": 7242 }, { "epoch": 0.9147367590180756, "grad_norm": 1.8125, "learning_rate": 1.1442367904079943e-05, "loss": 0.6828, "step": 7243 }, { "epoch": 0.9148630515431367, "grad_norm": 1.765625, "learning_rate": 1.1440392315070523e-05, "loss": 0.5702, "step": 7244 }, { "epoch": 0.914989344068198, "grad_norm": 1.7890625, "learning_rate": 1.1438416668650534e-05, "loss": 0.5898, "step": 7245 }, { "epoch": 0.9151156365932591, "grad_norm": 1.8515625, "learning_rate": 1.1436440964898721e-05, "loss": 0.683, "step": 7246 }, { "epoch": 0.9152419291183204, "grad_norm": 1.9609375, "learning_rate": 1.1434465203893832e-05, "loss": 0.6496, "step": 7247 }, { "epoch": 0.9153682216433815, "grad_norm": 1.796875, "learning_rate": 1.1432489385714614e-05, "loss": 0.7269, "step": 7248 }, { "epoch": 0.9154945141684426, "grad_norm": 1.6796875, "learning_rate": 1.143051351043982e-05, "loss": 0.5953, "step": 7249 }, { "epoch": 0.9156208066935039, "grad_norm": 1.8828125, "learning_rate": 1.1428537578148202e-05, "loss": 0.6807, "step": 7250 }, { "epoch": 0.915747099218565, "grad_norm": 1.8046875, "learning_rate": 1.1426561588918517e-05, "loss": 0.6439, "step": 7251 }, { "epoch": 0.9158733917436261, "grad_norm": 1.6875, "learning_rate": 1.1424585542829522e-05, "loss": 0.6724, "step": 7252 }, { "epoch": 0.9159996842686874, "grad_norm": 1.8515625, "learning_rate": 1.1422609439959981e-05, "loss": 0.6639, "step": 7253 }, { "epoch": 0.9161259767937485, "grad_norm": 1.8046875, "learning_rate": 1.1420633280388655e-05, "loss": 0.5905, "step": 7254 }, { "epoch": 0.9162522693188097, "grad_norm": 1.9140625, "learning_rate": 1.1418657064194307e-05, "loss": 0.7188, "step": 7255 }, { "epoch": 0.9163785618438709, "grad_norm": 1.78125, "learning_rate": 1.1416680791455706e-05, "loss": 0.6328, "step": 7256 }, { "epoch": 0.916504854368932, "grad_norm": 1.7421875, "learning_rate": 1.1414704462251621e-05, "loss": 0.6414, "step": 7257 }, { "epoch": 0.9166311468939932, "grad_norm": 1.921875, "learning_rate": 1.1412728076660822e-05, "loss": 0.6135, "step": 7258 }, { "epoch": 0.9167574394190544, "grad_norm": 1.9140625, "learning_rate": 1.1410751634762085e-05, "loss": 0.6114, "step": 7259 }, { "epoch": 0.9168837319441155, "grad_norm": 1.71875, "learning_rate": 1.1408775136634185e-05, "loss": 0.5207, "step": 7260 }, { "epoch": 0.9170100244691767, "grad_norm": 1.8828125, "learning_rate": 1.1406798582355905e-05, "loss": 0.7359, "step": 7261 }, { "epoch": 0.9171363169942379, "grad_norm": 1.8203125, "learning_rate": 1.140482197200602e-05, "loss": 0.7252, "step": 7262 }, { "epoch": 0.917262609519299, "grad_norm": 1.7734375, "learning_rate": 1.1402845305663313e-05, "loss": 0.6785, "step": 7263 }, { "epoch": 0.9173889020443603, "grad_norm": 1.703125, "learning_rate": 1.1400868583406575e-05, "loss": 0.6172, "step": 7264 }, { "epoch": 0.9175151945694214, "grad_norm": 1.7421875, "learning_rate": 1.1398891805314586e-05, "loss": 0.6604, "step": 7265 }, { "epoch": 0.9176414870944826, "grad_norm": 1.6953125, "learning_rate": 1.139691497146614e-05, "loss": 0.5316, "step": 7266 }, { "epoch": 0.9177677796195438, "grad_norm": 1.7109375, "learning_rate": 1.139493808194003e-05, "loss": 0.5235, "step": 7267 }, { "epoch": 0.9178940721446049, "grad_norm": 1.8515625, "learning_rate": 1.1392961136815046e-05, "loss": 0.6257, "step": 7268 }, { "epoch": 0.9180203646696661, "grad_norm": 1.7890625, "learning_rate": 1.1390984136169985e-05, "loss": 0.7529, "step": 7269 }, { "epoch": 0.9181466571947273, "grad_norm": 1.7734375, "learning_rate": 1.1389007080083648e-05, "loss": 0.566, "step": 7270 }, { "epoch": 0.9182729497197885, "grad_norm": 1.7578125, "learning_rate": 1.1387029968634836e-05, "loss": 0.6505, "step": 7271 }, { "epoch": 0.9183992422448496, "grad_norm": 1.8984375, "learning_rate": 1.1385052801902348e-05, "loss": 0.7564, "step": 7272 }, { "epoch": 0.9185255347699108, "grad_norm": 1.859375, "learning_rate": 1.1383075579964993e-05, "loss": 0.6304, "step": 7273 }, { "epoch": 0.918651827294972, "grad_norm": 1.828125, "learning_rate": 1.1381098302901577e-05, "loss": 0.6771, "step": 7274 }, { "epoch": 0.9187781198200331, "grad_norm": 1.9375, "learning_rate": 1.1379120970790907e-05, "loss": 0.7668, "step": 7275 }, { "epoch": 0.9189044123450943, "grad_norm": 1.75, "learning_rate": 1.1377143583711797e-05, "loss": 0.6283, "step": 7276 }, { "epoch": 0.9190307048701555, "grad_norm": 1.765625, "learning_rate": 1.1375166141743064e-05, "loss": 0.6169, "step": 7277 }, { "epoch": 0.9191569973952167, "grad_norm": 1.71875, "learning_rate": 1.1373188644963517e-05, "loss": 0.6445, "step": 7278 }, { "epoch": 0.9192832899202779, "grad_norm": 1.8046875, "learning_rate": 1.1371211093451978e-05, "loss": 0.5873, "step": 7279 }, { "epoch": 0.919409582445339, "grad_norm": 1.75, "learning_rate": 1.1369233487287272e-05, "loss": 0.6352, "step": 7280 }, { "epoch": 0.9195358749704002, "grad_norm": 1.7421875, "learning_rate": 1.1367255826548215e-05, "loss": 0.6041, "step": 7281 }, { "epoch": 0.9196621674954614, "grad_norm": 1.71875, "learning_rate": 1.1365278111313634e-05, "loss": 0.6036, "step": 7282 }, { "epoch": 0.9197884600205225, "grad_norm": 1.84375, "learning_rate": 1.1363300341662356e-05, "loss": 0.6439, "step": 7283 }, { "epoch": 0.9199147525455837, "grad_norm": 1.578125, "learning_rate": 1.136132251767321e-05, "loss": 0.5403, "step": 7284 }, { "epoch": 0.9200410450706449, "grad_norm": 1.8203125, "learning_rate": 1.1359344639425028e-05, "loss": 0.5943, "step": 7285 }, { "epoch": 0.920167337595706, "grad_norm": 1.8125, "learning_rate": 1.1357366706996642e-05, "loss": 0.696, "step": 7286 }, { "epoch": 0.9202936301207673, "grad_norm": 1.7578125, "learning_rate": 1.1355388720466887e-05, "loss": 0.6377, "step": 7287 }, { "epoch": 0.9204199226458284, "grad_norm": 1.8203125, "learning_rate": 1.1353410679914605e-05, "loss": 0.7324, "step": 7288 }, { "epoch": 0.9205462151708895, "grad_norm": 1.671875, "learning_rate": 1.1351432585418631e-05, "loss": 0.5913, "step": 7289 }, { "epoch": 0.9206725076959508, "grad_norm": 1.78125, "learning_rate": 1.1349454437057813e-05, "loss": 0.5858, "step": 7290 }, { "epoch": 0.9207988002210119, "grad_norm": 1.8046875, "learning_rate": 1.1347476234910988e-05, "loss": 0.6381, "step": 7291 }, { "epoch": 0.920925092746073, "grad_norm": 1.6015625, "learning_rate": 1.1345497979057005e-05, "loss": 0.5863, "step": 7292 }, { "epoch": 0.9210513852711343, "grad_norm": 1.78125, "learning_rate": 1.1343519669574716e-05, "loss": 0.66, "step": 7293 }, { "epoch": 0.9211776777961954, "grad_norm": 1.8046875, "learning_rate": 1.1341541306542969e-05, "loss": 0.6917, "step": 7294 }, { "epoch": 0.9213039703212567, "grad_norm": 1.8671875, "learning_rate": 1.1339562890040617e-05, "loss": 0.713, "step": 7295 }, { "epoch": 0.9214302628463178, "grad_norm": 1.8203125, "learning_rate": 1.133758442014651e-05, "loss": 0.646, "step": 7296 }, { "epoch": 0.9215565553713789, "grad_norm": 1.640625, "learning_rate": 1.1335605896939514e-05, "loss": 0.5697, "step": 7297 }, { "epoch": 0.9216828478964402, "grad_norm": 1.8125, "learning_rate": 1.1333627320498483e-05, "loss": 0.6235, "step": 7298 }, { "epoch": 0.9218091404215013, "grad_norm": 1.8203125, "learning_rate": 1.1331648690902281e-05, "loss": 0.6945, "step": 7299 }, { "epoch": 0.9219354329465624, "grad_norm": 1.8359375, "learning_rate": 1.132967000822977e-05, "loss": 0.6762, "step": 7300 }, { "epoch": 0.9220617254716237, "grad_norm": 1.828125, "learning_rate": 1.1327691272559814e-05, "loss": 0.7034, "step": 7301 }, { "epoch": 0.9221880179966848, "grad_norm": 1.75, "learning_rate": 1.132571248397128e-05, "loss": 0.6096, "step": 7302 }, { "epoch": 0.922314310521746, "grad_norm": 1.8828125, "learning_rate": 1.1323733642543043e-05, "loss": 0.6828, "step": 7303 }, { "epoch": 0.9224406030468072, "grad_norm": 1.734375, "learning_rate": 1.1321754748353968e-05, "loss": 0.7188, "step": 7304 }, { "epoch": 0.9225668955718683, "grad_norm": 1.734375, "learning_rate": 1.1319775801482934e-05, "loss": 0.6402, "step": 7305 }, { "epoch": 0.9226931880969295, "grad_norm": 1.8984375, "learning_rate": 1.1317796802008814e-05, "loss": 0.6849, "step": 7306 }, { "epoch": 0.9228194806219907, "grad_norm": 1.796875, "learning_rate": 1.131581775001049e-05, "loss": 0.6538, "step": 7307 }, { "epoch": 0.9229457731470518, "grad_norm": 1.6640625, "learning_rate": 1.1313838645566837e-05, "loss": 0.6064, "step": 7308 }, { "epoch": 0.9230720656721131, "grad_norm": 1.828125, "learning_rate": 1.1311859488756742e-05, "loss": 0.6173, "step": 7309 }, { "epoch": 0.9231983581971742, "grad_norm": 1.9765625, "learning_rate": 1.1309880279659088e-05, "loss": 0.6094, "step": 7310 }, { "epoch": 0.9233246507222354, "grad_norm": 1.7421875, "learning_rate": 1.1307901018352761e-05, "loss": 0.5657, "step": 7311 }, { "epoch": 0.9234509432472966, "grad_norm": 1.96875, "learning_rate": 1.1305921704916648e-05, "loss": 0.6547, "step": 7312 }, { "epoch": 0.9235772357723577, "grad_norm": 1.703125, "learning_rate": 1.1303942339429646e-05, "loss": 0.5438, "step": 7313 }, { "epoch": 0.9237035282974189, "grad_norm": 1.796875, "learning_rate": 1.1301962921970637e-05, "loss": 0.6175, "step": 7314 }, { "epoch": 0.9238298208224801, "grad_norm": 1.8046875, "learning_rate": 1.1299983452618524e-05, "loss": 0.5869, "step": 7315 }, { "epoch": 0.9239561133475412, "grad_norm": 1.7890625, "learning_rate": 1.1298003931452202e-05, "loss": 0.6723, "step": 7316 }, { "epoch": 0.9240824058726024, "grad_norm": 1.8515625, "learning_rate": 1.129602435855057e-05, "loss": 0.666, "step": 7317 }, { "epoch": 0.9242086983976636, "grad_norm": 1.6640625, "learning_rate": 1.129404473399253e-05, "loss": 0.5788, "step": 7318 }, { "epoch": 0.9243349909227248, "grad_norm": 1.890625, "learning_rate": 1.1292065057856983e-05, "loss": 0.5825, "step": 7319 }, { "epoch": 0.9244612834477859, "grad_norm": 1.7734375, "learning_rate": 1.1290085330222835e-05, "loss": 0.5945, "step": 7320 }, { "epoch": 0.9245875759728471, "grad_norm": 1.7734375, "learning_rate": 1.1288105551168992e-05, "loss": 0.6446, "step": 7321 }, { "epoch": 0.9247138684979083, "grad_norm": 1.75, "learning_rate": 1.1286125720774366e-05, "loss": 0.6556, "step": 7322 }, { "epoch": 0.9248401610229694, "grad_norm": 1.890625, "learning_rate": 1.1284145839117864e-05, "loss": 0.6857, "step": 7323 }, { "epoch": 0.9249664535480306, "grad_norm": 1.6953125, "learning_rate": 1.1282165906278402e-05, "loss": 0.5947, "step": 7324 }, { "epoch": 0.9250927460730918, "grad_norm": 1.828125, "learning_rate": 1.12801859223349e-05, "loss": 0.615, "step": 7325 }, { "epoch": 0.925219038598153, "grad_norm": 1.6875, "learning_rate": 1.1278205887366264e-05, "loss": 0.6245, "step": 7326 }, { "epoch": 0.9253453311232142, "grad_norm": 1.7109375, "learning_rate": 1.1276225801451423e-05, "loss": 0.6715, "step": 7327 }, { "epoch": 0.9254716236482753, "grad_norm": 1.90625, "learning_rate": 1.12742456646693e-05, "loss": 0.6433, "step": 7328 }, { "epoch": 0.9255979161733365, "grad_norm": 1.7890625, "learning_rate": 1.1272265477098809e-05, "loss": 0.6776, "step": 7329 }, { "epoch": 0.9257242086983977, "grad_norm": 1.796875, "learning_rate": 1.1270285238818881e-05, "loss": 0.6963, "step": 7330 }, { "epoch": 0.9258505012234588, "grad_norm": 1.6875, "learning_rate": 1.1268304949908444e-05, "loss": 0.698, "step": 7331 }, { "epoch": 0.92597679374852, "grad_norm": 2.046875, "learning_rate": 1.1266324610446425e-05, "loss": 0.7342, "step": 7332 }, { "epoch": 0.9261030862735812, "grad_norm": 1.671875, "learning_rate": 1.1264344220511758e-05, "loss": 0.6017, "step": 7333 }, { "epoch": 0.9262293787986423, "grad_norm": 1.8359375, "learning_rate": 1.1262363780183377e-05, "loss": 0.624, "step": 7334 }, { "epoch": 0.9263556713237036, "grad_norm": 1.75, "learning_rate": 1.1260383289540215e-05, "loss": 0.5605, "step": 7335 }, { "epoch": 0.9264819638487647, "grad_norm": 1.828125, "learning_rate": 1.1258402748661212e-05, "loss": 0.6232, "step": 7336 }, { "epoch": 0.9266082563738258, "grad_norm": 1.8125, "learning_rate": 1.1256422157625306e-05, "loss": 0.6066, "step": 7337 }, { "epoch": 0.9267345488988871, "grad_norm": 1.859375, "learning_rate": 1.1254441516511439e-05, "loss": 0.7097, "step": 7338 }, { "epoch": 0.9268608414239482, "grad_norm": 2.0625, "learning_rate": 1.1252460825398555e-05, "loss": 0.614, "step": 7339 }, { "epoch": 0.9269871339490094, "grad_norm": 1.8359375, "learning_rate": 1.1250480084365598e-05, "loss": 0.6687, "step": 7340 }, { "epoch": 0.9271134264740706, "grad_norm": 2.53125, "learning_rate": 1.1248499293491515e-05, "loss": 0.5951, "step": 7341 }, { "epoch": 0.9272397189991317, "grad_norm": 1.84375, "learning_rate": 1.1246518452855261e-05, "loss": 0.6515, "step": 7342 }, { "epoch": 0.927366011524193, "grad_norm": 1.7578125, "learning_rate": 1.1244537562535782e-05, "loss": 0.6481, "step": 7343 }, { "epoch": 0.9274923040492541, "grad_norm": 1.78125, "learning_rate": 1.1242556622612035e-05, "loss": 0.6407, "step": 7344 }, { "epoch": 0.9276185965743152, "grad_norm": 1.921875, "learning_rate": 1.1240575633162973e-05, "loss": 0.662, "step": 7345 }, { "epoch": 0.9277448890993765, "grad_norm": 1.875, "learning_rate": 1.1238594594267555e-05, "loss": 0.6358, "step": 7346 }, { "epoch": 0.9278711816244376, "grad_norm": 1.8203125, "learning_rate": 1.123661350600474e-05, "loss": 0.6106, "step": 7347 }, { "epoch": 0.9279974741494987, "grad_norm": 1.890625, "learning_rate": 1.123463236845349e-05, "loss": 0.5812, "step": 7348 }, { "epoch": 0.92812376667456, "grad_norm": 1.8671875, "learning_rate": 1.1232651181692766e-05, "loss": 0.6472, "step": 7349 }, { "epoch": 0.9282500591996211, "grad_norm": 1.7734375, "learning_rate": 1.1230669945801533e-05, "loss": 0.6254, "step": 7350 }, { "epoch": 0.9283763517246822, "grad_norm": 1.8515625, "learning_rate": 1.1228688660858762e-05, "loss": 0.6558, "step": 7351 }, { "epoch": 0.9285026442497435, "grad_norm": 1.78125, "learning_rate": 1.122670732694342e-05, "loss": 0.6353, "step": 7352 }, { "epoch": 0.9286289367748046, "grad_norm": 1.7734375, "learning_rate": 1.122472594413448e-05, "loss": 0.5643, "step": 7353 }, { "epoch": 0.9287552292998658, "grad_norm": 1.8515625, "learning_rate": 1.1222744512510914e-05, "loss": 0.683, "step": 7354 }, { "epoch": 0.928881521824927, "grad_norm": 1.78125, "learning_rate": 1.1220763032151695e-05, "loss": 0.6582, "step": 7355 }, { "epoch": 0.9290078143499881, "grad_norm": 1.734375, "learning_rate": 1.1218781503135805e-05, "loss": 0.5728, "step": 7356 }, { "epoch": 0.9291341068750494, "grad_norm": 1.96875, "learning_rate": 1.1216799925542218e-05, "loss": 0.6705, "step": 7357 }, { "epoch": 0.9292603994001105, "grad_norm": 1.6875, "learning_rate": 1.1214818299449917e-05, "loss": 0.5967, "step": 7358 }, { "epoch": 0.9293866919251716, "grad_norm": 1.6875, "learning_rate": 1.1212836624937882e-05, "loss": 0.6152, "step": 7359 }, { "epoch": 0.9295129844502329, "grad_norm": 1.65625, "learning_rate": 1.1210854902085104e-05, "loss": 0.6218, "step": 7360 }, { "epoch": 0.929639276975294, "grad_norm": 1.78125, "learning_rate": 1.1208873130970565e-05, "loss": 0.6099, "step": 7361 }, { "epoch": 0.9297655695003552, "grad_norm": 2.03125, "learning_rate": 1.1206891311673255e-05, "loss": 0.7362, "step": 7362 }, { "epoch": 0.9298918620254164, "grad_norm": 1.96875, "learning_rate": 1.1204909444272165e-05, "loss": 0.7564, "step": 7363 }, { "epoch": 0.9300181545504775, "grad_norm": 1.8125, "learning_rate": 1.1202927528846286e-05, "loss": 0.6333, "step": 7364 }, { "epoch": 0.9301444470755387, "grad_norm": 1.703125, "learning_rate": 1.1200945565474613e-05, "loss": 0.6553, "step": 7365 }, { "epoch": 0.9302707396005999, "grad_norm": 1.8671875, "learning_rate": 1.1198963554236142e-05, "loss": 0.5946, "step": 7366 }, { "epoch": 0.930397032125661, "grad_norm": 1.7890625, "learning_rate": 1.1196981495209874e-05, "loss": 0.5708, "step": 7367 }, { "epoch": 0.9305233246507222, "grad_norm": 1.828125, "learning_rate": 1.1194999388474801e-05, "loss": 0.6667, "step": 7368 }, { "epoch": 0.9306496171757834, "grad_norm": 1.734375, "learning_rate": 1.1193017234109935e-05, "loss": 0.6131, "step": 7369 }, { "epoch": 0.9307759097008446, "grad_norm": 2.078125, "learning_rate": 1.1191035032194275e-05, "loss": 0.6697, "step": 7370 }, { "epoch": 0.9309022022259058, "grad_norm": 1.84375, "learning_rate": 1.1189052782806828e-05, "loss": 0.6457, "step": 7371 }, { "epoch": 0.9310284947509669, "grad_norm": 1.875, "learning_rate": 1.1187070486026602e-05, "loss": 0.6659, "step": 7372 }, { "epoch": 0.9311547872760281, "grad_norm": 1.9140625, "learning_rate": 1.1185088141932603e-05, "loss": 0.7049, "step": 7373 }, { "epoch": 0.9312810798010893, "grad_norm": 1.8671875, "learning_rate": 1.1183105750603847e-05, "loss": 0.6574, "step": 7374 }, { "epoch": 0.9314073723261505, "grad_norm": 1.7890625, "learning_rate": 1.1181123312119344e-05, "loss": 0.6122, "step": 7375 }, { "epoch": 0.9315336648512116, "grad_norm": 1.7578125, "learning_rate": 1.1179140826558113e-05, "loss": 0.6784, "step": 7376 }, { "epoch": 0.9316599573762728, "grad_norm": 1.96875, "learning_rate": 1.1177158293999165e-05, "loss": 0.7494, "step": 7377 }, { "epoch": 0.931786249901334, "grad_norm": 1.7734375, "learning_rate": 1.1175175714521525e-05, "loss": 0.6045, "step": 7378 }, { "epoch": 0.9319125424263951, "grad_norm": 1.8359375, "learning_rate": 1.117319308820421e-05, "loss": 0.5991, "step": 7379 }, { "epoch": 0.9320388349514563, "grad_norm": 1.7890625, "learning_rate": 1.1171210415126248e-05, "loss": 0.6282, "step": 7380 }, { "epoch": 0.9321651274765175, "grad_norm": 1.9140625, "learning_rate": 1.1169227695366658e-05, "loss": 0.6909, "step": 7381 }, { "epoch": 0.9322914200015786, "grad_norm": 1.90625, "learning_rate": 1.1167244929004468e-05, "loss": 0.6133, "step": 7382 }, { "epoch": 0.9324177125266399, "grad_norm": 1.7265625, "learning_rate": 1.1165262116118707e-05, "loss": 0.6402, "step": 7383 }, { "epoch": 0.932544005051701, "grad_norm": 1.65625, "learning_rate": 1.1163279256788403e-05, "loss": 0.601, "step": 7384 }, { "epoch": 0.9326702975767622, "grad_norm": 1.640625, "learning_rate": 1.1161296351092594e-05, "loss": 0.544, "step": 7385 }, { "epoch": 0.9327965901018234, "grad_norm": 1.796875, "learning_rate": 1.1159313399110306e-05, "loss": 0.6054, "step": 7386 }, { "epoch": 0.9329228826268845, "grad_norm": 1.84375, "learning_rate": 1.1157330400920578e-05, "loss": 0.5956, "step": 7387 }, { "epoch": 0.9330491751519457, "grad_norm": 1.703125, "learning_rate": 1.1155347356602448e-05, "loss": 0.6536, "step": 7388 }, { "epoch": 0.9331754676770069, "grad_norm": 1.8046875, "learning_rate": 1.1153364266234956e-05, "loss": 0.634, "step": 7389 }, { "epoch": 0.933301760202068, "grad_norm": 1.9296875, "learning_rate": 1.1151381129897144e-05, "loss": 0.6099, "step": 7390 }, { "epoch": 0.9334280527271293, "grad_norm": 2.046875, "learning_rate": 1.1149397947668053e-05, "loss": 0.6298, "step": 7391 }, { "epoch": 0.9335543452521904, "grad_norm": 1.7421875, "learning_rate": 1.1147414719626726e-05, "loss": 0.5678, "step": 7392 }, { "epoch": 0.9336806377772515, "grad_norm": 1.8046875, "learning_rate": 1.1145431445852216e-05, "loss": 0.6042, "step": 7393 }, { "epoch": 0.9338069303023128, "grad_norm": 1.71875, "learning_rate": 1.1143448126423564e-05, "loss": 0.5957, "step": 7394 }, { "epoch": 0.9339332228273739, "grad_norm": 1.859375, "learning_rate": 1.1141464761419824e-05, "loss": 0.6538, "step": 7395 }, { "epoch": 0.934059515352435, "grad_norm": 1.6015625, "learning_rate": 1.1139481350920046e-05, "loss": 0.6226, "step": 7396 }, { "epoch": 0.9341858078774963, "grad_norm": 1.8359375, "learning_rate": 1.1137497895003291e-05, "loss": 0.6825, "step": 7397 }, { "epoch": 0.9343121004025574, "grad_norm": 1.921875, "learning_rate": 1.1135514393748607e-05, "loss": 0.6813, "step": 7398 }, { "epoch": 0.9344383929276185, "grad_norm": 1.8515625, "learning_rate": 1.1133530847235054e-05, "loss": 0.6807, "step": 7399 }, { "epoch": 0.9345646854526798, "grad_norm": 1.7890625, "learning_rate": 1.1131547255541692e-05, "loss": 0.6106, "step": 7400 }, { "epoch": 0.9346909779777409, "grad_norm": 1.828125, "learning_rate": 1.1129563618747583e-05, "loss": 0.5899, "step": 7401 }, { "epoch": 0.9348172705028022, "grad_norm": 1.6875, "learning_rate": 1.1127579936931789e-05, "loss": 0.5537, "step": 7402 }, { "epoch": 0.9349435630278633, "grad_norm": 1.8828125, "learning_rate": 1.1125596210173375e-05, "loss": 0.5759, "step": 7403 }, { "epoch": 0.9350698555529244, "grad_norm": 1.7109375, "learning_rate": 1.1123612438551406e-05, "loss": 0.6544, "step": 7404 }, { "epoch": 0.9351961480779857, "grad_norm": 1.796875, "learning_rate": 1.112162862214495e-05, "loss": 0.5939, "step": 7405 }, { "epoch": 0.9353224406030468, "grad_norm": 1.796875, "learning_rate": 1.1119644761033079e-05, "loss": 0.6348, "step": 7406 }, { "epoch": 0.935448733128108, "grad_norm": 1.796875, "learning_rate": 1.1117660855294867e-05, "loss": 0.6068, "step": 7407 }, { "epoch": 0.9355750256531692, "grad_norm": 1.921875, "learning_rate": 1.1115676905009385e-05, "loss": 0.6468, "step": 7408 }, { "epoch": 0.9357013181782303, "grad_norm": 1.7109375, "learning_rate": 1.1113692910255708e-05, "loss": 0.6162, "step": 7409 }, { "epoch": 0.9358276107032915, "grad_norm": 1.75, "learning_rate": 1.1111708871112916e-05, "loss": 0.6435, "step": 7410 }, { "epoch": 0.9359539032283527, "grad_norm": 1.8984375, "learning_rate": 1.1109724787660083e-05, "loss": 0.6815, "step": 7411 }, { "epoch": 0.9360801957534138, "grad_norm": 1.8046875, "learning_rate": 1.1107740659976297e-05, "loss": 0.6753, "step": 7412 }, { "epoch": 0.936206488278475, "grad_norm": 1.8671875, "learning_rate": 1.1105756488140633e-05, "loss": 0.627, "step": 7413 }, { "epoch": 0.9363327808035362, "grad_norm": 1.7109375, "learning_rate": 1.1103772272232178e-05, "loss": 0.638, "step": 7414 }, { "epoch": 0.9364590733285973, "grad_norm": 1.7578125, "learning_rate": 1.110178801233002e-05, "loss": 0.6788, "step": 7415 }, { "epoch": 0.9365853658536586, "grad_norm": 1.6171875, "learning_rate": 1.1099803708513246e-05, "loss": 0.626, "step": 7416 }, { "epoch": 0.9367116583787197, "grad_norm": 1.90625, "learning_rate": 1.1097819360860946e-05, "loss": 0.6786, "step": 7417 }, { "epoch": 0.9368379509037809, "grad_norm": 1.8046875, "learning_rate": 1.1095834969452211e-05, "loss": 0.5748, "step": 7418 }, { "epoch": 0.9369642434288421, "grad_norm": 1.8046875, "learning_rate": 1.1093850534366131e-05, "loss": 0.5682, "step": 7419 }, { "epoch": 0.9370905359539032, "grad_norm": 1.75, "learning_rate": 1.1091866055681806e-05, "loss": 0.5971, "step": 7420 }, { "epoch": 0.9372168284789644, "grad_norm": 1.734375, "learning_rate": 1.1089881533478328e-05, "loss": 0.6585, "step": 7421 }, { "epoch": 0.9373431210040256, "grad_norm": 2.03125, "learning_rate": 1.1087896967834799e-05, "loss": 0.6902, "step": 7422 }, { "epoch": 0.9374694135290867, "grad_norm": 1.90625, "learning_rate": 1.1085912358830315e-05, "loss": 0.7273, "step": 7423 }, { "epoch": 0.9375957060541479, "grad_norm": 1.765625, "learning_rate": 1.108392770654398e-05, "loss": 0.6237, "step": 7424 }, { "epoch": 0.9377219985792091, "grad_norm": 1.796875, "learning_rate": 1.10819430110549e-05, "loss": 0.6219, "step": 7425 }, { "epoch": 0.9378482911042703, "grad_norm": 1.8046875, "learning_rate": 1.1079958272442177e-05, "loss": 0.6604, "step": 7426 }, { "epoch": 0.9379745836293314, "grad_norm": 1.890625, "learning_rate": 1.1077973490784917e-05, "loss": 0.6394, "step": 7427 }, { "epoch": 0.9381008761543926, "grad_norm": 1.8984375, "learning_rate": 1.1075988666162232e-05, "loss": 0.661, "step": 7428 }, { "epoch": 0.9382271686794538, "grad_norm": 1.8671875, "learning_rate": 1.107400379865323e-05, "loss": 0.5728, "step": 7429 }, { "epoch": 0.9383534612045149, "grad_norm": 1.7890625, "learning_rate": 1.1072018888337022e-05, "loss": 0.6754, "step": 7430 }, { "epoch": 0.9384797537295761, "grad_norm": 1.8359375, "learning_rate": 1.1070033935292723e-05, "loss": 0.6306, "step": 7431 }, { "epoch": 0.9386060462546373, "grad_norm": 1.6484375, "learning_rate": 1.106804893959945e-05, "loss": 0.6041, "step": 7432 }, { "epoch": 0.9387323387796985, "grad_norm": 1.734375, "learning_rate": 1.1066063901336317e-05, "loss": 0.6352, "step": 7433 }, { "epoch": 0.9388586313047597, "grad_norm": 1.84375, "learning_rate": 1.1064078820582447e-05, "loss": 0.5842, "step": 7434 }, { "epoch": 0.9389849238298208, "grad_norm": 1.859375, "learning_rate": 1.1062093697416959e-05, "loss": 0.5898, "step": 7435 }, { "epoch": 0.939111216354882, "grad_norm": 1.6875, "learning_rate": 1.1060108531918972e-05, "loss": 0.6324, "step": 7436 }, { "epoch": 0.9392375088799432, "grad_norm": 1.90625, "learning_rate": 1.1058123324167613e-05, "loss": 0.6371, "step": 7437 }, { "epoch": 0.9393638014050043, "grad_norm": 1.7109375, "learning_rate": 1.1056138074242009e-05, "loss": 0.561, "step": 7438 }, { "epoch": 0.9394900939300656, "grad_norm": 1.8203125, "learning_rate": 1.1054152782221283e-05, "loss": 0.6207, "step": 7439 }, { "epoch": 0.9396163864551267, "grad_norm": 1.8046875, "learning_rate": 1.1052167448184565e-05, "loss": 0.6236, "step": 7440 }, { "epoch": 0.9397426789801878, "grad_norm": 1.8671875, "learning_rate": 1.1050182072210989e-05, "loss": 0.6677, "step": 7441 }, { "epoch": 0.9398689715052491, "grad_norm": 1.71875, "learning_rate": 1.1048196654379684e-05, "loss": 0.5844, "step": 7442 }, { "epoch": 0.9399952640303102, "grad_norm": 1.7734375, "learning_rate": 1.1046211194769784e-05, "loss": 0.5794, "step": 7443 }, { "epoch": 0.9401215565553713, "grad_norm": 1.6953125, "learning_rate": 1.104422569346043e-05, "loss": 0.5967, "step": 7444 }, { "epoch": 0.9402478490804326, "grad_norm": 1.7265625, "learning_rate": 1.1042240150530752e-05, "loss": 0.6507, "step": 7445 }, { "epoch": 0.9403741416054937, "grad_norm": 1.71875, "learning_rate": 1.1040254566059892e-05, "loss": 0.5806, "step": 7446 }, { "epoch": 0.940500434130555, "grad_norm": 1.6796875, "learning_rate": 1.103826894012699e-05, "loss": 0.5494, "step": 7447 }, { "epoch": 0.9406267266556161, "grad_norm": 1.734375, "learning_rate": 1.1036283272811192e-05, "loss": 0.5944, "step": 7448 }, { "epoch": 0.9407530191806772, "grad_norm": 1.6953125, "learning_rate": 1.1034297564191633e-05, "loss": 0.5341, "step": 7449 }, { "epoch": 0.9408793117057385, "grad_norm": 1.96875, "learning_rate": 1.103231181434747e-05, "loss": 0.6082, "step": 7450 }, { "epoch": 0.9410056042307996, "grad_norm": 1.6796875, "learning_rate": 1.1030326023357843e-05, "loss": 0.6285, "step": 7451 }, { "epoch": 0.9411318967558607, "grad_norm": 1.734375, "learning_rate": 1.10283401913019e-05, "loss": 0.5935, "step": 7452 }, { "epoch": 0.941258189280922, "grad_norm": 1.9609375, "learning_rate": 1.1026354318258797e-05, "loss": 0.7078, "step": 7453 }, { "epoch": 0.9413844818059831, "grad_norm": 1.734375, "learning_rate": 1.1024368404307682e-05, "loss": 0.5585, "step": 7454 }, { "epoch": 0.9415107743310442, "grad_norm": 1.671875, "learning_rate": 1.102238244952771e-05, "loss": 0.6312, "step": 7455 }, { "epoch": 0.9416370668561055, "grad_norm": 1.890625, "learning_rate": 1.1020396453998036e-05, "loss": 0.7125, "step": 7456 }, { "epoch": 0.9417633593811666, "grad_norm": 1.7734375, "learning_rate": 1.1018410417797815e-05, "loss": 0.6606, "step": 7457 }, { "epoch": 0.9418896519062278, "grad_norm": 1.8046875, "learning_rate": 1.1016424341006209e-05, "loss": 0.6837, "step": 7458 }, { "epoch": 0.942015944431289, "grad_norm": 1.7421875, "learning_rate": 1.101443822370238e-05, "loss": 0.591, "step": 7459 }, { "epoch": 0.9421422369563501, "grad_norm": 1.8046875, "learning_rate": 1.1012452065965481e-05, "loss": 0.5979, "step": 7460 }, { "epoch": 0.9422685294814113, "grad_norm": 1.734375, "learning_rate": 1.1010465867874688e-05, "loss": 0.5718, "step": 7461 }, { "epoch": 0.9423948220064725, "grad_norm": 1.734375, "learning_rate": 1.1008479629509154e-05, "loss": 0.6198, "step": 7462 }, { "epoch": 0.9425211145315336, "grad_norm": 1.7421875, "learning_rate": 1.1006493350948055e-05, "loss": 0.6645, "step": 7463 }, { "epoch": 0.9426474070565949, "grad_norm": 1.8125, "learning_rate": 1.1004507032270553e-05, "loss": 0.5771, "step": 7464 }, { "epoch": 0.942773699581656, "grad_norm": 1.71875, "learning_rate": 1.1002520673555819e-05, "loss": 0.6836, "step": 7465 }, { "epoch": 0.9428999921067172, "grad_norm": 1.6796875, "learning_rate": 1.1000534274883027e-05, "loss": 0.6117, "step": 7466 }, { "epoch": 0.9430262846317784, "grad_norm": 1.8984375, "learning_rate": 1.099854783633135e-05, "loss": 0.6773, "step": 7467 }, { "epoch": 0.9431525771568395, "grad_norm": 1.7265625, "learning_rate": 1.099656135797996e-05, "loss": 0.6083, "step": 7468 }, { "epoch": 0.9432788696819007, "grad_norm": 1.8046875, "learning_rate": 1.0994574839908037e-05, "loss": 0.6803, "step": 7469 }, { "epoch": 0.9434051622069619, "grad_norm": 1.9453125, "learning_rate": 1.0992588282194753e-05, "loss": 0.6714, "step": 7470 }, { "epoch": 0.943531454732023, "grad_norm": 1.7890625, "learning_rate": 1.0990601684919293e-05, "loss": 0.7307, "step": 7471 }, { "epoch": 0.9436577472570842, "grad_norm": 1.8125, "learning_rate": 1.0988615048160836e-05, "loss": 0.6621, "step": 7472 }, { "epoch": 0.9437840397821454, "grad_norm": 1.609375, "learning_rate": 1.0986628371998562e-05, "loss": 0.5184, "step": 7473 }, { "epoch": 0.9439103323072066, "grad_norm": 1.9296875, "learning_rate": 1.0984641656511658e-05, "loss": 0.6656, "step": 7474 }, { "epoch": 0.9440366248322677, "grad_norm": 1.8515625, "learning_rate": 1.098265490177931e-05, "loss": 0.7189, "step": 7475 }, { "epoch": 0.9441629173573289, "grad_norm": 1.96875, "learning_rate": 1.0980668107880707e-05, "loss": 0.7027, "step": 7476 }, { "epoch": 0.9442892098823901, "grad_norm": 1.8359375, "learning_rate": 1.0978681274895033e-05, "loss": 0.5751, "step": 7477 }, { "epoch": 0.9444155024074513, "grad_norm": 1.9140625, "learning_rate": 1.0976694402901482e-05, "loss": 0.7237, "step": 7478 }, { "epoch": 0.9445417949325124, "grad_norm": 1.796875, "learning_rate": 1.0974707491979242e-05, "loss": 0.6192, "step": 7479 }, { "epoch": 0.9446680874575736, "grad_norm": 1.8046875, "learning_rate": 1.0972720542207511e-05, "loss": 0.6492, "step": 7480 }, { "epoch": 0.9447943799826348, "grad_norm": 1.8671875, "learning_rate": 1.097073355366548e-05, "loss": 0.5483, "step": 7481 }, { "epoch": 0.944920672507696, "grad_norm": 1.8828125, "learning_rate": 1.096874652643235e-05, "loss": 0.6308, "step": 7482 }, { "epoch": 0.9450469650327571, "grad_norm": 1.84375, "learning_rate": 1.0966759460587317e-05, "loss": 0.6413, "step": 7483 }, { "epoch": 0.9451732575578183, "grad_norm": 1.828125, "learning_rate": 1.0964772356209581e-05, "loss": 0.6603, "step": 7484 }, { "epoch": 0.9452995500828795, "grad_norm": 1.671875, "learning_rate": 1.096278521337834e-05, "loss": 0.5548, "step": 7485 }, { "epoch": 0.9454258426079406, "grad_norm": 1.71875, "learning_rate": 1.0960798032172801e-05, "loss": 0.602, "step": 7486 }, { "epoch": 0.9455521351330018, "grad_norm": 2.03125, "learning_rate": 1.0958810812672166e-05, "loss": 0.5623, "step": 7487 }, { "epoch": 0.945678427658063, "grad_norm": 1.7265625, "learning_rate": 1.0956823554955644e-05, "loss": 0.6632, "step": 7488 }, { "epoch": 0.9458047201831241, "grad_norm": 1.890625, "learning_rate": 1.0954836259102434e-05, "loss": 0.727, "step": 7489 }, { "epoch": 0.9459310127081854, "grad_norm": 1.8515625, "learning_rate": 1.0952848925191756e-05, "loss": 0.5981, "step": 7490 }, { "epoch": 0.9460573052332465, "grad_norm": 1.7890625, "learning_rate": 1.0950861553302815e-05, "loss": 0.5679, "step": 7491 }, { "epoch": 0.9461835977583076, "grad_norm": 1.8046875, "learning_rate": 1.094887414351482e-05, "loss": 0.6599, "step": 7492 }, { "epoch": 0.9463098902833689, "grad_norm": 1.7421875, "learning_rate": 1.094688669590699e-05, "loss": 0.61, "step": 7493 }, { "epoch": 0.94643618280843, "grad_norm": 1.8203125, "learning_rate": 1.0944899210558533e-05, "loss": 0.6475, "step": 7494 }, { "epoch": 0.9465624753334912, "grad_norm": 1.8125, "learning_rate": 1.0942911687548671e-05, "loss": 0.5865, "step": 7495 }, { "epoch": 0.9466887678585524, "grad_norm": 1.8984375, "learning_rate": 1.0940924126956623e-05, "loss": 0.6518, "step": 7496 }, { "epoch": 0.9468150603836135, "grad_norm": 1.84375, "learning_rate": 1.0938936528861602e-05, "loss": 0.5705, "step": 7497 }, { "epoch": 0.9469413529086748, "grad_norm": 1.84375, "learning_rate": 1.0936948893342834e-05, "loss": 0.5896, "step": 7498 }, { "epoch": 0.9470676454337359, "grad_norm": 1.7109375, "learning_rate": 1.0934961220479538e-05, "loss": 0.5858, "step": 7499 }, { "epoch": 0.947193937958797, "grad_norm": 1.734375, "learning_rate": 1.0932973510350944e-05, "loss": 0.6162, "step": 7500 }, { "epoch": 0.9473202304838583, "grad_norm": 1.7578125, "learning_rate": 1.0930985763036271e-05, "loss": 0.6353, "step": 7501 }, { "epoch": 0.9474465230089194, "grad_norm": 1.6953125, "learning_rate": 1.0928997978614749e-05, "loss": 0.6127, "step": 7502 }, { "epoch": 0.9475728155339805, "grad_norm": 1.7421875, "learning_rate": 1.0927010157165603e-05, "loss": 0.6076, "step": 7503 }, { "epoch": 0.9476991080590418, "grad_norm": 1.921875, "learning_rate": 1.0925022298768066e-05, "loss": 0.6635, "step": 7504 }, { "epoch": 0.9478254005841029, "grad_norm": 1.9453125, "learning_rate": 1.0923034403501365e-05, "loss": 0.6391, "step": 7505 }, { "epoch": 0.947951693109164, "grad_norm": 1.6171875, "learning_rate": 1.092104647144474e-05, "loss": 0.5396, "step": 7506 }, { "epoch": 0.9480779856342253, "grad_norm": 1.734375, "learning_rate": 1.091905850267742e-05, "loss": 0.6128, "step": 7507 }, { "epoch": 0.9482042781592864, "grad_norm": 1.671875, "learning_rate": 1.0917070497278644e-05, "loss": 0.5628, "step": 7508 }, { "epoch": 0.9483305706843477, "grad_norm": 1.7265625, "learning_rate": 1.0915082455327644e-05, "loss": 0.5992, "step": 7509 }, { "epoch": 0.9484568632094088, "grad_norm": 1.828125, "learning_rate": 1.0913094376903662e-05, "loss": 0.6129, "step": 7510 }, { "epoch": 0.9485831557344699, "grad_norm": 2.09375, "learning_rate": 1.0911106262085937e-05, "loss": 0.6212, "step": 7511 }, { "epoch": 0.9487094482595312, "grad_norm": 1.796875, "learning_rate": 1.0909118110953714e-05, "loss": 0.5893, "step": 7512 }, { "epoch": 0.9488357407845923, "grad_norm": 1.859375, "learning_rate": 1.0907129923586228e-05, "loss": 0.6535, "step": 7513 }, { "epoch": 0.9489620333096535, "grad_norm": 2.25, "learning_rate": 1.0905141700062731e-05, "loss": 0.7347, "step": 7514 }, { "epoch": 0.9490883258347147, "grad_norm": 1.8359375, "learning_rate": 1.0903153440462466e-05, "loss": 0.6262, "step": 7515 }, { "epoch": 0.9492146183597758, "grad_norm": 1.90625, "learning_rate": 1.090116514486468e-05, "loss": 0.5758, "step": 7516 }, { "epoch": 0.949340910884837, "grad_norm": 2.046875, "learning_rate": 1.0899176813348622e-05, "loss": 0.7073, "step": 7517 }, { "epoch": 0.9494672034098982, "grad_norm": 1.9453125, "learning_rate": 1.0897188445993543e-05, "loss": 0.6672, "step": 7518 }, { "epoch": 0.9495934959349593, "grad_norm": 1.8515625, "learning_rate": 1.089520004287869e-05, "loss": 0.6835, "step": 7519 }, { "epoch": 0.9497197884600205, "grad_norm": 2.03125, "learning_rate": 1.0893211604083325e-05, "loss": 0.6475, "step": 7520 }, { "epoch": 0.9498460809850817, "grad_norm": 1.890625, "learning_rate": 1.0891223129686692e-05, "loss": 0.7278, "step": 7521 }, { "epoch": 0.9499723735101429, "grad_norm": 1.8515625, "learning_rate": 1.0889234619768052e-05, "loss": 0.6485, "step": 7522 }, { "epoch": 0.950098666035204, "grad_norm": 1.8046875, "learning_rate": 1.0887246074406663e-05, "loss": 0.5568, "step": 7523 }, { "epoch": 0.9502249585602652, "grad_norm": 1.75, "learning_rate": 1.0885257493681784e-05, "loss": 0.5991, "step": 7524 }, { "epoch": 0.9503512510853264, "grad_norm": 1.8203125, "learning_rate": 1.0883268877672672e-05, "loss": 0.6445, "step": 7525 }, { "epoch": 0.9504775436103876, "grad_norm": 1.71875, "learning_rate": 1.0881280226458592e-05, "loss": 0.6426, "step": 7526 }, { "epoch": 0.9506038361354487, "grad_norm": 1.828125, "learning_rate": 1.0879291540118804e-05, "loss": 0.6413, "step": 7527 }, { "epoch": 0.9507301286605099, "grad_norm": 1.640625, "learning_rate": 1.0877302818732574e-05, "loss": 0.5075, "step": 7528 }, { "epoch": 0.9508564211855711, "grad_norm": 1.75, "learning_rate": 1.0875314062379166e-05, "loss": 0.623, "step": 7529 }, { "epoch": 0.9509827137106323, "grad_norm": 1.828125, "learning_rate": 1.0873325271137847e-05, "loss": 0.6428, "step": 7530 }, { "epoch": 0.9511090062356934, "grad_norm": 1.9375, "learning_rate": 1.0871336445087883e-05, "loss": 0.6636, "step": 7531 }, { "epoch": 0.9512352987607546, "grad_norm": 1.6953125, "learning_rate": 1.0869347584308551e-05, "loss": 0.6292, "step": 7532 }, { "epoch": 0.9513615912858158, "grad_norm": 1.8046875, "learning_rate": 1.0867358688879116e-05, "loss": 0.6482, "step": 7533 }, { "epoch": 0.9514878838108769, "grad_norm": 1.765625, "learning_rate": 1.0865369758878857e-05, "loss": 0.5935, "step": 7534 }, { "epoch": 0.9516141763359381, "grad_norm": 1.84375, "learning_rate": 1.0863380794387041e-05, "loss": 0.6169, "step": 7535 }, { "epoch": 0.9517404688609993, "grad_norm": 1.78125, "learning_rate": 1.0861391795482947e-05, "loss": 0.7136, "step": 7536 }, { "epoch": 0.9518667613860604, "grad_norm": 1.8671875, "learning_rate": 1.0859402762245849e-05, "loss": 0.6297, "step": 7537 }, { "epoch": 0.9519930539111217, "grad_norm": 1.8671875, "learning_rate": 1.085741369475503e-05, "loss": 0.6385, "step": 7538 }, { "epoch": 0.9521193464361828, "grad_norm": 2.03125, "learning_rate": 1.0855424593089764e-05, "loss": 0.6915, "step": 7539 }, { "epoch": 0.952245638961244, "grad_norm": 1.7890625, "learning_rate": 1.0853435457329334e-05, "loss": 0.627, "step": 7540 }, { "epoch": 0.9523719314863052, "grad_norm": 1.8125, "learning_rate": 1.0851446287553022e-05, "loss": 0.6184, "step": 7541 }, { "epoch": 0.9524982240113663, "grad_norm": 1.8203125, "learning_rate": 1.0849457083840113e-05, "loss": 0.6355, "step": 7542 }, { "epoch": 0.9526245165364275, "grad_norm": 1.7421875, "learning_rate": 1.084746784626989e-05, "loss": 0.558, "step": 7543 }, { "epoch": 0.9527508090614887, "grad_norm": 1.8828125, "learning_rate": 1.0845478574921642e-05, "loss": 0.7, "step": 7544 }, { "epoch": 0.9528771015865498, "grad_norm": 1.890625, "learning_rate": 1.0843489269874652e-05, "loss": 0.6557, "step": 7545 }, { "epoch": 0.9530033941116111, "grad_norm": 1.796875, "learning_rate": 1.0841499931208212e-05, "loss": 0.6175, "step": 7546 }, { "epoch": 0.9531296866366722, "grad_norm": 1.9921875, "learning_rate": 1.0839510559001614e-05, "loss": 0.6898, "step": 7547 }, { "epoch": 0.9532559791617333, "grad_norm": 2.125, "learning_rate": 1.0837521153334143e-05, "loss": 0.6282, "step": 7548 }, { "epoch": 0.9533822716867946, "grad_norm": 1.8125, "learning_rate": 1.0835531714285096e-05, "loss": 0.6123, "step": 7549 }, { "epoch": 0.9535085642118557, "grad_norm": 1.8515625, "learning_rate": 1.083354224193377e-05, "loss": 0.7333, "step": 7550 }, { "epoch": 0.9536348567369168, "grad_norm": 1.8203125, "learning_rate": 1.0831552736359458e-05, "loss": 0.653, "step": 7551 }, { "epoch": 0.9537611492619781, "grad_norm": 1.7265625, "learning_rate": 1.0829563197641458e-05, "loss": 0.6838, "step": 7552 }, { "epoch": 0.9538874417870392, "grad_norm": 1.8359375, "learning_rate": 1.0827573625859064e-05, "loss": 0.8069, "step": 7553 }, { "epoch": 0.9540137343121003, "grad_norm": 1.7890625, "learning_rate": 1.082558402109158e-05, "loss": 0.6363, "step": 7554 }, { "epoch": 0.9541400268371616, "grad_norm": 1.78125, "learning_rate": 1.0823594383418306e-05, "loss": 0.6588, "step": 7555 }, { "epoch": 0.9542663193622227, "grad_norm": 1.9453125, "learning_rate": 1.0821604712918544e-05, "loss": 0.6757, "step": 7556 }, { "epoch": 0.954392611887284, "grad_norm": 1.734375, "learning_rate": 1.0819615009671595e-05, "loss": 0.5631, "step": 7557 }, { "epoch": 0.9545189044123451, "grad_norm": 1.71875, "learning_rate": 1.0817625273756767e-05, "loss": 0.5959, "step": 7558 }, { "epoch": 0.9546451969374062, "grad_norm": 2.015625, "learning_rate": 1.0815635505253363e-05, "loss": 0.6899, "step": 7559 }, { "epoch": 0.9547714894624675, "grad_norm": 2.625, "learning_rate": 1.0813645704240696e-05, "loss": 0.7322, "step": 7560 }, { "epoch": 0.9548977819875286, "grad_norm": 1.765625, "learning_rate": 1.081165587079807e-05, "loss": 0.6488, "step": 7561 }, { "epoch": 0.9550240745125897, "grad_norm": 1.8125, "learning_rate": 1.0809666005004795e-05, "loss": 0.6123, "step": 7562 }, { "epoch": 0.955150367037651, "grad_norm": 1.7265625, "learning_rate": 1.0807676106940184e-05, "loss": 0.5646, "step": 7563 }, { "epoch": 0.9552766595627121, "grad_norm": 1.859375, "learning_rate": 1.0805686176683547e-05, "loss": 0.6031, "step": 7564 }, { "epoch": 0.9554029520877733, "grad_norm": 1.9609375, "learning_rate": 1.0803696214314203e-05, "loss": 0.6981, "step": 7565 }, { "epoch": 0.9555292446128345, "grad_norm": 1.8828125, "learning_rate": 1.0801706219911461e-05, "loss": 0.6847, "step": 7566 }, { "epoch": 0.9556555371378956, "grad_norm": 1.7109375, "learning_rate": 1.079971619355464e-05, "loss": 0.7054, "step": 7567 }, { "epoch": 0.9557818296629568, "grad_norm": 1.8125, "learning_rate": 1.0797726135323059e-05, "loss": 0.6638, "step": 7568 }, { "epoch": 0.955908122188018, "grad_norm": 1.8984375, "learning_rate": 1.0795736045296035e-05, "loss": 0.7894, "step": 7569 }, { "epoch": 0.9560344147130792, "grad_norm": 1.8125, "learning_rate": 1.079374592355289e-05, "loss": 0.6775, "step": 7570 }, { "epoch": 0.9561607072381404, "grad_norm": 1.8203125, "learning_rate": 1.0791755770172943e-05, "loss": 0.634, "step": 7571 }, { "epoch": 0.9562869997632015, "grad_norm": 1.7109375, "learning_rate": 1.0789765585235518e-05, "loss": 0.6149, "step": 7572 }, { "epoch": 0.9564132922882627, "grad_norm": 1.6953125, "learning_rate": 1.078777536881994e-05, "loss": 0.578, "step": 7573 }, { "epoch": 0.9565395848133239, "grad_norm": 1.6796875, "learning_rate": 1.0785785121005534e-05, "loss": 0.6139, "step": 7574 }, { "epoch": 0.956665877338385, "grad_norm": 1.8203125, "learning_rate": 1.0783794841871625e-05, "loss": 0.7722, "step": 7575 }, { "epoch": 0.9567921698634462, "grad_norm": 1.9375, "learning_rate": 1.078180453149754e-05, "loss": 0.6376, "step": 7576 }, { "epoch": 0.9569184623885074, "grad_norm": 1.8984375, "learning_rate": 1.077981418996261e-05, "loss": 0.5897, "step": 7577 }, { "epoch": 0.9570447549135686, "grad_norm": 1.8359375, "learning_rate": 1.0777823817346165e-05, "loss": 0.6401, "step": 7578 }, { "epoch": 0.9571710474386297, "grad_norm": 1.8671875, "learning_rate": 1.0775833413727537e-05, "loss": 0.664, "step": 7579 }, { "epoch": 0.9572973399636909, "grad_norm": 1.78125, "learning_rate": 1.0773842979186057e-05, "loss": 0.6242, "step": 7580 }, { "epoch": 0.9574236324887521, "grad_norm": 1.8125, "learning_rate": 1.077185251380106e-05, "loss": 0.6932, "step": 7581 }, { "epoch": 0.9575499250138132, "grad_norm": 1.765625, "learning_rate": 1.0769862017651881e-05, "loss": 0.6381, "step": 7582 }, { "epoch": 0.9576762175388744, "grad_norm": 1.78125, "learning_rate": 1.0767871490817856e-05, "loss": 0.6684, "step": 7583 }, { "epoch": 0.9578025100639356, "grad_norm": 1.7421875, "learning_rate": 1.0765880933378325e-05, "loss": 0.596, "step": 7584 }, { "epoch": 0.9579288025889967, "grad_norm": 1.828125, "learning_rate": 1.0763890345412619e-05, "loss": 0.6579, "step": 7585 }, { "epoch": 0.958055095114058, "grad_norm": 1.734375, "learning_rate": 1.076189972700009e-05, "loss": 0.683, "step": 7586 }, { "epoch": 0.9581813876391191, "grad_norm": 1.9609375, "learning_rate": 1.075990907822007e-05, "loss": 0.6174, "step": 7587 }, { "epoch": 0.9583076801641803, "grad_norm": 2.03125, "learning_rate": 1.0757918399151905e-05, "loss": 0.7965, "step": 7588 }, { "epoch": 0.9584339726892415, "grad_norm": 1.953125, "learning_rate": 1.0755927689874939e-05, "loss": 0.6069, "step": 7589 }, { "epoch": 0.9585602652143026, "grad_norm": 1.78125, "learning_rate": 1.0753936950468515e-05, "loss": 0.6446, "step": 7590 }, { "epoch": 0.9586865577393638, "grad_norm": 1.703125, "learning_rate": 1.0751946181011981e-05, "loss": 0.5972, "step": 7591 }, { "epoch": 0.958812850264425, "grad_norm": 1.703125, "learning_rate": 1.0749955381584684e-05, "loss": 0.6003, "step": 7592 }, { "epoch": 0.9589391427894861, "grad_norm": 1.6484375, "learning_rate": 1.074796455226597e-05, "loss": 0.5903, "step": 7593 }, { "epoch": 0.9590654353145474, "grad_norm": 1.7265625, "learning_rate": 1.074597369313519e-05, "loss": 0.6341, "step": 7594 }, { "epoch": 0.9591917278396085, "grad_norm": 1.6875, "learning_rate": 1.0743982804271698e-05, "loss": 0.6398, "step": 7595 }, { "epoch": 0.9593180203646696, "grad_norm": 1.84375, "learning_rate": 1.0741991885754842e-05, "loss": 0.592, "step": 7596 }, { "epoch": 0.9594443128897309, "grad_norm": 1.7890625, "learning_rate": 1.0740000937663977e-05, "loss": 0.6872, "step": 7597 }, { "epoch": 0.959570605414792, "grad_norm": 1.890625, "learning_rate": 1.0738009960078456e-05, "loss": 0.6947, "step": 7598 }, { "epoch": 0.9596968979398531, "grad_norm": 1.890625, "learning_rate": 1.0736018953077638e-05, "loss": 0.6376, "step": 7599 }, { "epoch": 0.9598231904649144, "grad_norm": 1.765625, "learning_rate": 1.0734027916740875e-05, "loss": 0.6036, "step": 7600 }, { "epoch": 0.9599494829899755, "grad_norm": 2.359375, "learning_rate": 1.0732036851147528e-05, "loss": 0.6914, "step": 7601 }, { "epoch": 0.9600757755150368, "grad_norm": 1.84375, "learning_rate": 1.0730045756376956e-05, "loss": 0.6873, "step": 7602 }, { "epoch": 0.9602020680400979, "grad_norm": 1.828125, "learning_rate": 1.0728054632508515e-05, "loss": 0.5845, "step": 7603 }, { "epoch": 0.960328360565159, "grad_norm": 1.9296875, "learning_rate": 1.0726063479621574e-05, "loss": 0.5782, "step": 7604 }, { "epoch": 0.9604546530902203, "grad_norm": 1.9609375, "learning_rate": 1.0724072297795491e-05, "loss": 0.7413, "step": 7605 }, { "epoch": 0.9605809456152814, "grad_norm": 2.0, "learning_rate": 1.072208108710963e-05, "loss": 0.7173, "step": 7606 }, { "epoch": 0.9607072381403425, "grad_norm": 1.8046875, "learning_rate": 1.0720089847643356e-05, "loss": 0.5671, "step": 7607 }, { "epoch": 0.9608335306654038, "grad_norm": 1.921875, "learning_rate": 1.0718098579476035e-05, "loss": 0.6637, "step": 7608 }, { "epoch": 0.9609598231904649, "grad_norm": 1.921875, "learning_rate": 1.0716107282687034e-05, "loss": 0.6334, "step": 7609 }, { "epoch": 0.961086115715526, "grad_norm": 1.796875, "learning_rate": 1.0714115957355721e-05, "loss": 0.6003, "step": 7610 }, { "epoch": 0.9612124082405873, "grad_norm": 1.8125, "learning_rate": 1.071212460356147e-05, "loss": 0.696, "step": 7611 }, { "epoch": 0.9613387007656484, "grad_norm": 2.140625, "learning_rate": 1.0710133221383641e-05, "loss": 0.7421, "step": 7612 }, { "epoch": 0.9614649932907096, "grad_norm": 1.71875, "learning_rate": 1.0708141810901616e-05, "loss": 0.6251, "step": 7613 }, { "epoch": 0.9615912858157708, "grad_norm": 1.7578125, "learning_rate": 1.0706150372194763e-05, "loss": 0.6189, "step": 7614 }, { "epoch": 0.9617175783408319, "grad_norm": 1.90625, "learning_rate": 1.0704158905342458e-05, "loss": 0.7057, "step": 7615 }, { "epoch": 0.9618438708658932, "grad_norm": 1.875, "learning_rate": 1.0702167410424075e-05, "loss": 0.6302, "step": 7616 }, { "epoch": 0.9619701633909543, "grad_norm": 1.71875, "learning_rate": 1.070017588751899e-05, "loss": 0.6436, "step": 7617 }, { "epoch": 0.9620964559160154, "grad_norm": 1.90625, "learning_rate": 1.0698184336706582e-05, "loss": 0.7092, "step": 7618 }, { "epoch": 0.9622227484410767, "grad_norm": 1.84375, "learning_rate": 1.0696192758066227e-05, "loss": 0.5816, "step": 7619 }, { "epoch": 0.9623490409661378, "grad_norm": 1.671875, "learning_rate": 1.0694201151677307e-05, "loss": 0.5845, "step": 7620 }, { "epoch": 0.962475333491199, "grad_norm": 1.7421875, "learning_rate": 1.0692209517619197e-05, "loss": 0.571, "step": 7621 }, { "epoch": 0.9626016260162602, "grad_norm": 1.875, "learning_rate": 1.0690217855971286e-05, "loss": 0.6617, "step": 7622 }, { "epoch": 0.9627279185413213, "grad_norm": 1.7890625, "learning_rate": 1.0688226166812952e-05, "loss": 0.6099, "step": 7623 }, { "epoch": 0.9628542110663825, "grad_norm": 1.90625, "learning_rate": 1.0686234450223582e-05, "loss": 0.6063, "step": 7624 }, { "epoch": 0.9629805035914437, "grad_norm": 1.7109375, "learning_rate": 1.068424270628256e-05, "loss": 0.6233, "step": 7625 }, { "epoch": 0.9631067961165048, "grad_norm": 1.828125, "learning_rate": 1.0682250935069272e-05, "loss": 0.6245, "step": 7626 }, { "epoch": 0.963233088641566, "grad_norm": 1.921875, "learning_rate": 1.0680259136663104e-05, "loss": 0.6941, "step": 7627 }, { "epoch": 0.9633593811666272, "grad_norm": 2.171875, "learning_rate": 1.0678267311143447e-05, "loss": 0.6644, "step": 7628 }, { "epoch": 0.9634856736916884, "grad_norm": 1.9375, "learning_rate": 1.0676275458589687e-05, "loss": 0.7548, "step": 7629 }, { "epoch": 0.9636119662167495, "grad_norm": 1.6171875, "learning_rate": 1.0674283579081215e-05, "loss": 0.5524, "step": 7630 }, { "epoch": 0.9637382587418107, "grad_norm": 1.6640625, "learning_rate": 1.0672291672697424e-05, "loss": 0.5545, "step": 7631 }, { "epoch": 0.9638645512668719, "grad_norm": 1.609375, "learning_rate": 1.067029973951771e-05, "loss": 0.5528, "step": 7632 }, { "epoch": 0.9639908437919331, "grad_norm": 1.8359375, "learning_rate": 1.0668307779621459e-05, "loss": 0.6835, "step": 7633 }, { "epoch": 0.9641171363169942, "grad_norm": 1.6796875, "learning_rate": 1.0666315793088071e-05, "loss": 0.565, "step": 7634 }, { "epoch": 0.9642434288420554, "grad_norm": 1.703125, "learning_rate": 1.066432377999694e-05, "loss": 0.6603, "step": 7635 }, { "epoch": 0.9643697213671166, "grad_norm": 1.8046875, "learning_rate": 1.0662331740427465e-05, "loss": 0.6891, "step": 7636 }, { "epoch": 0.9644960138921778, "grad_norm": 1.828125, "learning_rate": 1.066033967445904e-05, "loss": 0.6634, "step": 7637 }, { "epoch": 0.9646223064172389, "grad_norm": 1.8125, "learning_rate": 1.0658347582171067e-05, "loss": 0.6547, "step": 7638 }, { "epoch": 0.9647485989423001, "grad_norm": 2.34375, "learning_rate": 1.0656355463642942e-05, "loss": 0.6162, "step": 7639 }, { "epoch": 0.9648748914673613, "grad_norm": 1.84375, "learning_rate": 1.0654363318954073e-05, "loss": 0.6556, "step": 7640 }, { "epoch": 0.9650011839924224, "grad_norm": 1.8203125, "learning_rate": 1.0652371148183858e-05, "loss": 0.6473, "step": 7641 }, { "epoch": 0.9651274765174837, "grad_norm": 1.875, "learning_rate": 1.06503789514117e-05, "loss": 0.6946, "step": 7642 }, { "epoch": 0.9652537690425448, "grad_norm": 1.890625, "learning_rate": 1.0648386728717005e-05, "loss": 0.6271, "step": 7643 }, { "epoch": 0.9653800615676059, "grad_norm": 1.8515625, "learning_rate": 1.0646394480179173e-05, "loss": 0.6662, "step": 7644 }, { "epoch": 0.9655063540926672, "grad_norm": 1.8984375, "learning_rate": 1.0644402205877617e-05, "loss": 0.6366, "step": 7645 }, { "epoch": 0.9656326466177283, "grad_norm": 1.671875, "learning_rate": 1.064240990589174e-05, "loss": 0.5828, "step": 7646 }, { "epoch": 0.9657589391427895, "grad_norm": 1.796875, "learning_rate": 1.0640417580300953e-05, "loss": 0.6128, "step": 7647 }, { "epoch": 0.9658852316678507, "grad_norm": 1.71875, "learning_rate": 1.0638425229184659e-05, "loss": 0.6123, "step": 7648 }, { "epoch": 0.9660115241929118, "grad_norm": 1.6875, "learning_rate": 1.0636432852622278e-05, "loss": 0.5962, "step": 7649 }, { "epoch": 0.966137816717973, "grad_norm": 1.875, "learning_rate": 1.0634440450693214e-05, "loss": 0.5876, "step": 7650 }, { "epoch": 0.9662641092430342, "grad_norm": 1.953125, "learning_rate": 1.0632448023476885e-05, "loss": 0.7727, "step": 7651 }, { "epoch": 0.9663904017680953, "grad_norm": 1.6875, "learning_rate": 1.0630455571052697e-05, "loss": 0.5847, "step": 7652 }, { "epoch": 0.9665166942931566, "grad_norm": 2.03125, "learning_rate": 1.0628463093500073e-05, "loss": 0.8645, "step": 7653 }, { "epoch": 0.9666429868182177, "grad_norm": 2.0, "learning_rate": 1.062647059089842e-05, "loss": 0.8413, "step": 7654 }, { "epoch": 0.9667692793432788, "grad_norm": 1.859375, "learning_rate": 1.0624478063327159e-05, "loss": 0.6546, "step": 7655 }, { "epoch": 0.9668955718683401, "grad_norm": 1.8828125, "learning_rate": 1.0622485510865707e-05, "loss": 0.746, "step": 7656 }, { "epoch": 0.9670218643934012, "grad_norm": 1.6015625, "learning_rate": 1.0620492933593481e-05, "loss": 0.5594, "step": 7657 }, { "epoch": 0.9671481569184623, "grad_norm": 1.9609375, "learning_rate": 1.0618500331589902e-05, "loss": 0.7206, "step": 7658 }, { "epoch": 0.9672744494435236, "grad_norm": 1.8203125, "learning_rate": 1.0616507704934392e-05, "loss": 0.6729, "step": 7659 }, { "epoch": 0.9674007419685847, "grad_norm": 1.7265625, "learning_rate": 1.0614515053706367e-05, "loss": 0.6004, "step": 7660 }, { "epoch": 0.9675270344936459, "grad_norm": 1.7265625, "learning_rate": 1.0612522377985253e-05, "loss": 0.6358, "step": 7661 }, { "epoch": 0.9676533270187071, "grad_norm": 1.734375, "learning_rate": 1.0610529677850471e-05, "loss": 0.619, "step": 7662 }, { "epoch": 0.9677796195437682, "grad_norm": 1.5859375, "learning_rate": 1.0608536953381448e-05, "loss": 0.552, "step": 7663 }, { "epoch": 0.9679059120688295, "grad_norm": 1.7890625, "learning_rate": 1.0606544204657607e-05, "loss": 0.6852, "step": 7664 }, { "epoch": 0.9680322045938906, "grad_norm": 1.8125, "learning_rate": 1.0604551431758376e-05, "loss": 0.6374, "step": 7665 }, { "epoch": 0.9681584971189517, "grad_norm": 1.6953125, "learning_rate": 1.0602558634763178e-05, "loss": 0.6123, "step": 7666 }, { "epoch": 0.968284789644013, "grad_norm": 1.8125, "learning_rate": 1.0600565813751446e-05, "loss": 0.6053, "step": 7667 }, { "epoch": 0.9684110821690741, "grad_norm": 1.8359375, "learning_rate": 1.059857296880261e-05, "loss": 0.6944, "step": 7668 }, { "epoch": 0.9685373746941353, "grad_norm": 1.6796875, "learning_rate": 1.0596580099996095e-05, "loss": 0.5738, "step": 7669 }, { "epoch": 0.9686636672191965, "grad_norm": 1.7421875, "learning_rate": 1.0594587207411333e-05, "loss": 0.6428, "step": 7670 }, { "epoch": 0.9687899597442576, "grad_norm": 1.6953125, "learning_rate": 1.0592594291127758e-05, "loss": 0.5795, "step": 7671 }, { "epoch": 0.9689162522693188, "grad_norm": 1.8046875, "learning_rate": 1.0590601351224803e-05, "loss": 0.5702, "step": 7672 }, { "epoch": 0.96904254479438, "grad_norm": 1.8125, "learning_rate": 1.0588608387781901e-05, "loss": 0.6274, "step": 7673 }, { "epoch": 0.9691688373194411, "grad_norm": 1.7265625, "learning_rate": 1.0586615400878484e-05, "loss": 0.5799, "step": 7674 }, { "epoch": 0.9692951298445023, "grad_norm": 1.734375, "learning_rate": 1.058462239059399e-05, "loss": 0.5705, "step": 7675 }, { "epoch": 0.9694214223695635, "grad_norm": 1.921875, "learning_rate": 1.0582629357007857e-05, "loss": 0.7421, "step": 7676 }, { "epoch": 0.9695477148946247, "grad_norm": 1.7578125, "learning_rate": 1.058063630019952e-05, "loss": 0.6824, "step": 7677 }, { "epoch": 0.9696740074196859, "grad_norm": 1.875, "learning_rate": 1.057864322024842e-05, "loss": 0.5943, "step": 7678 }, { "epoch": 0.969800299944747, "grad_norm": 1.8359375, "learning_rate": 1.0576650117233995e-05, "loss": 0.6672, "step": 7679 }, { "epoch": 0.9699265924698082, "grad_norm": 1.8125, "learning_rate": 1.0574656991235685e-05, "loss": 0.6051, "step": 7680 }, { "epoch": 0.9700528849948694, "grad_norm": 1.796875, "learning_rate": 1.0572663842332933e-05, "loss": 0.5978, "step": 7681 }, { "epoch": 0.9701791775199305, "grad_norm": 1.8046875, "learning_rate": 1.0570670670605178e-05, "loss": 0.6202, "step": 7682 }, { "epoch": 0.9703054700449917, "grad_norm": 1.8046875, "learning_rate": 1.0568677476131866e-05, "loss": 0.6162, "step": 7683 }, { "epoch": 0.9704317625700529, "grad_norm": 1.875, "learning_rate": 1.0566684258992437e-05, "loss": 0.6394, "step": 7684 }, { "epoch": 0.9705580550951141, "grad_norm": 1.8125, "learning_rate": 1.0564691019266338e-05, "loss": 0.6416, "step": 7685 }, { "epoch": 0.9706843476201752, "grad_norm": 1.9296875, "learning_rate": 1.0562697757033018e-05, "loss": 0.7037, "step": 7686 }, { "epoch": 0.9708106401452364, "grad_norm": 1.625, "learning_rate": 1.0560704472371919e-05, "loss": 0.5268, "step": 7687 }, { "epoch": 0.9709369326702976, "grad_norm": 1.796875, "learning_rate": 1.0558711165362491e-05, "loss": 0.6386, "step": 7688 }, { "epoch": 0.9710632251953587, "grad_norm": 1.7109375, "learning_rate": 1.0556717836084182e-05, "loss": 0.5901, "step": 7689 }, { "epoch": 0.97118951772042, "grad_norm": 2.0625, "learning_rate": 1.0554724484616441e-05, "loss": 0.8142, "step": 7690 }, { "epoch": 0.9713158102454811, "grad_norm": 1.8671875, "learning_rate": 1.0552731111038716e-05, "loss": 0.692, "step": 7691 }, { "epoch": 0.9714421027705422, "grad_norm": 1.6796875, "learning_rate": 1.0550737715430463e-05, "loss": 0.59, "step": 7692 }, { "epoch": 0.9715683952956035, "grad_norm": 1.78125, "learning_rate": 1.054874429787113e-05, "loss": 0.6755, "step": 7693 }, { "epoch": 0.9716946878206646, "grad_norm": 1.6875, "learning_rate": 1.0546750858440169e-05, "loss": 0.6451, "step": 7694 }, { "epoch": 0.9718209803457258, "grad_norm": 1.6015625, "learning_rate": 1.0544757397217038e-05, "loss": 0.6249, "step": 7695 }, { "epoch": 0.971947272870787, "grad_norm": 1.6484375, "learning_rate": 1.054276391428119e-05, "loss": 0.5669, "step": 7696 }, { "epoch": 0.9720735653958481, "grad_norm": 1.7890625, "learning_rate": 1.054077040971208e-05, "loss": 0.6472, "step": 7697 }, { "epoch": 0.9721998579209093, "grad_norm": 1.875, "learning_rate": 1.0538776883589164e-05, "loss": 0.6653, "step": 7698 }, { "epoch": 0.9723261504459705, "grad_norm": 1.796875, "learning_rate": 1.0536783335991896e-05, "loss": 0.5671, "step": 7699 }, { "epoch": 0.9724524429710316, "grad_norm": 1.6640625, "learning_rate": 1.053478976699974e-05, "loss": 0.5786, "step": 7700 }, { "epoch": 0.9725787354960929, "grad_norm": 1.765625, "learning_rate": 1.0532796176692151e-05, "loss": 0.5959, "step": 7701 }, { "epoch": 0.972705028021154, "grad_norm": 1.71875, "learning_rate": 1.0530802565148588e-05, "loss": 0.6627, "step": 7702 }, { "epoch": 0.9728313205462151, "grad_norm": 1.859375, "learning_rate": 1.0528808932448515e-05, "loss": 0.6289, "step": 7703 }, { "epoch": 0.9729576130712764, "grad_norm": 1.8515625, "learning_rate": 1.0526815278671392e-05, "loss": 0.6006, "step": 7704 }, { "epoch": 0.9730839055963375, "grad_norm": 1.75, "learning_rate": 1.0524821603896682e-05, "loss": 0.5317, "step": 7705 }, { "epoch": 0.9732101981213986, "grad_norm": 1.7265625, "learning_rate": 1.0522827908203845e-05, "loss": 0.6768, "step": 7706 }, { "epoch": 0.9733364906464599, "grad_norm": 1.7578125, "learning_rate": 1.0520834191672349e-05, "loss": 0.6138, "step": 7707 }, { "epoch": 0.973462783171521, "grad_norm": 1.8046875, "learning_rate": 1.0518840454381654e-05, "loss": 0.6247, "step": 7708 }, { "epoch": 0.9735890756965823, "grad_norm": 1.8359375, "learning_rate": 1.051684669641123e-05, "loss": 0.6141, "step": 7709 }, { "epoch": 0.9737153682216434, "grad_norm": 1.90625, "learning_rate": 1.0514852917840541e-05, "loss": 0.6671, "step": 7710 }, { "epoch": 0.9738416607467045, "grad_norm": 1.78125, "learning_rate": 1.0512859118749052e-05, "loss": 0.5986, "step": 7711 }, { "epoch": 0.9739679532717658, "grad_norm": 1.8046875, "learning_rate": 1.0510865299216237e-05, "loss": 0.5902, "step": 7712 }, { "epoch": 0.9740942457968269, "grad_norm": 1.9453125, "learning_rate": 1.0508871459321562e-05, "loss": 0.5914, "step": 7713 }, { "epoch": 0.974220538321888, "grad_norm": 1.71875, "learning_rate": 1.0506877599144496e-05, "loss": 0.5392, "step": 7714 }, { "epoch": 0.9743468308469493, "grad_norm": 1.6640625, "learning_rate": 1.0504883718764509e-05, "loss": 0.5739, "step": 7715 }, { "epoch": 0.9744731233720104, "grad_norm": 1.796875, "learning_rate": 1.0502889818261075e-05, "loss": 0.5969, "step": 7716 }, { "epoch": 0.9745994158970716, "grad_norm": 1.7578125, "learning_rate": 1.050089589771366e-05, "loss": 0.6475, "step": 7717 }, { "epoch": 0.9747257084221328, "grad_norm": 1.7109375, "learning_rate": 1.0498901957201745e-05, "loss": 0.5592, "step": 7718 }, { "epoch": 0.9748520009471939, "grad_norm": 1.8359375, "learning_rate": 1.04969079968048e-05, "loss": 0.6498, "step": 7719 }, { "epoch": 0.9749782934722551, "grad_norm": 1.6484375, "learning_rate": 1.0494914016602293e-05, "loss": 0.5659, "step": 7720 }, { "epoch": 0.9751045859973163, "grad_norm": 1.875, "learning_rate": 1.049292001667371e-05, "loss": 0.6307, "step": 7721 }, { "epoch": 0.9752308785223774, "grad_norm": 1.828125, "learning_rate": 1.0490925997098525e-05, "loss": 0.6145, "step": 7722 }, { "epoch": 0.9753571710474386, "grad_norm": 1.9375, "learning_rate": 1.0488931957956208e-05, "loss": 0.6415, "step": 7723 }, { "epoch": 0.9754834635724998, "grad_norm": 1.953125, "learning_rate": 1.0486937899326241e-05, "loss": 0.649, "step": 7724 }, { "epoch": 0.975609756097561, "grad_norm": 1.8125, "learning_rate": 1.0484943821288105e-05, "loss": 0.6181, "step": 7725 }, { "epoch": 0.9757360486226222, "grad_norm": 1.8984375, "learning_rate": 1.0482949723921274e-05, "loss": 0.7537, "step": 7726 }, { "epoch": 0.9758623411476833, "grad_norm": 1.9375, "learning_rate": 1.0480955607305232e-05, "loss": 0.6422, "step": 7727 }, { "epoch": 0.9759886336727445, "grad_norm": 2.0, "learning_rate": 1.0478961471519458e-05, "loss": 0.6661, "step": 7728 }, { "epoch": 0.9761149261978057, "grad_norm": 1.6328125, "learning_rate": 1.047696731664343e-05, "loss": 0.569, "step": 7729 }, { "epoch": 0.9762412187228668, "grad_norm": 1.7421875, "learning_rate": 1.0474973142756634e-05, "loss": 0.572, "step": 7730 }, { "epoch": 0.976367511247928, "grad_norm": 1.84375, "learning_rate": 1.0472978949938558e-05, "loss": 0.6905, "step": 7731 }, { "epoch": 0.9764938037729892, "grad_norm": 1.625, "learning_rate": 1.0470984738268675e-05, "loss": 0.6397, "step": 7732 }, { "epoch": 0.9766200962980504, "grad_norm": 1.8359375, "learning_rate": 1.0468990507826478e-05, "loss": 0.6525, "step": 7733 }, { "epoch": 0.9767463888231115, "grad_norm": 1.8203125, "learning_rate": 1.0466996258691447e-05, "loss": 0.6586, "step": 7734 }, { "epoch": 0.9768726813481727, "grad_norm": 1.71875, "learning_rate": 1.046500199094307e-05, "loss": 0.7075, "step": 7735 }, { "epoch": 0.9769989738732339, "grad_norm": 1.734375, "learning_rate": 1.0463007704660834e-05, "loss": 0.5936, "step": 7736 }, { "epoch": 0.977125266398295, "grad_norm": 1.8515625, "learning_rate": 1.0461013399924227e-05, "loss": 0.7487, "step": 7737 }, { "epoch": 0.9772515589233562, "grad_norm": 1.78125, "learning_rate": 1.0459019076812735e-05, "loss": 0.6456, "step": 7738 }, { "epoch": 0.9773778514484174, "grad_norm": 1.8203125, "learning_rate": 1.0457024735405847e-05, "loss": 0.676, "step": 7739 }, { "epoch": 0.9775041439734786, "grad_norm": 1.6484375, "learning_rate": 1.0455030375783058e-05, "loss": 0.6158, "step": 7740 }, { "epoch": 0.9776304364985398, "grad_norm": 1.8515625, "learning_rate": 1.045303599802385e-05, "loss": 0.6204, "step": 7741 }, { "epoch": 0.9777567290236009, "grad_norm": 1.859375, "learning_rate": 1.045104160220772e-05, "loss": 0.67, "step": 7742 }, { "epoch": 0.9778830215486621, "grad_norm": 1.796875, "learning_rate": 1.0449047188414161e-05, "loss": 0.6038, "step": 7743 }, { "epoch": 0.9780093140737233, "grad_norm": 1.8515625, "learning_rate": 1.044705275672266e-05, "loss": 0.5626, "step": 7744 }, { "epoch": 0.9781356065987844, "grad_norm": 1.9375, "learning_rate": 1.0445058307212712e-05, "loss": 0.6629, "step": 7745 }, { "epoch": 0.9782618991238456, "grad_norm": 1.7734375, "learning_rate": 1.0443063839963814e-05, "loss": 0.6606, "step": 7746 }, { "epoch": 0.9783881916489068, "grad_norm": 1.8671875, "learning_rate": 1.0441069355055455e-05, "loss": 0.5876, "step": 7747 }, { "epoch": 0.9785144841739679, "grad_norm": 1.8125, "learning_rate": 1.0439074852567137e-05, "loss": 0.6221, "step": 7748 }, { "epoch": 0.9786407766990292, "grad_norm": 1.7734375, "learning_rate": 1.0437080332578351e-05, "loss": 0.6122, "step": 7749 }, { "epoch": 0.9787670692240903, "grad_norm": 1.75, "learning_rate": 1.0435085795168598e-05, "loss": 0.6219, "step": 7750 }, { "epoch": 0.9788933617491514, "grad_norm": 1.859375, "learning_rate": 1.0433091240417372e-05, "loss": 0.635, "step": 7751 }, { "epoch": 0.9790196542742127, "grad_norm": 1.90625, "learning_rate": 1.0431096668404173e-05, "loss": 0.6384, "step": 7752 }, { "epoch": 0.9791459467992738, "grad_norm": 1.890625, "learning_rate": 1.0429102079208499e-05, "loss": 0.6776, "step": 7753 }, { "epoch": 0.9792722393243349, "grad_norm": 1.765625, "learning_rate": 1.042710747290985e-05, "loss": 0.5984, "step": 7754 }, { "epoch": 0.9793985318493962, "grad_norm": 2.03125, "learning_rate": 1.0425112849587725e-05, "loss": 0.7174, "step": 7755 }, { "epoch": 0.9795248243744573, "grad_norm": 1.71875, "learning_rate": 1.0423118209321624e-05, "loss": 0.6281, "step": 7756 }, { "epoch": 0.9796511168995186, "grad_norm": 1.8125, "learning_rate": 1.0421123552191053e-05, "loss": 0.6121, "step": 7757 }, { "epoch": 0.9797774094245797, "grad_norm": 1.8125, "learning_rate": 1.041912887827551e-05, "loss": 0.6003, "step": 7758 }, { "epoch": 0.9799037019496408, "grad_norm": 1.875, "learning_rate": 1.04171341876545e-05, "loss": 0.6478, "step": 7759 }, { "epoch": 0.9800299944747021, "grad_norm": 1.7734375, "learning_rate": 1.0415139480407526e-05, "loss": 0.6517, "step": 7760 }, { "epoch": 0.9801562869997632, "grad_norm": 1.7421875, "learning_rate": 1.0413144756614093e-05, "loss": 0.63, "step": 7761 }, { "epoch": 0.9802825795248243, "grad_norm": 1.84375, "learning_rate": 1.0411150016353707e-05, "loss": 0.5947, "step": 7762 }, { "epoch": 0.9804088720498856, "grad_norm": 1.703125, "learning_rate": 1.0409155259705867e-05, "loss": 0.5897, "step": 7763 }, { "epoch": 0.9805351645749467, "grad_norm": 1.8984375, "learning_rate": 1.0407160486750087e-05, "loss": 0.6008, "step": 7764 }, { "epoch": 0.9806614571000078, "grad_norm": 1.96875, "learning_rate": 1.0405165697565867e-05, "loss": 0.6512, "step": 7765 }, { "epoch": 0.9807877496250691, "grad_norm": 1.859375, "learning_rate": 1.0403170892232721e-05, "loss": 0.7078, "step": 7766 }, { "epoch": 0.9809140421501302, "grad_norm": 1.75, "learning_rate": 1.0401176070830154e-05, "loss": 0.5509, "step": 7767 }, { "epoch": 0.9810403346751914, "grad_norm": 1.90625, "learning_rate": 1.0399181233437676e-05, "loss": 0.6233, "step": 7768 }, { "epoch": 0.9811666272002526, "grad_norm": 1.9296875, "learning_rate": 1.0397186380134796e-05, "loss": 0.624, "step": 7769 }, { "epoch": 0.9812929197253137, "grad_norm": 1.7109375, "learning_rate": 1.0395191511001023e-05, "loss": 0.6039, "step": 7770 }, { "epoch": 0.981419212250375, "grad_norm": 1.609375, "learning_rate": 1.0393196626115868e-05, "loss": 0.5452, "step": 7771 }, { "epoch": 0.9815455047754361, "grad_norm": 1.8671875, "learning_rate": 1.0391201725558842e-05, "loss": 0.722, "step": 7772 }, { "epoch": 0.9816717973004973, "grad_norm": 1.7890625, "learning_rate": 1.0389206809409458e-05, "loss": 0.6248, "step": 7773 }, { "epoch": 0.9817980898255585, "grad_norm": 1.8046875, "learning_rate": 1.0387211877747225e-05, "loss": 0.5852, "step": 7774 }, { "epoch": 0.9819243823506196, "grad_norm": 1.859375, "learning_rate": 1.0385216930651663e-05, "loss": 0.5979, "step": 7775 }, { "epoch": 0.9820506748756808, "grad_norm": 1.8671875, "learning_rate": 1.0383221968202283e-05, "loss": 0.6883, "step": 7776 }, { "epoch": 0.982176967400742, "grad_norm": 2.03125, "learning_rate": 1.0381226990478596e-05, "loss": 0.594, "step": 7777 }, { "epoch": 0.9823032599258031, "grad_norm": 1.65625, "learning_rate": 1.037923199756012e-05, "loss": 0.5595, "step": 7778 }, { "epoch": 0.9824295524508643, "grad_norm": 1.8515625, "learning_rate": 1.0377236989526369e-05, "loss": 0.6187, "step": 7779 }, { "epoch": 0.9825558449759255, "grad_norm": 1.7734375, "learning_rate": 1.0375241966456862e-05, "loss": 0.6437, "step": 7780 }, { "epoch": 0.9826821375009867, "grad_norm": 1.7890625, "learning_rate": 1.0373246928431113e-05, "loss": 0.5436, "step": 7781 }, { "epoch": 0.9828084300260478, "grad_norm": 1.8125, "learning_rate": 1.0371251875528638e-05, "loss": 0.6391, "step": 7782 }, { "epoch": 0.982934722551109, "grad_norm": 1.8828125, "learning_rate": 1.0369256807828963e-05, "loss": 0.6881, "step": 7783 }, { "epoch": 0.9830610150761702, "grad_norm": 2.0, "learning_rate": 1.0367261725411598e-05, "loss": 0.5972, "step": 7784 }, { "epoch": 0.9831873076012313, "grad_norm": 1.7890625, "learning_rate": 1.0365266628356063e-05, "loss": 0.6141, "step": 7785 }, { "epoch": 0.9833136001262925, "grad_norm": 2.0, "learning_rate": 1.0363271516741884e-05, "loss": 0.5921, "step": 7786 }, { "epoch": 0.9834398926513537, "grad_norm": 1.8125, "learning_rate": 1.0361276390648576e-05, "loss": 0.5498, "step": 7787 }, { "epoch": 0.9835661851764149, "grad_norm": 1.796875, "learning_rate": 1.0359281250155662e-05, "loss": 0.6607, "step": 7788 }, { "epoch": 0.983692477701476, "grad_norm": 2.03125, "learning_rate": 1.035728609534266e-05, "loss": 0.5979, "step": 7789 }, { "epoch": 0.9838187702265372, "grad_norm": 1.671875, "learning_rate": 1.0355290926289095e-05, "loss": 0.6202, "step": 7790 }, { "epoch": 0.9839450627515984, "grad_norm": 1.8515625, "learning_rate": 1.0353295743074492e-05, "loss": 0.6162, "step": 7791 }, { "epoch": 0.9840713552766596, "grad_norm": 1.7578125, "learning_rate": 1.0351300545778372e-05, "loss": 0.6469, "step": 7792 }, { "epoch": 0.9841976478017207, "grad_norm": 1.90625, "learning_rate": 1.0349305334480254e-05, "loss": 0.6157, "step": 7793 }, { "epoch": 0.9843239403267819, "grad_norm": 1.7734375, "learning_rate": 1.0347310109259672e-05, "loss": 0.5902, "step": 7794 }, { "epoch": 0.9844502328518431, "grad_norm": 1.703125, "learning_rate": 1.0345314870196143e-05, "loss": 0.6534, "step": 7795 }, { "epoch": 0.9845765253769042, "grad_norm": 1.8203125, "learning_rate": 1.0343319617369196e-05, "loss": 0.653, "step": 7796 }, { "epoch": 0.9847028179019655, "grad_norm": 1.7421875, "learning_rate": 1.0341324350858354e-05, "loss": 0.6118, "step": 7797 }, { "epoch": 0.9848291104270266, "grad_norm": 1.7890625, "learning_rate": 1.0339329070743147e-05, "loss": 0.5934, "step": 7798 }, { "epoch": 0.9849554029520877, "grad_norm": 1.8125, "learning_rate": 1.03373337771031e-05, "loss": 0.6641, "step": 7799 }, { "epoch": 0.985081695477149, "grad_norm": 1.9296875, "learning_rate": 1.0335338470017742e-05, "loss": 0.7335, "step": 7800 }, { "epoch": 0.9852079880022101, "grad_norm": 1.9921875, "learning_rate": 1.0333343149566599e-05, "loss": 0.7091, "step": 7801 }, { "epoch": 0.9853342805272713, "grad_norm": 1.8359375, "learning_rate": 1.0331347815829203e-05, "loss": 0.669, "step": 7802 }, { "epoch": 0.9854605730523325, "grad_norm": 1.9609375, "learning_rate": 1.032935246888508e-05, "loss": 0.691, "step": 7803 }, { "epoch": 0.9855868655773936, "grad_norm": 1.625, "learning_rate": 1.032735710881376e-05, "loss": 0.562, "step": 7804 }, { "epoch": 0.9857131581024549, "grad_norm": 1.8671875, "learning_rate": 1.0325361735694777e-05, "loss": 0.5912, "step": 7805 }, { "epoch": 0.985839450627516, "grad_norm": 1.8828125, "learning_rate": 1.0323366349607654e-05, "loss": 0.6291, "step": 7806 }, { "epoch": 0.9859657431525771, "grad_norm": 1.6796875, "learning_rate": 1.032137095063193e-05, "loss": 0.563, "step": 7807 }, { "epoch": 0.9860920356776384, "grad_norm": 1.7421875, "learning_rate": 1.0319375538847134e-05, "loss": 0.5642, "step": 7808 }, { "epoch": 0.9862183282026995, "grad_norm": 1.765625, "learning_rate": 1.03173801143328e-05, "loss": 0.6301, "step": 7809 }, { "epoch": 0.9863446207277606, "grad_norm": 1.984375, "learning_rate": 1.0315384677168456e-05, "loss": 0.6411, "step": 7810 }, { "epoch": 0.9864709132528219, "grad_norm": 1.7890625, "learning_rate": 1.031338922743364e-05, "loss": 0.6056, "step": 7811 }, { "epoch": 0.986597205777883, "grad_norm": 1.7109375, "learning_rate": 1.0311393765207884e-05, "loss": 0.5706, "step": 7812 }, { "epoch": 0.9867234983029441, "grad_norm": 1.78125, "learning_rate": 1.0309398290570725e-05, "loss": 0.6442, "step": 7813 }, { "epoch": 0.9868497908280054, "grad_norm": 1.9296875, "learning_rate": 1.030740280360169e-05, "loss": 0.6812, "step": 7814 }, { "epoch": 0.9869760833530665, "grad_norm": 1.84375, "learning_rate": 1.0305407304380324e-05, "loss": 0.6547, "step": 7815 }, { "epoch": 0.9871023758781277, "grad_norm": 2.078125, "learning_rate": 1.0303411792986157e-05, "loss": 0.6666, "step": 7816 }, { "epoch": 0.9872286684031889, "grad_norm": 1.796875, "learning_rate": 1.0301416269498728e-05, "loss": 0.5881, "step": 7817 }, { "epoch": 0.98735496092825, "grad_norm": 1.734375, "learning_rate": 1.0299420733997569e-05, "loss": 0.5694, "step": 7818 }, { "epoch": 0.9874812534533113, "grad_norm": 1.78125, "learning_rate": 1.0297425186562224e-05, "loss": 0.5942, "step": 7819 }, { "epoch": 0.9876075459783724, "grad_norm": 1.9609375, "learning_rate": 1.0295429627272228e-05, "loss": 0.6672, "step": 7820 }, { "epoch": 0.9877338385034335, "grad_norm": 1.8203125, "learning_rate": 1.0293434056207116e-05, "loss": 0.5814, "step": 7821 }, { "epoch": 0.9878601310284948, "grad_norm": 1.78125, "learning_rate": 1.0291438473446428e-05, "loss": 0.5844, "step": 7822 }, { "epoch": 0.9879864235535559, "grad_norm": 1.7578125, "learning_rate": 1.0289442879069709e-05, "loss": 0.6599, "step": 7823 }, { "epoch": 0.9881127160786171, "grad_norm": 1.90625, "learning_rate": 1.0287447273156492e-05, "loss": 0.5957, "step": 7824 }, { "epoch": 0.9882390086036783, "grad_norm": 1.7578125, "learning_rate": 1.028545165578632e-05, "loss": 0.6267, "step": 7825 }, { "epoch": 0.9883653011287394, "grad_norm": 1.96875, "learning_rate": 1.0283456027038732e-05, "loss": 0.7051, "step": 7826 }, { "epoch": 0.9884915936538006, "grad_norm": 1.8125, "learning_rate": 1.0281460386993269e-05, "loss": 0.6896, "step": 7827 }, { "epoch": 0.9886178861788618, "grad_norm": 1.703125, "learning_rate": 1.0279464735729472e-05, "loss": 0.6203, "step": 7828 }, { "epoch": 0.988744178703923, "grad_norm": 1.8203125, "learning_rate": 1.0277469073326886e-05, "loss": 0.6179, "step": 7829 }, { "epoch": 0.9888704712289841, "grad_norm": 1.6015625, "learning_rate": 1.0275473399865048e-05, "loss": 0.556, "step": 7830 }, { "epoch": 0.9889967637540453, "grad_norm": 1.8359375, "learning_rate": 1.0273477715423505e-05, "loss": 0.6312, "step": 7831 }, { "epoch": 0.9891230562791065, "grad_norm": 1.75, "learning_rate": 1.02714820200818e-05, "loss": 0.769, "step": 7832 }, { "epoch": 0.9892493488041677, "grad_norm": 1.7265625, "learning_rate": 1.0269486313919477e-05, "loss": 0.6429, "step": 7833 }, { "epoch": 0.9893756413292288, "grad_norm": 1.84375, "learning_rate": 1.0267490597016076e-05, "loss": 0.6413, "step": 7834 }, { "epoch": 0.98950193385429, "grad_norm": 1.7421875, "learning_rate": 1.0265494869451145e-05, "loss": 0.5524, "step": 7835 }, { "epoch": 0.9896282263793512, "grad_norm": 1.7109375, "learning_rate": 1.0263499131304228e-05, "loss": 0.6151, "step": 7836 }, { "epoch": 0.9897545189044124, "grad_norm": 1.96875, "learning_rate": 1.0261503382654872e-05, "loss": 0.5951, "step": 7837 }, { "epoch": 0.9898808114294735, "grad_norm": 1.6484375, "learning_rate": 1.0259507623582616e-05, "loss": 0.5443, "step": 7838 }, { "epoch": 0.9900071039545347, "grad_norm": 1.71875, "learning_rate": 1.0257511854167014e-05, "loss": 0.683, "step": 7839 }, { "epoch": 0.9901333964795959, "grad_norm": 1.8515625, "learning_rate": 1.025551607448761e-05, "loss": 0.7092, "step": 7840 }, { "epoch": 0.990259689004657, "grad_norm": 1.8515625, "learning_rate": 1.0253520284623952e-05, "loss": 0.6659, "step": 7841 }, { "epoch": 0.9903859815297182, "grad_norm": 1.7578125, "learning_rate": 1.0251524484655587e-05, "loss": 0.5888, "step": 7842 }, { "epoch": 0.9905122740547794, "grad_norm": 2.015625, "learning_rate": 1.024952867466206e-05, "loss": 0.7045, "step": 7843 }, { "epoch": 0.9906385665798405, "grad_norm": 1.8203125, "learning_rate": 1.024753285472292e-05, "loss": 0.6935, "step": 7844 }, { "epoch": 0.9907648591049018, "grad_norm": 1.8125, "learning_rate": 1.0245537024917717e-05, "loss": 0.635, "step": 7845 }, { "epoch": 0.9908911516299629, "grad_norm": 1.8671875, "learning_rate": 1.0243541185326001e-05, "loss": 0.6187, "step": 7846 }, { "epoch": 0.9910174441550241, "grad_norm": 1.8359375, "learning_rate": 1.0241545336027316e-05, "loss": 0.7057, "step": 7847 }, { "epoch": 0.9911437366800853, "grad_norm": 1.765625, "learning_rate": 1.0239549477101221e-05, "loss": 0.5893, "step": 7848 }, { "epoch": 0.9912700292051464, "grad_norm": 1.8203125, "learning_rate": 1.0237553608627257e-05, "loss": 0.6318, "step": 7849 }, { "epoch": 0.9913963217302076, "grad_norm": 1.734375, "learning_rate": 1.0235557730684982e-05, "loss": 0.5677, "step": 7850 }, { "epoch": 0.9915226142552688, "grad_norm": 1.71875, "learning_rate": 1.023356184335394e-05, "loss": 0.5802, "step": 7851 }, { "epoch": 0.9916489067803299, "grad_norm": 1.90625, "learning_rate": 1.0231565946713688e-05, "loss": 0.6796, "step": 7852 }, { "epoch": 0.9917751993053912, "grad_norm": 1.8125, "learning_rate": 1.0229570040843775e-05, "loss": 0.6317, "step": 7853 }, { "epoch": 0.9919014918304523, "grad_norm": 1.8046875, "learning_rate": 1.0227574125823752e-05, "loss": 0.5732, "step": 7854 }, { "epoch": 0.9920277843555134, "grad_norm": 1.953125, "learning_rate": 1.0225578201733172e-05, "loss": 0.6693, "step": 7855 }, { "epoch": 0.9921540768805747, "grad_norm": 1.8671875, "learning_rate": 1.0223582268651585e-05, "loss": 0.6165, "step": 7856 }, { "epoch": 0.9922803694056358, "grad_norm": 1.828125, "learning_rate": 1.0221586326658553e-05, "loss": 0.692, "step": 7857 }, { "epoch": 0.9924066619306969, "grad_norm": 1.703125, "learning_rate": 1.0219590375833622e-05, "loss": 0.6001, "step": 7858 }, { "epoch": 0.9925329544557582, "grad_norm": 1.828125, "learning_rate": 1.0217594416256346e-05, "loss": 0.6284, "step": 7859 }, { "epoch": 0.9926592469808193, "grad_norm": 1.7578125, "learning_rate": 1.0215598448006284e-05, "loss": 0.6422, "step": 7860 }, { "epoch": 0.9927855395058804, "grad_norm": 1.84375, "learning_rate": 1.0213602471162984e-05, "loss": 0.5953, "step": 7861 }, { "epoch": 0.9929118320309417, "grad_norm": 1.78125, "learning_rate": 1.0211606485806006e-05, "loss": 0.6253, "step": 7862 }, { "epoch": 0.9930381245560028, "grad_norm": 1.8984375, "learning_rate": 1.0209610492014904e-05, "loss": 0.6572, "step": 7863 }, { "epoch": 0.9931644170810641, "grad_norm": 1.75, "learning_rate": 1.020761448986923e-05, "loss": 0.5517, "step": 7864 }, { "epoch": 0.9932907096061252, "grad_norm": 1.7890625, "learning_rate": 1.0205618479448543e-05, "loss": 0.6264, "step": 7865 }, { "epoch": 0.9934170021311863, "grad_norm": 1.8125, "learning_rate": 1.0203622460832399e-05, "loss": 0.5473, "step": 7866 }, { "epoch": 0.9935432946562476, "grad_norm": 1.9140625, "learning_rate": 1.0201626434100354e-05, "loss": 0.6374, "step": 7867 }, { "epoch": 0.9936695871813087, "grad_norm": 1.8828125, "learning_rate": 1.0199630399331965e-05, "loss": 0.8255, "step": 7868 }, { "epoch": 0.9937958797063698, "grad_norm": 1.765625, "learning_rate": 1.019763435660679e-05, "loss": 0.6536, "step": 7869 }, { "epoch": 0.9939221722314311, "grad_norm": 1.9140625, "learning_rate": 1.0195638306004384e-05, "loss": 0.6405, "step": 7870 }, { "epoch": 0.9940484647564922, "grad_norm": 1.96875, "learning_rate": 1.0193642247604307e-05, "loss": 0.5961, "step": 7871 }, { "epoch": 0.9941747572815534, "grad_norm": 1.96875, "learning_rate": 1.019164618148612e-05, "loss": 0.6116, "step": 7872 }, { "epoch": 0.9943010498066146, "grad_norm": 1.9140625, "learning_rate": 1.0189650107729374e-05, "loss": 0.6875, "step": 7873 }, { "epoch": 0.9944273423316757, "grad_norm": 1.8828125, "learning_rate": 1.018765402641363e-05, "loss": 0.7318, "step": 7874 }, { "epoch": 0.9945536348567369, "grad_norm": 2.03125, "learning_rate": 1.018565793761845e-05, "loss": 0.7432, "step": 7875 }, { "epoch": 0.9946799273817981, "grad_norm": 1.8828125, "learning_rate": 1.0183661841423394e-05, "loss": 0.6271, "step": 7876 }, { "epoch": 0.9948062199068592, "grad_norm": 1.7890625, "learning_rate": 1.0181665737908018e-05, "loss": 0.5621, "step": 7877 }, { "epoch": 0.9949325124319205, "grad_norm": 1.6953125, "learning_rate": 1.0179669627151882e-05, "loss": 0.4996, "step": 7878 }, { "epoch": 0.9950588049569816, "grad_norm": 1.859375, "learning_rate": 1.017767350923455e-05, "loss": 0.6144, "step": 7879 }, { "epoch": 0.9951850974820428, "grad_norm": 1.9375, "learning_rate": 1.0175677384235577e-05, "loss": 0.6403, "step": 7880 }, { "epoch": 0.995311390007104, "grad_norm": 1.9296875, "learning_rate": 1.0173681252234528e-05, "loss": 0.5671, "step": 7881 }, { "epoch": 0.9954376825321651, "grad_norm": 1.90625, "learning_rate": 1.0171685113310964e-05, "loss": 0.5636, "step": 7882 }, { "epoch": 0.9955639750572263, "grad_norm": 1.796875, "learning_rate": 1.016968896754444e-05, "loss": 0.6217, "step": 7883 }, { "epoch": 0.9956902675822875, "grad_norm": 1.6875, "learning_rate": 1.0167692815014527e-05, "loss": 0.6092, "step": 7884 }, { "epoch": 0.9958165601073486, "grad_norm": 1.8125, "learning_rate": 1.0165696655800781e-05, "loss": 0.6259, "step": 7885 }, { "epoch": 0.9959428526324098, "grad_norm": 2.078125, "learning_rate": 1.0163700489982767e-05, "loss": 0.614, "step": 7886 }, { "epoch": 0.996069145157471, "grad_norm": 1.875, "learning_rate": 1.0161704317640044e-05, "loss": 0.6078, "step": 7887 }, { "epoch": 0.9961954376825322, "grad_norm": 1.6796875, "learning_rate": 1.0159708138852175e-05, "loss": 0.6494, "step": 7888 }, { "epoch": 0.9963217302075933, "grad_norm": 1.75, "learning_rate": 1.0157711953698728e-05, "loss": 0.5868, "step": 7889 }, { "epoch": 0.9964480227326545, "grad_norm": 1.6484375, "learning_rate": 1.015571576225926e-05, "loss": 0.5262, "step": 7890 }, { "epoch": 0.9965743152577157, "grad_norm": 1.8125, "learning_rate": 1.0153719564613338e-05, "loss": 0.6167, "step": 7891 }, { "epoch": 0.9967006077827768, "grad_norm": 1.765625, "learning_rate": 1.0151723360840522e-05, "loss": 0.6144, "step": 7892 }, { "epoch": 0.996826900307838, "grad_norm": 1.8125, "learning_rate": 1.0149727151020378e-05, "loss": 0.689, "step": 7893 }, { "epoch": 0.9969531928328992, "grad_norm": 1.8984375, "learning_rate": 1.0147730935232474e-05, "loss": 0.7043, "step": 7894 }, { "epoch": 0.9970794853579604, "grad_norm": 1.9453125, "learning_rate": 1.014573471355637e-05, "loss": 0.7913, "step": 7895 }, { "epoch": 0.9972057778830216, "grad_norm": 1.7890625, "learning_rate": 1.014373848607163e-05, "loss": 0.6283, "step": 7896 }, { "epoch": 0.9973320704080827, "grad_norm": 1.96875, "learning_rate": 1.014174225285782e-05, "loss": 0.6445, "step": 7897 }, { "epoch": 0.9974583629331439, "grad_norm": 1.640625, "learning_rate": 1.0139746013994507e-05, "loss": 0.5555, "step": 7898 }, { "epoch": 0.9975846554582051, "grad_norm": 1.703125, "learning_rate": 1.0137749769561254e-05, "loss": 0.507, "step": 7899 }, { "epoch": 0.9977109479832662, "grad_norm": 1.7421875, "learning_rate": 1.0135753519637625e-05, "loss": 0.6339, "step": 7900 }, { "epoch": 0.9978372405083274, "grad_norm": 1.90625, "learning_rate": 1.0133757264303187e-05, "loss": 0.6959, "step": 7901 }, { "epoch": 0.9979635330333886, "grad_norm": 1.75, "learning_rate": 1.013176100363751e-05, "loss": 0.5989, "step": 7902 }, { "epoch": 0.9980898255584497, "grad_norm": 1.65625, "learning_rate": 1.0129764737720156e-05, "loss": 0.559, "step": 7903 }, { "epoch": 0.998216118083511, "grad_norm": 1.8125, "learning_rate": 1.0127768466630692e-05, "loss": 0.6412, "step": 7904 }, { "epoch": 0.9983424106085721, "grad_norm": 1.96875, "learning_rate": 1.0125772190448684e-05, "loss": 0.6428, "step": 7905 }, { "epoch": 0.9984687031336332, "grad_norm": 1.765625, "learning_rate": 1.0123775909253698e-05, "loss": 0.5709, "step": 7906 }, { "epoch": 0.9985949956586945, "grad_norm": 1.8828125, "learning_rate": 1.0121779623125305e-05, "loss": 0.6335, "step": 7907 }, { "epoch": 0.9987212881837556, "grad_norm": 1.8046875, "learning_rate": 1.011978333214307e-05, "loss": 0.6827, "step": 7908 }, { "epoch": 0.9988475807088169, "grad_norm": 1.7890625, "learning_rate": 1.0117787036386558e-05, "loss": 0.6945, "step": 7909 }, { "epoch": 0.998973873233878, "grad_norm": 1.8125, "learning_rate": 1.0115790735935335e-05, "loss": 0.5749, "step": 7910 }, { "epoch": 0.9991001657589391, "grad_norm": 1.6953125, "learning_rate": 1.0113794430868977e-05, "loss": 0.688, "step": 7911 }, { "epoch": 0.9992264582840004, "grad_norm": 1.796875, "learning_rate": 1.0111798121267047e-05, "loss": 0.7123, "step": 7912 }, { "epoch": 0.9993527508090615, "grad_norm": 1.7890625, "learning_rate": 1.0109801807209113e-05, "loss": 0.6546, "step": 7913 }, { "epoch": 0.9994790433341226, "grad_norm": 1.96875, "learning_rate": 1.0107805488774742e-05, "loss": 0.5718, "step": 7914 }, { "epoch": 0.9996053358591839, "grad_norm": 1.78125, "learning_rate": 1.0105809166043505e-05, "loss": 0.5841, "step": 7915 }, { "epoch": 0.999731628384245, "grad_norm": 1.796875, "learning_rate": 1.0103812839094969e-05, "loss": 0.6069, "step": 7916 }, { "epoch": 0.9998579209093061, "grad_norm": 1.734375, "learning_rate": 1.0101816508008704e-05, "loss": 0.6435, "step": 7917 }, { "epoch": 0.9999842134343674, "grad_norm": 1.8359375, "learning_rate": 1.0099820172864279e-05, "loss": 0.5924, "step": 7918 }, { "epoch": 0.9999842134343674, "eval_loss": 0.7777696251869202, "eval_runtime": 4335.8804, "eval_samples_per_second": 11.495, "eval_steps_per_second": 3.832, "step": 7918 }, { "epoch": 1.0001105059594286, "grad_norm": 1.8515625, "learning_rate": 1.0097823833741259e-05, "loss": 0.6533, "step": 7919 }, { "epoch": 1.0002367984844898, "grad_norm": 1.84375, "learning_rate": 1.0095827490719217e-05, "loss": 0.6713, "step": 7920 }, { "epoch": 1.000363091009551, "grad_norm": 1.6484375, "learning_rate": 1.0093831143877724e-05, "loss": 0.5305, "step": 7921 }, { "epoch": 1.000489383534612, "grad_norm": 1.90625, "learning_rate": 1.0091834793296347e-05, "loss": 0.6244, "step": 7922 }, { "epoch": 1.0006156760596732, "grad_norm": 1.6484375, "learning_rate": 1.0089838439054658e-05, "loss": 0.5426, "step": 7923 }, { "epoch": 1.0007419685847343, "grad_norm": 1.7890625, "learning_rate": 1.0087842081232225e-05, "loss": 0.6171, "step": 7924 }, { "epoch": 1.0008682611097957, "grad_norm": 1.765625, "learning_rate": 1.0085845719908615e-05, "loss": 0.5946, "step": 7925 }, { "epoch": 1.0009945536348568, "grad_norm": 1.703125, "learning_rate": 1.0083849355163404e-05, "loss": 0.5792, "step": 7926 }, { "epoch": 1.001120846159918, "grad_norm": 1.765625, "learning_rate": 1.008185298707616e-05, "loss": 0.6274, "step": 7927 }, { "epoch": 1.001247138684979, "grad_norm": 1.8203125, "learning_rate": 1.0079856615726448e-05, "loss": 0.5609, "step": 7928 }, { "epoch": 1.0013734312100402, "grad_norm": 1.90625, "learning_rate": 1.007786024119385e-05, "loss": 0.6757, "step": 7929 }, { "epoch": 1.0014997237351013, "grad_norm": 1.7890625, "learning_rate": 1.0075863863557926e-05, "loss": 0.6219, "step": 7930 }, { "epoch": 1.0016260162601627, "grad_norm": 1.796875, "learning_rate": 1.0073867482898254e-05, "loss": 0.7078, "step": 7931 }, { "epoch": 1.0017523087852238, "grad_norm": 1.8203125, "learning_rate": 1.0071871099294401e-05, "loss": 0.6783, "step": 7932 }, { "epoch": 1.001878601310285, "grad_norm": 1.6484375, "learning_rate": 1.006987471282594e-05, "loss": 0.5936, "step": 7933 }, { "epoch": 1.002004893835346, "grad_norm": 1.6953125, "learning_rate": 1.0067878323572443e-05, "loss": 0.5547, "step": 7934 }, { "epoch": 1.0021311863604072, "grad_norm": 1.703125, "learning_rate": 1.0065881931613477e-05, "loss": 0.5765, "step": 7935 }, { "epoch": 1.0022574788854686, "grad_norm": 1.734375, "learning_rate": 1.0063885537028615e-05, "loss": 0.6023, "step": 7936 }, { "epoch": 1.0023837714105297, "grad_norm": 2.0625, "learning_rate": 1.006188913989743e-05, "loss": 0.6519, "step": 7937 }, { "epoch": 1.0025100639355908, "grad_norm": 1.6875, "learning_rate": 1.0059892740299493e-05, "loss": 0.6148, "step": 7938 }, { "epoch": 1.002636356460652, "grad_norm": 1.625, "learning_rate": 1.0057896338314375e-05, "loss": 0.4979, "step": 7939 }, { "epoch": 1.002762648985713, "grad_norm": 1.9140625, "learning_rate": 1.0055899934021649e-05, "loss": 0.6825, "step": 7940 }, { "epoch": 1.0028889415107742, "grad_norm": 1.75, "learning_rate": 1.0053903527500888e-05, "loss": 0.5948, "step": 7941 }, { "epoch": 1.0030152340358356, "grad_norm": 1.828125, "learning_rate": 1.005190711883166e-05, "loss": 0.5568, "step": 7942 }, { "epoch": 1.0031415265608967, "grad_norm": 1.7421875, "learning_rate": 1.0049910708093539e-05, "loss": 0.5561, "step": 7943 }, { "epoch": 1.0032678190859579, "grad_norm": 1.8203125, "learning_rate": 1.0047914295366102e-05, "loss": 0.6981, "step": 7944 }, { "epoch": 1.003394111611019, "grad_norm": 1.921875, "learning_rate": 1.0045917880728912e-05, "loss": 0.6666, "step": 7945 }, { "epoch": 1.0035204041360801, "grad_norm": 1.8671875, "learning_rate": 1.0043921464261546e-05, "loss": 0.6169, "step": 7946 }, { "epoch": 1.0036466966611413, "grad_norm": 2.015625, "learning_rate": 1.0041925046043577e-05, "loss": 0.7614, "step": 7947 }, { "epoch": 1.0037729891862026, "grad_norm": 1.8828125, "learning_rate": 1.0039928626154576e-05, "loss": 0.6443, "step": 7948 }, { "epoch": 1.0038992817112637, "grad_norm": 1.7109375, "learning_rate": 1.0037932204674118e-05, "loss": 0.6338, "step": 7949 }, { "epoch": 1.0040255742363249, "grad_norm": 1.796875, "learning_rate": 1.0035935781681771e-05, "loss": 0.6029, "step": 7950 }, { "epoch": 1.004151866761386, "grad_norm": 1.84375, "learning_rate": 1.0033939357257114e-05, "loss": 0.6574, "step": 7951 }, { "epoch": 1.0042781592864471, "grad_norm": 1.796875, "learning_rate": 1.0031942931479714e-05, "loss": 0.613, "step": 7952 }, { "epoch": 1.0044044518115085, "grad_norm": 2.0625, "learning_rate": 1.0029946504429146e-05, "loss": 0.6053, "step": 7953 }, { "epoch": 1.0045307443365696, "grad_norm": 1.8046875, "learning_rate": 1.0027950076184982e-05, "loss": 0.572, "step": 7954 }, { "epoch": 1.0046570368616308, "grad_norm": 1.8984375, "learning_rate": 1.0025953646826793e-05, "loss": 0.6532, "step": 7955 }, { "epoch": 1.004783329386692, "grad_norm": 2.140625, "learning_rate": 1.0023957216434157e-05, "loss": 0.6833, "step": 7956 }, { "epoch": 1.004909621911753, "grad_norm": 1.8046875, "learning_rate": 1.0021960785086643e-05, "loss": 0.6065, "step": 7957 }, { "epoch": 1.0050359144368142, "grad_norm": 1.7890625, "learning_rate": 1.0019964352863826e-05, "loss": 0.6088, "step": 7958 }, { "epoch": 1.0051622069618755, "grad_norm": 1.796875, "learning_rate": 1.001796791984528e-05, "loss": 0.5526, "step": 7959 }, { "epoch": 1.0052884994869367, "grad_norm": 1.6875, "learning_rate": 1.0015971486110571e-05, "loss": 0.5468, "step": 7960 }, { "epoch": 1.0054147920119978, "grad_norm": 1.953125, "learning_rate": 1.0013975051739282e-05, "loss": 0.645, "step": 7961 }, { "epoch": 1.005541084537059, "grad_norm": 1.78125, "learning_rate": 1.0011978616810978e-05, "loss": 0.6318, "step": 7962 }, { "epoch": 1.00566737706212, "grad_norm": 1.875, "learning_rate": 1.0009982181405236e-05, "loss": 0.6482, "step": 7963 }, { "epoch": 1.0057936695871814, "grad_norm": 1.8828125, "learning_rate": 1.0007985745601626e-05, "loss": 0.7032, "step": 7964 }, { "epoch": 1.0059199621122425, "grad_norm": 1.921875, "learning_rate": 1.0005989309479727e-05, "loss": 0.6569, "step": 7965 }, { "epoch": 1.0060462546373037, "grad_norm": 1.859375, "learning_rate": 1.0003992873119107e-05, "loss": 0.5906, "step": 7966 }, { "epoch": 1.0061725471623648, "grad_norm": 1.7578125, "learning_rate": 1.0001996436599342e-05, "loss": 0.5965, "step": 7967 }, { "epoch": 1.006298839687426, "grad_norm": 1.8046875, "learning_rate": 1e-05, "loss": 0.6202, "step": 7968 }, { "epoch": 1.006425132212487, "grad_norm": 1.8828125, "learning_rate": 9.998003563400663e-06, "loss": 0.6794, "step": 7969 }, { "epoch": 1.0065514247375484, "grad_norm": 1.8125, "learning_rate": 9.996007126880897e-06, "loss": 0.6177, "step": 7970 }, { "epoch": 1.0066777172626096, "grad_norm": 1.7109375, "learning_rate": 9.994010690520276e-06, "loss": 0.639, "step": 7971 }, { "epoch": 1.0068040097876707, "grad_norm": 1.7578125, "learning_rate": 9.992014254398377e-06, "loss": 0.6196, "step": 7972 }, { "epoch": 1.0069303023127318, "grad_norm": 1.8046875, "learning_rate": 9.990017818594769e-06, "loss": 0.6436, "step": 7973 }, { "epoch": 1.007056594837793, "grad_norm": 1.765625, "learning_rate": 9.988021383189027e-06, "loss": 0.6423, "step": 7974 }, { "epoch": 1.007182887362854, "grad_norm": 1.7578125, "learning_rate": 9.986024948260724e-06, "loss": 0.6169, "step": 7975 }, { "epoch": 1.0073091798879155, "grad_norm": 1.875, "learning_rate": 9.984028513889434e-06, "loss": 0.6119, "step": 7976 }, { "epoch": 1.0074354724129766, "grad_norm": 2.015625, "learning_rate": 9.982032080154727e-06, "loss": 0.6802, "step": 7977 }, { "epoch": 1.0075617649380377, "grad_norm": 1.8203125, "learning_rate": 9.980035647136174e-06, "loss": 0.6916, "step": 7978 }, { "epoch": 1.0076880574630989, "grad_norm": 1.9453125, "learning_rate": 9.978039214913357e-06, "loss": 0.6208, "step": 7979 }, { "epoch": 1.00781434998816, "grad_norm": 1.6796875, "learning_rate": 9.976042783565843e-06, "loss": 0.5454, "step": 7980 }, { "epoch": 1.0079406425132214, "grad_norm": 1.84375, "learning_rate": 9.974046353173207e-06, "loss": 0.6932, "step": 7981 }, { "epoch": 1.0080669350382825, "grad_norm": 1.6953125, "learning_rate": 9.97204992381502e-06, "loss": 0.6057, "step": 7982 }, { "epoch": 1.0081932275633436, "grad_norm": 1.75, "learning_rate": 9.970053495570857e-06, "loss": 0.6317, "step": 7983 }, { "epoch": 1.0083195200884048, "grad_norm": 1.7734375, "learning_rate": 9.96805706852029e-06, "loss": 0.5926, "step": 7984 }, { "epoch": 1.0084458126134659, "grad_norm": 2.015625, "learning_rate": 9.96606064274289e-06, "loss": 0.7407, "step": 7985 }, { "epoch": 1.008572105138527, "grad_norm": 1.7578125, "learning_rate": 9.96406421831823e-06, "loss": 0.6248, "step": 7986 }, { "epoch": 1.0086983976635884, "grad_norm": 1.796875, "learning_rate": 9.962067795325884e-06, "loss": 0.6452, "step": 7987 }, { "epoch": 1.0088246901886495, "grad_norm": 1.78125, "learning_rate": 9.960071373845427e-06, "loss": 0.6255, "step": 7988 }, { "epoch": 1.0089509827137106, "grad_norm": 1.75, "learning_rate": 9.958074953956428e-06, "loss": 0.616, "step": 7989 }, { "epoch": 1.0090772752387718, "grad_norm": 1.8203125, "learning_rate": 9.956078535738459e-06, "loss": 0.6093, "step": 7990 }, { "epoch": 1.009203567763833, "grad_norm": 1.84375, "learning_rate": 9.954082119271093e-06, "loss": 0.732, "step": 7991 }, { "epoch": 1.009329860288894, "grad_norm": 1.7109375, "learning_rate": 9.952085704633905e-06, "loss": 0.5403, "step": 7992 }, { "epoch": 1.0094561528139554, "grad_norm": 1.84375, "learning_rate": 9.950089291906464e-06, "loss": 0.5582, "step": 7993 }, { "epoch": 1.0095824453390165, "grad_norm": 1.9609375, "learning_rate": 9.948092881168345e-06, "loss": 0.7691, "step": 7994 }, { "epoch": 1.0097087378640777, "grad_norm": 1.8359375, "learning_rate": 9.946096472499117e-06, "loss": 0.594, "step": 7995 }, { "epoch": 1.0098350303891388, "grad_norm": 1.75, "learning_rate": 9.944100065978351e-06, "loss": 0.6005, "step": 7996 }, { "epoch": 1.0099613229142, "grad_norm": 1.7890625, "learning_rate": 9.942103661685625e-06, "loss": 0.6843, "step": 7997 }, { "epoch": 1.0100876154392613, "grad_norm": 2.1875, "learning_rate": 9.940107259700509e-06, "loss": 0.7, "step": 7998 }, { "epoch": 1.0102139079643224, "grad_norm": 1.796875, "learning_rate": 9.938110860102572e-06, "loss": 0.6471, "step": 7999 }, { "epoch": 1.0103402004893836, "grad_norm": 1.84375, "learning_rate": 9.936114462971388e-06, "loss": 0.632, "step": 8000 }, { "epoch": 1.0104664930144447, "grad_norm": 1.7734375, "learning_rate": 9.934118068386528e-06, "loss": 0.6202, "step": 8001 }, { "epoch": 1.0105927855395058, "grad_norm": 1.8984375, "learning_rate": 9.93212167642756e-06, "loss": 0.5988, "step": 8002 }, { "epoch": 1.010719078064567, "grad_norm": 1.890625, "learning_rate": 9.930125287174061e-06, "loss": 0.6391, "step": 8003 }, { "epoch": 1.0108453705896283, "grad_norm": 1.6875, "learning_rate": 9.9281289007056e-06, "loss": 0.5925, "step": 8004 }, { "epoch": 1.0109716631146894, "grad_norm": 1.9140625, "learning_rate": 9.926132517101748e-06, "loss": 0.7222, "step": 8005 }, { "epoch": 1.0110979556397506, "grad_norm": 1.8828125, "learning_rate": 9.924136136442075e-06, "loss": 0.6669, "step": 8006 }, { "epoch": 1.0112242481648117, "grad_norm": 1.7109375, "learning_rate": 9.922139758806154e-06, "loss": 0.5856, "step": 8007 }, { "epoch": 1.0113505406898728, "grad_norm": 1.75, "learning_rate": 9.920143384273553e-06, "loss": 0.5575, "step": 8008 }, { "epoch": 1.0114768332149342, "grad_norm": 1.921875, "learning_rate": 9.918147012923845e-06, "loss": 0.6008, "step": 8009 }, { "epoch": 1.0116031257399953, "grad_norm": 1.9140625, "learning_rate": 9.916150644836601e-06, "loss": 0.646, "step": 8010 }, { "epoch": 1.0117294182650565, "grad_norm": 1.765625, "learning_rate": 9.91415428009139e-06, "loss": 0.6156, "step": 8011 }, { "epoch": 1.0118557107901176, "grad_norm": 1.8515625, "learning_rate": 9.912157918767782e-06, "loss": 0.6543, "step": 8012 }, { "epoch": 1.0119820033151787, "grad_norm": 1.875, "learning_rate": 9.910161560945347e-06, "loss": 0.6319, "step": 8013 }, { "epoch": 1.0121082958402399, "grad_norm": 1.6875, "learning_rate": 9.908165206703653e-06, "loss": 0.5396, "step": 8014 }, { "epoch": 1.0122345883653012, "grad_norm": 1.671875, "learning_rate": 9.906168856122277e-06, "loss": 0.6329, "step": 8015 }, { "epoch": 1.0123608808903624, "grad_norm": 1.8828125, "learning_rate": 9.904172509280783e-06, "loss": 0.6499, "step": 8016 }, { "epoch": 1.0124871734154235, "grad_norm": 1.796875, "learning_rate": 9.902176166258743e-06, "loss": 0.5879, "step": 8017 }, { "epoch": 1.0126134659404846, "grad_norm": 1.75, "learning_rate": 9.900179827135725e-06, "loss": 0.5175, "step": 8018 }, { "epoch": 1.0127397584655458, "grad_norm": 1.8515625, "learning_rate": 9.8981834919913e-06, "loss": 0.5832, "step": 8019 }, { "epoch": 1.012866050990607, "grad_norm": 1.8984375, "learning_rate": 9.896187160905033e-06, "loss": 0.6767, "step": 8020 }, { "epoch": 1.0129923435156682, "grad_norm": 1.9453125, "learning_rate": 9.894190833956497e-06, "loss": 0.6012, "step": 8021 }, { "epoch": 1.0131186360407294, "grad_norm": 1.828125, "learning_rate": 9.89219451122526e-06, "loss": 0.6828, "step": 8022 }, { "epoch": 1.0132449285657905, "grad_norm": 1.828125, "learning_rate": 9.89019819279089e-06, "loss": 0.6066, "step": 8023 }, { "epoch": 1.0133712210908516, "grad_norm": 1.7578125, "learning_rate": 9.888201878732956e-06, "loss": 0.6192, "step": 8024 }, { "epoch": 1.0134975136159128, "grad_norm": 1.8046875, "learning_rate": 9.886205569131025e-06, "loss": 0.6103, "step": 8025 }, { "epoch": 1.0136238061409741, "grad_norm": 1.734375, "learning_rate": 9.884209264064667e-06, "loss": 0.6682, "step": 8026 }, { "epoch": 1.0137500986660353, "grad_norm": 1.875, "learning_rate": 9.882212963613447e-06, "loss": 0.6876, "step": 8027 }, { "epoch": 1.0138763911910964, "grad_norm": 1.7578125, "learning_rate": 9.880216667856935e-06, "loss": 0.5655, "step": 8028 }, { "epoch": 1.0140026837161575, "grad_norm": 1.8203125, "learning_rate": 9.8782203768747e-06, "loss": 0.6135, "step": 8029 }, { "epoch": 1.0141289762412187, "grad_norm": 1.96875, "learning_rate": 9.876224090746307e-06, "loss": 0.7417, "step": 8030 }, { "epoch": 1.0142552687662798, "grad_norm": 1.875, "learning_rate": 9.874227809551321e-06, "loss": 0.6801, "step": 8031 }, { "epoch": 1.0143815612913412, "grad_norm": 1.8203125, "learning_rate": 9.87223153336931e-06, "loss": 0.6787, "step": 8032 }, { "epoch": 1.0145078538164023, "grad_norm": 1.8203125, "learning_rate": 9.870235262279845e-06, "loss": 0.6755, "step": 8033 }, { "epoch": 1.0146341463414634, "grad_norm": 1.796875, "learning_rate": 9.86823899636249e-06, "loss": 0.6954, "step": 8034 }, { "epoch": 1.0147604388665246, "grad_norm": 1.703125, "learning_rate": 9.866242735696813e-06, "loss": 0.5752, "step": 8035 }, { "epoch": 1.0148867313915857, "grad_norm": 1.875, "learning_rate": 9.864246480362376e-06, "loss": 0.621, "step": 8036 }, { "epoch": 1.0150130239166468, "grad_norm": 1.8046875, "learning_rate": 9.86225023043875e-06, "loss": 0.6413, "step": 8037 }, { "epoch": 1.0151393164417082, "grad_norm": 1.7578125, "learning_rate": 9.860253986005496e-06, "loss": 0.651, "step": 8038 }, { "epoch": 1.0152656089667693, "grad_norm": 2.015625, "learning_rate": 9.858257747142182e-06, "loss": 0.6221, "step": 8039 }, { "epoch": 1.0153919014918305, "grad_norm": 1.90625, "learning_rate": 9.856261513928373e-06, "loss": 0.6035, "step": 8040 }, { "epoch": 1.0155181940168916, "grad_norm": 1.828125, "learning_rate": 9.854265286443633e-06, "loss": 0.5738, "step": 8041 }, { "epoch": 1.0156444865419527, "grad_norm": 1.828125, "learning_rate": 9.85226906476753e-06, "loss": 0.6296, "step": 8042 }, { "epoch": 1.015770779067014, "grad_norm": 1.9375, "learning_rate": 9.850272848979623e-06, "loss": 0.6203, "step": 8043 }, { "epoch": 1.0158970715920752, "grad_norm": 1.703125, "learning_rate": 9.848276639159481e-06, "loss": 0.5959, "step": 8044 }, { "epoch": 1.0000157865656327, "grad_norm": 1.9375, "learning_rate": 9.846280435386668e-06, "loss": 0.674, "step": 8045 }, { "epoch": 1.0001420790906939, "grad_norm": 2.015625, "learning_rate": 9.844284237740745e-06, "loss": 0.6105, "step": 8046 }, { "epoch": 1.000268371615755, "grad_norm": 1.9140625, "learning_rate": 9.842288046301277e-06, "loss": 0.5502, "step": 8047 }, { "epoch": 1.0003946641408161, "grad_norm": 1.9921875, "learning_rate": 9.840291861147828e-06, "loss": 0.5465, "step": 8048 }, { "epoch": 1.0005209566658773, "grad_norm": 1.71875, "learning_rate": 9.838295682359962e-06, "loss": 0.5425, "step": 8049 }, { "epoch": 1.0006472491909386, "grad_norm": 1.78125, "learning_rate": 9.836299510017235e-06, "loss": 0.5618, "step": 8050 }, { "epoch": 1.0007735417159997, "grad_norm": 1.7421875, "learning_rate": 9.834303344199219e-06, "loss": 0.479, "step": 8051 }, { "epoch": 1.0008998342410609, "grad_norm": 1.859375, "learning_rate": 9.832307184985475e-06, "loss": 0.4466, "step": 8052 }, { "epoch": 1.001026126766122, "grad_norm": 2.03125, "learning_rate": 9.830311032455561e-06, "loss": 0.506, "step": 8053 }, { "epoch": 1.0011524192911831, "grad_norm": 2.0625, "learning_rate": 9.82831488668904e-06, "loss": 0.4811, "step": 8054 }, { "epoch": 1.0012787118162443, "grad_norm": 1.96875, "learning_rate": 9.826318747765474e-06, "loss": 0.5226, "step": 8055 }, { "epoch": 1.0014050043413056, "grad_norm": 2.140625, "learning_rate": 9.824322615764425e-06, "loss": 0.5341, "step": 8056 }, { "epoch": 1.0015312968663668, "grad_norm": 2.109375, "learning_rate": 9.822326490765455e-06, "loss": 0.5376, "step": 8057 }, { "epoch": 1.001657589391428, "grad_norm": 1.90625, "learning_rate": 9.820330372848122e-06, "loss": 0.4669, "step": 8058 }, { "epoch": 1.001783881916489, "grad_norm": 1.796875, "learning_rate": 9.818334262091986e-06, "loss": 0.479, "step": 8059 }, { "epoch": 1.0019101744415502, "grad_norm": 1.765625, "learning_rate": 9.816338158576611e-06, "loss": 0.4909, "step": 8060 }, { "epoch": 1.0020364669666113, "grad_norm": 2.015625, "learning_rate": 9.814342062381552e-06, "loss": 0.5376, "step": 8061 }, { "epoch": 1.0021627594916727, "grad_norm": 1.9140625, "learning_rate": 9.812345973586374e-06, "loss": 0.5262, "step": 8062 }, { "epoch": 1.0022890520167338, "grad_norm": 2.0, "learning_rate": 9.810349892270631e-06, "loss": 0.5251, "step": 8063 }, { "epoch": 1.002415344541795, "grad_norm": 1.953125, "learning_rate": 9.808353818513887e-06, "loss": 0.4379, "step": 8064 }, { "epoch": 1.002541637066856, "grad_norm": 1.9765625, "learning_rate": 9.806357752395696e-06, "loss": 0.4981, "step": 8065 }, { "epoch": 1.0026679295919172, "grad_norm": 1.90625, "learning_rate": 9.80436169399562e-06, "loss": 0.4695, "step": 8066 }, { "epoch": 1.0027942221169786, "grad_norm": 1.75, "learning_rate": 9.802365643393213e-06, "loss": 0.5001, "step": 8067 }, { "epoch": 1.0029205146420397, "grad_norm": 1.9765625, "learning_rate": 9.800369600668036e-06, "loss": 0.5155, "step": 8068 }, { "epoch": 1.0030468071671008, "grad_norm": 2.046875, "learning_rate": 9.798373565899648e-06, "loss": 0.5295, "step": 8069 }, { "epoch": 1.003173099692162, "grad_norm": 1.890625, "learning_rate": 9.796377539167603e-06, "loss": 0.5539, "step": 8070 }, { "epoch": 1.003299392217223, "grad_norm": 2.03125, "learning_rate": 9.794381520551459e-06, "loss": 0.4903, "step": 8071 }, { "epoch": 1.0034256847422842, "grad_norm": 1.8125, "learning_rate": 9.792385510130773e-06, "loss": 0.4662, "step": 8072 }, { "epoch": 1.0035519772673456, "grad_norm": 1.8828125, "learning_rate": 9.7903895079851e-06, "loss": 0.4839, "step": 8073 }, { "epoch": 1.0036782697924067, "grad_norm": 1.859375, "learning_rate": 9.788393514193997e-06, "loss": 0.5075, "step": 8074 }, { "epoch": 1.0038045623174678, "grad_norm": 1.8671875, "learning_rate": 9.786397528837018e-06, "loss": 0.5504, "step": 8075 }, { "epoch": 1.003930854842529, "grad_norm": 2.265625, "learning_rate": 9.78440155199372e-06, "loss": 0.5162, "step": 8076 }, { "epoch": 1.00405714736759, "grad_norm": 1.75, "learning_rate": 9.782405583743655e-06, "loss": 0.4201, "step": 8077 }, { "epoch": 1.0041834398926515, "grad_norm": 1.828125, "learning_rate": 9.78040962416638e-06, "loss": 0.5438, "step": 8078 }, { "epoch": 1.0043097324177126, "grad_norm": 1.8671875, "learning_rate": 9.77841367334145e-06, "loss": 0.5708, "step": 8079 }, { "epoch": 1.0044360249427737, "grad_norm": 1.8984375, "learning_rate": 9.776417731348416e-06, "loss": 0.504, "step": 8080 }, { "epoch": 1.0045623174678349, "grad_norm": 1.8828125, "learning_rate": 9.774421798266833e-06, "loss": 0.4703, "step": 8081 }, { "epoch": 1.004688609992896, "grad_norm": 1.859375, "learning_rate": 9.772425874176253e-06, "loss": 0.5025, "step": 8082 }, { "epoch": 1.0048149025179571, "grad_norm": 1.8515625, "learning_rate": 9.77042995915623e-06, "loss": 0.4859, "step": 8083 }, { "epoch": 1.0049411950430185, "grad_norm": 1.9921875, "learning_rate": 9.768434053286315e-06, "loss": 0.4898, "step": 8084 }, { "epoch": 1.0050674875680796, "grad_norm": 1.7421875, "learning_rate": 9.766438156646063e-06, "loss": 0.4693, "step": 8085 }, { "epoch": 1.0051937800931408, "grad_norm": 1.7578125, "learning_rate": 9.764442269315019e-06, "loss": 0.448, "step": 8086 }, { "epoch": 1.0053200726182019, "grad_norm": 2.0, "learning_rate": 9.762446391372741e-06, "loss": 0.5631, "step": 8087 }, { "epoch": 1.005446365143263, "grad_norm": 1.8671875, "learning_rate": 9.76045052289878e-06, "loss": 0.493, "step": 8088 }, { "epoch": 1.0055726576683242, "grad_norm": 1.8984375, "learning_rate": 9.758454663972684e-06, "loss": 0.567, "step": 8089 }, { "epoch": 1.0056989501933855, "grad_norm": 1.921875, "learning_rate": 9.756458814674e-06, "loss": 0.4691, "step": 8090 }, { "epoch": 1.0058252427184466, "grad_norm": 1.8984375, "learning_rate": 9.754462975082285e-06, "loss": 0.5357, "step": 8091 }, { "epoch": 1.0059515352435078, "grad_norm": 1.8984375, "learning_rate": 9.752467145277082e-06, "loss": 0.5861, "step": 8092 }, { "epoch": 1.006077827768569, "grad_norm": 1.8515625, "learning_rate": 9.750471325337945e-06, "loss": 0.4384, "step": 8093 }, { "epoch": 1.00620412029363, "grad_norm": 1.8671875, "learning_rate": 9.748475515344418e-06, "loss": 0.4649, "step": 8094 }, { "epoch": 1.0063304128186914, "grad_norm": 1.828125, "learning_rate": 9.74647971537605e-06, "loss": 0.5331, "step": 8095 }, { "epoch": 1.0064567053437525, "grad_norm": 1.9609375, "learning_rate": 9.744483925512391e-06, "loss": 0.5271, "step": 8096 }, { "epoch": 1.0065829978688137, "grad_norm": 1.9453125, "learning_rate": 9.742488145832987e-06, "loss": 0.5545, "step": 8097 }, { "epoch": 1.0067092903938748, "grad_norm": 1.84375, "learning_rate": 9.740492376417387e-06, "loss": 0.4812, "step": 8098 }, { "epoch": 1.006835582918936, "grad_norm": 1.8671875, "learning_rate": 9.738496617345133e-06, "loss": 0.5033, "step": 8099 }, { "epoch": 1.006961875443997, "grad_norm": 2.125, "learning_rate": 9.736500868695773e-06, "loss": 0.525, "step": 8100 }, { "epoch": 1.0070881679690584, "grad_norm": 1.8359375, "learning_rate": 9.734505130548858e-06, "loss": 0.5118, "step": 8101 }, { "epoch": 1.0072144604941196, "grad_norm": 2.265625, "learning_rate": 9.732509402983927e-06, "loss": 0.6069, "step": 8102 }, { "epoch": 1.0073407530191807, "grad_norm": 2.359375, "learning_rate": 9.730513686080528e-06, "loss": 0.5496, "step": 8103 }, { "epoch": 1.0074670455442418, "grad_norm": 1.8671875, "learning_rate": 9.7285179799182e-06, "loss": 0.4965, "step": 8104 }, { "epoch": 1.007593338069303, "grad_norm": 2.046875, "learning_rate": 9.726522284576495e-06, "loss": 0.5195, "step": 8105 }, { "epoch": 1.007719630594364, "grad_norm": 1.8828125, "learning_rate": 9.724526600134953e-06, "loss": 0.5007, "step": 8106 }, { "epoch": 1.0078459231194254, "grad_norm": 1.828125, "learning_rate": 9.722530926673117e-06, "loss": 0.5584, "step": 8107 }, { "epoch": 1.0079722156444866, "grad_norm": 1.921875, "learning_rate": 9.720535264270529e-06, "loss": 0.6151, "step": 8108 }, { "epoch": 1.0080985081695477, "grad_norm": 2.015625, "learning_rate": 9.718539613006735e-06, "loss": 0.5528, "step": 8109 }, { "epoch": 1.0082248006946088, "grad_norm": 1.84375, "learning_rate": 9.716543972961271e-06, "loss": 0.4952, "step": 8110 }, { "epoch": 1.00835109321967, "grad_norm": 2.046875, "learning_rate": 9.714548344213683e-06, "loss": 0.5039, "step": 8111 }, { "epoch": 1.0084773857447313, "grad_norm": 2.078125, "learning_rate": 9.71255272684351e-06, "loss": 0.4869, "step": 8112 }, { "epoch": 1.0086036782697925, "grad_norm": 1.9375, "learning_rate": 9.710557120930295e-06, "loss": 0.4525, "step": 8113 }, { "epoch": 1.0087299707948536, "grad_norm": 1.9296875, "learning_rate": 9.708561526553573e-06, "loss": 0.4543, "step": 8114 }, { "epoch": 1.0088562633199147, "grad_norm": 1.8359375, "learning_rate": 9.706565943792886e-06, "loss": 0.5037, "step": 8115 }, { "epoch": 1.0089825558449759, "grad_norm": 2.015625, "learning_rate": 9.704570372727775e-06, "loss": 0.5504, "step": 8116 }, { "epoch": 1.009108848370037, "grad_norm": 1.828125, "learning_rate": 9.702574813437779e-06, "loss": 0.5027, "step": 8117 }, { "epoch": 1.0092351408950984, "grad_norm": 1.9921875, "learning_rate": 9.700579266002433e-06, "loss": 0.5102, "step": 8118 }, { "epoch": 1.0093614334201595, "grad_norm": 1.796875, "learning_rate": 9.698583730501276e-06, "loss": 0.4425, "step": 8119 }, { "epoch": 1.0094877259452206, "grad_norm": 1.8828125, "learning_rate": 9.696588207013846e-06, "loss": 0.4821, "step": 8120 }, { "epoch": 1.0096140184702818, "grad_norm": 1.9140625, "learning_rate": 9.69459269561968e-06, "loss": 0.5213, "step": 8121 }, { "epoch": 1.009740310995343, "grad_norm": 1.8203125, "learning_rate": 9.69259719639831e-06, "loss": 0.4589, "step": 8122 }, { "epoch": 1.009866603520404, "grad_norm": 2.015625, "learning_rate": 9.690601709429278e-06, "loss": 0.5115, "step": 8123 }, { "epoch": 1.0099928960454654, "grad_norm": 1.875, "learning_rate": 9.688606234792118e-06, "loss": 0.5316, "step": 8124 }, { "epoch": 1.0101191885705265, "grad_norm": 1.953125, "learning_rate": 9.686610772566361e-06, "loss": 0.5132, "step": 8125 }, { "epoch": 1.0102454810955876, "grad_norm": 1.921875, "learning_rate": 9.684615322831546e-06, "loss": 0.523, "step": 8126 }, { "epoch": 1.0103717736206488, "grad_norm": 1.8828125, "learning_rate": 9.682619885667204e-06, "loss": 0.4862, "step": 8127 }, { "epoch": 1.01049806614571, "grad_norm": 1.8984375, "learning_rate": 9.680624461152867e-06, "loss": 0.4654, "step": 8128 }, { "epoch": 1.0106243586707713, "grad_norm": 2.0625, "learning_rate": 9.678629049368072e-06, "loss": 0.5736, "step": 8129 }, { "epoch": 1.0107506511958324, "grad_norm": 1.8671875, "learning_rate": 9.676633650392348e-06, "loss": 0.3973, "step": 8130 }, { "epoch": 1.0108769437208935, "grad_norm": 1.8125, "learning_rate": 9.674638264305228e-06, "loss": 0.5195, "step": 8131 }, { "epoch": 1.0110032362459547, "grad_norm": 2.015625, "learning_rate": 9.672642891186243e-06, "loss": 0.6144, "step": 8132 }, { "epoch": 1.0111295287710158, "grad_norm": 1.8671875, "learning_rate": 9.670647531114922e-06, "loss": 0.4653, "step": 8133 }, { "epoch": 1.011255821296077, "grad_norm": 2.296875, "learning_rate": 9.6686521841708e-06, "loss": 0.5397, "step": 8134 }, { "epoch": 1.0113821138211383, "grad_norm": 1.9296875, "learning_rate": 9.666656850433403e-06, "loss": 0.4757, "step": 8135 }, { "epoch": 1.0115084063461994, "grad_norm": 1.90625, "learning_rate": 9.664661529982261e-06, "loss": 0.4698, "step": 8136 }, { "epoch": 1.0116346988712606, "grad_norm": 1.8515625, "learning_rate": 9.662666222896903e-06, "loss": 0.4778, "step": 8137 }, { "epoch": 1.0117609913963217, "grad_norm": 1.921875, "learning_rate": 9.660670929256856e-06, "loss": 0.4606, "step": 8138 }, { "epoch": 1.0118872839213828, "grad_norm": 1.8828125, "learning_rate": 9.65867564914165e-06, "loss": 0.5229, "step": 8139 }, { "epoch": 1.0120135764464442, "grad_norm": 1.921875, "learning_rate": 9.656680382630807e-06, "loss": 0.4563, "step": 8140 }, { "epoch": 1.0121398689715053, "grad_norm": 1.9140625, "learning_rate": 9.65468512980386e-06, "loss": 0.5158, "step": 8141 }, { "epoch": 1.0122661614965665, "grad_norm": 1.828125, "learning_rate": 9.652689890740332e-06, "loss": 0.5132, "step": 8142 }, { "epoch": 1.0123924540216276, "grad_norm": 1.875, "learning_rate": 9.650694665519747e-06, "loss": 0.4939, "step": 8143 }, { "epoch": 1.0125187465466887, "grad_norm": 1.890625, "learning_rate": 9.648699454221633e-06, "loss": 0.4918, "step": 8144 }, { "epoch": 1.0126450390717499, "grad_norm": 1.9375, "learning_rate": 9.646704256925511e-06, "loss": 0.4866, "step": 8145 }, { "epoch": 1.0127713315968112, "grad_norm": 1.953125, "learning_rate": 9.644709073710907e-06, "loss": 0.4911, "step": 8146 }, { "epoch": 1.0128976241218723, "grad_norm": 2.078125, "learning_rate": 9.642713904657341e-06, "loss": 0.5081, "step": 8147 }, { "epoch": 1.0130239166469335, "grad_norm": 1.8515625, "learning_rate": 9.640718749844343e-06, "loss": 0.4375, "step": 8148 }, { "epoch": 1.0131502091719946, "grad_norm": 1.96875, "learning_rate": 9.638723609351427e-06, "loss": 0.5318, "step": 8149 }, { "epoch": 1.0132765016970557, "grad_norm": 1.890625, "learning_rate": 9.63672848325812e-06, "loss": 0.4904, "step": 8150 }, { "epoch": 1.0134027942221169, "grad_norm": 1.9765625, "learning_rate": 9.634733371643938e-06, "loss": 0.5253, "step": 8151 }, { "epoch": 1.0135290867471782, "grad_norm": 1.8515625, "learning_rate": 9.632738274588406e-06, "loss": 0.4789, "step": 8152 }, { "epoch": 1.0136553792722394, "grad_norm": 2.046875, "learning_rate": 9.630743192171042e-06, "loss": 0.5828, "step": 8153 }, { "epoch": 1.0137816717973005, "grad_norm": 1.921875, "learning_rate": 9.628748124471364e-06, "loss": 0.5618, "step": 8154 }, { "epoch": 1.0139079643223616, "grad_norm": 1.9765625, "learning_rate": 9.626753071568892e-06, "loss": 0.5506, "step": 8155 }, { "epoch": 1.0140342568474228, "grad_norm": 1.921875, "learning_rate": 9.624758033543143e-06, "loss": 0.4805, "step": 8156 }, { "epoch": 1.0141605493724841, "grad_norm": 1.921875, "learning_rate": 9.622763010473634e-06, "loss": 0.4835, "step": 8157 }, { "epoch": 1.0142868418975453, "grad_norm": 2.0, "learning_rate": 9.620768002439884e-06, "loss": 0.5108, "step": 8158 }, { "epoch": 1.0144131344226064, "grad_norm": 1.7265625, "learning_rate": 9.618773009521407e-06, "loss": 0.4806, "step": 8159 }, { "epoch": 1.0145394269476675, "grad_norm": 1.875, "learning_rate": 9.61677803179772e-06, "loss": 0.503, "step": 8160 }, { "epoch": 1.0146657194727287, "grad_norm": 1.90625, "learning_rate": 9.614783069348339e-06, "loss": 0.4481, "step": 8161 }, { "epoch": 1.0147920119977898, "grad_norm": 1.796875, "learning_rate": 9.612788122252776e-06, "loss": 0.4425, "step": 8162 }, { "epoch": 1.0149183045228511, "grad_norm": 1.7734375, "learning_rate": 9.610793190590543e-06, "loss": 0.4988, "step": 8163 }, { "epoch": 1.0150445970479123, "grad_norm": 2.015625, "learning_rate": 9.60879827444116e-06, "loss": 0.5382, "step": 8164 }, { "epoch": 1.0151708895729734, "grad_norm": 2.140625, "learning_rate": 9.606803373884135e-06, "loss": 0.5749, "step": 8165 }, { "epoch": 1.0152971820980345, "grad_norm": 1.7109375, "learning_rate": 9.60480848899898e-06, "loss": 0.4124, "step": 8166 }, { "epoch": 1.0154234746230957, "grad_norm": 1.703125, "learning_rate": 9.602813619865207e-06, "loss": 0.448, "step": 8167 }, { "epoch": 1.0155497671481568, "grad_norm": 2.078125, "learning_rate": 9.600818766562326e-06, "loss": 0.5835, "step": 8168 }, { "epoch": 1.0156760596732182, "grad_norm": 1.8203125, "learning_rate": 9.598823929169847e-06, "loss": 0.5208, "step": 8169 }, { "epoch": 1.0158023521982793, "grad_norm": 2.0, "learning_rate": 9.596829107767282e-06, "loss": 0.4865, "step": 8170 }, { "epoch": 1.0159286447233404, "grad_norm": 1.8828125, "learning_rate": 9.594834302434135e-06, "loss": 0.5307, "step": 8171 }, { "epoch": 1.0160549372484016, "grad_norm": 1.90625, "learning_rate": 9.592839513249918e-06, "loss": 0.5053, "step": 8172 }, { "epoch": 1.0161812297734627, "grad_norm": 1.8828125, "learning_rate": 9.590844740294138e-06, "loss": 0.5072, "step": 8173 }, { "epoch": 1.016307522298524, "grad_norm": 1.9140625, "learning_rate": 9.5888499836463e-06, "loss": 0.546, "step": 8174 }, { "epoch": 1.0164338148235852, "grad_norm": 1.921875, "learning_rate": 9.586855243385912e-06, "loss": 0.5258, "step": 8175 }, { "epoch": 1.0165601073486463, "grad_norm": 1.9609375, "learning_rate": 9.584860519592476e-06, "loss": 0.4973, "step": 8176 }, { "epoch": 1.0166863998737075, "grad_norm": 1.7890625, "learning_rate": 9.582865812345503e-06, "loss": 0.4487, "step": 8177 }, { "epoch": 1.0168126923987686, "grad_norm": 2.046875, "learning_rate": 9.580871121724493e-06, "loss": 0.5812, "step": 8178 }, { "epoch": 1.0169389849238297, "grad_norm": 1.90625, "learning_rate": 9.578876447808948e-06, "loss": 0.4811, "step": 8179 }, { "epoch": 1.017065277448891, "grad_norm": 2.015625, "learning_rate": 9.576881790678378e-06, "loss": 0.5373, "step": 8180 }, { "epoch": 1.0171915699739522, "grad_norm": 1.875, "learning_rate": 9.574887150412279e-06, "loss": 0.4742, "step": 8181 }, { "epoch": 1.0173178624990133, "grad_norm": 2.015625, "learning_rate": 9.572892527090154e-06, "loss": 0.4646, "step": 8182 }, { "epoch": 1.0174441550240745, "grad_norm": 1.8203125, "learning_rate": 9.570897920791504e-06, "loss": 0.4512, "step": 8183 }, { "epoch": 1.0175704475491356, "grad_norm": 1.90625, "learning_rate": 9.56890333159583e-06, "loss": 0.5619, "step": 8184 }, { "epoch": 1.017696740074197, "grad_norm": 2.046875, "learning_rate": 9.56690875958263e-06, "loss": 0.5258, "step": 8185 }, { "epoch": 1.017823032599258, "grad_norm": 2.109375, "learning_rate": 9.564914204831405e-06, "loss": 0.5313, "step": 8186 }, { "epoch": 1.0179493251243192, "grad_norm": 2.078125, "learning_rate": 9.56291966742165e-06, "loss": 0.5284, "step": 8187 }, { "epoch": 1.0180756176493804, "grad_norm": 2.046875, "learning_rate": 9.560925147432866e-06, "loss": 0.5619, "step": 8188 }, { "epoch": 1.0182019101744415, "grad_norm": 1.9609375, "learning_rate": 9.558930644944547e-06, "loss": 0.5737, "step": 8189 }, { "epoch": 1.0183282026995026, "grad_norm": 1.984375, "learning_rate": 9.556936160036191e-06, "loss": 0.4698, "step": 8190 }, { "epoch": 1.018454495224564, "grad_norm": 1.984375, "learning_rate": 9.554941692787292e-06, "loss": 0.5413, "step": 8191 }, { "epoch": 1.0185807877496251, "grad_norm": 1.8984375, "learning_rate": 9.552947243277346e-06, "loss": 0.527, "step": 8192 }, { "epoch": 1.0187070802746863, "grad_norm": 1.9609375, "learning_rate": 9.550952811585846e-06, "loss": 0.5417, "step": 8193 }, { "epoch": 1.0188333727997474, "grad_norm": 2.078125, "learning_rate": 9.548958397792281e-06, "loss": 0.5207, "step": 8194 }, { "epoch": 1.0189596653248085, "grad_norm": 1.9921875, "learning_rate": 9.546964001976148e-06, "loss": 0.5829, "step": 8195 }, { "epoch": 1.0190859578498697, "grad_norm": 1.9765625, "learning_rate": 9.544969624216944e-06, "loss": 0.474, "step": 8196 }, { "epoch": 1.019212250374931, "grad_norm": 1.9765625, "learning_rate": 9.542975264594151e-06, "loss": 0.5264, "step": 8197 }, { "epoch": 1.0193385428999922, "grad_norm": 1.8359375, "learning_rate": 9.540980923187266e-06, "loss": 0.5082, "step": 8198 }, { "epoch": 1.0194648354250533, "grad_norm": 1.890625, "learning_rate": 9.538986600075775e-06, "loss": 0.5373, "step": 8199 }, { "epoch": 1.0195911279501144, "grad_norm": 1.8984375, "learning_rate": 9.536992295339167e-06, "loss": 0.4673, "step": 8200 }, { "epoch": 1.0197174204751756, "grad_norm": 1.75, "learning_rate": 9.53499800905693e-06, "loss": 0.4416, "step": 8201 }, { "epoch": 1.019843713000237, "grad_norm": 1.796875, "learning_rate": 9.533003741308555e-06, "loss": 0.5195, "step": 8202 }, { "epoch": 1.019970005525298, "grad_norm": 1.890625, "learning_rate": 9.531009492173526e-06, "loss": 0.4681, "step": 8203 }, { "epoch": 1.0200962980503592, "grad_norm": 1.9140625, "learning_rate": 9.529015261731326e-06, "loss": 0.4851, "step": 8204 }, { "epoch": 1.0202225905754203, "grad_norm": 1.8046875, "learning_rate": 9.527021050061448e-06, "loss": 0.4284, "step": 8205 }, { "epoch": 1.0203488831004814, "grad_norm": 2.09375, "learning_rate": 9.525026857243368e-06, "loss": 0.5294, "step": 8206 }, { "epoch": 1.0204751756255426, "grad_norm": 2.03125, "learning_rate": 9.523032683356574e-06, "loss": 0.5014, "step": 8207 }, { "epoch": 1.020601468150604, "grad_norm": 1.8515625, "learning_rate": 9.521038528480547e-06, "loss": 0.5246, "step": 8208 }, { "epoch": 1.020727760675665, "grad_norm": 2.015625, "learning_rate": 9.519044392694773e-06, "loss": 0.4472, "step": 8209 }, { "epoch": 1.0208540532007262, "grad_norm": 1.8984375, "learning_rate": 9.51705027607873e-06, "loss": 0.4687, "step": 8210 }, { "epoch": 1.0209803457257873, "grad_norm": 2.171875, "learning_rate": 9.5150561787119e-06, "loss": 0.5049, "step": 8211 }, { "epoch": 1.0211066382508485, "grad_norm": 1.8984375, "learning_rate": 9.513062100673759e-06, "loss": 0.4958, "step": 8212 }, { "epoch": 1.0212329307759096, "grad_norm": 1.875, "learning_rate": 9.511068042043793e-06, "loss": 0.5027, "step": 8213 }, { "epoch": 1.021359223300971, "grad_norm": 1.9453125, "learning_rate": 9.509074002901476e-06, "loss": 0.5123, "step": 8214 }, { "epoch": 1.021485515826032, "grad_norm": 1.703125, "learning_rate": 9.50707998332629e-06, "loss": 0.4315, "step": 8215 }, { "epoch": 1.0216118083510932, "grad_norm": 1.8203125, "learning_rate": 9.505085983397707e-06, "loss": 0.4619, "step": 8216 }, { "epoch": 1.0217381008761544, "grad_norm": 1.8359375, "learning_rate": 9.503092003195203e-06, "loss": 0.4541, "step": 8217 }, { "epoch": 1.0218643934012155, "grad_norm": 1.984375, "learning_rate": 9.501098042798256e-06, "loss": 0.5202, "step": 8218 }, { "epoch": 1.0219906859262768, "grad_norm": 2.0625, "learning_rate": 9.499104102286341e-06, "loss": 0.5463, "step": 8219 }, { "epoch": 1.022116978451338, "grad_norm": 1.8125, "learning_rate": 9.497110181738928e-06, "loss": 0.5029, "step": 8220 }, { "epoch": 1.022243270976399, "grad_norm": 1.9453125, "learning_rate": 9.495116281235493e-06, "loss": 0.5411, "step": 8221 }, { "epoch": 1.0223695635014602, "grad_norm": 1.84375, "learning_rate": 9.493122400855507e-06, "loss": 0.4881, "step": 8222 }, { "epoch": 1.0224958560265214, "grad_norm": 1.828125, "learning_rate": 9.49112854067844e-06, "loss": 0.5177, "step": 8223 }, { "epoch": 1.0226221485515825, "grad_norm": 1.828125, "learning_rate": 9.489134700783766e-06, "loss": 0.4649, "step": 8224 }, { "epoch": 1.0227484410766439, "grad_norm": 1.90625, "learning_rate": 9.487140881250951e-06, "loss": 0.5111, "step": 8225 }, { "epoch": 1.022874733601705, "grad_norm": 2.25, "learning_rate": 9.485147082159464e-06, "loss": 0.4976, "step": 8226 }, { "epoch": 1.0230010261267661, "grad_norm": 2.015625, "learning_rate": 9.483153303588774e-06, "loss": 0.54, "step": 8227 }, { "epoch": 1.0231273186518273, "grad_norm": 1.96875, "learning_rate": 9.481159545618351e-06, "loss": 0.6095, "step": 8228 }, { "epoch": 1.0232536111768884, "grad_norm": 1.9765625, "learning_rate": 9.479165808327658e-06, "loss": 0.4727, "step": 8229 }, { "epoch": 1.0233799037019495, "grad_norm": 2.03125, "learning_rate": 9.477172091796155e-06, "loss": 0.4964, "step": 8230 }, { "epoch": 1.023506196227011, "grad_norm": 1.9375, "learning_rate": 9.47517839610332e-06, "loss": 0.5088, "step": 8231 }, { "epoch": 1.023632488752072, "grad_norm": 2.234375, "learning_rate": 9.473184721328608e-06, "loss": 0.5468, "step": 8232 }, { "epoch": 1.0237587812771332, "grad_norm": 1.9375, "learning_rate": 9.471191067551484e-06, "loss": 0.4983, "step": 8233 }, { "epoch": 1.0238850738021943, "grad_norm": 1.9140625, "learning_rate": 9.469197434851412e-06, "loss": 0.4697, "step": 8234 }, { "epoch": 1.0240113663272554, "grad_norm": 2.140625, "learning_rate": 9.46720382330785e-06, "loss": 0.4922, "step": 8235 }, { "epoch": 1.0241376588523168, "grad_norm": 2.078125, "learning_rate": 9.465210233000262e-06, "loss": 0.5131, "step": 8236 }, { "epoch": 1.024263951377378, "grad_norm": 1.984375, "learning_rate": 9.463216664008106e-06, "loss": 0.48, "step": 8237 }, { "epoch": 1.024390243902439, "grad_norm": 2.046875, "learning_rate": 9.46122311641084e-06, "loss": 0.5502, "step": 8238 }, { "epoch": 1.0245165364275002, "grad_norm": 1.7734375, "learning_rate": 9.459229590287923e-06, "loss": 0.4397, "step": 8239 }, { "epoch": 1.0246428289525613, "grad_norm": 1.9140625, "learning_rate": 9.457236085718813e-06, "loss": 0.4834, "step": 8240 }, { "epoch": 1.0247691214776224, "grad_norm": 1.90625, "learning_rate": 9.455242602782964e-06, "loss": 0.485, "step": 8241 }, { "epoch": 1.0248954140026838, "grad_norm": 2.0625, "learning_rate": 9.453249141559833e-06, "loss": 0.5506, "step": 8242 }, { "epoch": 1.025021706527745, "grad_norm": 2.046875, "learning_rate": 9.451255702128874e-06, "loss": 0.5611, "step": 8243 }, { "epoch": 1.025147999052806, "grad_norm": 1.9921875, "learning_rate": 9.449262284569542e-06, "loss": 0.5181, "step": 8244 }, { "epoch": 1.0252742915778672, "grad_norm": 2.140625, "learning_rate": 9.447268888961287e-06, "loss": 0.4616, "step": 8245 }, { "epoch": 1.0254005841029283, "grad_norm": 2.015625, "learning_rate": 9.445275515383564e-06, "loss": 0.5264, "step": 8246 }, { "epoch": 1.0255268766279895, "grad_norm": 1.9609375, "learning_rate": 9.443282163915823e-06, "loss": 0.5525, "step": 8247 }, { "epoch": 1.0256531691530508, "grad_norm": 1.765625, "learning_rate": 9.44128883463751e-06, "loss": 0.4573, "step": 8248 }, { "epoch": 1.025779461678112, "grad_norm": 1.8515625, "learning_rate": 9.439295527628083e-06, "loss": 0.4337, "step": 8249 }, { "epoch": 1.025905754203173, "grad_norm": 1.8046875, "learning_rate": 9.437302242966984e-06, "loss": 0.4649, "step": 8250 }, { "epoch": 1.0260320467282342, "grad_norm": 1.921875, "learning_rate": 9.435308980733662e-06, "loss": 0.5137, "step": 8251 }, { "epoch": 1.0261583392532954, "grad_norm": 1.9453125, "learning_rate": 9.433315741007564e-06, "loss": 0.5204, "step": 8252 }, { "epoch": 1.0262846317783567, "grad_norm": 1.921875, "learning_rate": 9.431322523868137e-06, "loss": 0.4912, "step": 8253 }, { "epoch": 1.0264109243034178, "grad_norm": 1.8671875, "learning_rate": 9.429329329394824e-06, "loss": 0.4416, "step": 8254 }, { "epoch": 1.026537216828479, "grad_norm": 2.03125, "learning_rate": 9.427336157667069e-06, "loss": 0.4738, "step": 8255 }, { "epoch": 1.0266635093535401, "grad_norm": 2.046875, "learning_rate": 9.425343008764316e-06, "loss": 0.4639, "step": 8256 }, { "epoch": 1.0267898018786012, "grad_norm": 1.90625, "learning_rate": 9.423349882766007e-06, "loss": 0.4743, "step": 8257 }, { "epoch": 1.0269160944036624, "grad_norm": 1.921875, "learning_rate": 9.421356779751581e-06, "loss": 0.6312, "step": 8258 }, { "epoch": 1.0270423869287237, "grad_norm": 1.9765625, "learning_rate": 9.419363699800481e-06, "loss": 0.4783, "step": 8259 }, { "epoch": 1.0271686794537849, "grad_norm": 1.9609375, "learning_rate": 9.417370642992146e-06, "loss": 0.5245, "step": 8260 }, { "epoch": 1.027294971978846, "grad_norm": 1.875, "learning_rate": 9.415377609406013e-06, "loss": 0.4202, "step": 8261 }, { "epoch": 1.0274212645039071, "grad_norm": 1.96875, "learning_rate": 9.41338459912152e-06, "loss": 0.5801, "step": 8262 }, { "epoch": 1.0275475570289683, "grad_norm": 1.9921875, "learning_rate": 9.411391612218104e-06, "loss": 0.4755, "step": 8263 }, { "epoch": 1.0276738495540296, "grad_norm": 1.84375, "learning_rate": 9.409398648775202e-06, "loss": 0.4969, "step": 8264 }, { "epoch": 1.0278001420790908, "grad_norm": 1.8359375, "learning_rate": 9.407405708872246e-06, "loss": 0.4605, "step": 8265 }, { "epoch": 1.027926434604152, "grad_norm": 2.046875, "learning_rate": 9.405412792588667e-06, "loss": 0.5171, "step": 8266 }, { "epoch": 1.028052727129213, "grad_norm": 2.203125, "learning_rate": 9.403419900003907e-06, "loss": 0.5161, "step": 8267 }, { "epoch": 1.0281790196542742, "grad_norm": 1.953125, "learning_rate": 9.401427031197392e-06, "loss": 0.5063, "step": 8268 }, { "epoch": 1.0283053121793353, "grad_norm": 1.8125, "learning_rate": 9.399434186248554e-06, "loss": 0.4166, "step": 8269 }, { "epoch": 1.0284316047043967, "grad_norm": 1.9140625, "learning_rate": 9.397441365236822e-06, "loss": 0.5012, "step": 8270 }, { "epoch": 1.0285578972294578, "grad_norm": 1.8671875, "learning_rate": 9.395448568241627e-06, "loss": 0.4678, "step": 8271 }, { "epoch": 1.028684189754519, "grad_norm": 1.984375, "learning_rate": 9.393455795342395e-06, "loss": 0.5297, "step": 8272 }, { "epoch": 1.02881048227958, "grad_norm": 2.03125, "learning_rate": 9.391463046618556e-06, "loss": 0.6343, "step": 8273 }, { "epoch": 1.0289367748046412, "grad_norm": 1.7890625, "learning_rate": 9.389470322149532e-06, "loss": 0.4925, "step": 8274 }, { "epoch": 1.0290630673297023, "grad_norm": 1.8125, "learning_rate": 9.387477622014752e-06, "loss": 0.5243, "step": 8275 }, { "epoch": 1.0291893598547637, "grad_norm": 1.8671875, "learning_rate": 9.385484946293636e-06, "loss": 0.4637, "step": 8276 }, { "epoch": 1.0293156523798248, "grad_norm": 1.953125, "learning_rate": 9.383492295065613e-06, "loss": 0.4346, "step": 8277 }, { "epoch": 1.029441944904886, "grad_norm": 1.9375, "learning_rate": 9.3814996684101e-06, "loss": 0.532, "step": 8278 }, { "epoch": 1.029568237429947, "grad_norm": 2.390625, "learning_rate": 9.37950706640652e-06, "loss": 0.6476, "step": 8279 }, { "epoch": 1.0296945299550082, "grad_norm": 1.984375, "learning_rate": 9.377514489134298e-06, "loss": 0.4914, "step": 8280 }, { "epoch": 1.0298208224800696, "grad_norm": 2.0, "learning_rate": 9.375521936672846e-06, "loss": 0.4827, "step": 8281 }, { "epoch": 1.0299471150051307, "grad_norm": 1.8125, "learning_rate": 9.373529409101584e-06, "loss": 0.4833, "step": 8282 }, { "epoch": 1.0300734075301918, "grad_norm": 1.8203125, "learning_rate": 9.371536906499932e-06, "loss": 0.5161, "step": 8283 }, { "epoch": 1.030199700055253, "grad_norm": 2.203125, "learning_rate": 9.369544428947303e-06, "loss": 0.57, "step": 8284 }, { "epoch": 1.030325992580314, "grad_norm": 2.03125, "learning_rate": 9.367551976523117e-06, "loss": 0.4859, "step": 8285 }, { "epoch": 1.0304522851053752, "grad_norm": 1.859375, "learning_rate": 9.365559549306785e-06, "loss": 0.4641, "step": 8286 }, { "epoch": 1.0305785776304366, "grad_norm": 1.78125, "learning_rate": 9.363567147377723e-06, "loss": 0.4782, "step": 8287 }, { "epoch": 1.0307048701554977, "grad_norm": 2.1875, "learning_rate": 9.36157477081534e-06, "loss": 0.6072, "step": 8288 }, { "epoch": 1.0308311626805589, "grad_norm": 2.03125, "learning_rate": 9.35958241969905e-06, "loss": 0.5212, "step": 8289 }, { "epoch": 1.03095745520562, "grad_norm": 1.8828125, "learning_rate": 9.357590094108262e-06, "loss": 0.4825, "step": 8290 }, { "epoch": 1.0310837477306811, "grad_norm": 2.0625, "learning_rate": 9.355597794122384e-06, "loss": 0.4836, "step": 8291 }, { "epoch": 1.0312100402557423, "grad_norm": 1.984375, "learning_rate": 9.353605519820828e-06, "loss": 0.526, "step": 8292 }, { "epoch": 1.0313363327808036, "grad_norm": 2.015625, "learning_rate": 9.351613271282999e-06, "loss": 0.5819, "step": 8293 }, { "epoch": 1.0314626253058647, "grad_norm": 2.046875, "learning_rate": 9.349621048588302e-06, "loss": 0.4713, "step": 8294 }, { "epoch": 1.0315889178309259, "grad_norm": 2.03125, "learning_rate": 9.347628851816144e-06, "loss": 0.5199, "step": 8295 }, { "epoch": 1.031715210355987, "grad_norm": 2.15625, "learning_rate": 9.345636681045929e-06, "loss": 0.6043, "step": 8296 }, { "epoch": 1.0318415028810481, "grad_norm": 1.984375, "learning_rate": 9.34364453635706e-06, "loss": 0.5623, "step": 8297 }, { "epoch": 1.0319677954061095, "grad_norm": 1.953125, "learning_rate": 9.341652417828938e-06, "loss": 0.5476, "step": 8298 }, { "epoch": 1.0320940879311706, "grad_norm": 1.796875, "learning_rate": 9.339660325540964e-06, "loss": 0.5062, "step": 8299 }, { "epoch": 1.0322203804562318, "grad_norm": 1.9375, "learning_rate": 9.337668259572541e-06, "loss": 0.4659, "step": 8300 }, { "epoch": 1.032346672981293, "grad_norm": 1.8984375, "learning_rate": 9.335676220003065e-06, "loss": 0.5642, "step": 8301 }, { "epoch": 1.032472965506354, "grad_norm": 1.8984375, "learning_rate": 9.333684206911929e-06, "loss": 0.507, "step": 8302 }, { "epoch": 1.0325992580314152, "grad_norm": 1.90625, "learning_rate": 9.331692220378543e-06, "loss": 0.5178, "step": 8303 }, { "epoch": 1.0327255505564765, "grad_norm": 1.921875, "learning_rate": 9.329700260482292e-06, "loss": 0.5308, "step": 8304 }, { "epoch": 1.0328518430815377, "grad_norm": 1.609375, "learning_rate": 9.327708327302576e-06, "loss": 0.4224, "step": 8305 }, { "epoch": 1.0329781356065988, "grad_norm": 1.8515625, "learning_rate": 9.325716420918786e-06, "loss": 0.523, "step": 8306 }, { "epoch": 1.03310442813166, "grad_norm": 2.609375, "learning_rate": 9.323724541410316e-06, "loss": 0.6987, "step": 8307 }, { "epoch": 1.033230720656721, "grad_norm": 1.8984375, "learning_rate": 9.321732688856558e-06, "loss": 0.4877, "step": 8308 }, { "epoch": 1.0333570131817824, "grad_norm": 1.9765625, "learning_rate": 9.3197408633369e-06, "loss": 0.4469, "step": 8309 }, { "epoch": 1.0334833057068435, "grad_norm": 1.9453125, "learning_rate": 9.317749064930732e-06, "loss": 0.5134, "step": 8310 }, { "epoch": 1.0336095982319047, "grad_norm": 1.8359375, "learning_rate": 9.315757293717443e-06, "loss": 0.4449, "step": 8311 }, { "epoch": 1.0337358907569658, "grad_norm": 1.7265625, "learning_rate": 9.31376554977642e-06, "loss": 0.4901, "step": 8312 }, { "epoch": 1.033862183282027, "grad_norm": 1.78125, "learning_rate": 9.311773833187051e-06, "loss": 0.4462, "step": 8313 }, { "epoch": 1.033988475807088, "grad_norm": 1.90625, "learning_rate": 9.309782144028718e-06, "loss": 0.454, "step": 8314 }, { "epoch": 1.0341147683321494, "grad_norm": 1.78125, "learning_rate": 9.307790482380807e-06, "loss": 0.4659, "step": 8315 }, { "epoch": 1.0342410608572106, "grad_norm": 1.90625, "learning_rate": 9.3057988483227e-06, "loss": 0.519, "step": 8316 }, { "epoch": 1.0343673533822717, "grad_norm": 1.7890625, "learning_rate": 9.303807241933779e-06, "loss": 0.4504, "step": 8317 }, { "epoch": 1.0344936459073328, "grad_norm": 1.875, "learning_rate": 9.301815663293423e-06, "loss": 0.4954, "step": 8318 }, { "epoch": 1.034619938432394, "grad_norm": 1.96875, "learning_rate": 9.299824112481014e-06, "loss": 0.4702, "step": 8319 }, { "epoch": 1.034746230957455, "grad_norm": 1.7578125, "learning_rate": 9.297832589575924e-06, "loss": 0.4358, "step": 8320 }, { "epoch": 1.0348725234825165, "grad_norm": 2.0, "learning_rate": 9.295841094657542e-06, "loss": 0.5127, "step": 8321 }, { "epoch": 1.0349988160075776, "grad_norm": 1.890625, "learning_rate": 9.293849627805237e-06, "loss": 0.4874, "step": 8322 }, { "epoch": 1.0351251085326387, "grad_norm": 1.953125, "learning_rate": 9.291858189098384e-06, "loss": 0.5032, "step": 8323 }, { "epoch": 1.0352514010576999, "grad_norm": 1.9921875, "learning_rate": 9.28986677861636e-06, "loss": 0.5035, "step": 8324 }, { "epoch": 1.035377693582761, "grad_norm": 2.109375, "learning_rate": 9.287875396438534e-06, "loss": 0.535, "step": 8325 }, { "epoch": 1.0355039861078223, "grad_norm": 1.8203125, "learning_rate": 9.28588404264428e-06, "loss": 0.5155, "step": 8326 }, { "epoch": 1.0356302786328835, "grad_norm": 1.6796875, "learning_rate": 9.283892717312968e-06, "loss": 0.4532, "step": 8327 }, { "epoch": 1.0357565711579446, "grad_norm": 1.9375, "learning_rate": 9.281901420523968e-06, "loss": 0.5009, "step": 8328 }, { "epoch": 1.0358828636830058, "grad_norm": 1.9140625, "learning_rate": 9.279910152356648e-06, "loss": 0.448, "step": 8329 }, { "epoch": 1.0360091562080669, "grad_norm": 1.9375, "learning_rate": 9.277918912890373e-06, "loss": 0.4277, "step": 8330 }, { "epoch": 1.036135448733128, "grad_norm": 2.078125, "learning_rate": 9.27592770220451e-06, "loss": 0.5264, "step": 8331 }, { "epoch": 1.0362617412581894, "grad_norm": 2.203125, "learning_rate": 9.273936520378428e-06, "loss": 0.5862, "step": 8332 }, { "epoch": 1.0363880337832505, "grad_norm": 1.953125, "learning_rate": 9.271945367491487e-06, "loss": 0.4746, "step": 8333 }, { "epoch": 1.0365143263083116, "grad_norm": 1.8203125, "learning_rate": 9.269954243623049e-06, "loss": 0.4764, "step": 8334 }, { "epoch": 1.0366406188333728, "grad_norm": 2.0, "learning_rate": 9.267963148852477e-06, "loss": 0.5402, "step": 8335 }, { "epoch": 1.036766911358434, "grad_norm": 1.9765625, "learning_rate": 9.26597208325913e-06, "loss": 0.4796, "step": 8336 }, { "epoch": 1.036893203883495, "grad_norm": 1.9609375, "learning_rate": 9.263981046922369e-06, "loss": 0.5474, "step": 8337 }, { "epoch": 1.0370194964085564, "grad_norm": 1.9453125, "learning_rate": 9.261990039921543e-06, "loss": 0.4876, "step": 8338 }, { "epoch": 1.0371457889336175, "grad_norm": 2.0625, "learning_rate": 9.259999062336024e-06, "loss": 0.558, "step": 8339 }, { "epoch": 1.0372720814586787, "grad_norm": 1.9375, "learning_rate": 9.258008114245158e-06, "loss": 0.5125, "step": 8340 }, { "epoch": 1.0373983739837398, "grad_norm": 2.015625, "learning_rate": 9.256017195728303e-06, "loss": 0.5669, "step": 8341 }, { "epoch": 1.037524666508801, "grad_norm": 1.9765625, "learning_rate": 9.25402630686481e-06, "loss": 0.522, "step": 8342 }, { "epoch": 1.0376509590338623, "grad_norm": 1.9765625, "learning_rate": 9.252035447734031e-06, "loss": 0.5221, "step": 8343 }, { "epoch": 1.0377772515589234, "grad_norm": 1.8984375, "learning_rate": 9.250044618415319e-06, "loss": 0.4721, "step": 8344 }, { "epoch": 1.0379035440839846, "grad_norm": 2.0, "learning_rate": 9.24805381898802e-06, "loss": 0.5466, "step": 8345 }, { "epoch": 1.0380298366090457, "grad_norm": 1.984375, "learning_rate": 9.246063049531488e-06, "loss": 0.5164, "step": 8346 }, { "epoch": 1.0381561291341068, "grad_norm": 1.8359375, "learning_rate": 9.244072310125064e-06, "loss": 0.4427, "step": 8347 }, { "epoch": 1.038282421659168, "grad_norm": 2.28125, "learning_rate": 9.242081600848097e-06, "loss": 0.5181, "step": 8348 }, { "epoch": 1.0384087141842293, "grad_norm": 2.140625, "learning_rate": 9.240090921779933e-06, "loss": 0.4994, "step": 8349 }, { "epoch": 1.0385350067092904, "grad_norm": 2.015625, "learning_rate": 9.238100272999914e-06, "loss": 0.5517, "step": 8350 }, { "epoch": 1.0386612992343516, "grad_norm": 1.9765625, "learning_rate": 9.236109654587383e-06, "loss": 0.4972, "step": 8351 }, { "epoch": 1.0387875917594127, "grad_norm": 1.9921875, "learning_rate": 9.234119066621681e-06, "loss": 0.5731, "step": 8352 }, { "epoch": 1.0389138842844738, "grad_norm": 2.09375, "learning_rate": 9.232128509182148e-06, "loss": 0.5057, "step": 8353 }, { "epoch": 1.039040176809535, "grad_norm": 1.90625, "learning_rate": 9.230137982348124e-06, "loss": 0.4694, "step": 8354 }, { "epoch": 1.0391664693345963, "grad_norm": 1.96875, "learning_rate": 9.228147486198945e-06, "loss": 0.4668, "step": 8355 }, { "epoch": 1.0392927618596575, "grad_norm": 1.8515625, "learning_rate": 9.226157020813945e-06, "loss": 0.4547, "step": 8356 }, { "epoch": 1.0394190543847186, "grad_norm": 2.0625, "learning_rate": 9.224166586272465e-06, "loss": 0.6366, "step": 8357 }, { "epoch": 1.0395453469097797, "grad_norm": 1.8203125, "learning_rate": 9.222176182653836e-06, "loss": 0.5024, "step": 8358 }, { "epoch": 1.0396716394348409, "grad_norm": 1.90625, "learning_rate": 9.220185810037392e-06, "loss": 0.4824, "step": 8359 }, { "epoch": 1.0397979319599022, "grad_norm": 1.8359375, "learning_rate": 9.218195468502462e-06, "loss": 0.4543, "step": 8360 }, { "epoch": 1.0399242244849634, "grad_norm": 2.109375, "learning_rate": 9.216205158128378e-06, "loss": 0.5806, "step": 8361 }, { "epoch": 1.0400505170100245, "grad_norm": 2.015625, "learning_rate": 9.214214878994469e-06, "loss": 0.5231, "step": 8362 }, { "epoch": 1.0401768095350856, "grad_norm": 1.90625, "learning_rate": 9.212224631180062e-06, "loss": 0.4712, "step": 8363 }, { "epoch": 1.0403031020601468, "grad_norm": 1.9765625, "learning_rate": 9.210234414764484e-06, "loss": 0.5059, "step": 8364 }, { "epoch": 1.040429394585208, "grad_norm": 2.03125, "learning_rate": 9.20824422982706e-06, "loss": 0.4372, "step": 8365 }, { "epoch": 1.0405556871102692, "grad_norm": 1.9609375, "learning_rate": 9.206254076447115e-06, "loss": 0.4819, "step": 8366 }, { "epoch": 1.0406819796353304, "grad_norm": 2.0625, "learning_rate": 9.204263954703969e-06, "loss": 0.509, "step": 8367 }, { "epoch": 1.0408082721603915, "grad_norm": 2.015625, "learning_rate": 9.202273864676945e-06, "loss": 0.5304, "step": 8368 }, { "epoch": 1.0409345646854526, "grad_norm": 2.03125, "learning_rate": 9.200283806445364e-06, "loss": 0.4637, "step": 8369 }, { "epoch": 1.0410608572105138, "grad_norm": 2.015625, "learning_rate": 9.198293780088542e-06, "loss": 0.5047, "step": 8370 }, { "epoch": 1.041187149735575, "grad_norm": 1.8984375, "learning_rate": 9.196303785685804e-06, "loss": 0.4228, "step": 8371 }, { "epoch": 1.0413134422606363, "grad_norm": 2.046875, "learning_rate": 9.194313823316457e-06, "loss": 0.5246, "step": 8372 }, { "epoch": 1.0414397347856974, "grad_norm": 1.8984375, "learning_rate": 9.192323893059821e-06, "loss": 0.5285, "step": 8373 }, { "epoch": 1.0415660273107585, "grad_norm": 1.6875, "learning_rate": 9.19033399499521e-06, "loss": 0.4151, "step": 8374 }, { "epoch": 1.0416923198358197, "grad_norm": 2.125, "learning_rate": 9.188344129201932e-06, "loss": 0.5518, "step": 8375 }, { "epoch": 1.0418186123608808, "grad_norm": 1.9609375, "learning_rate": 9.186354295759305e-06, "loss": 0.5269, "step": 8376 }, { "epoch": 1.0419449048859422, "grad_norm": 1.9453125, "learning_rate": 9.184364494746637e-06, "loss": 0.4408, "step": 8377 }, { "epoch": 1.0420711974110033, "grad_norm": 1.9765625, "learning_rate": 9.182374726243234e-06, "loss": 0.4972, "step": 8378 }, { "epoch": 1.0421974899360644, "grad_norm": 2.046875, "learning_rate": 9.180384990328407e-06, "loss": 0.5528, "step": 8379 }, { "epoch": 1.0423237824611256, "grad_norm": 2.0625, "learning_rate": 9.17839528708146e-06, "loss": 0.5527, "step": 8380 }, { "epoch": 1.0424500749861867, "grad_norm": 1.96875, "learning_rate": 9.176405616581696e-06, "loss": 0.484, "step": 8381 }, { "epoch": 1.0425763675112478, "grad_norm": 1.859375, "learning_rate": 9.174415978908422e-06, "loss": 0.5125, "step": 8382 }, { "epoch": 1.0427026600363092, "grad_norm": 2.078125, "learning_rate": 9.172426374140938e-06, "loss": 0.5524, "step": 8383 }, { "epoch": 1.0428289525613703, "grad_norm": 1.828125, "learning_rate": 9.170436802358547e-06, "loss": 0.4499, "step": 8384 }, { "epoch": 1.0429552450864314, "grad_norm": 2.5625, "learning_rate": 9.168447263640543e-06, "loss": 0.6332, "step": 8385 }, { "epoch": 1.0430815376114926, "grad_norm": 1.796875, "learning_rate": 9.166457758066231e-06, "loss": 0.4745, "step": 8386 }, { "epoch": 1.0432078301365537, "grad_norm": 1.8984375, "learning_rate": 9.164468285714905e-06, "loss": 0.4422, "step": 8387 }, { "epoch": 1.043334122661615, "grad_norm": 2.078125, "learning_rate": 9.16247884666586e-06, "loss": 0.4996, "step": 8388 }, { "epoch": 1.0434604151866762, "grad_norm": 1.84375, "learning_rate": 9.160489440998393e-06, "loss": 0.4379, "step": 8389 }, { "epoch": 1.0435867077117373, "grad_norm": 1.90625, "learning_rate": 9.158500068791793e-06, "loss": 0.4951, "step": 8390 }, { "epoch": 1.0437130002367985, "grad_norm": 1.9921875, "learning_rate": 9.156510730125353e-06, "loss": 0.6273, "step": 8391 }, { "epoch": 1.0438392927618596, "grad_norm": 1.953125, "learning_rate": 9.154521425078363e-06, "loss": 0.4925, "step": 8392 }, { "epoch": 1.0439655852869207, "grad_norm": 2.078125, "learning_rate": 9.15253215373011e-06, "loss": 0.5657, "step": 8393 }, { "epoch": 1.044091877811982, "grad_norm": 1.9609375, "learning_rate": 9.150542916159889e-06, "loss": 0.4692, "step": 8394 }, { "epoch": 1.0442181703370432, "grad_norm": 1.8671875, "learning_rate": 9.14855371244698e-06, "loss": 0.4385, "step": 8395 }, { "epoch": 1.0443444628621044, "grad_norm": 2.0, "learning_rate": 9.146564542670668e-06, "loss": 0.5256, "step": 8396 }, { "epoch": 1.0444707553871655, "grad_norm": 1.9609375, "learning_rate": 9.144575406910239e-06, "loss": 0.541, "step": 8397 }, { "epoch": 1.0445970479122266, "grad_norm": 1.9140625, "learning_rate": 9.142586305244973e-06, "loss": 0.5415, "step": 8398 }, { "epoch": 1.0447233404372878, "grad_norm": 2.015625, "learning_rate": 9.140597237754153e-06, "loss": 0.5534, "step": 8399 }, { "epoch": 1.0448496329623491, "grad_norm": 2.046875, "learning_rate": 9.138608204517057e-06, "loss": 0.5357, "step": 8400 }, { "epoch": 1.0449759254874103, "grad_norm": 2.015625, "learning_rate": 9.136619205612962e-06, "loss": 0.4543, "step": 8401 }, { "epoch": 1.0451022180124714, "grad_norm": 2.109375, "learning_rate": 9.134630241121147e-06, "loss": 0.5776, "step": 8402 }, { "epoch": 1.0452285105375325, "grad_norm": 1.984375, "learning_rate": 9.132641311120885e-06, "loss": 0.4351, "step": 8403 }, { "epoch": 1.0453548030625937, "grad_norm": 2.015625, "learning_rate": 9.130652415691452e-06, "loss": 0.4786, "step": 8404 }, { "epoch": 1.045481095587655, "grad_norm": 2.140625, "learning_rate": 9.128663554912119e-06, "loss": 0.4996, "step": 8405 }, { "epoch": 1.0456073881127161, "grad_norm": 2.25, "learning_rate": 9.12667472886216e-06, "loss": 0.5389, "step": 8406 }, { "epoch": 1.0457336806377773, "grad_norm": 1.9140625, "learning_rate": 9.12468593762084e-06, "loss": 0.4663, "step": 8407 }, { "epoch": 1.0458599731628384, "grad_norm": 2.015625, "learning_rate": 9.122697181267431e-06, "loss": 0.5862, "step": 8408 }, { "epoch": 1.0459862656878995, "grad_norm": 1.953125, "learning_rate": 9.1207084598812e-06, "loss": 0.5097, "step": 8409 }, { "epoch": 1.0461125582129607, "grad_norm": 1.96875, "learning_rate": 9.11871977354141e-06, "loss": 0.5442, "step": 8410 }, { "epoch": 1.046238850738022, "grad_norm": 2.078125, "learning_rate": 9.116731122327328e-06, "loss": 0.6054, "step": 8411 }, { "epoch": 1.0463651432630832, "grad_norm": 1.90625, "learning_rate": 9.114742506318216e-06, "loss": 0.4867, "step": 8412 }, { "epoch": 1.0464914357881443, "grad_norm": 1.84375, "learning_rate": 9.112753925593335e-06, "loss": 0.5236, "step": 8413 }, { "epoch": 1.0466177283132054, "grad_norm": 1.9296875, "learning_rate": 9.110765380231948e-06, "loss": 0.4858, "step": 8414 }, { "epoch": 1.0467440208382666, "grad_norm": 2.0, "learning_rate": 9.10877687031331e-06, "loss": 0.5237, "step": 8415 }, { "epoch": 1.046870313363328, "grad_norm": 1.8125, "learning_rate": 9.106788395916679e-06, "loss": 0.4496, "step": 8416 }, { "epoch": 1.046996605888389, "grad_norm": 2.140625, "learning_rate": 9.104799957121311e-06, "loss": 0.5232, "step": 8417 }, { "epoch": 1.0471228984134502, "grad_norm": 2.1875, "learning_rate": 9.102811554006462e-06, "loss": 0.5574, "step": 8418 }, { "epoch": 1.0472491909385113, "grad_norm": 1.734375, "learning_rate": 9.100823186651381e-06, "loss": 0.4895, "step": 8419 }, { "epoch": 1.0473754834635725, "grad_norm": 2.09375, "learning_rate": 9.098834855135324e-06, "loss": 0.4711, "step": 8420 }, { "epoch": 1.0475017759886336, "grad_norm": 1.8984375, "learning_rate": 9.096846559537537e-06, "loss": 0.4669, "step": 8421 }, { "epoch": 1.047628068513695, "grad_norm": 1.84375, "learning_rate": 9.094858299937272e-06, "loss": 0.4297, "step": 8422 }, { "epoch": 1.047754361038756, "grad_norm": 1.984375, "learning_rate": 9.092870076413775e-06, "loss": 0.4979, "step": 8423 }, { "epoch": 1.0478806535638172, "grad_norm": 1.9140625, "learning_rate": 9.09088188904629e-06, "loss": 0.4537, "step": 8424 }, { "epoch": 1.0480069460888783, "grad_norm": 1.984375, "learning_rate": 9.088893737914065e-06, "loss": 0.494, "step": 8425 }, { "epoch": 1.0481332386139395, "grad_norm": 1.8046875, "learning_rate": 9.08690562309634e-06, "loss": 0.5059, "step": 8426 }, { "epoch": 1.0482595311390006, "grad_norm": 1.96875, "learning_rate": 9.08491754467236e-06, "loss": 0.5392, "step": 8427 }, { "epoch": 1.048385823664062, "grad_norm": 2.09375, "learning_rate": 9.08292950272136e-06, "loss": 0.6271, "step": 8428 }, { "epoch": 1.048512116189123, "grad_norm": 1.9453125, "learning_rate": 9.080941497322581e-06, "loss": 0.5168, "step": 8429 }, { "epoch": 1.0486384087141842, "grad_norm": 1.9609375, "learning_rate": 9.078953528555261e-06, "loss": 0.4886, "step": 8430 }, { "epoch": 1.0487647012392454, "grad_norm": 2.09375, "learning_rate": 9.076965596498635e-06, "loss": 0.5521, "step": 8431 }, { "epoch": 1.0488909937643065, "grad_norm": 1.8984375, "learning_rate": 9.074977701231937e-06, "loss": 0.5319, "step": 8432 }, { "epoch": 1.0490172862893679, "grad_norm": 1.90625, "learning_rate": 9.0729898428344e-06, "loss": 0.551, "step": 8433 }, { "epoch": 1.049143578814429, "grad_norm": 1.921875, "learning_rate": 9.071002021385255e-06, "loss": 0.4621, "step": 8434 }, { "epoch": 1.0492698713394901, "grad_norm": 1.8125, "learning_rate": 9.069014236963732e-06, "loss": 0.4162, "step": 8435 }, { "epoch": 1.0493961638645513, "grad_norm": 1.8359375, "learning_rate": 9.06702648964906e-06, "loss": 0.4821, "step": 8436 }, { "epoch": 1.0495224563896124, "grad_norm": 1.9609375, "learning_rate": 9.065038779520463e-06, "loss": 0.5247, "step": 8437 }, { "epoch": 1.0496487489146735, "grad_norm": 1.8203125, "learning_rate": 9.06305110665717e-06, "loss": 0.4604, "step": 8438 }, { "epoch": 1.0497750414397349, "grad_norm": 2.25, "learning_rate": 9.061063471138401e-06, "loss": 0.5022, "step": 8439 }, { "epoch": 1.049901333964796, "grad_norm": 1.8203125, "learning_rate": 9.05907587304338e-06, "loss": 0.4361, "step": 8440 }, { "epoch": 1.0500276264898571, "grad_norm": 2.046875, "learning_rate": 9.05708831245133e-06, "loss": 0.6075, "step": 8441 }, { "epoch": 1.0501539190149183, "grad_norm": 1.8203125, "learning_rate": 9.055100789441469e-06, "loss": 0.47, "step": 8442 }, { "epoch": 1.0502802115399794, "grad_norm": 1.984375, "learning_rate": 9.053113304093016e-06, "loss": 0.5608, "step": 8443 }, { "epoch": 1.0504065040650405, "grad_norm": 2.140625, "learning_rate": 9.051125856485183e-06, "loss": 0.5947, "step": 8444 }, { "epoch": 1.050532796590102, "grad_norm": 1.9453125, "learning_rate": 9.04913844669719e-06, "loss": 0.5136, "step": 8445 }, { "epoch": 1.050659089115163, "grad_norm": 1.9453125, "learning_rate": 9.047151074808247e-06, "loss": 0.4516, "step": 8446 }, { "epoch": 1.0507853816402242, "grad_norm": 2.390625, "learning_rate": 9.045163740897566e-06, "loss": 0.4677, "step": 8447 }, { "epoch": 1.0509116741652853, "grad_norm": 1.84375, "learning_rate": 9.04317644504436e-06, "loss": 0.4807, "step": 8448 }, { "epoch": 1.0510379666903464, "grad_norm": 1.8359375, "learning_rate": 9.041189187327835e-06, "loss": 0.4273, "step": 8449 }, { "epoch": 1.0511642592154078, "grad_norm": 1.9296875, "learning_rate": 9.039201967827202e-06, "loss": 0.4471, "step": 8450 }, { "epoch": 1.051290551740469, "grad_norm": 1.75, "learning_rate": 9.037214786621662e-06, "loss": 0.4598, "step": 8451 }, { "epoch": 1.05141684426553, "grad_norm": 2.140625, "learning_rate": 9.035227643790422e-06, "loss": 0.4722, "step": 8452 }, { "epoch": 1.0515431367905912, "grad_norm": 1.9921875, "learning_rate": 9.033240539412686e-06, "loss": 0.48, "step": 8453 }, { "epoch": 1.0516694293156523, "grad_norm": 1.8046875, "learning_rate": 9.031253473567653e-06, "loss": 0.4907, "step": 8454 }, { "epoch": 1.0517957218407135, "grad_norm": 1.84375, "learning_rate": 9.029266446334522e-06, "loss": 0.5091, "step": 8455 }, { "epoch": 1.0519220143657748, "grad_norm": 1.8984375, "learning_rate": 9.02727945779249e-06, "loss": 0.5044, "step": 8456 }, { "epoch": 1.052048306890836, "grad_norm": 1.8203125, "learning_rate": 9.02529250802076e-06, "loss": 0.4731, "step": 8457 }, { "epoch": 1.052174599415897, "grad_norm": 2.015625, "learning_rate": 9.023305597098521e-06, "loss": 0.49, "step": 8458 }, { "epoch": 1.0523008919409582, "grad_norm": 2.140625, "learning_rate": 9.021318725104969e-06, "loss": 0.5367, "step": 8459 }, { "epoch": 1.0524271844660193, "grad_norm": 2.1875, "learning_rate": 9.019331892119296e-06, "loss": 0.5042, "step": 8460 }, { "epoch": 1.0525534769910805, "grad_norm": 1.6640625, "learning_rate": 9.017345098220691e-06, "loss": 0.4362, "step": 8461 }, { "epoch": 1.0526797695161418, "grad_norm": 2.03125, "learning_rate": 9.015358343488344e-06, "loss": 0.494, "step": 8462 }, { "epoch": 1.052806062041203, "grad_norm": 1.9296875, "learning_rate": 9.013371628001442e-06, "loss": 0.475, "step": 8463 }, { "epoch": 1.052932354566264, "grad_norm": 1.90625, "learning_rate": 9.011384951839169e-06, "loss": 0.4892, "step": 8464 }, { "epoch": 1.0530586470913252, "grad_norm": 1.953125, "learning_rate": 9.00939831508071e-06, "loss": 0.4907, "step": 8465 }, { "epoch": 1.0531849396163864, "grad_norm": 1.953125, "learning_rate": 9.007411717805248e-06, "loss": 0.5476, "step": 8466 }, { "epoch": 1.0533112321414477, "grad_norm": 1.90625, "learning_rate": 9.005425160091967e-06, "loss": 0.4998, "step": 8467 }, { "epoch": 1.0534375246665089, "grad_norm": 1.828125, "learning_rate": 9.003438642020042e-06, "loss": 0.5096, "step": 8468 }, { "epoch": 1.05356381719157, "grad_norm": 1.9765625, "learning_rate": 9.001452163668653e-06, "loss": 0.469, "step": 8469 }, { "epoch": 1.0536901097166311, "grad_norm": 1.6875, "learning_rate": 8.999465725116976e-06, "loss": 0.4081, "step": 8470 }, { "epoch": 1.0538164022416923, "grad_norm": 2.09375, "learning_rate": 8.997479326444185e-06, "loss": 0.8182, "step": 8471 }, { "epoch": 1.0539426947667534, "grad_norm": 2.0, "learning_rate": 8.99549296772945e-06, "loss": 0.5056, "step": 8472 }, { "epoch": 1.0540689872918148, "grad_norm": 1.90625, "learning_rate": 8.993506649051948e-06, "loss": 0.5288, "step": 8473 }, { "epoch": 1.0541952798168759, "grad_norm": 2.078125, "learning_rate": 8.991520370490847e-06, "loss": 0.5391, "step": 8474 }, { "epoch": 1.054321572341937, "grad_norm": 1.78125, "learning_rate": 8.989534132125316e-06, "loss": 0.4765, "step": 8475 }, { "epoch": 1.0544478648669982, "grad_norm": 1.859375, "learning_rate": 8.98754793403452e-06, "loss": 0.4672, "step": 8476 }, { "epoch": 1.0545741573920593, "grad_norm": 1.953125, "learning_rate": 8.985561776297625e-06, "loss": 0.533, "step": 8477 }, { "epoch": 1.0547004499171204, "grad_norm": 1.9375, "learning_rate": 8.983575658993793e-06, "loss": 0.5048, "step": 8478 }, { "epoch": 1.0548267424421818, "grad_norm": 1.8828125, "learning_rate": 8.981589582202188e-06, "loss": 0.4821, "step": 8479 }, { "epoch": 1.054953034967243, "grad_norm": 1.984375, "learning_rate": 8.979603546001969e-06, "loss": 0.391, "step": 8480 }, { "epoch": 1.055079327492304, "grad_norm": 1.765625, "learning_rate": 8.977617550472294e-06, "loss": 0.4505, "step": 8481 }, { "epoch": 1.0552056200173652, "grad_norm": 1.8984375, "learning_rate": 8.975631595692323e-06, "loss": 0.4722, "step": 8482 }, { "epoch": 1.0553319125424263, "grad_norm": 1.953125, "learning_rate": 8.973645681741205e-06, "loss": 0.4958, "step": 8483 }, { "epoch": 1.0554582050674877, "grad_norm": 1.8671875, "learning_rate": 8.971659808698102e-06, "loss": 0.5349, "step": 8484 }, { "epoch": 1.0555844975925488, "grad_norm": 1.9609375, "learning_rate": 8.969673976642162e-06, "loss": 0.5498, "step": 8485 }, { "epoch": 1.05571079011761, "grad_norm": 2.09375, "learning_rate": 8.967688185652533e-06, "loss": 0.4795, "step": 8486 }, { "epoch": 1.055837082642671, "grad_norm": 1.9140625, "learning_rate": 8.965702435808368e-06, "loss": 0.4593, "step": 8487 }, { "epoch": 1.0559633751677322, "grad_norm": 1.984375, "learning_rate": 8.963716727188811e-06, "loss": 0.5054, "step": 8488 }, { "epoch": 1.0560896676927933, "grad_norm": 1.96875, "learning_rate": 8.96173105987301e-06, "loss": 0.4808, "step": 8489 }, { "epoch": 1.0562159602178547, "grad_norm": 1.8359375, "learning_rate": 8.959745433940112e-06, "loss": 0.444, "step": 8490 }, { "epoch": 1.0563422527429158, "grad_norm": 1.8515625, "learning_rate": 8.957759849469252e-06, "loss": 0.5319, "step": 8491 }, { "epoch": 1.056468545267977, "grad_norm": 2.046875, "learning_rate": 8.955774306539574e-06, "loss": 0.4634, "step": 8492 }, { "epoch": 1.056594837793038, "grad_norm": 1.953125, "learning_rate": 8.953788805230217e-06, "loss": 0.5064, "step": 8493 }, { "epoch": 1.0567211303180992, "grad_norm": 2.109375, "learning_rate": 8.95180334562032e-06, "loss": 0.5223, "step": 8494 }, { "epoch": 1.0568474228431606, "grad_norm": 1.9296875, "learning_rate": 8.949817927789014e-06, "loss": 0.4983, "step": 8495 }, { "epoch": 1.0569737153682217, "grad_norm": 2.046875, "learning_rate": 8.947832551815438e-06, "loss": 0.4664, "step": 8496 }, { "epoch": 1.0571000078932828, "grad_norm": 1.84375, "learning_rate": 8.945847217778722e-06, "loss": 0.4706, "step": 8497 }, { "epoch": 1.057226300418344, "grad_norm": 1.8828125, "learning_rate": 8.943861925757998e-06, "loss": 0.5053, "step": 8498 }, { "epoch": 1.0573525929434051, "grad_norm": 1.9375, "learning_rate": 8.941876675832392e-06, "loss": 0.4974, "step": 8499 }, { "epoch": 1.0574788854684662, "grad_norm": 1.828125, "learning_rate": 8.939891468081033e-06, "loss": 0.5075, "step": 8500 }, { "epoch": 1.0576051779935276, "grad_norm": 2.0, "learning_rate": 8.937906302583045e-06, "loss": 0.5148, "step": 8501 }, { "epoch": 1.0577314705185887, "grad_norm": 1.9375, "learning_rate": 8.935921179417556e-06, "loss": 0.4879, "step": 8502 }, { "epoch": 1.0578577630436499, "grad_norm": 1.796875, "learning_rate": 8.933936098663685e-06, "loss": 0.4625, "step": 8503 }, { "epoch": 1.057984055568711, "grad_norm": 1.796875, "learning_rate": 8.931951060400552e-06, "loss": 0.4667, "step": 8504 }, { "epoch": 1.0581103480937721, "grad_norm": 1.9140625, "learning_rate": 8.929966064707279e-06, "loss": 0.5718, "step": 8505 }, { "epoch": 1.0582366406188333, "grad_norm": 1.796875, "learning_rate": 8.92798111166298e-06, "loss": 0.4556, "step": 8506 }, { "epoch": 1.0583629331438946, "grad_norm": 1.9140625, "learning_rate": 8.925996201346773e-06, "loss": 0.5031, "step": 8507 }, { "epoch": 1.0584892256689558, "grad_norm": 2.09375, "learning_rate": 8.924011333837771e-06, "loss": 0.5164, "step": 8508 }, { "epoch": 1.058615518194017, "grad_norm": 1.9140625, "learning_rate": 8.922026509215086e-06, "loss": 0.4869, "step": 8509 }, { "epoch": 1.058741810719078, "grad_norm": 1.9140625, "learning_rate": 8.920041727557827e-06, "loss": 0.534, "step": 8510 }, { "epoch": 1.0588681032441392, "grad_norm": 1.9140625, "learning_rate": 8.918056988945103e-06, "loss": 0.5049, "step": 8511 }, { "epoch": 1.0589943957692005, "grad_norm": 1.9921875, "learning_rate": 8.916072293456022e-06, "loss": 0.5099, "step": 8512 }, { "epoch": 1.0591206882942616, "grad_norm": 1.8828125, "learning_rate": 8.914087641169688e-06, "loss": 0.4979, "step": 8513 }, { "epoch": 1.0592469808193228, "grad_norm": 2.984375, "learning_rate": 8.912103032165206e-06, "loss": 0.5896, "step": 8514 }, { "epoch": 1.059373273344384, "grad_norm": 1.84375, "learning_rate": 8.910118466521675e-06, "loss": 0.4215, "step": 8515 }, { "epoch": 1.059499565869445, "grad_norm": 1.765625, "learning_rate": 8.908133944318199e-06, "loss": 0.4255, "step": 8516 }, { "epoch": 1.0596258583945062, "grad_norm": 2.203125, "learning_rate": 8.906149465633872e-06, "loss": 0.5902, "step": 8517 }, { "epoch": 1.0597521509195675, "grad_norm": 2.015625, "learning_rate": 8.904165030547795e-06, "loss": 0.4971, "step": 8518 }, { "epoch": 1.0598784434446287, "grad_norm": 1.9609375, "learning_rate": 8.902180639139057e-06, "loss": 0.5551, "step": 8519 }, { "epoch": 1.0600047359696898, "grad_norm": 2.09375, "learning_rate": 8.900196291486755e-06, "loss": 0.5402, "step": 8520 }, { "epoch": 1.060131028494751, "grad_norm": 1.9453125, "learning_rate": 8.89821198766998e-06, "loss": 0.4783, "step": 8521 }, { "epoch": 1.060257321019812, "grad_norm": 1.8828125, "learning_rate": 8.896227727767823e-06, "loss": 0.5229, "step": 8522 }, { "epoch": 1.0603836135448734, "grad_norm": 2.234375, "learning_rate": 8.89424351185937e-06, "loss": 0.5972, "step": 8523 }, { "epoch": 1.0605099060699346, "grad_norm": 2.65625, "learning_rate": 8.892259340023708e-06, "loss": 0.5983, "step": 8524 }, { "epoch": 1.0606361985949957, "grad_norm": 1.9140625, "learning_rate": 8.890275212339918e-06, "loss": 0.5242, "step": 8525 }, { "epoch": 1.0607624911200568, "grad_norm": 1.8984375, "learning_rate": 8.888291128887088e-06, "loss": 0.495, "step": 8526 }, { "epoch": 1.060888783645118, "grad_norm": 1.9375, "learning_rate": 8.886307089744293e-06, "loss": 0.4656, "step": 8527 }, { "epoch": 1.061015076170179, "grad_norm": 1.9609375, "learning_rate": 8.884323094990619e-06, "loss": 0.5228, "step": 8528 }, { "epoch": 1.0611413686952405, "grad_norm": 1.8359375, "learning_rate": 8.882339144705135e-06, "loss": 0.4899, "step": 8529 }, { "epoch": 1.0612676612203016, "grad_norm": 1.78125, "learning_rate": 8.880355238966923e-06, "loss": 0.5241, "step": 8530 }, { "epoch": 1.0613939537453627, "grad_norm": 1.9609375, "learning_rate": 8.878371377855052e-06, "loss": 0.576, "step": 8531 }, { "epoch": 1.0615202462704239, "grad_norm": 2.03125, "learning_rate": 8.8763875614486e-06, "loss": 0.5309, "step": 8532 }, { "epoch": 1.061646538795485, "grad_norm": 1.9140625, "learning_rate": 8.874403789826632e-06, "loss": 0.5152, "step": 8533 }, { "epoch": 1.0617728313205461, "grad_norm": 1.84375, "learning_rate": 8.872420063068216e-06, "loss": 0.4859, "step": 8534 }, { "epoch": 1.0618991238456075, "grad_norm": 1.8828125, "learning_rate": 8.87043638125242e-06, "loss": 0.4829, "step": 8535 }, { "epoch": 1.0620254163706686, "grad_norm": 1.84375, "learning_rate": 8.868452744458311e-06, "loss": 0.479, "step": 8536 }, { "epoch": 1.0621517088957297, "grad_norm": 1.9765625, "learning_rate": 8.866469152764946e-06, "loss": 0.5522, "step": 8537 }, { "epoch": 1.0622780014207909, "grad_norm": 2.0, "learning_rate": 8.864485606251394e-06, "loss": 0.5786, "step": 8538 }, { "epoch": 1.062404293945852, "grad_norm": 1.84375, "learning_rate": 8.86250210499671e-06, "loss": 0.4767, "step": 8539 }, { "epoch": 1.0625305864709134, "grad_norm": 1.96875, "learning_rate": 8.860518649079953e-06, "loss": 0.4976, "step": 8540 }, { "epoch": 1.0626568789959745, "grad_norm": 2.0625, "learning_rate": 8.858535238580178e-06, "loss": 0.4714, "step": 8541 }, { "epoch": 1.0627831715210356, "grad_norm": 1.8984375, "learning_rate": 8.85655187357644e-06, "loss": 0.4731, "step": 8542 }, { "epoch": 1.0629094640460968, "grad_norm": 1.9765625, "learning_rate": 8.85456855414779e-06, "loss": 0.5324, "step": 8543 }, { "epoch": 1.063035756571158, "grad_norm": 2.046875, "learning_rate": 8.852585280373275e-06, "loss": 0.4902, "step": 8544 }, { "epoch": 1.063162049096219, "grad_norm": 2.0, "learning_rate": 8.85060205233195e-06, "loss": 0.5914, "step": 8545 }, { "epoch": 1.0632883416212804, "grad_norm": 2.03125, "learning_rate": 8.848618870102858e-06, "loss": 0.5605, "step": 8546 }, { "epoch": 1.0634146341463415, "grad_norm": 1.9609375, "learning_rate": 8.846635733765045e-06, "loss": 0.4291, "step": 8547 }, { "epoch": 1.0635409266714027, "grad_norm": 1.8515625, "learning_rate": 8.844652643397554e-06, "loss": 0.4466, "step": 8548 }, { "epoch": 1.0636672191964638, "grad_norm": 1.8828125, "learning_rate": 8.842669599079425e-06, "loss": 0.4725, "step": 8549 }, { "epoch": 1.063793511721525, "grad_norm": 1.9375, "learning_rate": 8.8406866008897e-06, "loss": 0.4838, "step": 8550 }, { "epoch": 1.063919804246586, "grad_norm": 1.890625, "learning_rate": 8.838703648907411e-06, "loss": 0.4954, "step": 8551 }, { "epoch": 1.0640460967716474, "grad_norm": 1.9375, "learning_rate": 8.836720743211602e-06, "loss": 0.5865, "step": 8552 }, { "epoch": 1.0641723892967085, "grad_norm": 1.9921875, "learning_rate": 8.834737883881298e-06, "loss": 0.5862, "step": 8553 }, { "epoch": 1.0642986818217697, "grad_norm": 1.8125, "learning_rate": 8.832755070995538e-06, "loss": 0.5126, "step": 8554 }, { "epoch": 1.0644249743468308, "grad_norm": 1.8984375, "learning_rate": 8.830772304633343e-06, "loss": 0.5025, "step": 8555 }, { "epoch": 1.064551266871892, "grad_norm": 1.9921875, "learning_rate": 8.828789584873754e-06, "loss": 0.555, "step": 8556 }, { "epoch": 1.0646775593969533, "grad_norm": 1.921875, "learning_rate": 8.826806911795788e-06, "loss": 0.5115, "step": 8557 }, { "epoch": 1.0648038519220144, "grad_norm": 1.7578125, "learning_rate": 8.824824285478477e-06, "loss": 0.4843, "step": 8558 }, { "epoch": 1.0649301444470756, "grad_norm": 1.890625, "learning_rate": 8.822841706000837e-06, "loss": 0.4931, "step": 8559 }, { "epoch": 1.0650564369721367, "grad_norm": 1.84375, "learning_rate": 8.820859173441892e-06, "loss": 0.5524, "step": 8560 }, { "epoch": 1.0651827294971978, "grad_norm": 1.8984375, "learning_rate": 8.818876687880658e-06, "loss": 0.5158, "step": 8561 }, { "epoch": 1.065309022022259, "grad_norm": 2.109375, "learning_rate": 8.816894249396156e-06, "loss": 0.4928, "step": 8562 }, { "epoch": 1.0654353145473203, "grad_norm": 1.9453125, "learning_rate": 8.8149118580674e-06, "loss": 0.5083, "step": 8563 }, { "epoch": 1.0655616070723815, "grad_norm": 2.015625, "learning_rate": 8.812929513973403e-06, "loss": 0.5291, "step": 8564 }, { "epoch": 1.0656878995974426, "grad_norm": 1.8359375, "learning_rate": 8.810947217193175e-06, "loss": 0.4659, "step": 8565 }, { "epoch": 1.0658141921225037, "grad_norm": 1.921875, "learning_rate": 8.808964967805727e-06, "loss": 0.4957, "step": 8566 }, { "epoch": 1.0659404846475649, "grad_norm": 1.8828125, "learning_rate": 8.806982765890069e-06, "loss": 0.5219, "step": 8567 }, { "epoch": 1.066066777172626, "grad_norm": 1.90625, "learning_rate": 8.805000611525202e-06, "loss": 0.4508, "step": 8568 }, { "epoch": 1.0661930696976873, "grad_norm": 2.03125, "learning_rate": 8.803018504790133e-06, "loss": 0.4849, "step": 8569 }, { "epoch": 1.0663193622227485, "grad_norm": 1.953125, "learning_rate": 8.801036445763863e-06, "loss": 0.4877, "step": 8570 }, { "epoch": 1.0664456547478096, "grad_norm": 2.0, "learning_rate": 8.799054434525392e-06, "loss": 0.4776, "step": 8571 }, { "epoch": 1.0665719472728707, "grad_norm": 2.0625, "learning_rate": 8.797072471153721e-06, "loss": 0.5217, "step": 8572 }, { "epoch": 1.0666982397979319, "grad_norm": 1.8828125, "learning_rate": 8.795090555727836e-06, "loss": 0.5072, "step": 8573 }, { "epoch": 1.0668245323229932, "grad_norm": 1.9296875, "learning_rate": 8.793108688326746e-06, "loss": 0.4798, "step": 8574 }, { "epoch": 1.0669508248480544, "grad_norm": 1.890625, "learning_rate": 8.791126869029436e-06, "loss": 0.5073, "step": 8575 }, { "epoch": 1.0670771173731155, "grad_norm": 2.1875, "learning_rate": 8.789145097914898e-06, "loss": 0.536, "step": 8576 }, { "epoch": 1.0672034098981766, "grad_norm": 1.96875, "learning_rate": 8.787163375062118e-06, "loss": 0.4532, "step": 8577 }, { "epoch": 1.0673297024232378, "grad_norm": 1.921875, "learning_rate": 8.785181700550085e-06, "loss": 0.446, "step": 8578 }, { "epoch": 1.067455994948299, "grad_norm": 2.171875, "learning_rate": 8.783200074457785e-06, "loss": 0.5987, "step": 8579 }, { "epoch": 1.0675822874733603, "grad_norm": 2.015625, "learning_rate": 8.781218496864198e-06, "loss": 0.4685, "step": 8580 }, { "epoch": 1.0677085799984214, "grad_norm": 2.234375, "learning_rate": 8.779236967848307e-06, "loss": 0.5508, "step": 8581 }, { "epoch": 1.0678348725234825, "grad_norm": 2.03125, "learning_rate": 8.777255487489088e-06, "loss": 0.4743, "step": 8582 }, { "epoch": 1.0679611650485437, "grad_norm": 2.15625, "learning_rate": 8.775274055865522e-06, "loss": 0.6183, "step": 8583 }, { "epoch": 1.0680874575736048, "grad_norm": 2.171875, "learning_rate": 8.773292673056582e-06, "loss": 0.4804, "step": 8584 }, { "epoch": 1.068213750098666, "grad_norm": 2.34375, "learning_rate": 8.77131133914124e-06, "loss": 0.5326, "step": 8585 }, { "epoch": 1.0683400426237273, "grad_norm": 2.03125, "learning_rate": 8.76933005419847e-06, "loss": 0.4968, "step": 8586 }, { "epoch": 1.0684663351487884, "grad_norm": 2.203125, "learning_rate": 8.767348818307239e-06, "loss": 0.4535, "step": 8587 }, { "epoch": 1.0685926276738495, "grad_norm": 1.9765625, "learning_rate": 8.765367631546516e-06, "loss": 0.47, "step": 8588 }, { "epoch": 1.0687189201989107, "grad_norm": 1.9375, "learning_rate": 8.763386493995264e-06, "loss": 0.5915, "step": 8589 }, { "epoch": 1.0688452127239718, "grad_norm": 1.96875, "learning_rate": 8.761405405732449e-06, "loss": 0.5538, "step": 8590 }, { "epoch": 1.0689715052490332, "grad_norm": 2.015625, "learning_rate": 8.759424366837027e-06, "loss": 0.5571, "step": 8591 }, { "epoch": 1.0690977977740943, "grad_norm": 2.03125, "learning_rate": 8.757443377387965e-06, "loss": 0.541, "step": 8592 }, { "epoch": 1.0692240902991554, "grad_norm": 1.765625, "learning_rate": 8.755462437464217e-06, "loss": 0.4605, "step": 8593 }, { "epoch": 1.0693503828242166, "grad_norm": 1.8359375, "learning_rate": 8.75348154714474e-06, "loss": 0.4391, "step": 8594 }, { "epoch": 1.0694766753492777, "grad_norm": 2.140625, "learning_rate": 8.751500706508485e-06, "loss": 0.5432, "step": 8595 }, { "epoch": 1.0696029678743388, "grad_norm": 2.078125, "learning_rate": 8.749519915634405e-06, "loss": 0.5096, "step": 8596 }, { "epoch": 1.0697292603994002, "grad_norm": 1.9375, "learning_rate": 8.74753917460145e-06, "loss": 0.4982, "step": 8597 }, { "epoch": 1.0698555529244613, "grad_norm": 1.7890625, "learning_rate": 8.745558483488564e-06, "loss": 0.4616, "step": 8598 }, { "epoch": 1.0699818454495225, "grad_norm": 1.9765625, "learning_rate": 8.743577842374697e-06, "loss": 0.4747, "step": 8599 }, { "epoch": 1.0701081379745836, "grad_norm": 1.875, "learning_rate": 8.741597251338792e-06, "loss": 0.5165, "step": 8600 }, { "epoch": 1.0702344304996447, "grad_norm": 1.8828125, "learning_rate": 8.739616710459788e-06, "loss": 0.4803, "step": 8601 }, { "epoch": 1.0703607230247059, "grad_norm": 2.0625, "learning_rate": 8.737636219816626e-06, "loss": 0.5002, "step": 8602 }, { "epoch": 1.0704870155497672, "grad_norm": 2.109375, "learning_rate": 8.735655779488245e-06, "loss": 0.5446, "step": 8603 }, { "epoch": 1.0706133080748284, "grad_norm": 1.9921875, "learning_rate": 8.733675389553579e-06, "loss": 0.5075, "step": 8604 }, { "epoch": 1.0707396005998895, "grad_norm": 2.09375, "learning_rate": 8.731695050091561e-06, "loss": 0.5773, "step": 8605 }, { "epoch": 1.0708658931249506, "grad_norm": 2.015625, "learning_rate": 8.729714761181124e-06, "loss": 0.4928, "step": 8606 }, { "epoch": 1.0709921856500118, "grad_norm": 1.8671875, "learning_rate": 8.727734522901196e-06, "loss": 0.4759, "step": 8607 }, { "epoch": 1.071118478175073, "grad_norm": 1.9140625, "learning_rate": 8.725754335330708e-06, "loss": 0.4937, "step": 8608 }, { "epoch": 1.0712447707001342, "grad_norm": 1.8515625, "learning_rate": 8.723774198548575e-06, "loss": 0.4896, "step": 8609 }, { "epoch": 1.0713710632251954, "grad_norm": 2.03125, "learning_rate": 8.721794112633736e-06, "loss": 0.5369, "step": 8610 }, { "epoch": 1.0714973557502565, "grad_norm": 2.0, "learning_rate": 8.719814077665102e-06, "loss": 0.4269, "step": 8611 }, { "epoch": 1.0716236482753176, "grad_norm": 1.8671875, "learning_rate": 8.717834093721598e-06, "loss": 0.5021, "step": 8612 }, { "epoch": 1.0717499408003788, "grad_norm": 1.9765625, "learning_rate": 8.715854160882138e-06, "loss": 0.4423, "step": 8613 }, { "epoch": 1.0718762333254401, "grad_norm": 2.03125, "learning_rate": 8.713874279225638e-06, "loss": 0.4813, "step": 8614 }, { "epoch": 1.0720025258505013, "grad_norm": 1.9296875, "learning_rate": 8.711894448831011e-06, "loss": 0.531, "step": 8615 }, { "epoch": 1.0721288183755624, "grad_norm": 2.484375, "learning_rate": 8.709914669777169e-06, "loss": 0.5696, "step": 8616 }, { "epoch": 1.0722551109006235, "grad_norm": 1.875, "learning_rate": 8.70793494214302e-06, "loss": 0.4731, "step": 8617 }, { "epoch": 1.0723814034256847, "grad_norm": 1.8515625, "learning_rate": 8.705955266007473e-06, "loss": 0.5339, "step": 8618 }, { "epoch": 1.0725076959507458, "grad_norm": 1.9921875, "learning_rate": 8.703975641449432e-06, "loss": 0.4157, "step": 8619 }, { "epoch": 1.0726339884758072, "grad_norm": 1.9453125, "learning_rate": 8.7019960685478e-06, "loss": 0.4876, "step": 8620 }, { "epoch": 1.0727602810008683, "grad_norm": 2.078125, "learning_rate": 8.70001654738148e-06, "loss": 0.6709, "step": 8621 }, { "epoch": 1.0728865735259294, "grad_norm": 2.171875, "learning_rate": 8.698037078029366e-06, "loss": 0.5271, "step": 8622 }, { "epoch": 1.0730128660509906, "grad_norm": 1.8671875, "learning_rate": 8.69605766057036e-06, "loss": 0.5124, "step": 8623 }, { "epoch": 1.0731391585760517, "grad_norm": 1.7890625, "learning_rate": 8.694078295083355e-06, "loss": 0.4878, "step": 8624 }, { "epoch": 1.073265451101113, "grad_norm": 2.078125, "learning_rate": 8.692098981647244e-06, "loss": 0.5005, "step": 8625 }, { "epoch": 1.0733917436261742, "grad_norm": 1.9609375, "learning_rate": 8.690119720340917e-06, "loss": 0.5195, "step": 8626 }, { "epoch": 1.0735180361512353, "grad_norm": 1.90625, "learning_rate": 8.688140511243258e-06, "loss": 0.4688, "step": 8627 }, { "epoch": 1.0736443286762964, "grad_norm": 1.9453125, "learning_rate": 8.686161354433163e-06, "loss": 0.5155, "step": 8628 }, { "epoch": 1.0737706212013576, "grad_norm": 1.8984375, "learning_rate": 8.68418224998951e-06, "loss": 0.5133, "step": 8629 }, { "epoch": 1.073896913726419, "grad_norm": 1.8671875, "learning_rate": 8.682203197991186e-06, "loss": 0.5089, "step": 8630 }, { "epoch": 1.07402320625148, "grad_norm": 2.03125, "learning_rate": 8.680224198517068e-06, "loss": 0.5313, "step": 8631 }, { "epoch": 1.0741494987765412, "grad_norm": 1.9765625, "learning_rate": 8.678245251646034e-06, "loss": 0.4755, "step": 8632 }, { "epoch": 1.0742757913016023, "grad_norm": 2.046875, "learning_rate": 8.67626635745696e-06, "loss": 0.4722, "step": 8633 }, { "epoch": 1.0744020838266635, "grad_norm": 1.953125, "learning_rate": 8.674287516028721e-06, "loss": 0.4651, "step": 8634 }, { "epoch": 1.0745283763517246, "grad_norm": 1.9609375, "learning_rate": 8.67230872744019e-06, "loss": 0.4577, "step": 8635 }, { "epoch": 1.074654668876786, "grad_norm": 1.9453125, "learning_rate": 8.670329991770234e-06, "loss": 0.455, "step": 8636 }, { "epoch": 1.074780961401847, "grad_norm": 1.890625, "learning_rate": 8.66835130909772e-06, "loss": 0.4446, "step": 8637 }, { "epoch": 1.0749072539269082, "grad_norm": 1.953125, "learning_rate": 8.666372679501518e-06, "loss": 0.4456, "step": 8638 }, { "epoch": 1.0750335464519694, "grad_norm": 1.890625, "learning_rate": 8.664394103060487e-06, "loss": 0.4363, "step": 8639 }, { "epoch": 1.0751598389770305, "grad_norm": 1.890625, "learning_rate": 8.662415579853492e-06, "loss": 0.4351, "step": 8640 }, { "epoch": 1.0752861315020916, "grad_norm": 1.8984375, "learning_rate": 8.660437109959388e-06, "loss": 0.4277, "step": 8641 }, { "epoch": 1.075412424027153, "grad_norm": 1.8125, "learning_rate": 8.658458693457036e-06, "loss": 0.4773, "step": 8642 }, { "epoch": 1.0755387165522141, "grad_norm": 1.921875, "learning_rate": 8.656480330425287e-06, "loss": 0.5272, "step": 8643 }, { "epoch": 1.0756650090772752, "grad_norm": 2.0, "learning_rate": 8.654502020942998e-06, "loss": 0.5294, "step": 8644 }, { "epoch": 1.0757913016023364, "grad_norm": 2.0, "learning_rate": 8.652523765089013e-06, "loss": 0.5679, "step": 8645 }, { "epoch": 1.0759175941273975, "grad_norm": 1.953125, "learning_rate": 8.650545562942189e-06, "loss": 0.5629, "step": 8646 }, { "epoch": 1.0760438866524589, "grad_norm": 1.890625, "learning_rate": 8.648567414581369e-06, "loss": 0.4821, "step": 8647 }, { "epoch": 1.07617017917752, "grad_norm": 1.96875, "learning_rate": 8.646589320085397e-06, "loss": 0.4664, "step": 8648 }, { "epoch": 1.0762964717025811, "grad_norm": 2.0, "learning_rate": 8.644611279533114e-06, "loss": 0.4535, "step": 8649 }, { "epoch": 1.0764227642276423, "grad_norm": 1.9609375, "learning_rate": 8.642633293003361e-06, "loss": 0.505, "step": 8650 }, { "epoch": 1.0765490567527034, "grad_norm": 1.8984375, "learning_rate": 8.640655360574974e-06, "loss": 0.5041, "step": 8651 }, { "epoch": 1.0766753492777645, "grad_norm": 1.9609375, "learning_rate": 8.638677482326792e-06, "loss": 0.5684, "step": 8652 }, { "epoch": 1.076801641802826, "grad_norm": 2.09375, "learning_rate": 8.636699658337646e-06, "loss": 0.5088, "step": 8653 }, { "epoch": 1.076927934327887, "grad_norm": 2.0, "learning_rate": 8.634721888686368e-06, "loss": 0.4763, "step": 8654 }, { "epoch": 1.0770542268529482, "grad_norm": 1.9375, "learning_rate": 8.632744173451788e-06, "loss": 0.5164, "step": 8655 }, { "epoch": 1.0771805193780093, "grad_norm": 1.890625, "learning_rate": 8.63076651271273e-06, "loss": 0.4002, "step": 8656 }, { "epoch": 1.0773068119030704, "grad_norm": 2.09375, "learning_rate": 8.628788906548023e-06, "loss": 0.5064, "step": 8657 }, { "epoch": 1.0774331044281316, "grad_norm": 1.9765625, "learning_rate": 8.626811355036486e-06, "loss": 0.4797, "step": 8658 }, { "epoch": 1.077559396953193, "grad_norm": 1.84375, "learning_rate": 8.624833858256941e-06, "loss": 0.4182, "step": 8659 }, { "epoch": 1.077685689478254, "grad_norm": 1.875, "learning_rate": 8.622856416288206e-06, "loss": 0.4766, "step": 8660 }, { "epoch": 1.0778119820033152, "grad_norm": 1.921875, "learning_rate": 8.620879029209098e-06, "loss": 0.5034, "step": 8661 }, { "epoch": 1.0779382745283763, "grad_norm": 2.234375, "learning_rate": 8.61890169709843e-06, "loss": 0.5498, "step": 8662 }, { "epoch": 1.0780645670534375, "grad_norm": 2.0, "learning_rate": 8.616924420035008e-06, "loss": 0.5399, "step": 8663 }, { "epoch": 1.0781908595784988, "grad_norm": 2.0625, "learning_rate": 8.614947198097653e-06, "loss": 0.5707, "step": 8664 }, { "epoch": 1.07831715210356, "grad_norm": 1.9140625, "learning_rate": 8.612970031365166e-06, "loss": 0.4698, "step": 8665 }, { "epoch": 1.078443444628621, "grad_norm": 2.03125, "learning_rate": 8.610992919916354e-06, "loss": 0.5041, "step": 8666 }, { "epoch": 1.0785697371536822, "grad_norm": 2.09375, "learning_rate": 8.609015863830017e-06, "loss": 0.5853, "step": 8667 }, { "epoch": 1.0786960296787433, "grad_norm": 1.8984375, "learning_rate": 8.607038863184957e-06, "loss": 0.5278, "step": 8668 }, { "epoch": 1.0788223222038045, "grad_norm": 1.9609375, "learning_rate": 8.605061918059973e-06, "loss": 0.5578, "step": 8669 }, { "epoch": 1.0789486147288658, "grad_norm": 1.8515625, "learning_rate": 8.603085028533861e-06, "loss": 0.4526, "step": 8670 }, { "epoch": 1.079074907253927, "grad_norm": 1.9140625, "learning_rate": 8.601108194685417e-06, "loss": 0.4899, "step": 8671 }, { "epoch": 1.079201199778988, "grad_norm": 2.015625, "learning_rate": 8.59913141659343e-06, "loss": 0.4872, "step": 8672 }, { "epoch": 1.0793274923040492, "grad_norm": 2.109375, "learning_rate": 8.597154694336689e-06, "loss": 0.6082, "step": 8673 }, { "epoch": 1.0794537848291104, "grad_norm": 1.84375, "learning_rate": 8.595178027993985e-06, "loss": 0.4654, "step": 8674 }, { "epoch": 1.0795800773541715, "grad_norm": 1.734375, "learning_rate": 8.5932014176441e-06, "loss": 0.4319, "step": 8675 }, { "epoch": 1.0797063698792329, "grad_norm": 2.140625, "learning_rate": 8.591224863365817e-06, "loss": 0.4792, "step": 8676 }, { "epoch": 1.079832662404294, "grad_norm": 1.9140625, "learning_rate": 8.589248365237919e-06, "loss": 0.4787, "step": 8677 }, { "epoch": 1.0799589549293551, "grad_norm": 1.9765625, "learning_rate": 8.587271923339181e-06, "loss": 0.4497, "step": 8678 }, { "epoch": 1.0800852474544163, "grad_norm": 2.28125, "learning_rate": 8.585295537748384e-06, "loss": 0.6028, "step": 8679 }, { "epoch": 1.0802115399794774, "grad_norm": 2.03125, "learning_rate": 8.583319208544299e-06, "loss": 0.5187, "step": 8680 }, { "epoch": 1.0803378325045387, "grad_norm": 1.8984375, "learning_rate": 8.581342935805695e-06, "loss": 0.4625, "step": 8681 }, { "epoch": 1.0804641250295999, "grad_norm": 1.703125, "learning_rate": 8.579366719611347e-06, "loss": 0.3964, "step": 8682 }, { "epoch": 1.080590417554661, "grad_norm": 2.015625, "learning_rate": 8.577390560040019e-06, "loss": 0.5834, "step": 8683 }, { "epoch": 1.0807167100797221, "grad_norm": 1.96875, "learning_rate": 8.575414457170476e-06, "loss": 0.459, "step": 8684 }, { "epoch": 1.0808430026047833, "grad_norm": 2.015625, "learning_rate": 8.573438411081485e-06, "loss": 0.5437, "step": 8685 }, { "epoch": 1.0809692951298444, "grad_norm": 1.9296875, "learning_rate": 8.5714624218518e-06, "loss": 0.4616, "step": 8686 }, { "epoch": 1.0810955876549058, "grad_norm": 1.875, "learning_rate": 8.569486489560183e-06, "loss": 0.4758, "step": 8687 }, { "epoch": 1.081221880179967, "grad_norm": 1.859375, "learning_rate": 8.56751061428539e-06, "loss": 0.513, "step": 8688 }, { "epoch": 1.081348172705028, "grad_norm": 1.9453125, "learning_rate": 8.565534796106171e-06, "loss": 0.4886, "step": 8689 }, { "epoch": 1.0814744652300892, "grad_norm": 2.234375, "learning_rate": 8.56355903510128e-06, "loss": 0.5737, "step": 8690 }, { "epoch": 1.0816007577551503, "grad_norm": 2.015625, "learning_rate": 8.561583331349468e-06, "loss": 0.5191, "step": 8691 }, { "epoch": 1.0817270502802114, "grad_norm": 1.90625, "learning_rate": 8.55960768492948e-06, "loss": 0.4974, "step": 8692 }, { "epoch": 1.0818533428052728, "grad_norm": 1.8125, "learning_rate": 8.557632095920059e-06, "loss": 0.5271, "step": 8693 }, { "epoch": 1.081979635330334, "grad_norm": 2.109375, "learning_rate": 8.555656564399948e-06, "loss": 0.586, "step": 8694 }, { "epoch": 1.082105927855395, "grad_norm": 3.140625, "learning_rate": 8.55368109044789e-06, "loss": 0.5241, "step": 8695 }, { "epoch": 1.0822322203804562, "grad_norm": 2.109375, "learning_rate": 8.551705674142618e-06, "loss": 0.5006, "step": 8696 }, { "epoch": 1.0823585129055173, "grad_norm": 1.890625, "learning_rate": 8.549730315562869e-06, "loss": 0.5053, "step": 8697 }, { "epoch": 1.0824848054305787, "grad_norm": 1.9296875, "learning_rate": 8.547755014787377e-06, "loss": 0.5039, "step": 8698 }, { "epoch": 1.0826110979556398, "grad_norm": 1.8515625, "learning_rate": 8.545779771894868e-06, "loss": 0.456, "step": 8699 }, { "epoch": 1.082737390480701, "grad_norm": 2.015625, "learning_rate": 8.543804586964078e-06, "loss": 0.5218, "step": 8700 }, { "epoch": 1.082863683005762, "grad_norm": 1.953125, "learning_rate": 8.541829460073732e-06, "loss": 0.4965, "step": 8701 }, { "epoch": 1.0829899755308232, "grad_norm": 1.8984375, "learning_rate": 8.539854391302549e-06, "loss": 0.4316, "step": 8702 }, { "epoch": 1.0831162680558843, "grad_norm": 1.9921875, "learning_rate": 8.537879380729254e-06, "loss": 0.5039, "step": 8703 }, { "epoch": 1.0832425605809457, "grad_norm": 1.8984375, "learning_rate": 8.535904428432565e-06, "loss": 0.5205, "step": 8704 }, { "epoch": 1.0833688531060068, "grad_norm": 1.984375, "learning_rate": 8.533929534491199e-06, "loss": 0.4469, "step": 8705 }, { "epoch": 1.083495145631068, "grad_norm": 1.9453125, "learning_rate": 8.531954698983872e-06, "loss": 0.4985, "step": 8706 }, { "epoch": 1.083621438156129, "grad_norm": 2.046875, "learning_rate": 8.529979921989292e-06, "loss": 0.4833, "step": 8707 }, { "epoch": 1.0837477306811902, "grad_norm": 1.9609375, "learning_rate": 8.528005203586172e-06, "loss": 0.5535, "step": 8708 }, { "epoch": 1.0838740232062514, "grad_norm": 1.8359375, "learning_rate": 8.526030543853221e-06, "loss": 0.4644, "step": 8709 }, { "epoch": 1.0840003157313127, "grad_norm": 1.90625, "learning_rate": 8.52405594286914e-06, "loss": 0.4968, "step": 8710 }, { "epoch": 1.0841266082563739, "grad_norm": 1.9140625, "learning_rate": 8.522081400712636e-06, "loss": 0.4788, "step": 8711 }, { "epoch": 1.084252900781435, "grad_norm": 1.765625, "learning_rate": 8.520106917462407e-06, "loss": 0.4606, "step": 8712 }, { "epoch": 1.0843791933064961, "grad_norm": 1.828125, "learning_rate": 8.518132493197153e-06, "loss": 0.4797, "step": 8713 }, { "epoch": 1.0845054858315573, "grad_norm": 2.125, "learning_rate": 8.516158127995567e-06, "loss": 0.548, "step": 8714 }, { "epoch": 1.0846317783566186, "grad_norm": 1.984375, "learning_rate": 8.514183821936343e-06, "loss": 0.5136, "step": 8715 }, { "epoch": 1.0847580708816797, "grad_norm": 1.90625, "learning_rate": 8.512209575098174e-06, "loss": 0.5634, "step": 8716 }, { "epoch": 1.0848843634067409, "grad_norm": 2.1875, "learning_rate": 8.510235387559743e-06, "loss": 0.5461, "step": 8717 }, { "epoch": 1.085010655931802, "grad_norm": 1.8515625, "learning_rate": 8.508261259399747e-06, "loss": 0.4843, "step": 8718 }, { "epoch": 1.0851369484568631, "grad_norm": 1.9375, "learning_rate": 8.506287190696862e-06, "loss": 0.4974, "step": 8719 }, { "epoch": 1.0852632409819243, "grad_norm": 1.875, "learning_rate": 8.504313181529774e-06, "loss": 0.515, "step": 8720 }, { "epoch": 1.0853895335069856, "grad_norm": 2.296875, "learning_rate": 8.502339231977159e-06, "loss": 0.537, "step": 8721 }, { "epoch": 1.0855158260320468, "grad_norm": 1.796875, "learning_rate": 8.500365342117693e-06, "loss": 0.489, "step": 8722 }, { "epoch": 1.085642118557108, "grad_norm": 1.8984375, "learning_rate": 8.498391512030055e-06, "loss": 0.5015, "step": 8723 }, { "epoch": 1.085768411082169, "grad_norm": 1.875, "learning_rate": 8.496417741792912e-06, "loss": 0.4716, "step": 8724 }, { "epoch": 1.0858947036072302, "grad_norm": 2.015625, "learning_rate": 8.494444031484938e-06, "loss": 0.5055, "step": 8725 }, { "epoch": 1.0860209961322913, "grad_norm": 2.015625, "learning_rate": 8.4924703811848e-06, "loss": 0.4885, "step": 8726 }, { "epoch": 1.0861472886573527, "grad_norm": 1.890625, "learning_rate": 8.490496790971159e-06, "loss": 0.6454, "step": 8727 }, { "epoch": 1.0862735811824138, "grad_norm": 2.109375, "learning_rate": 8.488523260922682e-06, "loss": 0.5652, "step": 8728 }, { "epoch": 1.086399873707475, "grad_norm": 1.875, "learning_rate": 8.486549791118025e-06, "loss": 0.4367, "step": 8729 }, { "epoch": 1.086526166232536, "grad_norm": 1.8515625, "learning_rate": 8.484576381635849e-06, "loss": 0.4206, "step": 8730 }, { "epoch": 1.0866524587575972, "grad_norm": 2.1875, "learning_rate": 8.482603032554807e-06, "loss": 0.5463, "step": 8731 }, { "epoch": 1.0867787512826586, "grad_norm": 2.0625, "learning_rate": 8.480629743953556e-06, "loss": 0.5178, "step": 8732 }, { "epoch": 1.0869050438077197, "grad_norm": 2.0625, "learning_rate": 8.47865651591074e-06, "loss": 0.5403, "step": 8733 }, { "epoch": 1.0870313363327808, "grad_norm": 1.8984375, "learning_rate": 8.476683348505013e-06, "loss": 0.4453, "step": 8734 }, { "epoch": 1.087157628857842, "grad_norm": 1.828125, "learning_rate": 8.474710241815016e-06, "loss": 0.4366, "step": 8735 }, { "epoch": 1.087283921382903, "grad_norm": 1.9296875, "learning_rate": 8.472737195919399e-06, "loss": 0.4613, "step": 8736 }, { "epoch": 1.0874102139079644, "grad_norm": 1.828125, "learning_rate": 8.470764210896798e-06, "loss": 0.5015, "step": 8737 }, { "epoch": 1.0875365064330256, "grad_norm": 2.03125, "learning_rate": 8.46879128682585e-06, "loss": 0.5638, "step": 8738 }, { "epoch": 1.0876627989580867, "grad_norm": 1.96875, "learning_rate": 8.466818423785194e-06, "loss": 0.4494, "step": 8739 }, { "epoch": 1.0877890914831478, "grad_norm": 1.90625, "learning_rate": 8.464845621853465e-06, "loss": 0.474, "step": 8740 }, { "epoch": 1.087915384008209, "grad_norm": 1.9609375, "learning_rate": 8.46287288110929e-06, "loss": 0.463, "step": 8741 }, { "epoch": 1.08804167653327, "grad_norm": 2.078125, "learning_rate": 8.4609002016313e-06, "loss": 0.5101, "step": 8742 }, { "epoch": 1.0881679690583315, "grad_norm": 2.0, "learning_rate": 8.458927583498119e-06, "loss": 0.5476, "step": 8743 }, { "epoch": 1.0882942615833926, "grad_norm": 2.046875, "learning_rate": 8.456955026788377e-06, "loss": 0.5269, "step": 8744 }, { "epoch": 1.0884205541084537, "grad_norm": 2.03125, "learning_rate": 8.454982531580687e-06, "loss": 0.5027, "step": 8745 }, { "epoch": 1.0885468466335149, "grad_norm": 1.875, "learning_rate": 8.453010097953673e-06, "loss": 0.4461, "step": 8746 }, { "epoch": 1.088673139158576, "grad_norm": 1.8671875, "learning_rate": 8.451037725985951e-06, "loss": 0.5477, "step": 8747 }, { "epoch": 1.0887994316836371, "grad_norm": 1.9453125, "learning_rate": 8.449065415756133e-06, "loss": 0.4533, "step": 8748 }, { "epoch": 1.0889257242086985, "grad_norm": 1.8125, "learning_rate": 8.447093167342831e-06, "loss": 0.4644, "step": 8749 }, { "epoch": 1.0890520167337596, "grad_norm": 1.859375, "learning_rate": 8.445120980824657e-06, "loss": 0.4884, "step": 8750 }, { "epoch": 1.0891783092588208, "grad_norm": 1.8515625, "learning_rate": 8.443148856280214e-06, "loss": 0.4021, "step": 8751 }, { "epoch": 1.0893046017838819, "grad_norm": 2.03125, "learning_rate": 8.44117679378811e-06, "loss": 0.4598, "step": 8752 }, { "epoch": 1.089430894308943, "grad_norm": 1.9765625, "learning_rate": 8.43920479342694e-06, "loss": 0.5182, "step": 8753 }, { "epoch": 1.0895571868340044, "grad_norm": 1.9296875, "learning_rate": 8.437232855275312e-06, "loss": 0.5461, "step": 8754 }, { "epoch": 1.0896834793590655, "grad_norm": 2.125, "learning_rate": 8.435260979411816e-06, "loss": 0.4822, "step": 8755 }, { "epoch": 1.0898097718841266, "grad_norm": 1.875, "learning_rate": 8.43328916591505e-06, "loss": 0.4506, "step": 8756 }, { "epoch": 1.0899360644091878, "grad_norm": 1.984375, "learning_rate": 8.431317414863602e-06, "loss": 0.497, "step": 8757 }, { "epoch": 1.090062356934249, "grad_norm": 1.8671875, "learning_rate": 8.429345726336065e-06, "loss": 0.4994, "step": 8758 }, { "epoch": 1.09018864945931, "grad_norm": 2.09375, "learning_rate": 8.427374100411025e-06, "loss": 0.4955, "step": 8759 }, { "epoch": 1.0903149419843714, "grad_norm": 1.953125, "learning_rate": 8.425402537167065e-06, "loss": 0.5305, "step": 8760 }, { "epoch": 1.0904412345094325, "grad_norm": 2.0, "learning_rate": 8.423431036682767e-06, "loss": 0.5073, "step": 8761 }, { "epoch": 1.0905675270344937, "grad_norm": 1.9765625, "learning_rate": 8.42145959903671e-06, "loss": 0.5138, "step": 8762 }, { "epoch": 1.0906938195595548, "grad_norm": 2.0625, "learning_rate": 8.419488224307472e-06, "loss": 0.5265, "step": 8763 }, { "epoch": 1.090820112084616, "grad_norm": 1.96875, "learning_rate": 8.417516912573625e-06, "loss": 0.5232, "step": 8764 }, { "epoch": 1.090946404609677, "grad_norm": 2.0625, "learning_rate": 8.415545663913742e-06, "loss": 0.5364, "step": 8765 }, { "epoch": 1.0910726971347384, "grad_norm": 1.9296875, "learning_rate": 8.413574478406393e-06, "loss": 0.4772, "step": 8766 }, { "epoch": 1.0911989896597996, "grad_norm": 1.9921875, "learning_rate": 8.411603356130145e-06, "loss": 0.5122, "step": 8767 }, { "epoch": 1.0913252821848607, "grad_norm": 2.015625, "learning_rate": 8.409632297163562e-06, "loss": 0.5688, "step": 8768 }, { "epoch": 1.0914515747099218, "grad_norm": 2.140625, "learning_rate": 8.407661301585204e-06, "loss": 0.585, "step": 8769 }, { "epoch": 1.091577867234983, "grad_norm": 1.9375, "learning_rate": 8.405690369473631e-06, "loss": 0.5131, "step": 8770 }, { "epoch": 1.0917041597600443, "grad_norm": 2.015625, "learning_rate": 8.4037195009074e-06, "loss": 0.5252, "step": 8771 }, { "epoch": 1.0918304522851054, "grad_norm": 2.03125, "learning_rate": 8.401748695965062e-06, "loss": 0.5138, "step": 8772 }, { "epoch": 1.0919567448101666, "grad_norm": 2.078125, "learning_rate": 8.399777954725174e-06, "loss": 0.5082, "step": 8773 }, { "epoch": 1.0920830373352277, "grad_norm": 1.9765625, "learning_rate": 8.397807277266283e-06, "loss": 0.5009, "step": 8774 }, { "epoch": 1.0922093298602888, "grad_norm": 1.9375, "learning_rate": 8.395836663666935e-06, "loss": 0.4871, "step": 8775 }, { "epoch": 1.09233562238535, "grad_norm": 1.953125, "learning_rate": 8.393866114005672e-06, "loss": 0.5444, "step": 8776 }, { "epoch": 1.0924619149104113, "grad_norm": 1.921875, "learning_rate": 8.39189562836104e-06, "loss": 0.5039, "step": 8777 }, { "epoch": 1.0925882074354725, "grad_norm": 1.9375, "learning_rate": 8.38992520681157e-06, "loss": 0.5167, "step": 8778 }, { "epoch": 1.0927144999605336, "grad_norm": 1.8203125, "learning_rate": 8.387954849435808e-06, "loss": 0.4767, "step": 8779 }, { "epoch": 1.0928407924855947, "grad_norm": 1.890625, "learning_rate": 8.385984556312282e-06, "loss": 0.5308, "step": 8780 }, { "epoch": 1.0929670850106559, "grad_norm": 1.9921875, "learning_rate": 8.384014327519519e-06, "loss": 0.4562, "step": 8781 }, { "epoch": 1.093093377535717, "grad_norm": 1.9140625, "learning_rate": 8.382044163136058e-06, "loss": 0.5178, "step": 8782 }, { "epoch": 1.0932196700607784, "grad_norm": 2.046875, "learning_rate": 8.380074063240417e-06, "loss": 0.5389, "step": 8783 }, { "epoch": 1.0933459625858395, "grad_norm": 1.859375, "learning_rate": 8.378104027911124e-06, "loss": 0.4119, "step": 8784 }, { "epoch": 1.0934722551109006, "grad_norm": 2.109375, "learning_rate": 8.376134057226697e-06, "loss": 0.5314, "step": 8785 }, { "epoch": 1.0935985476359618, "grad_norm": 1.953125, "learning_rate": 8.374164151265654e-06, "loss": 0.4929, "step": 8786 }, { "epoch": 1.093724840161023, "grad_norm": 1.8046875, "learning_rate": 8.372194310106514e-06, "loss": 0.4654, "step": 8787 }, { "epoch": 1.0938511326860842, "grad_norm": 1.953125, "learning_rate": 8.370224533827787e-06, "loss": 0.4685, "step": 8788 }, { "epoch": 1.0939774252111454, "grad_norm": 2.046875, "learning_rate": 8.368254822507984e-06, "loss": 0.4735, "step": 8789 }, { "epoch": 1.0941037177362065, "grad_norm": 1.9140625, "learning_rate": 8.366285176225612e-06, "loss": 0.5089, "step": 8790 }, { "epoch": 1.0942300102612676, "grad_norm": 1.890625, "learning_rate": 8.36431559505918e-06, "loss": 0.4823, "step": 8791 }, { "epoch": 1.0943563027863288, "grad_norm": 2.296875, "learning_rate": 8.362346079087187e-06, "loss": 0.5056, "step": 8792 }, { "epoch": 1.09448259531139, "grad_norm": 2.0, "learning_rate": 8.360376628388138e-06, "loss": 0.4715, "step": 8793 }, { "epoch": 1.0946088878364513, "grad_norm": 1.890625, "learning_rate": 8.358407243040524e-06, "loss": 0.442, "step": 8794 }, { "epoch": 1.0947351803615124, "grad_norm": 1.8125, "learning_rate": 8.356437923122845e-06, "loss": 0.5268, "step": 8795 }, { "epoch": 1.0948614728865735, "grad_norm": 1.96875, "learning_rate": 8.354468668713593e-06, "loss": 0.5004, "step": 8796 }, { "epoch": 1.0949877654116347, "grad_norm": 1.8359375, "learning_rate": 8.352499479891252e-06, "loss": 0.5081, "step": 8797 }, { "epoch": 1.0951140579366958, "grad_norm": 2.1875, "learning_rate": 8.350530356734315e-06, "loss": 0.5714, "step": 8798 }, { "epoch": 1.095240350461757, "grad_norm": 2.109375, "learning_rate": 8.348561299321267e-06, "loss": 0.5281, "step": 8799 }, { "epoch": 1.0953666429868183, "grad_norm": 1.9453125, "learning_rate": 8.346592307730587e-06, "loss": 0.5333, "step": 8800 }, { "epoch": 1.0954929355118794, "grad_norm": 2.046875, "learning_rate": 8.344623382040756e-06, "loss": 0.5017, "step": 8801 }, { "epoch": 1.0956192280369406, "grad_norm": 2.0, "learning_rate": 8.342654522330248e-06, "loss": 0.4671, "step": 8802 }, { "epoch": 1.0957455205620017, "grad_norm": 1.984375, "learning_rate": 8.34068572867754e-06, "loss": 0.5461, "step": 8803 }, { "epoch": 1.0958718130870628, "grad_norm": 2.140625, "learning_rate": 8.338717001161101e-06, "loss": 0.5642, "step": 8804 }, { "epoch": 1.0959981056121242, "grad_norm": 1.8828125, "learning_rate": 8.336748339859402e-06, "loss": 0.5446, "step": 8805 }, { "epoch": 1.0961243981371853, "grad_norm": 1.953125, "learning_rate": 8.334779744850909e-06, "loss": 0.4843, "step": 8806 }, { "epoch": 1.0962506906622465, "grad_norm": 1.921875, "learning_rate": 8.332811216214082e-06, "loss": 0.4434, "step": 8807 }, { "epoch": 1.0963769831873076, "grad_norm": 1.828125, "learning_rate": 8.330842754027383e-06, "loss": 0.4961, "step": 8808 }, { "epoch": 1.0965032757123687, "grad_norm": 1.9921875, "learning_rate": 8.328874358369274e-06, "loss": 0.4598, "step": 8809 }, { "epoch": 1.0966295682374299, "grad_norm": 1.875, "learning_rate": 8.326906029318209e-06, "loss": 0.4973, "step": 8810 }, { "epoch": 1.0967558607624912, "grad_norm": 1.7890625, "learning_rate": 8.324937766952638e-06, "loss": 0.4513, "step": 8811 }, { "epoch": 1.0968821532875523, "grad_norm": 1.90625, "learning_rate": 8.322969571351013e-06, "loss": 0.4659, "step": 8812 }, { "epoch": 1.0970084458126135, "grad_norm": 1.8828125, "learning_rate": 8.321001442591779e-06, "loss": 0.4384, "step": 8813 }, { "epoch": 1.0971347383376746, "grad_norm": 1.953125, "learning_rate": 8.319033380753388e-06, "loss": 0.5108, "step": 8814 }, { "epoch": 1.0972610308627357, "grad_norm": 1.8359375, "learning_rate": 8.317065385914275e-06, "loss": 0.4743, "step": 8815 }, { "epoch": 1.0973873233877969, "grad_norm": 1.9140625, "learning_rate": 8.315097458152884e-06, "loss": 0.531, "step": 8816 }, { "epoch": 1.0975136159128582, "grad_norm": 2.0, "learning_rate": 8.313129597547647e-06, "loss": 0.4782, "step": 8817 }, { "epoch": 1.0976399084379194, "grad_norm": 1.8515625, "learning_rate": 8.311161804177003e-06, "loss": 0.4779, "step": 8818 }, { "epoch": 1.0977662009629805, "grad_norm": 1.921875, "learning_rate": 8.309194078119382e-06, "loss": 0.5541, "step": 8819 }, { "epoch": 1.0978924934880416, "grad_norm": 1.9921875, "learning_rate": 8.307226419453214e-06, "loss": 0.4901, "step": 8820 }, { "epoch": 1.0980187860131028, "grad_norm": 2.015625, "learning_rate": 8.305258828256922e-06, "loss": 0.4587, "step": 8821 }, { "epoch": 1.0981450785381641, "grad_norm": 1.8828125, "learning_rate": 8.30329130460893e-06, "loss": 0.5078, "step": 8822 }, { "epoch": 1.0982713710632253, "grad_norm": 1.9140625, "learning_rate": 8.301323848587662e-06, "loss": 0.5484, "step": 8823 }, { "epoch": 1.0983976635882864, "grad_norm": 2.0625, "learning_rate": 8.299356460271532e-06, "loss": 0.5396, "step": 8824 }, { "epoch": 1.0985239561133475, "grad_norm": 1.9921875, "learning_rate": 8.297389139738957e-06, "loss": 0.5317, "step": 8825 }, { "epoch": 1.0986502486384087, "grad_norm": 2.0, "learning_rate": 8.295421887068348e-06, "loss": 0.4497, "step": 8826 }, { "epoch": 1.0987765411634698, "grad_norm": 2.015625, "learning_rate": 8.29345470233812e-06, "loss": 0.5147, "step": 8827 }, { "epoch": 1.0989028336885311, "grad_norm": 1.9296875, "learning_rate": 8.291487585626677e-06, "loss": 0.4265, "step": 8828 }, { "epoch": 1.0990291262135923, "grad_norm": 2.359375, "learning_rate": 8.289520537012423e-06, "loss": 0.5851, "step": 8829 }, { "epoch": 1.0991554187386534, "grad_norm": 1.921875, "learning_rate": 8.28755355657376e-06, "loss": 0.4962, "step": 8830 }, { "epoch": 1.0992817112637145, "grad_norm": 1.890625, "learning_rate": 8.285586644389087e-06, "loss": 0.5777, "step": 8831 }, { "epoch": 1.0994080037887757, "grad_norm": 2.03125, "learning_rate": 8.283619800536803e-06, "loss": 0.4674, "step": 8832 }, { "epoch": 1.0995342963138368, "grad_norm": 1.8359375, "learning_rate": 8.2816530250953e-06, "loss": 0.4574, "step": 8833 }, { "epoch": 1.0996605888388982, "grad_norm": 2.015625, "learning_rate": 8.279686318142966e-06, "loss": 0.5121, "step": 8834 }, { "epoch": 1.0997868813639593, "grad_norm": 2.140625, "learning_rate": 8.277719679758193e-06, "loss": 0.5328, "step": 8835 }, { "epoch": 1.0999131738890204, "grad_norm": 1.90625, "learning_rate": 8.275753110019367e-06, "loss": 0.4449, "step": 8836 }, { "epoch": 1.1000394664140816, "grad_norm": 1.9765625, "learning_rate": 8.273786609004867e-06, "loss": 0.5143, "step": 8837 }, { "epoch": 1.1001657589391427, "grad_norm": 1.8984375, "learning_rate": 8.271820176793075e-06, "loss": 0.4942, "step": 8838 }, { "epoch": 1.100292051464204, "grad_norm": 1.9765625, "learning_rate": 8.269853813462368e-06, "loss": 0.4561, "step": 8839 }, { "epoch": 1.1004183439892652, "grad_norm": 1.984375, "learning_rate": 8.267887519091121e-06, "loss": 0.4897, "step": 8840 }, { "epoch": 1.1005446365143263, "grad_norm": 2.078125, "learning_rate": 8.265921293757707e-06, "loss": 0.509, "step": 8841 }, { "epoch": 1.1006709290393875, "grad_norm": 1.9140625, "learning_rate": 8.263955137540491e-06, "loss": 0.5043, "step": 8842 }, { "epoch": 1.1007972215644486, "grad_norm": 1.9453125, "learning_rate": 8.261989050517843e-06, "loss": 0.5336, "step": 8843 }, { "epoch": 1.1009235140895097, "grad_norm": 1.8359375, "learning_rate": 8.260023032768122e-06, "loss": 0.4458, "step": 8844 }, { "epoch": 1.101049806614571, "grad_norm": 1.9296875, "learning_rate": 8.258057084369693e-06, "loss": 0.5376, "step": 8845 }, { "epoch": 1.1011760991396322, "grad_norm": 2.171875, "learning_rate": 8.256091205400913e-06, "loss": 0.5901, "step": 8846 }, { "epoch": 1.1013023916646933, "grad_norm": 1.859375, "learning_rate": 8.25412539594014e-06, "loss": 0.4974, "step": 8847 }, { "epoch": 1.1014286841897545, "grad_norm": 1.953125, "learning_rate": 8.252159656065722e-06, "loss": 0.5335, "step": 8848 }, { "epoch": 1.1015549767148156, "grad_norm": 1.9921875, "learning_rate": 8.25019398585601e-06, "loss": 0.5741, "step": 8849 }, { "epoch": 1.1016812692398767, "grad_norm": 1.8828125, "learning_rate": 8.248228385389352e-06, "loss": 0.46, "step": 8850 }, { "epoch": 1.101807561764938, "grad_norm": 1.8125, "learning_rate": 8.24626285474409e-06, "loss": 0.4089, "step": 8851 }, { "epoch": 1.1019338542899992, "grad_norm": 1.9765625, "learning_rate": 8.244297393998566e-06, "loss": 0.5127, "step": 8852 }, { "epoch": 1.1020601468150604, "grad_norm": 1.84375, "learning_rate": 8.242332003231122e-06, "loss": 0.4478, "step": 8853 }, { "epoch": 1.1021864393401215, "grad_norm": 2.015625, "learning_rate": 8.240366682520088e-06, "loss": 0.5545, "step": 8854 }, { "epoch": 1.1023127318651826, "grad_norm": 2.15625, "learning_rate": 8.238401431943801e-06, "loss": 0.5088, "step": 8855 }, { "epoch": 1.102439024390244, "grad_norm": 1.9375, "learning_rate": 8.236436251580588e-06, "loss": 0.5102, "step": 8856 }, { "epoch": 1.1025653169153051, "grad_norm": 2.125, "learning_rate": 8.234471141508781e-06, "loss": 0.5151, "step": 8857 }, { "epoch": 1.1026916094403663, "grad_norm": 1.8359375, "learning_rate": 8.2325061018067e-06, "loss": 0.5064, "step": 8858 }, { "epoch": 1.1028179019654274, "grad_norm": 2.015625, "learning_rate": 8.230541132552668e-06, "loss": 0.4983, "step": 8859 }, { "epoch": 1.1029441944904885, "grad_norm": 2.09375, "learning_rate": 8.228576233825006e-06, "loss": 0.5526, "step": 8860 }, { "epoch": 1.1030704870155499, "grad_norm": 2.234375, "learning_rate": 8.226611405702028e-06, "loss": 0.533, "step": 8861 }, { "epoch": 1.103196779540611, "grad_norm": 1.9921875, "learning_rate": 8.224646648262043e-06, "loss": 0.4639, "step": 8862 }, { "epoch": 1.1033230720656722, "grad_norm": 2.09375, "learning_rate": 8.22268196158337e-06, "loss": 0.6749, "step": 8863 }, { "epoch": 1.1034493645907333, "grad_norm": 1.953125, "learning_rate": 8.220717345744316e-06, "loss": 0.5126, "step": 8864 }, { "epoch": 1.1035756571157944, "grad_norm": 2.109375, "learning_rate": 8.21875280082318e-06, "loss": 0.594, "step": 8865 }, { "epoch": 1.1037019496408556, "grad_norm": 1.953125, "learning_rate": 8.21678832689827e-06, "loss": 0.5157, "step": 8866 }, { "epoch": 1.103828242165917, "grad_norm": 1.921875, "learning_rate": 8.214823924047879e-06, "loss": 0.4652, "step": 8867 }, { "epoch": 1.103954534690978, "grad_norm": 2.0625, "learning_rate": 8.212859592350308e-06, "loss": 0.5084, "step": 8868 }, { "epoch": 1.1040808272160392, "grad_norm": 1.75, "learning_rate": 8.210895331883847e-06, "loss": 0.465, "step": 8869 }, { "epoch": 1.1042071197411003, "grad_norm": 1.984375, "learning_rate": 8.208931142726792e-06, "loss": 0.601, "step": 8870 }, { "epoch": 1.1043334122661614, "grad_norm": 1.8359375, "learning_rate": 8.206967024957427e-06, "loss": 0.4915, "step": 8871 }, { "epoch": 1.1044597047912226, "grad_norm": 1.9765625, "learning_rate": 8.205002978654035e-06, "loss": 0.4691, "step": 8872 }, { "epoch": 1.104585997316284, "grad_norm": 1.8203125, "learning_rate": 8.203039003894904e-06, "loss": 0.4529, "step": 8873 }, { "epoch": 1.104712289841345, "grad_norm": 1.7890625, "learning_rate": 8.201075100758308e-06, "loss": 0.4671, "step": 8874 }, { "epoch": 1.1048385823664062, "grad_norm": 1.9921875, "learning_rate": 8.199111269322526e-06, "loss": 0.5334, "step": 8875 }, { "epoch": 1.1049648748914673, "grad_norm": 1.796875, "learning_rate": 8.19714750966583e-06, "loss": 0.4416, "step": 8876 }, { "epoch": 1.1050911674165285, "grad_norm": 2.0, "learning_rate": 8.195183821866493e-06, "loss": 0.592, "step": 8877 }, { "epoch": 1.1052174599415898, "grad_norm": 2.21875, "learning_rate": 8.193220206002783e-06, "loss": 0.5673, "step": 8878 }, { "epoch": 1.105343752466651, "grad_norm": 2.078125, "learning_rate": 8.191256662152962e-06, "loss": 0.4615, "step": 8879 }, { "epoch": 1.105470044991712, "grad_norm": 2.140625, "learning_rate": 8.189293190395289e-06, "loss": 0.5362, "step": 8880 }, { "epoch": 1.1055963375167732, "grad_norm": 1.8671875, "learning_rate": 8.187329790808033e-06, "loss": 0.4984, "step": 8881 }, { "epoch": 1.1057226300418344, "grad_norm": 1.9453125, "learning_rate": 8.185366463469446e-06, "loss": 0.4999, "step": 8882 }, { "epoch": 1.1058489225668955, "grad_norm": 1.859375, "learning_rate": 8.183403208457782e-06, "loss": 0.4291, "step": 8883 }, { "epoch": 1.1059752150919568, "grad_norm": 1.921875, "learning_rate": 8.181440025851288e-06, "loss": 0.5227, "step": 8884 }, { "epoch": 1.106101507617018, "grad_norm": 1.8359375, "learning_rate": 8.179476915728215e-06, "loss": 0.4605, "step": 8885 }, { "epoch": 1.106227800142079, "grad_norm": 1.953125, "learning_rate": 8.177513878166809e-06, "loss": 0.5265, "step": 8886 }, { "epoch": 1.1063540926671402, "grad_norm": 2.140625, "learning_rate": 8.175550913245308e-06, "loss": 0.5492, "step": 8887 }, { "epoch": 1.1064803851922014, "grad_norm": 1.9375, "learning_rate": 8.173588021041953e-06, "loss": 0.5495, "step": 8888 }, { "epoch": 1.1066066777172625, "grad_norm": 2.125, "learning_rate": 8.171625201634982e-06, "loss": 0.4943, "step": 8889 }, { "epoch": 1.1067329702423239, "grad_norm": 1.953125, "learning_rate": 8.169662455102625e-06, "loss": 0.562, "step": 8890 }, { "epoch": 1.106859262767385, "grad_norm": 2.0, "learning_rate": 8.167699781523115e-06, "loss": 0.4665, "step": 8891 }, { "epoch": 1.1069855552924461, "grad_norm": 1.9453125, "learning_rate": 8.165737180974678e-06, "loss": 0.4895, "step": 8892 }, { "epoch": 1.1071118478175073, "grad_norm": 2.0625, "learning_rate": 8.163774653535538e-06, "loss": 0.4893, "step": 8893 }, { "epoch": 1.1072381403425684, "grad_norm": 2.0625, "learning_rate": 8.161812199283918e-06, "loss": 0.5389, "step": 8894 }, { "epoch": 1.1073644328676298, "grad_norm": 1.8828125, "learning_rate": 8.159849818298037e-06, "loss": 0.5133, "step": 8895 }, { "epoch": 1.107490725392691, "grad_norm": 1.953125, "learning_rate": 8.15788751065611e-06, "loss": 0.5626, "step": 8896 }, { "epoch": 1.107617017917752, "grad_norm": 2.15625, "learning_rate": 8.155925276436349e-06, "loss": 0.4682, "step": 8897 }, { "epoch": 1.1077433104428132, "grad_norm": 1.9765625, "learning_rate": 8.153963115716961e-06, "loss": 0.52, "step": 8898 }, { "epoch": 1.1078696029678743, "grad_norm": 1.8984375, "learning_rate": 8.15200102857616e-06, "loss": 0.539, "step": 8899 }, { "epoch": 1.1079958954929354, "grad_norm": 1.8203125, "learning_rate": 8.150039015092147e-06, "loss": 0.4806, "step": 8900 }, { "epoch": 1.1081221880179968, "grad_norm": 1.9453125, "learning_rate": 8.148077075343124e-06, "loss": 0.4276, "step": 8901 }, { "epoch": 1.108248480543058, "grad_norm": 1.8359375, "learning_rate": 8.146115209407288e-06, "loss": 0.4478, "step": 8902 }, { "epoch": 1.108374773068119, "grad_norm": 1.9296875, "learning_rate": 8.144153417362834e-06, "loss": 0.4713, "step": 8903 }, { "epoch": 1.1085010655931802, "grad_norm": 1.84375, "learning_rate": 8.142191699287953e-06, "loss": 0.4851, "step": 8904 }, { "epoch": 1.1086273581182413, "grad_norm": 2.109375, "learning_rate": 8.140230055260839e-06, "loss": 0.5278, "step": 8905 }, { "epoch": 1.1087536506433024, "grad_norm": 1.78125, "learning_rate": 8.138268485359674e-06, "loss": 0.4392, "step": 8906 }, { "epoch": 1.1088799431683638, "grad_norm": 1.9296875, "learning_rate": 8.136306989662643e-06, "loss": 0.5006, "step": 8907 }, { "epoch": 1.109006235693425, "grad_norm": 2.234375, "learning_rate": 8.134345568247927e-06, "loss": 0.5183, "step": 8908 }, { "epoch": 1.109132528218486, "grad_norm": 1.921875, "learning_rate": 8.132384221193703e-06, "loss": 0.4521, "step": 8909 }, { "epoch": 1.1092588207435472, "grad_norm": 2.0, "learning_rate": 8.130422948578143e-06, "loss": 0.4849, "step": 8910 }, { "epoch": 1.1093851132686083, "grad_norm": 2.09375, "learning_rate": 8.128461750479425e-06, "loss": 0.5049, "step": 8911 }, { "epoch": 1.1095114057936697, "grad_norm": 1.8828125, "learning_rate": 8.126500626975711e-06, "loss": 0.4887, "step": 8912 }, { "epoch": 1.1096376983187308, "grad_norm": 1.90625, "learning_rate": 8.12453957814517e-06, "loss": 0.4395, "step": 8913 }, { "epoch": 1.109763990843792, "grad_norm": 2.09375, "learning_rate": 8.122578604065965e-06, "loss": 0.4293, "step": 8914 }, { "epoch": 1.109890283368853, "grad_norm": 1.9375, "learning_rate": 8.120617704816254e-06, "loss": 0.5067, "step": 8915 }, { "epoch": 1.1100165758939142, "grad_norm": 2.015625, "learning_rate": 8.11865688047419e-06, "loss": 0.5079, "step": 8916 }, { "epoch": 1.1101428684189754, "grad_norm": 1.890625, "learning_rate": 8.116696131117936e-06, "loss": 0.407, "step": 8917 }, { "epoch": 1.1102691609440367, "grad_norm": 2.078125, "learning_rate": 8.114735456825637e-06, "loss": 0.4963, "step": 8918 }, { "epoch": 1.1103954534690978, "grad_norm": 1.96875, "learning_rate": 8.112774857675442e-06, "loss": 0.5646, "step": 8919 }, { "epoch": 1.110521745994159, "grad_norm": 2.109375, "learning_rate": 8.110814333745496e-06, "loss": 0.5079, "step": 8920 }, { "epoch": 1.1106480385192201, "grad_norm": 1.8515625, "learning_rate": 8.10885388511394e-06, "loss": 0.4042, "step": 8921 }, { "epoch": 1.1107743310442812, "grad_norm": 1.9765625, "learning_rate": 8.106893511858916e-06, "loss": 0.4749, "step": 8922 }, { "epoch": 1.1109006235693424, "grad_norm": 1.890625, "learning_rate": 8.104933214058553e-06, "loss": 0.4526, "step": 8923 }, { "epoch": 1.1110269160944037, "grad_norm": 2.046875, "learning_rate": 8.10297299179099e-06, "loss": 0.5593, "step": 8924 }, { "epoch": 1.1111532086194649, "grad_norm": 2.140625, "learning_rate": 8.101012845134352e-06, "loss": 0.518, "step": 8925 }, { "epoch": 1.111279501144526, "grad_norm": 1.859375, "learning_rate": 8.099052774166769e-06, "loss": 0.5179, "step": 8926 }, { "epoch": 1.1114057936695871, "grad_norm": 1.8359375, "learning_rate": 8.097092778966363e-06, "loss": 0.4949, "step": 8927 }, { "epoch": 1.1115320861946483, "grad_norm": 2.09375, "learning_rate": 8.095132859611257e-06, "loss": 0.4944, "step": 8928 }, { "epoch": 1.1116583787197096, "grad_norm": 1.9765625, "learning_rate": 8.093173016179564e-06, "loss": 0.5306, "step": 8929 }, { "epoch": 1.1117846712447708, "grad_norm": 1.96875, "learning_rate": 8.091213248749406e-06, "loss": 0.5911, "step": 8930 }, { "epoch": 1.111910963769832, "grad_norm": 1.828125, "learning_rate": 8.089253557398888e-06, "loss": 0.4812, "step": 8931 }, { "epoch": 1.112037256294893, "grad_norm": 1.9296875, "learning_rate": 8.08729394220612e-06, "loss": 0.5155, "step": 8932 }, { "epoch": 1.1121635488199542, "grad_norm": 1.90625, "learning_rate": 8.08533440324921e-06, "loss": 0.4504, "step": 8933 }, { "epoch": 1.1122898413450153, "grad_norm": 2.125, "learning_rate": 8.083374940606254e-06, "loss": 0.4999, "step": 8934 }, { "epoch": 1.1124161338700767, "grad_norm": 2.046875, "learning_rate": 8.08141555435536e-06, "loss": 0.5719, "step": 8935 }, { "epoch": 1.1125424263951378, "grad_norm": 1.78125, "learning_rate": 8.079456244574622e-06, "loss": 0.5018, "step": 8936 }, { "epoch": 1.112668718920199, "grad_norm": 2.03125, "learning_rate": 8.077497011342132e-06, "loss": 0.4907, "step": 8937 }, { "epoch": 1.11279501144526, "grad_norm": 1.953125, "learning_rate": 8.07553785473598e-06, "loss": 0.4677, "step": 8938 }, { "epoch": 1.1129213039703212, "grad_norm": 1.9609375, "learning_rate": 8.073578774834254e-06, "loss": 0.465, "step": 8939 }, { "epoch": 1.1130475964953823, "grad_norm": 2.15625, "learning_rate": 8.071619771715038e-06, "loss": 0.5303, "step": 8940 }, { "epoch": 1.1131738890204437, "grad_norm": 2.015625, "learning_rate": 8.069660845456413e-06, "loss": 0.5846, "step": 8941 }, { "epoch": 1.1133001815455048, "grad_norm": 1.9296875, "learning_rate": 8.067701996136457e-06, "loss": 0.4569, "step": 8942 }, { "epoch": 1.113426474070566, "grad_norm": 2.09375, "learning_rate": 8.065743223833246e-06, "loss": 0.5452, "step": 8943 }, { "epoch": 1.113552766595627, "grad_norm": 1.96875, "learning_rate": 8.06378452862485e-06, "loss": 0.5429, "step": 8944 }, { "epoch": 1.1136790591206882, "grad_norm": 1.9140625, "learning_rate": 8.06182591058934e-06, "loss": 0.5036, "step": 8945 }, { "epoch": 1.1138053516457496, "grad_norm": 1.8671875, "learning_rate": 8.05986736980478e-06, "loss": 0.5439, "step": 8946 }, { "epoch": 1.1139316441708107, "grad_norm": 1.953125, "learning_rate": 8.057908906349235e-06, "loss": 0.4391, "step": 8947 }, { "epoch": 1.1140579366958718, "grad_norm": 1.984375, "learning_rate": 8.05595052030076e-06, "loss": 0.5145, "step": 8948 }, { "epoch": 1.114184229220933, "grad_norm": 1.859375, "learning_rate": 8.053992211737418e-06, "loss": 0.4617, "step": 8949 }, { "epoch": 1.114310521745994, "grad_norm": 1.8125, "learning_rate": 8.052033980737258e-06, "loss": 0.4405, "step": 8950 }, { "epoch": 1.1144368142710552, "grad_norm": 2.03125, "learning_rate": 8.050075827378331e-06, "loss": 0.5769, "step": 8951 }, { "epoch": 1.1145631067961166, "grad_norm": 1.9921875, "learning_rate": 8.048117751738683e-06, "loss": 0.5047, "step": 8952 }, { "epoch": 1.1146893993211777, "grad_norm": 1.8515625, "learning_rate": 8.046159753896361e-06, "loss": 0.415, "step": 8953 }, { "epoch": 1.1148156918462389, "grad_norm": 2.328125, "learning_rate": 8.044201833929406e-06, "loss": 0.6485, "step": 8954 }, { "epoch": 1.1149419843713, "grad_norm": 2.046875, "learning_rate": 8.042243991915858e-06, "loss": 0.5861, "step": 8955 }, { "epoch": 1.1150682768963611, "grad_norm": 1.9453125, "learning_rate": 8.040286227933744e-06, "loss": 0.445, "step": 8956 }, { "epoch": 1.1151945694214223, "grad_norm": 1.9453125, "learning_rate": 8.038328542061103e-06, "loss": 0.4532, "step": 8957 }, { "epoch": 1.1153208619464836, "grad_norm": 2.4375, "learning_rate": 8.036370934375962e-06, "loss": 0.5529, "step": 8958 }, { "epoch": 1.1154471544715447, "grad_norm": 1.96875, "learning_rate": 8.034413404956346e-06, "loss": 0.4819, "step": 8959 }, { "epoch": 1.1155734469966059, "grad_norm": 2.046875, "learning_rate": 8.032455953880277e-06, "loss": 0.4617, "step": 8960 }, { "epoch": 1.115699739521667, "grad_norm": 1.90625, "learning_rate": 8.030498581225774e-06, "loss": 0.4614, "step": 8961 }, { "epoch": 1.1158260320467281, "grad_norm": 1.7265625, "learning_rate": 8.028541287070853e-06, "loss": 0.4295, "step": 8962 }, { "epoch": 1.1159523245717895, "grad_norm": 1.921875, "learning_rate": 8.02658407149353e-06, "loss": 0.5537, "step": 8963 }, { "epoch": 1.1160786170968506, "grad_norm": 1.9765625, "learning_rate": 8.02462693457181e-06, "loss": 0.4976, "step": 8964 }, { "epoch": 1.1162049096219118, "grad_norm": 1.8984375, "learning_rate": 8.022669876383705e-06, "loss": 0.5046, "step": 8965 }, { "epoch": 1.116331202146973, "grad_norm": 1.9375, "learning_rate": 8.020712897007214e-06, "loss": 0.5388, "step": 8966 }, { "epoch": 1.116457494672034, "grad_norm": 1.796875, "learning_rate": 8.01875599652034e-06, "loss": 0.4893, "step": 8967 }, { "epoch": 1.1165837871970954, "grad_norm": 2.0, "learning_rate": 8.01679917500108e-06, "loss": 0.4949, "step": 8968 }, { "epoch": 1.1167100797221565, "grad_norm": 2.0, "learning_rate": 8.014842432527428e-06, "loss": 0.5383, "step": 8969 }, { "epoch": 1.1168363722472177, "grad_norm": 1.9140625, "learning_rate": 8.012885769177372e-06, "loss": 0.4753, "step": 8970 }, { "epoch": 1.1169626647722788, "grad_norm": 2.09375, "learning_rate": 8.010929185028905e-06, "loss": 0.5361, "step": 8971 }, { "epoch": 1.11708895729734, "grad_norm": 2.015625, "learning_rate": 8.00897268016001e-06, "loss": 0.5145, "step": 8972 }, { "epoch": 1.117215249822401, "grad_norm": 1.875, "learning_rate": 8.00701625464867e-06, "loss": 0.499, "step": 8973 }, { "epoch": 1.1173415423474624, "grad_norm": 1.8828125, "learning_rate": 8.00505990857286e-06, "loss": 0.5249, "step": 8974 }, { "epoch": 1.1174678348725235, "grad_norm": 2.046875, "learning_rate": 8.003103642010558e-06, "loss": 0.4393, "step": 8975 }, { "epoch": 1.1175941273975847, "grad_norm": 1.828125, "learning_rate": 8.001147455039735e-06, "loss": 0.4963, "step": 8976 }, { "epoch": 1.1177204199226458, "grad_norm": 2.296875, "learning_rate": 7.999191347738362e-06, "loss": 0.515, "step": 8977 }, { "epoch": 1.117846712447707, "grad_norm": 1.875, "learning_rate": 7.9972353201844e-06, "loss": 0.4689, "step": 8978 }, { "epoch": 1.117973004972768, "grad_norm": 2.046875, "learning_rate": 7.995279372455813e-06, "loss": 0.5559, "step": 8979 }, { "epoch": 1.1180992974978294, "grad_norm": 2.046875, "learning_rate": 7.993323504630564e-06, "loss": 0.5218, "step": 8980 }, { "epoch": 1.1182255900228906, "grad_norm": 1.96875, "learning_rate": 7.991367716786606e-06, "loss": 0.4484, "step": 8981 }, { "epoch": 1.1183518825479517, "grad_norm": 1.8125, "learning_rate": 7.989412009001891e-06, "loss": 0.4703, "step": 8982 }, { "epoch": 1.1184781750730128, "grad_norm": 1.8671875, "learning_rate": 7.987456381354373e-06, "loss": 0.4671, "step": 8983 }, { "epoch": 1.118604467598074, "grad_norm": 2.765625, "learning_rate": 7.985500833921995e-06, "loss": 0.5347, "step": 8984 }, { "epoch": 1.1187307601231353, "grad_norm": 1.890625, "learning_rate": 7.983545366782702e-06, "loss": 0.423, "step": 8985 }, { "epoch": 1.1188570526481965, "grad_norm": 1.9609375, "learning_rate": 7.981589980014433e-06, "loss": 0.4733, "step": 8986 }, { "epoch": 1.1189833451732576, "grad_norm": 1.890625, "learning_rate": 7.979634673695127e-06, "loss": 0.4653, "step": 8987 }, { "epoch": 1.1191096376983187, "grad_norm": 2.015625, "learning_rate": 7.97767944790271e-06, "loss": 0.4365, "step": 8988 }, { "epoch": 1.1192359302233799, "grad_norm": 1.96875, "learning_rate": 7.975724302715126e-06, "loss": 0.5266, "step": 8989 }, { "epoch": 1.119362222748441, "grad_norm": 1.9453125, "learning_rate": 7.973769238210293e-06, "loss": 0.5137, "step": 8990 }, { "epoch": 1.1194885152735023, "grad_norm": 2.0, "learning_rate": 7.97181425446614e-06, "loss": 0.5298, "step": 8991 }, { "epoch": 1.1196148077985635, "grad_norm": 1.71875, "learning_rate": 7.969859351560585e-06, "loss": 0.4129, "step": 8992 }, { "epoch": 1.1197411003236246, "grad_norm": 1.96875, "learning_rate": 7.967904529571546e-06, "loss": 0.472, "step": 8993 }, { "epoch": 1.1198673928486857, "grad_norm": 1.953125, "learning_rate": 7.965949788576937e-06, "loss": 0.4534, "step": 8994 }, { "epoch": 1.1199936853737469, "grad_norm": 1.96875, "learning_rate": 7.96399512865467e-06, "loss": 0.5698, "step": 8995 }, { "epoch": 1.120119977898808, "grad_norm": 1.8203125, "learning_rate": 7.962040549882656e-06, "loss": 0.42, "step": 8996 }, { "epoch": 1.1202462704238694, "grad_norm": 2.046875, "learning_rate": 7.960086052338795e-06, "loss": 0.5304, "step": 8997 }, { "epoch": 1.1203725629489305, "grad_norm": 1.890625, "learning_rate": 7.95813163610099e-06, "loss": 0.4561, "step": 8998 }, { "epoch": 1.1204988554739916, "grad_norm": 1.953125, "learning_rate": 7.956177301247141e-06, "loss": 0.5655, "step": 8999 }, { "epoch": 1.1206251479990528, "grad_norm": 1.8984375, "learning_rate": 7.954223047855141e-06, "loss": 0.4828, "step": 9000 }, { "epoch": 1.120751440524114, "grad_norm": 1.84375, "learning_rate": 7.952268876002883e-06, "loss": 0.4891, "step": 9001 }, { "epoch": 1.1208777330491753, "grad_norm": 2.078125, "learning_rate": 7.950314785768256e-06, "loss": 0.4784, "step": 9002 }, { "epoch": 1.1210040255742364, "grad_norm": 1.9140625, "learning_rate": 7.948360777229144e-06, "loss": 0.4381, "step": 9003 }, { "epoch": 1.1211303180992975, "grad_norm": 1.8203125, "learning_rate": 7.94640685046343e-06, "loss": 0.5293, "step": 9004 }, { "epoch": 1.1212566106243587, "grad_norm": 1.859375, "learning_rate": 7.944453005548992e-06, "loss": 0.4937, "step": 9005 }, { "epoch": 1.1213829031494198, "grad_norm": 1.9296875, "learning_rate": 7.942499242563701e-06, "loss": 0.4564, "step": 9006 }, { "epoch": 1.121509195674481, "grad_norm": 1.9140625, "learning_rate": 7.94054556158544e-06, "loss": 0.5106, "step": 9007 }, { "epoch": 1.1216354881995423, "grad_norm": 1.8828125, "learning_rate": 7.938591962692073e-06, "loss": 0.4889, "step": 9008 }, { "epoch": 1.1217617807246034, "grad_norm": 1.9765625, "learning_rate": 7.936638445961465e-06, "loss": 0.5102, "step": 9009 }, { "epoch": 1.1218880732496646, "grad_norm": 2.109375, "learning_rate": 7.934685011471478e-06, "loss": 0.4863, "step": 9010 }, { "epoch": 1.1220143657747257, "grad_norm": 1.9296875, "learning_rate": 7.93273165929997e-06, "loss": 0.5058, "step": 9011 }, { "epoch": 1.1221406582997868, "grad_norm": 1.890625, "learning_rate": 7.930778389524803e-06, "loss": 0.5035, "step": 9012 }, { "epoch": 1.122266950824848, "grad_norm": 1.96875, "learning_rate": 7.928825202223822e-06, "loss": 0.4699, "step": 9013 }, { "epoch": 1.1223932433499093, "grad_norm": 1.9609375, "learning_rate": 7.92687209747488e-06, "loss": 0.5243, "step": 9014 }, { "epoch": 1.1225195358749704, "grad_norm": 1.96875, "learning_rate": 7.924919075355823e-06, "loss": 0.6469, "step": 9015 }, { "epoch": 1.1226458284000316, "grad_norm": 1.875, "learning_rate": 7.922966135944496e-06, "loss": 0.4214, "step": 9016 }, { "epoch": 1.1227721209250927, "grad_norm": 1.96875, "learning_rate": 7.921013279318733e-06, "loss": 0.5071, "step": 9017 }, { "epoch": 1.1228984134501538, "grad_norm": 2.15625, "learning_rate": 7.919060505556374e-06, "loss": 0.547, "step": 9018 }, { "epoch": 1.1230247059752152, "grad_norm": 1.796875, "learning_rate": 7.91710781473525e-06, "loss": 0.5046, "step": 9019 }, { "epoch": 1.1231509985002763, "grad_norm": 2.046875, "learning_rate": 7.915155206933194e-06, "loss": 0.4867, "step": 9020 }, { "epoch": 1.1232772910253375, "grad_norm": 2.078125, "learning_rate": 7.913202682228028e-06, "loss": 0.4859, "step": 9021 }, { "epoch": 1.1234035835503986, "grad_norm": 1.96875, "learning_rate": 7.911250240697576e-06, "loss": 0.4587, "step": 9022 }, { "epoch": 1.1235298760754597, "grad_norm": 1.7890625, "learning_rate": 7.90929788241966e-06, "loss": 0.4442, "step": 9023 }, { "epoch": 1.1236561686005209, "grad_norm": 1.9375, "learning_rate": 7.90734560747209e-06, "loss": 0.5133, "step": 9024 }, { "epoch": 1.1237824611255822, "grad_norm": 2.265625, "learning_rate": 7.905393415932688e-06, "loss": 0.5201, "step": 9025 }, { "epoch": 1.1239087536506434, "grad_norm": 2.0, "learning_rate": 7.90344130787926e-06, "loss": 0.5803, "step": 9026 }, { "epoch": 1.1240350461757045, "grad_norm": 1.8125, "learning_rate": 7.90148928338961e-06, "loss": 0.4339, "step": 9027 }, { "epoch": 1.1241613387007656, "grad_norm": 1.78125, "learning_rate": 7.899537342541543e-06, "loss": 0.4905, "step": 9028 }, { "epoch": 1.1242876312258268, "grad_norm": 1.84375, "learning_rate": 7.897585485412857e-06, "loss": 0.4812, "step": 9029 }, { "epoch": 1.124413923750888, "grad_norm": 1.8671875, "learning_rate": 7.895633712081352e-06, "loss": 0.5218, "step": 9030 }, { "epoch": 1.1245402162759492, "grad_norm": 1.921875, "learning_rate": 7.893682022624819e-06, "loss": 0.4626, "step": 9031 }, { "epoch": 1.1246665088010104, "grad_norm": 1.8984375, "learning_rate": 7.891730417121044e-06, "loss": 0.4643, "step": 9032 }, { "epoch": 1.1247928013260715, "grad_norm": 2.09375, "learning_rate": 7.889778895647819e-06, "loss": 0.5691, "step": 9033 }, { "epoch": 1.1249190938511326, "grad_norm": 1.9765625, "learning_rate": 7.887827458282922e-06, "loss": 0.4872, "step": 9034 }, { "epoch": 1.1250453863761938, "grad_norm": 1.796875, "learning_rate": 7.885876105104137e-06, "loss": 0.424, "step": 9035 }, { "epoch": 1.1251716789012551, "grad_norm": 1.9453125, "learning_rate": 7.883924836189238e-06, "loss": 0.4671, "step": 9036 }, { "epoch": 1.1252979714263163, "grad_norm": 2.03125, "learning_rate": 7.881973651615998e-06, "loss": 0.5758, "step": 9037 }, { "epoch": 1.1254242639513774, "grad_norm": 1.875, "learning_rate": 7.880022551462187e-06, "loss": 0.4834, "step": 9038 }, { "epoch": 1.1255505564764385, "grad_norm": 1.9453125, "learning_rate": 7.87807153580557e-06, "loss": 0.4807, "step": 9039 }, { "epoch": 1.1256768490014997, "grad_norm": 2.15625, "learning_rate": 7.876120604723911e-06, "loss": 0.4953, "step": 9040 }, { "epoch": 1.1258031415265608, "grad_norm": 2.15625, "learning_rate": 7.874169758294969e-06, "loss": 0.6443, "step": 9041 }, { "epoch": 1.1259294340516222, "grad_norm": 1.859375, "learning_rate": 7.872218996596497e-06, "loss": 0.5395, "step": 9042 }, { "epoch": 1.1260557265766833, "grad_norm": 1.9609375, "learning_rate": 7.870268319706254e-06, "loss": 0.5245, "step": 9043 }, { "epoch": 1.1261820191017444, "grad_norm": 2.046875, "learning_rate": 7.868317727701985e-06, "loss": 0.4963, "step": 9044 }, { "epoch": 1.1263083116268056, "grad_norm": 2.140625, "learning_rate": 7.866367220661439e-06, "loss": 0.5799, "step": 9045 }, { "epoch": 1.1264346041518667, "grad_norm": 2.03125, "learning_rate": 7.864416798662354e-06, "loss": 0.5354, "step": 9046 }, { "epoch": 1.1265608966769278, "grad_norm": 1.7890625, "learning_rate": 7.862466461782473e-06, "loss": 0.4841, "step": 9047 }, { "epoch": 1.1266871892019892, "grad_norm": 1.8984375, "learning_rate": 7.860516210099529e-06, "loss": 0.5037, "step": 9048 }, { "epoch": 1.1268134817270503, "grad_norm": 2.109375, "learning_rate": 7.858566043691257e-06, "loss": 0.5552, "step": 9049 }, { "epoch": 1.1269397742521114, "grad_norm": 1.875, "learning_rate": 7.856615962635385e-06, "loss": 0.4666, "step": 9050 }, { "epoch": 1.1270660667771726, "grad_norm": 2.078125, "learning_rate": 7.854665967009635e-06, "loss": 0.5698, "step": 9051 }, { "epoch": 1.1271923593022337, "grad_norm": 1.8828125, "learning_rate": 7.852716056891736e-06, "loss": 0.4814, "step": 9052 }, { "epoch": 1.127318651827295, "grad_norm": 2.015625, "learning_rate": 7.850766232359401e-06, "loss": 0.5653, "step": 9053 }, { "epoch": 1.1274449443523562, "grad_norm": 1.90625, "learning_rate": 7.848816493490347e-06, "loss": 0.5155, "step": 9054 }, { "epoch": 1.1275712368774173, "grad_norm": 1.953125, "learning_rate": 7.846866840362286e-06, "loss": 0.4927, "step": 9055 }, { "epoch": 1.1276975294024785, "grad_norm": 1.9921875, "learning_rate": 7.844917273052929e-06, "loss": 0.5894, "step": 9056 }, { "epoch": 1.1278238219275396, "grad_norm": 2.03125, "learning_rate": 7.842967791639976e-06, "loss": 0.5479, "step": 9057 }, { "epoch": 1.127950114452601, "grad_norm": 1.8671875, "learning_rate": 7.841018396201132e-06, "loss": 0.4808, "step": 9058 }, { "epoch": 1.128076406977662, "grad_norm": 1.96875, "learning_rate": 7.839069086814093e-06, "loss": 0.5175, "step": 9059 }, { "epoch": 1.1282026995027232, "grad_norm": 1.9453125, "learning_rate": 7.837119863556554e-06, "loss": 0.4812, "step": 9060 }, { "epoch": 1.1283289920277844, "grad_norm": 1.890625, "learning_rate": 7.835170726506212e-06, "loss": 0.514, "step": 9061 }, { "epoch": 1.1284552845528455, "grad_norm": 1.9609375, "learning_rate": 7.833221675740748e-06, "loss": 0.475, "step": 9062 }, { "epoch": 1.1285815770779066, "grad_norm": 1.8359375, "learning_rate": 7.83127271133785e-06, "loss": 0.5062, "step": 9063 }, { "epoch": 1.1287078696029678, "grad_norm": 1.9140625, "learning_rate": 7.8293238333752e-06, "loss": 0.5105, "step": 9064 }, { "epoch": 1.1288341621280291, "grad_norm": 1.765625, "learning_rate": 7.82737504193047e-06, "loss": 0.4472, "step": 9065 }, { "epoch": 1.1289604546530903, "grad_norm": 1.8828125, "learning_rate": 7.82542633708134e-06, "loss": 0.5263, "step": 9066 }, { "epoch": 1.1290867471781514, "grad_norm": 2.09375, "learning_rate": 7.823477718905476e-06, "loss": 0.535, "step": 9067 }, { "epoch": 1.1292130397032125, "grad_norm": 2.078125, "learning_rate": 7.82152918748055e-06, "loss": 0.5144, "step": 9068 }, { "epoch": 1.1293393322282737, "grad_norm": 2.0625, "learning_rate": 7.819580742884223e-06, "loss": 0.4956, "step": 9069 }, { "epoch": 1.129465624753335, "grad_norm": 1.765625, "learning_rate": 7.817632385194156e-06, "loss": 0.479, "step": 9070 }, { "epoch": 1.1295919172783961, "grad_norm": 2.078125, "learning_rate": 7.815684114488005e-06, "loss": 0.4997, "step": 9071 }, { "epoch": 1.1297182098034573, "grad_norm": 2.03125, "learning_rate": 7.813735930843425e-06, "loss": 0.5118, "step": 9072 }, { "epoch": 1.1298445023285184, "grad_norm": 1.96875, "learning_rate": 7.811787834338063e-06, "loss": 0.5483, "step": 9073 }, { "epoch": 1.1299707948535795, "grad_norm": 1.9375, "learning_rate": 7.809839825049568e-06, "loss": 0.4494, "step": 9074 }, { "epoch": 1.130097087378641, "grad_norm": 1.9296875, "learning_rate": 7.807891903055582e-06, "loss": 0.4758, "step": 9075 }, { "epoch": 1.130223379903702, "grad_norm": 1.90625, "learning_rate": 7.805944068433745e-06, "loss": 0.4732, "step": 9076 }, { "epoch": 1.1303496724287632, "grad_norm": 1.8359375, "learning_rate": 7.803996321261692e-06, "loss": 0.5093, "step": 9077 }, { "epoch": 1.1304759649538243, "grad_norm": 1.84375, "learning_rate": 7.802048661617054e-06, "loss": 0.445, "step": 9078 }, { "epoch": 1.1306022574788854, "grad_norm": 1.90625, "learning_rate": 7.800101089577466e-06, "loss": 0.5296, "step": 9079 }, { "epoch": 1.1307285500039466, "grad_norm": 2.1875, "learning_rate": 7.798153605220549e-06, "loss": 0.5167, "step": 9080 }, { "epoch": 1.1308548425290077, "grad_norm": 2.0, "learning_rate": 7.796206208623927e-06, "loss": 0.4886, "step": 9081 }, { "epoch": 1.130981135054069, "grad_norm": 2.234375, "learning_rate": 7.794258899865216e-06, "loss": 0.5678, "step": 9082 }, { "epoch": 1.1311074275791302, "grad_norm": 2.0, "learning_rate": 7.792311679022033e-06, "loss": 0.4917, "step": 9083 }, { "epoch": 1.1312337201041913, "grad_norm": 2.0, "learning_rate": 7.790364546171988e-06, "loss": 0.4723, "step": 9084 }, { "epoch": 1.1313600126292525, "grad_norm": 2.03125, "learning_rate": 7.788417501392691e-06, "loss": 0.4839, "step": 9085 }, { "epoch": 1.1314863051543136, "grad_norm": 1.9453125, "learning_rate": 7.786470544761746e-06, "loss": 0.4725, "step": 9086 }, { "epoch": 1.131612597679375, "grad_norm": 2.140625, "learning_rate": 7.784523676356751e-06, "loss": 0.5759, "step": 9087 }, { "epoch": 1.131738890204436, "grad_norm": 1.984375, "learning_rate": 7.782576896255307e-06, "loss": 0.5129, "step": 9088 }, { "epoch": 1.1318651827294972, "grad_norm": 2.125, "learning_rate": 7.780630204535008e-06, "loss": 0.4409, "step": 9089 }, { "epoch": 1.1319914752545583, "grad_norm": 2.03125, "learning_rate": 7.778683601273442e-06, "loss": 0.5027, "step": 9090 }, { "epoch": 1.1321177677796195, "grad_norm": 1.984375, "learning_rate": 7.776737086548196e-06, "loss": 0.4426, "step": 9091 }, { "epoch": 1.1322440603046808, "grad_norm": 1.8828125, "learning_rate": 7.774790660436857e-06, "loss": 0.5353, "step": 9092 }, { "epoch": 1.132370352829742, "grad_norm": 2.1875, "learning_rate": 7.772844323017002e-06, "loss": 0.4635, "step": 9093 }, { "epoch": 1.132496645354803, "grad_norm": 2.0625, "learning_rate": 7.77089807436621e-06, "loss": 0.5501, "step": 9094 }, { "epoch": 1.1326229378798642, "grad_norm": 2.28125, "learning_rate": 7.768951914562048e-06, "loss": 0.6094, "step": 9095 }, { "epoch": 1.1327492304049254, "grad_norm": 1.8515625, "learning_rate": 7.767005843682089e-06, "loss": 0.4681, "step": 9096 }, { "epoch": 1.1328755229299865, "grad_norm": 2.15625, "learning_rate": 7.7650598618039e-06, "loss": 0.5012, "step": 9097 }, { "epoch": 1.1330018154550476, "grad_norm": 2.09375, "learning_rate": 7.763113969005042e-06, "loss": 0.544, "step": 9098 }, { "epoch": 1.133128107980109, "grad_norm": 2.09375, "learning_rate": 7.761168165363073e-06, "loss": 0.5155, "step": 9099 }, { "epoch": 1.1332544005051701, "grad_norm": 1.8828125, "learning_rate": 7.759222450955547e-06, "loss": 0.4546, "step": 9100 }, { "epoch": 1.1333806930302313, "grad_norm": 1.984375, "learning_rate": 7.75727682586002e-06, "loss": 0.4821, "step": 9101 }, { "epoch": 1.1335069855552924, "grad_norm": 2.046875, "learning_rate": 7.755331290154035e-06, "loss": 0.5282, "step": 9102 }, { "epoch": 1.1336332780803535, "grad_norm": 2.15625, "learning_rate": 7.753385843915138e-06, "loss": 0.604, "step": 9103 }, { "epoch": 1.1337595706054149, "grad_norm": 1.8671875, "learning_rate": 7.75144048722087e-06, "loss": 0.5653, "step": 9104 }, { "epoch": 1.133885863130476, "grad_norm": 1.921875, "learning_rate": 7.749495220148768e-06, "loss": 0.5228, "step": 9105 }, { "epoch": 1.1340121556555371, "grad_norm": 2.015625, "learning_rate": 7.747550042776367e-06, "loss": 0.5621, "step": 9106 }, { "epoch": 1.1341384481805983, "grad_norm": 1.875, "learning_rate": 7.745604955181193e-06, "loss": 0.5076, "step": 9107 }, { "epoch": 1.1342647407056594, "grad_norm": 1.9921875, "learning_rate": 7.743659957440778e-06, "loss": 0.4634, "step": 9108 }, { "epoch": 1.1343910332307208, "grad_norm": 1.9609375, "learning_rate": 7.741715049632643e-06, "loss": 0.5346, "step": 9109 }, { "epoch": 1.134517325755782, "grad_norm": 2.03125, "learning_rate": 7.739770231834305e-06, "loss": 0.4636, "step": 9110 }, { "epoch": 1.134643618280843, "grad_norm": 1.7734375, "learning_rate": 7.737825504123282e-06, "loss": 0.4596, "step": 9111 }, { "epoch": 1.1347699108059042, "grad_norm": 1.8125, "learning_rate": 7.735880866577086e-06, "loss": 0.4548, "step": 9112 }, { "epoch": 1.1348962033309653, "grad_norm": 1.9453125, "learning_rate": 7.733936319273224e-06, "loss": 0.5222, "step": 9113 }, { "epoch": 1.1350224958560264, "grad_norm": 1.84375, "learning_rate": 7.731991862289202e-06, "loss": 0.4881, "step": 9114 }, { "epoch": 1.1351487883810876, "grad_norm": 1.90625, "learning_rate": 7.730047495702521e-06, "loss": 0.4648, "step": 9115 }, { "epoch": 1.135275080906149, "grad_norm": 1.9765625, "learning_rate": 7.72810321959068e-06, "loss": 0.5174, "step": 9116 }, { "epoch": 1.13540137343121, "grad_norm": 1.8125, "learning_rate": 7.726159034031175e-06, "loss": 0.4854, "step": 9117 }, { "epoch": 1.1355276659562712, "grad_norm": 1.9296875, "learning_rate": 7.724214939101493e-06, "loss": 0.481, "step": 9118 }, { "epoch": 1.1356539584813323, "grad_norm": 1.8515625, "learning_rate": 7.72227093487912e-06, "loss": 0.4333, "step": 9119 }, { "epoch": 1.1357802510063935, "grad_norm": 1.9375, "learning_rate": 7.720327021441543e-06, "loss": 0.5049, "step": 9120 }, { "epoch": 1.1359065435314548, "grad_norm": 1.84375, "learning_rate": 7.71838319886624e-06, "loss": 0.4952, "step": 9121 }, { "epoch": 1.136032836056516, "grad_norm": 1.953125, "learning_rate": 7.716439467230688e-06, "loss": 0.5525, "step": 9122 }, { "epoch": 1.136159128581577, "grad_norm": 1.8515625, "learning_rate": 7.714495826612353e-06, "loss": 0.4518, "step": 9123 }, { "epoch": 1.1362854211066382, "grad_norm": 1.859375, "learning_rate": 7.712552277088716e-06, "loss": 0.4886, "step": 9124 }, { "epoch": 1.1364117136316993, "grad_norm": 2.5625, "learning_rate": 7.710608818737231e-06, "loss": 0.5759, "step": 9125 }, { "epoch": 1.1365380061567607, "grad_norm": 1.984375, "learning_rate": 7.708665451635368e-06, "loss": 0.5311, "step": 9126 }, { "epoch": 1.1366642986818218, "grad_norm": 2.046875, "learning_rate": 7.70672217586058e-06, "loss": 0.5191, "step": 9127 }, { "epoch": 1.136790591206883, "grad_norm": 2.21875, "learning_rate": 7.704778991490321e-06, "loss": 0.5659, "step": 9128 }, { "epoch": 1.136916883731944, "grad_norm": 1.859375, "learning_rate": 7.702835898602044e-06, "loss": 0.4988, "step": 9129 }, { "epoch": 1.1370431762570052, "grad_norm": 1.96875, "learning_rate": 7.700892897273195e-06, "loss": 0.4418, "step": 9130 }, { "epoch": 1.1371694687820664, "grad_norm": 2.0, "learning_rate": 7.698949987581216e-06, "loss": 0.5396, "step": 9131 }, { "epoch": 1.1372957613071277, "grad_norm": 1.8828125, "learning_rate": 7.697007169603547e-06, "loss": 0.4789, "step": 9132 }, { "epoch": 1.1374220538321889, "grad_norm": 1.953125, "learning_rate": 7.695064443417628e-06, "loss": 0.5063, "step": 9133 }, { "epoch": 1.13754834635725, "grad_norm": 2.328125, "learning_rate": 7.693121809100887e-06, "loss": 0.5738, "step": 9134 }, { "epoch": 1.1376746388823111, "grad_norm": 1.8125, "learning_rate": 7.691179266730756e-06, "loss": 0.5075, "step": 9135 }, { "epoch": 1.1378009314073723, "grad_norm": 2.0, "learning_rate": 7.689236816384657e-06, "loss": 0.4997, "step": 9136 }, { "epoch": 1.1379272239324334, "grad_norm": 1.8828125, "learning_rate": 7.687294458140013e-06, "loss": 0.4972, "step": 9137 }, { "epoch": 1.1380535164574948, "grad_norm": 2.09375, "learning_rate": 7.685352192074243e-06, "loss": 0.5436, "step": 9138 }, { "epoch": 1.1381798089825559, "grad_norm": 1.734375, "learning_rate": 7.683410018264753e-06, "loss": 0.4004, "step": 9139 }, { "epoch": 1.138306101507617, "grad_norm": 2.046875, "learning_rate": 7.681467936788967e-06, "loss": 0.5121, "step": 9140 }, { "epoch": 1.1384323940326782, "grad_norm": 1.9140625, "learning_rate": 7.679525947724282e-06, "loss": 0.4939, "step": 9141 }, { "epoch": 1.1385586865577393, "grad_norm": 2.140625, "learning_rate": 7.677584051148104e-06, "loss": 0.6162, "step": 9142 }, { "epoch": 1.1386849790828006, "grad_norm": 1.921875, "learning_rate": 7.67564224713783e-06, "loss": 0.4605, "step": 9143 }, { "epoch": 1.1388112716078618, "grad_norm": 2.015625, "learning_rate": 7.673700535770859e-06, "loss": 0.4601, "step": 9144 }, { "epoch": 1.138937564132923, "grad_norm": 2.09375, "learning_rate": 7.671758917124582e-06, "loss": 0.7148, "step": 9145 }, { "epoch": 1.139063856657984, "grad_norm": 1.9296875, "learning_rate": 7.669817391276386e-06, "loss": 0.4362, "step": 9146 }, { "epoch": 1.1391901491830452, "grad_norm": 1.984375, "learning_rate": 7.667875958303657e-06, "loss": 0.4621, "step": 9147 }, { "epoch": 1.1393164417081063, "grad_norm": 2.125, "learning_rate": 7.665934618283774e-06, "loss": 0.5265, "step": 9148 }, { "epoch": 1.1394427342331677, "grad_norm": 2.03125, "learning_rate": 7.663993371294115e-06, "loss": 0.5298, "step": 9149 }, { "epoch": 1.1395690267582288, "grad_norm": 2.078125, "learning_rate": 7.662052217412054e-06, "loss": 0.5653, "step": 9150 }, { "epoch": 1.13969531928329, "grad_norm": 1.8671875, "learning_rate": 7.660111156714958e-06, "loss": 0.4251, "step": 9151 }, { "epoch": 1.139821611808351, "grad_norm": 1.9765625, "learning_rate": 7.658170189280198e-06, "loss": 0.5986, "step": 9152 }, { "epoch": 1.1399479043334122, "grad_norm": 1.90625, "learning_rate": 7.656229315185133e-06, "loss": 0.5049, "step": 9153 }, { "epoch": 1.1400741968584733, "grad_norm": 1.8671875, "learning_rate": 7.654288534507124e-06, "loss": 0.4927, "step": 9154 }, { "epoch": 1.1402004893835347, "grad_norm": 1.984375, "learning_rate": 7.652347847323518e-06, "loss": 0.503, "step": 9155 }, { "epoch": 1.1403267819085958, "grad_norm": 1.8984375, "learning_rate": 7.650407253711678e-06, "loss": 0.49, "step": 9156 }, { "epoch": 1.140453074433657, "grad_norm": 2.15625, "learning_rate": 7.648466753748945e-06, "loss": 0.5482, "step": 9157 }, { "epoch": 1.140579366958718, "grad_norm": 1.890625, "learning_rate": 7.646526347512661e-06, "loss": 0.5157, "step": 9158 }, { "epoch": 1.1407056594837792, "grad_norm": 1.828125, "learning_rate": 7.644586035080169e-06, "loss": 0.4559, "step": 9159 }, { "epoch": 1.1408319520088406, "grad_norm": 1.953125, "learning_rate": 7.642645816528806e-06, "loss": 0.473, "step": 9160 }, { "epoch": 1.1409582445339017, "grad_norm": 1.921875, "learning_rate": 7.6407056919359e-06, "loss": 0.4869, "step": 9161 }, { "epoch": 1.1410845370589628, "grad_norm": 2.09375, "learning_rate": 7.638765661378785e-06, "loss": 0.4983, "step": 9162 }, { "epoch": 1.141210829584024, "grad_norm": 1.96875, "learning_rate": 7.63682572493478e-06, "loss": 0.463, "step": 9163 }, { "epoch": 1.1413371221090851, "grad_norm": 1.9765625, "learning_rate": 7.634885882681214e-06, "loss": 0.5265, "step": 9164 }, { "epoch": 1.1414634146341462, "grad_norm": 1.8203125, "learning_rate": 7.632946134695396e-06, "loss": 0.4411, "step": 9165 }, { "epoch": 1.1415897071592076, "grad_norm": 1.8515625, "learning_rate": 7.631006481054646e-06, "loss": 0.4538, "step": 9166 }, { "epoch": 1.1417159996842687, "grad_norm": 1.84375, "learning_rate": 7.62906692183627e-06, "loss": 0.4737, "step": 9167 }, { "epoch": 1.1418422922093299, "grad_norm": 2.265625, "learning_rate": 7.627127457117578e-06, "loss": 0.5328, "step": 9168 }, { "epoch": 1.141968584734391, "grad_norm": 1.953125, "learning_rate": 7.6251880869758654e-06, "loss": 0.4701, "step": 9169 }, { "epoch": 1.1420948772594521, "grad_norm": 2.265625, "learning_rate": 7.623248811488439e-06, "loss": 0.5594, "step": 9170 }, { "epoch": 1.1422211697845133, "grad_norm": 1.9140625, "learning_rate": 7.621309630732588e-06, "loss": 0.4945, "step": 9171 }, { "epoch": 1.1423474623095746, "grad_norm": 1.859375, "learning_rate": 7.619370544785608e-06, "loss": 0.4596, "step": 9172 }, { "epoch": 1.1424737548346358, "grad_norm": 1.9140625, "learning_rate": 7.617431553724784e-06, "loss": 0.4525, "step": 9173 }, { "epoch": 1.142600047359697, "grad_norm": 2.109375, "learning_rate": 7.615492657627399e-06, "loss": 0.5268, "step": 9174 }, { "epoch": 1.142726339884758, "grad_norm": 1.890625, "learning_rate": 7.613553856570733e-06, "loss": 0.4589, "step": 9175 }, { "epoch": 1.1428526324098192, "grad_norm": 2.125, "learning_rate": 7.611615150632064e-06, "loss": 0.4824, "step": 9176 }, { "epoch": 1.1429789249348805, "grad_norm": 2.109375, "learning_rate": 7.609676539888661e-06, "loss": 0.5083, "step": 9177 }, { "epoch": 1.1431052174599416, "grad_norm": 2.0625, "learning_rate": 7.607738024417794e-06, "loss": 0.4943, "step": 9178 }, { "epoch": 1.1432315099850028, "grad_norm": 2.09375, "learning_rate": 7.605799604296727e-06, "loss": 0.5285, "step": 9179 }, { "epoch": 1.143357802510064, "grad_norm": 2.0625, "learning_rate": 7.60386127960272e-06, "loss": 0.5588, "step": 9180 }, { "epoch": 1.143484095035125, "grad_norm": 2.171875, "learning_rate": 7.6019230504130315e-06, "loss": 0.5644, "step": 9181 }, { "epoch": 1.1436103875601864, "grad_norm": 2.03125, "learning_rate": 7.599984916804914e-06, "loss": 0.4879, "step": 9182 }, { "epoch": 1.1437366800852475, "grad_norm": 2.09375, "learning_rate": 7.598046878855618e-06, "loss": 0.5248, "step": 9183 }, { "epoch": 1.1438629726103087, "grad_norm": 1.859375, "learning_rate": 7.596108936642387e-06, "loss": 0.4754, "step": 9184 }, { "epoch": 1.1439892651353698, "grad_norm": 1.8359375, "learning_rate": 7.594171090242464e-06, "loss": 0.4716, "step": 9185 }, { "epoch": 1.144115557660431, "grad_norm": 2.09375, "learning_rate": 7.592233339733086e-06, "loss": 0.5061, "step": 9186 }, { "epoch": 1.144241850185492, "grad_norm": 1.8515625, "learning_rate": 7.590295685191485e-06, "loss": 0.5312, "step": 9187 }, { "epoch": 1.1443681427105532, "grad_norm": 1.7578125, "learning_rate": 7.588358126694896e-06, "loss": 0.4298, "step": 9188 }, { "epoch": 1.1444944352356146, "grad_norm": 1.9375, "learning_rate": 7.586420664320544e-06, "loss": 0.4516, "step": 9189 }, { "epoch": 1.1446207277606757, "grad_norm": 1.984375, "learning_rate": 7.584483298145651e-06, "loss": 0.5219, "step": 9190 }, { "epoch": 1.1447470202857368, "grad_norm": 2.125, "learning_rate": 7.582546028247435e-06, "loss": 0.4912, "step": 9191 }, { "epoch": 1.144873312810798, "grad_norm": 2.015625, "learning_rate": 7.580608854703113e-06, "loss": 0.5209, "step": 9192 }, { "epoch": 1.144999605335859, "grad_norm": 2.046875, "learning_rate": 7.578671777589894e-06, "loss": 0.5001, "step": 9193 }, { "epoch": 1.1451258978609204, "grad_norm": 1.828125, "learning_rate": 7.576734796984984e-06, "loss": 0.4441, "step": 9194 }, { "epoch": 1.1452521903859816, "grad_norm": 1.9765625, "learning_rate": 7.57479791296559e-06, "loss": 0.4604, "step": 9195 }, { "epoch": 1.1453784829110427, "grad_norm": 1.953125, "learning_rate": 7.57286112560891e-06, "loss": 0.4461, "step": 9196 }, { "epoch": 1.1455047754361039, "grad_norm": 2.234375, "learning_rate": 7.570924434992138e-06, "loss": 0.5595, "step": 9197 }, { "epoch": 1.145631067961165, "grad_norm": 2.015625, "learning_rate": 7.568987841192469e-06, "loss": 0.5021, "step": 9198 }, { "epoch": 1.1457573604862263, "grad_norm": 1.875, "learning_rate": 7.567051344287089e-06, "loss": 0.4759, "step": 9199 }, { "epoch": 1.1458836530112875, "grad_norm": 2.0625, "learning_rate": 7.56511494435318e-06, "loss": 0.5431, "step": 9200 }, { "epoch": 1.1460099455363486, "grad_norm": 2.09375, "learning_rate": 7.563178641467926e-06, "loss": 0.5733, "step": 9201 }, { "epoch": 1.1461362380614097, "grad_norm": 1.765625, "learning_rate": 7.561242435708502e-06, "loss": 0.4425, "step": 9202 }, { "epoch": 1.1462625305864709, "grad_norm": 3.28125, "learning_rate": 7.559306327152081e-06, "loss": 0.6369, "step": 9203 }, { "epoch": 1.146388823111532, "grad_norm": 2.109375, "learning_rate": 7.55737031587583e-06, "loss": 0.487, "step": 9204 }, { "epoch": 1.1465151156365931, "grad_norm": 2.015625, "learning_rate": 7.555434401956911e-06, "loss": 0.5168, "step": 9205 }, { "epoch": 1.1466414081616545, "grad_norm": 2.328125, "learning_rate": 7.553498585472493e-06, "loss": 0.5605, "step": 9206 }, { "epoch": 1.1467677006867156, "grad_norm": 1.84375, "learning_rate": 7.551562866499729e-06, "loss": 0.4935, "step": 9207 }, { "epoch": 1.1468939932117768, "grad_norm": 1.8359375, "learning_rate": 7.54962724511577e-06, "loss": 0.4612, "step": 9208 }, { "epoch": 1.147020285736838, "grad_norm": 1.9375, "learning_rate": 7.547691721397769e-06, "loss": 0.485, "step": 9209 }, { "epoch": 1.147146578261899, "grad_norm": 1.9921875, "learning_rate": 7.545756295422868e-06, "loss": 0.5118, "step": 9210 }, { "epoch": 1.1472728707869604, "grad_norm": 2.046875, "learning_rate": 7.54382096726821e-06, "loss": 0.4897, "step": 9211 }, { "epoch": 1.1473991633120215, "grad_norm": 2.015625, "learning_rate": 7.541885737010931e-06, "loss": 0.4838, "step": 9212 }, { "epoch": 1.1475254558370827, "grad_norm": 2.03125, "learning_rate": 7.539950604728167e-06, "loss": 0.5228, "step": 9213 }, { "epoch": 1.1476517483621438, "grad_norm": 1.9375, "learning_rate": 7.538015570497046e-06, "loss": 0.5072, "step": 9214 }, { "epoch": 1.147778040887205, "grad_norm": 2.515625, "learning_rate": 7.536080634394696e-06, "loss": 0.5162, "step": 9215 }, { "epoch": 1.1479043334122663, "grad_norm": 2.109375, "learning_rate": 7.534145796498234e-06, "loss": 0.5903, "step": 9216 }, { "epoch": 1.1480306259373274, "grad_norm": 2.125, "learning_rate": 7.532211056884784e-06, "loss": 0.5141, "step": 9217 }, { "epoch": 1.1481569184623885, "grad_norm": 1.9375, "learning_rate": 7.5302764156314565e-06, "loss": 0.5066, "step": 9218 }, { "epoch": 1.1482832109874497, "grad_norm": 1.96875, "learning_rate": 7.528341872815361e-06, "loss": 0.5413, "step": 9219 }, { "epoch": 1.1484095035125108, "grad_norm": 1.8359375, "learning_rate": 7.526407428513607e-06, "loss": 0.4477, "step": 9220 }, { "epoch": 1.148535796037572, "grad_norm": 1.9609375, "learning_rate": 7.524473082803294e-06, "loss": 0.5594, "step": 9221 }, { "epoch": 1.148662088562633, "grad_norm": 2.015625, "learning_rate": 7.5225388357615215e-06, "loss": 0.5306, "step": 9222 }, { "epoch": 1.1487883810876944, "grad_norm": 1.9921875, "learning_rate": 7.520604687465378e-06, "loss": 0.5146, "step": 9223 }, { "epoch": 1.1489146736127556, "grad_norm": 2.046875, "learning_rate": 7.518670637991966e-06, "loss": 0.5744, "step": 9224 }, { "epoch": 1.1490409661378167, "grad_norm": 1.96875, "learning_rate": 7.516736687418364e-06, "loss": 0.4792, "step": 9225 }, { "epoch": 1.1491672586628778, "grad_norm": 2.171875, "learning_rate": 7.514802835821657e-06, "loss": 0.5342, "step": 9226 }, { "epoch": 1.149293551187939, "grad_norm": 1.875, "learning_rate": 7.512869083278924e-06, "loss": 0.4826, "step": 9227 }, { "epoch": 1.1494198437130003, "grad_norm": 2.03125, "learning_rate": 7.510935429867237e-06, "loss": 0.5904, "step": 9228 }, { "epoch": 1.1495461362380615, "grad_norm": 1.90625, "learning_rate": 7.50900187566367e-06, "loss": 0.4923, "step": 9229 }, { "epoch": 1.1496724287631226, "grad_norm": 1.984375, "learning_rate": 7.507068420745288e-06, "loss": 0.5231, "step": 9230 }, { "epoch": 1.1497987212881837, "grad_norm": 1.9765625, "learning_rate": 7.5051350651891534e-06, "loss": 0.5293, "step": 9231 }, { "epoch": 1.1499250138132449, "grad_norm": 1.90625, "learning_rate": 7.503201809072327e-06, "loss": 0.5437, "step": 9232 }, { "epoch": 1.1500513063383062, "grad_norm": 1.890625, "learning_rate": 7.501268652471862e-06, "loss": 0.463, "step": 9233 }, { "epoch": 1.1501775988633673, "grad_norm": 2.0625, "learning_rate": 7.499335595464809e-06, "loss": 0.5273, "step": 9234 }, { "epoch": 1.1503038913884285, "grad_norm": 2.0, "learning_rate": 7.497402638128217e-06, "loss": 0.4647, "step": 9235 }, { "epoch": 1.1504301839134896, "grad_norm": 2.0625, "learning_rate": 7.495469780539128e-06, "loss": 0.5404, "step": 9236 }, { "epoch": 1.1505564764385507, "grad_norm": 2.03125, "learning_rate": 7.49353702277458e-06, "loss": 0.6009, "step": 9237 }, { "epoch": 1.1506827689636119, "grad_norm": 1.953125, "learning_rate": 7.491604364911609e-06, "loss": 0.5898, "step": 9238 }, { "epoch": 1.1508090614886732, "grad_norm": 1.890625, "learning_rate": 7.489671807027248e-06, "loss": 0.4798, "step": 9239 }, { "epoch": 1.1509353540137344, "grad_norm": 1.8828125, "learning_rate": 7.487739349198521e-06, "loss": 0.473, "step": 9240 }, { "epoch": 1.1510616465387955, "grad_norm": 2.0, "learning_rate": 7.485806991502448e-06, "loss": 0.5217, "step": 9241 }, { "epoch": 1.1511879390638566, "grad_norm": 2.0, "learning_rate": 7.483874734016057e-06, "loss": 0.5226, "step": 9242 }, { "epoch": 1.1513142315889178, "grad_norm": 1.96875, "learning_rate": 7.481942576816359e-06, "loss": 0.5124, "step": 9243 }, { "epoch": 1.151440524113979, "grad_norm": 1.9609375, "learning_rate": 7.4800105199803645e-06, "loss": 0.5469, "step": 9244 }, { "epoch": 1.1515668166390403, "grad_norm": 2.171875, "learning_rate": 7.47807856358508e-06, "loss": 0.5789, "step": 9245 }, { "epoch": 1.1516931091641014, "grad_norm": 2.046875, "learning_rate": 7.47614670770751e-06, "loss": 0.502, "step": 9246 }, { "epoch": 1.1518194016891625, "grad_norm": 2.109375, "learning_rate": 7.4742149524246544e-06, "loss": 0.5107, "step": 9247 }, { "epoch": 1.1519456942142237, "grad_norm": 1.984375, "learning_rate": 7.472283297813505e-06, "loss": 0.5353, "step": 9248 }, { "epoch": 1.1520719867392848, "grad_norm": 1.984375, "learning_rate": 7.470351743951057e-06, "loss": 0.5222, "step": 9249 }, { "epoch": 1.1521982792643461, "grad_norm": 1.90625, "learning_rate": 7.468420290914294e-06, "loss": 0.4734, "step": 9250 }, { "epoch": 1.1523245717894073, "grad_norm": 1.8828125, "learning_rate": 7.466488938780202e-06, "loss": 0.4701, "step": 9251 }, { "epoch": 1.1524508643144684, "grad_norm": 2.09375, "learning_rate": 7.4645576876257574e-06, "loss": 0.5346, "step": 9252 }, { "epoch": 1.1525771568395295, "grad_norm": 1.828125, "learning_rate": 7.462626537527937e-06, "loss": 0.4764, "step": 9253 }, { "epoch": 1.1527034493645907, "grad_norm": 2.046875, "learning_rate": 7.460695488563711e-06, "loss": 0.5166, "step": 9254 }, { "epoch": 1.1528297418896518, "grad_norm": 2.0625, "learning_rate": 7.458764540810047e-06, "loss": 0.5462, "step": 9255 }, { "epoch": 1.1529560344147132, "grad_norm": 2.046875, "learning_rate": 7.4568336943439055e-06, "loss": 0.4429, "step": 9256 }, { "epoch": 1.1530823269397743, "grad_norm": 1.984375, "learning_rate": 7.454902949242249e-06, "loss": 0.4793, "step": 9257 }, { "epoch": 1.1532086194648354, "grad_norm": 1.9296875, "learning_rate": 7.452972305582029e-06, "loss": 0.511, "step": 9258 }, { "epoch": 1.1533349119898966, "grad_norm": 1.9765625, "learning_rate": 7.4510417634401965e-06, "loss": 0.5105, "step": 9259 }, { "epoch": 1.1534612045149577, "grad_norm": 1.8515625, "learning_rate": 7.449111322893702e-06, "loss": 0.498, "step": 9260 }, { "epoch": 1.1535874970400188, "grad_norm": 1.890625, "learning_rate": 7.447180984019486e-06, "loss": 0.4758, "step": 9261 }, { "epoch": 1.1537137895650802, "grad_norm": 2.21875, "learning_rate": 7.4452507468944875e-06, "loss": 0.5412, "step": 9262 }, { "epoch": 1.1538400820901413, "grad_norm": 2.015625, "learning_rate": 7.443320611595641e-06, "loss": 0.4842, "step": 9263 }, { "epoch": 1.1539663746152025, "grad_norm": 2.109375, "learning_rate": 7.441390578199878e-06, "loss": 0.6196, "step": 9264 }, { "epoch": 1.1540926671402636, "grad_norm": 1.953125, "learning_rate": 7.439460646784122e-06, "loss": 0.4984, "step": 9265 }, { "epoch": 1.1542189596653247, "grad_norm": 1.890625, "learning_rate": 7.437530817425297e-06, "loss": 0.5597, "step": 9266 }, { "epoch": 1.154345252190386, "grad_norm": 2.21875, "learning_rate": 7.4356010902003215e-06, "loss": 0.5176, "step": 9267 }, { "epoch": 1.1544715447154472, "grad_norm": 2.09375, "learning_rate": 7.4336714651861116e-06, "loss": 0.512, "step": 9268 }, { "epoch": 1.1545978372405084, "grad_norm": 1.890625, "learning_rate": 7.431741942459574e-06, "loss": 0.5009, "step": 9269 }, { "epoch": 1.1547241297655695, "grad_norm": 2.03125, "learning_rate": 7.429812522097617e-06, "loss": 0.5036, "step": 9270 }, { "epoch": 1.1548504222906306, "grad_norm": 2.046875, "learning_rate": 7.427883204177142e-06, "loss": 0.4882, "step": 9271 }, { "epoch": 1.1549767148156918, "grad_norm": 2.015625, "learning_rate": 7.425953988775048e-06, "loss": 0.4705, "step": 9272 }, { "epoch": 1.155103007340753, "grad_norm": 2.140625, "learning_rate": 7.424024875968227e-06, "loss": 0.5403, "step": 9273 }, { "epoch": 1.1552292998658142, "grad_norm": 2.140625, "learning_rate": 7.42209586583357e-06, "loss": 0.5675, "step": 9274 }, { "epoch": 1.1553555923908754, "grad_norm": 1.8359375, "learning_rate": 7.420166958447963e-06, "loss": 0.3845, "step": 9275 }, { "epoch": 1.1554818849159365, "grad_norm": 2.015625, "learning_rate": 7.418238153888286e-06, "loss": 0.571, "step": 9276 }, { "epoch": 1.1556081774409976, "grad_norm": 2.0625, "learning_rate": 7.416309452231416e-06, "loss": 0.5376, "step": 9277 }, { "epoch": 1.1557344699660588, "grad_norm": 1.8359375, "learning_rate": 7.41438085355423e-06, "loss": 0.4731, "step": 9278 }, { "epoch": 1.1558607624911201, "grad_norm": 1.9609375, "learning_rate": 7.412452357933595e-06, "loss": 0.5185, "step": 9279 }, { "epoch": 1.1559870550161813, "grad_norm": 2.03125, "learning_rate": 7.410523965446378e-06, "loss": 0.4997, "step": 9280 }, { "epoch": 1.1561133475412424, "grad_norm": 2.0, "learning_rate": 7.408595676169438e-06, "loss": 0.5481, "step": 9281 }, { "epoch": 1.1562396400663035, "grad_norm": 1.8671875, "learning_rate": 7.406667490179633e-06, "loss": 0.5015, "step": 9282 }, { "epoch": 1.1563659325913647, "grad_norm": 1.9765625, "learning_rate": 7.404739407553814e-06, "loss": 0.579, "step": 9283 }, { "epoch": 1.156492225116426, "grad_norm": 1.921875, "learning_rate": 7.402811428368832e-06, "loss": 0.5327, "step": 9284 }, { "epoch": 1.1566185176414872, "grad_norm": 1.8203125, "learning_rate": 7.400883552701531e-06, "loss": 0.4694, "step": 9285 }, { "epoch": 1.1567448101665483, "grad_norm": 2.046875, "learning_rate": 7.398955780628752e-06, "loss": 0.5056, "step": 9286 }, { "epoch": 1.1568711026916094, "grad_norm": 1.8203125, "learning_rate": 7.397028112227329e-06, "loss": 0.4497, "step": 9287 }, { "epoch": 1.1569973952166706, "grad_norm": 1.984375, "learning_rate": 7.395100547574097e-06, "loss": 0.5667, "step": 9288 }, { "epoch": 1.157123687741732, "grad_norm": 2.15625, "learning_rate": 7.393173086745882e-06, "loss": 0.5518, "step": 9289 }, { "epoch": 1.157249980266793, "grad_norm": 1.8125, "learning_rate": 7.3912457298195084e-06, "loss": 0.4777, "step": 9290 }, { "epoch": 1.1573762727918542, "grad_norm": 2.109375, "learning_rate": 7.389318476871797e-06, "loss": 0.4822, "step": 9291 }, { "epoch": 1.1575025653169153, "grad_norm": 1.859375, "learning_rate": 7.3873913279795624e-06, "loss": 0.5082, "step": 9292 }, { "epoch": 1.1576288578419764, "grad_norm": 1.8984375, "learning_rate": 7.385464283219619e-06, "loss": 0.4887, "step": 9293 }, { "epoch": 1.1577551503670376, "grad_norm": 1.9609375, "learning_rate": 7.38353734266877e-06, "loss": 0.5683, "step": 9294 }, { "epoch": 1.1578814428920987, "grad_norm": 2.015625, "learning_rate": 7.381610506403816e-06, "loss": 0.4875, "step": 9295 }, { "epoch": 1.15800773541716, "grad_norm": 1.90625, "learning_rate": 7.379683774501565e-06, "loss": 0.47, "step": 9296 }, { "epoch": 1.1581340279422212, "grad_norm": 1.90625, "learning_rate": 7.377757147038806e-06, "loss": 0.4287, "step": 9297 }, { "epoch": 1.1582603204672823, "grad_norm": 1.9453125, "learning_rate": 7.375830624092332e-06, "loss": 0.5047, "step": 9298 }, { "epoch": 1.1583866129923435, "grad_norm": 2.015625, "learning_rate": 7.373904205738928e-06, "loss": 0.5188, "step": 9299 }, { "epoch": 1.1585129055174046, "grad_norm": 2.0625, "learning_rate": 7.37197789205538e-06, "loss": 0.568, "step": 9300 }, { "epoch": 1.158639198042466, "grad_norm": 2.0625, "learning_rate": 7.37005168311846e-06, "loss": 0.5518, "step": 9301 }, { "epoch": 1.158765490567527, "grad_norm": 1.9375, "learning_rate": 7.368125579004947e-06, "loss": 0.4971, "step": 9302 }, { "epoch": 1.1588917830925882, "grad_norm": 1.890625, "learning_rate": 7.366199579791608e-06, "loss": 0.4935, "step": 9303 }, { "epoch": 1.1590180756176494, "grad_norm": 2.0625, "learning_rate": 7.36427368555521e-06, "loss": 0.4426, "step": 9304 }, { "epoch": 1.1591443681427105, "grad_norm": 1.859375, "learning_rate": 7.362347896372515e-06, "loss": 0.4955, "step": 9305 }, { "epoch": 1.1592706606677718, "grad_norm": 1.984375, "learning_rate": 7.36042221232028e-06, "loss": 0.5455, "step": 9306 }, { "epoch": 1.159396953192833, "grad_norm": 2.015625, "learning_rate": 7.3584966334752565e-06, "loss": 0.5109, "step": 9307 }, { "epoch": 1.1595232457178941, "grad_norm": 2.03125, "learning_rate": 7.356571159914195e-06, "loss": 0.5054, "step": 9308 }, { "epoch": 1.1596495382429552, "grad_norm": 1.875, "learning_rate": 7.35464579171384e-06, "loss": 0.4399, "step": 9309 }, { "epoch": 1.1597758307680164, "grad_norm": 2.046875, "learning_rate": 7.352720528950933e-06, "loss": 0.574, "step": 9310 }, { "epoch": 1.1599021232930775, "grad_norm": 2.0, "learning_rate": 7.350795371702208e-06, "loss": 0.541, "step": 9311 }, { "epoch": 1.1600284158181386, "grad_norm": 1.75, "learning_rate": 7.348870320044399e-06, "loss": 0.4257, "step": 9312 }, { "epoch": 1.1601547083432, "grad_norm": 2.546875, "learning_rate": 7.346945374054231e-06, "loss": 0.6546, "step": 9313 }, { "epoch": 1.1602810008682611, "grad_norm": 1.984375, "learning_rate": 7.345020533808431e-06, "loss": 0.5412, "step": 9314 }, { "epoch": 1.1604072933933223, "grad_norm": 1.9375, "learning_rate": 7.343095799383719e-06, "loss": 0.4559, "step": 9315 }, { "epoch": 1.1605335859183834, "grad_norm": 1.8984375, "learning_rate": 7.34117117085681e-06, "loss": 0.4608, "step": 9316 }, { "epoch": 1.1606598784434445, "grad_norm": 2.03125, "learning_rate": 7.339246648304412e-06, "loss": 0.5313, "step": 9317 }, { "epoch": 1.160786170968506, "grad_norm": 1.984375, "learning_rate": 7.337322231803237e-06, "loss": 0.536, "step": 9318 }, { "epoch": 1.160912463493567, "grad_norm": 2.03125, "learning_rate": 7.335397921429981e-06, "loss": 0.5055, "step": 9319 }, { "epoch": 1.1610387560186282, "grad_norm": 1.875, "learning_rate": 7.3334737172613465e-06, "loss": 0.4612, "step": 9320 }, { "epoch": 1.1611650485436893, "grad_norm": 2.125, "learning_rate": 7.3315496193740274e-06, "loss": 0.6109, "step": 9321 }, { "epoch": 1.1612913410687504, "grad_norm": 1.8125, "learning_rate": 7.329625627844715e-06, "loss": 0.4391, "step": 9322 }, { "epoch": 1.1614176335938118, "grad_norm": 1.765625, "learning_rate": 7.32770174275009e-06, "loss": 0.4132, "step": 9323 }, { "epoch": 1.161543926118873, "grad_norm": 1.8828125, "learning_rate": 7.325777964166838e-06, "loss": 0.4679, "step": 9324 }, { "epoch": 1.161670218643934, "grad_norm": 2.0625, "learning_rate": 7.323854292171635e-06, "loss": 0.5261, "step": 9325 }, { "epoch": 1.1617965111689952, "grad_norm": 1.96875, "learning_rate": 7.3219307268411546e-06, "loss": 0.4468, "step": 9326 }, { "epoch": 1.1619228036940563, "grad_norm": 1.9140625, "learning_rate": 7.320007268252064e-06, "loss": 0.5087, "step": 9327 }, { "epoch": 1.1620490962191174, "grad_norm": 1.8984375, "learning_rate": 7.318083916481029e-06, "loss": 0.5403, "step": 9328 }, { "epoch": 1.1621753887441786, "grad_norm": 2.15625, "learning_rate": 7.316160671604708e-06, "loss": 0.5503, "step": 9329 }, { "epoch": 1.16230168126924, "grad_norm": 2.015625, "learning_rate": 7.314237533699761e-06, "loss": 0.5349, "step": 9330 }, { "epoch": 1.162427973794301, "grad_norm": 2.078125, "learning_rate": 7.31231450284283e-06, "loss": 0.5231, "step": 9331 }, { "epoch": 1.1625542663193622, "grad_norm": 2.078125, "learning_rate": 7.3103915791105755e-06, "loss": 0.584, "step": 9332 }, { "epoch": 1.1626805588444233, "grad_norm": 1.9296875, "learning_rate": 7.308468762579633e-06, "loss": 0.5139, "step": 9333 }, { "epoch": 1.1628068513694845, "grad_norm": 1.8984375, "learning_rate": 7.306546053326643e-06, "loss": 0.4499, "step": 9334 }, { "epoch": 1.1629331438945458, "grad_norm": 2.0, "learning_rate": 7.30462345142824e-06, "loss": 0.478, "step": 9335 }, { "epoch": 1.163059436419607, "grad_norm": 1.8828125, "learning_rate": 7.302700956961053e-06, "loss": 0.5231, "step": 9336 }, { "epoch": 1.163185728944668, "grad_norm": 1.984375, "learning_rate": 7.30077857000171e-06, "loss": 0.5222, "step": 9337 }, { "epoch": 1.1633120214697292, "grad_norm": 2.0625, "learning_rate": 7.2988562906268325e-06, "loss": 0.4934, "step": 9338 }, { "epoch": 1.1634383139947904, "grad_norm": 1.9453125, "learning_rate": 7.296934118913035e-06, "loss": 0.5235, "step": 9339 }, { "epoch": 1.1635646065198517, "grad_norm": 1.921875, "learning_rate": 7.295012054936934e-06, "loss": 0.4502, "step": 9340 }, { "epoch": 1.1636908990449129, "grad_norm": 1.953125, "learning_rate": 7.293090098775138e-06, "loss": 0.5742, "step": 9341 }, { "epoch": 1.163817191569974, "grad_norm": 1.984375, "learning_rate": 7.291168250504249e-06, "loss": 0.4561, "step": 9342 }, { "epoch": 1.1639434840950351, "grad_norm": 2.046875, "learning_rate": 7.289246510200869e-06, "loss": 0.5384, "step": 9343 }, { "epoch": 1.1640697766200963, "grad_norm": 1.8203125, "learning_rate": 7.287324877941595e-06, "loss": 0.4593, "step": 9344 }, { "epoch": 1.1641960691451574, "grad_norm": 1.78125, "learning_rate": 7.285403353803016e-06, "loss": 0.4545, "step": 9345 }, { "epoch": 1.1643223616702185, "grad_norm": 2.03125, "learning_rate": 7.283481937861723e-06, "loss": 0.4564, "step": 9346 }, { "epoch": 1.1644486541952799, "grad_norm": 1.8984375, "learning_rate": 7.281560630194295e-06, "loss": 0.4458, "step": 9347 }, { "epoch": 1.164574946720341, "grad_norm": 2.03125, "learning_rate": 7.2796394308773125e-06, "loss": 0.5231, "step": 9348 }, { "epoch": 1.1647012392454021, "grad_norm": 1.875, "learning_rate": 7.277718339987346e-06, "loss": 0.4855, "step": 9349 }, { "epoch": 1.1648275317704633, "grad_norm": 2.0625, "learning_rate": 7.2757973576009744e-06, "loss": 0.4362, "step": 9350 }, { "epoch": 1.1649538242955244, "grad_norm": 1.9921875, "learning_rate": 7.273876483794757e-06, "loss": 0.5329, "step": 9351 }, { "epoch": 1.1650801168205858, "grad_norm": 2.140625, "learning_rate": 7.271955718645258e-06, "loss": 0.5758, "step": 9352 }, { "epoch": 1.165206409345647, "grad_norm": 1.8046875, "learning_rate": 7.270035062229033e-06, "loss": 0.4271, "step": 9353 }, { "epoch": 1.165332701870708, "grad_norm": 1.9453125, "learning_rate": 7.268114514622635e-06, "loss": 0.5181, "step": 9354 }, { "epoch": 1.1654589943957692, "grad_norm": 2.140625, "learning_rate": 7.266194075902612e-06, "loss": 0.5513, "step": 9355 }, { "epoch": 1.1655852869208303, "grad_norm": 1.984375, "learning_rate": 7.264273746145508e-06, "loss": 0.4745, "step": 9356 }, { "epoch": 1.1657115794458917, "grad_norm": 2.109375, "learning_rate": 7.2623535254278634e-06, "loss": 0.4966, "step": 9357 }, { "epoch": 1.1658378719709528, "grad_norm": 1.9296875, "learning_rate": 7.260433413826214e-06, "loss": 0.5783, "step": 9358 }, { "epoch": 1.165964164496014, "grad_norm": 1.921875, "learning_rate": 7.258513411417089e-06, "loss": 0.5053, "step": 9359 }, { "epoch": 1.166090457021075, "grad_norm": 1.9453125, "learning_rate": 7.256593518277015e-06, "loss": 0.4652, "step": 9360 }, { "epoch": 1.1662167495461362, "grad_norm": 2.15625, "learning_rate": 7.254673734482518e-06, "loss": 0.4933, "step": 9361 }, { "epoch": 1.1663430420711973, "grad_norm": 2.0, "learning_rate": 7.252754060110112e-06, "loss": 0.501, "step": 9362 }, { "epoch": 1.1664693345962587, "grad_norm": 2.03125, "learning_rate": 7.250834495236312e-06, "loss": 0.6303, "step": 9363 }, { "epoch": 1.1665956271213198, "grad_norm": 2.171875, "learning_rate": 7.2489150399376276e-06, "loss": 0.5004, "step": 9364 }, { "epoch": 1.166721919646381, "grad_norm": 2.0, "learning_rate": 7.246995694290564e-06, "loss": 0.4484, "step": 9365 }, { "epoch": 1.166848212171442, "grad_norm": 2.140625, "learning_rate": 7.245076458371619e-06, "loss": 0.4774, "step": 9366 }, { "epoch": 1.1669745046965032, "grad_norm": 1.9765625, "learning_rate": 7.243157332257288e-06, "loss": 0.4897, "step": 9367 }, { "epoch": 1.1671007972215643, "grad_norm": 1.96875, "learning_rate": 7.241238316024069e-06, "loss": 0.4649, "step": 9368 }, { "epoch": 1.1672270897466257, "grad_norm": 2.015625, "learning_rate": 7.239319409748447e-06, "loss": 0.5008, "step": 9369 }, { "epoch": 1.1673533822716868, "grad_norm": 2.078125, "learning_rate": 7.237400613506902e-06, "loss": 0.583, "step": 9370 }, { "epoch": 1.167479674796748, "grad_norm": 1.78125, "learning_rate": 7.2354819273759155e-06, "loss": 0.5006, "step": 9371 }, { "epoch": 1.167605967321809, "grad_norm": 2.03125, "learning_rate": 7.23356335143196e-06, "loss": 0.4114, "step": 9372 }, { "epoch": 1.1677322598468702, "grad_norm": 2.015625, "learning_rate": 7.2316448857515076e-06, "loss": 0.4576, "step": 9373 }, { "epoch": 1.1678585523719316, "grad_norm": 2.125, "learning_rate": 7.229726530411021e-06, "loss": 0.5518, "step": 9374 }, { "epoch": 1.1679848448969927, "grad_norm": 1.859375, "learning_rate": 7.227808285486962e-06, "loss": 0.4461, "step": 9375 }, { "epoch": 1.1681111374220539, "grad_norm": 1.9375, "learning_rate": 7.225890151055787e-06, "loss": 0.476, "step": 9376 }, { "epoch": 1.168237429947115, "grad_norm": 1.8046875, "learning_rate": 7.22397212719395e-06, "loss": 0.526, "step": 9377 }, { "epoch": 1.1683637224721761, "grad_norm": 2.21875, "learning_rate": 7.222054213977897e-06, "loss": 0.5259, "step": 9378 }, { "epoch": 1.1684900149972373, "grad_norm": 1.84375, "learning_rate": 7.2201364114840715e-06, "loss": 0.3943, "step": 9379 }, { "epoch": 1.1686163075222986, "grad_norm": 1.9140625, "learning_rate": 7.218218719788914e-06, "loss": 0.4811, "step": 9380 }, { "epoch": 1.1687426000473597, "grad_norm": 1.890625, "learning_rate": 7.216301138968858e-06, "loss": 0.4536, "step": 9381 }, { "epoch": 1.1688688925724209, "grad_norm": 2.28125, "learning_rate": 7.214383669100331e-06, "loss": 0.5052, "step": 9382 }, { "epoch": 1.168995185097482, "grad_norm": 2.171875, "learning_rate": 7.212466310259765e-06, "loss": 0.4863, "step": 9383 }, { "epoch": 1.1691214776225431, "grad_norm": 1.8984375, "learning_rate": 7.210549062523573e-06, "loss": 0.4513, "step": 9384 }, { "epoch": 1.1692477701476043, "grad_norm": 1.9453125, "learning_rate": 7.208631925968177e-06, "loss": 0.4947, "step": 9385 }, { "epoch": 1.1693740626726656, "grad_norm": 1.9375, "learning_rate": 7.2067149006699914e-06, "loss": 0.5364, "step": 9386 }, { "epoch": 1.1695003551977268, "grad_norm": 1.8671875, "learning_rate": 7.204797986705423e-06, "loss": 0.5317, "step": 9387 }, { "epoch": 1.169626647722788, "grad_norm": 1.96875, "learning_rate": 7.2028811841508714e-06, "loss": 0.4766, "step": 9388 }, { "epoch": 1.169752940247849, "grad_norm": 1.828125, "learning_rate": 7.200964493082741e-06, "loss": 0.4205, "step": 9389 }, { "epoch": 1.1698792327729102, "grad_norm": 1.953125, "learning_rate": 7.199047913577424e-06, "loss": 0.5196, "step": 9390 }, { "epoch": 1.1700055252979715, "grad_norm": 1.9296875, "learning_rate": 7.197131445711309e-06, "loss": 0.4634, "step": 9391 }, { "epoch": 1.1701318178230327, "grad_norm": 1.90625, "learning_rate": 7.195215089560785e-06, "loss": 0.4528, "step": 9392 }, { "epoch": 1.1702581103480938, "grad_norm": 1.9140625, "learning_rate": 7.19329884520223e-06, "loss": 0.5066, "step": 9393 }, { "epoch": 1.170384402873155, "grad_norm": 2.125, "learning_rate": 7.191382712712025e-06, "loss": 0.5411, "step": 9394 }, { "epoch": 1.170510695398216, "grad_norm": 2.046875, "learning_rate": 7.189466692166538e-06, "loss": 0.5113, "step": 9395 }, { "epoch": 1.1706369879232772, "grad_norm": 1.8828125, "learning_rate": 7.187550783642141e-06, "loss": 0.4971, "step": 9396 }, { "epoch": 1.1707632804483386, "grad_norm": 1.984375, "learning_rate": 7.185634987215193e-06, "loss": 0.5236, "step": 9397 }, { "epoch": 1.1708895729733997, "grad_norm": 1.9375, "learning_rate": 7.183719302962058e-06, "loss": 0.4828, "step": 9398 }, { "epoch": 1.1710158654984608, "grad_norm": 2.078125, "learning_rate": 7.181803730959087e-06, "loss": 0.4924, "step": 9399 }, { "epoch": 1.171142158023522, "grad_norm": 1.9765625, "learning_rate": 7.179888271282629e-06, "loss": 0.5092, "step": 9400 }, { "epoch": 1.171268450548583, "grad_norm": 1.921875, "learning_rate": 7.177972924009035e-06, "loss": 0.5007, "step": 9401 }, { "epoch": 1.1713947430736442, "grad_norm": 2.09375, "learning_rate": 7.1760576892146415e-06, "loss": 0.4374, "step": 9402 }, { "epoch": 1.1715210355987056, "grad_norm": 1.9921875, "learning_rate": 7.1741425669757854e-06, "loss": 0.5612, "step": 9403 }, { "epoch": 1.1716473281237667, "grad_norm": 1.8984375, "learning_rate": 7.172227557368803e-06, "loss": 0.4505, "step": 9404 }, { "epoch": 1.1717736206488278, "grad_norm": 1.9921875, "learning_rate": 7.170312660470018e-06, "loss": 0.5001, "step": 9405 }, { "epoch": 1.171899913173889, "grad_norm": 1.96875, "learning_rate": 7.168397876355754e-06, "loss": 0.4594, "step": 9406 }, { "epoch": 1.17202620569895, "grad_norm": 1.890625, "learning_rate": 7.16648320510233e-06, "loss": 0.5137, "step": 9407 }, { "epoch": 1.1721524982240115, "grad_norm": 2.203125, "learning_rate": 7.1645686467860635e-06, "loss": 0.5241, "step": 9408 }, { "epoch": 1.1722787907490726, "grad_norm": 1.96875, "learning_rate": 7.162654201483259e-06, "loss": 0.4733, "step": 9409 }, { "epoch": 1.1724050832741337, "grad_norm": 1.796875, "learning_rate": 7.160739869270223e-06, "loss": 0.4676, "step": 9410 }, { "epoch": 1.1725313757991949, "grad_norm": 2.0, "learning_rate": 7.158825650223258e-06, "loss": 0.4031, "step": 9411 }, { "epoch": 1.172657668324256, "grad_norm": 2.078125, "learning_rate": 7.156911544418659e-06, "loss": 0.5215, "step": 9412 }, { "epoch": 1.1727839608493174, "grad_norm": 1.8671875, "learning_rate": 7.154997551932719e-06, "loss": 0.4582, "step": 9413 }, { "epoch": 1.1729102533743785, "grad_norm": 1.9453125, "learning_rate": 7.153083672841723e-06, "loss": 0.492, "step": 9414 }, { "epoch": 1.1730365458994396, "grad_norm": 2.0625, "learning_rate": 7.1511699072219544e-06, "loss": 0.5452, "step": 9415 }, { "epoch": 1.1731628384245008, "grad_norm": 2.0625, "learning_rate": 7.14925625514969e-06, "loss": 0.5262, "step": 9416 }, { "epoch": 1.1732891309495619, "grad_norm": 1.8828125, "learning_rate": 7.147342716701207e-06, "loss": 0.5541, "step": 9417 }, { "epoch": 1.173415423474623, "grad_norm": 1.9375, "learning_rate": 7.1454292919527705e-06, "loss": 0.4656, "step": 9418 }, { "epoch": 1.1735417159996842, "grad_norm": 1.9140625, "learning_rate": 7.143515980980648e-06, "loss": 0.4587, "step": 9419 }, { "epoch": 1.1736680085247455, "grad_norm": 2.1875, "learning_rate": 7.141602783861099e-06, "loss": 0.5811, "step": 9420 }, { "epoch": 1.1737943010498066, "grad_norm": 1.984375, "learning_rate": 7.139689700670375e-06, "loss": 0.5055, "step": 9421 }, { "epoch": 1.1739205935748678, "grad_norm": 1.8515625, "learning_rate": 7.1377767314847325e-06, "loss": 0.5048, "step": 9422 }, { "epoch": 1.174046886099929, "grad_norm": 2.03125, "learning_rate": 7.135863876380416e-06, "loss": 0.553, "step": 9423 }, { "epoch": 1.17417317862499, "grad_norm": 2.03125, "learning_rate": 7.133951135433666e-06, "loss": 0.4719, "step": 9424 }, { "epoch": 1.1742994711500514, "grad_norm": 2.046875, "learning_rate": 7.13203850872072e-06, "loss": 0.4863, "step": 9425 }, { "epoch": 1.1744257636751125, "grad_norm": 2.046875, "learning_rate": 7.130125996317811e-06, "loss": 0.4836, "step": 9426 }, { "epoch": 1.1745520562001737, "grad_norm": 2.515625, "learning_rate": 7.128213598301168e-06, "loss": 0.6448, "step": 9427 }, { "epoch": 1.1746783487252348, "grad_norm": 2.203125, "learning_rate": 7.126301314747013e-06, "loss": 0.4605, "step": 9428 }, { "epoch": 1.174804641250296, "grad_norm": 2.03125, "learning_rate": 7.124389145731567e-06, "loss": 0.4521, "step": 9429 }, { "epoch": 1.1749309337753573, "grad_norm": 1.9140625, "learning_rate": 7.1224770913310425e-06, "loss": 0.4339, "step": 9430 }, { "epoch": 1.1750572263004184, "grad_norm": 2.046875, "learning_rate": 7.12056515162165e-06, "loss": 0.4636, "step": 9431 }, { "epoch": 1.1751835188254796, "grad_norm": 1.90625, "learning_rate": 7.1186533266795945e-06, "loss": 0.466, "step": 9432 }, { "epoch": 1.1753098113505407, "grad_norm": 1.9921875, "learning_rate": 7.1167416165810775e-06, "loss": 0.5394, "step": 9433 }, { "epoch": 1.1754361038756018, "grad_norm": 1.984375, "learning_rate": 7.114830021402296e-06, "loss": 0.5195, "step": 9434 }, { "epoch": 1.175562396400663, "grad_norm": 2.171875, "learning_rate": 7.11291854121944e-06, "loss": 0.5101, "step": 9435 }, { "epoch": 1.175688688925724, "grad_norm": 1.953125, "learning_rate": 7.111007176108697e-06, "loss": 0.4728, "step": 9436 }, { "epoch": 1.1758149814507854, "grad_norm": 1.953125, "learning_rate": 7.10909592614625e-06, "loss": 0.4959, "step": 9437 }, { "epoch": 1.1759412739758466, "grad_norm": 2.171875, "learning_rate": 7.107184791408275e-06, "loss": 0.5406, "step": 9438 }, { "epoch": 1.1760675665009077, "grad_norm": 2.015625, "learning_rate": 7.105273771970946e-06, "loss": 0.5121, "step": 9439 }, { "epoch": 1.1761938590259688, "grad_norm": 1.875, "learning_rate": 7.103362867910433e-06, "loss": 0.5064, "step": 9440 }, { "epoch": 1.17632015155103, "grad_norm": 2.140625, "learning_rate": 7.1014520793029e-06, "loss": 0.5514, "step": 9441 }, { "epoch": 1.1764464440760913, "grad_norm": 2.0625, "learning_rate": 7.0995414062245036e-06, "loss": 0.4854, "step": 9442 }, { "epoch": 1.1765727366011525, "grad_norm": 1.9140625, "learning_rate": 7.097630848751403e-06, "loss": 0.4402, "step": 9443 }, { "epoch": 1.1766990291262136, "grad_norm": 1.96875, "learning_rate": 7.095720406959743e-06, "loss": 0.5282, "step": 9444 }, { "epoch": 1.1768253216512747, "grad_norm": 1.859375, "learning_rate": 7.093810080925675e-06, "loss": 0.4516, "step": 9445 }, { "epoch": 1.1769516141763359, "grad_norm": 1.875, "learning_rate": 7.091899870725336e-06, "loss": 0.499, "step": 9446 }, { "epoch": 1.1770779067013972, "grad_norm": 1.8671875, "learning_rate": 7.089989776434863e-06, "loss": 0.5464, "step": 9447 }, { "epoch": 1.1772041992264584, "grad_norm": 2.046875, "learning_rate": 7.088079798130387e-06, "loss": 0.46, "step": 9448 }, { "epoch": 1.1773304917515195, "grad_norm": 2.609375, "learning_rate": 7.0861699358880386e-06, "loss": 0.5346, "step": 9449 }, { "epoch": 1.1774567842765806, "grad_norm": 1.9609375, "learning_rate": 7.084260189783938e-06, "loss": 0.4728, "step": 9450 }, { "epoch": 1.1775830768016418, "grad_norm": 1.875, "learning_rate": 7.0823505598942034e-06, "loss": 0.4422, "step": 9451 }, { "epoch": 1.177709369326703, "grad_norm": 1.9609375, "learning_rate": 7.080441046294948e-06, "loss": 0.4575, "step": 9452 }, { "epoch": 1.177835661851764, "grad_norm": 1.9765625, "learning_rate": 7.078531649062279e-06, "loss": 0.4854, "step": 9453 }, { "epoch": 1.1779619543768254, "grad_norm": 1.859375, "learning_rate": 7.076622368272303e-06, "loss": 0.4816, "step": 9454 }, { "epoch": 1.1780882469018865, "grad_norm": 2.140625, "learning_rate": 7.074713204001118e-06, "loss": 0.6243, "step": 9455 }, { "epoch": 1.1782145394269476, "grad_norm": 1.8203125, "learning_rate": 7.072804156324818e-06, "loss": 0.4511, "step": 9456 }, { "epoch": 1.1783408319520088, "grad_norm": 1.9140625, "learning_rate": 7.070895225319492e-06, "loss": 0.5732, "step": 9457 }, { "epoch": 1.17846712447707, "grad_norm": 2.03125, "learning_rate": 7.068986411061229e-06, "loss": 0.4657, "step": 9458 }, { "epoch": 1.1785934170021313, "grad_norm": 2.1875, "learning_rate": 7.067077713626109e-06, "loss": 0.5943, "step": 9459 }, { "epoch": 1.1787197095271924, "grad_norm": 2.015625, "learning_rate": 7.065169133090206e-06, "loss": 0.4524, "step": 9460 }, { "epoch": 1.1788460020522535, "grad_norm": 1.9375, "learning_rate": 7.063260669529592e-06, "loss": 0.4861, "step": 9461 }, { "epoch": 1.1789722945773147, "grad_norm": 1.984375, "learning_rate": 7.061352323020334e-06, "loss": 0.512, "step": 9462 }, { "epoch": 1.1790985871023758, "grad_norm": 2.09375, "learning_rate": 7.0594440936384954e-06, "loss": 0.487, "step": 9463 }, { "epoch": 1.1792248796274372, "grad_norm": 2.046875, "learning_rate": 7.05753598146013e-06, "loss": 0.5578, "step": 9464 }, { "epoch": 1.1793511721524983, "grad_norm": 1.953125, "learning_rate": 7.055627986561295e-06, "loss": 0.5577, "step": 9465 }, { "epoch": 1.1794774646775594, "grad_norm": 1.8828125, "learning_rate": 7.053720109018037e-06, "loss": 0.4603, "step": 9466 }, { "epoch": 1.1796037572026206, "grad_norm": 1.859375, "learning_rate": 7.0518123489063975e-06, "loss": 0.4214, "step": 9467 }, { "epoch": 1.1797300497276817, "grad_norm": 2.03125, "learning_rate": 7.049904706302417e-06, "loss": 0.5199, "step": 9468 }, { "epoch": 1.1798563422527428, "grad_norm": 1.96875, "learning_rate": 7.047997181282129e-06, "loss": 0.5034, "step": 9469 }, { "epoch": 1.1799826347778042, "grad_norm": 1.84375, "learning_rate": 7.046089773921564e-06, "loss": 0.4007, "step": 9470 }, { "epoch": 1.1801089273028653, "grad_norm": 1.859375, "learning_rate": 7.044182484296744e-06, "loss": 0.5074, "step": 9471 }, { "epoch": 1.1802352198279265, "grad_norm": 1.9765625, "learning_rate": 7.042275312483693e-06, "loss": 0.5182, "step": 9472 }, { "epoch": 1.1803615123529876, "grad_norm": 1.9375, "learning_rate": 7.040368258558423e-06, "loss": 0.49, "step": 9473 }, { "epoch": 1.1804878048780487, "grad_norm": 1.9921875, "learning_rate": 7.038461322596945e-06, "loss": 0.5537, "step": 9474 }, { "epoch": 1.1806140974031099, "grad_norm": 2.25, "learning_rate": 7.036554504675264e-06, "loss": 0.4942, "step": 9475 }, { "epoch": 1.1807403899281712, "grad_norm": 1.8984375, "learning_rate": 7.034647804869383e-06, "loss": 0.4654, "step": 9476 }, { "epoch": 1.1808666824532323, "grad_norm": 2.046875, "learning_rate": 7.0327412232553e-06, "loss": 0.5027, "step": 9477 }, { "epoch": 1.1809929749782935, "grad_norm": 1.9375, "learning_rate": 7.0308347599090045e-06, "loss": 0.5112, "step": 9478 }, { "epoch": 1.1811192675033546, "grad_norm": 2.1875, "learning_rate": 7.028928414906482e-06, "loss": 0.4579, "step": 9479 }, { "epoch": 1.1812455600284157, "grad_norm": 1.9375, "learning_rate": 7.027022188323716e-06, "loss": 0.5511, "step": 9480 }, { "epoch": 1.181371852553477, "grad_norm": 2.015625, "learning_rate": 7.025116080236687e-06, "loss": 0.5087, "step": 9481 }, { "epoch": 1.1814981450785382, "grad_norm": 1.96875, "learning_rate": 7.023210090721364e-06, "loss": 0.4995, "step": 9482 }, { "epoch": 1.1816244376035994, "grad_norm": 1.890625, "learning_rate": 7.021304219853719e-06, "loss": 0.5463, "step": 9483 }, { "epoch": 1.1817507301286605, "grad_norm": 2.03125, "learning_rate": 7.019398467709711e-06, "loss": 0.4687, "step": 9484 }, { "epoch": 1.1818770226537216, "grad_norm": 1.8828125, "learning_rate": 7.017492834365303e-06, "loss": 0.46, "step": 9485 }, { "epoch": 1.1820033151787828, "grad_norm": 2.015625, "learning_rate": 7.015587319896444e-06, "loss": 0.4521, "step": 9486 }, { "epoch": 1.1821296077038441, "grad_norm": 2.15625, "learning_rate": 7.013681924379087e-06, "loss": 0.5578, "step": 9487 }, { "epoch": 1.1822559002289053, "grad_norm": 1.8828125, "learning_rate": 7.011776647889177e-06, "loss": 0.4897, "step": 9488 }, { "epoch": 1.1823821927539664, "grad_norm": 1.9296875, "learning_rate": 7.009871490502651e-06, "loss": 0.509, "step": 9489 }, { "epoch": 1.1825084852790275, "grad_norm": 2.0625, "learning_rate": 7.007966452295447e-06, "loss": 0.5394, "step": 9490 }, { "epoch": 1.1826347778040887, "grad_norm": 1.9765625, "learning_rate": 7.006061533343491e-06, "loss": 0.5132, "step": 9491 }, { "epoch": 1.1827610703291498, "grad_norm": 1.8671875, "learning_rate": 7.0041567337227125e-06, "loss": 0.4577, "step": 9492 }, { "epoch": 1.1828873628542111, "grad_norm": 2.0625, "learning_rate": 7.002252053509027e-06, "loss": 0.4899, "step": 9493 }, { "epoch": 1.1830136553792723, "grad_norm": 1.9609375, "learning_rate": 7.0003474927783585e-06, "loss": 0.4808, "step": 9494 }, { "epoch": 1.1831399479043334, "grad_norm": 1.96875, "learning_rate": 6.998443051606611e-06, "loss": 0.527, "step": 9495 }, { "epoch": 1.1832662404293945, "grad_norm": 2.03125, "learning_rate": 6.996538730069694e-06, "loss": 0.4758, "step": 9496 }, { "epoch": 1.1833925329544557, "grad_norm": 1.9140625, "learning_rate": 6.994634528243512e-06, "loss": 0.4164, "step": 9497 }, { "epoch": 1.183518825479517, "grad_norm": 1.9609375, "learning_rate": 6.992730446203956e-06, "loss": 0.5035, "step": 9498 }, { "epoch": 1.1836451180045782, "grad_norm": 2.140625, "learning_rate": 6.990826484026921e-06, "loss": 0.4712, "step": 9499 }, { "epoch": 1.1837714105296393, "grad_norm": 1.9296875, "learning_rate": 6.988922641788296e-06, "loss": 0.5521, "step": 9500 }, { "epoch": 1.1838977030547004, "grad_norm": 2.140625, "learning_rate": 6.987018919563961e-06, "loss": 0.5087, "step": 9501 }, { "epoch": 1.1840239955797616, "grad_norm": 2.015625, "learning_rate": 6.9851153174297945e-06, "loss": 0.5642, "step": 9502 }, { "epoch": 1.1841502881048227, "grad_norm": 1.8671875, "learning_rate": 6.9832118354616695e-06, "loss": 0.4131, "step": 9503 }, { "epoch": 1.184276580629884, "grad_norm": 1.9375, "learning_rate": 6.981308473735455e-06, "loss": 0.5166, "step": 9504 }, { "epoch": 1.1844028731549452, "grad_norm": 2.015625, "learning_rate": 6.979405232327015e-06, "loss": 0.4901, "step": 9505 }, { "epoch": 1.1845291656800063, "grad_norm": 1.9296875, "learning_rate": 6.977502111312205e-06, "loss": 0.4238, "step": 9506 }, { "epoch": 1.1846554582050675, "grad_norm": 1.8984375, "learning_rate": 6.975599110766881e-06, "loss": 0.4642, "step": 9507 }, { "epoch": 1.1847817507301286, "grad_norm": 2.015625, "learning_rate": 6.973696230766891e-06, "loss": 0.5682, "step": 9508 }, { "epoch": 1.1849080432551897, "grad_norm": 2.25, "learning_rate": 6.971793471388082e-06, "loss": 0.5202, "step": 9509 }, { "epoch": 1.185034335780251, "grad_norm": 1.8828125, "learning_rate": 6.96989083270629e-06, "loss": 0.5695, "step": 9510 }, { "epoch": 1.1851606283053122, "grad_norm": 1.9296875, "learning_rate": 6.967988314797349e-06, "loss": 0.5715, "step": 9511 }, { "epoch": 1.1852869208303733, "grad_norm": 1.9296875, "learning_rate": 6.966085917737091e-06, "loss": 0.4976, "step": 9512 }, { "epoch": 1.1854132133554345, "grad_norm": 1.8984375, "learning_rate": 6.964183641601342e-06, "loss": 0.4936, "step": 9513 }, { "epoch": 1.1855395058804956, "grad_norm": 1.859375, "learning_rate": 6.962281486465921e-06, "loss": 0.5069, "step": 9514 }, { "epoch": 1.185665798405557, "grad_norm": 1.9140625, "learning_rate": 6.9603794524066435e-06, "loss": 0.4446, "step": 9515 }, { "epoch": 1.185792090930618, "grad_norm": 2.0, "learning_rate": 6.9584775394993195e-06, "loss": 0.4746, "step": 9516 }, { "epoch": 1.1859183834556792, "grad_norm": 2.109375, "learning_rate": 6.956575747819755e-06, "loss": 0.528, "step": 9517 }, { "epoch": 1.1860446759807404, "grad_norm": 1.984375, "learning_rate": 6.954674077443751e-06, "loss": 0.5602, "step": 9518 }, { "epoch": 1.1861709685058015, "grad_norm": 2.265625, "learning_rate": 6.952772528447103e-06, "loss": 0.5425, "step": 9519 }, { "epoch": 1.1862972610308629, "grad_norm": 1.9609375, "learning_rate": 6.950871100905601e-06, "loss": 0.4288, "step": 9520 }, { "epoch": 1.186423553555924, "grad_norm": 2.171875, "learning_rate": 6.948969794895033e-06, "loss": 0.5145, "step": 9521 }, { "epoch": 1.1865498460809851, "grad_norm": 1.8125, "learning_rate": 6.947068610491181e-06, "loss": 0.4675, "step": 9522 }, { "epoch": 1.1866761386060463, "grad_norm": 2.140625, "learning_rate": 6.945167547769821e-06, "loss": 0.4911, "step": 9523 }, { "epoch": 1.1868024311311074, "grad_norm": 2.046875, "learning_rate": 6.943266606806725e-06, "loss": 0.5727, "step": 9524 }, { "epoch": 1.1869287236561685, "grad_norm": 1.9921875, "learning_rate": 6.9413657876776575e-06, "loss": 0.4812, "step": 9525 }, { "epoch": 1.1870550161812297, "grad_norm": 2.015625, "learning_rate": 6.939465090458384e-06, "loss": 0.5283, "step": 9526 }, { "epoch": 1.187181308706291, "grad_norm": 2.0, "learning_rate": 6.93756451522466e-06, "loss": 0.4619, "step": 9527 }, { "epoch": 1.1873076012313521, "grad_norm": 2.0625, "learning_rate": 6.935664062052236e-06, "loss": 0.5466, "step": 9528 }, { "epoch": 1.1874338937564133, "grad_norm": 1.8984375, "learning_rate": 6.9337637310168604e-06, "loss": 0.4557, "step": 9529 }, { "epoch": 1.1875601862814744, "grad_norm": 2.015625, "learning_rate": 6.93186352219428e-06, "loss": 0.533, "step": 9530 }, { "epoch": 1.1876864788065356, "grad_norm": 1.9921875, "learning_rate": 6.929963435660229e-06, "loss": 0.4597, "step": 9531 }, { "epoch": 1.187812771331597, "grad_norm": 1.90625, "learning_rate": 6.9280634714904405e-06, "loss": 0.5479, "step": 9532 }, { "epoch": 1.187939063856658, "grad_norm": 1.953125, "learning_rate": 6.926163629760645e-06, "loss": 0.5379, "step": 9533 }, { "epoch": 1.1880653563817192, "grad_norm": 1.8984375, "learning_rate": 6.924263910546561e-06, "loss": 0.5398, "step": 9534 }, { "epoch": 1.1881916489067803, "grad_norm": 1.96875, "learning_rate": 6.92236431392391e-06, "loss": 0.489, "step": 9535 }, { "epoch": 1.1883179414318414, "grad_norm": 1.9296875, "learning_rate": 6.920464839968405e-06, "loss": 0.4701, "step": 9536 }, { "epoch": 1.1884442339569028, "grad_norm": 1.8515625, "learning_rate": 6.918565488755753e-06, "loss": 0.5014, "step": 9537 }, { "epoch": 1.188570526481964, "grad_norm": 1.875, "learning_rate": 6.9166662603616596e-06, "loss": 0.5035, "step": 9538 }, { "epoch": 1.188696819007025, "grad_norm": 1.921875, "learning_rate": 6.9147671548618225e-06, "loss": 0.4953, "step": 9539 }, { "epoch": 1.1888231115320862, "grad_norm": 2.015625, "learning_rate": 6.912868172331935e-06, "loss": 0.5169, "step": 9540 }, { "epoch": 1.1889494040571473, "grad_norm": 1.9453125, "learning_rate": 6.910969312847686e-06, "loss": 0.5115, "step": 9541 }, { "epoch": 1.1890756965822085, "grad_norm": 2.03125, "learning_rate": 6.90907057648476e-06, "loss": 0.5297, "step": 9542 }, { "epoch": 1.1892019891072696, "grad_norm": 2.046875, "learning_rate": 6.907171963318835e-06, "loss": 0.4993, "step": 9543 }, { "epoch": 1.189328281632331, "grad_norm": 2.015625, "learning_rate": 6.905273473425587e-06, "loss": 0.5478, "step": 9544 }, { "epoch": 1.189454574157392, "grad_norm": 2.03125, "learning_rate": 6.903375106880683e-06, "loss": 0.5203, "step": 9545 }, { "epoch": 1.1895808666824532, "grad_norm": 2.015625, "learning_rate": 6.901476863759791e-06, "loss": 0.5694, "step": 9546 }, { "epoch": 1.1897071592075144, "grad_norm": 2.234375, "learning_rate": 6.899578744138566e-06, "loss": 0.5196, "step": 9547 }, { "epoch": 1.1898334517325755, "grad_norm": 1.921875, "learning_rate": 6.897680748092661e-06, "loss": 0.4675, "step": 9548 }, { "epoch": 1.1899597442576368, "grad_norm": 2.046875, "learning_rate": 6.8957828756977315e-06, "loss": 0.5045, "step": 9549 }, { "epoch": 1.190086036782698, "grad_norm": 1.9296875, "learning_rate": 6.893885127029422e-06, "loss": 0.5213, "step": 9550 }, { "epoch": 1.190212329307759, "grad_norm": 2.0, "learning_rate": 6.891987502163369e-06, "loss": 0.4598, "step": 9551 }, { "epoch": 1.1903386218328202, "grad_norm": 2.140625, "learning_rate": 6.890090001175207e-06, "loss": 0.524, "step": 9552 }, { "epoch": 1.1904649143578814, "grad_norm": 2.109375, "learning_rate": 6.888192624140568e-06, "loss": 0.5593, "step": 9553 }, { "epoch": 1.1905912068829427, "grad_norm": 1.8984375, "learning_rate": 6.886295371135075e-06, "loss": 0.5126, "step": 9554 }, { "epoch": 1.1907174994080039, "grad_norm": 2.109375, "learning_rate": 6.884398242234349e-06, "loss": 0.5513, "step": 9555 }, { "epoch": 1.190843791933065, "grad_norm": 2.078125, "learning_rate": 6.882501237514006e-06, "loss": 0.5223, "step": 9556 }, { "epoch": 1.1909700844581261, "grad_norm": 1.9375, "learning_rate": 6.8806043570496525e-06, "loss": 0.4833, "step": 9557 }, { "epoch": 1.1910963769831873, "grad_norm": 2.125, "learning_rate": 6.8787076009168966e-06, "loss": 0.5216, "step": 9558 }, { "epoch": 1.1912226695082484, "grad_norm": 2.015625, "learning_rate": 6.876810969191339e-06, "loss": 0.5195, "step": 9559 }, { "epoch": 1.1913489620333095, "grad_norm": 1.9140625, "learning_rate": 6.874914461948572e-06, "loss": 0.4915, "step": 9560 }, { "epoch": 1.1914752545583709, "grad_norm": 1.921875, "learning_rate": 6.873018079264186e-06, "loss": 0.4758, "step": 9561 }, { "epoch": 1.191601547083432, "grad_norm": 2.046875, "learning_rate": 6.871121821213771e-06, "loss": 0.5444, "step": 9562 }, { "epoch": 1.1917278396084932, "grad_norm": 2.03125, "learning_rate": 6.8692256878729e-06, "loss": 0.4657, "step": 9563 }, { "epoch": 1.1918541321335543, "grad_norm": 1.9765625, "learning_rate": 6.8673296793171555e-06, "loss": 0.5163, "step": 9564 }, { "epoch": 1.1919804246586154, "grad_norm": 1.8515625, "learning_rate": 6.865433795622103e-06, "loss": 0.4525, "step": 9565 }, { "epoch": 1.1921067171836768, "grad_norm": 2.0, "learning_rate": 6.863538036863304e-06, "loss": 0.5316, "step": 9566 }, { "epoch": 1.192233009708738, "grad_norm": 1.8984375, "learning_rate": 6.861642403116331e-06, "loss": 0.5413, "step": 9567 }, { "epoch": 1.192359302233799, "grad_norm": 2.203125, "learning_rate": 6.859746894456731e-06, "loss": 0.4491, "step": 9568 }, { "epoch": 1.1924855947588602, "grad_norm": 1.953125, "learning_rate": 6.8578515109600565e-06, "loss": 0.4634, "step": 9569 }, { "epoch": 1.1926118872839213, "grad_norm": 2.015625, "learning_rate": 6.855956252701852e-06, "loss": 0.5517, "step": 9570 }, { "epoch": 1.1927381798089827, "grad_norm": 1.875, "learning_rate": 6.854061119757658e-06, "loss": 0.4479, "step": 9571 }, { "epoch": 1.1928644723340438, "grad_norm": 1.921875, "learning_rate": 6.852166112203011e-06, "loss": 0.5068, "step": 9572 }, { "epoch": 1.192990764859105, "grad_norm": 2.1875, "learning_rate": 6.850271230113441e-06, "loss": 0.5961, "step": 9573 }, { "epoch": 1.193117057384166, "grad_norm": 1.96875, "learning_rate": 6.848376473564473e-06, "loss": 0.5125, "step": 9574 }, { "epoch": 1.1932433499092272, "grad_norm": 2.0, "learning_rate": 6.846481842631627e-06, "loss": 0.4606, "step": 9575 }, { "epoch": 1.1933696424342883, "grad_norm": 1.875, "learning_rate": 6.844587337390419e-06, "loss": 0.4507, "step": 9576 }, { "epoch": 1.1934959349593495, "grad_norm": 2.09375, "learning_rate": 6.8426929579163595e-06, "loss": 0.5703, "step": 9577 }, { "epoch": 1.1936222274844108, "grad_norm": 2.03125, "learning_rate": 6.840798704284955e-06, "loss": 0.5453, "step": 9578 }, { "epoch": 1.193748520009472, "grad_norm": 2.296875, "learning_rate": 6.8389045765717036e-06, "loss": 0.5158, "step": 9579 }, { "epoch": 1.193874812534533, "grad_norm": 1.953125, "learning_rate": 6.837010574852102e-06, "loss": 0.4941, "step": 9580 }, { "epoch": 1.1940011050595942, "grad_norm": 1.9375, "learning_rate": 6.83511669920164e-06, "loss": 0.4877, "step": 9581 }, { "epoch": 1.1941273975846554, "grad_norm": 2.0625, "learning_rate": 6.833222949695804e-06, "loss": 0.4846, "step": 9582 }, { "epoch": 1.1942536901097167, "grad_norm": 2.15625, "learning_rate": 6.831329326410073e-06, "loss": 0.5968, "step": 9583 }, { "epoch": 1.1943799826347778, "grad_norm": 1.953125, "learning_rate": 6.82943582941992e-06, "loss": 0.4717, "step": 9584 }, { "epoch": 1.194506275159839, "grad_norm": 1.9921875, "learning_rate": 6.82754245880082e-06, "loss": 0.4643, "step": 9585 }, { "epoch": 1.1946325676849001, "grad_norm": 2.0625, "learning_rate": 6.825649214628238e-06, "loss": 0.5688, "step": 9586 }, { "epoch": 1.1947588602099612, "grad_norm": 2.078125, "learning_rate": 6.823756096977632e-06, "loss": 0.4942, "step": 9587 }, { "epoch": 1.1948851527350226, "grad_norm": 2.03125, "learning_rate": 6.821863105924457e-06, "loss": 0.5568, "step": 9588 }, { "epoch": 1.1950114452600837, "grad_norm": 2.171875, "learning_rate": 6.8199702415441635e-06, "loss": 0.5689, "step": 9589 }, { "epoch": 1.1951377377851449, "grad_norm": 1.84375, "learning_rate": 6.818077503912197e-06, "loss": 0.4188, "step": 9590 }, { "epoch": 1.195264030310206, "grad_norm": 1.8828125, "learning_rate": 6.816184893103996e-06, "loss": 0.4779, "step": 9591 }, { "epoch": 1.1953903228352671, "grad_norm": 1.953125, "learning_rate": 6.814292409194998e-06, "loss": 0.4877, "step": 9592 }, { "epoch": 1.1955166153603283, "grad_norm": 1.9765625, "learning_rate": 6.812400052260631e-06, "loss": 0.5156, "step": 9593 }, { "epoch": 1.1956429078853896, "grad_norm": 1.8359375, "learning_rate": 6.81050782237632e-06, "loss": 0.4656, "step": 9594 }, { "epoch": 1.1957692004104508, "grad_norm": 2.078125, "learning_rate": 6.808615719617485e-06, "loss": 0.4865, "step": 9595 }, { "epoch": 1.195895492935512, "grad_norm": 1.8984375, "learning_rate": 6.80672374405954e-06, "loss": 0.533, "step": 9596 }, { "epoch": 1.196021785460573, "grad_norm": 1.875, "learning_rate": 6.804831895777896e-06, "loss": 0.4527, "step": 9597 }, { "epoch": 1.1961480779856342, "grad_norm": 2.0, "learning_rate": 6.802940174847956e-06, "loss": 0.4612, "step": 9598 }, { "epoch": 1.1962743705106953, "grad_norm": 1.7734375, "learning_rate": 6.80104858134512e-06, "loss": 0.4387, "step": 9599 }, { "epoch": 1.1964006630357567, "grad_norm": 2.28125, "learning_rate": 6.799157115344781e-06, "loss": 0.5087, "step": 9600 }, { "epoch": 1.1965269555608178, "grad_norm": 2.390625, "learning_rate": 6.797265776922331e-06, "loss": 0.5678, "step": 9601 }, { "epoch": 1.196653248085879, "grad_norm": 1.8359375, "learning_rate": 6.79537456615315e-06, "loss": 0.55, "step": 9602 }, { "epoch": 1.19677954061094, "grad_norm": 1.90625, "learning_rate": 6.793483483112622e-06, "loss": 0.5054, "step": 9603 }, { "epoch": 1.1969058331360012, "grad_norm": 1.8515625, "learning_rate": 6.79159252787612e-06, "loss": 0.4416, "step": 9604 }, { "epoch": 1.1970321256610625, "grad_norm": 1.96875, "learning_rate": 6.789701700519011e-06, "loss": 0.5028, "step": 9605 }, { "epoch": 1.1971584181861237, "grad_norm": 2.09375, "learning_rate": 6.787811001116661e-06, "loss": 0.651, "step": 9606 }, { "epoch": 1.1972847107111848, "grad_norm": 2.203125, "learning_rate": 6.785920429744427e-06, "loss": 0.5396, "step": 9607 }, { "epoch": 1.197411003236246, "grad_norm": 1.9453125, "learning_rate": 6.784029986477663e-06, "loss": 0.4977, "step": 9608 }, { "epoch": 1.197537295761307, "grad_norm": 1.9765625, "learning_rate": 6.7821396713917174e-06, "loss": 0.5001, "step": 9609 }, { "epoch": 1.1976635882863682, "grad_norm": 2.03125, "learning_rate": 6.780249484561934e-06, "loss": 0.4121, "step": 9610 }, { "epoch": 1.1977898808114296, "grad_norm": 1.9140625, "learning_rate": 6.778359426063651e-06, "loss": 0.4675, "step": 9611 }, { "epoch": 1.1979161733364907, "grad_norm": 1.9765625, "learning_rate": 6.776469495972201e-06, "loss": 0.5384, "step": 9612 }, { "epoch": 1.1980424658615518, "grad_norm": 2.140625, "learning_rate": 6.774579694362913e-06, "loss": 0.5588, "step": 9613 }, { "epoch": 1.198168758386613, "grad_norm": 2.125, "learning_rate": 6.772690021311109e-06, "loss": 0.557, "step": 9614 }, { "epoch": 1.198295050911674, "grad_norm": 2.109375, "learning_rate": 6.770800476892108e-06, "loss": 0.5058, "step": 9615 }, { "epoch": 1.1984213434367352, "grad_norm": 1.8984375, "learning_rate": 6.768911061181222e-06, "loss": 0.5066, "step": 9616 }, { "epoch": 1.1985476359617966, "grad_norm": 2.03125, "learning_rate": 6.767021774253757e-06, "loss": 0.4468, "step": 9617 }, { "epoch": 1.1986739284868577, "grad_norm": 2.0, "learning_rate": 6.765132616185019e-06, "loss": 0.5952, "step": 9618 }, { "epoch": 1.1988002210119189, "grad_norm": 1.9375, "learning_rate": 6.763243587050301e-06, "loss": 0.4413, "step": 9619 }, { "epoch": 1.19892651353698, "grad_norm": 2.046875, "learning_rate": 6.761354686924895e-06, "loss": 0.529, "step": 9620 }, { "epoch": 1.1990528060620411, "grad_norm": 1.9296875, "learning_rate": 6.7594659158840936e-06, "loss": 0.4733, "step": 9621 }, { "epoch": 1.1991790985871025, "grad_norm": 1.90625, "learning_rate": 6.757577274003178e-06, "loss": 0.4568, "step": 9622 }, { "epoch": 1.1993053911121636, "grad_norm": 1.7734375, "learning_rate": 6.75568876135742e-06, "loss": 0.4295, "step": 9623 }, { "epoch": 1.1994316836372247, "grad_norm": 1.8984375, "learning_rate": 6.753800378022093e-06, "loss": 0.4889, "step": 9624 }, { "epoch": 1.1995579761622859, "grad_norm": 2.09375, "learning_rate": 6.751912124072465e-06, "loss": 0.5174, "step": 9625 }, { "epoch": 1.199684268687347, "grad_norm": 1.921875, "learning_rate": 6.750023999583797e-06, "loss": 0.4939, "step": 9626 }, { "epoch": 1.1998105612124081, "grad_norm": 2.03125, "learning_rate": 6.748136004631342e-06, "loss": 0.5364, "step": 9627 }, { "epoch": 1.1999368537374695, "grad_norm": 2.03125, "learning_rate": 6.746248139290355e-06, "loss": 0.5418, "step": 9628 }, { "epoch": 1.2000631462625306, "grad_norm": 2.03125, "learning_rate": 6.74436040363608e-06, "loss": 0.505, "step": 9629 }, { "epoch": 1.2001894387875918, "grad_norm": 2.3125, "learning_rate": 6.742472797743757e-06, "loss": 0.5212, "step": 9630 }, { "epoch": 1.200315731312653, "grad_norm": 2.109375, "learning_rate": 6.740585321688623e-06, "loss": 0.5346, "step": 9631 }, { "epoch": 1.200442023837714, "grad_norm": 1.8828125, "learning_rate": 6.738697975545907e-06, "loss": 0.4552, "step": 9632 }, { "epoch": 1.2005683163627752, "grad_norm": 2.03125, "learning_rate": 6.736810759390833e-06, "loss": 0.4927, "step": 9633 }, { "epoch": 1.2006946088878365, "grad_norm": 1.9296875, "learning_rate": 6.7349236732986235e-06, "loss": 0.4948, "step": 9634 }, { "epoch": 1.2008209014128977, "grad_norm": 1.859375, "learning_rate": 6.733036717344492e-06, "loss": 0.4766, "step": 9635 }, { "epoch": 1.2009471939379588, "grad_norm": 1.9140625, "learning_rate": 6.731149891603648e-06, "loss": 0.4686, "step": 9636 }, { "epoch": 1.20107348646302, "grad_norm": 2.015625, "learning_rate": 6.7292631961512945e-06, "loss": 0.5686, "step": 9637 }, { "epoch": 1.201199778988081, "grad_norm": 2.15625, "learning_rate": 6.72737663106263e-06, "loss": 0.4253, "step": 9638 }, { "epoch": 1.2013260715131424, "grad_norm": 1.84375, "learning_rate": 6.725490196412853e-06, "loss": 0.417, "step": 9639 }, { "epoch": 1.2014523640382035, "grad_norm": 1.9765625, "learning_rate": 6.72360389227715e-06, "loss": 0.5118, "step": 9640 }, { "epoch": 1.2015786565632647, "grad_norm": 1.9609375, "learning_rate": 6.721717718730704e-06, "loss": 0.528, "step": 9641 }, { "epoch": 1.2017049490883258, "grad_norm": 1.9375, "learning_rate": 6.719831675848693e-06, "loss": 0.5225, "step": 9642 }, { "epoch": 1.201831241613387, "grad_norm": 1.921875, "learning_rate": 6.717945763706291e-06, "loss": 0.5084, "step": 9643 }, { "epoch": 1.2019575341384483, "grad_norm": 1.984375, "learning_rate": 6.716059982378667e-06, "loss": 0.432, "step": 9644 }, { "epoch": 1.2020838266635094, "grad_norm": 1.9921875, "learning_rate": 6.71417433194098e-06, "loss": 0.522, "step": 9645 }, { "epoch": 1.2022101191885706, "grad_norm": 2.0625, "learning_rate": 6.71228881246839e-06, "loss": 0.5376, "step": 9646 }, { "epoch": 1.2023364117136317, "grad_norm": 1.8359375, "learning_rate": 6.7104034240360504e-06, "loss": 0.4284, "step": 9647 }, { "epoch": 1.2024627042386928, "grad_norm": 1.8671875, "learning_rate": 6.708518166719107e-06, "loss": 0.4979, "step": 9648 }, { "epoch": 1.202588996763754, "grad_norm": 2.078125, "learning_rate": 6.7066330405927e-06, "loss": 0.5063, "step": 9649 }, { "epoch": 1.202715289288815, "grad_norm": 1.8984375, "learning_rate": 6.704748045731968e-06, "loss": 0.5178, "step": 9650 }, { "epoch": 1.2028415818138765, "grad_norm": 2.046875, "learning_rate": 6.7028631822120414e-06, "loss": 0.4569, "step": 9651 }, { "epoch": 1.2029678743389376, "grad_norm": 1.828125, "learning_rate": 6.700978450108047e-06, "loss": 0.4586, "step": 9652 }, { "epoch": 1.2030941668639987, "grad_norm": 2.34375, "learning_rate": 6.699093849495106e-06, "loss": 0.5067, "step": 9653 }, { "epoch": 1.2032204593890599, "grad_norm": 1.8671875, "learning_rate": 6.697209380448333e-06, "loss": 0.4425, "step": 9654 }, { "epoch": 1.203346751914121, "grad_norm": 1.9765625, "learning_rate": 6.695325043042838e-06, "loss": 0.5239, "step": 9655 }, { "epoch": 1.2034730444391823, "grad_norm": 2.140625, "learning_rate": 6.693440837353724e-06, "loss": 0.5909, "step": 9656 }, { "epoch": 1.2035993369642435, "grad_norm": 2.0625, "learning_rate": 6.691556763456097e-06, "loss": 0.4959, "step": 9657 }, { "epoch": 1.2037256294893046, "grad_norm": 1.96875, "learning_rate": 6.68967282142505e-06, "loss": 0.502, "step": 9658 }, { "epoch": 1.2038519220143657, "grad_norm": 2.109375, "learning_rate": 6.687789011335669e-06, "loss": 0.5193, "step": 9659 }, { "epoch": 1.2039782145394269, "grad_norm": 2.015625, "learning_rate": 6.6859053332630406e-06, "loss": 0.5672, "step": 9660 }, { "epoch": 1.2041045070644882, "grad_norm": 2.03125, "learning_rate": 6.684021787282244e-06, "loss": 0.5506, "step": 9661 }, { "epoch": 1.2042307995895494, "grad_norm": 2.109375, "learning_rate": 6.682138373468352e-06, "loss": 0.6554, "step": 9662 }, { "epoch": 1.2043570921146105, "grad_norm": 2.15625, "learning_rate": 6.680255091896433e-06, "loss": 0.502, "step": 9663 }, { "epoch": 1.2044833846396716, "grad_norm": 1.96875, "learning_rate": 6.678371942641548e-06, "loss": 0.5256, "step": 9664 }, { "epoch": 1.2046096771647328, "grad_norm": 1.953125, "learning_rate": 6.6764889257787594e-06, "loss": 0.4767, "step": 9665 }, { "epoch": 1.204735969689794, "grad_norm": 2.078125, "learning_rate": 6.6746060413831145e-06, "loss": 0.4848, "step": 9666 }, { "epoch": 1.204862262214855, "grad_norm": 1.9296875, "learning_rate": 6.672723289529665e-06, "loss": 0.4657, "step": 9667 }, { "epoch": 1.2049885547399164, "grad_norm": 1.9921875, "learning_rate": 6.67084067029345e-06, "loss": 0.5079, "step": 9668 }, { "epoch": 1.2051148472649775, "grad_norm": 2.0, "learning_rate": 6.668958183749508e-06, "loss": 0.5138, "step": 9669 }, { "epoch": 1.2052411397900387, "grad_norm": 2.0625, "learning_rate": 6.667075829972868e-06, "loss": 0.4303, "step": 9670 }, { "epoch": 1.2053674323150998, "grad_norm": 2.09375, "learning_rate": 6.665193609038558e-06, "loss": 0.5729, "step": 9671 }, { "epoch": 1.205493724840161, "grad_norm": 1.921875, "learning_rate": 6.6633115210215985e-06, "loss": 0.479, "step": 9672 }, { "epoch": 1.2056200173652223, "grad_norm": 1.8359375, "learning_rate": 6.661429565997005e-06, "loss": 0.4696, "step": 9673 }, { "epoch": 1.2057463098902834, "grad_norm": 1.984375, "learning_rate": 6.6595477440397825e-06, "loss": 0.5507, "step": 9674 }, { "epoch": 1.2058726024153446, "grad_norm": 2.109375, "learning_rate": 6.657666055224947e-06, "loss": 0.5284, "step": 9675 }, { "epoch": 1.2059988949404057, "grad_norm": 2.109375, "learning_rate": 6.655784499627491e-06, "loss": 0.4799, "step": 9676 }, { "epoch": 1.2061251874654668, "grad_norm": 2.125, "learning_rate": 6.65390307732241e-06, "loss": 0.5186, "step": 9677 }, { "epoch": 1.2062514799905282, "grad_norm": 1.984375, "learning_rate": 6.6520217883846926e-06, "loss": 0.486, "step": 9678 }, { "epoch": 1.2063777725155893, "grad_norm": 1.9296875, "learning_rate": 6.650140632889323e-06, "loss": 0.4731, "step": 9679 }, { "epoch": 1.2065040650406504, "grad_norm": 2.015625, "learning_rate": 6.648259610911279e-06, "loss": 0.4522, "step": 9680 }, { "epoch": 1.2066303575657116, "grad_norm": 2.1875, "learning_rate": 6.646378722525532e-06, "loss": 0.5882, "step": 9681 }, { "epoch": 1.2067566500907727, "grad_norm": 1.9765625, "learning_rate": 6.6444979678070534e-06, "loss": 0.4038, "step": 9682 }, { "epoch": 1.2068829426158338, "grad_norm": 2.015625, "learning_rate": 6.642617346830803e-06, "loss": 0.5389, "step": 9683 }, { "epoch": 1.207009235140895, "grad_norm": 1.953125, "learning_rate": 6.640736859671739e-06, "loss": 0.5103, "step": 9684 }, { "epoch": 1.2071355276659563, "grad_norm": 2.03125, "learning_rate": 6.638856506404812e-06, "loss": 0.5369, "step": 9685 }, { "epoch": 1.2072618201910175, "grad_norm": 1.921875, "learning_rate": 6.636976287104968e-06, "loss": 0.4564, "step": 9686 }, { "epoch": 1.2073881127160786, "grad_norm": 1.8515625, "learning_rate": 6.63509620184715e-06, "loss": 0.4337, "step": 9687 }, { "epoch": 1.2075144052411397, "grad_norm": 1.7890625, "learning_rate": 6.633216250706291e-06, "loss": 0.4324, "step": 9688 }, { "epoch": 1.2076406977662009, "grad_norm": 1.96875, "learning_rate": 6.631336433757325e-06, "loss": 0.6208, "step": 9689 }, { "epoch": 1.2077669902912622, "grad_norm": 1.8125, "learning_rate": 6.629456751075174e-06, "loss": 0.4299, "step": 9690 }, { "epoch": 1.2078932828163234, "grad_norm": 2.109375, "learning_rate": 6.627577202734759e-06, "loss": 0.5012, "step": 9691 }, { "epoch": 1.2080195753413845, "grad_norm": 2.296875, "learning_rate": 6.625697788810987e-06, "loss": 0.531, "step": 9692 }, { "epoch": 1.2081458678664456, "grad_norm": 2.109375, "learning_rate": 6.62381850937878e-06, "loss": 0.5122, "step": 9693 }, { "epoch": 1.2082721603915068, "grad_norm": 2.234375, "learning_rate": 6.6219393645130344e-06, "loss": 0.5085, "step": 9694 }, { "epoch": 1.208398452916568, "grad_norm": 1.8671875, "learning_rate": 6.62006035428865e-06, "loss": 0.4888, "step": 9695 }, { "epoch": 1.2085247454416292, "grad_norm": 2.03125, "learning_rate": 6.6181814787805175e-06, "loss": 0.4568, "step": 9696 }, { "epoch": 1.2086510379666904, "grad_norm": 1.9140625, "learning_rate": 6.616302738063527e-06, "loss": 0.4913, "step": 9697 }, { "epoch": 1.2087773304917515, "grad_norm": 1.9453125, "learning_rate": 6.614424132212558e-06, "loss": 0.5131, "step": 9698 }, { "epoch": 1.2089036230168126, "grad_norm": 1.96875, "learning_rate": 6.61254566130249e-06, "loss": 0.6095, "step": 9699 }, { "epoch": 1.2090299155418738, "grad_norm": 1.875, "learning_rate": 6.610667325408191e-06, "loss": 0.5108, "step": 9700 }, { "epoch": 1.2091562080669351, "grad_norm": 1.875, "learning_rate": 6.608789124604529e-06, "loss": 0.4441, "step": 9701 }, { "epoch": 1.2092825005919963, "grad_norm": 1.8359375, "learning_rate": 6.606911058966366e-06, "loss": 0.4173, "step": 9702 }, { "epoch": 1.2094087931170574, "grad_norm": 1.7890625, "learning_rate": 6.605033128568553e-06, "loss": 0.4482, "step": 9703 }, { "epoch": 1.2095350856421185, "grad_norm": 2.0, "learning_rate": 6.603155333485945e-06, "loss": 0.4984, "step": 9704 }, { "epoch": 1.2096613781671797, "grad_norm": 1.921875, "learning_rate": 6.601277673793381e-06, "loss": 0.4439, "step": 9705 }, { "epoch": 1.2097876706922408, "grad_norm": 1.8515625, "learning_rate": 6.599400149565703e-06, "loss": 0.4236, "step": 9706 }, { "epoch": 1.2099139632173022, "grad_norm": 2.03125, "learning_rate": 6.597522760877745e-06, "loss": 0.4921, "step": 9707 }, { "epoch": 1.2100402557423633, "grad_norm": 1.9765625, "learning_rate": 6.595645507804333e-06, "loss": 0.5215, "step": 9708 }, { "epoch": 1.2101665482674244, "grad_norm": 1.8203125, "learning_rate": 6.593768390420291e-06, "loss": 0.5177, "step": 9709 }, { "epoch": 1.2102928407924856, "grad_norm": 2.140625, "learning_rate": 6.591891408800435e-06, "loss": 0.5134, "step": 9710 }, { "epoch": 1.2104191333175467, "grad_norm": 1.8203125, "learning_rate": 6.5900145630195826e-06, "loss": 0.4238, "step": 9711 }, { "epoch": 1.210545425842608, "grad_norm": 1.9921875, "learning_rate": 6.588137853152534e-06, "loss": 0.4981, "step": 9712 }, { "epoch": 1.2106717183676692, "grad_norm": 1.828125, "learning_rate": 6.586261279274093e-06, "loss": 0.5231, "step": 9713 }, { "epoch": 1.2107980108927303, "grad_norm": 2.234375, "learning_rate": 6.584384841459056e-06, "loss": 0.5505, "step": 9714 }, { "epoch": 1.2109243034177914, "grad_norm": 2.015625, "learning_rate": 6.582508539782212e-06, "loss": 0.4996, "step": 9715 }, { "epoch": 1.2110505959428526, "grad_norm": 1.8828125, "learning_rate": 6.5806323743183455e-06, "loss": 0.5273, "step": 9716 }, { "epoch": 1.2111768884679137, "grad_norm": 1.859375, "learning_rate": 6.578756345142236e-06, "loss": 0.4363, "step": 9717 }, { "epoch": 1.211303180992975, "grad_norm": 1.875, "learning_rate": 6.576880452328659e-06, "loss": 0.3816, "step": 9718 }, { "epoch": 1.2114294735180362, "grad_norm": 1.859375, "learning_rate": 6.575004695952381e-06, "loss": 0.4304, "step": 9719 }, { "epoch": 1.2115557660430973, "grad_norm": 2.125, "learning_rate": 6.5731290760881675e-06, "loss": 0.4419, "step": 9720 }, { "epoch": 1.2116820585681585, "grad_norm": 1.75, "learning_rate": 6.571253592810774e-06, "loss": 0.4442, "step": 9721 }, { "epoch": 1.2118083510932196, "grad_norm": 1.8515625, "learning_rate": 6.569378246194955e-06, "loss": 0.4626, "step": 9722 }, { "epoch": 1.2119346436182807, "grad_norm": 1.9453125, "learning_rate": 6.567503036315454e-06, "loss": 0.4968, "step": 9723 }, { "epoch": 1.212060936143342, "grad_norm": 2.125, "learning_rate": 6.565627963247015e-06, "loss": 0.4868, "step": 9724 }, { "epoch": 1.2121872286684032, "grad_norm": 1.859375, "learning_rate": 6.5637530270643725e-06, "loss": 0.4545, "step": 9725 }, { "epoch": 1.2123135211934644, "grad_norm": 1.953125, "learning_rate": 6.561878227842258e-06, "loss": 0.4741, "step": 9726 }, { "epoch": 1.2124398137185255, "grad_norm": 2.09375, "learning_rate": 6.560003565655396e-06, "loss": 0.4611, "step": 9727 }, { "epoch": 1.2125661062435866, "grad_norm": 1.90625, "learning_rate": 6.558129040578505e-06, "loss": 0.4871, "step": 9728 }, { "epoch": 1.212692398768648, "grad_norm": 2.109375, "learning_rate": 6.556254652686302e-06, "loss": 0.4483, "step": 9729 }, { "epoch": 1.2128186912937091, "grad_norm": 2.03125, "learning_rate": 6.554380402053493e-06, "loss": 0.517, "step": 9730 }, { "epoch": 1.2129449838187703, "grad_norm": 1.875, "learning_rate": 6.552506288754783e-06, "loss": 0.5071, "step": 9731 }, { "epoch": 1.2130712763438314, "grad_norm": 1.9453125, "learning_rate": 6.550632312864869e-06, "loss": 0.5619, "step": 9732 }, { "epoch": 1.2131975688688925, "grad_norm": 1.96875, "learning_rate": 6.548758474458441e-06, "loss": 0.4729, "step": 9733 }, { "epoch": 1.2133238613939537, "grad_norm": 1.875, "learning_rate": 6.546884773610187e-06, "loss": 0.4639, "step": 9734 }, { "epoch": 1.213450153919015, "grad_norm": 2.125, "learning_rate": 6.545011210394791e-06, "loss": 0.5209, "step": 9735 }, { "epoch": 1.2135764464440761, "grad_norm": 1.984375, "learning_rate": 6.543137784886924e-06, "loss": 0.523, "step": 9736 }, { "epoch": 1.2137027389691373, "grad_norm": 1.7890625, "learning_rate": 6.54126449716126e-06, "loss": 0.4705, "step": 9737 }, { "epoch": 1.2138290314941984, "grad_norm": 1.859375, "learning_rate": 6.539391347292461e-06, "loss": 0.473, "step": 9738 }, { "epoch": 1.2139553240192595, "grad_norm": 2.0, "learning_rate": 6.537518335355189e-06, "loss": 0.4521, "step": 9739 }, { "epoch": 1.2140816165443207, "grad_norm": 2.25, "learning_rate": 6.535645461424096e-06, "loss": 0.5437, "step": 9740 }, { "epoch": 1.214207909069382, "grad_norm": 2.03125, "learning_rate": 6.533772725573827e-06, "loss": 0.5167, "step": 9741 }, { "epoch": 1.2143342015944432, "grad_norm": 1.953125, "learning_rate": 6.5319001278790315e-06, "loss": 0.4774, "step": 9742 }, { "epoch": 1.2144604941195043, "grad_norm": 2.046875, "learning_rate": 6.530027668414342e-06, "loss": 0.4837, "step": 9743 }, { "epoch": 1.2145867866445654, "grad_norm": 2.015625, "learning_rate": 6.528155347254395e-06, "loss": 0.5383, "step": 9744 }, { "epoch": 1.2147130791696266, "grad_norm": 1.9609375, "learning_rate": 6.52628316447381e-06, "loss": 0.5405, "step": 9745 }, { "epoch": 1.214839371694688, "grad_norm": 1.75, "learning_rate": 6.52441112014721e-06, "loss": 0.4627, "step": 9746 }, { "epoch": 1.214965664219749, "grad_norm": 1.9140625, "learning_rate": 6.522539214349214e-06, "loss": 0.4659, "step": 9747 }, { "epoch": 1.2150919567448102, "grad_norm": 1.921875, "learning_rate": 6.520667447154429e-06, "loss": 0.4712, "step": 9748 }, { "epoch": 1.2152182492698713, "grad_norm": 1.8359375, "learning_rate": 6.5187958186374575e-06, "loss": 0.4945, "step": 9749 }, { "epoch": 1.2153445417949325, "grad_norm": 2.1875, "learning_rate": 6.516924328872901e-06, "loss": 0.4782, "step": 9750 }, { "epoch": 1.2154708343199938, "grad_norm": 1.90625, "learning_rate": 6.515052977935351e-06, "loss": 0.4715, "step": 9751 }, { "epoch": 1.215597126845055, "grad_norm": 1.890625, "learning_rate": 6.513181765899395e-06, "loss": 0.4951, "step": 9752 }, { "epoch": 1.215723419370116, "grad_norm": 1.984375, "learning_rate": 6.511310692839616e-06, "loss": 0.4872, "step": 9753 }, { "epoch": 1.2158497118951772, "grad_norm": 1.890625, "learning_rate": 6.509439758830589e-06, "loss": 0.4551, "step": 9754 }, { "epoch": 1.2159760044202383, "grad_norm": 1.984375, "learning_rate": 6.507568963946887e-06, "loss": 0.5371, "step": 9755 }, { "epoch": 1.2161022969452995, "grad_norm": 1.8359375, "learning_rate": 6.505698308263072e-06, "loss": 0.4353, "step": 9756 }, { "epoch": 1.2162285894703606, "grad_norm": 2.234375, "learning_rate": 6.503827791853705e-06, "loss": 0.6019, "step": 9757 }, { "epoch": 1.216354881995422, "grad_norm": 1.875, "learning_rate": 6.5019574147933426e-06, "loss": 0.4686, "step": 9758 }, { "epoch": 1.216481174520483, "grad_norm": 2.0, "learning_rate": 6.500087177156534e-06, "loss": 0.5161, "step": 9759 }, { "epoch": 1.2166074670455442, "grad_norm": 2.15625, "learning_rate": 6.498217079017818e-06, "loss": 0.5, "step": 9760 }, { "epoch": 1.2167337595706054, "grad_norm": 1.9765625, "learning_rate": 6.496347120451736e-06, "loss": 0.4961, "step": 9761 }, { "epoch": 1.2168600520956665, "grad_norm": 1.9453125, "learning_rate": 6.4944773015328176e-06, "loss": 0.4707, "step": 9762 }, { "epoch": 1.2169863446207279, "grad_norm": 1.8984375, "learning_rate": 6.492607622335591e-06, "loss": 0.5034, "step": 9763 }, { "epoch": 1.217112637145789, "grad_norm": 1.9453125, "learning_rate": 6.490738082934576e-06, "loss": 0.5131, "step": 9764 }, { "epoch": 1.2172389296708501, "grad_norm": 1.953125, "learning_rate": 6.488868683404288e-06, "loss": 0.511, "step": 9765 }, { "epoch": 1.2173652221959113, "grad_norm": 2.03125, "learning_rate": 6.486999423819238e-06, "loss": 0.5236, "step": 9766 }, { "epoch": 1.2174915147209724, "grad_norm": 2.078125, "learning_rate": 6.48513030425393e-06, "loss": 0.6019, "step": 9767 }, { "epoch": 1.2176178072460337, "grad_norm": 1.953125, "learning_rate": 6.483261324782861e-06, "loss": 0.4372, "step": 9768 }, { "epoch": 1.2177440997710949, "grad_norm": 1.9375, "learning_rate": 6.481392485480527e-06, "loss": 0.5888, "step": 9769 }, { "epoch": 1.217870392296156, "grad_norm": 1.8984375, "learning_rate": 6.479523786421411e-06, "loss": 0.4857, "step": 9770 }, { "epoch": 1.2179966848212171, "grad_norm": 2.03125, "learning_rate": 6.477655227679997e-06, "loss": 0.5074, "step": 9771 }, { "epoch": 1.2181229773462783, "grad_norm": 1.8515625, "learning_rate": 6.4757868093307645e-06, "loss": 0.4923, "step": 9772 }, { "epoch": 1.2182492698713394, "grad_norm": 2.125, "learning_rate": 6.4739185314481765e-06, "loss": 0.4865, "step": 9773 }, { "epoch": 1.2183755623964005, "grad_norm": 2.015625, "learning_rate": 6.4720503941067055e-06, "loss": 0.5433, "step": 9774 }, { "epoch": 1.218501854921462, "grad_norm": 1.8828125, "learning_rate": 6.470182397380809e-06, "loss": 0.492, "step": 9775 }, { "epoch": 1.218628147446523, "grad_norm": 1.921875, "learning_rate": 6.46831454134494e-06, "loss": 0.4819, "step": 9776 }, { "epoch": 1.2187544399715842, "grad_norm": 2.0625, "learning_rate": 6.466446826073546e-06, "loss": 0.5569, "step": 9777 }, { "epoch": 1.2188807324966453, "grad_norm": 1.9453125, "learning_rate": 6.46457925164107e-06, "loss": 0.4552, "step": 9778 }, { "epoch": 1.2190070250217064, "grad_norm": 1.8671875, "learning_rate": 6.462711818121951e-06, "loss": 0.489, "step": 9779 }, { "epoch": 1.2191333175467678, "grad_norm": 2.140625, "learning_rate": 6.460844525590617e-06, "loss": 0.6372, "step": 9780 }, { "epoch": 1.219259610071829, "grad_norm": 2.015625, "learning_rate": 6.4589773741214965e-06, "loss": 0.5829, "step": 9781 }, { "epoch": 1.21938590259689, "grad_norm": 1.90625, "learning_rate": 6.457110363789008e-06, "loss": 0.5394, "step": 9782 }, { "epoch": 1.2195121951219512, "grad_norm": 2.0, "learning_rate": 6.455243494667567e-06, "loss": 0.6552, "step": 9783 }, { "epoch": 1.2196384876470123, "grad_norm": 1.8515625, "learning_rate": 6.453376766831583e-06, "loss": 0.5079, "step": 9784 }, { "epoch": 1.2197647801720737, "grad_norm": 2.015625, "learning_rate": 6.451510180355458e-06, "loss": 0.4663, "step": 9785 }, { "epoch": 1.2198910726971348, "grad_norm": 1.8125, "learning_rate": 6.449643735313591e-06, "loss": 0.4804, "step": 9786 }, { "epoch": 1.220017365222196, "grad_norm": 1.828125, "learning_rate": 6.447777431780372e-06, "loss": 0.4526, "step": 9787 }, { "epoch": 1.220143657747257, "grad_norm": 2.15625, "learning_rate": 6.445911269830189e-06, "loss": 0.5404, "step": 9788 }, { "epoch": 1.2202699502723182, "grad_norm": 1.8359375, "learning_rate": 6.444045249537421e-06, "loss": 0.422, "step": 9789 }, { "epoch": 1.2203962427973793, "grad_norm": 1.96875, "learning_rate": 6.442179370976445e-06, "loss": 0.6057, "step": 9790 }, { "epoch": 1.2205225353224405, "grad_norm": 1.984375, "learning_rate": 6.440313634221631e-06, "loss": 0.5105, "step": 9791 }, { "epoch": 1.2206488278475018, "grad_norm": 1.8671875, "learning_rate": 6.438448039347341e-06, "loss": 0.4847, "step": 9792 }, { "epoch": 1.220775120372563, "grad_norm": 2.046875, "learning_rate": 6.436582586427933e-06, "loss": 0.5438, "step": 9793 }, { "epoch": 1.220901412897624, "grad_norm": 2.21875, "learning_rate": 6.4347172755377605e-06, "loss": 0.5161, "step": 9794 }, { "epoch": 1.2210277054226852, "grad_norm": 2.0, "learning_rate": 6.43285210675117e-06, "loss": 0.4775, "step": 9795 }, { "epoch": 1.2211539979477464, "grad_norm": 1.8203125, "learning_rate": 6.430987080142502e-06, "loss": 0.4917, "step": 9796 }, { "epoch": 1.2212802904728077, "grad_norm": 2.109375, "learning_rate": 6.4291221957860925e-06, "loss": 0.4671, "step": 9797 }, { "epoch": 1.2214065829978689, "grad_norm": 1.890625, "learning_rate": 6.4272574537562725e-06, "loss": 0.5236, "step": 9798 }, { "epoch": 1.22153287552293, "grad_norm": 1.9921875, "learning_rate": 6.425392854127363e-06, "loss": 0.483, "step": 9799 }, { "epoch": 1.2216591680479911, "grad_norm": 1.9375, "learning_rate": 6.4235283969736835e-06, "loss": 0.5401, "step": 9800 }, { "epoch": 1.2217854605730523, "grad_norm": 1.8828125, "learning_rate": 6.421664082369551e-06, "loss": 0.4782, "step": 9801 }, { "epoch": 1.2219117530981136, "grad_norm": 1.9453125, "learning_rate": 6.419799910389267e-06, "loss": 0.5005, "step": 9802 }, { "epoch": 1.2220380456231748, "grad_norm": 2.078125, "learning_rate": 6.417935881107136e-06, "loss": 0.5034, "step": 9803 }, { "epoch": 1.2221643381482359, "grad_norm": 2.015625, "learning_rate": 6.416071994597452e-06, "loss": 0.4824, "step": 9804 }, { "epoch": 1.222290630673297, "grad_norm": 2.15625, "learning_rate": 6.4142082509345065e-06, "loss": 0.5345, "step": 9805 }, { "epoch": 1.2224169231983582, "grad_norm": 1.8984375, "learning_rate": 6.412344650192583e-06, "loss": 0.5286, "step": 9806 }, { "epoch": 1.2225432157234193, "grad_norm": 2.140625, "learning_rate": 6.4104811924459606e-06, "loss": 0.5279, "step": 9807 }, { "epoch": 1.2226695082484804, "grad_norm": 1.9609375, "learning_rate": 6.408617877768912e-06, "loss": 0.4566, "step": 9808 }, { "epoch": 1.2227958007735418, "grad_norm": 2.0, "learning_rate": 6.406754706235704e-06, "loss": 0.4974, "step": 9809 }, { "epoch": 1.222922093298603, "grad_norm": 1.9609375, "learning_rate": 6.404891677920599e-06, "loss": 0.5491, "step": 9810 }, { "epoch": 1.223048385823664, "grad_norm": 2.140625, "learning_rate": 6.403028792897852e-06, "loss": 0.5037, "step": 9811 }, { "epoch": 1.2231746783487252, "grad_norm": 1.90625, "learning_rate": 6.401166051241714e-06, "loss": 0.4917, "step": 9812 }, { "epoch": 1.2233009708737863, "grad_norm": 1.9609375, "learning_rate": 6.399303453026429e-06, "loss": 0.5715, "step": 9813 }, { "epoch": 1.2234272633988477, "grad_norm": 2.140625, "learning_rate": 6.3974409983262345e-06, "loss": 0.4864, "step": 9814 }, { "epoch": 1.2235535559239088, "grad_norm": 2.078125, "learning_rate": 6.3955786872153654e-06, "loss": 0.5326, "step": 9815 }, { "epoch": 1.22367984844897, "grad_norm": 1.90625, "learning_rate": 6.393716519768047e-06, "loss": 0.4854, "step": 9816 }, { "epoch": 1.223806140974031, "grad_norm": 2.09375, "learning_rate": 6.3918544960585036e-06, "loss": 0.5515, "step": 9817 }, { "epoch": 1.2239324334990922, "grad_norm": 1.90625, "learning_rate": 6.389992616160947e-06, "loss": 0.488, "step": 9818 }, { "epoch": 1.2240587260241536, "grad_norm": 2.015625, "learning_rate": 6.38813088014959e-06, "loss": 0.5191, "step": 9819 }, { "epoch": 1.2241850185492147, "grad_norm": 2.0625, "learning_rate": 6.386269288098637e-06, "loss": 0.5335, "step": 9820 }, { "epoch": 1.2243113110742758, "grad_norm": 1.8515625, "learning_rate": 6.384407840082285e-06, "loss": 0.4837, "step": 9821 }, { "epoch": 1.224437603599337, "grad_norm": 1.9140625, "learning_rate": 6.382546536174728e-06, "loss": 0.4537, "step": 9822 }, { "epoch": 1.224563896124398, "grad_norm": 1.9140625, "learning_rate": 6.380685376450154e-06, "loss": 0.4787, "step": 9823 }, { "epoch": 1.2246901886494592, "grad_norm": 1.828125, "learning_rate": 6.378824360982743e-06, "loss": 0.4517, "step": 9824 }, { "epoch": 1.2248164811745206, "grad_norm": 2.015625, "learning_rate": 6.3769634898466705e-06, "loss": 0.452, "step": 9825 }, { "epoch": 1.2249427736995817, "grad_norm": 1.734375, "learning_rate": 6.375102763116108e-06, "loss": 0.417, "step": 9826 }, { "epoch": 1.2250690662246428, "grad_norm": 1.859375, "learning_rate": 6.3732421808652166e-06, "loss": 0.4709, "step": 9827 }, { "epoch": 1.225195358749704, "grad_norm": 1.859375, "learning_rate": 6.371381743168156e-06, "loss": 0.5111, "step": 9828 }, { "epoch": 1.2253216512747651, "grad_norm": 2.09375, "learning_rate": 6.369521450099081e-06, "loss": 0.5226, "step": 9829 }, { "epoch": 1.2254479437998262, "grad_norm": 1.96875, "learning_rate": 6.367661301732136e-06, "loss": 0.5664, "step": 9830 }, { "epoch": 1.2255742363248876, "grad_norm": 1.9296875, "learning_rate": 6.365801298141461e-06, "loss": 0.4981, "step": 9831 }, { "epoch": 1.2257005288499487, "grad_norm": 1.8984375, "learning_rate": 6.363941439401193e-06, "loss": 0.4728, "step": 9832 }, { "epoch": 1.2258268213750099, "grad_norm": 2.03125, "learning_rate": 6.3620817255854625e-06, "loss": 0.5677, "step": 9833 }, { "epoch": 1.225953113900071, "grad_norm": 2.015625, "learning_rate": 6.360222156768392e-06, "loss": 0.475, "step": 9834 }, { "epoch": 1.2260794064251321, "grad_norm": 1.9765625, "learning_rate": 6.358362733024098e-06, "loss": 0.5026, "step": 9835 }, { "epoch": 1.2262056989501935, "grad_norm": 2.171875, "learning_rate": 6.356503454426692e-06, "loss": 0.5858, "step": 9836 }, { "epoch": 1.2263319914752546, "grad_norm": 2.140625, "learning_rate": 6.3546443210502835e-06, "loss": 0.4187, "step": 9837 }, { "epoch": 1.2264582840003158, "grad_norm": 2.21875, "learning_rate": 6.352785332968974e-06, "loss": 0.5426, "step": 9838 }, { "epoch": 1.226584576525377, "grad_norm": 2.109375, "learning_rate": 6.350926490256857e-06, "loss": 0.5443, "step": 9839 }, { "epoch": 1.226710869050438, "grad_norm": 1.8046875, "learning_rate": 6.349067792988018e-06, "loss": 0.4558, "step": 9840 }, { "epoch": 1.2268371615754992, "grad_norm": 1.984375, "learning_rate": 6.347209241236545e-06, "loss": 0.4945, "step": 9841 }, { "epoch": 1.2269634541005605, "grad_norm": 1.9453125, "learning_rate": 6.345350835076513e-06, "loss": 0.477, "step": 9842 }, { "epoch": 1.2270897466256216, "grad_norm": 2.328125, "learning_rate": 6.3434925745819945e-06, "loss": 0.5727, "step": 9843 }, { "epoch": 1.2272160391506828, "grad_norm": 2.109375, "learning_rate": 6.341634459827053e-06, "loss": 0.5479, "step": 9844 }, { "epoch": 1.227342331675744, "grad_norm": 1.875, "learning_rate": 6.3397764908857516e-06, "loss": 0.5068, "step": 9845 }, { "epoch": 1.227468624200805, "grad_norm": 2.015625, "learning_rate": 6.337918667832144e-06, "loss": 0.516, "step": 9846 }, { "epoch": 1.2275949167258662, "grad_norm": 2.0625, "learning_rate": 6.336060990740276e-06, "loss": 0.4504, "step": 9847 }, { "epoch": 1.2277212092509275, "grad_norm": 2.109375, "learning_rate": 6.334203459684192e-06, "loss": 0.5641, "step": 9848 }, { "epoch": 1.2278475017759887, "grad_norm": 1.9921875, "learning_rate": 6.332346074737927e-06, "loss": 0.5125, "step": 9849 }, { "epoch": 1.2279737943010498, "grad_norm": 2.015625, "learning_rate": 6.330488835975516e-06, "loss": 0.5372, "step": 9850 }, { "epoch": 1.228100086826111, "grad_norm": 1.8671875, "learning_rate": 6.328631743470981e-06, "loss": 0.4645, "step": 9851 }, { "epoch": 1.228226379351172, "grad_norm": 2.0, "learning_rate": 6.32677479729834e-06, "loss": 0.5359, "step": 9852 }, { "epoch": 1.2283526718762334, "grad_norm": 2.078125, "learning_rate": 6.32491799753161e-06, "loss": 0.4887, "step": 9853 }, { "epoch": 1.2284789644012946, "grad_norm": 1.828125, "learning_rate": 6.323061344244791e-06, "loss": 0.4199, "step": 9854 }, { "epoch": 1.2286052569263557, "grad_norm": 2.265625, "learning_rate": 6.321204837511895e-06, "loss": 0.5752, "step": 9855 }, { "epoch": 1.2287315494514168, "grad_norm": 1.890625, "learning_rate": 6.319348477406914e-06, "loss": 0.5013, "step": 9856 }, { "epoch": 1.228857841976478, "grad_norm": 1.890625, "learning_rate": 6.317492264003836e-06, "loss": 0.4399, "step": 9857 }, { "epoch": 1.228984134501539, "grad_norm": 1.96875, "learning_rate": 6.315636197376647e-06, "loss": 0.681, "step": 9858 }, { "epoch": 1.2291104270266004, "grad_norm": 2.015625, "learning_rate": 6.313780277599325e-06, "loss": 0.5242, "step": 9859 }, { "epoch": 1.2292367195516616, "grad_norm": 2.03125, "learning_rate": 6.311924504745842e-06, "loss": 0.522, "step": 9860 }, { "epoch": 1.2293630120767227, "grad_norm": 2.09375, "learning_rate": 6.3100688788901655e-06, "loss": 0.5711, "step": 9861 }, { "epoch": 1.2294893046017839, "grad_norm": 1.9375, "learning_rate": 6.3082134001062555e-06, "loss": 0.5398, "step": 9862 }, { "epoch": 1.229615597126845, "grad_norm": 1.953125, "learning_rate": 6.306358068468067e-06, "loss": 0.5075, "step": 9863 }, { "epoch": 1.2297418896519061, "grad_norm": 2.15625, "learning_rate": 6.304502884049549e-06, "loss": 0.5108, "step": 9864 }, { "epoch": 1.2298681821769675, "grad_norm": 2.171875, "learning_rate": 6.3026478469246455e-06, "loss": 0.5292, "step": 9865 }, { "epoch": 1.2299944747020286, "grad_norm": 1.8125, "learning_rate": 6.300792957167292e-06, "loss": 0.4945, "step": 9866 }, { "epoch": 1.2301207672270897, "grad_norm": 1.9609375, "learning_rate": 6.298938214851422e-06, "loss": 0.5524, "step": 9867 }, { "epoch": 1.2302470597521509, "grad_norm": 1.8046875, "learning_rate": 6.2970836200509614e-06, "loss": 0.4006, "step": 9868 }, { "epoch": 1.230373352277212, "grad_norm": 2.234375, "learning_rate": 6.295229172839827e-06, "loss": 0.574, "step": 9869 }, { "epoch": 1.2304996448022734, "grad_norm": 1.984375, "learning_rate": 6.293374873291935e-06, "loss": 0.4933, "step": 9870 }, { "epoch": 1.2306259373273345, "grad_norm": 1.84375, "learning_rate": 6.2915207214811926e-06, "loss": 0.4877, "step": 9871 }, { "epoch": 1.2307522298523956, "grad_norm": 2.046875, "learning_rate": 6.289666717481497e-06, "loss": 0.4949, "step": 9872 }, { "epoch": 1.2308785223774568, "grad_norm": 2.359375, "learning_rate": 6.2878128613667535e-06, "loss": 0.6247, "step": 9873 }, { "epoch": 1.231004814902518, "grad_norm": 1.8828125, "learning_rate": 6.285959153210848e-06, "loss": 0.4891, "step": 9874 }, { "epoch": 1.2311311074275793, "grad_norm": 2.28125, "learning_rate": 6.2841055930876645e-06, "loss": 0.5143, "step": 9875 }, { "epoch": 1.2312573999526404, "grad_norm": 1.890625, "learning_rate": 6.282252181071082e-06, "loss": 0.4543, "step": 9876 }, { "epoch": 1.2313836924777015, "grad_norm": 1.8125, "learning_rate": 6.280398917234973e-06, "loss": 0.4422, "step": 9877 }, { "epoch": 1.2315099850027627, "grad_norm": 1.875, "learning_rate": 6.278545801653204e-06, "loss": 0.4722, "step": 9878 }, { "epoch": 1.2316362775278238, "grad_norm": 1.9296875, "learning_rate": 6.276692834399636e-06, "loss": 0.5172, "step": 9879 }, { "epoch": 1.231762570052885, "grad_norm": 1.8828125, "learning_rate": 6.274840015548122e-06, "loss": 0.4846, "step": 9880 }, { "epoch": 1.231888862577946, "grad_norm": 1.9921875, "learning_rate": 6.272987345172514e-06, "loss": 0.4689, "step": 9881 }, { "epoch": 1.2320151551030074, "grad_norm": 1.8984375, "learning_rate": 6.271134823346654e-06, "loss": 0.4344, "step": 9882 }, { "epoch": 1.2321414476280685, "grad_norm": 1.828125, "learning_rate": 6.269282450144376e-06, "loss": 0.4751, "step": 9883 }, { "epoch": 1.2322677401531297, "grad_norm": 1.9375, "learning_rate": 6.267430225639515e-06, "loss": 0.5071, "step": 9884 }, { "epoch": 1.2323940326781908, "grad_norm": 2.296875, "learning_rate": 6.265578149905896e-06, "loss": 0.5881, "step": 9885 }, { "epoch": 1.232520325203252, "grad_norm": 2.015625, "learning_rate": 6.263726223017334e-06, "loss": 0.4986, "step": 9886 }, { "epoch": 1.2326466177283133, "grad_norm": 1.8125, "learning_rate": 6.261874445047648e-06, "loss": 0.3929, "step": 9887 }, { "epoch": 1.2327729102533744, "grad_norm": 1.890625, "learning_rate": 6.260022816070641e-06, "loss": 0.4655, "step": 9888 }, { "epoch": 1.2328992027784356, "grad_norm": 1.984375, "learning_rate": 6.258171336160117e-06, "loss": 0.4493, "step": 9889 }, { "epoch": 1.2330254953034967, "grad_norm": 1.9296875, "learning_rate": 6.2563200053898666e-06, "loss": 0.477, "step": 9890 }, { "epoch": 1.2331517878285578, "grad_norm": 1.8828125, "learning_rate": 6.254468823833688e-06, "loss": 0.5408, "step": 9891 }, { "epoch": 1.2332780803536192, "grad_norm": 1.984375, "learning_rate": 6.25261779156536e-06, "loss": 0.513, "step": 9892 }, { "epoch": 1.2334043728786803, "grad_norm": 2.015625, "learning_rate": 6.250766908658661e-06, "loss": 0.537, "step": 9893 }, { "epoch": 1.2335306654037415, "grad_norm": 2.046875, "learning_rate": 6.2489161751873615e-06, "loss": 0.5474, "step": 9894 }, { "epoch": 1.2336569579288026, "grad_norm": 2.09375, "learning_rate": 6.2470655912252295e-06, "loss": 0.5367, "step": 9895 }, { "epoch": 1.2337832504538637, "grad_norm": 2.140625, "learning_rate": 6.245215156846023e-06, "loss": 0.4816, "step": 9896 }, { "epoch": 1.2339095429789249, "grad_norm": 2.078125, "learning_rate": 6.243364872123496e-06, "loss": 0.4462, "step": 9897 }, { "epoch": 1.234035835503986, "grad_norm": 1.8125, "learning_rate": 6.2415147371313975e-06, "loss": 0.4266, "step": 9898 }, { "epoch": 1.2341621280290473, "grad_norm": 2.03125, "learning_rate": 6.239664751943468e-06, "loss": 0.4621, "step": 9899 }, { "epoch": 1.2342884205541085, "grad_norm": 2.03125, "learning_rate": 6.237814916633444e-06, "loss": 0.4617, "step": 9900 }, { "epoch": 1.2344147130791696, "grad_norm": 1.953125, "learning_rate": 6.235965231275056e-06, "loss": 0.5091, "step": 9901 }, { "epoch": 1.2345410056042307, "grad_norm": 1.8828125, "learning_rate": 6.234115695942028e-06, "loss": 0.4662, "step": 9902 }, { "epoch": 1.2346672981292919, "grad_norm": 1.8671875, "learning_rate": 6.232266310708077e-06, "loss": 0.4696, "step": 9903 }, { "epoch": 1.2347935906543532, "grad_norm": 2.1875, "learning_rate": 6.230417075646916e-06, "loss": 0.5669, "step": 9904 }, { "epoch": 1.2349198831794144, "grad_norm": 1.890625, "learning_rate": 6.228567990832251e-06, "loss": 0.4883, "step": 9905 }, { "epoch": 1.2350461757044755, "grad_norm": 2.203125, "learning_rate": 6.226719056337782e-06, "loss": 0.5005, "step": 9906 }, { "epoch": 1.2351724682295366, "grad_norm": 1.953125, "learning_rate": 6.224870272237202e-06, "loss": 0.5167, "step": 9907 }, { "epoch": 1.2352987607545978, "grad_norm": 1.9140625, "learning_rate": 6.2230216386041965e-06, "loss": 0.4851, "step": 9908 }, { "epoch": 1.2354250532796591, "grad_norm": 2.0625, "learning_rate": 6.221173155512456e-06, "loss": 0.505, "step": 9909 }, { "epoch": 1.2355513458047203, "grad_norm": 2.03125, "learning_rate": 6.219324823035651e-06, "loss": 0.5308, "step": 9910 }, { "epoch": 1.2356776383297814, "grad_norm": 1.84375, "learning_rate": 6.217476641247451e-06, "loss": 0.4811, "step": 9911 }, { "epoch": 1.2358039308548425, "grad_norm": 1.984375, "learning_rate": 6.2156286102215225e-06, "loss": 0.5222, "step": 9912 }, { "epoch": 1.2359302233799037, "grad_norm": 2.171875, "learning_rate": 6.213780730031522e-06, "loss": 0.5134, "step": 9913 }, { "epoch": 1.2360565159049648, "grad_norm": 2.015625, "learning_rate": 6.211933000751102e-06, "loss": 0.5022, "step": 9914 }, { "epoch": 1.236182808430026, "grad_norm": 1.8203125, "learning_rate": 6.210085422453909e-06, "loss": 0.4211, "step": 9915 }, { "epoch": 1.2363091009550873, "grad_norm": 1.984375, "learning_rate": 6.208237995213582e-06, "loss": 0.5165, "step": 9916 }, { "epoch": 1.2364353934801484, "grad_norm": 1.9921875, "learning_rate": 6.206390719103754e-06, "loss": 0.476, "step": 9917 }, { "epoch": 1.2365616860052095, "grad_norm": 1.890625, "learning_rate": 6.204543594198057e-06, "loss": 0.4946, "step": 9918 }, { "epoch": 1.2366879785302707, "grad_norm": 2.0, "learning_rate": 6.20269662057011e-06, "loss": 0.467, "step": 9919 }, { "epoch": 1.2368142710553318, "grad_norm": 2.015625, "learning_rate": 6.200849798293528e-06, "loss": 0.5742, "step": 9920 }, { "epoch": 1.2369405635803932, "grad_norm": 1.984375, "learning_rate": 6.199003127441923e-06, "loss": 0.505, "step": 9921 }, { "epoch": 1.2370668561054543, "grad_norm": 2.09375, "learning_rate": 6.197156608088898e-06, "loss": 0.4771, "step": 9922 }, { "epoch": 1.2371931486305154, "grad_norm": 1.96875, "learning_rate": 6.19531024030805e-06, "loss": 0.5415, "step": 9923 }, { "epoch": 1.2373194411555766, "grad_norm": 2.140625, "learning_rate": 6.193464024172973e-06, "loss": 0.4894, "step": 9924 }, { "epoch": 1.2374457336806377, "grad_norm": 2.140625, "learning_rate": 6.19161795975725e-06, "loss": 0.5327, "step": 9925 }, { "epoch": 1.237572026205699, "grad_norm": 1.9140625, "learning_rate": 6.189772047134459e-06, "loss": 0.5725, "step": 9926 }, { "epoch": 1.2376983187307602, "grad_norm": 2.078125, "learning_rate": 6.187926286378181e-06, "loss": 0.5556, "step": 9927 }, { "epoch": 1.2378246112558213, "grad_norm": 1.984375, "learning_rate": 6.1860806775619785e-06, "loss": 0.5371, "step": 9928 }, { "epoch": 1.2379509037808825, "grad_norm": 1.8359375, "learning_rate": 6.1842352207594145e-06, "loss": 0.4381, "step": 9929 }, { "epoch": 1.2380771963059436, "grad_norm": 2.15625, "learning_rate": 6.182389916044044e-06, "loss": 0.5585, "step": 9930 }, { "epoch": 1.2382034888310047, "grad_norm": 2.125, "learning_rate": 6.180544763489416e-06, "loss": 0.574, "step": 9931 }, { "epoch": 1.238329781356066, "grad_norm": 1.859375, "learning_rate": 6.178699763169073e-06, "loss": 0.4799, "step": 9932 }, { "epoch": 1.2384560738811272, "grad_norm": 2.046875, "learning_rate": 6.176854915156554e-06, "loss": 0.4965, "step": 9933 }, { "epoch": 1.2385823664061884, "grad_norm": 1.8671875, "learning_rate": 6.175010219525388e-06, "loss": 0.4928, "step": 9934 }, { "epoch": 1.2387086589312495, "grad_norm": 2.09375, "learning_rate": 6.173165676349103e-06, "loss": 0.5198, "step": 9935 }, { "epoch": 1.2388349514563106, "grad_norm": 1.984375, "learning_rate": 6.171321285701218e-06, "loss": 0.4914, "step": 9936 }, { "epoch": 1.2389612439813718, "grad_norm": 2.21875, "learning_rate": 6.169477047655241e-06, "loss": 0.526, "step": 9937 }, { "epoch": 1.239087536506433, "grad_norm": 1.8515625, "learning_rate": 6.167632962284686e-06, "loss": 0.4838, "step": 9938 }, { "epoch": 1.2392138290314942, "grad_norm": 2.046875, "learning_rate": 6.165789029663048e-06, "loss": 0.5528, "step": 9939 }, { "epoch": 1.2393401215565554, "grad_norm": 1.9453125, "learning_rate": 6.163945249863825e-06, "loss": 0.4212, "step": 9940 }, { "epoch": 1.2394664140816165, "grad_norm": 2.140625, "learning_rate": 6.162101622960505e-06, "loss": 0.4955, "step": 9941 }, { "epoch": 1.2395927066066776, "grad_norm": 1.8828125, "learning_rate": 6.16025814902657e-06, "loss": 0.436, "step": 9942 }, { "epoch": 1.239718999131739, "grad_norm": 1.9375, "learning_rate": 6.158414828135495e-06, "loss": 0.4693, "step": 9943 }, { "epoch": 1.2398452916568001, "grad_norm": 2.234375, "learning_rate": 6.156571660360754e-06, "loss": 0.4993, "step": 9944 }, { "epoch": 1.2399715841818613, "grad_norm": 1.875, "learning_rate": 6.1547286457758036e-06, "loss": 0.5043, "step": 9945 }, { "epoch": 1.2400978767069224, "grad_norm": 1.8984375, "learning_rate": 6.152885784454112e-06, "loss": 0.4826, "step": 9946 }, { "epoch": 1.2402241692319835, "grad_norm": 1.9453125, "learning_rate": 6.1510430764691275e-06, "loss": 0.5056, "step": 9947 }, { "epoch": 1.2403504617570447, "grad_norm": 2.140625, "learning_rate": 6.149200521894294e-06, "loss": 0.539, "step": 9948 }, { "epoch": 1.240476754282106, "grad_norm": 1.8828125, "learning_rate": 6.147358120803053e-06, "loss": 0.561, "step": 9949 }, { "epoch": 1.2406030468071672, "grad_norm": 1.7578125, "learning_rate": 6.145515873268838e-06, "loss": 0.4955, "step": 9950 }, { "epoch": 1.2407293393322283, "grad_norm": 2.015625, "learning_rate": 6.1436737793650754e-06, "loss": 0.5209, "step": 9951 }, { "epoch": 1.2408556318572894, "grad_norm": 2.203125, "learning_rate": 6.1418318391651886e-06, "loss": 0.5331, "step": 9952 }, { "epoch": 1.2409819243823506, "grad_norm": 1.96875, "learning_rate": 6.139990052742591e-06, "loss": 0.5161, "step": 9953 }, { "epoch": 1.2411082169074117, "grad_norm": 1.9140625, "learning_rate": 6.138148420170691e-06, "loss": 0.441, "step": 9954 }, { "epoch": 1.241234509432473, "grad_norm": 1.984375, "learning_rate": 6.136306941522896e-06, "loss": 0.4757, "step": 9955 }, { "epoch": 1.2413608019575342, "grad_norm": 1.859375, "learning_rate": 6.134465616872598e-06, "loss": 0.4625, "step": 9956 }, { "epoch": 1.2414870944825953, "grad_norm": 1.9375, "learning_rate": 6.13262444629319e-06, "loss": 0.5707, "step": 9957 }, { "epoch": 1.2416133870076564, "grad_norm": 1.953125, "learning_rate": 6.130783429858056e-06, "loss": 0.4791, "step": 9958 }, { "epoch": 1.2417396795327176, "grad_norm": 1.953125, "learning_rate": 6.128942567640577e-06, "loss": 0.4926, "step": 9959 }, { "epoch": 1.241865972057779, "grad_norm": 2.03125, "learning_rate": 6.127101859714122e-06, "loss": 0.4935, "step": 9960 }, { "epoch": 1.24199226458284, "grad_norm": 1.875, "learning_rate": 6.125261306152057e-06, "loss": 0.5172, "step": 9961 }, { "epoch": 1.2421185571079012, "grad_norm": 1.921875, "learning_rate": 6.123420907027745e-06, "loss": 0.5293, "step": 9962 }, { "epoch": 1.2422448496329623, "grad_norm": 2.09375, "learning_rate": 6.121580662414535e-06, "loss": 0.515, "step": 9963 }, { "epoch": 1.2423711421580235, "grad_norm": 1.921875, "learning_rate": 6.11974057238578e-06, "loss": 0.4679, "step": 9964 }, { "epoch": 1.2424974346830846, "grad_norm": 1.9140625, "learning_rate": 6.1179006370148205e-06, "loss": 0.5247, "step": 9965 }, { "epoch": 1.242623727208146, "grad_norm": 1.859375, "learning_rate": 6.116060856374991e-06, "loss": 0.4759, "step": 9966 }, { "epoch": 1.242750019733207, "grad_norm": 1.9296875, "learning_rate": 6.114221230539621e-06, "loss": 0.555, "step": 9967 }, { "epoch": 1.2428763122582682, "grad_norm": 1.9609375, "learning_rate": 6.112381759582033e-06, "loss": 0.515, "step": 9968 }, { "epoch": 1.2430026047833294, "grad_norm": 2.09375, "learning_rate": 6.110542443575545e-06, "loss": 0.5299, "step": 9969 }, { "epoch": 1.2431288973083905, "grad_norm": 1.890625, "learning_rate": 6.1087032825934655e-06, "loss": 0.4689, "step": 9970 }, { "epoch": 1.2432551898334516, "grad_norm": 1.921875, "learning_rate": 6.106864276709102e-06, "loss": 0.4724, "step": 9971 }, { "epoch": 1.243381482358513, "grad_norm": 1.8671875, "learning_rate": 6.10502542599575e-06, "loss": 0.5271, "step": 9972 }, { "epoch": 1.2435077748835741, "grad_norm": 1.9609375, "learning_rate": 6.103186730526702e-06, "loss": 0.4459, "step": 9973 }, { "epoch": 1.2436340674086352, "grad_norm": 2.15625, "learning_rate": 6.101348190375247e-06, "loss": 0.5133, "step": 9974 }, { "epoch": 1.2437603599336964, "grad_norm": 1.90625, "learning_rate": 6.099509805614661e-06, "loss": 0.5232, "step": 9975 }, { "epoch": 1.2438866524587575, "grad_norm": 1.9921875, "learning_rate": 6.0976715763182204e-06, "loss": 0.6051, "step": 9976 }, { "epoch": 1.2440129449838189, "grad_norm": 1.953125, "learning_rate": 6.09583350255919e-06, "loss": 0.5169, "step": 9977 }, { "epoch": 1.24413923750888, "grad_norm": 2.125, "learning_rate": 6.0939955844108335e-06, "loss": 0.5304, "step": 9978 }, { "epoch": 1.2442655300339411, "grad_norm": 1.75, "learning_rate": 6.092157821946405e-06, "loss": 0.4375, "step": 9979 }, { "epoch": 1.2443918225590023, "grad_norm": 2.078125, "learning_rate": 6.090320215239151e-06, "loss": 0.483, "step": 9980 }, { "epoch": 1.2445181150840634, "grad_norm": 1.8828125, "learning_rate": 6.088482764362316e-06, "loss": 0.4797, "step": 9981 }, { "epoch": 1.2446444076091248, "grad_norm": 1.9765625, "learning_rate": 6.086645469389137e-06, "loss": 0.5044, "step": 9982 }, { "epoch": 1.244770700134186, "grad_norm": 2.078125, "learning_rate": 6.0848083303928455e-06, "loss": 0.5631, "step": 9983 }, { "epoch": 1.244896992659247, "grad_norm": 2.015625, "learning_rate": 6.082971347446662e-06, "loss": 0.4723, "step": 9984 }, { "epoch": 1.2450232851843082, "grad_norm": 1.8828125, "learning_rate": 6.081134520623808e-06, "loss": 0.4549, "step": 9985 }, { "epoch": 1.2451495777093693, "grad_norm": 2.03125, "learning_rate": 6.079297849997492e-06, "loss": 0.4676, "step": 9986 }, { "epoch": 1.2452758702344304, "grad_norm": 2.078125, "learning_rate": 6.077461335640919e-06, "loss": 0.47, "step": 9987 }, { "epoch": 1.2454021627594916, "grad_norm": 2.140625, "learning_rate": 6.07562497762729e-06, "loss": 0.5065, "step": 9988 }, { "epoch": 1.245528455284553, "grad_norm": 1.890625, "learning_rate": 6.073788776029798e-06, "loss": 0.4926, "step": 9989 }, { "epoch": 1.245654747809614, "grad_norm": 1.9921875, "learning_rate": 6.071952730921628e-06, "loss": 0.5523, "step": 9990 }, { "epoch": 1.2457810403346752, "grad_norm": 1.953125, "learning_rate": 6.070116842375959e-06, "loss": 0.5042, "step": 9991 }, { "epoch": 1.2459073328597363, "grad_norm": 2.125, "learning_rate": 6.068281110465969e-06, "loss": 0.4941, "step": 9992 }, { "epoch": 1.2460336253847974, "grad_norm": 2.109375, "learning_rate": 6.066445535264823e-06, "loss": 0.534, "step": 9993 }, { "epoch": 1.2461599179098588, "grad_norm": 1.9375, "learning_rate": 6.064610116845684e-06, "loss": 0.4748, "step": 9994 }, { "epoch": 1.24628621043492, "grad_norm": 1.9453125, "learning_rate": 6.062774855281706e-06, "loss": 0.4794, "step": 9995 }, { "epoch": 1.246412502959981, "grad_norm": 2.296875, "learning_rate": 6.060939750646039e-06, "loss": 0.5764, "step": 9996 }, { "epoch": 1.2465387954850422, "grad_norm": 2.09375, "learning_rate": 6.059104803011826e-06, "loss": 0.4968, "step": 9997 }, { "epoch": 1.2466650880101033, "grad_norm": 2.171875, "learning_rate": 6.0572700124522034e-06, "loss": 0.5411, "step": 9998 }, { "epoch": 1.2467913805351647, "grad_norm": 1.8828125, "learning_rate": 6.055435379040296e-06, "loss": 0.4481, "step": 9999 }, { "epoch": 1.2469176730602258, "grad_norm": 1.9609375, "learning_rate": 6.0536009028492395e-06, "loss": 0.4967, "step": 10000 }, { "epoch": 1.247043965585287, "grad_norm": 2.078125, "learning_rate": 6.051766583952144e-06, "loss": 0.5264, "step": 10001 }, { "epoch": 1.247170258110348, "grad_norm": 1.8359375, "learning_rate": 6.0499324224221235e-06, "loss": 0.4541, "step": 10002 }, { "epoch": 1.2472965506354092, "grad_norm": 2.1875, "learning_rate": 6.048098418332282e-06, "loss": 0.5785, "step": 10003 }, { "epoch": 1.2474228431604704, "grad_norm": 2.0, "learning_rate": 6.046264571755718e-06, "loss": 0.4485, "step": 10004 }, { "epoch": 1.2475491356855315, "grad_norm": 1.8984375, "learning_rate": 6.0444308827655265e-06, "loss": 0.4889, "step": 10005 }, { "epoch": 1.2476754282105929, "grad_norm": 1.90625, "learning_rate": 6.042597351434791e-06, "loss": 0.5425, "step": 10006 }, { "epoch": 1.247801720735654, "grad_norm": 1.875, "learning_rate": 6.040763977836595e-06, "loss": 0.4825, "step": 10007 }, { "epoch": 1.2479280132607151, "grad_norm": 1.8671875, "learning_rate": 6.038930762044009e-06, "loss": 0.4391, "step": 10008 }, { "epoch": 1.2480543057857763, "grad_norm": 1.875, "learning_rate": 6.037097704130101e-06, "loss": 0.5273, "step": 10009 }, { "epoch": 1.2481805983108374, "grad_norm": 2.125, "learning_rate": 6.035264804167934e-06, "loss": 0.5302, "step": 10010 }, { "epoch": 1.2483068908358987, "grad_norm": 2.0, "learning_rate": 6.033432062230563e-06, "loss": 0.5014, "step": 10011 }, { "epoch": 1.2484331833609599, "grad_norm": 1.953125, "learning_rate": 6.0315994783910345e-06, "loss": 0.5069, "step": 10012 }, { "epoch": 1.248559475886021, "grad_norm": 2.0625, "learning_rate": 6.029767052722395e-06, "loss": 0.532, "step": 10013 }, { "epoch": 1.2486857684110821, "grad_norm": 2.078125, "learning_rate": 6.027934785297675e-06, "loss": 0.5133, "step": 10014 }, { "epoch": 1.2488120609361433, "grad_norm": 2.125, "learning_rate": 6.026102676189909e-06, "loss": 0.6072, "step": 10015 }, { "epoch": 1.2489383534612046, "grad_norm": 1.9375, "learning_rate": 6.024270725472118e-06, "loss": 0.5006, "step": 10016 }, { "epoch": 1.2490646459862658, "grad_norm": 2.078125, "learning_rate": 6.022438933217316e-06, "loss": 0.5163, "step": 10017 }, { "epoch": 1.249190938511327, "grad_norm": 2.078125, "learning_rate": 6.0206072994985195e-06, "loss": 0.5922, "step": 10018 }, { "epoch": 1.249317231036388, "grad_norm": 1.9765625, "learning_rate": 6.018775824388734e-06, "loss": 0.4642, "step": 10019 }, { "epoch": 1.2494435235614492, "grad_norm": 2.03125, "learning_rate": 6.0169445079609535e-06, "loss": 0.5398, "step": 10020 }, { "epoch": 1.2495698160865103, "grad_norm": 2.09375, "learning_rate": 6.0151133502881705e-06, "loss": 0.5119, "step": 10021 }, { "epoch": 1.2496961086115714, "grad_norm": 1.921875, "learning_rate": 6.013282351443372e-06, "loss": 0.4833, "step": 10022 }, { "epoch": 1.2498224011366328, "grad_norm": 2.15625, "learning_rate": 6.011451511499537e-06, "loss": 0.4453, "step": 10023 }, { "epoch": 1.249948693661694, "grad_norm": 2.0625, "learning_rate": 6.0096208305296366e-06, "loss": 0.6839, "step": 10024 }, { "epoch": 1.250074986186755, "grad_norm": 1.8984375, "learning_rate": 6.007790308606638e-06, "loss": 0.4536, "step": 10025 }, { "epoch": 1.2502012787118162, "grad_norm": 2.046875, "learning_rate": 6.0059599458035035e-06, "loss": 0.4745, "step": 10026 }, { "epoch": 1.2503275712368773, "grad_norm": 1.984375, "learning_rate": 6.004129742193184e-06, "loss": 0.4549, "step": 10027 }, { "epoch": 1.2504538637619387, "grad_norm": 1.953125, "learning_rate": 6.002299697848629e-06, "loss": 0.4777, "step": 10028 }, { "epoch": 1.2505801562869998, "grad_norm": 1.9140625, "learning_rate": 6.00046981284278e-06, "loss": 0.5314, "step": 10029 }, { "epoch": 1.250706448812061, "grad_norm": 1.8671875, "learning_rate": 5.9986400872485685e-06, "loss": 0.4322, "step": 10030 }, { "epoch": 1.250832741337122, "grad_norm": 2.015625, "learning_rate": 5.996810521138926e-06, "loss": 0.4841, "step": 10031 }, { "epoch": 1.2509590338621832, "grad_norm": 2.328125, "learning_rate": 5.9949811145867745e-06, "loss": 0.6134, "step": 10032 }, { "epoch": 1.2510853263872446, "grad_norm": 1.8671875, "learning_rate": 5.993151867665028e-06, "loss": 0.444, "step": 10033 }, { "epoch": 1.2512116189123057, "grad_norm": 1.8359375, "learning_rate": 5.991322780446595e-06, "loss": 0.4736, "step": 10034 }, { "epoch": 1.2513379114373668, "grad_norm": 1.921875, "learning_rate": 5.989493853004382e-06, "loss": 0.4808, "step": 10035 }, { "epoch": 1.251464203962428, "grad_norm": 1.859375, "learning_rate": 5.987665085411285e-06, "loss": 0.4867, "step": 10036 }, { "epoch": 1.251590496487489, "grad_norm": 2.0, "learning_rate": 5.9858364777401946e-06, "loss": 0.4494, "step": 10037 }, { "epoch": 1.2517167890125505, "grad_norm": 1.90625, "learning_rate": 5.984008030063993e-06, "loss": 0.4708, "step": 10038 }, { "epoch": 1.2518430815376114, "grad_norm": 1.875, "learning_rate": 5.982179742455556e-06, "loss": 0.5494, "step": 10039 }, { "epoch": 1.2519693740626727, "grad_norm": 1.9453125, "learning_rate": 5.980351614987759e-06, "loss": 0.479, "step": 10040 }, { "epoch": 1.2520956665877339, "grad_norm": 2.078125, "learning_rate": 5.978523647733465e-06, "loss": 0.5506, "step": 10041 }, { "epoch": 1.252221959112795, "grad_norm": 2.03125, "learning_rate": 5.9766958407655315e-06, "loss": 0.4453, "step": 10042 }, { "epoch": 1.2523482516378561, "grad_norm": 2.109375, "learning_rate": 5.974868194156811e-06, "loss": 0.5665, "step": 10043 }, { "epoch": 1.2524745441629173, "grad_norm": 1.9140625, "learning_rate": 5.973040707980149e-06, "loss": 0.4736, "step": 10044 }, { "epoch": 1.2526008366879786, "grad_norm": 1.9921875, "learning_rate": 5.971213382308385e-06, "loss": 0.4716, "step": 10045 }, { "epoch": 1.2527271292130397, "grad_norm": 1.984375, "learning_rate": 5.969386217214351e-06, "loss": 0.4658, "step": 10046 }, { "epoch": 1.2528534217381009, "grad_norm": 2.15625, "learning_rate": 5.967559212770875e-06, "loss": 0.4921, "step": 10047 }, { "epoch": 1.252979714263162, "grad_norm": 2.125, "learning_rate": 5.9657323690507766e-06, "loss": 0.4739, "step": 10048 }, { "epoch": 1.2531060067882231, "grad_norm": 1.875, "learning_rate": 5.963905686126867e-06, "loss": 0.474, "step": 10049 }, { "epoch": 1.2532322993132845, "grad_norm": 2.0625, "learning_rate": 5.962079164071954e-06, "loss": 0.5871, "step": 10050 }, { "epoch": 1.2533585918383456, "grad_norm": 2.0625, "learning_rate": 5.960252802958841e-06, "loss": 0.4216, "step": 10051 }, { "epoch": 1.2534848843634068, "grad_norm": 2.03125, "learning_rate": 5.958426602860322e-06, "loss": 0.4522, "step": 10052 }, { "epoch": 1.253611176888468, "grad_norm": 1.8984375, "learning_rate": 5.956600563849182e-06, "loss": 0.4671, "step": 10053 }, { "epoch": 1.253737469413529, "grad_norm": 2.0625, "learning_rate": 5.954774685998206e-06, "loss": 0.4967, "step": 10054 }, { "epoch": 1.2538637619385904, "grad_norm": 2.0625, "learning_rate": 5.9529489693801665e-06, "loss": 0.4996, "step": 10055 }, { "epoch": 1.2539900544636513, "grad_norm": 2.0625, "learning_rate": 5.951123414067834e-06, "loss": 0.4963, "step": 10056 }, { "epoch": 1.2541163469887127, "grad_norm": 2.0, "learning_rate": 5.94929802013397e-06, "loss": 0.4709, "step": 10057 }, { "epoch": 1.2542426395137738, "grad_norm": 2.046875, "learning_rate": 5.94747278765133e-06, "loss": 0.5089, "step": 10058 }, { "epoch": 1.254368932038835, "grad_norm": 2.046875, "learning_rate": 5.945647716692664e-06, "loss": 0.61, "step": 10059 }, { "epoch": 1.254495224563896, "grad_norm": 1.96875, "learning_rate": 5.943822807330715e-06, "loss": 0.4926, "step": 10060 }, { "epoch": 1.2546215170889572, "grad_norm": 2.015625, "learning_rate": 5.941998059638216e-06, "loss": 0.4456, "step": 10061 }, { "epoch": 1.2547478096140185, "grad_norm": 1.9375, "learning_rate": 5.9401734736879035e-06, "loss": 0.5311, "step": 10062 }, { "epoch": 1.2548741021390797, "grad_norm": 1.984375, "learning_rate": 5.938349049552497e-06, "loss": 0.4704, "step": 10063 }, { "epoch": 1.2550003946641408, "grad_norm": 1.890625, "learning_rate": 5.9365247873047125e-06, "loss": 0.4659, "step": 10064 }, { "epoch": 1.255126687189202, "grad_norm": 2.015625, "learning_rate": 5.934700687017263e-06, "loss": 0.4967, "step": 10065 }, { "epoch": 1.255252979714263, "grad_norm": 2.125, "learning_rate": 5.932876748762852e-06, "loss": 0.5587, "step": 10066 }, { "epoch": 1.2553792722393244, "grad_norm": 1.875, "learning_rate": 5.931052972614178e-06, "loss": 0.4887, "step": 10067 }, { "epoch": 1.2555055647643856, "grad_norm": 1.8125, "learning_rate": 5.929229358643932e-06, "loss": 0.4554, "step": 10068 }, { "epoch": 1.2556318572894467, "grad_norm": 1.8203125, "learning_rate": 5.927405906924799e-06, "loss": 0.4757, "step": 10069 }, { "epoch": 1.2557581498145078, "grad_norm": 2.03125, "learning_rate": 5.925582617529457e-06, "loss": 0.5039, "step": 10070 }, { "epoch": 1.255884442339569, "grad_norm": 2.046875, "learning_rate": 5.9237594905305765e-06, "loss": 0.485, "step": 10071 }, { "epoch": 1.2560107348646303, "grad_norm": 2.09375, "learning_rate": 5.921936526000825e-06, "loss": 0.525, "step": 10072 }, { "epoch": 1.2561370273896912, "grad_norm": 2.078125, "learning_rate": 5.920113724012861e-06, "loss": 0.4718, "step": 10073 }, { "epoch": 1.2562633199147526, "grad_norm": 2.109375, "learning_rate": 5.918291084639338e-06, "loss": 0.5619, "step": 10074 }, { "epoch": 1.2563896124398137, "grad_norm": 1.9609375, "learning_rate": 5.916468607952901e-06, "loss": 0.465, "step": 10075 }, { "epoch": 1.2565159049648749, "grad_norm": 1.9296875, "learning_rate": 5.914646294026189e-06, "loss": 0.4784, "step": 10076 }, { "epoch": 1.256642197489936, "grad_norm": 2.0, "learning_rate": 5.912824142931837e-06, "loss": 0.4765, "step": 10077 }, { "epoch": 1.2567684900149971, "grad_norm": 2.0625, "learning_rate": 5.911002154742468e-06, "loss": 0.5698, "step": 10078 }, { "epoch": 1.2568947825400585, "grad_norm": 2.03125, "learning_rate": 5.9091803295307045e-06, "loss": 0.5604, "step": 10079 }, { "epoch": 1.2570210750651196, "grad_norm": 1.9375, "learning_rate": 5.907358667369161e-06, "loss": 0.4639, "step": 10080 }, { "epoch": 1.2571473675901808, "grad_norm": 2.296875, "learning_rate": 5.905537168330442e-06, "loss": 0.5048, "step": 10081 }, { "epoch": 1.2572736601152419, "grad_norm": 1.984375, "learning_rate": 5.903715832487148e-06, "loss": 0.5144, "step": 10082 }, { "epoch": 1.257399952640303, "grad_norm": 1.8125, "learning_rate": 5.901894659911875e-06, "loss": 0.492, "step": 10083 }, { "epoch": 1.2575262451653644, "grad_norm": 2.0625, "learning_rate": 5.900073650677212e-06, "loss": 0.563, "step": 10084 }, { "epoch": 1.2576525376904255, "grad_norm": 1.9453125, "learning_rate": 5.898252804855737e-06, "loss": 0.4876, "step": 10085 }, { "epoch": 1.2577788302154866, "grad_norm": 2.109375, "learning_rate": 5.896432122520026e-06, "loss": 0.4861, "step": 10086 }, { "epoch": 1.2579051227405478, "grad_norm": 1.8359375, "learning_rate": 5.894611603742646e-06, "loss": 0.4691, "step": 10087 }, { "epoch": 1.258031415265609, "grad_norm": 1.9375, "learning_rate": 5.892791248596159e-06, "loss": 0.5336, "step": 10088 }, { "epoch": 1.2581577077906703, "grad_norm": 2.328125, "learning_rate": 5.8909710571531185e-06, "loss": 0.5182, "step": 10089 }, { "epoch": 1.2582840003157312, "grad_norm": 1.8515625, "learning_rate": 5.889151029486075e-06, "loss": 0.5387, "step": 10090 }, { "epoch": 1.2584102928407925, "grad_norm": 1.9296875, "learning_rate": 5.887331165667571e-06, "loss": 0.5175, "step": 10091 }, { "epoch": 1.2585365853658537, "grad_norm": 1.90625, "learning_rate": 5.885511465770142e-06, "loss": 0.4595, "step": 10092 }, { "epoch": 1.2586628778909148, "grad_norm": 1.9140625, "learning_rate": 5.883691929866314e-06, "loss": 0.444, "step": 10093 }, { "epoch": 1.258789170415976, "grad_norm": 1.9140625, "learning_rate": 5.881872558028612e-06, "loss": 0.468, "step": 10094 }, { "epoch": 1.258915462941037, "grad_norm": 2.0625, "learning_rate": 5.8800533503295486e-06, "loss": 0.5469, "step": 10095 }, { "epoch": 1.2590417554660984, "grad_norm": 1.9296875, "learning_rate": 5.878234306841637e-06, "loss": 0.5179, "step": 10096 }, { "epoch": 1.2591680479911596, "grad_norm": 1.9765625, "learning_rate": 5.876415427637378e-06, "loss": 0.5446, "step": 10097 }, { "epoch": 1.2592943405162207, "grad_norm": 2.109375, "learning_rate": 5.874596712789266e-06, "loss": 0.5095, "step": 10098 }, { "epoch": 1.2594206330412818, "grad_norm": 2.171875, "learning_rate": 5.8727781623697935e-06, "loss": 0.5475, "step": 10099 }, { "epoch": 1.259546925566343, "grad_norm": 1.9296875, "learning_rate": 5.8709597764514414e-06, "loss": 0.4538, "step": 10100 }, { "epoch": 1.2596732180914043, "grad_norm": 1.9921875, "learning_rate": 5.86914155510669e-06, "loss": 0.5295, "step": 10101 }, { "epoch": 1.2597995106164654, "grad_norm": 2.171875, "learning_rate": 5.867323498408004e-06, "loss": 0.6467, "step": 10102 }, { "epoch": 1.2599258031415266, "grad_norm": 2.109375, "learning_rate": 5.86550560642785e-06, "loss": 0.5439, "step": 10103 }, { "epoch": 1.2600520956665877, "grad_norm": 1.9765625, "learning_rate": 5.863687879238685e-06, "loss": 0.4611, "step": 10104 }, { "epoch": 1.2601783881916488, "grad_norm": 2.03125, "learning_rate": 5.861870316912957e-06, "loss": 0.5663, "step": 10105 }, { "epoch": 1.2603046807167102, "grad_norm": 2.0625, "learning_rate": 5.860052919523111e-06, "loss": 0.5211, "step": 10106 }, { "epoch": 1.2604309732417713, "grad_norm": 2.078125, "learning_rate": 5.858235687141581e-06, "loss": 0.4627, "step": 10107 }, { "epoch": 1.2605572657668325, "grad_norm": 2.0, "learning_rate": 5.856418619840805e-06, "loss": 0.5118, "step": 10108 }, { "epoch": 1.2606835582918936, "grad_norm": 1.9140625, "learning_rate": 5.8546017176932e-06, "loss": 0.4566, "step": 10109 }, { "epoch": 1.2608098508169547, "grad_norm": 1.921875, "learning_rate": 5.8527849807711865e-06, "loss": 0.4144, "step": 10110 }, { "epoch": 1.2609361433420159, "grad_norm": 2.03125, "learning_rate": 5.850968409147174e-06, "loss": 0.5874, "step": 10111 }, { "epoch": 1.261062435867077, "grad_norm": 1.9140625, "learning_rate": 5.849152002893568e-06, "loss": 0.4447, "step": 10112 }, { "epoch": 1.2611887283921384, "grad_norm": 2.0, "learning_rate": 5.847335762082764e-06, "loss": 0.4489, "step": 10113 }, { "epoch": 1.2613150209171995, "grad_norm": 2.03125, "learning_rate": 5.8455196867871535e-06, "loss": 0.4721, "step": 10114 }, { "epoch": 1.2614413134422606, "grad_norm": 1.8515625, "learning_rate": 5.843703777079124e-06, "loss": 0.4827, "step": 10115 }, { "epoch": 1.2615676059673218, "grad_norm": 1.8828125, "learning_rate": 5.841888033031049e-06, "loss": 0.4595, "step": 10116 }, { "epoch": 1.261693898492383, "grad_norm": 2.046875, "learning_rate": 5.840072454715303e-06, "loss": 0.4663, "step": 10117 }, { "epoch": 1.2618201910174442, "grad_norm": 1.9921875, "learning_rate": 5.8382570422042504e-06, "loss": 0.4607, "step": 10118 }, { "epoch": 1.2619464835425054, "grad_norm": 1.875, "learning_rate": 5.836441795570247e-06, "loss": 0.4716, "step": 10119 }, { "epoch": 1.2620727760675665, "grad_norm": 1.9765625, "learning_rate": 5.834626714885644e-06, "loss": 0.4751, "step": 10120 }, { "epoch": 1.2621990685926276, "grad_norm": 1.890625, "learning_rate": 5.832811800222787e-06, "loss": 0.5019, "step": 10121 }, { "epoch": 1.2623253611176888, "grad_norm": 2.03125, "learning_rate": 5.830997051654015e-06, "loss": 0.6039, "step": 10122 }, { "epoch": 1.2624516536427501, "grad_norm": 1.8984375, "learning_rate": 5.829182469251659e-06, "loss": 0.6445, "step": 10123 }, { "epoch": 1.2625779461678113, "grad_norm": 1.9609375, "learning_rate": 5.827368053088043e-06, "loss": 0.4896, "step": 10124 }, { "epoch": 1.2627042386928724, "grad_norm": 1.9375, "learning_rate": 5.825553803235483e-06, "loss": 0.4838, "step": 10125 }, { "epoch": 1.2628305312179335, "grad_norm": 1.9140625, "learning_rate": 5.823739719766297e-06, "loss": 0.4464, "step": 10126 }, { "epoch": 1.2629568237429947, "grad_norm": 1.9765625, "learning_rate": 5.821925802752787e-06, "loss": 0.5182, "step": 10127 }, { "epoch": 1.2630831162680558, "grad_norm": 2.640625, "learning_rate": 5.82011205226725e-06, "loss": 0.4993, "step": 10128 }, { "epoch": 1.263209408793117, "grad_norm": 2.171875, "learning_rate": 5.8182984683819795e-06, "loss": 0.5352, "step": 10129 }, { "epoch": 1.2633357013181783, "grad_norm": 2.078125, "learning_rate": 5.81648505116926e-06, "loss": 0.5114, "step": 10130 }, { "epoch": 1.2634619938432394, "grad_norm": 1.9453125, "learning_rate": 5.814671800701369e-06, "loss": 0.5149, "step": 10131 }, { "epoch": 1.2635882863683006, "grad_norm": 1.921875, "learning_rate": 5.8128587170505805e-06, "loss": 0.4187, "step": 10132 }, { "epoch": 1.2637145788933617, "grad_norm": 1.9921875, "learning_rate": 5.811045800289157e-06, "loss": 0.5354, "step": 10133 }, { "epoch": 1.2638408714184228, "grad_norm": 1.890625, "learning_rate": 5.809233050489358e-06, "loss": 0.5709, "step": 10134 }, { "epoch": 1.2639671639434842, "grad_norm": 1.859375, "learning_rate": 5.807420467723435e-06, "loss": 0.5218, "step": 10135 }, { "epoch": 1.2640934564685453, "grad_norm": 1.9140625, "learning_rate": 5.805608052063635e-06, "loss": 0.4296, "step": 10136 }, { "epoch": 1.2642197489936065, "grad_norm": 1.8984375, "learning_rate": 5.803795803582195e-06, "loss": 0.4852, "step": 10137 }, { "epoch": 1.2643460415186676, "grad_norm": 2.015625, "learning_rate": 5.8019837223513464e-06, "loss": 0.555, "step": 10138 }, { "epoch": 1.2644723340437287, "grad_norm": 1.984375, "learning_rate": 5.800171808443315e-06, "loss": 0.5301, "step": 10139 }, { "epoch": 1.26459862656879, "grad_norm": 1.96875, "learning_rate": 5.79836006193032e-06, "loss": 0.4966, "step": 10140 }, { "epoch": 1.2647249190938512, "grad_norm": 2.0, "learning_rate": 5.7965484828845715e-06, "loss": 0.5252, "step": 10141 }, { "epoch": 1.2648512116189123, "grad_norm": 1.984375, "learning_rate": 5.794737071378278e-06, "loss": 0.4833, "step": 10142 }, { "epoch": 1.2649775041439735, "grad_norm": 2.109375, "learning_rate": 5.792925827483631e-06, "loss": 0.5633, "step": 10143 }, { "epoch": 1.2651037966690346, "grad_norm": 2.1875, "learning_rate": 5.79111475127283e-06, "loss": 0.5867, "step": 10144 }, { "epoch": 1.2652300891940957, "grad_norm": 1.9921875, "learning_rate": 5.789303842818059e-06, "loss": 0.4773, "step": 10145 }, { "epoch": 1.2653563817191569, "grad_norm": 1.8203125, "learning_rate": 5.787493102191493e-06, "loss": 0.4603, "step": 10146 }, { "epoch": 1.2654826742442182, "grad_norm": 2.03125, "learning_rate": 5.7856825294653065e-06, "loss": 0.4874, "step": 10147 }, { "epoch": 1.2656089667692794, "grad_norm": 2.03125, "learning_rate": 5.7838721247116634e-06, "loss": 0.4802, "step": 10148 }, { "epoch": 1.2657352592943405, "grad_norm": 1.9921875, "learning_rate": 5.782061888002722e-06, "loss": 0.4527, "step": 10149 }, { "epoch": 1.2658615518194016, "grad_norm": 2.078125, "learning_rate": 5.780251819410634e-06, "loss": 0.5136, "step": 10150 }, { "epoch": 1.2659878443444628, "grad_norm": 2.1875, "learning_rate": 5.778441919007545e-06, "loss": 0.5338, "step": 10151 }, { "epoch": 1.2661141368695241, "grad_norm": 2.234375, "learning_rate": 5.7766321868655935e-06, "loss": 0.5311, "step": 10152 }, { "epoch": 1.2662404293945853, "grad_norm": 2.09375, "learning_rate": 5.774822623056909e-06, "loss": 0.5065, "step": 10153 }, { "epoch": 1.2663667219196464, "grad_norm": 2.0, "learning_rate": 5.773013227653619e-06, "loss": 0.5331, "step": 10154 }, { "epoch": 1.2664930144447075, "grad_norm": 1.8984375, "learning_rate": 5.771204000727839e-06, "loss": 0.4607, "step": 10155 }, { "epoch": 1.2666193069697687, "grad_norm": 1.9375, "learning_rate": 5.769394942351684e-06, "loss": 0.5336, "step": 10156 }, { "epoch": 1.26674559949483, "grad_norm": 1.9375, "learning_rate": 5.767586052597255e-06, "loss": 0.4692, "step": 10157 }, { "epoch": 1.2668718920198911, "grad_norm": 1.9375, "learning_rate": 5.765777331536651e-06, "loss": 0.4876, "step": 10158 }, { "epoch": 1.2669981845449523, "grad_norm": 2.171875, "learning_rate": 5.763968779241965e-06, "loss": 0.5047, "step": 10159 }, { "epoch": 1.2671244770700134, "grad_norm": 2.015625, "learning_rate": 5.76216039578528e-06, "loss": 0.5064, "step": 10160 }, { "epoch": 1.2672507695950745, "grad_norm": 1.8359375, "learning_rate": 5.760352181238671e-06, "loss": 0.4272, "step": 10161 }, { "epoch": 1.267377062120136, "grad_norm": 1.6796875, "learning_rate": 5.758544135674214e-06, "loss": 0.4711, "step": 10162 }, { "epoch": 1.2675033546451968, "grad_norm": 1.984375, "learning_rate": 5.756736259163973e-06, "loss": 0.4679, "step": 10163 }, { "epoch": 1.2676296471702582, "grad_norm": 1.9921875, "learning_rate": 5.7549285517800035e-06, "loss": 0.4965, "step": 10164 }, { "epoch": 1.2677559396953193, "grad_norm": 1.8671875, "learning_rate": 5.753121013594358e-06, "loss": 0.4428, "step": 10165 }, { "epoch": 1.2678822322203804, "grad_norm": 2.046875, "learning_rate": 5.751313644679079e-06, "loss": 0.5665, "step": 10166 }, { "epoch": 1.2680085247454416, "grad_norm": 2.03125, "learning_rate": 5.749506445106205e-06, "loss": 0.4936, "step": 10167 }, { "epoch": 1.2681348172705027, "grad_norm": 2.046875, "learning_rate": 5.747699414947766e-06, "loss": 0.4918, "step": 10168 }, { "epoch": 1.268261109795564, "grad_norm": 2.078125, "learning_rate": 5.745892554275786e-06, "loss": 0.4997, "step": 10169 }, { "epoch": 1.2683874023206252, "grad_norm": 2.046875, "learning_rate": 5.744085863162282e-06, "loss": 0.4828, "step": 10170 }, { "epoch": 1.2685136948456863, "grad_norm": 1.9296875, "learning_rate": 5.742279341679264e-06, "loss": 0.4821, "step": 10171 }, { "epoch": 1.2686399873707475, "grad_norm": 1.96875, "learning_rate": 5.740472989898738e-06, "loss": 0.5459, "step": 10172 }, { "epoch": 1.2687662798958086, "grad_norm": 2.015625, "learning_rate": 5.738666807892697e-06, "loss": 0.4467, "step": 10173 }, { "epoch": 1.26889257242087, "grad_norm": 1.8984375, "learning_rate": 5.736860795733133e-06, "loss": 0.4821, "step": 10174 }, { "epoch": 1.269018864945931, "grad_norm": 2.234375, "learning_rate": 5.7350549534920295e-06, "loss": 0.5561, "step": 10175 }, { "epoch": 1.2691451574709922, "grad_norm": 1.8671875, "learning_rate": 5.733249281241363e-06, "loss": 0.4814, "step": 10176 }, { "epoch": 1.2692714499960533, "grad_norm": 2.28125, "learning_rate": 5.731443779053103e-06, "loss": 0.5957, "step": 10177 }, { "epoch": 1.2693977425211145, "grad_norm": 1.9453125, "learning_rate": 5.729638446999213e-06, "loss": 0.5133, "step": 10178 }, { "epoch": 1.2695240350461758, "grad_norm": 1.8125, "learning_rate": 5.727833285151645e-06, "loss": 0.4307, "step": 10179 }, { "epoch": 1.2696503275712367, "grad_norm": 1.796875, "learning_rate": 5.726028293582355e-06, "loss": 0.4463, "step": 10180 }, { "epoch": 1.269776620096298, "grad_norm": 1.9453125, "learning_rate": 5.724223472363283e-06, "loss": 0.4548, "step": 10181 }, { "epoch": 1.2699029126213592, "grad_norm": 2.125, "learning_rate": 5.722418821566364e-06, "loss": 0.5012, "step": 10182 }, { "epoch": 1.2700292051464204, "grad_norm": 1.9140625, "learning_rate": 5.720614341263528e-06, "loss": 0.418, "step": 10183 }, { "epoch": 1.2701554976714815, "grad_norm": 1.96875, "learning_rate": 5.7188100315266985e-06, "loss": 0.5109, "step": 10184 }, { "epoch": 1.2702817901965426, "grad_norm": 2.09375, "learning_rate": 5.7170058924277875e-06, "loss": 0.5129, "step": 10185 }, { "epoch": 1.270408082721604, "grad_norm": 1.9375, "learning_rate": 5.715201924038705e-06, "loss": 0.4752, "step": 10186 }, { "epoch": 1.2705343752466651, "grad_norm": 1.8828125, "learning_rate": 5.713398126431353e-06, "loss": 0.46, "step": 10187 }, { "epoch": 1.2706606677717263, "grad_norm": 2.09375, "learning_rate": 5.711594499677628e-06, "loss": 0.5817, "step": 10188 }, { "epoch": 1.2707869602967874, "grad_norm": 1.859375, "learning_rate": 5.709791043849416e-06, "loss": 0.4497, "step": 10189 }, { "epoch": 1.2709132528218485, "grad_norm": 1.921875, "learning_rate": 5.7079877590186e-06, "loss": 0.4609, "step": 10190 }, { "epoch": 1.2710395453469099, "grad_norm": 2.015625, "learning_rate": 5.706184645257055e-06, "loss": 0.5318, "step": 10191 }, { "epoch": 1.271165837871971, "grad_norm": 2.078125, "learning_rate": 5.704381702636645e-06, "loss": 0.5728, "step": 10192 }, { "epoch": 1.2712921303970321, "grad_norm": 2.015625, "learning_rate": 5.702578931229237e-06, "loss": 0.5266, "step": 10193 }, { "epoch": 1.2714184229220933, "grad_norm": 1.8203125, "learning_rate": 5.700776331106679e-06, "loss": 0.4425, "step": 10194 }, { "epoch": 1.2715447154471544, "grad_norm": 1.953125, "learning_rate": 5.698973902340823e-06, "loss": 0.4524, "step": 10195 }, { "epoch": 1.2716710079722158, "grad_norm": 2.046875, "learning_rate": 5.697171645003507e-06, "loss": 0.496, "step": 10196 }, { "epoch": 1.2717973004972767, "grad_norm": 1.953125, "learning_rate": 5.695369559166561e-06, "loss": 0.4789, "step": 10197 }, { "epoch": 1.271923593022338, "grad_norm": 2.0625, "learning_rate": 5.69356764490182e-06, "loss": 0.5046, "step": 10198 }, { "epoch": 1.2720498855473992, "grad_norm": 2.09375, "learning_rate": 5.6917659022811e-06, "loss": 0.528, "step": 10199 }, { "epoch": 1.2721761780724603, "grad_norm": 1.828125, "learning_rate": 5.689964331376214e-06, "loss": 0.4683, "step": 10200 }, { "epoch": 1.2723024705975214, "grad_norm": 2.328125, "learning_rate": 5.688162932258969e-06, "loss": 0.4868, "step": 10201 }, { "epoch": 1.2724287631225826, "grad_norm": 1.90625, "learning_rate": 5.686361705001162e-06, "loss": 0.505, "step": 10202 }, { "epoch": 1.272555055647644, "grad_norm": 1.9453125, "learning_rate": 5.684560649674589e-06, "loss": 0.4902, "step": 10203 }, { "epoch": 1.272681348172705, "grad_norm": 1.8984375, "learning_rate": 5.682759766351033e-06, "loss": 0.4792, "step": 10204 }, { "epoch": 1.2728076406977662, "grad_norm": 2.15625, "learning_rate": 5.680959055102276e-06, "loss": 0.4373, "step": 10205 }, { "epoch": 1.2729339332228273, "grad_norm": 1.890625, "learning_rate": 5.679158516000087e-06, "loss": 0.4612, "step": 10206 }, { "epoch": 1.2730602257478885, "grad_norm": 1.96875, "learning_rate": 5.677358149116232e-06, "loss": 0.449, "step": 10207 }, { "epoch": 1.2731865182729498, "grad_norm": 2.015625, "learning_rate": 5.67555795452247e-06, "loss": 0.5024, "step": 10208 }, { "epoch": 1.273312810798011, "grad_norm": 1.84375, "learning_rate": 5.673757932290551e-06, "loss": 0.4697, "step": 10209 }, { "epoch": 1.273439103323072, "grad_norm": 2.125, "learning_rate": 5.6719580824922204e-06, "loss": 0.5622, "step": 10210 }, { "epoch": 1.2735653958481332, "grad_norm": 1.859375, "learning_rate": 5.670158405199216e-06, "loss": 0.4735, "step": 10211 }, { "epoch": 1.2736916883731944, "grad_norm": 2.109375, "learning_rate": 5.668358900483268e-06, "loss": 0.5274, "step": 10212 }, { "epoch": 1.2738179808982557, "grad_norm": 2.109375, "learning_rate": 5.666559568416099e-06, "loss": 0.4324, "step": 10213 }, { "epoch": 1.2739442734233166, "grad_norm": 1.9296875, "learning_rate": 5.66476040906943e-06, "loss": 0.4962, "step": 10214 }, { "epoch": 1.274070565948378, "grad_norm": 2.03125, "learning_rate": 5.66296142251497e-06, "loss": 0.4956, "step": 10215 }, { "epoch": 1.274196858473439, "grad_norm": 2.0, "learning_rate": 5.66116260882442e-06, "loss": 0.4925, "step": 10216 }, { "epoch": 1.2743231509985002, "grad_norm": 2.015625, "learning_rate": 5.659363968069478e-06, "loss": 0.4885, "step": 10217 }, { "epoch": 1.2744494435235614, "grad_norm": 2.03125, "learning_rate": 5.657565500321833e-06, "loss": 0.4696, "step": 10218 }, { "epoch": 1.2745757360486225, "grad_norm": 2.015625, "learning_rate": 5.655767205653169e-06, "loss": 0.4474, "step": 10219 }, { "epoch": 1.2747020285736839, "grad_norm": 2.015625, "learning_rate": 5.65396908413516e-06, "loss": 0.5653, "step": 10220 }, { "epoch": 1.274828321098745, "grad_norm": 1.90625, "learning_rate": 5.652171135839475e-06, "loss": 0.4683, "step": 10221 }, { "epoch": 1.2749546136238061, "grad_norm": 1.921875, "learning_rate": 5.650373360837775e-06, "loss": 0.4635, "step": 10222 }, { "epoch": 1.2750809061488673, "grad_norm": 1.8671875, "learning_rate": 5.648575759201718e-06, "loss": 0.4764, "step": 10223 }, { "epoch": 1.2752071986739284, "grad_norm": 1.96875, "learning_rate": 5.646778331002949e-06, "loss": 0.53, "step": 10224 }, { "epoch": 1.2753334911989898, "grad_norm": 2.125, "learning_rate": 5.6449810763131105e-06, "loss": 0.4812, "step": 10225 }, { "epoch": 1.2754597837240509, "grad_norm": 2.125, "learning_rate": 5.643183995203837e-06, "loss": 0.6252, "step": 10226 }, { "epoch": 1.275586076249112, "grad_norm": 2.0625, "learning_rate": 5.641387087746755e-06, "loss": 0.5186, "step": 10227 }, { "epoch": 1.2757123687741732, "grad_norm": 2.171875, "learning_rate": 5.639590354013485e-06, "loss": 0.55, "step": 10228 }, { "epoch": 1.2758386612992343, "grad_norm": 1.8046875, "learning_rate": 5.637793794075638e-06, "loss": 0.4692, "step": 10229 }, { "epoch": 1.2759649538242956, "grad_norm": 1.9140625, "learning_rate": 5.635997408004826e-06, "loss": 0.4988, "step": 10230 }, { "epoch": 1.2760912463493568, "grad_norm": 2.0, "learning_rate": 5.634201195872646e-06, "loss": 0.5243, "step": 10231 }, { "epoch": 1.276217538874418, "grad_norm": 1.984375, "learning_rate": 5.632405157750692e-06, "loss": 0.4968, "step": 10232 }, { "epoch": 1.276343831399479, "grad_norm": 2.015625, "learning_rate": 5.630609293710547e-06, "loss": 0.4655, "step": 10233 }, { "epoch": 1.2764701239245402, "grad_norm": 2.15625, "learning_rate": 5.628813603823793e-06, "loss": 0.5503, "step": 10234 }, { "epoch": 1.2765964164496013, "grad_norm": 2.046875, "learning_rate": 5.627018088161998e-06, "loss": 0.4763, "step": 10235 }, { "epoch": 1.2767227089746624, "grad_norm": 1.890625, "learning_rate": 5.62522274679673e-06, "loss": 0.4661, "step": 10236 }, { "epoch": 1.2768490014997238, "grad_norm": 1.984375, "learning_rate": 5.623427579799545e-06, "loss": 0.4871, "step": 10237 }, { "epoch": 1.276975294024785, "grad_norm": 2.1875, "learning_rate": 5.621632587241997e-06, "loss": 0.5, "step": 10238 }, { "epoch": 1.277101586549846, "grad_norm": 1.9453125, "learning_rate": 5.6198377691956266e-06, "loss": 0.502, "step": 10239 }, { "epoch": 1.2772278790749072, "grad_norm": 1.8046875, "learning_rate": 5.6180431257319736e-06, "loss": 0.4842, "step": 10240 }, { "epoch": 1.2773541715999683, "grad_norm": 2.109375, "learning_rate": 5.616248656922565e-06, "loss": 0.5712, "step": 10241 }, { "epoch": 1.2774804641250297, "grad_norm": 1.9453125, "learning_rate": 5.614454362838928e-06, "loss": 0.4641, "step": 10242 }, { "epoch": 1.2776067566500908, "grad_norm": 1.84375, "learning_rate": 5.612660243552577e-06, "loss": 0.534, "step": 10243 }, { "epoch": 1.277733049175152, "grad_norm": 2.171875, "learning_rate": 5.610866299135019e-06, "loss": 0.5459, "step": 10244 }, { "epoch": 1.277859341700213, "grad_norm": 2.25, "learning_rate": 5.609072529657755e-06, "loss": 0.5938, "step": 10245 }, { "epoch": 1.2779856342252742, "grad_norm": 1.90625, "learning_rate": 5.607278935192289e-06, "loss": 0.4578, "step": 10246 }, { "epoch": 1.2781119267503356, "grad_norm": 2.03125, "learning_rate": 5.605485515810104e-06, "loss": 0.4746, "step": 10247 }, { "epoch": 1.2782382192753967, "grad_norm": 1.984375, "learning_rate": 5.603692271582681e-06, "loss": 0.4643, "step": 10248 }, { "epoch": 1.2783645118004578, "grad_norm": 2.0625, "learning_rate": 5.601899202581496e-06, "loss": 0.5181, "step": 10249 }, { "epoch": 1.278490804325519, "grad_norm": 1.953125, "learning_rate": 5.600106308878013e-06, "loss": 0.4377, "step": 10250 }, { "epoch": 1.2786170968505801, "grad_norm": 2.046875, "learning_rate": 5.598313590543696e-06, "loss": 0.5045, "step": 10251 }, { "epoch": 1.2787433893756412, "grad_norm": 1.9375, "learning_rate": 5.5965210476499965e-06, "loss": 0.4939, "step": 10252 }, { "epoch": 1.2788696819007024, "grad_norm": 1.9609375, "learning_rate": 5.594728680268362e-06, "loss": 0.4276, "step": 10253 }, { "epoch": 1.2789959744257637, "grad_norm": 2.0, "learning_rate": 5.592936488470233e-06, "loss": 0.486, "step": 10254 }, { "epoch": 1.2791222669508249, "grad_norm": 2.265625, "learning_rate": 5.5911444723270395e-06, "loss": 0.5626, "step": 10255 }, { "epoch": 1.279248559475886, "grad_norm": 2.109375, "learning_rate": 5.589352631910207e-06, "loss": 0.503, "step": 10256 }, { "epoch": 1.2793748520009471, "grad_norm": 2.0625, "learning_rate": 5.587560967291155e-06, "loss": 0.6204, "step": 10257 }, { "epoch": 1.2795011445260083, "grad_norm": 1.8125, "learning_rate": 5.585769478541296e-06, "loss": 0.4733, "step": 10258 }, { "epoch": 1.2796274370510696, "grad_norm": 1.9921875, "learning_rate": 5.583978165732033e-06, "loss": 0.4566, "step": 10259 }, { "epoch": 1.2797537295761308, "grad_norm": 1.9140625, "learning_rate": 5.582187028934762e-06, "loss": 0.4657, "step": 10260 }, { "epoch": 1.279880022101192, "grad_norm": 2.046875, "learning_rate": 5.580396068220873e-06, "loss": 0.5657, "step": 10261 }, { "epoch": 1.280006314626253, "grad_norm": 1.8984375, "learning_rate": 5.578605283661755e-06, "loss": 0.5087, "step": 10262 }, { "epoch": 1.2801326071513142, "grad_norm": 2.25, "learning_rate": 5.57681467532878e-06, "loss": 0.4877, "step": 10263 }, { "epoch": 1.2802588996763755, "grad_norm": 1.8828125, "learning_rate": 5.575024243293319e-06, "loss": 0.4999, "step": 10264 }, { "epoch": 1.2803851922014367, "grad_norm": 1.9609375, "learning_rate": 5.573233987626732e-06, "loss": 0.4893, "step": 10265 }, { "epoch": 1.2805114847264978, "grad_norm": 2.078125, "learning_rate": 5.571443908400378e-06, "loss": 0.5414, "step": 10266 }, { "epoch": 1.280637777251559, "grad_norm": 1.8984375, "learning_rate": 5.5696540056856005e-06, "loss": 0.5229, "step": 10267 }, { "epoch": 1.28076406977662, "grad_norm": 1.9375, "learning_rate": 5.5678642795537455e-06, "loss": 0.4882, "step": 10268 }, { "epoch": 1.2808903623016814, "grad_norm": 2.015625, "learning_rate": 5.566074730076143e-06, "loss": 0.5113, "step": 10269 }, { "epoch": 1.2810166548267423, "grad_norm": 2.015625, "learning_rate": 5.564285357324124e-06, "loss": 0.5657, "step": 10270 }, { "epoch": 1.2811429473518037, "grad_norm": 1.9140625, "learning_rate": 5.5624961613690044e-06, "loss": 0.4718, "step": 10271 }, { "epoch": 1.2812692398768648, "grad_norm": 1.953125, "learning_rate": 5.5607071422821e-06, "loss": 0.4903, "step": 10272 }, { "epoch": 1.281395532401926, "grad_norm": 1.8515625, "learning_rate": 5.5589183001347165e-06, "loss": 0.5276, "step": 10273 }, { "epoch": 1.281521824926987, "grad_norm": 2.125, "learning_rate": 5.557129634998153e-06, "loss": 0.4681, "step": 10274 }, { "epoch": 1.2816481174520482, "grad_norm": 1.9296875, "learning_rate": 5.555341146943701e-06, "loss": 0.4637, "step": 10275 }, { "epoch": 1.2817744099771096, "grad_norm": 2.0625, "learning_rate": 5.553552836042645e-06, "loss": 0.5043, "step": 10276 }, { "epoch": 1.2819007025021707, "grad_norm": 2.109375, "learning_rate": 5.551764702366259e-06, "loss": 0.5356, "step": 10277 }, { "epoch": 1.2820269950272318, "grad_norm": 2.03125, "learning_rate": 5.549976745985821e-06, "loss": 0.5037, "step": 10278 }, { "epoch": 1.282153287552293, "grad_norm": 1.9921875, "learning_rate": 5.548188966972592e-06, "loss": 0.4801, "step": 10279 }, { "epoch": 1.282279580077354, "grad_norm": 1.9921875, "learning_rate": 5.546401365397829e-06, "loss": 0.4837, "step": 10280 }, { "epoch": 1.2824058726024155, "grad_norm": 2.078125, "learning_rate": 5.544613941332778e-06, "loss": 0.5223, "step": 10281 }, { "epoch": 1.2825321651274766, "grad_norm": 1.9140625, "learning_rate": 5.542826694848686e-06, "loss": 0.4494, "step": 10282 }, { "epoch": 1.2826584576525377, "grad_norm": 2.109375, "learning_rate": 5.541039626016784e-06, "loss": 0.4933, "step": 10283 }, { "epoch": 1.2827847501775989, "grad_norm": 1.9921875, "learning_rate": 5.539252734908305e-06, "loss": 0.4532, "step": 10284 }, { "epoch": 1.28291104270266, "grad_norm": 1.953125, "learning_rate": 5.537466021594466e-06, "loss": 0.4449, "step": 10285 }, { "epoch": 1.2830373352277213, "grad_norm": 2.0, "learning_rate": 5.535679486146482e-06, "loss": 0.5382, "step": 10286 }, { "epoch": 1.2831636277527823, "grad_norm": 1.9609375, "learning_rate": 5.533893128635561e-06, "loss": 0.4704, "step": 10287 }, { "epoch": 1.2832899202778436, "grad_norm": 2.015625, "learning_rate": 5.532106949132905e-06, "loss": 0.5205, "step": 10288 }, { "epoch": 1.2834162128029047, "grad_norm": 2.109375, "learning_rate": 5.530320947709702e-06, "loss": 0.4701, "step": 10289 }, { "epoch": 1.2835425053279659, "grad_norm": 1.78125, "learning_rate": 5.52853512443714e-06, "loss": 0.4534, "step": 10290 }, { "epoch": 1.283668797853027, "grad_norm": 1.9921875, "learning_rate": 5.5267494793864e-06, "loss": 0.5051, "step": 10291 }, { "epoch": 1.2837950903780881, "grad_norm": 2.3125, "learning_rate": 5.524964012628648e-06, "loss": 0.5664, "step": 10292 }, { "epoch": 1.2839213829031495, "grad_norm": 1.9296875, "learning_rate": 5.52317872423505e-06, "loss": 0.5317, "step": 10293 }, { "epoch": 1.2840476754282106, "grad_norm": 1.96875, "learning_rate": 5.521393614276769e-06, "loss": 0.4897, "step": 10294 }, { "epoch": 1.2841739679532718, "grad_norm": 2.65625, "learning_rate": 5.51960868282495e-06, "loss": 0.5101, "step": 10295 }, { "epoch": 1.284300260478333, "grad_norm": 1.96875, "learning_rate": 5.5178239299507375e-06, "loss": 0.5019, "step": 10296 }, { "epoch": 1.284426553003394, "grad_norm": 2.09375, "learning_rate": 5.516039355725268e-06, "loss": 0.4473, "step": 10297 }, { "epoch": 1.2845528455284554, "grad_norm": 1.9609375, "learning_rate": 5.514254960219668e-06, "loss": 0.517, "step": 10298 }, { "epoch": 1.2846791380535165, "grad_norm": 1.875, "learning_rate": 5.512470743505062e-06, "loss": 0.4751, "step": 10299 }, { "epoch": 1.2848054305785777, "grad_norm": 2.203125, "learning_rate": 5.510686705652563e-06, "loss": 0.5356, "step": 10300 }, { "epoch": 1.2849317231036388, "grad_norm": 2.015625, "learning_rate": 5.508902846733278e-06, "loss": 0.4896, "step": 10301 }, { "epoch": 1.2850580156287, "grad_norm": 2.015625, "learning_rate": 5.507119166818309e-06, "loss": 0.4844, "step": 10302 }, { "epoch": 1.2851843081537613, "grad_norm": 2.015625, "learning_rate": 5.505335665978748e-06, "loss": 0.5297, "step": 10303 }, { "epoch": 1.2853106006788222, "grad_norm": 2.078125, "learning_rate": 5.5035523442856805e-06, "loss": 0.5495, "step": 10304 }, { "epoch": 1.2854368932038835, "grad_norm": 1.953125, "learning_rate": 5.501769201810188e-06, "loss": 0.4737, "step": 10305 }, { "epoch": 1.2855631857289447, "grad_norm": 2.09375, "learning_rate": 5.4999862386233384e-06, "loss": 0.5006, "step": 10306 }, { "epoch": 1.2856894782540058, "grad_norm": 1.96875, "learning_rate": 5.4982034547961984e-06, "loss": 0.4515, "step": 10307 }, { "epoch": 1.285815770779067, "grad_norm": 2.1875, "learning_rate": 5.496420850399826e-06, "loss": 0.5386, "step": 10308 }, { "epoch": 1.285942063304128, "grad_norm": 1.8125, "learning_rate": 5.494638425505268e-06, "loss": 0.4627, "step": 10309 }, { "epoch": 1.2860683558291894, "grad_norm": 2.078125, "learning_rate": 5.4928561801835735e-06, "loss": 0.4637, "step": 10310 }, { "epoch": 1.2861946483542506, "grad_norm": 1.9609375, "learning_rate": 5.491074114505776e-06, "loss": 0.4199, "step": 10311 }, { "epoch": 1.2863209408793117, "grad_norm": 2.125, "learning_rate": 5.489292228542904e-06, "loss": 0.58, "step": 10312 }, { "epoch": 1.2864472334043728, "grad_norm": 1.78125, "learning_rate": 5.487510522365978e-06, "loss": 0.4591, "step": 10313 }, { "epoch": 1.286573525929434, "grad_norm": 2.015625, "learning_rate": 5.485728996046014e-06, "loss": 0.5081, "step": 10314 }, { "epoch": 1.2866998184544953, "grad_norm": 1.9609375, "learning_rate": 5.483947649654019e-06, "loss": 0.5166, "step": 10315 }, { "epoch": 1.2868261109795565, "grad_norm": 2.171875, "learning_rate": 5.482166483260993e-06, "loss": 0.5893, "step": 10316 }, { "epoch": 1.2869524035046176, "grad_norm": 1.8984375, "learning_rate": 5.48038549693793e-06, "loss": 0.5544, "step": 10317 }, { "epoch": 1.2870786960296787, "grad_norm": 2.0, "learning_rate": 5.478604690755814e-06, "loss": 0.519, "step": 10318 }, { "epoch": 1.2872049885547399, "grad_norm": 1.8515625, "learning_rate": 5.476824064785624e-06, "loss": 0.447, "step": 10319 }, { "epoch": 1.2873312810798012, "grad_norm": 2.03125, "learning_rate": 5.475043619098334e-06, "loss": 0.4797, "step": 10320 }, { "epoch": 1.2874575736048621, "grad_norm": 2.015625, "learning_rate": 5.473263353764904e-06, "loss": 0.5429, "step": 10321 }, { "epoch": 1.2875838661299235, "grad_norm": 2.046875, "learning_rate": 5.471483268856294e-06, "loss": 0.4396, "step": 10322 }, { "epoch": 1.2877101586549846, "grad_norm": 2.015625, "learning_rate": 5.469703364443455e-06, "loss": 0.4696, "step": 10323 }, { "epoch": 1.2878364511800457, "grad_norm": 2.015625, "learning_rate": 5.467923640597326e-06, "loss": 0.6118, "step": 10324 }, { "epoch": 1.2879627437051069, "grad_norm": 1.9375, "learning_rate": 5.466144097388842e-06, "loss": 0.4865, "step": 10325 }, { "epoch": 1.288089036230168, "grad_norm": 2.0, "learning_rate": 5.4643647348889364e-06, "loss": 0.542, "step": 10326 }, { "epoch": 1.2882153287552294, "grad_norm": 2.171875, "learning_rate": 5.462585553168529e-06, "loss": 0.5619, "step": 10327 }, { "epoch": 1.2883416212802905, "grad_norm": 2.03125, "learning_rate": 5.460806552298532e-06, "loss": 0.4887, "step": 10328 }, { "epoch": 1.2884679138053516, "grad_norm": 2.03125, "learning_rate": 5.459027732349851e-06, "loss": 0.4942, "step": 10329 }, { "epoch": 1.2885942063304128, "grad_norm": 1.78125, "learning_rate": 5.4572490933933885e-06, "loss": 0.4636, "step": 10330 }, { "epoch": 1.288720498855474, "grad_norm": 1.9296875, "learning_rate": 5.455470635500035e-06, "loss": 0.5082, "step": 10331 }, { "epoch": 1.2888467913805353, "grad_norm": 1.8984375, "learning_rate": 5.4536923587406765e-06, "loss": 0.4853, "step": 10332 }, { "epoch": 1.2889730839055964, "grad_norm": 2.03125, "learning_rate": 5.451914263186189e-06, "loss": 0.4961, "step": 10333 }, { "epoch": 1.2890993764306575, "grad_norm": 1.890625, "learning_rate": 5.450136348907444e-06, "loss": 0.478, "step": 10334 }, { "epoch": 1.2892256689557187, "grad_norm": 1.890625, "learning_rate": 5.4483586159753065e-06, "loss": 0.4774, "step": 10335 }, { "epoch": 1.2893519614807798, "grad_norm": 1.90625, "learning_rate": 5.44658106446063e-06, "loss": 0.4949, "step": 10336 }, { "epoch": 1.2894782540058412, "grad_norm": 1.8671875, "learning_rate": 5.444803694434265e-06, "loss": 0.4357, "step": 10337 }, { "epoch": 1.2896045465309023, "grad_norm": 2.0625, "learning_rate": 5.4430265059670526e-06, "loss": 0.5412, "step": 10338 }, { "epoch": 1.2897308390559634, "grad_norm": 1.890625, "learning_rate": 5.441249499129828e-06, "loss": 0.4814, "step": 10339 }, { "epoch": 1.2898571315810246, "grad_norm": 2.21875, "learning_rate": 5.439472673993418e-06, "loss": 0.5774, "step": 10340 }, { "epoch": 1.2899834241060857, "grad_norm": 2.03125, "learning_rate": 5.437696030628643e-06, "loss": 0.526, "step": 10341 }, { "epoch": 1.2901097166311468, "grad_norm": 1.9140625, "learning_rate": 5.4359195691063095e-06, "loss": 0.4728, "step": 10342 }, { "epoch": 1.290236009156208, "grad_norm": 1.796875, "learning_rate": 5.434143289497234e-06, "loss": 0.4637, "step": 10343 }, { "epoch": 1.2903623016812693, "grad_norm": 2.296875, "learning_rate": 5.43236719187221e-06, "loss": 0.5118, "step": 10344 }, { "epoch": 1.2904885942063304, "grad_norm": 2.046875, "learning_rate": 5.430591276302026e-06, "loss": 0.5548, "step": 10345 }, { "epoch": 1.2906148867313916, "grad_norm": 2.078125, "learning_rate": 5.428815542857469e-06, "loss": 0.4878, "step": 10346 }, { "epoch": 1.2907411792564527, "grad_norm": 1.90625, "learning_rate": 5.427039991609313e-06, "loss": 0.4865, "step": 10347 }, { "epoch": 1.2908674717815138, "grad_norm": 1.8203125, "learning_rate": 5.42526462262833e-06, "loss": 0.4561, "step": 10348 }, { "epoch": 1.2909937643065752, "grad_norm": 1.9765625, "learning_rate": 5.423489435985279e-06, "loss": 0.4464, "step": 10349 }, { "epoch": 1.2911200568316363, "grad_norm": 2.09375, "learning_rate": 5.4217144317509155e-06, "loss": 0.537, "step": 10350 }, { "epoch": 1.2912463493566975, "grad_norm": 1.8203125, "learning_rate": 5.4199396099959875e-06, "loss": 0.45, "step": 10351 }, { "epoch": 1.2913726418817586, "grad_norm": 1.96875, "learning_rate": 5.418164970791235e-06, "loss": 0.516, "step": 10352 }, { "epoch": 1.2914989344068197, "grad_norm": 2.3125, "learning_rate": 5.416390514207392e-06, "loss": 0.5676, "step": 10353 }, { "epoch": 1.291625226931881, "grad_norm": 2.046875, "learning_rate": 5.414616240315181e-06, "loss": 0.4746, "step": 10354 }, { "epoch": 1.2917515194569422, "grad_norm": 1.890625, "learning_rate": 5.412842149185324e-06, "loss": 0.4787, "step": 10355 }, { "epoch": 1.2918778119820034, "grad_norm": 1.8203125, "learning_rate": 5.411068240888529e-06, "loss": 0.4578, "step": 10356 }, { "epoch": 1.2920041045070645, "grad_norm": 2.0, "learning_rate": 5.4092945154955004e-06, "loss": 0.4611, "step": 10357 }, { "epoch": 1.2921303970321256, "grad_norm": 2.265625, "learning_rate": 5.407520973076934e-06, "loss": 0.5342, "step": 10358 }, { "epoch": 1.2922566895571868, "grad_norm": 1.9296875, "learning_rate": 5.4057476137035214e-06, "loss": 0.4633, "step": 10359 }, { "epoch": 1.2923829820822479, "grad_norm": 1.921875, "learning_rate": 5.403974437445939e-06, "loss": 0.5319, "step": 10360 }, { "epoch": 1.2925092746073092, "grad_norm": 1.9609375, "learning_rate": 5.402201444374869e-06, "loss": 0.5427, "step": 10361 }, { "epoch": 1.2926355671323704, "grad_norm": 1.953125, "learning_rate": 5.400428634560975e-06, "loss": 0.4435, "step": 10362 }, { "epoch": 1.2927618596574315, "grad_norm": 2.015625, "learning_rate": 5.398656008074916e-06, "loss": 0.5194, "step": 10363 }, { "epoch": 1.2928881521824926, "grad_norm": 1.9140625, "learning_rate": 5.396883564987347e-06, "loss": 0.4919, "step": 10364 }, { "epoch": 1.2930144447075538, "grad_norm": 1.953125, "learning_rate": 5.395111305368912e-06, "loss": 0.4607, "step": 10365 }, { "epoch": 1.2931407372326151, "grad_norm": 2.140625, "learning_rate": 5.393339229290246e-06, "loss": 0.5577, "step": 10366 }, { "epoch": 1.2932670297576763, "grad_norm": 2.0625, "learning_rate": 5.391567336821986e-06, "loss": 0.5228, "step": 10367 }, { "epoch": 1.2933933222827374, "grad_norm": 2.171875, "learning_rate": 5.38979562803475e-06, "loss": 0.5547, "step": 10368 }, { "epoch": 1.2935196148077985, "grad_norm": 1.9609375, "learning_rate": 5.388024102999156e-06, "loss": 0.5222, "step": 10369 }, { "epoch": 1.2936459073328597, "grad_norm": 1.90625, "learning_rate": 5.386252761785813e-06, "loss": 0.4809, "step": 10370 }, { "epoch": 1.293772199857921, "grad_norm": 2.078125, "learning_rate": 5.384481604465321e-06, "loss": 0.564, "step": 10371 }, { "epoch": 1.2938984923829822, "grad_norm": 1.9296875, "learning_rate": 5.3827106311082765e-06, "loss": 0.4664, "step": 10372 }, { "epoch": 1.2940247849080433, "grad_norm": 2.09375, "learning_rate": 5.380939841785264e-06, "loss": 0.5231, "step": 10373 }, { "epoch": 1.2941510774331044, "grad_norm": 2.0, "learning_rate": 5.379169236566863e-06, "loss": 0.4975, "step": 10374 }, { "epoch": 1.2942773699581656, "grad_norm": 2.09375, "learning_rate": 5.377398815523648e-06, "loss": 0.4957, "step": 10375 }, { "epoch": 1.2944036624832267, "grad_norm": 1.953125, "learning_rate": 5.375628578726181e-06, "loss": 0.5318, "step": 10376 }, { "epoch": 1.2945299550082878, "grad_norm": 2.0625, "learning_rate": 5.37385852624502e-06, "loss": 0.5386, "step": 10377 }, { "epoch": 1.2946562475333492, "grad_norm": 2.0, "learning_rate": 5.3720886581507125e-06, "loss": 0.5598, "step": 10378 }, { "epoch": 1.2947825400584103, "grad_norm": 1.9765625, "learning_rate": 5.370318974513807e-06, "loss": 0.453, "step": 10379 }, { "epoch": 1.2949088325834714, "grad_norm": 2.0625, "learning_rate": 5.368549475404836e-06, "loss": 0.4951, "step": 10380 }, { "epoch": 1.2950351251085326, "grad_norm": 1.8046875, "learning_rate": 5.366780160894328e-06, "loss": 0.4204, "step": 10381 }, { "epoch": 1.2951614176335937, "grad_norm": 1.90625, "learning_rate": 5.3650110310528025e-06, "loss": 0.4928, "step": 10382 }, { "epoch": 1.295287710158655, "grad_norm": 1.8984375, "learning_rate": 5.363242085950773e-06, "loss": 0.4789, "step": 10383 }, { "epoch": 1.2954140026837162, "grad_norm": 2.203125, "learning_rate": 5.361473325658746e-06, "loss": 0.5661, "step": 10384 }, { "epoch": 1.2955402952087773, "grad_norm": 1.9765625, "learning_rate": 5.35970475024722e-06, "loss": 0.4737, "step": 10385 }, { "epoch": 1.2956665877338385, "grad_norm": 2.21875, "learning_rate": 5.357936359786686e-06, "loss": 0.5203, "step": 10386 }, { "epoch": 1.2957928802588996, "grad_norm": 2.0625, "learning_rate": 5.3561681543476265e-06, "loss": 0.4851, "step": 10387 }, { "epoch": 1.295919172783961, "grad_norm": 1.921875, "learning_rate": 5.354400134000519e-06, "loss": 0.4624, "step": 10388 }, { "epoch": 1.296045465309022, "grad_norm": 2.140625, "learning_rate": 5.3526322988158345e-06, "loss": 0.5499, "step": 10389 }, { "epoch": 1.2961717578340832, "grad_norm": 1.8671875, "learning_rate": 5.350864648864031e-06, "loss": 0.4505, "step": 10390 }, { "epoch": 1.2962980503591444, "grad_norm": 2.125, "learning_rate": 5.349097184215564e-06, "loss": 0.4336, "step": 10391 }, { "epoch": 1.2964243428842055, "grad_norm": 1.9296875, "learning_rate": 5.347329904940881e-06, "loss": 0.5338, "step": 10392 }, { "epoch": 1.2965506354092668, "grad_norm": 2.0625, "learning_rate": 5.345562811110423e-06, "loss": 0.4739, "step": 10393 }, { "epoch": 1.2966769279343278, "grad_norm": 1.875, "learning_rate": 5.343795902794619e-06, "loss": 0.4687, "step": 10394 }, { "epoch": 1.2968032204593891, "grad_norm": 2.234375, "learning_rate": 5.3420291800638945e-06, "loss": 0.5185, "step": 10395 }, { "epoch": 1.2969295129844503, "grad_norm": 2.09375, "learning_rate": 5.340262642988665e-06, "loss": 0.5736, "step": 10396 }, { "epoch": 1.2970558055095114, "grad_norm": 1.9765625, "learning_rate": 5.3384962916393455e-06, "loss": 0.52, "step": 10397 }, { "epoch": 1.2971820980345725, "grad_norm": 2.0, "learning_rate": 5.336730126086338e-06, "loss": 0.5515, "step": 10398 }, { "epoch": 1.2973083905596337, "grad_norm": 2.015625, "learning_rate": 5.334964146400033e-06, "loss": 0.4283, "step": 10399 }, { "epoch": 1.297434683084695, "grad_norm": 2.0, "learning_rate": 5.333198352650821e-06, "loss": 0.4828, "step": 10400 }, { "epoch": 1.2975609756097561, "grad_norm": 1.9375, "learning_rate": 5.3314327449090846e-06, "loss": 0.4705, "step": 10401 }, { "epoch": 1.2976872681348173, "grad_norm": 1.9921875, "learning_rate": 5.329667323245192e-06, "loss": 0.5399, "step": 10402 }, { "epoch": 1.2978135606598784, "grad_norm": 2.046875, "learning_rate": 5.32790208772951e-06, "loss": 0.5543, "step": 10403 }, { "epoch": 1.2979398531849395, "grad_norm": 2.03125, "learning_rate": 5.326137038432399e-06, "loss": 0.5248, "step": 10404 }, { "epoch": 1.298066145710001, "grad_norm": 1.9609375, "learning_rate": 5.3243721754242066e-06, "loss": 0.5539, "step": 10405 }, { "epoch": 1.298192438235062, "grad_norm": 1.8203125, "learning_rate": 5.322607498775279e-06, "loss": 0.4846, "step": 10406 }, { "epoch": 1.2983187307601232, "grad_norm": 2.015625, "learning_rate": 5.3208430085559485e-06, "loss": 0.5353, "step": 10407 }, { "epoch": 1.2984450232851843, "grad_norm": 2.0625, "learning_rate": 5.319078704836545e-06, "loss": 0.4853, "step": 10408 }, { "epoch": 1.2985713158102454, "grad_norm": 2.015625, "learning_rate": 5.317314587687391e-06, "loss": 0.5442, "step": 10409 }, { "epoch": 1.2986976083353068, "grad_norm": 2.046875, "learning_rate": 5.315550657178798e-06, "loss": 0.5014, "step": 10410 }, { "epoch": 1.2988239008603677, "grad_norm": 1.9921875, "learning_rate": 5.313786913381073e-06, "loss": 0.4858, "step": 10411 }, { "epoch": 1.298950193385429, "grad_norm": 1.9140625, "learning_rate": 5.312023356364513e-06, "loss": 0.5032, "step": 10412 }, { "epoch": 1.2990764859104902, "grad_norm": 2.0, "learning_rate": 5.310259986199412e-06, "loss": 0.5133, "step": 10413 }, { "epoch": 1.2992027784355513, "grad_norm": 2.0625, "learning_rate": 5.308496802956047e-06, "loss": 0.4634, "step": 10414 }, { "epoch": 1.2993290709606125, "grad_norm": 2.09375, "learning_rate": 5.306733806704704e-06, "loss": 0.5403, "step": 10415 }, { "epoch": 1.2994553634856736, "grad_norm": 1.8828125, "learning_rate": 5.304970997515646e-06, "loss": 0.5116, "step": 10416 }, { "epoch": 1.299581656010735, "grad_norm": 2.015625, "learning_rate": 5.303208375459136e-06, "loss": 0.4822, "step": 10417 }, { "epoch": 1.299707948535796, "grad_norm": 2.1875, "learning_rate": 5.301445940605428e-06, "loss": 0.6516, "step": 10418 }, { "epoch": 1.2998342410608572, "grad_norm": 2.234375, "learning_rate": 5.299683693024765e-06, "loss": 0.5962, "step": 10419 }, { "epoch": 1.2999605335859183, "grad_norm": 1.9609375, "learning_rate": 5.29792163278739e-06, "loss": 0.4714, "step": 10420 }, { "epoch": 1.3000868261109795, "grad_norm": 1.8515625, "learning_rate": 5.296159759963534e-06, "loss": 0.5356, "step": 10421 }, { "epoch": 1.3002131186360408, "grad_norm": 2.546875, "learning_rate": 5.294398074623419e-06, "loss": 0.5575, "step": 10422 }, { "epoch": 1.300339411161102, "grad_norm": 2.1875, "learning_rate": 5.292636576837264e-06, "loss": 0.5709, "step": 10423 }, { "epoch": 1.300465703686163, "grad_norm": 2.0625, "learning_rate": 5.290875266675275e-06, "loss": 0.4827, "step": 10424 }, { "epoch": 1.3005919962112242, "grad_norm": 1.9921875, "learning_rate": 5.2891141442076564e-06, "loss": 0.6106, "step": 10425 }, { "epoch": 1.3007182887362854, "grad_norm": 1.90625, "learning_rate": 5.2873532095046e-06, "loss": 0.4876, "step": 10426 }, { "epoch": 1.3008445812613467, "grad_norm": 2.03125, "learning_rate": 5.285592462636294e-06, "loss": 0.5257, "step": 10427 }, { "epoch": 1.3009708737864076, "grad_norm": 1.9296875, "learning_rate": 5.283831903672918e-06, "loss": 0.4309, "step": 10428 }, { "epoch": 1.301097166311469, "grad_norm": 1.984375, "learning_rate": 5.282071532684641e-06, "loss": 0.5102, "step": 10429 }, { "epoch": 1.3012234588365301, "grad_norm": 2.0625, "learning_rate": 5.2803113497416295e-06, "loss": 0.5294, "step": 10430 }, { "epoch": 1.3013497513615913, "grad_norm": 2.0625, "learning_rate": 5.27855135491404e-06, "loss": 0.5621, "step": 10431 }, { "epoch": 1.3014760438866524, "grad_norm": 1.9609375, "learning_rate": 5.276791548272018e-06, "loss": 0.5698, "step": 10432 }, { "epoch": 1.3016023364117135, "grad_norm": 1.9453125, "learning_rate": 5.27503192988571e-06, "loss": 0.4299, "step": 10433 }, { "epoch": 1.3017286289367749, "grad_norm": 1.953125, "learning_rate": 5.27327249982525e-06, "loss": 0.4805, "step": 10434 }, { "epoch": 1.301854921461836, "grad_norm": 1.8359375, "learning_rate": 5.271513258160763e-06, "loss": 0.4665, "step": 10435 }, { "epoch": 1.3019812139868971, "grad_norm": 2.09375, "learning_rate": 5.269754204962369e-06, "loss": 0.4404, "step": 10436 }, { "epoch": 1.3021075065119583, "grad_norm": 1.9921875, "learning_rate": 5.267995340300177e-06, "loss": 0.5688, "step": 10437 }, { "epoch": 1.3022337990370194, "grad_norm": 1.8984375, "learning_rate": 5.266236664244294e-06, "loss": 0.4913, "step": 10438 }, { "epoch": 1.3023600915620808, "grad_norm": 1.8984375, "learning_rate": 5.264478176864815e-06, "loss": 0.3939, "step": 10439 }, { "epoch": 1.302486384087142, "grad_norm": 1.9375, "learning_rate": 5.262719878231831e-06, "loss": 0.5194, "step": 10440 }, { "epoch": 1.302612676612203, "grad_norm": 1.8125, "learning_rate": 5.260961768415421e-06, "loss": 0.4329, "step": 10441 }, { "epoch": 1.3027389691372642, "grad_norm": 2.015625, "learning_rate": 5.259203847485661e-06, "loss": 0.5019, "step": 10442 }, { "epoch": 1.3028652616623253, "grad_norm": 1.8984375, "learning_rate": 5.257446115512616e-06, "loss": 0.4874, "step": 10443 }, { "epoch": 1.3029915541873867, "grad_norm": 1.921875, "learning_rate": 5.2556885725663465e-06, "loss": 0.4789, "step": 10444 }, { "epoch": 1.3031178467124476, "grad_norm": 2.109375, "learning_rate": 5.2539312187169034e-06, "loss": 0.5213, "step": 10445 }, { "epoch": 1.303244139237509, "grad_norm": 2.265625, "learning_rate": 5.252174054034329e-06, "loss": 0.5327, "step": 10446 }, { "epoch": 1.30337043176257, "grad_norm": 1.9140625, "learning_rate": 5.250417078588661e-06, "loss": 0.4548, "step": 10447 }, { "epoch": 1.3034967242876312, "grad_norm": 2.078125, "learning_rate": 5.248660292449929e-06, "loss": 0.4412, "step": 10448 }, { "epoch": 1.3036230168126923, "grad_norm": 2.078125, "learning_rate": 5.2469036956881525e-06, "loss": 0.4711, "step": 10449 }, { "epoch": 1.3037493093377535, "grad_norm": 2.03125, "learning_rate": 5.245147288373344e-06, "loss": 0.5667, "step": 10450 }, { "epoch": 1.3038756018628148, "grad_norm": 1.9921875, "learning_rate": 5.2433910705755145e-06, "loss": 0.4789, "step": 10451 }, { "epoch": 1.304001894387876, "grad_norm": 1.921875, "learning_rate": 5.241635042364659e-06, "loss": 0.4745, "step": 10452 }, { "epoch": 1.304128186912937, "grad_norm": 1.8984375, "learning_rate": 5.23987920381077e-06, "loss": 0.5343, "step": 10453 }, { "epoch": 1.3042544794379982, "grad_norm": 2.109375, "learning_rate": 5.238123554983832e-06, "loss": 0.464, "step": 10454 }, { "epoch": 1.3043807719630593, "grad_norm": 2.109375, "learning_rate": 5.236368095953818e-06, "loss": 0.5088, "step": 10455 }, { "epoch": 1.3045070644881207, "grad_norm": 1.9609375, "learning_rate": 5.234612826790697e-06, "loss": 0.4449, "step": 10456 }, { "epoch": 1.3046333570131818, "grad_norm": 1.984375, "learning_rate": 5.232857747564433e-06, "loss": 0.4849, "step": 10457 }, { "epoch": 1.304759649538243, "grad_norm": 1.9765625, "learning_rate": 5.231102858344974e-06, "loss": 0.5103, "step": 10458 }, { "epoch": 1.304885942063304, "grad_norm": 1.9140625, "learning_rate": 5.22934815920227e-06, "loss": 0.5281, "step": 10459 }, { "epoch": 1.3050122345883652, "grad_norm": 1.921875, "learning_rate": 5.227593650206258e-06, "loss": 0.4821, "step": 10460 }, { "epoch": 1.3051385271134266, "grad_norm": 1.9140625, "learning_rate": 5.225839331426867e-06, "loss": 0.4896, "step": 10461 }, { "epoch": 1.3052648196384877, "grad_norm": 1.921875, "learning_rate": 5.2240852029340215e-06, "loss": 0.4497, "step": 10462 }, { "epoch": 1.3053911121635489, "grad_norm": 2.25, "learning_rate": 5.222331264797636e-06, "loss": 0.5177, "step": 10463 }, { "epoch": 1.30551740468861, "grad_norm": 2.03125, "learning_rate": 5.22057751708762e-06, "loss": 0.502, "step": 10464 }, { "epoch": 1.3056436972136711, "grad_norm": 2.046875, "learning_rate": 5.218823959873871e-06, "loss": 0.461, "step": 10465 }, { "epoch": 1.3057699897387323, "grad_norm": 1.8828125, "learning_rate": 5.217070593226282e-06, "loss": 0.5151, "step": 10466 }, { "epoch": 1.3058962822637934, "grad_norm": 2.0625, "learning_rate": 5.2153174172147405e-06, "loss": 0.4649, "step": 10467 }, { "epoch": 1.3060225747888548, "grad_norm": 2.203125, "learning_rate": 5.213564431909116e-06, "loss": 0.4886, "step": 10468 }, { "epoch": 1.3061488673139159, "grad_norm": 1.9296875, "learning_rate": 5.211811637379289e-06, "loss": 0.5095, "step": 10469 }, { "epoch": 1.306275159838977, "grad_norm": 2.140625, "learning_rate": 5.210059033695119e-06, "loss": 0.499, "step": 10470 }, { "epoch": 1.3064014523640382, "grad_norm": 1.8828125, "learning_rate": 5.208306620926456e-06, "loss": 0.4225, "step": 10471 }, { "epoch": 1.3065277448890993, "grad_norm": 1.875, "learning_rate": 5.206554399143151e-06, "loss": 0.4671, "step": 10472 }, { "epoch": 1.3066540374141606, "grad_norm": 2.125, "learning_rate": 5.204802368415042e-06, "loss": 0.4933, "step": 10473 }, { "epoch": 1.3067803299392218, "grad_norm": 2.21875, "learning_rate": 5.20305052881196e-06, "loss": 0.5214, "step": 10474 }, { "epoch": 1.306906622464283, "grad_norm": 1.9609375, "learning_rate": 5.2012988804037305e-06, "loss": 0.4662, "step": 10475 }, { "epoch": 1.307032914989344, "grad_norm": 2.078125, "learning_rate": 5.199547423260168e-06, "loss": 0.4648, "step": 10476 }, { "epoch": 1.3071592075144052, "grad_norm": 2.015625, "learning_rate": 5.197796157451084e-06, "loss": 0.5686, "step": 10477 }, { "epoch": 1.3072855000394665, "grad_norm": 2.046875, "learning_rate": 5.196045083046277e-06, "loss": 0.5213, "step": 10478 }, { "epoch": 1.3074117925645277, "grad_norm": 2.046875, "learning_rate": 5.1942942001155434e-06, "loss": 0.54, "step": 10479 }, { "epoch": 1.3075380850895888, "grad_norm": 2.21875, "learning_rate": 5.1925435087286666e-06, "loss": 0.5271, "step": 10480 }, { "epoch": 1.30766437761465, "grad_norm": 1.8984375, "learning_rate": 5.190793008955426e-06, "loss": 0.4929, "step": 10481 }, { "epoch": 1.307790670139711, "grad_norm": 2.078125, "learning_rate": 5.189042700865594e-06, "loss": 0.5269, "step": 10482 }, { "epoch": 1.3079169626647722, "grad_norm": 1.875, "learning_rate": 5.18729258452893e-06, "loss": 0.5195, "step": 10483 }, { "epoch": 1.3080432551898333, "grad_norm": 2.015625, "learning_rate": 5.185542660015192e-06, "loss": 0.4901, "step": 10484 }, { "epoch": 1.3081695477148947, "grad_norm": 1.9375, "learning_rate": 5.1837929273941275e-06, "loss": 0.4776, "step": 10485 }, { "epoch": 1.3082958402399558, "grad_norm": 2.125, "learning_rate": 5.182043386735474e-06, "loss": 0.5741, "step": 10486 }, { "epoch": 1.308422132765017, "grad_norm": 1.984375, "learning_rate": 5.180294038108967e-06, "loss": 0.4402, "step": 10487 }, { "epoch": 1.308548425290078, "grad_norm": 2.1875, "learning_rate": 5.1785448815843334e-06, "loss": 0.4365, "step": 10488 }, { "epoch": 1.3086747178151392, "grad_norm": 2.015625, "learning_rate": 5.176795917231285e-06, "loss": 0.4327, "step": 10489 }, { "epoch": 1.3088010103402006, "grad_norm": 2.15625, "learning_rate": 5.1750471451195365e-06, "loss": 0.5196, "step": 10490 }, { "epoch": 1.3089273028652617, "grad_norm": 1.953125, "learning_rate": 5.173298565318786e-06, "loss": 0.5031, "step": 10491 }, { "epoch": 1.3090535953903228, "grad_norm": 1.9453125, "learning_rate": 5.171550177898729e-06, "loss": 0.4664, "step": 10492 }, { "epoch": 1.309179887915384, "grad_norm": 1.796875, "learning_rate": 5.169801982929053e-06, "loss": 0.4649, "step": 10493 }, { "epoch": 1.309306180440445, "grad_norm": 2.015625, "learning_rate": 5.1680539804794326e-06, "loss": 0.4511, "step": 10494 }, { "epoch": 1.3094324729655065, "grad_norm": 2.109375, "learning_rate": 5.166306170619544e-06, "loss": 0.4787, "step": 10495 }, { "epoch": 1.3095587654905676, "grad_norm": 2.078125, "learning_rate": 5.1645585534190505e-06, "loss": 0.5743, "step": 10496 }, { "epoch": 1.3096850580156287, "grad_norm": 2.0, "learning_rate": 5.1628111289476025e-06, "loss": 0.4506, "step": 10497 }, { "epoch": 1.3098113505406899, "grad_norm": 2.0625, "learning_rate": 5.161063897274854e-06, "loss": 0.5211, "step": 10498 }, { "epoch": 1.309937643065751, "grad_norm": 1.890625, "learning_rate": 5.159316858470444e-06, "loss": 0.4368, "step": 10499 }, { "epoch": 1.3100639355908124, "grad_norm": 2.125, "learning_rate": 5.157570012604003e-06, "loss": 0.4637, "step": 10500 }, { "epoch": 1.3101902281158733, "grad_norm": 2.03125, "learning_rate": 5.155823359745158e-06, "loss": 0.5544, "step": 10501 }, { "epoch": 1.3103165206409346, "grad_norm": 2.03125, "learning_rate": 5.154076899963526e-06, "loss": 0.5067, "step": 10502 }, { "epoch": 1.3104428131659958, "grad_norm": 1.9140625, "learning_rate": 5.152330633328717e-06, "loss": 0.4471, "step": 10503 }, { "epoch": 1.310569105691057, "grad_norm": 1.90625, "learning_rate": 5.150584559910327e-06, "loss": 0.4959, "step": 10504 }, { "epoch": 1.310695398216118, "grad_norm": 1.8671875, "learning_rate": 5.1488386797779625e-06, "loss": 0.4673, "step": 10505 }, { "epoch": 1.3108216907411792, "grad_norm": 1.8203125, "learning_rate": 5.147092993001202e-06, "loss": 0.4892, "step": 10506 }, { "epoch": 1.3109479832662405, "grad_norm": 1.828125, "learning_rate": 5.145347499649625e-06, "loss": 0.4297, "step": 10507 }, { "epoch": 1.3110742757913016, "grad_norm": 2.03125, "learning_rate": 5.143602199792805e-06, "loss": 0.3937, "step": 10508 }, { "epoch": 1.3112005683163628, "grad_norm": 1.9609375, "learning_rate": 5.141857093500303e-06, "loss": 0.5052, "step": 10509 }, { "epoch": 1.311326860841424, "grad_norm": 1.953125, "learning_rate": 5.140112180841676e-06, "loss": 0.4733, "step": 10510 }, { "epoch": 1.311453153366485, "grad_norm": 2.015625, "learning_rate": 5.138367461886472e-06, "loss": 0.4846, "step": 10511 }, { "epoch": 1.3115794458915464, "grad_norm": 1.84375, "learning_rate": 5.136622936704231e-06, "loss": 0.4395, "step": 10512 }, { "epoch": 1.3117057384166075, "grad_norm": 2.0, "learning_rate": 5.134878605364485e-06, "loss": 0.4999, "step": 10513 }, { "epoch": 1.3118320309416687, "grad_norm": 1.9140625, "learning_rate": 5.13313446793676e-06, "loss": 0.4594, "step": 10514 }, { "epoch": 1.3119583234667298, "grad_norm": 1.90625, "learning_rate": 5.1313905244905715e-06, "loss": 0.4741, "step": 10515 }, { "epoch": 1.312084615991791, "grad_norm": 2.125, "learning_rate": 5.129646775095432e-06, "loss": 0.5088, "step": 10516 }, { "epoch": 1.3122109085168523, "grad_norm": 2.015625, "learning_rate": 5.127903219820839e-06, "loss": 0.4774, "step": 10517 }, { "epoch": 1.3123372010419132, "grad_norm": 2.0, "learning_rate": 5.126159858736289e-06, "loss": 0.505, "step": 10518 }, { "epoch": 1.3124634935669746, "grad_norm": 2.125, "learning_rate": 5.124416691911268e-06, "loss": 0.496, "step": 10519 }, { "epoch": 1.3125897860920357, "grad_norm": 1.9765625, "learning_rate": 5.122673719415255e-06, "loss": 0.4814, "step": 10520 }, { "epoch": 1.3127160786170968, "grad_norm": 2.046875, "learning_rate": 5.120930941317718e-06, "loss": 0.514, "step": 10521 }, { "epoch": 1.312842371142158, "grad_norm": 1.96875, "learning_rate": 5.119188357688119e-06, "loss": 0.4785, "step": 10522 }, { "epoch": 1.312968663667219, "grad_norm": 1.8671875, "learning_rate": 5.1174459685959175e-06, "loss": 0.5243, "step": 10523 }, { "epoch": 1.3130949561922804, "grad_norm": 2.125, "learning_rate": 5.115703774110562e-06, "loss": 0.5016, "step": 10524 }, { "epoch": 1.3132212487173416, "grad_norm": 1.984375, "learning_rate": 5.113961774301488e-06, "loss": 0.5244, "step": 10525 }, { "epoch": 1.3133475412424027, "grad_norm": 1.96875, "learning_rate": 5.1122199692381305e-06, "loss": 0.47, "step": 10526 }, { "epoch": 1.3134738337674638, "grad_norm": 2.03125, "learning_rate": 5.1104783589899105e-06, "loss": 0.4553, "step": 10527 }, { "epoch": 1.313600126292525, "grad_norm": 2.046875, "learning_rate": 5.108736943626247e-06, "loss": 0.5181, "step": 10528 }, { "epoch": 1.3137264188175863, "grad_norm": 1.9453125, "learning_rate": 5.106995723216547e-06, "loss": 0.4972, "step": 10529 }, { "epoch": 1.3138527113426475, "grad_norm": 1.9296875, "learning_rate": 5.105254697830212e-06, "loss": 0.4789, "step": 10530 }, { "epoch": 1.3139790038677086, "grad_norm": 1.921875, "learning_rate": 5.103513867536636e-06, "loss": 0.4924, "step": 10531 }, { "epoch": 1.3141052963927697, "grad_norm": 2.046875, "learning_rate": 5.101773232405201e-06, "loss": 0.5588, "step": 10532 }, { "epoch": 1.3142315889178309, "grad_norm": 1.9765625, "learning_rate": 5.10003279250529e-06, "loss": 0.5255, "step": 10533 }, { "epoch": 1.3143578814428922, "grad_norm": 1.8671875, "learning_rate": 5.098292547906267e-06, "loss": 0.5146, "step": 10534 }, { "epoch": 1.3144841739679531, "grad_norm": 2.203125, "learning_rate": 5.0965524986774975e-06, "loss": 0.4967, "step": 10535 }, { "epoch": 1.3146104664930145, "grad_norm": 2.125, "learning_rate": 5.094812644888334e-06, "loss": 0.5428, "step": 10536 }, { "epoch": 1.3147367590180756, "grad_norm": 2.140625, "learning_rate": 5.093072986608124e-06, "loss": 0.5406, "step": 10537 }, { "epoch": 1.3148630515431368, "grad_norm": 2.0625, "learning_rate": 5.091333523906205e-06, "loss": 0.5394, "step": 10538 }, { "epoch": 1.314989344068198, "grad_norm": 1.9296875, "learning_rate": 5.089594256851911e-06, "loss": 0.5649, "step": 10539 }, { "epoch": 1.315115636593259, "grad_norm": 1.890625, "learning_rate": 5.087855185514556e-06, "loss": 0.491, "step": 10540 }, { "epoch": 1.3152419291183204, "grad_norm": 2.1875, "learning_rate": 5.086116309963467e-06, "loss": 0.5195, "step": 10541 }, { "epoch": 1.3153682216433815, "grad_norm": 1.84375, "learning_rate": 5.084377630267945e-06, "loss": 0.4481, "step": 10542 }, { "epoch": 1.3154945141684427, "grad_norm": 1.9140625, "learning_rate": 5.082639146497291e-06, "loss": 0.5383, "step": 10543 }, { "epoch": 1.3156208066935038, "grad_norm": 1.9296875, "learning_rate": 5.0809008587207965e-06, "loss": 0.4804, "step": 10544 }, { "epoch": 1.315747099218565, "grad_norm": 2.03125, "learning_rate": 5.079162767007746e-06, "loss": 0.4965, "step": 10545 }, { "epoch": 1.3158733917436263, "grad_norm": 1.953125, "learning_rate": 5.0774248714274145e-06, "loss": 0.4593, "step": 10546 }, { "epoch": 1.3159996842686874, "grad_norm": 1.8515625, "learning_rate": 5.075687172049071e-06, "loss": 0.5091, "step": 10547 }, { "epoch": 1.3161259767937485, "grad_norm": 1.8828125, "learning_rate": 5.073949668941975e-06, "loss": 0.4793, "step": 10548 }, { "epoch": 1.3162522693188097, "grad_norm": 1.8984375, "learning_rate": 5.072212362175382e-06, "loss": 0.4885, "step": 10549 }, { "epoch": 1.3163785618438708, "grad_norm": 1.984375, "learning_rate": 5.070475251818532e-06, "loss": 0.4761, "step": 10550 }, { "epoch": 1.3165048543689322, "grad_norm": 2.28125, "learning_rate": 5.068738337940667e-06, "loss": 0.4979, "step": 10551 }, { "epoch": 1.316631146893993, "grad_norm": 1.984375, "learning_rate": 5.067001620611014e-06, "loss": 0.5048, "step": 10552 }, { "epoch": 1.3167574394190544, "grad_norm": 2.265625, "learning_rate": 5.065265099898795e-06, "loss": 0.5579, "step": 10553 }, { "epoch": 1.3168837319441156, "grad_norm": 1.8984375, "learning_rate": 5.063528775873221e-06, "loss": 0.4735, "step": 10554 }, { "epoch": 1.3170100244691767, "grad_norm": 2.09375, "learning_rate": 5.061792648603502e-06, "loss": 0.5021, "step": 10555 }, { "epoch": 1.3171363169942378, "grad_norm": 1.828125, "learning_rate": 5.060056718158832e-06, "loss": 0.4549, "step": 10556 }, { "epoch": 1.317262609519299, "grad_norm": 1.984375, "learning_rate": 5.0583209846084045e-06, "loss": 0.467, "step": 10557 }, { "epoch": 1.3173889020443603, "grad_norm": 2.109375, "learning_rate": 5.056585448021394e-06, "loss": 0.5294, "step": 10558 }, { "epoch": 1.3175151945694215, "grad_norm": 2.0, "learning_rate": 5.054850108466986e-06, "loss": 0.4959, "step": 10559 }, { "epoch": 1.3176414870944826, "grad_norm": 1.8046875, "learning_rate": 5.0531149660143405e-06, "loss": 0.463, "step": 10560 }, { "epoch": 1.3177677796195437, "grad_norm": 2.0625, "learning_rate": 5.051380020732617e-06, "loss": 0.5034, "step": 10561 }, { "epoch": 1.3178940721446049, "grad_norm": 1.9921875, "learning_rate": 5.049645272690966e-06, "loss": 0.4944, "step": 10562 }, { "epoch": 1.3180203646696662, "grad_norm": 2.0, "learning_rate": 5.047910721958532e-06, "loss": 0.5343, "step": 10563 }, { "epoch": 1.3181466571947273, "grad_norm": 1.9765625, "learning_rate": 5.0461763686044485e-06, "loss": 0.5128, "step": 10564 }, { "epoch": 1.3182729497197885, "grad_norm": 2.09375, "learning_rate": 5.044442212697842e-06, "loss": 0.5424, "step": 10565 }, { "epoch": 1.3183992422448496, "grad_norm": 1.9375, "learning_rate": 5.042708254307834e-06, "loss": 0.436, "step": 10566 }, { "epoch": 1.3185255347699107, "grad_norm": 1.90625, "learning_rate": 5.040974493503534e-06, "loss": 0.4267, "step": 10567 }, { "epoch": 1.318651827294972, "grad_norm": 1.9609375, "learning_rate": 5.039240930354045e-06, "loss": 0.4412, "step": 10568 }, { "epoch": 1.3187781198200332, "grad_norm": 1.9921875, "learning_rate": 5.037507564928465e-06, "loss": 0.4623, "step": 10569 }, { "epoch": 1.3189044123450944, "grad_norm": 2.09375, "learning_rate": 5.035774397295882e-06, "loss": 0.4919, "step": 10570 }, { "epoch": 1.3190307048701555, "grad_norm": 1.9140625, "learning_rate": 5.034041427525372e-06, "loss": 0.4855, "step": 10571 }, { "epoch": 1.3191569973952166, "grad_norm": 2.015625, "learning_rate": 5.032308655686011e-06, "loss": 0.4859, "step": 10572 }, { "epoch": 1.3192832899202778, "grad_norm": 2.15625, "learning_rate": 5.030576081846861e-06, "loss": 0.559, "step": 10573 }, { "epoch": 1.319409582445339, "grad_norm": 2.03125, "learning_rate": 5.028843706076978e-06, "loss": 0.5609, "step": 10574 }, { "epoch": 1.3195358749704003, "grad_norm": 2.0625, "learning_rate": 5.027111528445412e-06, "loss": 0.5324, "step": 10575 }, { "epoch": 1.3196621674954614, "grad_norm": 1.890625, "learning_rate": 5.025379549021199e-06, "loss": 0.4972, "step": 10576 }, { "epoch": 1.3197884600205225, "grad_norm": 2.046875, "learning_rate": 5.023647767873377e-06, "loss": 0.472, "step": 10577 }, { "epoch": 1.3199147525455837, "grad_norm": 2.1875, "learning_rate": 5.021916185070971e-06, "loss": 0.5536, "step": 10578 }, { "epoch": 1.3200410450706448, "grad_norm": 1.96875, "learning_rate": 5.020184800682995e-06, "loss": 0.4833, "step": 10579 }, { "epoch": 1.3201673375957061, "grad_norm": 1.84375, "learning_rate": 5.018453614778457e-06, "loss": 0.4757, "step": 10580 }, { "epoch": 1.3202936301207673, "grad_norm": 1.9453125, "learning_rate": 5.016722627426359e-06, "loss": 0.5687, "step": 10581 }, { "epoch": 1.3204199226458284, "grad_norm": 1.9453125, "learning_rate": 5.014991838695695e-06, "loss": 0.4954, "step": 10582 }, { "epoch": 1.3205462151708895, "grad_norm": 2.3125, "learning_rate": 5.0132612486554485e-06, "loss": 0.5769, "step": 10583 }, { "epoch": 1.3206725076959507, "grad_norm": 1.8828125, "learning_rate": 5.011530857374598e-06, "loss": 0.4846, "step": 10584 }, { "epoch": 1.320798800221012, "grad_norm": 1.9609375, "learning_rate": 5.009800664922112e-06, "loss": 0.5405, "step": 10585 }, { "epoch": 1.3209250927460732, "grad_norm": 2.234375, "learning_rate": 5.008070671366951e-06, "loss": 0.4909, "step": 10586 }, { "epoch": 1.3210513852711343, "grad_norm": 1.984375, "learning_rate": 5.006340876778071e-06, "loss": 0.5216, "step": 10587 }, { "epoch": 1.3211776777961954, "grad_norm": 1.9765625, "learning_rate": 5.004611281224414e-06, "loss": 0.523, "step": 10588 }, { "epoch": 1.3213039703212566, "grad_norm": 1.9296875, "learning_rate": 5.002881884774919e-06, "loss": 0.4964, "step": 10589 }, { "epoch": 1.3214302628463177, "grad_norm": 2.265625, "learning_rate": 5.0011526874985175e-06, "loss": 0.5194, "step": 10590 }, { "epoch": 1.3215565553713788, "grad_norm": 2.109375, "learning_rate": 4.999423689464128e-06, "loss": 0.5821, "step": 10591 }, { "epoch": 1.3216828478964402, "grad_norm": 1.9296875, "learning_rate": 4.997694890740666e-06, "loss": 0.4704, "step": 10592 }, { "epoch": 1.3218091404215013, "grad_norm": 1.765625, "learning_rate": 4.995966291397037e-06, "loss": 0.4231, "step": 10593 }, { "epoch": 1.3219354329465625, "grad_norm": 1.9609375, "learning_rate": 4.994237891502135e-06, "loss": 0.4918, "step": 10594 }, { "epoch": 1.3220617254716236, "grad_norm": 1.953125, "learning_rate": 4.9925096911248575e-06, "loss": 0.4942, "step": 10595 }, { "epoch": 1.3221880179966847, "grad_norm": 2.03125, "learning_rate": 4.990781690334081e-06, "loss": 0.4673, "step": 10596 }, { "epoch": 1.322314310521746, "grad_norm": 1.796875, "learning_rate": 4.989053889198682e-06, "loss": 0.4387, "step": 10597 }, { "epoch": 1.3224406030468072, "grad_norm": 1.90625, "learning_rate": 4.987326287787525e-06, "loss": 0.5113, "step": 10598 }, { "epoch": 1.3225668955718684, "grad_norm": 1.890625, "learning_rate": 4.985598886169468e-06, "loss": 0.4626, "step": 10599 }, { "epoch": 1.3226931880969295, "grad_norm": 2.09375, "learning_rate": 4.983871684413363e-06, "loss": 0.4631, "step": 10600 }, { "epoch": 1.3228194806219906, "grad_norm": 2.1875, "learning_rate": 4.9821446825880494e-06, "loss": 0.5737, "step": 10601 }, { "epoch": 1.322945773147052, "grad_norm": 2.109375, "learning_rate": 4.980417880762363e-06, "loss": 0.4594, "step": 10602 }, { "epoch": 1.323072065672113, "grad_norm": 2.015625, "learning_rate": 4.978691279005128e-06, "loss": 0.4844, "step": 10603 }, { "epoch": 1.3231983581971742, "grad_norm": 1.8125, "learning_rate": 4.976964877385166e-06, "loss": 0.4343, "step": 10604 }, { "epoch": 1.3233246507222354, "grad_norm": 2.0625, "learning_rate": 4.975238675971283e-06, "loss": 0.5184, "step": 10605 }, { "epoch": 1.3234509432472965, "grad_norm": 1.859375, "learning_rate": 4.973512674832286e-06, "loss": 0.4474, "step": 10606 }, { "epoch": 1.3235772357723576, "grad_norm": 1.890625, "learning_rate": 4.971786874036965e-06, "loss": 0.4248, "step": 10607 }, { "epoch": 1.3237035282974188, "grad_norm": 2.09375, "learning_rate": 4.970061273654109e-06, "loss": 0.5657, "step": 10608 }, { "epoch": 1.3238298208224801, "grad_norm": 1.9453125, "learning_rate": 4.968335873752494e-06, "loss": 0.4566, "step": 10609 }, { "epoch": 1.3239561133475413, "grad_norm": 2.078125, "learning_rate": 4.966610674400893e-06, "loss": 0.4979, "step": 10610 }, { "epoch": 1.3240824058726024, "grad_norm": 1.8828125, "learning_rate": 4.964885675668066e-06, "loss": 0.4325, "step": 10611 }, { "epoch": 1.3242086983976635, "grad_norm": 2.203125, "learning_rate": 4.9631608776227645e-06, "loss": 0.5183, "step": 10612 }, { "epoch": 1.3243349909227247, "grad_norm": 2.015625, "learning_rate": 4.961436280333743e-06, "loss": 0.4269, "step": 10613 }, { "epoch": 1.324461283447786, "grad_norm": 2.09375, "learning_rate": 4.9597118838697344e-06, "loss": 0.5095, "step": 10614 }, { "epoch": 1.3245875759728472, "grad_norm": 1.828125, "learning_rate": 4.957987688299469e-06, "loss": 0.4692, "step": 10615 }, { "epoch": 1.3247138684979083, "grad_norm": 2.1875, "learning_rate": 4.956263693691671e-06, "loss": 0.4698, "step": 10616 }, { "epoch": 1.3248401610229694, "grad_norm": 2.03125, "learning_rate": 4.954539900115054e-06, "loss": 0.4599, "step": 10617 }, { "epoch": 1.3249664535480306, "grad_norm": 1.9140625, "learning_rate": 4.9528163076383226e-06, "loss": 0.4532, "step": 10618 }, { "epoch": 1.325092746073092, "grad_norm": 2.140625, "learning_rate": 4.951092916330177e-06, "loss": 0.4709, "step": 10619 }, { "epoch": 1.325219038598153, "grad_norm": 2.015625, "learning_rate": 4.9493697262593065e-06, "loss": 0.5331, "step": 10620 }, { "epoch": 1.3253453311232142, "grad_norm": 2.03125, "learning_rate": 4.947646737494394e-06, "loss": 0.5518, "step": 10621 }, { "epoch": 1.3254716236482753, "grad_norm": 2.1875, "learning_rate": 4.945923950104112e-06, "loss": 0.5763, "step": 10622 }, { "epoch": 1.3255979161733364, "grad_norm": 2.015625, "learning_rate": 4.94420136415713e-06, "loss": 0.5302, "step": 10623 }, { "epoch": 1.3257242086983978, "grad_norm": 2.0625, "learning_rate": 4.942478979722102e-06, "loss": 0.5236, "step": 10624 }, { "epoch": 1.3258505012234587, "grad_norm": 1.9765625, "learning_rate": 4.940756796867681e-06, "loss": 0.5028, "step": 10625 }, { "epoch": 1.32597679374852, "grad_norm": 2.25, "learning_rate": 4.939034815662507e-06, "loss": 0.5382, "step": 10626 }, { "epoch": 1.3261030862735812, "grad_norm": 1.890625, "learning_rate": 4.937313036175217e-06, "loss": 0.4672, "step": 10627 }, { "epoch": 1.3262293787986423, "grad_norm": 1.9375, "learning_rate": 4.935591458474433e-06, "loss": 0.4984, "step": 10628 }, { "epoch": 1.3263556713237035, "grad_norm": 1.8671875, "learning_rate": 4.933870082628776e-06, "loss": 0.5056, "step": 10629 }, { "epoch": 1.3264819638487646, "grad_norm": 2.15625, "learning_rate": 4.932148908706852e-06, "loss": 0.5881, "step": 10630 }, { "epoch": 1.326608256373826, "grad_norm": 1.859375, "learning_rate": 4.930427936777269e-06, "loss": 0.4338, "step": 10631 }, { "epoch": 1.326734548898887, "grad_norm": 2.0625, "learning_rate": 4.928707166908617e-06, "loss": 0.5509, "step": 10632 }, { "epoch": 1.3268608414239482, "grad_norm": 2.078125, "learning_rate": 4.9269865991694835e-06, "loss": 0.5828, "step": 10633 }, { "epoch": 1.3269871339490094, "grad_norm": 1.890625, "learning_rate": 4.925266233628444e-06, "loss": 0.4462, "step": 10634 }, { "epoch": 1.3271134264740705, "grad_norm": 1.8125, "learning_rate": 4.923546070354069e-06, "loss": 0.5195, "step": 10635 }, { "epoch": 1.3272397189991318, "grad_norm": 1.9609375, "learning_rate": 4.921826109414921e-06, "loss": 0.4981, "step": 10636 }, { "epoch": 1.327366011524193, "grad_norm": 2.015625, "learning_rate": 4.920106350879552e-06, "loss": 0.5482, "step": 10637 }, { "epoch": 1.3274923040492541, "grad_norm": 2.0625, "learning_rate": 4.91838679481651e-06, "loss": 0.5095, "step": 10638 }, { "epoch": 1.3276185965743152, "grad_norm": 2.234375, "learning_rate": 4.9166674412943285e-06, "loss": 0.5484, "step": 10639 }, { "epoch": 1.3277448890993764, "grad_norm": 1.7890625, "learning_rate": 4.914948290381539e-06, "loss": 0.4372, "step": 10640 }, { "epoch": 1.3278711816244377, "grad_norm": 2.03125, "learning_rate": 4.9132293421466635e-06, "loss": 0.4634, "step": 10641 }, { "epoch": 1.3279974741494986, "grad_norm": 1.8203125, "learning_rate": 4.911510596658214e-06, "loss": 0.4284, "step": 10642 }, { "epoch": 1.32812376667456, "grad_norm": 1.953125, "learning_rate": 4.909792053984695e-06, "loss": 0.5502, "step": 10643 }, { "epoch": 1.3282500591996211, "grad_norm": 2.078125, "learning_rate": 4.908073714194605e-06, "loss": 0.5454, "step": 10644 }, { "epoch": 1.3283763517246823, "grad_norm": 1.984375, "learning_rate": 4.906355577356432e-06, "loss": 0.5463, "step": 10645 }, { "epoch": 1.3285026442497434, "grad_norm": 1.8515625, "learning_rate": 4.904637643538657e-06, "loss": 0.4763, "step": 10646 }, { "epoch": 1.3286289367748045, "grad_norm": 1.8671875, "learning_rate": 4.902919912809751e-06, "loss": 0.537, "step": 10647 }, { "epoch": 1.328755229299866, "grad_norm": 2.046875, "learning_rate": 4.901202385238178e-06, "loss": 0.5011, "step": 10648 }, { "epoch": 1.328881521824927, "grad_norm": 2.046875, "learning_rate": 4.899485060892401e-06, "loss": 0.5681, "step": 10649 }, { "epoch": 1.3290078143499882, "grad_norm": 1.953125, "learning_rate": 4.897767939840862e-06, "loss": 0.466, "step": 10650 }, { "epoch": 1.3291341068750493, "grad_norm": 2.03125, "learning_rate": 4.896051022152005e-06, "loss": 0.454, "step": 10651 }, { "epoch": 1.3292603994001104, "grad_norm": 1.953125, "learning_rate": 4.894334307894259e-06, "loss": 0.5181, "step": 10652 }, { "epoch": 1.3293866919251718, "grad_norm": 1.9296875, "learning_rate": 4.892617797136053e-06, "loss": 0.5039, "step": 10653 }, { "epoch": 1.329512984450233, "grad_norm": 1.921875, "learning_rate": 4.890901489945796e-06, "loss": 0.4808, "step": 10654 }, { "epoch": 1.329639276975294, "grad_norm": 1.859375, "learning_rate": 4.889185386391902e-06, "loss": 0.4504, "step": 10655 }, { "epoch": 1.3297655695003552, "grad_norm": 1.9375, "learning_rate": 4.8874694865427676e-06, "loss": 0.4237, "step": 10656 }, { "epoch": 1.3298918620254163, "grad_norm": 1.953125, "learning_rate": 4.885753790466785e-06, "loss": 0.5271, "step": 10657 }, { "epoch": 1.3300181545504777, "grad_norm": 2.046875, "learning_rate": 4.884038298232338e-06, "loss": 0.4574, "step": 10658 }, { "epoch": 1.3301444470755386, "grad_norm": 2.109375, "learning_rate": 4.882323009907802e-06, "loss": 0.5369, "step": 10659 }, { "epoch": 1.3302707396006, "grad_norm": 2.0625, "learning_rate": 4.880607925561543e-06, "loss": 0.5204, "step": 10660 }, { "epoch": 1.330397032125661, "grad_norm": 2.140625, "learning_rate": 4.878893045261923e-06, "loss": 0.5304, "step": 10661 }, { "epoch": 1.3305233246507222, "grad_norm": 2.046875, "learning_rate": 4.877178369077291e-06, "loss": 0.5119, "step": 10662 }, { "epoch": 1.3306496171757833, "grad_norm": 2.046875, "learning_rate": 4.875463897075989e-06, "loss": 0.4949, "step": 10663 }, { "epoch": 1.3307759097008445, "grad_norm": 1.9375, "learning_rate": 4.873749629326354e-06, "loss": 0.4698, "step": 10664 }, { "epoch": 1.3309022022259058, "grad_norm": 2.03125, "learning_rate": 4.872035565896711e-06, "loss": 0.4846, "step": 10665 }, { "epoch": 1.331028494750967, "grad_norm": 2.078125, "learning_rate": 4.870321706855374e-06, "loss": 0.5002, "step": 10666 }, { "epoch": 1.331154787276028, "grad_norm": 1.875, "learning_rate": 4.868608052270664e-06, "loss": 0.5203, "step": 10667 }, { "epoch": 1.3312810798010892, "grad_norm": 2.0625, "learning_rate": 4.866894602210875e-06, "loss": 0.5321, "step": 10668 }, { "epoch": 1.3314073723261504, "grad_norm": 1.9765625, "learning_rate": 4.865181356744305e-06, "loss": 0.472, "step": 10669 }, { "epoch": 1.3315336648512117, "grad_norm": 2.140625, "learning_rate": 4.863468315939238e-06, "loss": 0.4969, "step": 10670 }, { "epoch": 1.3316599573762729, "grad_norm": 2.28125, "learning_rate": 4.861755479863951e-06, "loss": 0.5165, "step": 10671 }, { "epoch": 1.331786249901334, "grad_norm": 1.90625, "learning_rate": 4.860042848586716e-06, "loss": 0.4987, "step": 10672 }, { "epoch": 1.3319125424263951, "grad_norm": 2.015625, "learning_rate": 4.858330422175791e-06, "loss": 0.5183, "step": 10673 }, { "epoch": 1.3320388349514563, "grad_norm": 2.109375, "learning_rate": 4.856618200699431e-06, "loss": 0.5472, "step": 10674 }, { "epoch": 1.3321651274765176, "grad_norm": 2.171875, "learning_rate": 4.854906184225882e-06, "loss": 0.4887, "step": 10675 }, { "epoch": 1.3322914200015785, "grad_norm": 2.125, "learning_rate": 4.853194372823379e-06, "loss": 0.4806, "step": 10676 }, { "epoch": 1.3324177125266399, "grad_norm": 2.203125, "learning_rate": 4.85148276656015e-06, "loss": 0.5369, "step": 10677 }, { "epoch": 1.332544005051701, "grad_norm": 1.96875, "learning_rate": 4.84977136550442e-06, "loss": 0.4618, "step": 10678 }, { "epoch": 1.3326702975767621, "grad_norm": 1.9609375, "learning_rate": 4.848060169724395e-06, "loss": 0.4916, "step": 10679 }, { "epoch": 1.3327965901018233, "grad_norm": 1.9140625, "learning_rate": 4.846349179288285e-06, "loss": 0.4865, "step": 10680 }, { "epoch": 1.3329228826268844, "grad_norm": 2.0, "learning_rate": 4.844638394264283e-06, "loss": 0.5908, "step": 10681 }, { "epoch": 1.3330491751519458, "grad_norm": 1.7890625, "learning_rate": 4.842927814720576e-06, "loss": 0.4589, "step": 10682 }, { "epoch": 1.333175467677007, "grad_norm": 2.09375, "learning_rate": 4.841217440725346e-06, "loss": 0.4926, "step": 10683 }, { "epoch": 1.333301760202068, "grad_norm": 1.796875, "learning_rate": 4.8395072723467585e-06, "loss": 0.425, "step": 10684 }, { "epoch": 1.3334280527271292, "grad_norm": 2.125, "learning_rate": 4.837797309652985e-06, "loss": 0.5876, "step": 10685 }, { "epoch": 1.3335543452521903, "grad_norm": 2.015625, "learning_rate": 4.836087552712178e-06, "loss": 0.4514, "step": 10686 }, { "epoch": 1.3336806377772517, "grad_norm": 1.96875, "learning_rate": 4.834378001592481e-06, "loss": 0.5327, "step": 10687 }, { "epoch": 1.3338069303023128, "grad_norm": 2.015625, "learning_rate": 4.832668656362036e-06, "loss": 0.5465, "step": 10688 }, { "epoch": 1.333933222827374, "grad_norm": 1.8359375, "learning_rate": 4.830959517088972e-06, "loss": 0.4734, "step": 10689 }, { "epoch": 1.334059515352435, "grad_norm": 1.9921875, "learning_rate": 4.829250583841411e-06, "loss": 0.5095, "step": 10690 }, { "epoch": 1.3341858078774962, "grad_norm": 2.0, "learning_rate": 4.827541856687467e-06, "loss": 0.5296, "step": 10691 }, { "epoch": 1.3343121004025575, "grad_norm": 2.109375, "learning_rate": 4.825833335695247e-06, "loss": 0.5317, "step": 10692 }, { "epoch": 1.3344383929276187, "grad_norm": 2.046875, "learning_rate": 4.824125020932846e-06, "loss": 0.4937, "step": 10693 }, { "epoch": 1.3345646854526798, "grad_norm": 1.8828125, "learning_rate": 4.822416912468355e-06, "loss": 0.4965, "step": 10694 }, { "epoch": 1.334690977977741, "grad_norm": 2.0625, "learning_rate": 4.820709010369856e-06, "loss": 0.5167, "step": 10695 }, { "epoch": 1.334817270502802, "grad_norm": 2.0, "learning_rate": 4.81900131470542e-06, "loss": 0.5104, "step": 10696 }, { "epoch": 1.3349435630278632, "grad_norm": 1.9296875, "learning_rate": 4.817293825543112e-06, "loss": 0.4829, "step": 10697 }, { "epoch": 1.3350698555529243, "grad_norm": 2.265625, "learning_rate": 4.81558654295099e-06, "loss": 0.5385, "step": 10698 }, { "epoch": 1.3351961480779857, "grad_norm": 1.859375, "learning_rate": 4.8138794669971e-06, "loss": 0.4356, "step": 10699 }, { "epoch": 1.3353224406030468, "grad_norm": 2.15625, "learning_rate": 4.812172597749483e-06, "loss": 0.5861, "step": 10700 }, { "epoch": 1.335448733128108, "grad_norm": 2.171875, "learning_rate": 4.8104659352761706e-06, "loss": 0.4776, "step": 10701 }, { "epoch": 1.335575025653169, "grad_norm": 2.09375, "learning_rate": 4.808759479645183e-06, "loss": 0.4547, "step": 10702 }, { "epoch": 1.3357013181782302, "grad_norm": 1.8984375, "learning_rate": 4.8070532309245425e-06, "loss": 0.4801, "step": 10703 }, { "epoch": 1.3358276107032916, "grad_norm": 1.9609375, "learning_rate": 4.805347189182251e-06, "loss": 0.4951, "step": 10704 }, { "epoch": 1.3359539032283527, "grad_norm": 1.96875, "learning_rate": 4.8036413544863095e-06, "loss": 0.4974, "step": 10705 }, { "epoch": 1.3360801957534139, "grad_norm": 2.0, "learning_rate": 4.801935726904707e-06, "loss": 0.5234, "step": 10706 }, { "epoch": 1.336206488278475, "grad_norm": 2.015625, "learning_rate": 4.800230306505425e-06, "loss": 0.4704, "step": 10707 }, { "epoch": 1.3363327808035361, "grad_norm": 1.96875, "learning_rate": 4.798525093356441e-06, "loss": 0.5007, "step": 10708 }, { "epoch": 1.3364590733285975, "grad_norm": 2.03125, "learning_rate": 4.796820087525716e-06, "loss": 0.5241, "step": 10709 }, { "epoch": 1.3365853658536586, "grad_norm": 1.9609375, "learning_rate": 4.795115289081212e-06, "loss": 0.5502, "step": 10710 }, { "epoch": 1.3367116583787197, "grad_norm": 2.140625, "learning_rate": 4.793410698090873e-06, "loss": 0.5125, "step": 10711 }, { "epoch": 1.3368379509037809, "grad_norm": 1.875, "learning_rate": 4.791706314622645e-06, "loss": 0.5092, "step": 10712 }, { "epoch": 1.336964243428842, "grad_norm": 1.953125, "learning_rate": 4.790002138744457e-06, "loss": 0.4536, "step": 10713 }, { "epoch": 1.3370905359539031, "grad_norm": 1.96875, "learning_rate": 4.788298170524236e-06, "loss": 0.4998, "step": 10714 }, { "epoch": 1.3372168284789643, "grad_norm": 2.1875, "learning_rate": 4.786594410029896e-06, "loss": 0.5123, "step": 10715 }, { "epoch": 1.3373431210040256, "grad_norm": 1.8671875, "learning_rate": 4.784890857329345e-06, "loss": 0.4648, "step": 10716 }, { "epoch": 1.3374694135290868, "grad_norm": 1.84375, "learning_rate": 4.7831875124904836e-06, "loss": 0.4485, "step": 10717 }, { "epoch": 1.337595706054148, "grad_norm": 1.8984375, "learning_rate": 4.781484375581202e-06, "loss": 0.41, "step": 10718 }, { "epoch": 1.337721998579209, "grad_norm": 1.9296875, "learning_rate": 4.779781446669385e-06, "loss": 0.4954, "step": 10719 }, { "epoch": 1.3378482911042702, "grad_norm": 1.9609375, "learning_rate": 4.7780787258229e-06, "loss": 0.5086, "step": 10720 }, { "epoch": 1.3379745836293315, "grad_norm": 2.03125, "learning_rate": 4.776376213109624e-06, "loss": 0.5132, "step": 10721 }, { "epoch": 1.3381008761543927, "grad_norm": 1.984375, "learning_rate": 4.774673908597411e-06, "loss": 0.4588, "step": 10722 }, { "epoch": 1.3382271686794538, "grad_norm": 1.8046875, "learning_rate": 4.7729718123541094e-06, "loss": 0.4413, "step": 10723 }, { "epoch": 1.338353461204515, "grad_norm": 1.96875, "learning_rate": 4.771269924447561e-06, "loss": 0.5108, "step": 10724 }, { "epoch": 1.338479753729576, "grad_norm": 1.984375, "learning_rate": 4.769568244945599e-06, "loss": 0.5059, "step": 10725 }, { "epoch": 1.3386060462546374, "grad_norm": 1.8828125, "learning_rate": 4.7678667739160485e-06, "loss": 0.4539, "step": 10726 }, { "epoch": 1.3387323387796985, "grad_norm": 2.0, "learning_rate": 4.766165511426727e-06, "loss": 0.4124, "step": 10727 }, { "epoch": 1.3388586313047597, "grad_norm": 2.1875, "learning_rate": 4.764464457545441e-06, "loss": 0.5299, "step": 10728 }, { "epoch": 1.3389849238298208, "grad_norm": 2.15625, "learning_rate": 4.7627636123399915e-06, "loss": 0.5689, "step": 10729 }, { "epoch": 1.339111216354882, "grad_norm": 2.15625, "learning_rate": 4.76106297587817e-06, "loss": 0.5555, "step": 10730 }, { "epoch": 1.3392375088799433, "grad_norm": 1.8671875, "learning_rate": 4.7593625482277585e-06, "loss": 0.4705, "step": 10731 }, { "epoch": 1.3393638014050042, "grad_norm": 2.03125, "learning_rate": 4.757662329456534e-06, "loss": 0.4549, "step": 10732 }, { "epoch": 1.3394900939300656, "grad_norm": 2.078125, "learning_rate": 4.755962319632261e-06, "loss": 0.5608, "step": 10733 }, { "epoch": 1.3396163864551267, "grad_norm": 1.9609375, "learning_rate": 4.7542625188226995e-06, "loss": 0.5142, "step": 10734 }, { "epoch": 1.3397426789801878, "grad_norm": 1.8984375, "learning_rate": 4.752562927095599e-06, "loss": 0.4604, "step": 10735 }, { "epoch": 1.339868971505249, "grad_norm": 1.96875, "learning_rate": 4.750863544518701e-06, "loss": 0.4948, "step": 10736 }, { "epoch": 1.33999526403031, "grad_norm": 1.8671875, "learning_rate": 4.7491643711597375e-06, "loss": 0.4239, "step": 10737 }, { "epoch": 1.3401215565553715, "grad_norm": 2.015625, "learning_rate": 4.747465407086437e-06, "loss": 0.4552, "step": 10738 }, { "epoch": 1.3402478490804326, "grad_norm": 2.0625, "learning_rate": 4.745766652366509e-06, "loss": 0.4789, "step": 10739 }, { "epoch": 1.3403741416054937, "grad_norm": 2.0, "learning_rate": 4.74406810706767e-06, "loss": 0.4655, "step": 10740 }, { "epoch": 1.3405004341305549, "grad_norm": 2.046875, "learning_rate": 4.742369771257617e-06, "loss": 0.5263, "step": 10741 }, { "epoch": 1.340626726655616, "grad_norm": 2.1875, "learning_rate": 4.74067164500404e-06, "loss": 0.5079, "step": 10742 }, { "epoch": 1.3407530191806774, "grad_norm": 1.8671875, "learning_rate": 4.738973728374625e-06, "loss": 0.4552, "step": 10743 }, { "epoch": 1.3408793117057385, "grad_norm": 1.7578125, "learning_rate": 4.737276021437045e-06, "loss": 0.4514, "step": 10744 }, { "epoch": 1.3410056042307996, "grad_norm": 1.8671875, "learning_rate": 4.7355785242589656e-06, "loss": 0.4653, "step": 10745 }, { "epoch": 1.3411318967558608, "grad_norm": 2.078125, "learning_rate": 4.733881236908047e-06, "loss": 0.4985, "step": 10746 }, { "epoch": 1.3412581892809219, "grad_norm": 1.8125, "learning_rate": 4.732184159451938e-06, "loss": 0.4858, "step": 10747 }, { "epoch": 1.3413844818059832, "grad_norm": 2.0625, "learning_rate": 4.730487291958279e-06, "loss": 0.4862, "step": 10748 }, { "epoch": 1.3415107743310442, "grad_norm": 1.9609375, "learning_rate": 4.728790634494705e-06, "loss": 0.4363, "step": 10749 }, { "epoch": 1.3416370668561055, "grad_norm": 1.8984375, "learning_rate": 4.72709418712884e-06, "loss": 0.4348, "step": 10750 }, { "epoch": 1.3417633593811666, "grad_norm": 2.015625, "learning_rate": 4.7253979499283e-06, "loss": 0.4962, "step": 10751 }, { "epoch": 1.3418896519062278, "grad_norm": 2.015625, "learning_rate": 4.723701922960693e-06, "loss": 0.5091, "step": 10752 }, { "epoch": 1.342015944431289, "grad_norm": 1.9296875, "learning_rate": 4.722006106293619e-06, "loss": 0.4969, "step": 10753 }, { "epoch": 1.34214223695635, "grad_norm": 1.984375, "learning_rate": 4.720310499994668e-06, "loss": 0.5053, "step": 10754 }, { "epoch": 1.3422685294814114, "grad_norm": 1.9609375, "learning_rate": 4.718615104131423e-06, "loss": 0.4786, "step": 10755 }, { "epoch": 1.3423948220064725, "grad_norm": 2.0625, "learning_rate": 4.71691991877146e-06, "loss": 0.5433, "step": 10756 }, { "epoch": 1.3425211145315337, "grad_norm": 2.109375, "learning_rate": 4.71522494398234e-06, "loss": 0.5114, "step": 10757 }, { "epoch": 1.3426474070565948, "grad_norm": 2.140625, "learning_rate": 4.713530179831628e-06, "loss": 0.4814, "step": 10758 }, { "epoch": 1.342773699581656, "grad_norm": 1.9296875, "learning_rate": 4.71183562638687e-06, "loss": 0.4738, "step": 10759 }, { "epoch": 1.3428999921067173, "grad_norm": 1.9609375, "learning_rate": 4.710141283715607e-06, "loss": 0.4966, "step": 10760 }, { "epoch": 1.3430262846317784, "grad_norm": 1.8046875, "learning_rate": 4.708447151885371e-06, "loss": 0.4624, "step": 10761 }, { "epoch": 1.3431525771568396, "grad_norm": 2.078125, "learning_rate": 4.706753230963685e-06, "loss": 0.5681, "step": 10762 }, { "epoch": 1.3432788696819007, "grad_norm": 1.921875, "learning_rate": 4.7050595210180664e-06, "loss": 0.461, "step": 10763 }, { "epoch": 1.3434051622069618, "grad_norm": 1.9453125, "learning_rate": 4.703366022116022e-06, "loss": 0.509, "step": 10764 }, { "epoch": 1.3435314547320232, "grad_norm": 2.03125, "learning_rate": 4.701672734325049e-06, "loss": 0.5363, "step": 10765 }, { "epoch": 1.343657747257084, "grad_norm": 2.0625, "learning_rate": 4.69997965771264e-06, "loss": 0.5316, "step": 10766 }, { "epoch": 1.3437840397821454, "grad_norm": 2.0, "learning_rate": 4.698286792346275e-06, "loss": 0.4634, "step": 10767 }, { "epoch": 1.3439103323072066, "grad_norm": 2.140625, "learning_rate": 4.69659413829343e-06, "loss": 0.575, "step": 10768 }, { "epoch": 1.3440366248322677, "grad_norm": 1.9375, "learning_rate": 4.694901695621568e-06, "loss": 0.4784, "step": 10769 }, { "epoch": 1.3441629173573288, "grad_norm": 2.140625, "learning_rate": 4.693209464398145e-06, "loss": 0.5162, "step": 10770 }, { "epoch": 1.34428920988239, "grad_norm": 2.15625, "learning_rate": 4.691517444690612e-06, "loss": 0.5194, "step": 10771 }, { "epoch": 1.3444155024074513, "grad_norm": 2.015625, "learning_rate": 4.6898256365664065e-06, "loss": 0.4784, "step": 10772 }, { "epoch": 1.3445417949325125, "grad_norm": 2.09375, "learning_rate": 4.688134040092961e-06, "loss": 0.5416, "step": 10773 }, { "epoch": 1.3446680874575736, "grad_norm": 2.0625, "learning_rate": 4.6864426553376985e-06, "loss": 0.5102, "step": 10774 }, { "epoch": 1.3447943799826347, "grad_norm": 2.234375, "learning_rate": 4.6847514823680295e-06, "loss": 0.5867, "step": 10775 }, { "epoch": 1.3449206725076959, "grad_norm": 2.03125, "learning_rate": 4.683060521251367e-06, "loss": 0.5471, "step": 10776 }, { "epoch": 1.3450469650327572, "grad_norm": 2.0, "learning_rate": 4.681369772055107e-06, "loss": 0.5167, "step": 10777 }, { "epoch": 1.3451732575578184, "grad_norm": 2.203125, "learning_rate": 4.679679234846636e-06, "loss": 0.5171, "step": 10778 }, { "epoch": 1.3452995500828795, "grad_norm": 2.015625, "learning_rate": 4.677988909693335e-06, "loss": 0.5155, "step": 10779 }, { "epoch": 1.3454258426079406, "grad_norm": 1.9296875, "learning_rate": 4.676298796662579e-06, "loss": 0.4857, "step": 10780 }, { "epoch": 1.3455521351330018, "grad_norm": 2.1875, "learning_rate": 4.6746088958217285e-06, "loss": 0.4927, "step": 10781 }, { "epoch": 1.3456784276580631, "grad_norm": 1.96875, "learning_rate": 4.672919207238141e-06, "loss": 0.4952, "step": 10782 }, { "epoch": 1.345804720183124, "grad_norm": 1.8828125, "learning_rate": 4.671229730979163e-06, "loss": 0.4923, "step": 10783 }, { "epoch": 1.3459310127081854, "grad_norm": 1.875, "learning_rate": 4.669540467112134e-06, "loss": 0.4548, "step": 10784 }, { "epoch": 1.3460573052332465, "grad_norm": 2.203125, "learning_rate": 4.667851415704381e-06, "loss": 0.5204, "step": 10785 }, { "epoch": 1.3461835977583076, "grad_norm": 1.984375, "learning_rate": 4.666162576823228e-06, "loss": 0.4826, "step": 10786 }, { "epoch": 1.3463098902833688, "grad_norm": 2.046875, "learning_rate": 4.664473950535987e-06, "loss": 0.5406, "step": 10787 }, { "epoch": 1.34643618280843, "grad_norm": 2.15625, "learning_rate": 4.662785536909963e-06, "loss": 0.4909, "step": 10788 }, { "epoch": 1.3465624753334913, "grad_norm": 1.9921875, "learning_rate": 4.661097336012451e-06, "loss": 0.5071, "step": 10789 }, { "epoch": 1.3466887678585524, "grad_norm": 2.078125, "learning_rate": 4.659409347910742e-06, "loss": 0.6716, "step": 10790 }, { "epoch": 1.3468150603836135, "grad_norm": 1.875, "learning_rate": 4.657721572672111e-06, "loss": 0.5002, "step": 10791 }, { "epoch": 1.3469413529086747, "grad_norm": 2.015625, "learning_rate": 4.65603401036383e-06, "loss": 0.4583, "step": 10792 }, { "epoch": 1.3470676454337358, "grad_norm": 1.796875, "learning_rate": 4.654346661053159e-06, "loss": 0.4175, "step": 10793 }, { "epoch": 1.3471939379587972, "grad_norm": 1.9375, "learning_rate": 4.652659524807358e-06, "loss": 0.4843, "step": 10794 }, { "epoch": 1.3473202304838583, "grad_norm": 2.25, "learning_rate": 4.650972601693668e-06, "loss": 0.5123, "step": 10795 }, { "epoch": 1.3474465230089194, "grad_norm": 2.203125, "learning_rate": 4.649285891779327e-06, "loss": 0.5031, "step": 10796 }, { "epoch": 1.3475728155339806, "grad_norm": 1.7578125, "learning_rate": 4.647599395131561e-06, "loss": 0.4331, "step": 10797 }, { "epoch": 1.3476991080590417, "grad_norm": 1.921875, "learning_rate": 4.645913111817592e-06, "loss": 0.53, "step": 10798 }, { "epoch": 1.347825400584103, "grad_norm": 1.796875, "learning_rate": 4.64422704190463e-06, "loss": 0.5018, "step": 10799 }, { "epoch": 1.3479516931091642, "grad_norm": 2.09375, "learning_rate": 4.642541185459878e-06, "loss": 0.5212, "step": 10800 }, { "epoch": 1.3480779856342253, "grad_norm": 2.140625, "learning_rate": 4.64085554255053e-06, "loss": 0.5011, "step": 10801 }, { "epoch": 1.3482042781592865, "grad_norm": 2.078125, "learning_rate": 4.639170113243772e-06, "loss": 0.5165, "step": 10802 }, { "epoch": 1.3483305706843476, "grad_norm": 2.078125, "learning_rate": 4.637484897606781e-06, "loss": 0.5198, "step": 10803 }, { "epoch": 1.3484568632094087, "grad_norm": 1.8515625, "learning_rate": 4.635799895706725e-06, "loss": 0.4299, "step": 10804 }, { "epoch": 1.3485831557344699, "grad_norm": 1.96875, "learning_rate": 4.634115107610765e-06, "loss": 0.501, "step": 10805 }, { "epoch": 1.3487094482595312, "grad_norm": 1.984375, "learning_rate": 4.632430533386052e-06, "loss": 0.4818, "step": 10806 }, { "epoch": 1.3488357407845923, "grad_norm": 1.90625, "learning_rate": 4.63074617309973e-06, "loss": 0.5392, "step": 10807 }, { "epoch": 1.3489620333096535, "grad_norm": 1.890625, "learning_rate": 4.629062026818932e-06, "loss": 0.5296, "step": 10808 }, { "epoch": 1.3490883258347146, "grad_norm": 1.8671875, "learning_rate": 4.627378094610785e-06, "loss": 0.4556, "step": 10809 }, { "epoch": 1.3492146183597757, "grad_norm": 1.984375, "learning_rate": 4.625694376542406e-06, "loss": 0.5289, "step": 10810 }, { "epoch": 1.349340910884837, "grad_norm": 2.15625, "learning_rate": 4.6240108726809016e-06, "loss": 0.502, "step": 10811 }, { "epoch": 1.3494672034098982, "grad_norm": 1.9453125, "learning_rate": 4.622327583093379e-06, "loss": 0.487, "step": 10812 }, { "epoch": 1.3495934959349594, "grad_norm": 2.15625, "learning_rate": 4.6206445078469254e-06, "loss": 0.5197, "step": 10813 }, { "epoch": 1.3497197884600205, "grad_norm": 1.9609375, "learning_rate": 4.618961647008624e-06, "loss": 0.5458, "step": 10814 }, { "epoch": 1.3498460809850816, "grad_norm": 2.359375, "learning_rate": 4.617279000645552e-06, "loss": 0.5998, "step": 10815 }, { "epoch": 1.349972373510143, "grad_norm": 1.9140625, "learning_rate": 4.615596568824773e-06, "loss": 0.4974, "step": 10816 }, { "epoch": 1.3500986660352041, "grad_norm": 2.109375, "learning_rate": 4.613914351613344e-06, "loss": 0.4923, "step": 10817 }, { "epoch": 1.3502249585602653, "grad_norm": 1.9453125, "learning_rate": 4.612232349078318e-06, "loss": 0.5196, "step": 10818 }, { "epoch": 1.3503512510853264, "grad_norm": 2.03125, "learning_rate": 4.610550561286733e-06, "loss": 0.5764, "step": 10819 }, { "epoch": 1.3504775436103875, "grad_norm": 1.9765625, "learning_rate": 4.60886898830562e-06, "loss": 0.4376, "step": 10820 }, { "epoch": 1.3506038361354487, "grad_norm": 2.0, "learning_rate": 4.6071876302020054e-06, "loss": 0.475, "step": 10821 }, { "epoch": 1.3507301286605098, "grad_norm": 2.078125, "learning_rate": 4.605506487042902e-06, "loss": 0.5832, "step": 10822 }, { "epoch": 1.3508564211855711, "grad_norm": 2.09375, "learning_rate": 4.603825558895316e-06, "loss": 0.5448, "step": 10823 }, { "epoch": 1.3509827137106323, "grad_norm": 2.03125, "learning_rate": 4.602144845826246e-06, "loss": 0.538, "step": 10824 }, { "epoch": 1.3511090062356934, "grad_norm": 1.8828125, "learning_rate": 4.600464347902681e-06, "loss": 0.4959, "step": 10825 }, { "epoch": 1.3512352987607545, "grad_norm": 1.9765625, "learning_rate": 4.598784065191601e-06, "loss": 0.5675, "step": 10826 }, { "epoch": 1.3513615912858157, "grad_norm": 2.453125, "learning_rate": 4.5971039977599796e-06, "loss": 0.5924, "step": 10827 }, { "epoch": 1.351487883810877, "grad_norm": 1.953125, "learning_rate": 4.5954241456747785e-06, "loss": 0.4516, "step": 10828 }, { "epoch": 1.3516141763359382, "grad_norm": 1.9453125, "learning_rate": 4.5937445090029495e-06, "loss": 0.5229, "step": 10829 }, { "epoch": 1.3517404688609993, "grad_norm": 1.9609375, "learning_rate": 4.592065087811446e-06, "loss": 0.4695, "step": 10830 }, { "epoch": 1.3518667613860604, "grad_norm": 2.078125, "learning_rate": 4.590385882167203e-06, "loss": 0.5111, "step": 10831 }, { "epoch": 1.3519930539111216, "grad_norm": 2.015625, "learning_rate": 4.588706892137148e-06, "loss": 0.5322, "step": 10832 }, { "epoch": 1.352119346436183, "grad_norm": 1.8359375, "learning_rate": 4.587028117788203e-06, "loss": 0.4432, "step": 10833 }, { "epoch": 1.352245638961244, "grad_norm": 2.21875, "learning_rate": 4.585349559187279e-06, "loss": 0.6802, "step": 10834 }, { "epoch": 1.3523719314863052, "grad_norm": 1.8671875, "learning_rate": 4.58367121640128e-06, "loss": 0.4842, "step": 10835 }, { "epoch": 1.3524982240113663, "grad_norm": 1.84375, "learning_rate": 4.5819930894971e-06, "loss": 0.5513, "step": 10836 }, { "epoch": 1.3526245165364275, "grad_norm": 2.0, "learning_rate": 4.580315178541625e-06, "loss": 0.5065, "step": 10837 }, { "epoch": 1.3527508090614886, "grad_norm": 1.8046875, "learning_rate": 4.578637483601733e-06, "loss": 0.4298, "step": 10838 }, { "epoch": 1.3528771015865497, "grad_norm": 2.125, "learning_rate": 4.576960004744293e-06, "loss": 0.5724, "step": 10839 }, { "epoch": 1.353003394111611, "grad_norm": 1.8203125, "learning_rate": 4.575282742036164e-06, "loss": 0.3902, "step": 10840 }, { "epoch": 1.3531296866366722, "grad_norm": 1.859375, "learning_rate": 4.5736056955442e-06, "loss": 0.4535, "step": 10841 }, { "epoch": 1.3532559791617333, "grad_norm": 1.9296875, "learning_rate": 4.571928865335242e-06, "loss": 0.5367, "step": 10842 }, { "epoch": 1.3533822716867945, "grad_norm": 2.03125, "learning_rate": 4.570252251476126e-06, "loss": 0.4796, "step": 10843 }, { "epoch": 1.3535085642118556, "grad_norm": 1.96875, "learning_rate": 4.568575854033675e-06, "loss": 0.5941, "step": 10844 }, { "epoch": 1.353634856736917, "grad_norm": 2.078125, "learning_rate": 4.56689967307471e-06, "loss": 0.521, "step": 10845 }, { "epoch": 1.353761149261978, "grad_norm": 2.171875, "learning_rate": 4.565223708666036e-06, "loss": 0.5217, "step": 10846 }, { "epoch": 1.3538874417870392, "grad_norm": 1.96875, "learning_rate": 4.563547960874453e-06, "loss": 0.5023, "step": 10847 }, { "epoch": 1.3540137343121004, "grad_norm": 1.8984375, "learning_rate": 4.561872429766756e-06, "loss": 0.4779, "step": 10848 }, { "epoch": 1.3541400268371615, "grad_norm": 1.9765625, "learning_rate": 4.560197115409726e-06, "loss": 0.4613, "step": 10849 }, { "epoch": 1.3542663193622229, "grad_norm": 2.25, "learning_rate": 4.558522017870135e-06, "loss": 0.5292, "step": 10850 }, { "epoch": 1.354392611887284, "grad_norm": 2.046875, "learning_rate": 4.55684713721475e-06, "loss": 0.5015, "step": 10851 }, { "epoch": 1.3545189044123451, "grad_norm": 1.921875, "learning_rate": 4.5551724735103285e-06, "loss": 0.5609, "step": 10852 }, { "epoch": 1.3546451969374063, "grad_norm": 1.96875, "learning_rate": 4.5534980268236175e-06, "loss": 0.4664, "step": 10853 }, { "epoch": 1.3547714894624674, "grad_norm": 2.140625, "learning_rate": 4.551823797221356e-06, "loss": 0.5262, "step": 10854 }, { "epoch": 1.3548977819875287, "grad_norm": 1.9296875, "learning_rate": 4.550149784770275e-06, "loss": 0.4526, "step": 10855 }, { "epoch": 1.3550240745125897, "grad_norm": 1.90625, "learning_rate": 4.548475989537097e-06, "loss": 0.5454, "step": 10856 }, { "epoch": 1.355150367037651, "grad_norm": 2.0, "learning_rate": 4.546802411588536e-06, "loss": 0.5077, "step": 10857 }, { "epoch": 1.3552766595627121, "grad_norm": 2.015625, "learning_rate": 4.5451290509912946e-06, "loss": 0.5028, "step": 10858 }, { "epoch": 1.3554029520877733, "grad_norm": 1.8828125, "learning_rate": 4.543455907812071e-06, "loss": 0.499, "step": 10859 }, { "epoch": 1.3555292446128344, "grad_norm": 2.09375, "learning_rate": 4.541782982117552e-06, "loss": 0.5611, "step": 10860 }, { "epoch": 1.3556555371378955, "grad_norm": 2.046875, "learning_rate": 4.540110273974416e-06, "loss": 0.5067, "step": 10861 }, { "epoch": 1.355781829662957, "grad_norm": 2.125, "learning_rate": 4.538437783449334e-06, "loss": 0.5547, "step": 10862 }, { "epoch": 1.355908122188018, "grad_norm": 1.9921875, "learning_rate": 4.536765510608963e-06, "loss": 0.4174, "step": 10863 }, { "epoch": 1.3560344147130792, "grad_norm": 2.265625, "learning_rate": 4.535093455519964e-06, "loss": 0.4679, "step": 10864 }, { "epoch": 1.3561607072381403, "grad_norm": 1.96875, "learning_rate": 4.533421618248977e-06, "loss": 0.4956, "step": 10865 }, { "epoch": 1.3562869997632014, "grad_norm": 1.8203125, "learning_rate": 4.531749998862636e-06, "loss": 0.424, "step": 10866 }, { "epoch": 1.3564132922882628, "grad_norm": 2.140625, "learning_rate": 4.53007859742757e-06, "loss": 0.5639, "step": 10867 }, { "epoch": 1.356539584813324, "grad_norm": 2.125, "learning_rate": 4.528407414010395e-06, "loss": 0.5873, "step": 10868 }, { "epoch": 1.356665877338385, "grad_norm": 2.125, "learning_rate": 4.526736448677722e-06, "loss": 0.4773, "step": 10869 }, { "epoch": 1.3567921698634462, "grad_norm": 2.03125, "learning_rate": 4.5250657014961505e-06, "loss": 0.4811, "step": 10870 }, { "epoch": 1.3569184623885073, "grad_norm": 1.984375, "learning_rate": 4.523395172532273e-06, "loss": 0.4594, "step": 10871 }, { "epoch": 1.3570447549135687, "grad_norm": 1.859375, "learning_rate": 4.521724861852673e-06, "loss": 0.5108, "step": 10872 }, { "epoch": 1.3571710474386296, "grad_norm": 1.9609375, "learning_rate": 4.520054769523925e-06, "loss": 0.5148, "step": 10873 }, { "epoch": 1.357297339963691, "grad_norm": 1.875, "learning_rate": 4.518384895612594e-06, "loss": 0.4078, "step": 10874 }, { "epoch": 1.357423632488752, "grad_norm": 2.09375, "learning_rate": 4.516715240185239e-06, "loss": 0.5115, "step": 10875 }, { "epoch": 1.3575499250138132, "grad_norm": 1.890625, "learning_rate": 4.515045803308407e-06, "loss": 0.4786, "step": 10876 }, { "epoch": 1.3576762175388744, "grad_norm": 2.234375, "learning_rate": 4.5133765850486366e-06, "loss": 0.5621, "step": 10877 }, { "epoch": 1.3578025100639355, "grad_norm": 1.9375, "learning_rate": 4.51170758547246e-06, "loss": 0.527, "step": 10878 }, { "epoch": 1.3579288025889968, "grad_norm": 1.921875, "learning_rate": 4.510038804646397e-06, "loss": 0.5362, "step": 10879 }, { "epoch": 1.358055095114058, "grad_norm": 1.8828125, "learning_rate": 4.508370242636968e-06, "loss": 0.4861, "step": 10880 }, { "epoch": 1.358181387639119, "grad_norm": 1.9609375, "learning_rate": 4.5067018995106726e-06, "loss": 0.476, "step": 10881 }, { "epoch": 1.3583076801641802, "grad_norm": 1.9765625, "learning_rate": 4.505033775334008e-06, "loss": 0.475, "step": 10882 }, { "epoch": 1.3584339726892414, "grad_norm": 2.0625, "learning_rate": 4.503365870173463e-06, "loss": 0.5426, "step": 10883 }, { "epoch": 1.3585602652143027, "grad_norm": 2.109375, "learning_rate": 4.501698184095513e-06, "loss": 0.5403, "step": 10884 }, { "epoch": 1.3586865577393639, "grad_norm": 2.015625, "learning_rate": 4.500030717166631e-06, "loss": 0.5046, "step": 10885 }, { "epoch": 1.358812850264425, "grad_norm": 1.796875, "learning_rate": 4.498363469453277e-06, "loss": 0.457, "step": 10886 }, { "epoch": 1.3589391427894861, "grad_norm": 2.046875, "learning_rate": 4.496696441021904e-06, "loss": 0.4568, "step": 10887 }, { "epoch": 1.3590654353145473, "grad_norm": 2.015625, "learning_rate": 4.495029631938955e-06, "loss": 0.5364, "step": 10888 }, { "epoch": 1.3591917278396086, "grad_norm": 1.984375, "learning_rate": 4.4933630422708655e-06, "loss": 0.4806, "step": 10889 }, { "epoch": 1.3593180203646695, "grad_norm": 1.9453125, "learning_rate": 4.491696672084062e-06, "loss": 0.4663, "step": 10890 }, { "epoch": 1.3594443128897309, "grad_norm": 2.03125, "learning_rate": 4.49003052144496e-06, "loss": 0.5204, "step": 10891 }, { "epoch": 1.359570605414792, "grad_norm": 2.078125, "learning_rate": 4.488364590419971e-06, "loss": 0.5408, "step": 10892 }, { "epoch": 1.3596968979398532, "grad_norm": 2.046875, "learning_rate": 4.486698879075494e-06, "loss": 0.4252, "step": 10893 }, { "epoch": 1.3598231904649143, "grad_norm": 1.984375, "learning_rate": 4.485033387477919e-06, "loss": 0.4651, "step": 10894 }, { "epoch": 1.3599494829899754, "grad_norm": 3.984375, "learning_rate": 4.483368115693626e-06, "loss": 0.522, "step": 10895 }, { "epoch": 1.3600757755150368, "grad_norm": 2.109375, "learning_rate": 4.481703063788997e-06, "loss": 0.6021, "step": 10896 }, { "epoch": 1.360202068040098, "grad_norm": 2.140625, "learning_rate": 4.480038231830391e-06, "loss": 0.4602, "step": 10897 }, { "epoch": 1.360328360565159, "grad_norm": 1.875, "learning_rate": 4.478373619884166e-06, "loss": 0.4595, "step": 10898 }, { "epoch": 1.3604546530902202, "grad_norm": 1.8359375, "learning_rate": 4.4767092280166695e-06, "loss": 0.3909, "step": 10899 }, { "epoch": 1.3605809456152813, "grad_norm": 2.171875, "learning_rate": 4.475045056294239e-06, "loss": 0.4625, "step": 10900 }, { "epoch": 1.3607072381403427, "grad_norm": 1.9375, "learning_rate": 4.473381104783204e-06, "loss": 0.4847, "step": 10901 }, { "epoch": 1.3608335306654038, "grad_norm": 2.03125, "learning_rate": 4.471717373549887e-06, "loss": 0.5453, "step": 10902 }, { "epoch": 1.360959823190465, "grad_norm": 1.9609375, "learning_rate": 4.4700538626606005e-06, "loss": 0.4627, "step": 10903 }, { "epoch": 1.361086115715526, "grad_norm": 2.515625, "learning_rate": 4.4683905721816474e-06, "loss": 0.5904, "step": 10904 }, { "epoch": 1.3612124082405872, "grad_norm": 2.140625, "learning_rate": 4.466727502179322e-06, "loss": 0.5349, "step": 10905 }, { "epoch": 1.3613387007656486, "grad_norm": 2.0, "learning_rate": 4.46506465271991e-06, "loss": 0.4975, "step": 10906 }, { "epoch": 1.3614649932907095, "grad_norm": 2.296875, "learning_rate": 4.46340202386969e-06, "loss": 0.4473, "step": 10907 }, { "epoch": 1.3615912858157708, "grad_norm": 2.046875, "learning_rate": 4.461739615694929e-06, "loss": 0.5389, "step": 10908 }, { "epoch": 1.361717578340832, "grad_norm": 1.9140625, "learning_rate": 4.460077428261888e-06, "loss": 0.5001, "step": 10909 }, { "epoch": 1.361843870865893, "grad_norm": 2.0, "learning_rate": 4.458415461636816e-06, "loss": 0.4511, "step": 10910 }, { "epoch": 1.3619701633909542, "grad_norm": 1.9453125, "learning_rate": 4.4567537158859534e-06, "loss": 0.4803, "step": 10911 }, { "epoch": 1.3620964559160154, "grad_norm": 2.0625, "learning_rate": 4.4550921910755385e-06, "loss": 0.5326, "step": 10912 }, { "epoch": 1.3622227484410767, "grad_norm": 2.046875, "learning_rate": 4.453430887271794e-06, "loss": 0.5421, "step": 10913 }, { "epoch": 1.3623490409661378, "grad_norm": 2.0625, "learning_rate": 4.451769804540933e-06, "loss": 0.5151, "step": 10914 }, { "epoch": 1.362475333491199, "grad_norm": 1.8984375, "learning_rate": 4.450108942949165e-06, "loss": 0.4908, "step": 10915 }, { "epoch": 1.3626016260162601, "grad_norm": 1.9921875, "learning_rate": 4.448448302562687e-06, "loss": 0.4926, "step": 10916 }, { "epoch": 1.3627279185413212, "grad_norm": 1.9609375, "learning_rate": 4.446787883447687e-06, "loss": 0.5078, "step": 10917 }, { "epoch": 1.3628542110663826, "grad_norm": 2.15625, "learning_rate": 4.445127685670347e-06, "loss": 0.5404, "step": 10918 }, { "epoch": 1.3629805035914437, "grad_norm": 2.046875, "learning_rate": 4.443467709296838e-06, "loss": 0.4923, "step": 10919 }, { "epoch": 1.3631067961165049, "grad_norm": 1.9140625, "learning_rate": 4.4418079543933225e-06, "loss": 0.4488, "step": 10920 }, { "epoch": 1.363233088641566, "grad_norm": 2.125, "learning_rate": 4.440148421025954e-06, "loss": 0.4651, "step": 10921 }, { "epoch": 1.3633593811666271, "grad_norm": 1.9140625, "learning_rate": 4.438489109260876e-06, "loss": 0.4962, "step": 10922 }, { "epoch": 1.3634856736916885, "grad_norm": 1.8984375, "learning_rate": 4.436830019164229e-06, "loss": 0.5261, "step": 10923 }, { "epoch": 1.3636119662167496, "grad_norm": 1.96875, "learning_rate": 4.435171150802135e-06, "loss": 0.5112, "step": 10924 }, { "epoch": 1.3637382587418108, "grad_norm": 2.0, "learning_rate": 4.433512504240718e-06, "loss": 0.4785, "step": 10925 }, { "epoch": 1.363864551266872, "grad_norm": 2.046875, "learning_rate": 4.431854079546083e-06, "loss": 0.5468, "step": 10926 }, { "epoch": 1.363990843791933, "grad_norm": 2.078125, "learning_rate": 4.430195876784329e-06, "loss": 0.4498, "step": 10927 }, { "epoch": 1.3641171363169942, "grad_norm": 2.015625, "learning_rate": 4.4285378960215564e-06, "loss": 0.5177, "step": 10928 }, { "epoch": 1.3642434288420553, "grad_norm": 2.09375, "learning_rate": 4.426880137323842e-06, "loss": 0.5532, "step": 10929 }, { "epoch": 1.3643697213671167, "grad_norm": 1.8828125, "learning_rate": 4.425222600757263e-06, "loss": 0.4755, "step": 10930 }, { "epoch": 1.3644960138921778, "grad_norm": 2.0625, "learning_rate": 4.4235652863878834e-06, "loss": 0.5481, "step": 10931 }, { "epoch": 1.364622306417239, "grad_norm": 1.890625, "learning_rate": 4.4219081942817586e-06, "loss": 0.4255, "step": 10932 }, { "epoch": 1.3647485989423, "grad_norm": 1.890625, "learning_rate": 4.420251324504938e-06, "loss": 0.4707, "step": 10933 }, { "epoch": 1.3648748914673612, "grad_norm": 1.96875, "learning_rate": 4.418594677123461e-06, "loss": 0.5147, "step": 10934 }, { "epoch": 1.3650011839924225, "grad_norm": 1.8984375, "learning_rate": 4.416938252203355e-06, "loss": 0.4696, "step": 10935 }, { "epoch": 1.3651274765174837, "grad_norm": 1.8515625, "learning_rate": 4.415282049810644e-06, "loss": 0.4415, "step": 10936 }, { "epoch": 1.3652537690425448, "grad_norm": 1.9765625, "learning_rate": 4.413626070011338e-06, "loss": 0.5124, "step": 10937 }, { "epoch": 1.365380061567606, "grad_norm": 1.859375, "learning_rate": 4.411970312871442e-06, "loss": 0.4202, "step": 10938 }, { "epoch": 1.365506354092667, "grad_norm": 2.09375, "learning_rate": 4.410314778456949e-06, "loss": 0.5381, "step": 10939 }, { "epoch": 1.3656326466177284, "grad_norm": 1.9921875, "learning_rate": 4.4086594668338454e-06, "loss": 0.5197, "step": 10940 }, { "epoch": 1.3657589391427896, "grad_norm": 2.0625, "learning_rate": 4.407004378068107e-06, "loss": 0.5138, "step": 10941 }, { "epoch": 1.3658852316678507, "grad_norm": 2.125, "learning_rate": 4.405349512225704e-06, "loss": 0.5086, "step": 10942 }, { "epoch": 1.3660115241929118, "grad_norm": 1.9921875, "learning_rate": 4.40369486937259e-06, "loss": 0.4517, "step": 10943 }, { "epoch": 1.366137816717973, "grad_norm": 2.21875, "learning_rate": 4.402040449574721e-06, "loss": 0.5282, "step": 10944 }, { "epoch": 1.366264109243034, "grad_norm": 2.09375, "learning_rate": 4.400386252898039e-06, "loss": 0.4617, "step": 10945 }, { "epoch": 1.3663904017680952, "grad_norm": 1.984375, "learning_rate": 4.3987322794084704e-06, "loss": 0.497, "step": 10946 }, { "epoch": 1.3665166942931566, "grad_norm": 2.0625, "learning_rate": 4.3970785291719445e-06, "loss": 0.5077, "step": 10947 }, { "epoch": 1.3666429868182177, "grad_norm": 2.0625, "learning_rate": 4.395425002254371e-06, "loss": 0.5192, "step": 10948 }, { "epoch": 1.3667692793432789, "grad_norm": 2.125, "learning_rate": 4.393771698721658e-06, "loss": 0.504, "step": 10949 }, { "epoch": 1.36689557186834, "grad_norm": 1.953125, "learning_rate": 4.3921186186397016e-06, "loss": 0.4788, "step": 10950 }, { "epoch": 1.3670218643934011, "grad_norm": 1.9765625, "learning_rate": 4.39046576207439e-06, "loss": 0.4746, "step": 10951 }, { "epoch": 1.3671481569184625, "grad_norm": 1.921875, "learning_rate": 4.388813129091601e-06, "loss": 0.5274, "step": 10952 }, { "epoch": 1.3672744494435236, "grad_norm": 2.0, "learning_rate": 4.387160719757206e-06, "loss": 0.4993, "step": 10953 }, { "epoch": 1.3674007419685847, "grad_norm": 2.078125, "learning_rate": 4.385508534137065e-06, "loss": 0.5604, "step": 10954 }, { "epoch": 1.3675270344936459, "grad_norm": 1.8046875, "learning_rate": 4.383856572297031e-06, "loss": 0.495, "step": 10955 }, { "epoch": 1.367653327018707, "grad_norm": 2.21875, "learning_rate": 4.382204834302945e-06, "loss": 0.5569, "step": 10956 }, { "epoch": 1.3677796195437684, "grad_norm": 2.21875, "learning_rate": 4.380553320220645e-06, "loss": 0.5396, "step": 10957 }, { "epoch": 1.3679059120688295, "grad_norm": 1.96875, "learning_rate": 4.378902030115953e-06, "loss": 0.4724, "step": 10958 }, { "epoch": 1.3680322045938906, "grad_norm": 2.109375, "learning_rate": 4.3772509640546844e-06, "loss": 0.5231, "step": 10959 }, { "epoch": 1.3681584971189518, "grad_norm": 1.9296875, "learning_rate": 4.375600122102652e-06, "loss": 0.5095, "step": 10960 }, { "epoch": 1.368284789644013, "grad_norm": 2.078125, "learning_rate": 4.373949504325652e-06, "loss": 0.4731, "step": 10961 }, { "epoch": 1.3684110821690743, "grad_norm": 1.875, "learning_rate": 4.372299110789473e-06, "loss": 0.4202, "step": 10962 }, { "epoch": 1.3685373746941352, "grad_norm": 2.1875, "learning_rate": 4.370648941559896e-06, "loss": 0.4678, "step": 10963 }, { "epoch": 1.3686636672191965, "grad_norm": 2.09375, "learning_rate": 4.368998996702694e-06, "loss": 0.4728, "step": 10964 }, { "epoch": 1.3687899597442577, "grad_norm": 1.9375, "learning_rate": 4.367349276283627e-06, "loss": 0.491, "step": 10965 }, { "epoch": 1.3689162522693188, "grad_norm": 2.015625, "learning_rate": 4.3656997803684534e-06, "loss": 0.4924, "step": 10966 }, { "epoch": 1.36904254479438, "grad_norm": 1.9921875, "learning_rate": 4.364050509022915e-06, "loss": 0.4569, "step": 10967 }, { "epoch": 1.369168837319441, "grad_norm": 2.0625, "learning_rate": 4.362401462312746e-06, "loss": 0.5334, "step": 10968 }, { "epoch": 1.3692951298445024, "grad_norm": 2.25, "learning_rate": 4.360752640303678e-06, "loss": 0.5619, "step": 10969 }, { "epoch": 1.3694214223695635, "grad_norm": 2.046875, "learning_rate": 4.359104043061426e-06, "loss": 0.4649, "step": 10970 }, { "epoch": 1.3695477148946247, "grad_norm": 1.953125, "learning_rate": 4.3574556706517e-06, "loss": 0.4489, "step": 10971 }, { "epoch": 1.3696740074196858, "grad_norm": 2.140625, "learning_rate": 4.355807523140199e-06, "loss": 0.5456, "step": 10972 }, { "epoch": 1.369800299944747, "grad_norm": 2.078125, "learning_rate": 4.354159600592617e-06, "loss": 0.466, "step": 10973 }, { "epoch": 1.3699265924698083, "grad_norm": 1.984375, "learning_rate": 4.352511903074633e-06, "loss": 0.5121, "step": 10974 }, { "epoch": 1.3700528849948694, "grad_norm": 2.046875, "learning_rate": 4.350864430651919e-06, "loss": 0.5083, "step": 10975 }, { "epoch": 1.3701791775199306, "grad_norm": 1.96875, "learning_rate": 4.349217183390145e-06, "loss": 0.4317, "step": 10976 }, { "epoch": 1.3703054700449917, "grad_norm": 2.125, "learning_rate": 4.347570161354964e-06, "loss": 0.5395, "step": 10977 }, { "epoch": 1.3704317625700528, "grad_norm": 1.9453125, "learning_rate": 4.345923364612022e-06, "loss": 0.4971, "step": 10978 }, { "epoch": 1.3705580550951142, "grad_norm": 2.046875, "learning_rate": 4.344276793226955e-06, "loss": 0.4719, "step": 10979 }, { "epoch": 1.370684347620175, "grad_norm": 2.3125, "learning_rate": 4.342630447265392e-06, "loss": 0.5305, "step": 10980 }, { "epoch": 1.3708106401452365, "grad_norm": 2.046875, "learning_rate": 4.340984326792954e-06, "loss": 0.5085, "step": 10981 }, { "epoch": 1.3709369326702976, "grad_norm": 2.046875, "learning_rate": 4.33933843187525e-06, "loss": 0.5641, "step": 10982 }, { "epoch": 1.3710632251953587, "grad_norm": 2.234375, "learning_rate": 4.337692762577881e-06, "loss": 0.4924, "step": 10983 }, { "epoch": 1.3711895177204199, "grad_norm": 2.046875, "learning_rate": 4.33604731896644e-06, "loss": 0.5087, "step": 10984 }, { "epoch": 1.371315810245481, "grad_norm": 1.9140625, "learning_rate": 4.33440210110651e-06, "loss": 0.498, "step": 10985 }, { "epoch": 1.3714421027705423, "grad_norm": 2.078125, "learning_rate": 4.332757109063667e-06, "loss": 0.4543, "step": 10986 }, { "epoch": 1.3715683952956035, "grad_norm": 1.859375, "learning_rate": 4.331112342903474e-06, "loss": 0.5083, "step": 10987 }, { "epoch": 1.3716946878206646, "grad_norm": 1.984375, "learning_rate": 4.329467802691489e-06, "loss": 0.5179, "step": 10988 }, { "epoch": 1.3718209803457257, "grad_norm": 1.921875, "learning_rate": 4.3278234884932594e-06, "loss": 0.4642, "step": 10989 }, { "epoch": 1.3719472728707869, "grad_norm": 1.7265625, "learning_rate": 4.326179400374323e-06, "loss": 0.3906, "step": 10990 }, { "epoch": 1.3720735653958482, "grad_norm": 2.0, "learning_rate": 4.324535538400205e-06, "loss": 0.4901, "step": 10991 }, { "epoch": 1.3721998579209094, "grad_norm": 2.015625, "learning_rate": 4.3228919026364345e-06, "loss": 0.5391, "step": 10992 }, { "epoch": 1.3723261504459705, "grad_norm": 2.046875, "learning_rate": 4.321248493148519e-06, "loss": 0.5047, "step": 10993 }, { "epoch": 1.3724524429710316, "grad_norm": 1.84375, "learning_rate": 4.319605310001961e-06, "loss": 0.4632, "step": 10994 }, { "epoch": 1.3725787354960928, "grad_norm": 1.8046875, "learning_rate": 4.317962353262252e-06, "loss": 0.4341, "step": 10995 }, { "epoch": 1.3727050280211541, "grad_norm": 1.9296875, "learning_rate": 4.316319622994878e-06, "loss": 0.4889, "step": 10996 }, { "epoch": 1.372831320546215, "grad_norm": 1.75, "learning_rate": 4.314677119265314e-06, "loss": 0.4187, "step": 10997 }, { "epoch": 1.3729576130712764, "grad_norm": 2.015625, "learning_rate": 4.313034842139025e-06, "loss": 0.5137, "step": 10998 }, { "epoch": 1.3730839055963375, "grad_norm": 2.09375, "learning_rate": 4.31139279168147e-06, "loss": 0.5078, "step": 10999 }, { "epoch": 1.3732101981213987, "grad_norm": 1.8203125, "learning_rate": 4.309750967958097e-06, "loss": 0.4427, "step": 11000 }, { "epoch": 1.3733364906464598, "grad_norm": 1.9296875, "learning_rate": 4.308109371034344e-06, "loss": 0.5247, "step": 11001 }, { "epoch": 1.373462783171521, "grad_norm": 1.859375, "learning_rate": 4.306468000975642e-06, "loss": 0.4486, "step": 11002 }, { "epoch": 1.3735890756965823, "grad_norm": 2.0625, "learning_rate": 4.304826857847412e-06, "loss": 0.4374, "step": 11003 }, { "epoch": 1.3737153682216434, "grad_norm": 1.984375, "learning_rate": 4.3031859417150655e-06, "loss": 0.48, "step": 11004 }, { "epoch": 1.3738416607467046, "grad_norm": 1.859375, "learning_rate": 4.301545252644006e-06, "loss": 0.444, "step": 11005 }, { "epoch": 1.3739679532717657, "grad_norm": 2.0, "learning_rate": 4.299904790699626e-06, "loss": 0.5363, "step": 11006 }, { "epoch": 1.3740942457968268, "grad_norm": 1.9609375, "learning_rate": 4.298264555947313e-06, "loss": 0.4756, "step": 11007 }, { "epoch": 1.3742205383218882, "grad_norm": 2.09375, "learning_rate": 4.29662454845244e-06, "loss": 0.5108, "step": 11008 }, { "epoch": 1.3743468308469493, "grad_norm": 1.8828125, "learning_rate": 4.294984768280372e-06, "loss": 0.5438, "step": 11009 }, { "epoch": 1.3744731233720104, "grad_norm": 1.9609375, "learning_rate": 4.293345215496474e-06, "loss": 0.4565, "step": 11010 }, { "epoch": 1.3745994158970716, "grad_norm": 2.25, "learning_rate": 4.291705890166089e-06, "loss": 0.563, "step": 11011 }, { "epoch": 1.3747257084221327, "grad_norm": 1.9921875, "learning_rate": 4.290066792354559e-06, "loss": 0.5522, "step": 11012 }, { "epoch": 1.374852000947194, "grad_norm": 2.046875, "learning_rate": 4.288427922127213e-06, "loss": 0.5109, "step": 11013 }, { "epoch": 1.374978293472255, "grad_norm": 2.109375, "learning_rate": 4.286789279549373e-06, "loss": 0.4566, "step": 11014 }, { "epoch": 1.3751045859973163, "grad_norm": 1.9296875, "learning_rate": 4.285150864686351e-06, "loss": 0.4357, "step": 11015 }, { "epoch": 1.3752308785223775, "grad_norm": 1.953125, "learning_rate": 4.283512677603451e-06, "loss": 0.4967, "step": 11016 }, { "epoch": 1.3753571710474386, "grad_norm": 1.8515625, "learning_rate": 4.281874718365966e-06, "loss": 0.5099, "step": 11017 }, { "epoch": 1.3754834635724997, "grad_norm": 2.0, "learning_rate": 4.280236987039183e-06, "loss": 0.4453, "step": 11018 }, { "epoch": 1.3756097560975609, "grad_norm": 1.890625, "learning_rate": 4.278599483688375e-06, "loss": 0.5208, "step": 11019 }, { "epoch": 1.3757360486226222, "grad_norm": 1.8515625, "learning_rate": 4.276962208378811e-06, "loss": 0.4924, "step": 11020 }, { "epoch": 1.3758623411476834, "grad_norm": 1.71875, "learning_rate": 4.27532516117575e-06, "loss": 0.4258, "step": 11021 }, { "epoch": 1.3759886336727445, "grad_norm": 2.203125, "learning_rate": 4.2736883421444374e-06, "loss": 0.4966, "step": 11022 }, { "epoch": 1.3761149261978056, "grad_norm": 2.03125, "learning_rate": 4.272051751350116e-06, "loss": 0.5811, "step": 11023 }, { "epoch": 1.3762412187228668, "grad_norm": 1.9296875, "learning_rate": 4.270415388858015e-06, "loss": 0.4509, "step": 11024 }, { "epoch": 1.376367511247928, "grad_norm": 1.9609375, "learning_rate": 4.268779254733356e-06, "loss": 0.4873, "step": 11025 }, { "epoch": 1.3764938037729892, "grad_norm": 1.90625, "learning_rate": 4.267143349041352e-06, "loss": 0.4824, "step": 11026 }, { "epoch": 1.3766200962980504, "grad_norm": 1.8359375, "learning_rate": 4.265507671847201e-06, "loss": 0.412, "step": 11027 }, { "epoch": 1.3767463888231115, "grad_norm": 1.984375, "learning_rate": 4.263872223216106e-06, "loss": 0.5653, "step": 11028 }, { "epoch": 1.3768726813481726, "grad_norm": 2.265625, "learning_rate": 4.2622370032132474e-06, "loss": 0.4813, "step": 11029 }, { "epoch": 1.376998973873234, "grad_norm": 1.9296875, "learning_rate": 4.260602011903801e-06, "loss": 0.5071, "step": 11030 }, { "epoch": 1.3771252663982951, "grad_norm": 1.9140625, "learning_rate": 4.258967249352937e-06, "loss": 0.5184, "step": 11031 }, { "epoch": 1.3772515589233563, "grad_norm": 2.203125, "learning_rate": 4.2573327156258075e-06, "loss": 0.4956, "step": 11032 }, { "epoch": 1.3773778514484174, "grad_norm": 2.125, "learning_rate": 4.255698410787565e-06, "loss": 0.5537, "step": 11033 }, { "epoch": 1.3775041439734785, "grad_norm": 1.96875, "learning_rate": 4.254064334903347e-06, "loss": 0.5063, "step": 11034 }, { "epoch": 1.3776304364985397, "grad_norm": 2.0, "learning_rate": 4.252430488038286e-06, "loss": 0.467, "step": 11035 }, { "epoch": 1.3777567290236008, "grad_norm": 2.078125, "learning_rate": 4.2507968702575005e-06, "loss": 0.5807, "step": 11036 }, { "epoch": 1.3778830215486622, "grad_norm": 1.8828125, "learning_rate": 4.2491634816261055e-06, "loss": 0.4919, "step": 11037 }, { "epoch": 1.3780093140737233, "grad_norm": 2.09375, "learning_rate": 4.247530322209201e-06, "loss": 0.521, "step": 11038 }, { "epoch": 1.3781356065987844, "grad_norm": 1.9296875, "learning_rate": 4.245897392071883e-06, "loss": 0.4305, "step": 11039 }, { "epoch": 1.3782618991238456, "grad_norm": 2.0, "learning_rate": 4.244264691279234e-06, "loss": 0.4813, "step": 11040 }, { "epoch": 1.3783881916489067, "grad_norm": 2.03125, "learning_rate": 4.242632219896332e-06, "loss": 0.549, "step": 11041 }, { "epoch": 1.378514484173968, "grad_norm": 2.703125, "learning_rate": 4.240999977988242e-06, "loss": 0.5833, "step": 11042 }, { "epoch": 1.3786407766990292, "grad_norm": 2.203125, "learning_rate": 4.2393679656200215e-06, "loss": 0.5706, "step": 11043 }, { "epoch": 1.3787670692240903, "grad_norm": 1.90625, "learning_rate": 4.237736182856719e-06, "loss": 0.4865, "step": 11044 }, { "epoch": 1.3788933617491514, "grad_norm": 1.9765625, "learning_rate": 4.236104629763368e-06, "loss": 0.44, "step": 11045 }, { "epoch": 1.3790196542742126, "grad_norm": 1.90625, "learning_rate": 4.234473306405008e-06, "loss": 0.4826, "step": 11046 }, { "epoch": 1.379145946799274, "grad_norm": 2.1875, "learning_rate": 4.232842212846654e-06, "loss": 0.5101, "step": 11047 }, { "epoch": 1.379272239324335, "grad_norm": 1.953125, "learning_rate": 4.231211349153319e-06, "loss": 0.4418, "step": 11048 }, { "epoch": 1.3793985318493962, "grad_norm": 1.8671875, "learning_rate": 4.229580715390005e-06, "loss": 0.4712, "step": 11049 }, { "epoch": 1.3795248243744573, "grad_norm": 1.9453125, "learning_rate": 4.227950311621703e-06, "loss": 0.4689, "step": 11050 }, { "epoch": 1.3796511168995185, "grad_norm": 1.796875, "learning_rate": 4.226320137913401e-06, "loss": 0.4573, "step": 11051 }, { "epoch": 1.3797774094245796, "grad_norm": 1.953125, "learning_rate": 4.22469019433007e-06, "loss": 0.5086, "step": 11052 }, { "epoch": 1.3799037019496407, "grad_norm": 1.9296875, "learning_rate": 4.223060480936678e-06, "loss": 0.4656, "step": 11053 }, { "epoch": 1.380029994474702, "grad_norm": 1.96875, "learning_rate": 4.221430997798181e-06, "loss": 0.4558, "step": 11054 }, { "epoch": 1.3801562869997632, "grad_norm": 2.265625, "learning_rate": 4.219801744979524e-06, "loss": 0.5194, "step": 11055 }, { "epoch": 1.3802825795248244, "grad_norm": 1.9765625, "learning_rate": 4.218172722545648e-06, "loss": 0.4588, "step": 11056 }, { "epoch": 1.3804088720498855, "grad_norm": 1.78125, "learning_rate": 4.216543930561482e-06, "loss": 0.4228, "step": 11057 }, { "epoch": 1.3805351645749466, "grad_norm": 1.953125, "learning_rate": 4.214915369091943e-06, "loss": 0.4931, "step": 11058 }, { "epoch": 1.380661457100008, "grad_norm": 1.9453125, "learning_rate": 4.213287038201943e-06, "loss": 0.4819, "step": 11059 }, { "epoch": 1.3807877496250691, "grad_norm": 1.921875, "learning_rate": 4.211658937956384e-06, "loss": 0.4432, "step": 11060 }, { "epoch": 1.3809140421501302, "grad_norm": 2.0625, "learning_rate": 4.210031068420159e-06, "loss": 0.5023, "step": 11061 }, { "epoch": 1.3810403346751914, "grad_norm": 2.09375, "learning_rate": 4.208403429658148e-06, "loss": 0.5072, "step": 11062 }, { "epoch": 1.3811666272002525, "grad_norm": 1.9609375, "learning_rate": 4.206776021735222e-06, "loss": 0.5306, "step": 11063 }, { "epoch": 1.3812929197253139, "grad_norm": 1.9296875, "learning_rate": 4.205148844716256e-06, "loss": 0.4964, "step": 11064 }, { "epoch": 1.381419212250375, "grad_norm": 2.109375, "learning_rate": 4.2035218986660975e-06, "loss": 0.5245, "step": 11065 }, { "epoch": 1.3815455047754361, "grad_norm": 1.8359375, "learning_rate": 4.201895183649595e-06, "loss": 0.4619, "step": 11066 }, { "epoch": 1.3816717973004973, "grad_norm": 2.203125, "learning_rate": 4.200268699731584e-06, "loss": 0.5488, "step": 11067 }, { "epoch": 1.3817980898255584, "grad_norm": 2.09375, "learning_rate": 4.198642446976895e-06, "loss": 0.4949, "step": 11068 }, { "epoch": 1.3819243823506195, "grad_norm": 1.921875, "learning_rate": 4.197016425450343e-06, "loss": 0.4557, "step": 11069 }, { "epoch": 1.3820506748756807, "grad_norm": 1.9296875, "learning_rate": 4.195390635216739e-06, "loss": 0.4481, "step": 11070 }, { "epoch": 1.382176967400742, "grad_norm": 2.25, "learning_rate": 4.1937650763408845e-06, "loss": 0.5655, "step": 11071 }, { "epoch": 1.3823032599258032, "grad_norm": 1.9609375, "learning_rate": 4.192139748887567e-06, "loss": 0.4907, "step": 11072 }, { "epoch": 1.3824295524508643, "grad_norm": 1.984375, "learning_rate": 4.190514652921571e-06, "loss": 0.493, "step": 11073 }, { "epoch": 1.3825558449759254, "grad_norm": 2.109375, "learning_rate": 4.188889788507667e-06, "loss": 0.5123, "step": 11074 }, { "epoch": 1.3826821375009866, "grad_norm": 2.0625, "learning_rate": 4.1872651557106204e-06, "loss": 0.4783, "step": 11075 }, { "epoch": 1.382808430026048, "grad_norm": 2.109375, "learning_rate": 4.185640754595183e-06, "loss": 0.4919, "step": 11076 }, { "epoch": 1.382934722551109, "grad_norm": 2.015625, "learning_rate": 4.184016585226101e-06, "loss": 0.5693, "step": 11077 }, { "epoch": 1.3830610150761702, "grad_norm": 1.8515625, "learning_rate": 4.182392647668108e-06, "loss": 0.4881, "step": 11078 }, { "epoch": 1.3831873076012313, "grad_norm": 1.84375, "learning_rate": 4.180768941985933e-06, "loss": 0.458, "step": 11079 }, { "epoch": 1.3833136001262925, "grad_norm": 2.046875, "learning_rate": 4.179145468244291e-06, "loss": 0.5157, "step": 11080 }, { "epoch": 1.3834398926513538, "grad_norm": 2.140625, "learning_rate": 4.177522226507886e-06, "loss": 0.5144, "step": 11081 }, { "epoch": 1.383566185176415, "grad_norm": 1.96875, "learning_rate": 4.1758992168414246e-06, "loss": 0.4697, "step": 11082 }, { "epoch": 1.383692477701476, "grad_norm": 1.984375, "learning_rate": 4.174276439309593e-06, "loss": 0.4836, "step": 11083 }, { "epoch": 1.3838187702265372, "grad_norm": 1.890625, "learning_rate": 4.172653893977069e-06, "loss": 0.4989, "step": 11084 }, { "epoch": 1.3839450627515983, "grad_norm": 2.078125, "learning_rate": 4.171031580908525e-06, "loss": 0.597, "step": 11085 }, { "epoch": 1.3840713552766597, "grad_norm": 1.9453125, "learning_rate": 4.169409500168623e-06, "loss": 0.4958, "step": 11086 }, { "epoch": 1.3841976478017206, "grad_norm": 2.09375, "learning_rate": 4.167787651822015e-06, "loss": 0.553, "step": 11087 }, { "epoch": 1.384323940326782, "grad_norm": 2.0625, "learning_rate": 4.166166035933342e-06, "loss": 0.4738, "step": 11088 }, { "epoch": 1.384450232851843, "grad_norm": 1.90625, "learning_rate": 4.16454465256724e-06, "loss": 0.4883, "step": 11089 }, { "epoch": 1.3845765253769042, "grad_norm": 2.125, "learning_rate": 4.162923501788332e-06, "loss": 0.511, "step": 11090 }, { "epoch": 1.3847028179019654, "grad_norm": 2.0625, "learning_rate": 4.161302583661235e-06, "loss": 0.5041, "step": 11091 }, { "epoch": 1.3848291104270265, "grad_norm": 2.03125, "learning_rate": 4.159681898250552e-06, "loss": 0.4963, "step": 11092 }, { "epoch": 1.3849554029520879, "grad_norm": 1.96875, "learning_rate": 4.158061445620883e-06, "loss": 0.5272, "step": 11093 }, { "epoch": 1.385081695477149, "grad_norm": 1.8984375, "learning_rate": 4.1564412258368114e-06, "loss": 0.4534, "step": 11094 }, { "epoch": 1.3852079880022101, "grad_norm": 1.96875, "learning_rate": 4.154821238962919e-06, "loss": 0.4906, "step": 11095 }, { "epoch": 1.3853342805272713, "grad_norm": 1.7578125, "learning_rate": 4.153201485063773e-06, "loss": 0.3858, "step": 11096 }, { "epoch": 1.3854605730523324, "grad_norm": 2.09375, "learning_rate": 4.151581964203931e-06, "loss": 0.5785, "step": 11097 }, { "epoch": 1.3855868655773937, "grad_norm": 2.25, "learning_rate": 4.149962676447947e-06, "loss": 0.5305, "step": 11098 }, { "epoch": 1.3857131581024549, "grad_norm": 1.875, "learning_rate": 4.148343621860355e-06, "loss": 0.4667, "step": 11099 }, { "epoch": 1.385839450627516, "grad_norm": 1.9140625, "learning_rate": 4.146724800505697e-06, "loss": 0.4702, "step": 11100 }, { "epoch": 1.3859657431525771, "grad_norm": 1.890625, "learning_rate": 4.14510621244849e-06, "loss": 0.4686, "step": 11101 }, { "epoch": 1.3860920356776383, "grad_norm": 2.0, "learning_rate": 4.143487857753246e-06, "loss": 0.4914, "step": 11102 }, { "epoch": 1.3862183282026996, "grad_norm": 1.875, "learning_rate": 4.141869736484469e-06, "loss": 0.476, "step": 11103 }, { "epoch": 1.3863446207277605, "grad_norm": 1.921875, "learning_rate": 4.140251848706656e-06, "loss": 0.4917, "step": 11104 }, { "epoch": 1.386470913252822, "grad_norm": 1.8828125, "learning_rate": 4.138634194484289e-06, "loss": 0.4875, "step": 11105 }, { "epoch": 1.386597205777883, "grad_norm": 2.078125, "learning_rate": 4.1370167738818444e-06, "loss": 0.5289, "step": 11106 }, { "epoch": 1.3867234983029442, "grad_norm": 1.921875, "learning_rate": 4.135399586963791e-06, "loss": 0.4435, "step": 11107 }, { "epoch": 1.3868497908280053, "grad_norm": 1.8671875, "learning_rate": 4.133782633794583e-06, "loss": 0.4585, "step": 11108 }, { "epoch": 1.3869760833530664, "grad_norm": 2.046875, "learning_rate": 4.1321659144386706e-06, "loss": 0.5371, "step": 11109 }, { "epoch": 1.3871023758781278, "grad_norm": 2.0, "learning_rate": 4.1305494289604894e-06, "loss": 0.5305, "step": 11110 }, { "epoch": 1.387228668403189, "grad_norm": 1.9296875, "learning_rate": 4.128933177424473e-06, "loss": 0.5516, "step": 11111 }, { "epoch": 1.38735496092825, "grad_norm": 2.15625, "learning_rate": 4.127317159895036e-06, "loss": 0.6057, "step": 11112 }, { "epoch": 1.3874812534533112, "grad_norm": 1.9453125, "learning_rate": 4.125701376436594e-06, "loss": 0.5268, "step": 11113 }, { "epoch": 1.3876075459783723, "grad_norm": 2.0, "learning_rate": 4.124085827113545e-06, "loss": 0.4754, "step": 11114 }, { "epoch": 1.3877338385034337, "grad_norm": 2.0, "learning_rate": 4.122470511990283e-06, "loss": 0.5549, "step": 11115 }, { "epoch": 1.3878601310284948, "grad_norm": 2.09375, "learning_rate": 4.120855431131189e-06, "loss": 0.4972, "step": 11116 }, { "epoch": 1.387986423553556, "grad_norm": 1.9140625, "learning_rate": 4.1192405846006354e-06, "loss": 0.4773, "step": 11117 }, { "epoch": 1.388112716078617, "grad_norm": 2.0, "learning_rate": 4.117625972462986e-06, "loss": 0.5796, "step": 11118 }, { "epoch": 1.3882390086036782, "grad_norm": 2.078125, "learning_rate": 4.116011594782599e-06, "loss": 0.5173, "step": 11119 }, { "epoch": 1.3883653011287396, "grad_norm": 2.046875, "learning_rate": 4.114397451623817e-06, "loss": 0.5846, "step": 11120 }, { "epoch": 1.3884915936538005, "grad_norm": 2.046875, "learning_rate": 4.112783543050977e-06, "loss": 0.5287, "step": 11121 }, { "epoch": 1.3886178861788618, "grad_norm": 1.859375, "learning_rate": 4.111169869128404e-06, "loss": 0.4607, "step": 11122 }, { "epoch": 1.388744178703923, "grad_norm": 1.8671875, "learning_rate": 4.109556429920416e-06, "loss": 0.4636, "step": 11123 }, { "epoch": 1.388870471228984, "grad_norm": 2.09375, "learning_rate": 4.107943225491322e-06, "loss": 0.4288, "step": 11124 }, { "epoch": 1.3889967637540452, "grad_norm": 1.921875, "learning_rate": 4.106330255905417e-06, "loss": 0.4302, "step": 11125 }, { "epoch": 1.3891230562791064, "grad_norm": 1.9609375, "learning_rate": 4.104717521226994e-06, "loss": 0.4166, "step": 11126 }, { "epoch": 1.3892493488041677, "grad_norm": 1.984375, "learning_rate": 4.103105021520331e-06, "loss": 0.4035, "step": 11127 }, { "epoch": 1.3893756413292289, "grad_norm": 1.9765625, "learning_rate": 4.101492756849697e-06, "loss": 0.4366, "step": 11128 }, { "epoch": 1.38950193385429, "grad_norm": 1.9609375, "learning_rate": 4.099880727279355e-06, "loss": 0.4631, "step": 11129 }, { "epoch": 1.3896282263793511, "grad_norm": 2.359375, "learning_rate": 4.098268932873556e-06, "loss": 0.4584, "step": 11130 }, { "epoch": 1.3897545189044123, "grad_norm": 1.828125, "learning_rate": 4.096657373696541e-06, "loss": 0.428, "step": 11131 }, { "epoch": 1.3898808114294736, "grad_norm": 2.03125, "learning_rate": 4.095046049812545e-06, "loss": 0.4782, "step": 11132 }, { "epoch": 1.3900071039545348, "grad_norm": 1.8984375, "learning_rate": 4.09343496128579e-06, "loss": 0.4869, "step": 11133 }, { "epoch": 1.3901333964795959, "grad_norm": 2.140625, "learning_rate": 4.091824108180491e-06, "loss": 0.5459, "step": 11134 }, { "epoch": 1.390259689004657, "grad_norm": 1.984375, "learning_rate": 4.0902134905608525e-06, "loss": 0.5248, "step": 11135 }, { "epoch": 1.3903859815297182, "grad_norm": 1.8828125, "learning_rate": 4.088603108491066e-06, "loss": 0.491, "step": 11136 }, { "epoch": 1.3905122740547795, "grad_norm": 1.890625, "learning_rate": 4.086992962035324e-06, "loss": 0.4169, "step": 11137 }, { "epoch": 1.3906385665798404, "grad_norm": 2.578125, "learning_rate": 4.085383051257801e-06, "loss": 0.5699, "step": 11138 }, { "epoch": 1.3907648591049018, "grad_norm": 2.09375, "learning_rate": 4.083773376222663e-06, "loss": 0.5171, "step": 11139 }, { "epoch": 1.390891151629963, "grad_norm": 2.15625, "learning_rate": 4.082163936994068e-06, "loss": 0.5759, "step": 11140 }, { "epoch": 1.391017444155024, "grad_norm": 2.15625, "learning_rate": 4.080554733636164e-06, "loss": 0.5379, "step": 11141 }, { "epoch": 1.3911437366800852, "grad_norm": 1.9453125, "learning_rate": 4.078945766213092e-06, "loss": 0.5018, "step": 11142 }, { "epoch": 1.3912700292051463, "grad_norm": 2.046875, "learning_rate": 4.077337034788978e-06, "loss": 0.4474, "step": 11143 }, { "epoch": 1.3913963217302077, "grad_norm": 1.9921875, "learning_rate": 4.075728539427946e-06, "loss": 0.4814, "step": 11144 }, { "epoch": 1.3915226142552688, "grad_norm": 2.03125, "learning_rate": 4.074120280194103e-06, "loss": 0.502, "step": 11145 }, { "epoch": 1.39164890678033, "grad_norm": 1.9296875, "learning_rate": 4.072512257151553e-06, "loss": 0.4827, "step": 11146 }, { "epoch": 1.391775199305391, "grad_norm": 1.875, "learning_rate": 4.070904470364387e-06, "loss": 0.4427, "step": 11147 }, { "epoch": 1.3919014918304522, "grad_norm": 1.9140625, "learning_rate": 4.069296919896689e-06, "loss": 0.4937, "step": 11148 }, { "epoch": 1.3920277843555136, "grad_norm": 2.0625, "learning_rate": 4.06768960581253e-06, "loss": 0.4883, "step": 11149 }, { "epoch": 1.3921540768805747, "grad_norm": 1.9453125, "learning_rate": 4.0660825281759745e-06, "loss": 0.4523, "step": 11150 }, { "epoch": 1.3922803694056358, "grad_norm": 1.8046875, "learning_rate": 4.064475687051076e-06, "loss": 0.4536, "step": 11151 }, { "epoch": 1.392406661930697, "grad_norm": 1.84375, "learning_rate": 4.062869082501881e-06, "loss": 0.414, "step": 11152 }, { "epoch": 1.392532954455758, "grad_norm": 1.8203125, "learning_rate": 4.061262714592423e-06, "loss": 0.4677, "step": 11153 }, { "epoch": 1.3926592469808194, "grad_norm": 1.96875, "learning_rate": 4.0596565833867265e-06, "loss": 0.5483, "step": 11154 }, { "epoch": 1.3927855395058806, "grad_norm": 1.9375, "learning_rate": 4.0580506889488124e-06, "loss": 0.4495, "step": 11155 }, { "epoch": 1.3929118320309417, "grad_norm": 2.0, "learning_rate": 4.056445031342687e-06, "loss": 0.4704, "step": 11156 }, { "epoch": 1.3930381245560028, "grad_norm": 1.9765625, "learning_rate": 4.0548396106323465e-06, "loss": 0.4822, "step": 11157 }, { "epoch": 1.393164417081064, "grad_norm": 2.015625, "learning_rate": 4.053234426881779e-06, "loss": 0.5454, "step": 11158 }, { "epoch": 1.393290709606125, "grad_norm": 2.015625, "learning_rate": 4.0516294801549635e-06, "loss": 0.481, "step": 11159 }, { "epoch": 1.3934170021311862, "grad_norm": 2.140625, "learning_rate": 4.050024770515869e-06, "loss": 0.4818, "step": 11160 }, { "epoch": 1.3935432946562476, "grad_norm": 1.90625, "learning_rate": 4.048420298028457e-06, "loss": 0.4898, "step": 11161 }, { "epoch": 1.3936695871813087, "grad_norm": 1.8203125, "learning_rate": 4.046816062756676e-06, "loss": 0.4488, "step": 11162 }, { "epoch": 1.3937958797063699, "grad_norm": 2.171875, "learning_rate": 4.045212064764468e-06, "loss": 0.4719, "step": 11163 }, { "epoch": 1.393922172231431, "grad_norm": 2.0625, "learning_rate": 4.0436083041157625e-06, "loss": 0.5106, "step": 11164 }, { "epoch": 1.3940484647564921, "grad_norm": 2.03125, "learning_rate": 4.042004780874485e-06, "loss": 0.4933, "step": 11165 }, { "epoch": 1.3941747572815535, "grad_norm": 1.9375, "learning_rate": 4.0404014951045445e-06, "loss": 0.4719, "step": 11166 }, { "epoch": 1.3943010498066146, "grad_norm": 2.109375, "learning_rate": 4.038798446869847e-06, "loss": 0.5271, "step": 11167 }, { "epoch": 1.3944273423316758, "grad_norm": 1.859375, "learning_rate": 4.037195636234284e-06, "loss": 0.4325, "step": 11168 }, { "epoch": 1.394553634856737, "grad_norm": 2.125, "learning_rate": 4.035593063261742e-06, "loss": 0.5323, "step": 11169 }, { "epoch": 1.394679927381798, "grad_norm": 2.125, "learning_rate": 4.033990728016094e-06, "loss": 0.5588, "step": 11170 }, { "epoch": 1.3948062199068594, "grad_norm": 1.9765625, "learning_rate": 4.032388630561204e-06, "loss": 0.4969, "step": 11171 }, { "epoch": 1.3949325124319205, "grad_norm": 2.359375, "learning_rate": 4.030786770960927e-06, "loss": 0.5341, "step": 11172 }, { "epoch": 1.3950588049569816, "grad_norm": 2.09375, "learning_rate": 4.029185149279116e-06, "loss": 0.4868, "step": 11173 }, { "epoch": 1.3951850974820428, "grad_norm": 2.015625, "learning_rate": 4.027583765579602e-06, "loss": 0.4132, "step": 11174 }, { "epoch": 1.395311390007104, "grad_norm": 2.125, "learning_rate": 4.025982619926214e-06, "loss": 0.5318, "step": 11175 }, { "epoch": 1.395437682532165, "grad_norm": 1.8828125, "learning_rate": 4.024381712382769e-06, "loss": 0.4177, "step": 11176 }, { "epoch": 1.3955639750572262, "grad_norm": 1.921875, "learning_rate": 4.022781043013076e-06, "loss": 0.4816, "step": 11177 }, { "epoch": 1.3956902675822875, "grad_norm": 1.984375, "learning_rate": 4.0211806118809335e-06, "loss": 0.5139, "step": 11178 }, { "epoch": 1.3958165601073487, "grad_norm": 2.046875, "learning_rate": 4.019580419050132e-06, "loss": 0.5738, "step": 11179 }, { "epoch": 1.3959428526324098, "grad_norm": 2.359375, "learning_rate": 4.017980464584449e-06, "loss": 0.5625, "step": 11180 }, { "epoch": 1.396069145157471, "grad_norm": 2.015625, "learning_rate": 4.016380748547657e-06, "loss": 0.4975, "step": 11181 }, { "epoch": 1.396195437682532, "grad_norm": 2.765625, "learning_rate": 4.014781271003515e-06, "loss": 0.5718, "step": 11182 }, { "epoch": 1.3963217302075934, "grad_norm": 1.96875, "learning_rate": 4.013182032015777e-06, "loss": 0.5269, "step": 11183 }, { "epoch": 1.3964480227326546, "grad_norm": 1.9921875, "learning_rate": 4.011583031648181e-06, "loss": 0.4816, "step": 11184 }, { "epoch": 1.3965743152577157, "grad_norm": 2.140625, "learning_rate": 4.009984269964462e-06, "loss": 0.6197, "step": 11185 }, { "epoch": 1.3967006077827768, "grad_norm": 1.9140625, "learning_rate": 4.008385747028343e-06, "loss": 0.457, "step": 11186 }, { "epoch": 1.396826900307838, "grad_norm": 1.9921875, "learning_rate": 4.006787462903535e-06, "loss": 0.4674, "step": 11187 }, { "epoch": 1.3969531928328993, "grad_norm": 2.421875, "learning_rate": 4.005189417653743e-06, "loss": 0.5333, "step": 11188 }, { "epoch": 1.3970794853579604, "grad_norm": 1.8671875, "learning_rate": 4.003591611342663e-06, "loss": 0.3984, "step": 11189 }, { "epoch": 1.3972057778830216, "grad_norm": 1.96875, "learning_rate": 4.001994044033974e-06, "loss": 0.4488, "step": 11190 }, { "epoch": 1.3973320704080827, "grad_norm": 2.0625, "learning_rate": 4.000396715791358e-06, "loss": 0.4493, "step": 11191 }, { "epoch": 1.3974583629331438, "grad_norm": 1.984375, "learning_rate": 3.998799626678479e-06, "loss": 0.4752, "step": 11192 }, { "epoch": 1.3975846554582052, "grad_norm": 2.203125, "learning_rate": 3.997202776758992e-06, "loss": 0.5821, "step": 11193 }, { "epoch": 1.3977109479832661, "grad_norm": 2.046875, "learning_rate": 3.995606166096543e-06, "loss": 0.515, "step": 11194 }, { "epoch": 1.3978372405083275, "grad_norm": 1.9921875, "learning_rate": 3.99400979475477e-06, "loss": 0.4391, "step": 11195 }, { "epoch": 1.3979635330333886, "grad_norm": 2.359375, "learning_rate": 3.9924136627973e-06, "loss": 0.5966, "step": 11196 }, { "epoch": 1.3980898255584497, "grad_norm": 1.921875, "learning_rate": 3.990817770287751e-06, "loss": 0.5145, "step": 11197 }, { "epoch": 1.3982161180835109, "grad_norm": 1.9296875, "learning_rate": 3.989222117289733e-06, "loss": 0.4607, "step": 11198 }, { "epoch": 1.398342410608572, "grad_norm": 1.953125, "learning_rate": 3.987626703866842e-06, "loss": 0.4745, "step": 11199 }, { "epoch": 1.3984687031336334, "grad_norm": 2.234375, "learning_rate": 3.98603153008267e-06, "loss": 0.5114, "step": 11200 }, { "epoch": 1.3985949956586945, "grad_norm": 1.8828125, "learning_rate": 3.984436596000796e-06, "loss": 0.4651, "step": 11201 }, { "epoch": 1.3987212881837556, "grad_norm": 2.0625, "learning_rate": 3.9828419016847885e-06, "loss": 0.5426, "step": 11202 }, { "epoch": 1.3988475807088168, "grad_norm": 1.9375, "learning_rate": 3.981247447198211e-06, "loss": 0.4651, "step": 11203 }, { "epoch": 1.398973873233878, "grad_norm": 1.984375, "learning_rate": 3.979653232604612e-06, "loss": 0.4619, "step": 11204 }, { "epoch": 1.3991001657589393, "grad_norm": 1.9296875, "learning_rate": 3.978059257967534e-06, "loss": 0.4663, "step": 11205 }, { "epoch": 1.3992264582840004, "grad_norm": 2.015625, "learning_rate": 3.97646552335051e-06, "loss": 0.5138, "step": 11206 }, { "epoch": 1.3993527508090615, "grad_norm": 2.03125, "learning_rate": 3.974872028817062e-06, "loss": 0.4412, "step": 11207 }, { "epoch": 1.3994790433341227, "grad_norm": 1.8671875, "learning_rate": 3.9732787744306985e-06, "loss": 0.4629, "step": 11208 }, { "epoch": 1.3996053358591838, "grad_norm": 1.9921875, "learning_rate": 3.971685760254931e-06, "loss": 0.5011, "step": 11209 }, { "epoch": 1.3997316283842451, "grad_norm": 2.421875, "learning_rate": 3.9700929863532475e-06, "loss": 0.5197, "step": 11210 }, { "epoch": 1.399857920909306, "grad_norm": 1.8125, "learning_rate": 3.968500452789135e-06, "loss": 0.4421, "step": 11211 }, { "epoch": 1.3999842134343674, "grad_norm": 1.9375, "learning_rate": 3.966908159626066e-06, "loss": 0.479, "step": 11212 }, { "epoch": 1.4001105059594285, "grad_norm": 2.140625, "learning_rate": 3.965316106927506e-06, "loss": 0.5328, "step": 11213 }, { "epoch": 1.4002367984844897, "grad_norm": 1.9296875, "learning_rate": 3.963724294756911e-06, "loss": 0.5094, "step": 11214 }, { "epoch": 1.4003630910095508, "grad_norm": 1.953125, "learning_rate": 3.962132723177726e-06, "loss": 0.533, "step": 11215 }, { "epoch": 1.400489383534612, "grad_norm": 1.859375, "learning_rate": 3.960541392253387e-06, "loss": 0.4379, "step": 11216 }, { "epoch": 1.4006156760596733, "grad_norm": 1.8046875, "learning_rate": 3.958950302047322e-06, "loss": 0.4663, "step": 11217 }, { "epoch": 1.4007419685847344, "grad_norm": 2.125, "learning_rate": 3.957359452622947e-06, "loss": 0.5419, "step": 11218 }, { "epoch": 1.4008682611097956, "grad_norm": 2.0625, "learning_rate": 3.955768844043669e-06, "loss": 0.5322, "step": 11219 }, { "epoch": 1.4009945536348567, "grad_norm": 1.921875, "learning_rate": 3.954178476372887e-06, "loss": 0.4757, "step": 11220 }, { "epoch": 1.4011208461599178, "grad_norm": 1.8671875, "learning_rate": 3.952588349673989e-06, "loss": 0.4591, "step": 11221 }, { "epoch": 1.4012471386849792, "grad_norm": 2.09375, "learning_rate": 3.950998464010354e-06, "loss": 0.4907, "step": 11222 }, { "epoch": 1.4013734312100403, "grad_norm": 2.0625, "learning_rate": 3.9494088194453485e-06, "loss": 0.5152, "step": 11223 }, { "epoch": 1.4014997237351015, "grad_norm": 2.03125, "learning_rate": 3.9478194160423345e-06, "loss": 0.4789, "step": 11224 }, { "epoch": 1.4016260162601626, "grad_norm": 1.828125, "learning_rate": 3.946230253864661e-06, "loss": 0.4387, "step": 11225 }, { "epoch": 1.4017523087852237, "grad_norm": 2.15625, "learning_rate": 3.944641332975665e-06, "loss": 0.4897, "step": 11226 }, { "epoch": 1.401878601310285, "grad_norm": 2.15625, "learning_rate": 3.943052653438682e-06, "loss": 0.4858, "step": 11227 }, { "epoch": 1.402004893835346, "grad_norm": 1.9296875, "learning_rate": 3.941464215317033e-06, "loss": 0.4968, "step": 11228 }, { "epoch": 1.4021311863604073, "grad_norm": 1.9921875, "learning_rate": 3.939876018674027e-06, "loss": 0.4444, "step": 11229 }, { "epoch": 1.4022574788854685, "grad_norm": 2.296875, "learning_rate": 3.938288063572966e-06, "loss": 0.5142, "step": 11230 }, { "epoch": 1.4023837714105296, "grad_norm": 2.109375, "learning_rate": 3.936700350077142e-06, "loss": 0.486, "step": 11231 }, { "epoch": 1.4025100639355907, "grad_norm": 2.078125, "learning_rate": 3.9351128782498385e-06, "loss": 0.5565, "step": 11232 }, { "epoch": 1.4026363564606519, "grad_norm": 2.03125, "learning_rate": 3.933525648154327e-06, "loss": 0.584, "step": 11233 }, { "epoch": 1.4027626489857132, "grad_norm": 2.03125, "learning_rate": 3.931938659853871e-06, "loss": 0.4458, "step": 11234 }, { "epoch": 1.4028889415107744, "grad_norm": 1.9375, "learning_rate": 3.930351913411724e-06, "loss": 0.4318, "step": 11235 }, { "epoch": 1.4030152340358355, "grad_norm": 1.8515625, "learning_rate": 3.928765408891131e-06, "loss": 0.489, "step": 11236 }, { "epoch": 1.4031415265608966, "grad_norm": 1.953125, "learning_rate": 3.927179146355324e-06, "loss": 0.505, "step": 11237 }, { "epoch": 1.4032678190859578, "grad_norm": 2.140625, "learning_rate": 3.925593125867531e-06, "loss": 0.4719, "step": 11238 }, { "epoch": 1.4033941116110191, "grad_norm": 1.9609375, "learning_rate": 3.924007347490962e-06, "loss": 0.4945, "step": 11239 }, { "epoch": 1.4035204041360803, "grad_norm": 1.9921875, "learning_rate": 3.922421811288828e-06, "loss": 0.5088, "step": 11240 }, { "epoch": 1.4036466966611414, "grad_norm": 1.9609375, "learning_rate": 3.9208365173243205e-06, "loss": 0.4403, "step": 11241 }, { "epoch": 1.4037729891862025, "grad_norm": 1.953125, "learning_rate": 3.919251465660626e-06, "loss": 0.5329, "step": 11242 }, { "epoch": 1.4038992817112637, "grad_norm": 2.0, "learning_rate": 3.917666656360923e-06, "loss": 0.4675, "step": 11243 }, { "epoch": 1.404025574236325, "grad_norm": 1.9921875, "learning_rate": 3.916082089488372e-06, "loss": 0.4585, "step": 11244 }, { "epoch": 1.404151866761386, "grad_norm": 2.046875, "learning_rate": 3.91449776510614e-06, "loss": 0.54, "step": 11245 }, { "epoch": 1.4042781592864473, "grad_norm": 1.9765625, "learning_rate": 3.912913683277369e-06, "loss": 0.4797, "step": 11246 }, { "epoch": 1.4044044518115084, "grad_norm": 2.09375, "learning_rate": 3.911329844065196e-06, "loss": 0.5397, "step": 11247 }, { "epoch": 1.4045307443365695, "grad_norm": 1.9609375, "learning_rate": 3.9097462475327504e-06, "loss": 0.5073, "step": 11248 }, { "epoch": 1.4046570368616307, "grad_norm": 1.9921875, "learning_rate": 3.90816289374315e-06, "loss": 0.4605, "step": 11249 }, { "epoch": 1.4047833293866918, "grad_norm": 2.171875, "learning_rate": 3.906579782759503e-06, "loss": 0.5962, "step": 11250 }, { "epoch": 1.4049096219117532, "grad_norm": 1.828125, "learning_rate": 3.904996914644909e-06, "loss": 0.4687, "step": 11251 }, { "epoch": 1.4050359144368143, "grad_norm": 1.96875, "learning_rate": 3.903414289462458e-06, "loss": 0.4624, "step": 11252 }, { "epoch": 1.4051622069618754, "grad_norm": 1.8984375, "learning_rate": 3.90183190727523e-06, "loss": 0.5158, "step": 11253 }, { "epoch": 1.4052884994869366, "grad_norm": 1.875, "learning_rate": 3.9002497681462915e-06, "loss": 0.4962, "step": 11254 }, { "epoch": 1.4054147920119977, "grad_norm": 2.125, "learning_rate": 3.898667872138706e-06, "loss": 0.4762, "step": 11255 }, { "epoch": 1.405541084537059, "grad_norm": 2.171875, "learning_rate": 3.897086219315524e-06, "loss": 0.5591, "step": 11256 }, { "epoch": 1.4056673770621202, "grad_norm": 1.921875, "learning_rate": 3.895504809739784e-06, "loss": 0.4849, "step": 11257 }, { "epoch": 1.4057936695871813, "grad_norm": 2.140625, "learning_rate": 3.89392364347452e-06, "loss": 0.5754, "step": 11258 }, { "epoch": 1.4059199621122425, "grad_norm": 1.984375, "learning_rate": 3.892342720582751e-06, "loss": 0.4376, "step": 11259 }, { "epoch": 1.4060462546373036, "grad_norm": 1.9375, "learning_rate": 3.89076204112749e-06, "loss": 0.5036, "step": 11260 }, { "epoch": 1.406172547162365, "grad_norm": 1.828125, "learning_rate": 3.88918160517174e-06, "loss": 0.4035, "step": 11261 }, { "epoch": 1.406298839687426, "grad_norm": 2.015625, "learning_rate": 3.887601412778488e-06, "loss": 0.4757, "step": 11262 }, { "epoch": 1.4064251322124872, "grad_norm": 1.828125, "learning_rate": 3.886021464010726e-06, "loss": 0.4467, "step": 11263 }, { "epoch": 1.4065514247375484, "grad_norm": 1.9296875, "learning_rate": 3.88444175893142e-06, "loss": 0.4757, "step": 11264 }, { "epoch": 1.4066777172626095, "grad_norm": 1.9609375, "learning_rate": 3.882862297603537e-06, "loss": 0.4137, "step": 11265 }, { "epoch": 1.4068040097876706, "grad_norm": 2.125, "learning_rate": 3.881283080090027e-06, "loss": 0.487, "step": 11266 }, { "epoch": 1.4069303023127318, "grad_norm": 1.9921875, "learning_rate": 3.879704106453837e-06, "loss": 0.4861, "step": 11267 }, { "epoch": 1.407056594837793, "grad_norm": 2.28125, "learning_rate": 3.878125376757899e-06, "loss": 0.5245, "step": 11268 }, { "epoch": 1.4071828873628542, "grad_norm": 2.171875, "learning_rate": 3.8765468910651385e-06, "loss": 0.5716, "step": 11269 }, { "epoch": 1.4073091798879154, "grad_norm": 1.9296875, "learning_rate": 3.8749686494384686e-06, "loss": 0.4702, "step": 11270 }, { "epoch": 1.4074354724129765, "grad_norm": 1.96875, "learning_rate": 3.873390651940796e-06, "loss": 0.4572, "step": 11271 }, { "epoch": 1.4075617649380376, "grad_norm": 2.0, "learning_rate": 3.8718128986350154e-06, "loss": 0.52, "step": 11272 }, { "epoch": 1.407688057463099, "grad_norm": 1.9140625, "learning_rate": 3.870235389584012e-06, "loss": 0.4542, "step": 11273 }, { "epoch": 1.4078143499881601, "grad_norm": 1.921875, "learning_rate": 3.868658124850662e-06, "loss": 0.454, "step": 11274 }, { "epoch": 1.4079406425132213, "grad_norm": 1.9296875, "learning_rate": 3.86708110449783e-06, "loss": 0.4616, "step": 11275 }, { "epoch": 1.4080669350382824, "grad_norm": 1.9375, "learning_rate": 3.865504328588373e-06, "loss": 0.456, "step": 11276 }, { "epoch": 1.4081932275633435, "grad_norm": 2.171875, "learning_rate": 3.863927797185137e-06, "loss": 0.6492, "step": 11277 }, { "epoch": 1.4083195200884049, "grad_norm": 2.296875, "learning_rate": 3.86235151035096e-06, "loss": 0.4555, "step": 11278 }, { "epoch": 1.408445812613466, "grad_norm": 1.7265625, "learning_rate": 3.860775468148669e-06, "loss": 0.4297, "step": 11279 }, { "epoch": 1.4085721051385272, "grad_norm": 2.25, "learning_rate": 3.859199670641077e-06, "loss": 0.5878, "step": 11280 }, { "epoch": 1.4086983976635883, "grad_norm": 1.9453125, "learning_rate": 3.857624117890998e-06, "loss": 0.471, "step": 11281 }, { "epoch": 1.4088246901886494, "grad_norm": 1.9296875, "learning_rate": 3.856048809961227e-06, "loss": 0.4332, "step": 11282 }, { "epoch": 1.4089509827137106, "grad_norm": 1.9921875, "learning_rate": 3.854473746914552e-06, "loss": 0.4742, "step": 11283 }, { "epoch": 1.4090772752387717, "grad_norm": 2.0, "learning_rate": 3.852898928813751e-06, "loss": 0.4606, "step": 11284 }, { "epoch": 1.409203567763833, "grad_norm": 1.9765625, "learning_rate": 3.851324355721592e-06, "loss": 0.5673, "step": 11285 }, { "epoch": 1.4093298602888942, "grad_norm": 2.09375, "learning_rate": 3.849750027700835e-06, "loss": 0.4566, "step": 11286 }, { "epoch": 1.4094561528139553, "grad_norm": 1.9765625, "learning_rate": 3.848175944814227e-06, "loss": 0.4832, "step": 11287 }, { "epoch": 1.4095824453390164, "grad_norm": 1.8203125, "learning_rate": 3.8466021071245095e-06, "loss": 0.4496, "step": 11288 }, { "epoch": 1.4097087378640776, "grad_norm": 1.9140625, "learning_rate": 3.84502851469441e-06, "loss": 0.528, "step": 11289 }, { "epoch": 1.409835030389139, "grad_norm": 2.0625, "learning_rate": 3.8434551675866485e-06, "loss": 0.4918, "step": 11290 }, { "epoch": 1.4099613229142, "grad_norm": 1.9375, "learning_rate": 3.8418820658639356e-06, "loss": 0.4693, "step": 11291 }, { "epoch": 1.4100876154392612, "grad_norm": 1.921875, "learning_rate": 3.84030920958897e-06, "loss": 0.4506, "step": 11292 }, { "epoch": 1.4102139079643223, "grad_norm": 1.984375, "learning_rate": 3.838736598824443e-06, "loss": 0.5824, "step": 11293 }, { "epoch": 1.4103402004893835, "grad_norm": 1.9453125, "learning_rate": 3.837164233633035e-06, "loss": 0.4909, "step": 11294 }, { "epoch": 1.4104664930144448, "grad_norm": 1.9296875, "learning_rate": 3.8355921140774164e-06, "loss": 0.4868, "step": 11295 }, { "epoch": 1.410592785539506, "grad_norm": 1.765625, "learning_rate": 3.834020240220249e-06, "loss": 0.4232, "step": 11296 }, { "epoch": 1.410719078064567, "grad_norm": 2.015625, "learning_rate": 3.832448612124182e-06, "loss": 0.537, "step": 11297 }, { "epoch": 1.4108453705896282, "grad_norm": 2.125, "learning_rate": 3.830877229851855e-06, "loss": 0.5014, "step": 11298 }, { "epoch": 1.4109716631146894, "grad_norm": 2.078125, "learning_rate": 3.829306093465905e-06, "loss": 0.4885, "step": 11299 }, { "epoch": 1.4110979556397505, "grad_norm": 1.984375, "learning_rate": 3.827735203028953e-06, "loss": 0.4807, "step": 11300 }, { "epoch": 1.4112242481648116, "grad_norm": 2.0, "learning_rate": 3.826164558603608e-06, "loss": 0.4634, "step": 11301 }, { "epoch": 1.411350540689873, "grad_norm": 2.0, "learning_rate": 3.824594160252473e-06, "loss": 0.4678, "step": 11302 }, { "epoch": 1.4114768332149341, "grad_norm": 2.359375, "learning_rate": 3.82302400803814e-06, "loss": 0.6342, "step": 11303 }, { "epoch": 1.4116031257399952, "grad_norm": 1.9765625, "learning_rate": 3.821454102023192e-06, "loss": 0.4245, "step": 11304 }, { "epoch": 1.4117294182650564, "grad_norm": 2.09375, "learning_rate": 3.819884442270201e-06, "loss": 0.5346, "step": 11305 }, { "epoch": 1.4118557107901175, "grad_norm": 1.96875, "learning_rate": 3.818315028841732e-06, "loss": 0.5194, "step": 11306 }, { "epoch": 1.4119820033151789, "grad_norm": 1.828125, "learning_rate": 3.816745861800335e-06, "loss": 0.4129, "step": 11307 }, { "epoch": 1.41210829584024, "grad_norm": 1.8984375, "learning_rate": 3.815176941208555e-06, "loss": 0.4065, "step": 11308 }, { "epoch": 1.4122345883653011, "grad_norm": 2.109375, "learning_rate": 3.8136082671289254e-06, "loss": 0.5431, "step": 11309 }, { "epoch": 1.4123608808903623, "grad_norm": 1.953125, "learning_rate": 3.812039839623969e-06, "loss": 0.5052, "step": 11310 }, { "epoch": 1.4124871734154234, "grad_norm": 1.9296875, "learning_rate": 3.8104716587561998e-06, "loss": 0.4311, "step": 11311 }, { "epoch": 1.4126134659404848, "grad_norm": 1.9765625, "learning_rate": 3.808903724588122e-06, "loss": 0.6014, "step": 11312 }, { "epoch": 1.412739758465546, "grad_norm": 2.046875, "learning_rate": 3.80733603718223e-06, "loss": 0.4645, "step": 11313 }, { "epoch": 1.412866050990607, "grad_norm": 2.21875, "learning_rate": 3.805768596601007e-06, "loss": 0.4886, "step": 11314 }, { "epoch": 1.4129923435156682, "grad_norm": 1.9921875, "learning_rate": 3.8042014029069285e-06, "loss": 0.52, "step": 11315 }, { "epoch": 1.4131186360407293, "grad_norm": 1.90625, "learning_rate": 3.8026344561624553e-06, "loss": 0.4344, "step": 11316 }, { "epoch": 1.4132449285657906, "grad_norm": 2.015625, "learning_rate": 3.801067756430048e-06, "loss": 0.4999, "step": 11317 }, { "epoch": 1.4133712210908516, "grad_norm": 2.109375, "learning_rate": 3.7995013037721497e-06, "loss": 0.4939, "step": 11318 }, { "epoch": 1.413497513615913, "grad_norm": 2.15625, "learning_rate": 3.7979350982511933e-06, "loss": 0.5278, "step": 11319 }, { "epoch": 1.413623806140974, "grad_norm": 1.9921875, "learning_rate": 3.7963691399296065e-06, "loss": 0.4522, "step": 11320 }, { "epoch": 1.4137500986660352, "grad_norm": 1.9140625, "learning_rate": 3.794803428869803e-06, "loss": 0.468, "step": 11321 }, { "epoch": 1.4138763911910963, "grad_norm": 2.09375, "learning_rate": 3.793237965134189e-06, "loss": 0.5223, "step": 11322 }, { "epoch": 1.4140026837161574, "grad_norm": 2.109375, "learning_rate": 3.791672748785159e-06, "loss": 0.4893, "step": 11323 }, { "epoch": 1.4141289762412188, "grad_norm": 2.109375, "learning_rate": 3.7901077798851006e-06, "loss": 0.521, "step": 11324 }, { "epoch": 1.41425526876628, "grad_norm": 1.90625, "learning_rate": 3.788543058496388e-06, "loss": 0.4875, "step": 11325 }, { "epoch": 1.414381561291341, "grad_norm": 2.171875, "learning_rate": 3.7869785846813878e-06, "loss": 0.4838, "step": 11326 }, { "epoch": 1.4145078538164022, "grad_norm": 1.890625, "learning_rate": 3.785414358502456e-06, "loss": 0.4681, "step": 11327 }, { "epoch": 1.4146341463414633, "grad_norm": 2.015625, "learning_rate": 3.7838503800219393e-06, "loss": 0.5128, "step": 11328 }, { "epoch": 1.4147604388665247, "grad_norm": 1.9375, "learning_rate": 3.7822866493021747e-06, "loss": 0.5295, "step": 11329 }, { "epoch": 1.4148867313915858, "grad_norm": 2.203125, "learning_rate": 3.780723166405488e-06, "loss": 0.5625, "step": 11330 }, { "epoch": 1.415013023916647, "grad_norm": 2.8125, "learning_rate": 3.779159931394195e-06, "loss": 0.5769, "step": 11331 }, { "epoch": 1.415139316441708, "grad_norm": 2.09375, "learning_rate": 3.7775969443306024e-06, "loss": 0.5012, "step": 11332 }, { "epoch": 1.4152656089667692, "grad_norm": 1.984375, "learning_rate": 3.7760342052770092e-06, "loss": 0.4936, "step": 11333 }, { "epoch": 1.4153919014918306, "grad_norm": 2.25, "learning_rate": 3.7744717142956976e-06, "loss": 0.4894, "step": 11334 }, { "epoch": 1.4155181940168915, "grad_norm": 1.9921875, "learning_rate": 3.772909471448952e-06, "loss": 0.4345, "step": 11335 }, { "epoch": 1.4156444865419529, "grad_norm": 2.03125, "learning_rate": 3.7713474767990367e-06, "loss": 0.4493, "step": 11336 }, { "epoch": 1.415770779067014, "grad_norm": 1.9453125, "learning_rate": 3.7697857304082064e-06, "loss": 0.5454, "step": 11337 }, { "epoch": 1.4158970715920751, "grad_norm": 1.9296875, "learning_rate": 3.768224232338712e-06, "loss": 0.4756, "step": 11338 }, { "epoch": 1.4160233641171363, "grad_norm": 1.953125, "learning_rate": 3.766662982652789e-06, "loss": 0.5525, "step": 11339 }, { "epoch": 1.4161496566421974, "grad_norm": 1.984375, "learning_rate": 3.7651019814126656e-06, "loss": 0.4996, "step": 11340 }, { "epoch": 1.4162759491672587, "grad_norm": 1.9765625, "learning_rate": 3.7635412286805593e-06, "loss": 0.432, "step": 11341 }, { "epoch": 1.4164022416923199, "grad_norm": 1.8984375, "learning_rate": 3.761980724518679e-06, "loss": 0.5059, "step": 11342 }, { "epoch": 1.416528534217381, "grad_norm": 1.9609375, "learning_rate": 3.760420468989221e-06, "loss": 0.4871, "step": 11343 }, { "epoch": 1.4166548267424421, "grad_norm": 2.015625, "learning_rate": 3.7588604621543744e-06, "loss": 0.4489, "step": 11344 }, { "epoch": 1.4167811192675033, "grad_norm": 2.0, "learning_rate": 3.7573007040763164e-06, "loss": 0.533, "step": 11345 }, { "epoch": 1.4169074117925646, "grad_norm": 2.015625, "learning_rate": 3.7557411948172174e-06, "loss": 0.5163, "step": 11346 }, { "epoch": 1.4170337043176258, "grad_norm": 1.9765625, "learning_rate": 3.7541819344392326e-06, "loss": 0.4913, "step": 11347 }, { "epoch": 1.417159996842687, "grad_norm": 2.03125, "learning_rate": 3.752622923004513e-06, "loss": 0.5341, "step": 11348 }, { "epoch": 1.417286289367748, "grad_norm": 1.9375, "learning_rate": 3.751064160575195e-06, "loss": 0.4075, "step": 11349 }, { "epoch": 1.4174125818928092, "grad_norm": 1.921875, "learning_rate": 3.7495056472134084e-06, "loss": 0.4862, "step": 11350 }, { "epoch": 1.4175388744178705, "grad_norm": 2.046875, "learning_rate": 3.747947382981272e-06, "loss": 0.4961, "step": 11351 }, { "epoch": 1.4176651669429314, "grad_norm": 1.90625, "learning_rate": 3.746389367940889e-06, "loss": 0.4813, "step": 11352 }, { "epoch": 1.4177914594679928, "grad_norm": 2.046875, "learning_rate": 3.7448316021543686e-06, "loss": 0.4835, "step": 11353 }, { "epoch": 1.417917751993054, "grad_norm": 2.0625, "learning_rate": 3.7432740856837925e-06, "loss": 0.4947, "step": 11354 }, { "epoch": 1.418044044518115, "grad_norm": 2.234375, "learning_rate": 3.741716818591241e-06, "loss": 0.5311, "step": 11355 }, { "epoch": 1.4181703370431762, "grad_norm": 1.9453125, "learning_rate": 3.740159800938784e-06, "loss": 0.471, "step": 11356 }, { "epoch": 1.4182966295682373, "grad_norm": 1.8671875, "learning_rate": 3.7386030327884793e-06, "loss": 0.4943, "step": 11357 }, { "epoch": 1.4184229220932987, "grad_norm": 1.96875, "learning_rate": 3.737046514202376e-06, "loss": 0.4764, "step": 11358 }, { "epoch": 1.4185492146183598, "grad_norm": 2.109375, "learning_rate": 3.7354902452425133e-06, "loss": 0.488, "step": 11359 }, { "epoch": 1.418675507143421, "grad_norm": 1.875, "learning_rate": 3.73393422597092e-06, "loss": 0.4392, "step": 11360 }, { "epoch": 1.418801799668482, "grad_norm": 1.9296875, "learning_rate": 3.7323784564496168e-06, "loss": 0.4655, "step": 11361 }, { "epoch": 1.4189280921935432, "grad_norm": 2.015625, "learning_rate": 3.7308229367406114e-06, "loss": 0.531, "step": 11362 }, { "epoch": 1.4190543847186046, "grad_norm": 1.953125, "learning_rate": 3.7292676669059024e-06, "loss": 0.5102, "step": 11363 }, { "epoch": 1.4191806772436657, "grad_norm": 2.015625, "learning_rate": 3.7277126470074807e-06, "loss": 0.5553, "step": 11364 }, { "epoch": 1.4193069697687268, "grad_norm": 2.078125, "learning_rate": 3.726157877107326e-06, "loss": 0.5247, "step": 11365 }, { "epoch": 1.419433262293788, "grad_norm": 1.9453125, "learning_rate": 3.7246033572674067e-06, "loss": 0.5402, "step": 11366 }, { "epoch": 1.419559554818849, "grad_norm": 1.9140625, "learning_rate": 3.7230490875496817e-06, "loss": 0.5253, "step": 11367 }, { "epoch": 1.4196858473439105, "grad_norm": 2.921875, "learning_rate": 3.721495068016101e-06, "loss": 0.5156, "step": 11368 }, { "epoch": 1.4198121398689714, "grad_norm": 2.09375, "learning_rate": 3.7199412987286044e-06, "loss": 0.5354, "step": 11369 }, { "epoch": 1.4199384323940327, "grad_norm": 2.09375, "learning_rate": 3.7183877797491186e-06, "loss": 0.5464, "step": 11370 }, { "epoch": 1.4200647249190939, "grad_norm": 1.953125, "learning_rate": 3.716834511139568e-06, "loss": 0.5476, "step": 11371 }, { "epoch": 1.420191017444155, "grad_norm": 1.8828125, "learning_rate": 3.7152814929618597e-06, "loss": 0.4507, "step": 11372 }, { "epoch": 1.4203173099692161, "grad_norm": 1.96875, "learning_rate": 3.713728725277894e-06, "loss": 0.4378, "step": 11373 }, { "epoch": 1.4204436024942773, "grad_norm": 2.015625, "learning_rate": 3.7121762081495605e-06, "loss": 0.5181, "step": 11374 }, { "epoch": 1.4205698950193386, "grad_norm": 1.9453125, "learning_rate": 3.710623941638737e-06, "loss": 0.4772, "step": 11375 }, { "epoch": 1.4206961875443997, "grad_norm": 2.03125, "learning_rate": 3.7090719258072947e-06, "loss": 0.4814, "step": 11376 }, { "epoch": 1.4208224800694609, "grad_norm": 2.171875, "learning_rate": 3.707520160717093e-06, "loss": 0.4626, "step": 11377 }, { "epoch": 1.420948772594522, "grad_norm": 1.9609375, "learning_rate": 3.7059686464299803e-06, "loss": 0.4994, "step": 11378 }, { "epoch": 1.4210750651195831, "grad_norm": 1.9765625, "learning_rate": 3.704417383007799e-06, "loss": 0.5187, "step": 11379 }, { "epoch": 1.4212013576446445, "grad_norm": 2.0, "learning_rate": 3.7028663705123757e-06, "loss": 0.4755, "step": 11380 }, { "epoch": 1.4213276501697056, "grad_norm": 1.7890625, "learning_rate": 3.7013156090055313e-06, "loss": 0.4647, "step": 11381 }, { "epoch": 1.4214539426947668, "grad_norm": 1.828125, "learning_rate": 3.6997650985490764e-06, "loss": 0.3842, "step": 11382 }, { "epoch": 1.421580235219828, "grad_norm": 2.015625, "learning_rate": 3.6982148392048076e-06, "loss": 0.5004, "step": 11383 }, { "epoch": 1.421706527744889, "grad_norm": 1.8984375, "learning_rate": 3.696664831034519e-06, "loss": 0.4248, "step": 11384 }, { "epoch": 1.4218328202699504, "grad_norm": 2.0625, "learning_rate": 3.6951150740999866e-06, "loss": 0.4618, "step": 11385 }, { "epoch": 1.4219591127950115, "grad_norm": 2.0625, "learning_rate": 3.6935655684629803e-06, "loss": 0.4498, "step": 11386 }, { "epoch": 1.4220854053200727, "grad_norm": 1.9921875, "learning_rate": 3.6920163141852606e-06, "loss": 0.5504, "step": 11387 }, { "epoch": 1.4222116978451338, "grad_norm": 1.734375, "learning_rate": 3.690467311328574e-06, "loss": 0.3932, "step": 11388 }, { "epoch": 1.422337990370195, "grad_norm": 1.8984375, "learning_rate": 3.6889185599546663e-06, "loss": 0.4936, "step": 11389 }, { "epoch": 1.422464282895256, "grad_norm": 2.015625, "learning_rate": 3.6873700601252638e-06, "loss": 0.4557, "step": 11390 }, { "epoch": 1.4225905754203172, "grad_norm": 1.8984375, "learning_rate": 3.6858218119020862e-06, "loss": 0.4633, "step": 11391 }, { "epoch": 1.4227168679453785, "grad_norm": 1.96875, "learning_rate": 3.684273815346842e-06, "loss": 0.5071, "step": 11392 }, { "epoch": 1.4228431604704397, "grad_norm": 2.109375, "learning_rate": 3.6827260705212296e-06, "loss": 0.5136, "step": 11393 }, { "epoch": 1.4229694529955008, "grad_norm": 2.21875, "learning_rate": 3.6811785774869424e-06, "loss": 0.5428, "step": 11394 }, { "epoch": 1.423095745520562, "grad_norm": 2.046875, "learning_rate": 3.6796313363056557e-06, "loss": 0.5194, "step": 11395 }, { "epoch": 1.423222038045623, "grad_norm": 1.9609375, "learning_rate": 3.6780843470390415e-06, "loss": 0.4643, "step": 11396 }, { "epoch": 1.4233483305706844, "grad_norm": 2.0625, "learning_rate": 3.676537609748757e-06, "loss": 0.6112, "step": 11397 }, { "epoch": 1.4234746230957456, "grad_norm": 2.125, "learning_rate": 3.674991124496452e-06, "loss": 0.5628, "step": 11398 }, { "epoch": 1.4236009156208067, "grad_norm": 2.078125, "learning_rate": 3.6734448913437672e-06, "loss": 0.5358, "step": 11399 }, { "epoch": 1.4237272081458678, "grad_norm": 1.9921875, "learning_rate": 3.6718989103523306e-06, "loss": 0.4974, "step": 11400 }, { "epoch": 1.423853500670929, "grad_norm": 2.28125, "learning_rate": 3.6703531815837602e-06, "loss": 0.5073, "step": 11401 }, { "epoch": 1.4239797931959903, "grad_norm": 2.140625, "learning_rate": 3.6688077050996664e-06, "loss": 0.508, "step": 11402 }, { "epoch": 1.4241060857210515, "grad_norm": 2.125, "learning_rate": 3.6672624809616487e-06, "loss": 0.5348, "step": 11403 }, { "epoch": 1.4242323782461126, "grad_norm": 1.984375, "learning_rate": 3.665717509231295e-06, "loss": 0.4576, "step": 11404 }, { "epoch": 1.4243586707711737, "grad_norm": 2.0, "learning_rate": 3.664172789970183e-06, "loss": 0.5095, "step": 11405 }, { "epoch": 1.4244849632962349, "grad_norm": 2.0625, "learning_rate": 3.6626283232398795e-06, "loss": 0.5341, "step": 11406 }, { "epoch": 1.424611255821296, "grad_norm": 2.265625, "learning_rate": 3.6610841091019513e-06, "loss": 0.5801, "step": 11407 }, { "epoch": 1.4247375483463571, "grad_norm": 2.078125, "learning_rate": 3.6595401476179414e-06, "loss": 0.5142, "step": 11408 }, { "epoch": 1.4248638408714185, "grad_norm": 2.0, "learning_rate": 3.6579964388493893e-06, "loss": 0.4674, "step": 11409 }, { "epoch": 1.4249901333964796, "grad_norm": 1.984375, "learning_rate": 3.6564529828578245e-06, "loss": 0.538, "step": 11410 }, { "epoch": 1.4251164259215408, "grad_norm": 1.9921875, "learning_rate": 3.6549097797047627e-06, "loss": 0.5976, "step": 11411 }, { "epoch": 1.4252427184466019, "grad_norm": 2.046875, "learning_rate": 3.6533668294517154e-06, "loss": 0.4635, "step": 11412 }, { "epoch": 1.425369010971663, "grad_norm": 1.984375, "learning_rate": 3.651824132160179e-06, "loss": 0.4973, "step": 11413 }, { "epoch": 1.4254953034967244, "grad_norm": 2.046875, "learning_rate": 3.6502816878916413e-06, "loss": 0.4723, "step": 11414 }, { "epoch": 1.4256215960217855, "grad_norm": 2.046875, "learning_rate": 3.6487394967075817e-06, "loss": 0.5086, "step": 11415 }, { "epoch": 1.4257478885468466, "grad_norm": 1.9765625, "learning_rate": 3.6471975586694684e-06, "loss": 0.4659, "step": 11416 }, { "epoch": 1.4258741810719078, "grad_norm": 2.171875, "learning_rate": 3.6456558738387583e-06, "loss": 0.4777, "step": 11417 }, { "epoch": 1.426000473596969, "grad_norm": 2.0625, "learning_rate": 3.6441144422769e-06, "loss": 0.5603, "step": 11418 }, { "epoch": 1.4261267661220303, "grad_norm": 1.9453125, "learning_rate": 3.6425732640453303e-06, "loss": 0.4982, "step": 11419 }, { "epoch": 1.4262530586470914, "grad_norm": 1.9375, "learning_rate": 3.641032339205478e-06, "loss": 0.4978, "step": 11420 }, { "epoch": 1.4263793511721525, "grad_norm": 1.8671875, "learning_rate": 3.6394916678187607e-06, "loss": 0.4341, "step": 11421 }, { "epoch": 1.4265056436972137, "grad_norm": 2.0, "learning_rate": 3.637951249946584e-06, "loss": 0.4709, "step": 11422 }, { "epoch": 1.4266319362222748, "grad_norm": 2.109375, "learning_rate": 3.6364110856503476e-06, "loss": 0.5694, "step": 11423 }, { "epoch": 1.4267582287473362, "grad_norm": 1.9296875, "learning_rate": 3.6348711749914334e-06, "loss": 0.4708, "step": 11424 }, { "epoch": 1.426884521272397, "grad_norm": 2.109375, "learning_rate": 3.633331518031227e-06, "loss": 0.4589, "step": 11425 }, { "epoch": 1.4270108137974584, "grad_norm": 2.03125, "learning_rate": 3.631792114831091e-06, "loss": 0.4857, "step": 11426 }, { "epoch": 1.4271371063225196, "grad_norm": 1.828125, "learning_rate": 3.630252965452381e-06, "loss": 0.4931, "step": 11427 }, { "epoch": 1.4272633988475807, "grad_norm": 1.9453125, "learning_rate": 3.6287140699564474e-06, "loss": 0.5132, "step": 11428 }, { "epoch": 1.4273896913726418, "grad_norm": 2.0, "learning_rate": 3.6271754284046234e-06, "loss": 0.498, "step": 11429 }, { "epoch": 1.427515983897703, "grad_norm": 1.9375, "learning_rate": 3.6256370408582377e-06, "loss": 0.4779, "step": 11430 }, { "epoch": 1.4276422764227643, "grad_norm": 1.8515625, "learning_rate": 3.624098907378606e-06, "loss": 0.4748, "step": 11431 }, { "epoch": 1.4277685689478254, "grad_norm": 2.09375, "learning_rate": 3.622561028027034e-06, "loss": 0.5355, "step": 11432 }, { "epoch": 1.4278948614728866, "grad_norm": 1.9375, "learning_rate": 3.6210234028648182e-06, "loss": 0.4398, "step": 11433 }, { "epoch": 1.4280211539979477, "grad_norm": 2.0, "learning_rate": 3.6194860319532454e-06, "loss": 0.4829, "step": 11434 }, { "epoch": 1.4281474465230088, "grad_norm": 2.015625, "learning_rate": 3.6179489153535907e-06, "loss": 0.5954, "step": 11435 }, { "epoch": 1.4282737390480702, "grad_norm": 1.921875, "learning_rate": 3.616412053127121e-06, "loss": 0.5086, "step": 11436 }, { "epoch": 1.4284000315731313, "grad_norm": 2.015625, "learning_rate": 3.6148754453350897e-06, "loss": 0.4506, "step": 11437 }, { "epoch": 1.4285263240981925, "grad_norm": 2.375, "learning_rate": 3.6133390920387434e-06, "loss": 0.6022, "step": 11438 }, { "epoch": 1.4286526166232536, "grad_norm": 2.03125, "learning_rate": 3.6118029932993195e-06, "loss": 0.5166, "step": 11439 }, { "epoch": 1.4287789091483147, "grad_norm": 2.171875, "learning_rate": 3.6102671491780393e-06, "loss": 0.5796, "step": 11440 }, { "epoch": 1.428905201673376, "grad_norm": 2.09375, "learning_rate": 3.6087315597361217e-06, "loss": 0.4844, "step": 11441 }, { "epoch": 1.429031494198437, "grad_norm": 2.203125, "learning_rate": 3.607196225034765e-06, "loss": 0.4867, "step": 11442 }, { "epoch": 1.4291577867234984, "grad_norm": 1.859375, "learning_rate": 3.605661145135173e-06, "loss": 0.4268, "step": 11443 }, { "epoch": 1.4292840792485595, "grad_norm": 2.125, "learning_rate": 3.6041263200985253e-06, "loss": 0.5202, "step": 11444 }, { "epoch": 1.4294103717736206, "grad_norm": 1.9921875, "learning_rate": 3.6025917499859977e-06, "loss": 0.5102, "step": 11445 }, { "epoch": 1.4295366642986818, "grad_norm": 1.9765625, "learning_rate": 3.6010574348587534e-06, "loss": 0.4984, "step": 11446 }, { "epoch": 1.429662956823743, "grad_norm": 1.8125, "learning_rate": 3.5995233747779467e-06, "loss": 0.4215, "step": 11447 }, { "epoch": 1.4297892493488042, "grad_norm": 1.8984375, "learning_rate": 3.597989569804723e-06, "loss": 0.4799, "step": 11448 }, { "epoch": 1.4299155418738654, "grad_norm": 2.0, "learning_rate": 3.596456020000213e-06, "loss": 0.4737, "step": 11449 }, { "epoch": 1.4300418343989265, "grad_norm": 2.078125, "learning_rate": 3.5949227254255437e-06, "loss": 0.4824, "step": 11450 }, { "epoch": 1.4301681269239876, "grad_norm": 1.8359375, "learning_rate": 3.593389686141826e-06, "loss": 0.5009, "step": 11451 }, { "epoch": 1.4302944194490488, "grad_norm": 1.8984375, "learning_rate": 3.5918569022101646e-06, "loss": 0.4969, "step": 11452 }, { "epoch": 1.4304207119741101, "grad_norm": 2.21875, "learning_rate": 3.5903243736916515e-06, "loss": 0.5807, "step": 11453 }, { "epoch": 1.4305470044991713, "grad_norm": 1.9921875, "learning_rate": 3.588792100647371e-06, "loss": 0.5186, "step": 11454 }, { "epoch": 1.4306732970242324, "grad_norm": 1.8671875, "learning_rate": 3.587260083138395e-06, "loss": 0.4443, "step": 11455 }, { "epoch": 1.4307995895492935, "grad_norm": 1.9765625, "learning_rate": 3.5857283212257855e-06, "loss": 0.4837, "step": 11456 }, { "epoch": 1.4309258820743547, "grad_norm": 2.140625, "learning_rate": 3.5841968149705964e-06, "loss": 0.5063, "step": 11457 }, { "epoch": 1.431052174599416, "grad_norm": 1.9296875, "learning_rate": 3.582665564433867e-06, "loss": 0.548, "step": 11458 }, { "epoch": 1.431178467124477, "grad_norm": 1.8515625, "learning_rate": 3.581134569676633e-06, "loss": 0.4041, "step": 11459 }, { "epoch": 1.4313047596495383, "grad_norm": 1.921875, "learning_rate": 3.579603830759909e-06, "loss": 0.4147, "step": 11460 }, { "epoch": 1.4314310521745994, "grad_norm": 2.234375, "learning_rate": 3.578073347744717e-06, "loss": 0.483, "step": 11461 }, { "epoch": 1.4315573446996606, "grad_norm": 1.84375, "learning_rate": 3.576543120692052e-06, "loss": 0.4816, "step": 11462 }, { "epoch": 1.4316836372247217, "grad_norm": 1.84375, "learning_rate": 3.575013149662907e-06, "loss": 0.5195, "step": 11463 }, { "epoch": 1.4318099297497828, "grad_norm": 1.9609375, "learning_rate": 3.573483434718261e-06, "loss": 0.461, "step": 11464 }, { "epoch": 1.4319362222748442, "grad_norm": 1.953125, "learning_rate": 3.571953975919088e-06, "loss": 0.4311, "step": 11465 }, { "epoch": 1.4320625147999053, "grad_norm": 1.90625, "learning_rate": 3.5704247733263454e-06, "loss": 0.473, "step": 11466 }, { "epoch": 1.4321888073249665, "grad_norm": 2.0, "learning_rate": 3.5688958270009856e-06, "loss": 0.4616, "step": 11467 }, { "epoch": 1.4323150998500276, "grad_norm": 2.0625, "learning_rate": 3.5673671370039464e-06, "loss": 0.4812, "step": 11468 }, { "epoch": 1.4324413923750887, "grad_norm": 1.921875, "learning_rate": 3.5658387033961604e-06, "loss": 0.4761, "step": 11469 }, { "epoch": 1.43256768490015, "grad_norm": 1.9765625, "learning_rate": 3.564310526238546e-06, "loss": 0.4683, "step": 11470 }, { "epoch": 1.4326939774252112, "grad_norm": 2.078125, "learning_rate": 3.5627826055920124e-06, "loss": 0.5118, "step": 11471 }, { "epoch": 1.4328202699502723, "grad_norm": 2.109375, "learning_rate": 3.561254941517458e-06, "loss": 0.5764, "step": 11472 }, { "epoch": 1.4329465624753335, "grad_norm": 1.953125, "learning_rate": 3.559727534075774e-06, "loss": 0.534, "step": 11473 }, { "epoch": 1.4330728550003946, "grad_norm": 1.9765625, "learning_rate": 3.558200383327839e-06, "loss": 0.5102, "step": 11474 }, { "epoch": 1.433199147525456, "grad_norm": 2.015625, "learning_rate": 3.5566734893345188e-06, "loss": 0.4312, "step": 11475 }, { "epoch": 1.4333254400505169, "grad_norm": 1.9296875, "learning_rate": 3.5551468521566754e-06, "loss": 0.4404, "step": 11476 }, { "epoch": 1.4334517325755782, "grad_norm": 2.09375, "learning_rate": 3.553620471855154e-06, "loss": 0.4524, "step": 11477 }, { "epoch": 1.4335780251006394, "grad_norm": 1.875, "learning_rate": 3.55209434849079e-06, "loss": 0.468, "step": 11478 }, { "epoch": 1.4337043176257005, "grad_norm": 2.0625, "learning_rate": 3.550568482124419e-06, "loss": 0.5215, "step": 11479 }, { "epoch": 1.4338306101507616, "grad_norm": 1.8984375, "learning_rate": 3.5490428728168526e-06, "loss": 0.5441, "step": 11480 }, { "epoch": 1.4339569026758228, "grad_norm": 2.140625, "learning_rate": 3.5475175206289003e-06, "loss": 0.4601, "step": 11481 }, { "epoch": 1.4340831952008841, "grad_norm": 2.109375, "learning_rate": 3.5459924256213575e-06, "loss": 0.4782, "step": 11482 }, { "epoch": 1.4342094877259453, "grad_norm": 2.015625, "learning_rate": 3.5444675878550107e-06, "loss": 0.5515, "step": 11483 }, { "epoch": 1.4343357802510064, "grad_norm": 1.9921875, "learning_rate": 3.5429430073906366e-06, "loss": 0.5273, "step": 11484 }, { "epoch": 1.4344620727760675, "grad_norm": 2.046875, "learning_rate": 3.541418684289002e-06, "loss": 0.4929, "step": 11485 }, { "epoch": 1.4345883653011287, "grad_norm": 1.796875, "learning_rate": 3.539894618610862e-06, "loss": 0.4581, "step": 11486 }, { "epoch": 1.43471465782619, "grad_norm": 1.9296875, "learning_rate": 3.538370810416962e-06, "loss": 0.4598, "step": 11487 }, { "epoch": 1.4348409503512511, "grad_norm": 1.8359375, "learning_rate": 3.536847259768038e-06, "loss": 0.4658, "step": 11488 }, { "epoch": 1.4349672428763123, "grad_norm": 1.953125, "learning_rate": 3.535323966724815e-06, "loss": 0.4853, "step": 11489 }, { "epoch": 1.4350935354013734, "grad_norm": 1.8125, "learning_rate": 3.533800931348007e-06, "loss": 0.5158, "step": 11490 }, { "epoch": 1.4352198279264345, "grad_norm": 2.03125, "learning_rate": 3.5322781536983197e-06, "loss": 0.5382, "step": 11491 }, { "epoch": 1.435346120451496, "grad_norm": 2.09375, "learning_rate": 3.530755633836446e-06, "loss": 0.4574, "step": 11492 }, { "epoch": 1.435472412976557, "grad_norm": 2.0, "learning_rate": 3.529233371823071e-06, "loss": 0.6452, "step": 11493 }, { "epoch": 1.4355987055016182, "grad_norm": 1.875, "learning_rate": 3.527711367718868e-06, "loss": 0.3944, "step": 11494 }, { "epoch": 1.4357249980266793, "grad_norm": 2.09375, "learning_rate": 3.5261896215845003e-06, "loss": 0.5748, "step": 11495 }, { "epoch": 1.4358512905517404, "grad_norm": 1.9453125, "learning_rate": 3.5246681334806177e-06, "loss": 0.5116, "step": 11496 }, { "epoch": 1.4359775830768016, "grad_norm": 1.9375, "learning_rate": 3.52314690346787e-06, "loss": 0.4841, "step": 11497 }, { "epoch": 1.4361038756018627, "grad_norm": 1.9609375, "learning_rate": 3.521625931606887e-06, "loss": 0.4962, "step": 11498 }, { "epoch": 1.436230168126924, "grad_norm": 2.171875, "learning_rate": 3.5201052179582883e-06, "loss": 0.507, "step": 11499 }, { "epoch": 1.4363564606519852, "grad_norm": 1.8515625, "learning_rate": 3.5185847625826897e-06, "loss": 0.5155, "step": 11500 }, { "epoch": 1.4364827531770463, "grad_norm": 1.8828125, "learning_rate": 3.5170645655406898e-06, "loss": 0.4615, "step": 11501 }, { "epoch": 1.4366090457021075, "grad_norm": 2.265625, "learning_rate": 3.5155446268928815e-06, "loss": 0.5698, "step": 11502 }, { "epoch": 1.4367353382271686, "grad_norm": 2.3125, "learning_rate": 3.5140249466998455e-06, "loss": 0.54, "step": 11503 }, { "epoch": 1.43686163075223, "grad_norm": 2.28125, "learning_rate": 3.5125055250221527e-06, "loss": 0.5103, "step": 11504 }, { "epoch": 1.436987923277291, "grad_norm": 1.90625, "learning_rate": 3.5109863619203643e-06, "loss": 0.4091, "step": 11505 }, { "epoch": 1.4371142158023522, "grad_norm": 1.8203125, "learning_rate": 3.509467457455029e-06, "loss": 0.4548, "step": 11506 }, { "epoch": 1.4372405083274133, "grad_norm": 2.03125, "learning_rate": 3.507948811686688e-06, "loss": 0.4909, "step": 11507 }, { "epoch": 1.4373668008524745, "grad_norm": 1.9296875, "learning_rate": 3.5064304246758694e-06, "loss": 0.4933, "step": 11508 }, { "epoch": 1.4374930933775358, "grad_norm": 1.96875, "learning_rate": 3.504912296483094e-06, "loss": 0.4944, "step": 11509 }, { "epoch": 1.437619385902597, "grad_norm": 2.03125, "learning_rate": 3.5033944271688704e-06, "loss": 0.4731, "step": 11510 }, { "epoch": 1.437745678427658, "grad_norm": 1.8828125, "learning_rate": 3.5018768167936958e-06, "loss": 0.469, "step": 11511 }, { "epoch": 1.4378719709527192, "grad_norm": 2.125, "learning_rate": 3.5003594654180605e-06, "loss": 0.5509, "step": 11512 }, { "epoch": 1.4379982634777804, "grad_norm": 2.203125, "learning_rate": 3.4988423731024378e-06, "loss": 0.5493, "step": 11513 }, { "epoch": 1.4381245560028415, "grad_norm": 2.140625, "learning_rate": 3.4973255399073027e-06, "loss": 0.5422, "step": 11514 }, { "epoch": 1.4382508485279026, "grad_norm": 2.078125, "learning_rate": 3.4958089658931084e-06, "loss": 0.4759, "step": 11515 }, { "epoch": 1.438377141052964, "grad_norm": 2.046875, "learning_rate": 3.4942926511203023e-06, "loss": 0.4795, "step": 11516 }, { "epoch": 1.4385034335780251, "grad_norm": 1.8515625, "learning_rate": 3.492776595649322e-06, "loss": 0.5036, "step": 11517 }, { "epoch": 1.4386297261030863, "grad_norm": 1.8984375, "learning_rate": 3.4912607995405922e-06, "loss": 0.431, "step": 11518 }, { "epoch": 1.4387560186281474, "grad_norm": 1.796875, "learning_rate": 3.489745262854529e-06, "loss": 0.4237, "step": 11519 }, { "epoch": 1.4388823111532085, "grad_norm": 1.734375, "learning_rate": 3.48822998565154e-06, "loss": 0.3942, "step": 11520 }, { "epoch": 1.4390086036782699, "grad_norm": 1.9921875, "learning_rate": 3.4867149679920185e-06, "loss": 0.4271, "step": 11521 }, { "epoch": 1.439134896203331, "grad_norm": 2.1875, "learning_rate": 3.4852002099363512e-06, "loss": 0.5125, "step": 11522 }, { "epoch": 1.4392611887283921, "grad_norm": 2.078125, "learning_rate": 3.483685711544911e-06, "loss": 0.5281, "step": 11523 }, { "epoch": 1.4393874812534533, "grad_norm": 2.15625, "learning_rate": 3.482171472878062e-06, "loss": 0.5124, "step": 11524 }, { "epoch": 1.4395137737785144, "grad_norm": 1.984375, "learning_rate": 3.48065749399616e-06, "loss": 0.4626, "step": 11525 }, { "epoch": 1.4396400663035758, "grad_norm": 1.9453125, "learning_rate": 3.479143774959547e-06, "loss": 0.476, "step": 11526 }, { "epoch": 1.439766358828637, "grad_norm": 1.9609375, "learning_rate": 3.4776303158285574e-06, "loss": 0.512, "step": 11527 }, { "epoch": 1.439892651353698, "grad_norm": 1.90625, "learning_rate": 3.476117116663512e-06, "loss": 0.4538, "step": 11528 }, { "epoch": 1.4400189438787592, "grad_norm": 2.078125, "learning_rate": 3.474604177524722e-06, "loss": 0.5464, "step": 11529 }, { "epoch": 1.4401452364038203, "grad_norm": 2.015625, "learning_rate": 3.473091498472495e-06, "loss": 0.4868, "step": 11530 }, { "epoch": 1.4402715289288814, "grad_norm": 1.9765625, "learning_rate": 3.47157907956712e-06, "loss": 0.5007, "step": 11531 }, { "epoch": 1.4403978214539426, "grad_norm": 1.9296875, "learning_rate": 3.4700669208688787e-06, "loss": 0.4899, "step": 11532 }, { "epoch": 1.440524113979004, "grad_norm": 1.828125, "learning_rate": 3.4685550224380416e-06, "loss": 0.4918, "step": 11533 }, { "epoch": 1.440650406504065, "grad_norm": 2.15625, "learning_rate": 3.4670433843348684e-06, "loss": 0.5557, "step": 11534 }, { "epoch": 1.4407766990291262, "grad_norm": 2.015625, "learning_rate": 3.465532006619611e-06, "loss": 0.5837, "step": 11535 }, { "epoch": 1.4409029915541873, "grad_norm": 1.84375, "learning_rate": 3.4640208893525085e-06, "loss": 0.4597, "step": 11536 }, { "epoch": 1.4410292840792485, "grad_norm": 2.0, "learning_rate": 3.46251003259379e-06, "loss": 0.4852, "step": 11537 }, { "epoch": 1.4411555766043098, "grad_norm": 1.859375, "learning_rate": 3.460999436403676e-06, "loss": 0.4696, "step": 11538 }, { "epoch": 1.441281869129371, "grad_norm": 1.8203125, "learning_rate": 3.4594891008423738e-06, "loss": 0.4221, "step": 11539 }, { "epoch": 1.441408161654432, "grad_norm": 1.859375, "learning_rate": 3.4579790259700816e-06, "loss": 0.4855, "step": 11540 }, { "epoch": 1.4415344541794932, "grad_norm": 2.140625, "learning_rate": 3.456469211846989e-06, "loss": 0.5382, "step": 11541 }, { "epoch": 1.4416607467045544, "grad_norm": 2.140625, "learning_rate": 3.4549596585332724e-06, "loss": 0.6359, "step": 11542 }, { "epoch": 1.4417870392296157, "grad_norm": 1.9609375, "learning_rate": 3.4534503660890983e-06, "loss": 0.5505, "step": 11543 }, { "epoch": 1.4419133317546768, "grad_norm": 1.875, "learning_rate": 3.4519413345746243e-06, "loss": 0.4237, "step": 11544 }, { "epoch": 1.442039624279738, "grad_norm": 1.9140625, "learning_rate": 3.450432564049995e-06, "loss": 0.5161, "step": 11545 }, { "epoch": 1.442165916804799, "grad_norm": 2.203125, "learning_rate": 3.44892405457535e-06, "loss": 0.519, "step": 11546 }, { "epoch": 1.4422922093298602, "grad_norm": 2.0625, "learning_rate": 3.447415806210813e-06, "loss": 0.5044, "step": 11547 }, { "epoch": 1.4424185018549216, "grad_norm": 2.09375, "learning_rate": 3.4459078190164985e-06, "loss": 0.5336, "step": 11548 }, { "epoch": 1.4425447943799825, "grad_norm": 1.8046875, "learning_rate": 3.4444000930525133e-06, "loss": 0.4682, "step": 11549 }, { "epoch": 1.4426710869050439, "grad_norm": 2.203125, "learning_rate": 3.4428926283789486e-06, "loss": 0.5118, "step": 11550 }, { "epoch": 1.442797379430105, "grad_norm": 2.0, "learning_rate": 3.441385425055891e-06, "loss": 0.4838, "step": 11551 }, { "epoch": 1.4429236719551661, "grad_norm": 1.890625, "learning_rate": 3.4398784831434127e-06, "loss": 0.4752, "step": 11552 }, { "epoch": 1.4430499644802273, "grad_norm": 2.15625, "learning_rate": 3.438371802701578e-06, "loss": 0.4871, "step": 11553 }, { "epoch": 1.4431762570052884, "grad_norm": 1.8203125, "learning_rate": 3.4368653837904376e-06, "loss": 0.4836, "step": 11554 }, { "epoch": 1.4433025495303498, "grad_norm": 2.3125, "learning_rate": 3.435359226470034e-06, "loss": 0.5143, "step": 11555 }, { "epoch": 1.4434288420554109, "grad_norm": 1.953125, "learning_rate": 3.4338533308004006e-06, "loss": 0.4673, "step": 11556 }, { "epoch": 1.443555134580472, "grad_norm": 1.96875, "learning_rate": 3.432347696841558e-06, "loss": 0.5159, "step": 11557 }, { "epoch": 1.4436814271055332, "grad_norm": 2.453125, "learning_rate": 3.4308423246535173e-06, "loss": 0.502, "step": 11558 }, { "epoch": 1.4438077196305943, "grad_norm": 1.828125, "learning_rate": 3.4293372142962778e-06, "loss": 0.4329, "step": 11559 }, { "epoch": 1.4439340121556556, "grad_norm": 1.921875, "learning_rate": 3.4278323658298306e-06, "loss": 0.4539, "step": 11560 }, { "epoch": 1.4440603046807168, "grad_norm": 2.234375, "learning_rate": 3.4263277793141524e-06, "loss": 0.5254, "step": 11561 }, { "epoch": 1.444186597205778, "grad_norm": 1.8125, "learning_rate": 3.4248234548092184e-06, "loss": 0.4185, "step": 11562 }, { "epoch": 1.444312889730839, "grad_norm": 2.0625, "learning_rate": 3.423319392374984e-06, "loss": 0.4732, "step": 11563 }, { "epoch": 1.4444391822559002, "grad_norm": 2.0625, "learning_rate": 3.421815592071398e-06, "loss": 0.4841, "step": 11564 }, { "epoch": 1.4445654747809615, "grad_norm": 1.9375, "learning_rate": 3.420312053958398e-06, "loss": 0.49, "step": 11565 }, { "epoch": 1.4446917673060224, "grad_norm": 1.9375, "learning_rate": 3.4188087780959113e-06, "loss": 0.5043, "step": 11566 }, { "epoch": 1.4448180598310838, "grad_norm": 1.921875, "learning_rate": 3.4173057645438545e-06, "loss": 0.4825, "step": 11567 }, { "epoch": 1.444944352356145, "grad_norm": 1.8125, "learning_rate": 3.4158030133621343e-06, "loss": 0.5402, "step": 11568 }, { "epoch": 1.445070644881206, "grad_norm": 2.046875, "learning_rate": 3.4143005246106475e-06, "loss": 0.4731, "step": 11569 }, { "epoch": 1.4451969374062672, "grad_norm": 1.9375, "learning_rate": 3.4127982983492793e-06, "loss": 0.4684, "step": 11570 }, { "epoch": 1.4453232299313283, "grad_norm": 2.09375, "learning_rate": 3.4112963346379046e-06, "loss": 0.5509, "step": 11571 }, { "epoch": 1.4454495224563897, "grad_norm": 2.265625, "learning_rate": 3.4097946335363873e-06, "loss": 0.5296, "step": 11572 }, { "epoch": 1.4455758149814508, "grad_norm": 1.9375, "learning_rate": 3.408293195104584e-06, "loss": 0.4782, "step": 11573 }, { "epoch": 1.445702107506512, "grad_norm": 1.953125, "learning_rate": 3.4067920194023352e-06, "loss": 0.476, "step": 11574 }, { "epoch": 1.445828400031573, "grad_norm": 2.1875, "learning_rate": 3.4052911064894766e-06, "loss": 0.5317, "step": 11575 }, { "epoch": 1.4459546925566342, "grad_norm": 1.9453125, "learning_rate": 3.4037904564258293e-06, "loss": 0.47, "step": 11576 }, { "epoch": 1.4460809850816956, "grad_norm": 2.0, "learning_rate": 3.4022900692712046e-06, "loss": 0.4075, "step": 11577 }, { "epoch": 1.4462072776067567, "grad_norm": 1.9765625, "learning_rate": 3.400789945085409e-06, "loss": 0.4429, "step": 11578 }, { "epoch": 1.4463335701318178, "grad_norm": 2.078125, "learning_rate": 3.3992900839282307e-06, "loss": 0.5117, "step": 11579 }, { "epoch": 1.446459862656879, "grad_norm": 1.859375, "learning_rate": 3.39779048585945e-06, "loss": 0.4761, "step": 11580 }, { "epoch": 1.4465861551819401, "grad_norm": 2.09375, "learning_rate": 3.3962911509388397e-06, "loss": 0.5535, "step": 11581 }, { "epoch": 1.4467124477070015, "grad_norm": 2.140625, "learning_rate": 3.394792079226157e-06, "loss": 0.5005, "step": 11582 }, { "epoch": 1.4468387402320624, "grad_norm": 1.8359375, "learning_rate": 3.393293270781153e-06, "loss": 0.4975, "step": 11583 }, { "epoch": 1.4469650327571237, "grad_norm": 2.078125, "learning_rate": 3.3917947256635653e-06, "loss": 0.5452, "step": 11584 }, { "epoch": 1.4470913252821849, "grad_norm": 1.9609375, "learning_rate": 3.390296443933123e-06, "loss": 0.5108, "step": 11585 }, { "epoch": 1.447217617807246, "grad_norm": 2.15625, "learning_rate": 3.3887984256495444e-06, "loss": 0.4458, "step": 11586 }, { "epoch": 1.4473439103323071, "grad_norm": 1.9140625, "learning_rate": 3.3873006708725365e-06, "loss": 0.4549, "step": 11587 }, { "epoch": 1.4474702028573683, "grad_norm": 1.7421875, "learning_rate": 3.385803179661796e-06, "loss": 0.4462, "step": 11588 }, { "epoch": 1.4475964953824296, "grad_norm": 1.9765625, "learning_rate": 3.384305952077009e-06, "loss": 0.4302, "step": 11589 }, { "epoch": 1.4477227879074908, "grad_norm": 1.9375, "learning_rate": 3.3828089881778537e-06, "loss": 0.5103, "step": 11590 }, { "epoch": 1.447849080432552, "grad_norm": 1.984375, "learning_rate": 3.3813122880239924e-06, "loss": 0.4438, "step": 11591 }, { "epoch": 1.447975372957613, "grad_norm": 1.9375, "learning_rate": 3.37981585167508e-06, "loss": 0.4911, "step": 11592 }, { "epoch": 1.4481016654826742, "grad_norm": 1.9296875, "learning_rate": 3.37831967919076e-06, "loss": 0.5123, "step": 11593 }, { "epoch": 1.4482279580077355, "grad_norm": 2.078125, "learning_rate": 3.3768237706306716e-06, "loss": 0.5597, "step": 11594 }, { "epoch": 1.4483542505327966, "grad_norm": 2.203125, "learning_rate": 3.3753281260544344e-06, "loss": 0.5651, "step": 11595 }, { "epoch": 1.4484805430578578, "grad_norm": 2.015625, "learning_rate": 3.3738327455216625e-06, "loss": 0.4675, "step": 11596 }, { "epoch": 1.448606835582919, "grad_norm": 2.0625, "learning_rate": 3.3723376290919563e-06, "loss": 0.5271, "step": 11597 }, { "epoch": 1.44873312810798, "grad_norm": 1.9453125, "learning_rate": 3.3708427768249086e-06, "loss": 0.4605, "step": 11598 }, { "epoch": 1.4488594206330414, "grad_norm": 1.96875, "learning_rate": 3.3693481887801004e-06, "loss": 0.4916, "step": 11599 }, { "epoch": 1.4489857131581023, "grad_norm": 2.0625, "learning_rate": 3.3678538650171035e-06, "loss": 0.5037, "step": 11600 }, { "epoch": 1.4491120056831637, "grad_norm": 2.125, "learning_rate": 3.366359805595476e-06, "loss": 0.536, "step": 11601 }, { "epoch": 1.4492382982082248, "grad_norm": 2.015625, "learning_rate": 3.364866010574769e-06, "loss": 0.5087, "step": 11602 }, { "epoch": 1.449364590733286, "grad_norm": 2.0, "learning_rate": 3.3633724800145207e-06, "loss": 0.4935, "step": 11603 }, { "epoch": 1.449490883258347, "grad_norm": 1.984375, "learning_rate": 3.3618792139742597e-06, "loss": 0.5298, "step": 11604 }, { "epoch": 1.4496171757834082, "grad_norm": 1.9765625, "learning_rate": 3.3603862125135044e-06, "loss": 0.5036, "step": 11605 }, { "epoch": 1.4497434683084696, "grad_norm": 1.984375, "learning_rate": 3.3588934756917624e-06, "loss": 0.4507, "step": 11606 }, { "epoch": 1.4498697608335307, "grad_norm": 2.0625, "learning_rate": 3.3574010035685312e-06, "loss": 0.4777, "step": 11607 }, { "epoch": 1.4499960533585918, "grad_norm": 2.078125, "learning_rate": 3.3559087962032956e-06, "loss": 0.4729, "step": 11608 }, { "epoch": 1.450122345883653, "grad_norm": 2.03125, "learning_rate": 3.3544168536555278e-06, "loss": 0.5836, "step": 11609 }, { "epoch": 1.450248638408714, "grad_norm": 1.890625, "learning_rate": 3.3529251759847013e-06, "loss": 0.4818, "step": 11610 }, { "epoch": 1.4503749309337755, "grad_norm": 2.046875, "learning_rate": 3.351433763250268e-06, "loss": 0.4786, "step": 11611 }, { "epoch": 1.4505012234588366, "grad_norm": 2.0, "learning_rate": 3.3499426155116697e-06, "loss": 0.5527, "step": 11612 }, { "epoch": 1.4506275159838977, "grad_norm": 2.0625, "learning_rate": 3.3484517328283415e-06, "loss": 0.5389, "step": 11613 }, { "epoch": 1.4507538085089589, "grad_norm": 2.078125, "learning_rate": 3.3469611152597058e-06, "loss": 0.5642, "step": 11614 }, { "epoch": 1.45088010103402, "grad_norm": 2.0625, "learning_rate": 3.345470762865175e-06, "loss": 0.5028, "step": 11615 }, { "epoch": 1.4510063935590813, "grad_norm": 2.03125, "learning_rate": 3.3439806757041516e-06, "loss": 0.4414, "step": 11616 }, { "epoch": 1.4511326860841425, "grad_norm": 2.34375, "learning_rate": 3.3424908538360257e-06, "loss": 0.5688, "step": 11617 }, { "epoch": 1.4512589786092036, "grad_norm": 1.9375, "learning_rate": 3.3410012973201787e-06, "loss": 0.4884, "step": 11618 }, { "epoch": 1.4513852711342647, "grad_norm": 2.046875, "learning_rate": 3.3395120062159812e-06, "loss": 0.5469, "step": 11619 }, { "epoch": 1.4515115636593259, "grad_norm": 2.25, "learning_rate": 3.3380229805827925e-06, "loss": 0.5636, "step": 11620 }, { "epoch": 1.451637856184387, "grad_norm": 2.15625, "learning_rate": 3.3365342204799613e-06, "loss": 0.6566, "step": 11621 }, { "epoch": 1.4517641487094481, "grad_norm": 1.96875, "learning_rate": 3.3350457259668267e-06, "loss": 0.4965, "step": 11622 }, { "epoch": 1.4518904412345095, "grad_norm": 2.390625, "learning_rate": 3.333557497102715e-06, "loss": 0.5869, "step": 11623 }, { "epoch": 1.4520167337595706, "grad_norm": 1.9453125, "learning_rate": 3.3320695339469445e-06, "loss": 0.5775, "step": 11624 }, { "epoch": 1.4521430262846318, "grad_norm": 1.9921875, "learning_rate": 3.3305818365588182e-06, "loss": 0.4706, "step": 11625 }, { "epoch": 1.452269318809693, "grad_norm": 1.9296875, "learning_rate": 3.3290944049976393e-06, "loss": 0.5032, "step": 11626 }, { "epoch": 1.452395611334754, "grad_norm": 1.9375, "learning_rate": 3.3276072393226897e-06, "loss": 0.4831, "step": 11627 }, { "epoch": 1.4525219038598154, "grad_norm": 1.8828125, "learning_rate": 3.326120339593244e-06, "loss": 0.39, "step": 11628 }, { "epoch": 1.4526481963848765, "grad_norm": 1.984375, "learning_rate": 3.3246337058685663e-06, "loss": 0.4886, "step": 11629 }, { "epoch": 1.4527744889099377, "grad_norm": 2.03125, "learning_rate": 3.3231473382079106e-06, "loss": 0.4739, "step": 11630 }, { "epoch": 1.4529007814349988, "grad_norm": 2.03125, "learning_rate": 3.3216612366705192e-06, "loss": 0.5156, "step": 11631 }, { "epoch": 1.45302707396006, "grad_norm": 2.09375, "learning_rate": 3.320175401315626e-06, "loss": 0.6209, "step": 11632 }, { "epoch": 1.4531533664851213, "grad_norm": 1.8359375, "learning_rate": 3.318689832202452e-06, "loss": 0.4684, "step": 11633 }, { "epoch": 1.4532796590101824, "grad_norm": 1.984375, "learning_rate": 3.3172045293902077e-06, "loss": 0.5607, "step": 11634 }, { "epoch": 1.4534059515352435, "grad_norm": 2.03125, "learning_rate": 3.3157194929380943e-06, "loss": 0.4151, "step": 11635 }, { "epoch": 1.4535322440603047, "grad_norm": 1.9921875, "learning_rate": 3.314234722905302e-06, "loss": 0.4293, "step": 11636 }, { "epoch": 1.4536585365853658, "grad_norm": 1.9609375, "learning_rate": 3.31275021935101e-06, "loss": 0.4834, "step": 11637 }, { "epoch": 1.453784829110427, "grad_norm": 1.84375, "learning_rate": 3.3112659823343874e-06, "loss": 0.424, "step": 11638 }, { "epoch": 1.453911121635488, "grad_norm": 1.875, "learning_rate": 3.3097820119145907e-06, "loss": 0.4825, "step": 11639 }, { "epoch": 1.4540374141605494, "grad_norm": 1.9765625, "learning_rate": 3.30829830815077e-06, "loss": 0.4936, "step": 11640 }, { "epoch": 1.4541637066856106, "grad_norm": 2.03125, "learning_rate": 3.3068148711020566e-06, "loss": 0.4687, "step": 11641 }, { "epoch": 1.4542899992106717, "grad_norm": 2.03125, "learning_rate": 3.3053317008275844e-06, "loss": 0.4437, "step": 11642 }, { "epoch": 1.4544162917357328, "grad_norm": 2.125, "learning_rate": 3.303848797386465e-06, "loss": 0.5238, "step": 11643 }, { "epoch": 1.454542584260794, "grad_norm": 1.9609375, "learning_rate": 3.3023661608378054e-06, "loss": 0.485, "step": 11644 }, { "epoch": 1.4546688767858553, "grad_norm": 1.9765625, "learning_rate": 3.3008837912406965e-06, "loss": 0.5108, "step": 11645 }, { "epoch": 1.4547951693109165, "grad_norm": 1.90625, "learning_rate": 3.2994016886542257e-06, "loss": 0.4439, "step": 11646 }, { "epoch": 1.4549214618359776, "grad_norm": 1.9765625, "learning_rate": 3.297919853137462e-06, "loss": 0.5011, "step": 11647 }, { "epoch": 1.4550477543610387, "grad_norm": 1.9921875, "learning_rate": 3.2964382847494724e-06, "loss": 0.4877, "step": 11648 }, { "epoch": 1.4551740468860999, "grad_norm": 2.15625, "learning_rate": 3.2949569835493044e-06, "loss": 0.4988, "step": 11649 }, { "epoch": 1.4553003394111612, "grad_norm": 2.171875, "learning_rate": 3.2934759495960022e-06, "loss": 0.5056, "step": 11650 }, { "epoch": 1.4554266319362223, "grad_norm": 2.03125, "learning_rate": 3.291995182948594e-06, "loss": 0.4412, "step": 11651 }, { "epoch": 1.4555529244612835, "grad_norm": 1.953125, "learning_rate": 3.2905146836661005e-06, "loss": 0.41, "step": 11652 }, { "epoch": 1.4556792169863446, "grad_norm": 1.9765625, "learning_rate": 3.289034451807531e-06, "loss": 0.5059, "step": 11653 }, { "epoch": 1.4558055095114057, "grad_norm": 2.015625, "learning_rate": 3.287554487431884e-06, "loss": 0.5358, "step": 11654 }, { "epoch": 1.455931802036467, "grad_norm": 2.15625, "learning_rate": 3.2860747905981484e-06, "loss": 0.4881, "step": 11655 }, { "epoch": 1.456058094561528, "grad_norm": 2.1875, "learning_rate": 3.2845953613652993e-06, "loss": 0.4892, "step": 11656 }, { "epoch": 1.4561843870865894, "grad_norm": 1.9609375, "learning_rate": 3.283116199792303e-06, "loss": 0.4679, "step": 11657 }, { "epoch": 1.4563106796116505, "grad_norm": 1.8984375, "learning_rate": 3.281637305938118e-06, "loss": 0.4706, "step": 11658 }, { "epoch": 1.4564369721367116, "grad_norm": 2.078125, "learning_rate": 3.2801586798616837e-06, "loss": 0.4863, "step": 11659 }, { "epoch": 1.4565632646617728, "grad_norm": 2.015625, "learning_rate": 3.278680321621941e-06, "loss": 0.5266, "step": 11660 }, { "epoch": 1.456689557186834, "grad_norm": 2.015625, "learning_rate": 3.2772022312778127e-06, "loss": 0.5391, "step": 11661 }, { "epoch": 1.4568158497118953, "grad_norm": 2.15625, "learning_rate": 3.27572440888821e-06, "loss": 0.5058, "step": 11662 }, { "epoch": 1.4569421422369564, "grad_norm": 2.109375, "learning_rate": 3.2742468545120363e-06, "loss": 0.5275, "step": 11663 }, { "epoch": 1.4570684347620175, "grad_norm": 1.9921875, "learning_rate": 3.272769568208183e-06, "loss": 0.5038, "step": 11664 }, { "epoch": 1.4571947272870787, "grad_norm": 1.90625, "learning_rate": 3.2712925500355307e-06, "loss": 0.4604, "step": 11665 }, { "epoch": 1.4573210198121398, "grad_norm": 1.9609375, "learning_rate": 3.269815800052951e-06, "loss": 0.5105, "step": 11666 }, { "epoch": 1.4574473123372012, "grad_norm": 2.0625, "learning_rate": 3.2683393183193024e-06, "loss": 0.4768, "step": 11667 }, { "epoch": 1.4575736048622623, "grad_norm": 2.0, "learning_rate": 3.2668631048934343e-06, "loss": 0.4825, "step": 11668 }, { "epoch": 1.4576998973873234, "grad_norm": 1.921875, "learning_rate": 3.2653871598341845e-06, "loss": 0.4971, "step": 11669 }, { "epoch": 1.4578261899123846, "grad_norm": 2.25, "learning_rate": 3.2639114832003814e-06, "loss": 0.507, "step": 11670 }, { "epoch": 1.4579524824374457, "grad_norm": 2.046875, "learning_rate": 3.2624360750508423e-06, "loss": 0.4671, "step": 11671 }, { "epoch": 1.458078774962507, "grad_norm": 2.078125, "learning_rate": 3.2609609354443726e-06, "loss": 0.4751, "step": 11672 }, { "epoch": 1.458205067487568, "grad_norm": 1.953125, "learning_rate": 3.2594860644397686e-06, "loss": 0.4454, "step": 11673 }, { "epoch": 1.4583313600126293, "grad_norm": 2.15625, "learning_rate": 3.2580114620958137e-06, "loss": 0.5727, "step": 11674 }, { "epoch": 1.4584576525376904, "grad_norm": 2.21875, "learning_rate": 3.2565371284712833e-06, "loss": 0.5324, "step": 11675 }, { "epoch": 1.4585839450627516, "grad_norm": 2.046875, "learning_rate": 3.255063063624939e-06, "loss": 0.5075, "step": 11676 }, { "epoch": 1.4587102375878127, "grad_norm": 1.9375, "learning_rate": 3.2535892676155335e-06, "loss": 0.4486, "step": 11677 }, { "epoch": 1.4588365301128738, "grad_norm": 2.0, "learning_rate": 3.252115740501812e-06, "loss": 0.4942, "step": 11678 }, { "epoch": 1.4589628226379352, "grad_norm": 1.921875, "learning_rate": 3.250642482342504e-06, "loss": 0.5299, "step": 11679 }, { "epoch": 1.4590891151629963, "grad_norm": 2.1875, "learning_rate": 3.2491694931963303e-06, "loss": 0.478, "step": 11680 }, { "epoch": 1.4592154076880575, "grad_norm": 1.90625, "learning_rate": 3.2476967731219997e-06, "loss": 0.4742, "step": 11681 }, { "epoch": 1.4593417002131186, "grad_norm": 1.9765625, "learning_rate": 3.2462243221782118e-06, "loss": 0.5007, "step": 11682 }, { "epoch": 1.4594679927381797, "grad_norm": 1.96875, "learning_rate": 3.244752140423655e-06, "loss": 0.5268, "step": 11683 }, { "epoch": 1.459594285263241, "grad_norm": 2.046875, "learning_rate": 3.243280227917007e-06, "loss": 0.5375, "step": 11684 }, { "epoch": 1.4597205777883022, "grad_norm": 1.9765625, "learning_rate": 3.2418085847169344e-06, "loss": 0.5167, "step": 11685 }, { "epoch": 1.4598468703133634, "grad_norm": 1.8828125, "learning_rate": 3.240337210882094e-06, "loss": 0.4455, "step": 11686 }, { "epoch": 1.4599731628384245, "grad_norm": 2.171875, "learning_rate": 3.23886610647113e-06, "loss": 0.5235, "step": 11687 }, { "epoch": 1.4600994553634856, "grad_norm": 2.015625, "learning_rate": 3.2373952715426784e-06, "loss": 0.4778, "step": 11688 }, { "epoch": 1.460225747888547, "grad_norm": 2.015625, "learning_rate": 3.235924706155362e-06, "loss": 0.5018, "step": 11689 }, { "epoch": 1.4603520404136079, "grad_norm": 1.9609375, "learning_rate": 3.2344544103677945e-06, "loss": 0.4369, "step": 11690 }, { "epoch": 1.4604783329386692, "grad_norm": 2.15625, "learning_rate": 3.232984384238579e-06, "loss": 0.4651, "step": 11691 }, { "epoch": 1.4606046254637304, "grad_norm": 1.9609375, "learning_rate": 3.2315146278263053e-06, "loss": 0.4996, "step": 11692 }, { "epoch": 1.4607309179887915, "grad_norm": 2.015625, "learning_rate": 3.230045141189556e-06, "loss": 0.4569, "step": 11693 }, { "epoch": 1.4608572105138526, "grad_norm": 1.796875, "learning_rate": 3.228575924386902e-06, "loss": 0.4416, "step": 11694 }, { "epoch": 1.4609835030389138, "grad_norm": 1.8203125, "learning_rate": 3.227106977476898e-06, "loss": 0.391, "step": 11695 }, { "epoch": 1.4611097955639751, "grad_norm": 1.84375, "learning_rate": 3.2256383005180992e-06, "loss": 0.4295, "step": 11696 }, { "epoch": 1.4612360880890363, "grad_norm": 1.984375, "learning_rate": 3.22416989356904e-06, "loss": 0.55, "step": 11697 }, { "epoch": 1.4613623806140974, "grad_norm": 1.8125, "learning_rate": 3.22270175668825e-06, "loss": 0.4333, "step": 11698 }, { "epoch": 1.4614886731391585, "grad_norm": 1.9921875, "learning_rate": 3.221233889934242e-06, "loss": 0.5059, "step": 11699 }, { "epoch": 1.4616149656642197, "grad_norm": 2.078125, "learning_rate": 3.219766293365525e-06, "loss": 0.5197, "step": 11700 }, { "epoch": 1.461741258189281, "grad_norm": 1.8828125, "learning_rate": 3.218298967040592e-06, "loss": 0.5212, "step": 11701 }, { "epoch": 1.4618675507143422, "grad_norm": 2.15625, "learning_rate": 3.216831911017927e-06, "loss": 0.5034, "step": 11702 }, { "epoch": 1.4619938432394033, "grad_norm": 2.0625, "learning_rate": 3.2153651253560035e-06, "loss": 0.461, "step": 11703 }, { "epoch": 1.4621201357644644, "grad_norm": 2.140625, "learning_rate": 3.213898610113284e-06, "loss": 0.4961, "step": 11704 }, { "epoch": 1.4622464282895256, "grad_norm": 1.9375, "learning_rate": 3.2124323653482215e-06, "loss": 0.4609, "step": 11705 }, { "epoch": 1.462372720814587, "grad_norm": 1.8359375, "learning_rate": 3.210966391119256e-06, "loss": 0.4654, "step": 11706 }, { "epoch": 1.4624990133396478, "grad_norm": 2.265625, "learning_rate": 3.2095006874848167e-06, "loss": 0.6083, "step": 11707 }, { "epoch": 1.4626253058647092, "grad_norm": 1.984375, "learning_rate": 3.2080352545033254e-06, "loss": 0.4651, "step": 11708 }, { "epoch": 1.4627515983897703, "grad_norm": 2.234375, "learning_rate": 3.206570092233189e-06, "loss": 0.5864, "step": 11709 }, { "epoch": 1.4628778909148314, "grad_norm": 2.140625, "learning_rate": 3.2051052007328053e-06, "loss": 0.5347, "step": 11710 }, { "epoch": 1.4630041834398926, "grad_norm": 2.109375, "learning_rate": 3.203640580060562e-06, "loss": 0.5636, "step": 11711 }, { "epoch": 1.4631304759649537, "grad_norm": 2.0625, "learning_rate": 3.202176230274835e-06, "loss": 0.5651, "step": 11712 }, { "epoch": 1.463256768490015, "grad_norm": 2.015625, "learning_rate": 3.200712151433987e-06, "loss": 0.5155, "step": 11713 }, { "epoch": 1.4633830610150762, "grad_norm": 1.890625, "learning_rate": 3.1992483435963785e-06, "loss": 0.4916, "step": 11714 }, { "epoch": 1.4635093535401373, "grad_norm": 1.9296875, "learning_rate": 3.19778480682035e-06, "loss": 0.5172, "step": 11715 }, { "epoch": 1.4636356460651985, "grad_norm": 2.0625, "learning_rate": 3.1963215411642358e-06, "loss": 0.5095, "step": 11716 }, { "epoch": 1.4637619385902596, "grad_norm": 2.015625, "learning_rate": 3.1948585466863557e-06, "loss": 0.5597, "step": 11717 }, { "epoch": 1.463888231115321, "grad_norm": 1.84375, "learning_rate": 3.1933958234450247e-06, "loss": 0.4658, "step": 11718 }, { "epoch": 1.464014523640382, "grad_norm": 1.9453125, "learning_rate": 3.1919333714985402e-06, "loss": 0.532, "step": 11719 }, { "epoch": 1.4641408161654432, "grad_norm": 1.9765625, "learning_rate": 3.1904711909051933e-06, "loss": 0.4837, "step": 11720 }, { "epoch": 1.4642671086905044, "grad_norm": 1.9296875, "learning_rate": 3.189009281723263e-06, "loss": 0.4742, "step": 11721 }, { "epoch": 1.4643934012155655, "grad_norm": 2.078125, "learning_rate": 3.1875476440110186e-06, "loss": 0.474, "step": 11722 }, { "epoch": 1.4645196937406268, "grad_norm": 1.8046875, "learning_rate": 3.1860862778267156e-06, "loss": 0.4038, "step": 11723 }, { "epoch": 1.464645986265688, "grad_norm": 1.921875, "learning_rate": 3.1846251832286e-06, "loss": 0.4954, "step": 11724 }, { "epoch": 1.4647722787907491, "grad_norm": 2.03125, "learning_rate": 3.18316436027491e-06, "loss": 0.4968, "step": 11725 }, { "epoch": 1.4648985713158102, "grad_norm": 2.015625, "learning_rate": 3.1817038090238695e-06, "loss": 0.456, "step": 11726 }, { "epoch": 1.4650248638408714, "grad_norm": 2.078125, "learning_rate": 3.1802435295336908e-06, "loss": 0.5673, "step": 11727 }, { "epoch": 1.4651511563659325, "grad_norm": 2.171875, "learning_rate": 3.1787835218625796e-06, "loss": 0.5255, "step": 11728 }, { "epoch": 1.4652774488909937, "grad_norm": 1.9765625, "learning_rate": 3.177323786068728e-06, "loss": 0.4929, "step": 11729 }, { "epoch": 1.465403741416055, "grad_norm": 2.046875, "learning_rate": 3.1758643222103157e-06, "loss": 0.512, "step": 11730 }, { "epoch": 1.4655300339411161, "grad_norm": 2.015625, "learning_rate": 3.1744051303455113e-06, "loss": 0.4871, "step": 11731 }, { "epoch": 1.4656563264661773, "grad_norm": 1.8359375, "learning_rate": 3.1729462105324816e-06, "loss": 0.4334, "step": 11732 }, { "epoch": 1.4657826189912384, "grad_norm": 1.90625, "learning_rate": 3.171487562829372e-06, "loss": 0.5071, "step": 11733 }, { "epoch": 1.4659089115162995, "grad_norm": 2.0, "learning_rate": 3.1700291872943213e-06, "loss": 0.473, "step": 11734 }, { "epoch": 1.466035204041361, "grad_norm": 2.03125, "learning_rate": 3.1685710839854545e-06, "loss": 0.4284, "step": 11735 }, { "epoch": 1.466161496566422, "grad_norm": 1.8984375, "learning_rate": 3.1671132529608906e-06, "loss": 0.513, "step": 11736 }, { "epoch": 1.4662877890914832, "grad_norm": 2.25, "learning_rate": 3.1656556942787353e-06, "loss": 0.5487, "step": 11737 }, { "epoch": 1.4664140816165443, "grad_norm": 2.171875, "learning_rate": 3.164198407997081e-06, "loss": 0.6187, "step": 11738 }, { "epoch": 1.4665403741416054, "grad_norm": 2.03125, "learning_rate": 3.162741394174014e-06, "loss": 0.5084, "step": 11739 }, { "epoch": 1.4666666666666668, "grad_norm": 2.03125, "learning_rate": 3.1612846528676065e-06, "loss": 0.5028, "step": 11740 }, { "epoch": 1.466792959191728, "grad_norm": 1.921875, "learning_rate": 3.1598281841359204e-06, "loss": 0.4619, "step": 11741 }, { "epoch": 1.466919251716789, "grad_norm": 2.09375, "learning_rate": 3.1583719880370066e-06, "loss": 0.5307, "step": 11742 }, { "epoch": 1.4670455442418502, "grad_norm": 2.1875, "learning_rate": 3.1569160646289076e-06, "loss": 0.5019, "step": 11743 }, { "epoch": 1.4671718367669113, "grad_norm": 2.109375, "learning_rate": 3.155460413969651e-06, "loss": 0.523, "step": 11744 }, { "epoch": 1.4672981292919725, "grad_norm": 2.0, "learning_rate": 3.154005036117256e-06, "loss": 0.5352, "step": 11745 }, { "epoch": 1.4674244218170336, "grad_norm": 1.9453125, "learning_rate": 3.15254993112973e-06, "loss": 0.5235, "step": 11746 }, { "epoch": 1.467550714342095, "grad_norm": 1.984375, "learning_rate": 3.1510950990650724e-06, "loss": 0.4516, "step": 11747 }, { "epoch": 1.467677006867156, "grad_norm": 1.9140625, "learning_rate": 3.149640539981267e-06, "loss": 0.4757, "step": 11748 }, { "epoch": 1.4678032993922172, "grad_norm": 1.9921875, "learning_rate": 3.148186253936285e-06, "loss": 0.428, "step": 11749 }, { "epoch": 1.4679295919172783, "grad_norm": 2.0, "learning_rate": 3.1467322409880994e-06, "loss": 0.4872, "step": 11750 }, { "epoch": 1.4680558844423395, "grad_norm": 1.96875, "learning_rate": 3.1452785011946596e-06, "loss": 0.461, "step": 11751 }, { "epoch": 1.4681821769674008, "grad_norm": 1.9921875, "learning_rate": 3.1438250346139086e-06, "loss": 0.4924, "step": 11752 }, { "epoch": 1.468308469492462, "grad_norm": 1.90625, "learning_rate": 3.1423718413037774e-06, "loss": 0.4223, "step": 11753 }, { "epoch": 1.468434762017523, "grad_norm": 1.9140625, "learning_rate": 3.1409189213221857e-06, "loss": 0.4562, "step": 11754 }, { "epoch": 1.4685610545425842, "grad_norm": 2.046875, "learning_rate": 3.1394662747270465e-06, "loss": 0.4975, "step": 11755 }, { "epoch": 1.4686873470676454, "grad_norm": 1.9609375, "learning_rate": 3.1380139015762558e-06, "loss": 0.4108, "step": 11756 }, { "epoch": 1.4688136395927067, "grad_norm": 1.9453125, "learning_rate": 3.1365618019277035e-06, "loss": 0.4522, "step": 11757 }, { "epoch": 1.4689399321177679, "grad_norm": 2.078125, "learning_rate": 3.1351099758392654e-06, "loss": 0.5026, "step": 11758 }, { "epoch": 1.469066224642829, "grad_norm": 2.015625, "learning_rate": 3.133658423368808e-06, "loss": 0.4426, "step": 11759 }, { "epoch": 1.4691925171678901, "grad_norm": 2.03125, "learning_rate": 3.1322071445741887e-06, "loss": 0.6303, "step": 11760 }, { "epoch": 1.4693188096929513, "grad_norm": 2.125, "learning_rate": 3.1307561395132503e-06, "loss": 0.4583, "step": 11761 }, { "epoch": 1.4694451022180124, "grad_norm": 2.078125, "learning_rate": 3.1293054082438256e-06, "loss": 0.6144, "step": 11762 }, { "epoch": 1.4695713947430735, "grad_norm": 2.015625, "learning_rate": 3.1278549508237387e-06, "loss": 0.4639, "step": 11763 }, { "epoch": 1.4696976872681349, "grad_norm": 2.0, "learning_rate": 3.1264047673108012e-06, "loss": 0.4611, "step": 11764 }, { "epoch": 1.469823979793196, "grad_norm": 2.046875, "learning_rate": 3.1249548577628143e-06, "loss": 0.6604, "step": 11765 }, { "epoch": 1.4699502723182571, "grad_norm": 1.9609375, "learning_rate": 3.123505222237566e-06, "loss": 0.4813, "step": 11766 }, { "epoch": 1.4700765648433183, "grad_norm": 1.921875, "learning_rate": 3.122055860792833e-06, "loss": 0.4738, "step": 11767 }, { "epoch": 1.4702028573683794, "grad_norm": 2.015625, "learning_rate": 3.120606773486391e-06, "loss": 0.5246, "step": 11768 }, { "epoch": 1.4703291498934408, "grad_norm": 1.984375, "learning_rate": 3.1191579603759925e-06, "loss": 0.4929, "step": 11769 }, { "epoch": 1.470455442418502, "grad_norm": 1.9921875, "learning_rate": 3.1177094215193837e-06, "loss": 0.4846, "step": 11770 }, { "epoch": 1.470581734943563, "grad_norm": 2.15625, "learning_rate": 3.1162611569743008e-06, "loss": 0.5318, "step": 11771 }, { "epoch": 1.4707080274686242, "grad_norm": 1.9453125, "learning_rate": 3.1148131667984682e-06, "loss": 0.4548, "step": 11772 }, { "epoch": 1.4708343199936853, "grad_norm": 1.890625, "learning_rate": 3.1133654510495983e-06, "loss": 0.5071, "step": 11773 }, { "epoch": 1.4709606125187467, "grad_norm": 2.09375, "learning_rate": 3.111918009785393e-06, "loss": 0.5063, "step": 11774 }, { "epoch": 1.4710869050438078, "grad_norm": 1.984375, "learning_rate": 3.1104708430635456e-06, "loss": 0.4845, "step": 11775 }, { "epoch": 1.471213197568869, "grad_norm": 2.1875, "learning_rate": 3.1090239509417364e-06, "loss": 0.5373, "step": 11776 }, { "epoch": 1.47133949009393, "grad_norm": 2.046875, "learning_rate": 3.1075773334776338e-06, "loss": 0.5569, "step": 11777 }, { "epoch": 1.4714657826189912, "grad_norm": 1.953125, "learning_rate": 3.1061309907288983e-06, "loss": 0.4626, "step": 11778 }, { "epoch": 1.4715920751440525, "grad_norm": 1.875, "learning_rate": 3.104684922753176e-06, "loss": 0.4577, "step": 11779 }, { "epoch": 1.4717183676691135, "grad_norm": 1.90625, "learning_rate": 3.103239129608104e-06, "loss": 0.5228, "step": 11780 }, { "epoch": 1.4718446601941748, "grad_norm": 1.9296875, "learning_rate": 3.1017936113513094e-06, "loss": 0.5004, "step": 11781 }, { "epoch": 1.471970952719236, "grad_norm": 1.96875, "learning_rate": 3.1003483680404046e-06, "loss": 0.4842, "step": 11782 }, { "epoch": 1.472097245244297, "grad_norm": 2.09375, "learning_rate": 3.0989033997329955e-06, "loss": 0.4712, "step": 11783 }, { "epoch": 1.4722235377693582, "grad_norm": 2.0625, "learning_rate": 3.0974587064866745e-06, "loss": 0.5435, "step": 11784 }, { "epoch": 1.4723498302944193, "grad_norm": 2.09375, "learning_rate": 3.0960142883590215e-06, "loss": 0.5192, "step": 11785 }, { "epoch": 1.4724761228194807, "grad_norm": 1.8515625, "learning_rate": 3.0945701454076117e-06, "loss": 0.4489, "step": 11786 }, { "epoch": 1.4726024153445418, "grad_norm": 1.9296875, "learning_rate": 3.093126277690004e-06, "loss": 0.465, "step": 11787 }, { "epoch": 1.472728707869603, "grad_norm": 1.90625, "learning_rate": 3.091682685263746e-06, "loss": 0.5218, "step": 11788 }, { "epoch": 1.472855000394664, "grad_norm": 1.9140625, "learning_rate": 3.090239368186376e-06, "loss": 0.5201, "step": 11789 }, { "epoch": 1.4729812929197252, "grad_norm": 1.984375, "learning_rate": 3.088796326515422e-06, "loss": 0.4514, "step": 11790 }, { "epoch": 1.4731075854447866, "grad_norm": 2.4375, "learning_rate": 3.0873535603084004e-06, "loss": 0.5798, "step": 11791 }, { "epoch": 1.4732338779698477, "grad_norm": 2.0, "learning_rate": 3.0859110696228155e-06, "loss": 0.4351, "step": 11792 }, { "epoch": 1.4733601704949089, "grad_norm": 2.015625, "learning_rate": 3.0844688545161617e-06, "loss": 0.5597, "step": 11793 }, { "epoch": 1.47348646301997, "grad_norm": 1.984375, "learning_rate": 3.0830269150459213e-06, "loss": 0.5544, "step": 11794 }, { "epoch": 1.4736127555450311, "grad_norm": 1.8359375, "learning_rate": 3.081585251269569e-06, "loss": 0.4703, "step": 11795 }, { "epoch": 1.4737390480700925, "grad_norm": 2.09375, "learning_rate": 3.0801438632445633e-06, "loss": 0.5056, "step": 11796 }, { "epoch": 1.4738653405951534, "grad_norm": 2.15625, "learning_rate": 3.0787027510283563e-06, "loss": 0.4964, "step": 11797 }, { "epoch": 1.4739916331202148, "grad_norm": 2.265625, "learning_rate": 3.0772619146783864e-06, "loss": 0.4474, "step": 11798 }, { "epoch": 1.4741179256452759, "grad_norm": 2.015625, "learning_rate": 3.075821354252082e-06, "loss": 0.5117, "step": 11799 }, { "epoch": 1.474244218170337, "grad_norm": 1.9765625, "learning_rate": 3.074381069806861e-06, "loss": 0.5396, "step": 11800 }, { "epoch": 1.4743705106953982, "grad_norm": 1.984375, "learning_rate": 3.0729410614001286e-06, "loss": 0.4813, "step": 11801 }, { "epoch": 1.4744968032204593, "grad_norm": 1.8515625, "learning_rate": 3.0715013290892813e-06, "loss": 0.4341, "step": 11802 }, { "epoch": 1.4746230957455206, "grad_norm": 2.078125, "learning_rate": 3.0700618729316988e-06, "loss": 0.5409, "step": 11803 }, { "epoch": 1.4747493882705818, "grad_norm": 1.9921875, "learning_rate": 3.0686226929847617e-06, "loss": 0.5523, "step": 11804 }, { "epoch": 1.474875680795643, "grad_norm": 2.5, "learning_rate": 3.067183789305829e-06, "loss": 0.5732, "step": 11805 }, { "epoch": 1.475001973320704, "grad_norm": 2.171875, "learning_rate": 3.0657451619522518e-06, "loss": 0.5167, "step": 11806 }, { "epoch": 1.4751282658457652, "grad_norm": 1.9375, "learning_rate": 3.0643068109813702e-06, "loss": 0.4989, "step": 11807 }, { "epoch": 1.4752545583708265, "grad_norm": 1.9296875, "learning_rate": 3.0628687364505127e-06, "loss": 0.512, "step": 11808 }, { "epoch": 1.4753808508958877, "grad_norm": 1.984375, "learning_rate": 3.0614309384169984e-06, "loss": 0.4866, "step": 11809 }, { "epoch": 1.4755071434209488, "grad_norm": 2.109375, "learning_rate": 3.059993416938135e-06, "loss": 0.575, "step": 11810 }, { "epoch": 1.47563343594601, "grad_norm": 2.1875, "learning_rate": 3.0585561720712178e-06, "loss": 0.5576, "step": 11811 }, { "epoch": 1.475759728471071, "grad_norm": 2.03125, "learning_rate": 3.057119203873532e-06, "loss": 0.4503, "step": 11812 }, { "epoch": 1.4758860209961324, "grad_norm": 1.9375, "learning_rate": 3.0556825124023516e-06, "loss": 0.4404, "step": 11813 }, { "epoch": 1.4760123135211933, "grad_norm": 1.828125, "learning_rate": 3.05424609771494e-06, "loss": 0.4871, "step": 11814 }, { "epoch": 1.4761386060462547, "grad_norm": 1.859375, "learning_rate": 3.052809959868549e-06, "loss": 0.5014, "step": 11815 }, { "epoch": 1.4762648985713158, "grad_norm": 1.96875, "learning_rate": 3.051374098920419e-06, "loss": 0.4637, "step": 11816 }, { "epoch": 1.476391191096377, "grad_norm": 2.109375, "learning_rate": 3.0499385149277814e-06, "loss": 0.5411, "step": 11817 }, { "epoch": 1.476517483621438, "grad_norm": 2.03125, "learning_rate": 3.048503207947854e-06, "loss": 0.5597, "step": 11818 }, { "epoch": 1.4766437761464992, "grad_norm": 1.96875, "learning_rate": 3.0470681780378454e-06, "loss": 0.5197, "step": 11819 }, { "epoch": 1.4767700686715606, "grad_norm": 2.0, "learning_rate": 3.045633425254951e-06, "loss": 0.4578, "step": 11820 }, { "epoch": 1.4768963611966217, "grad_norm": 2.234375, "learning_rate": 3.044198949656355e-06, "loss": 0.5049, "step": 11821 }, { "epoch": 1.4770226537216828, "grad_norm": 1.9453125, "learning_rate": 3.0427647512992386e-06, "loss": 0.5054, "step": 11822 }, { "epoch": 1.477148946246744, "grad_norm": 1.96875, "learning_rate": 3.0413308302407617e-06, "loss": 0.55, "step": 11823 }, { "epoch": 1.477275238771805, "grad_norm": 2.0625, "learning_rate": 3.0398971865380756e-06, "loss": 0.4679, "step": 11824 }, { "epoch": 1.4774015312968665, "grad_norm": 2.1875, "learning_rate": 3.038463820248324e-06, "loss": 0.5253, "step": 11825 }, { "epoch": 1.4775278238219276, "grad_norm": 2.0625, "learning_rate": 3.0370307314286373e-06, "loss": 0.4295, "step": 11826 }, { "epoch": 1.4776541163469887, "grad_norm": 1.9921875, "learning_rate": 3.035597920136133e-06, "loss": 0.4942, "step": 11827 }, { "epoch": 1.4777804088720499, "grad_norm": 2.015625, "learning_rate": 3.034165386427922e-06, "loss": 0.4587, "step": 11828 }, { "epoch": 1.477906701397111, "grad_norm": 1.96875, "learning_rate": 3.0327331303610998e-06, "loss": 0.4386, "step": 11829 }, { "epoch": 1.4780329939221724, "grad_norm": 1.84375, "learning_rate": 3.031301151992753e-06, "loss": 0.4436, "step": 11830 }, { "epoch": 1.4781592864472333, "grad_norm": 1.9140625, "learning_rate": 3.0298694513799576e-06, "loss": 0.4357, "step": 11831 }, { "epoch": 1.4782855789722946, "grad_norm": 1.7890625, "learning_rate": 3.0284380285797767e-06, "loss": 0.3911, "step": 11832 }, { "epoch": 1.4784118714973558, "grad_norm": 2.0, "learning_rate": 3.027006883649264e-06, "loss": 0.5069, "step": 11833 }, { "epoch": 1.478538164022417, "grad_norm": 1.9140625, "learning_rate": 3.0255760166454617e-06, "loss": 0.4824, "step": 11834 }, { "epoch": 1.478664456547478, "grad_norm": 2.09375, "learning_rate": 3.0241454276254014e-06, "loss": 0.5138, "step": 11835 }, { "epoch": 1.4787907490725392, "grad_norm": 2.421875, "learning_rate": 3.0227151166461e-06, "loss": 0.5528, "step": 11836 }, { "epoch": 1.4789170415976005, "grad_norm": 1.9375, "learning_rate": 3.021285083764569e-06, "loss": 0.4699, "step": 11837 }, { "epoch": 1.4790433341226616, "grad_norm": 2.015625, "learning_rate": 3.019855329037806e-06, "loss": 0.5437, "step": 11838 }, { "epoch": 1.4791696266477228, "grad_norm": 2.0625, "learning_rate": 3.018425852522793e-06, "loss": 0.5326, "step": 11839 }, { "epoch": 1.479295919172784, "grad_norm": 1.9453125, "learning_rate": 3.0169966542765127e-06, "loss": 0.4694, "step": 11840 }, { "epoch": 1.479422211697845, "grad_norm": 1.8515625, "learning_rate": 3.0155677343559262e-06, "loss": 0.4488, "step": 11841 }, { "epoch": 1.4795485042229064, "grad_norm": 2.171875, "learning_rate": 3.014139092817987e-06, "loss": 0.4934, "step": 11842 }, { "epoch": 1.4796747967479675, "grad_norm": 2.015625, "learning_rate": 3.012710729719637e-06, "loss": 0.4952, "step": 11843 }, { "epoch": 1.4798010892730287, "grad_norm": 1.890625, "learning_rate": 3.011282645117807e-06, "loss": 0.512, "step": 11844 }, { "epoch": 1.4799273817980898, "grad_norm": 2.0, "learning_rate": 3.0098548390694173e-06, "loss": 0.4637, "step": 11845 }, { "epoch": 1.480053674323151, "grad_norm": 1.9140625, "learning_rate": 3.0084273116313766e-06, "loss": 0.4802, "step": 11846 }, { "epoch": 1.4801799668482123, "grad_norm": 2.421875, "learning_rate": 3.007000062860583e-06, "loss": 0.5631, "step": 11847 }, { "epoch": 1.4803062593732734, "grad_norm": 2.140625, "learning_rate": 3.0055730928139236e-06, "loss": 0.5027, "step": 11848 }, { "epoch": 1.4804325518983346, "grad_norm": 2.5625, "learning_rate": 3.0041464015482737e-06, "loss": 0.5611, "step": 11849 }, { "epoch": 1.4805588444233957, "grad_norm": 2.140625, "learning_rate": 3.0027199891204985e-06, "loss": 0.5331, "step": 11850 }, { "epoch": 1.4806851369484568, "grad_norm": 1.9375, "learning_rate": 3.0012938555874493e-06, "loss": 0.4883, "step": 11851 }, { "epoch": 1.480811429473518, "grad_norm": 1.9765625, "learning_rate": 2.99986800100597e-06, "loss": 0.4434, "step": 11852 }, { "epoch": 1.480937721998579, "grad_norm": 1.7578125, "learning_rate": 2.9984424254328914e-06, "loss": 0.3809, "step": 11853 }, { "epoch": 1.4810640145236404, "grad_norm": 1.890625, "learning_rate": 2.9970171289250325e-06, "loss": 0.504, "step": 11854 }, { "epoch": 1.4811903070487016, "grad_norm": 1.9453125, "learning_rate": 2.995592111539204e-06, "loss": 0.4744, "step": 11855 }, { "epoch": 1.4813165995737627, "grad_norm": 1.9765625, "learning_rate": 2.994167373332203e-06, "loss": 0.464, "step": 11856 }, { "epoch": 1.4814428920988238, "grad_norm": 1.8984375, "learning_rate": 2.992742914360812e-06, "loss": 0.5528, "step": 11857 }, { "epoch": 1.481569184623885, "grad_norm": 2.078125, "learning_rate": 2.991318734681814e-06, "loss": 0.4742, "step": 11858 }, { "epoch": 1.4816954771489463, "grad_norm": 1.96875, "learning_rate": 2.9898948343519698e-06, "loss": 0.5481, "step": 11859 }, { "epoch": 1.4818217696740075, "grad_norm": 1.8515625, "learning_rate": 2.9884712134280324e-06, "loss": 0.4567, "step": 11860 }, { "epoch": 1.4819480621990686, "grad_norm": 2.125, "learning_rate": 2.9870478719667438e-06, "loss": 0.5541, "step": 11861 }, { "epoch": 1.4820743547241297, "grad_norm": 2.0625, "learning_rate": 2.9856248100248353e-06, "loss": 0.5319, "step": 11862 }, { "epoch": 1.4822006472491909, "grad_norm": 1.9296875, "learning_rate": 2.9842020276590276e-06, "loss": 0.4504, "step": 11863 }, { "epoch": 1.4823269397742522, "grad_norm": 1.8984375, "learning_rate": 2.982779524926027e-06, "loss": 0.4802, "step": 11864 }, { "epoch": 1.4824532322993134, "grad_norm": 2.296875, "learning_rate": 2.9813573018825338e-06, "loss": 0.5172, "step": 11865 }, { "epoch": 1.4825795248243745, "grad_norm": 2.125, "learning_rate": 2.979935358585233e-06, "loss": 0.486, "step": 11866 }, { "epoch": 1.4827058173494356, "grad_norm": 1.9609375, "learning_rate": 2.9785136950908e-06, "loss": 0.4614, "step": 11867 }, { "epoch": 1.4828321098744968, "grad_norm": 2.0, "learning_rate": 2.977092311455898e-06, "loss": 0.4661, "step": 11868 }, { "epoch": 1.482958402399558, "grad_norm": 1.9609375, "learning_rate": 2.975671207737181e-06, "loss": 0.5008, "step": 11869 }, { "epoch": 1.483084694924619, "grad_norm": 1.9765625, "learning_rate": 2.9742503839912904e-06, "loss": 0.4513, "step": 11870 }, { "epoch": 1.4832109874496804, "grad_norm": 2.0625, "learning_rate": 2.972829840274858e-06, "loss": 0.5342, "step": 11871 }, { "epoch": 1.4833372799747415, "grad_norm": 1.984375, "learning_rate": 2.9714095766445006e-06, "loss": 0.5, "step": 11872 }, { "epoch": 1.4834635724998027, "grad_norm": 2.0625, "learning_rate": 2.969989593156829e-06, "loss": 0.525, "step": 11873 }, { "epoch": 1.4835898650248638, "grad_norm": 2.09375, "learning_rate": 2.968569889868439e-06, "loss": 0.5022, "step": 11874 }, { "epoch": 1.483716157549925, "grad_norm": 1.9375, "learning_rate": 2.967150466835914e-06, "loss": 0.45, "step": 11875 }, { "epoch": 1.4838424500749863, "grad_norm": 1.984375, "learning_rate": 2.9657313241158347e-06, "loss": 0.5037, "step": 11876 }, { "epoch": 1.4839687426000474, "grad_norm": 1.9765625, "learning_rate": 2.964312461764761e-06, "loss": 0.4867, "step": 11877 }, { "epoch": 1.4839687426000474, "eval_loss": 0.8111534118652344, "eval_runtime": 4340.2795, "eval_samples_per_second": 11.483, "eval_steps_per_second": 3.828, "step": 11877 }, { "epoch": 1.4840950351251085, "grad_norm": 1.9609375, "learning_rate": 2.962893879839247e-06, "loss": 0.487, "step": 11878 }, { "epoch": 1.4842213276501697, "grad_norm": 2.0, "learning_rate": 2.961475578395834e-06, "loss": 0.5127, "step": 11879 }, { "epoch": 1.4843476201752308, "grad_norm": 1.984375, "learning_rate": 2.9600575574910495e-06, "loss": 0.4434, "step": 11880 }, { "epoch": 1.4844739127002922, "grad_norm": 1.734375, "learning_rate": 2.9586398171814144e-06, "loss": 0.4396, "step": 11881 }, { "epoch": 1.4846002052253533, "grad_norm": 2.0, "learning_rate": 2.957222357523436e-06, "loss": 0.4983, "step": 11882 }, { "epoch": 1.4847264977504144, "grad_norm": 2.09375, "learning_rate": 2.955805178573611e-06, "loss": 0.4874, "step": 11883 }, { "epoch": 1.4848527902754756, "grad_norm": 2.015625, "learning_rate": 2.954388280388425e-06, "loss": 0.5282, "step": 11884 }, { "epoch": 1.4849790828005367, "grad_norm": 2.125, "learning_rate": 2.952971663024351e-06, "loss": 0.611, "step": 11885 }, { "epoch": 1.485105375325598, "grad_norm": 1.8203125, "learning_rate": 2.9515553265378526e-06, "loss": 0.4572, "step": 11886 }, { "epoch": 1.485231667850659, "grad_norm": 1.984375, "learning_rate": 2.9501392709853817e-06, "loss": 0.5269, "step": 11887 }, { "epoch": 1.4853579603757203, "grad_norm": 2.046875, "learning_rate": 2.948723496423379e-06, "loss": 0.4562, "step": 11888 }, { "epoch": 1.4854842529007815, "grad_norm": 1.953125, "learning_rate": 2.9473080029082736e-06, "loss": 0.5074, "step": 11889 }, { "epoch": 1.4856105454258426, "grad_norm": 2.296875, "learning_rate": 2.945892790496484e-06, "loss": 0.562, "step": 11890 }, { "epoch": 1.4857368379509037, "grad_norm": 2.125, "learning_rate": 2.9444778592444166e-06, "loss": 0.5034, "step": 11891 }, { "epoch": 1.4858631304759649, "grad_norm": 1.796875, "learning_rate": 2.9430632092084676e-06, "loss": 0.414, "step": 11892 }, { "epoch": 1.4859894230010262, "grad_norm": 2.03125, "learning_rate": 2.9416488404450185e-06, "loss": 0.5112, "step": 11893 }, { "epoch": 1.4861157155260873, "grad_norm": 2.15625, "learning_rate": 2.940234753010448e-06, "loss": 0.584, "step": 11894 }, { "epoch": 1.4862420080511485, "grad_norm": 1.8515625, "learning_rate": 2.938820946961116e-06, "loss": 0.4716, "step": 11895 }, { "epoch": 1.4863683005762096, "grad_norm": 2.4375, "learning_rate": 2.9374074223533733e-06, "loss": 0.5737, "step": 11896 }, { "epoch": 1.4864945931012707, "grad_norm": 2.03125, "learning_rate": 2.9359941792435598e-06, "loss": 0.5541, "step": 11897 }, { "epoch": 1.486620885626332, "grad_norm": 1.8671875, "learning_rate": 2.9345812176880027e-06, "loss": 0.5129, "step": 11898 }, { "epoch": 1.4867471781513932, "grad_norm": 1.8671875, "learning_rate": 2.933168537743021e-06, "loss": 0.5091, "step": 11899 }, { "epoch": 1.4868734706764544, "grad_norm": 1.8515625, "learning_rate": 2.9317561394649197e-06, "loss": 0.4573, "step": 11900 }, { "epoch": 1.4869997632015155, "grad_norm": 2.15625, "learning_rate": 2.9303440229099933e-06, "loss": 0.5282, "step": 11901 }, { "epoch": 1.4871260557265766, "grad_norm": 2.0625, "learning_rate": 2.9289321881345257e-06, "loss": 0.5003, "step": 11902 }, { "epoch": 1.487252348251638, "grad_norm": 2.1875, "learning_rate": 2.9275206351947895e-06, "loss": 0.5979, "step": 11903 }, { "epoch": 1.487378640776699, "grad_norm": 1.9609375, "learning_rate": 2.926109364147046e-06, "loss": 0.4043, "step": 11904 }, { "epoch": 1.4875049333017603, "grad_norm": 2.03125, "learning_rate": 2.9246983750475442e-06, "loss": 0.4629, "step": 11905 }, { "epoch": 1.4876312258268214, "grad_norm": 1.9453125, "learning_rate": 2.9232876679525236e-06, "loss": 0.452, "step": 11906 }, { "epoch": 1.4877575183518825, "grad_norm": 1.8359375, "learning_rate": 2.9218772429182106e-06, "loss": 0.4508, "step": 11907 }, { "epoch": 1.4878838108769437, "grad_norm": 2.0625, "learning_rate": 2.9204671000008232e-06, "loss": 0.5204, "step": 11908 }, { "epoch": 1.4880101034020048, "grad_norm": 1.984375, "learning_rate": 2.9190572392565643e-06, "loss": 0.4433, "step": 11909 }, { "epoch": 1.4881363959270661, "grad_norm": 1.9765625, "learning_rate": 2.917647660741628e-06, "loss": 0.4411, "step": 11910 }, { "epoch": 1.4882626884521273, "grad_norm": 2.0625, "learning_rate": 2.9162383645121982e-06, "loss": 0.5553, "step": 11911 }, { "epoch": 1.4883889809771884, "grad_norm": 1.953125, "learning_rate": 2.9148293506244407e-06, "loss": 0.5116, "step": 11912 }, { "epoch": 1.4885152735022495, "grad_norm": 1.828125, "learning_rate": 2.9134206191345227e-06, "loss": 0.4296, "step": 11913 }, { "epoch": 1.4886415660273107, "grad_norm": 2.375, "learning_rate": 2.9120121700985905e-06, "loss": 0.5121, "step": 11914 }, { "epoch": 1.488767858552372, "grad_norm": 2.03125, "learning_rate": 2.91060400357278e-06, "loss": 0.4572, "step": 11915 }, { "epoch": 1.4888941510774332, "grad_norm": 1.8984375, "learning_rate": 2.909196119613218e-06, "loss": 0.478, "step": 11916 }, { "epoch": 1.4890204436024943, "grad_norm": 1.9921875, "learning_rate": 2.90778851827602e-06, "loss": 0.4756, "step": 11917 }, { "epoch": 1.4891467361275554, "grad_norm": 2.015625, "learning_rate": 2.9063811996172887e-06, "loss": 0.5104, "step": 11918 }, { "epoch": 1.4892730286526166, "grad_norm": 1.921875, "learning_rate": 2.9049741636931174e-06, "loss": 0.4383, "step": 11919 }, { "epoch": 1.489399321177678, "grad_norm": 2.1875, "learning_rate": 2.9035674105595845e-06, "loss": 0.5588, "step": 11920 }, { "epoch": 1.4895256137027388, "grad_norm": 1.9296875, "learning_rate": 2.9021609402727634e-06, "loss": 0.4035, "step": 11921 }, { "epoch": 1.4896519062278002, "grad_norm": 1.921875, "learning_rate": 2.9007547528887116e-06, "loss": 0.4262, "step": 11922 }, { "epoch": 1.4897781987528613, "grad_norm": 1.7265625, "learning_rate": 2.8993488484634745e-06, "loss": 0.4064, "step": 11923 }, { "epoch": 1.4899044912779225, "grad_norm": 2.015625, "learning_rate": 2.8979432270530895e-06, "loss": 0.4921, "step": 11924 }, { "epoch": 1.4900307838029836, "grad_norm": 1.875, "learning_rate": 2.8965378887135807e-06, "loss": 0.4628, "step": 11925 }, { "epoch": 1.4901570763280447, "grad_norm": 2.125, "learning_rate": 2.8951328335009633e-06, "loss": 0.573, "step": 11926 }, { "epoch": 1.490283368853106, "grad_norm": 2.296875, "learning_rate": 2.893728061471237e-06, "loss": 0.6707, "step": 11927 }, { "epoch": 1.4904096613781672, "grad_norm": 1.890625, "learning_rate": 2.8923235726803934e-06, "loss": 0.4164, "step": 11928 }, { "epoch": 1.4905359539032284, "grad_norm": 1.9609375, "learning_rate": 2.890919367184413e-06, "loss": 0.4331, "step": 11929 }, { "epoch": 1.4906622464282895, "grad_norm": 1.984375, "learning_rate": 2.889515445039259e-06, "loss": 0.4114, "step": 11930 }, { "epoch": 1.4907885389533506, "grad_norm": 2.1875, "learning_rate": 2.888111806300897e-06, "loss": 0.5072, "step": 11931 }, { "epoch": 1.490914831478412, "grad_norm": 1.96875, "learning_rate": 2.8867084510252684e-06, "loss": 0.4679, "step": 11932 }, { "epoch": 1.491041124003473, "grad_norm": 1.890625, "learning_rate": 2.8853053792683073e-06, "loss": 0.4488, "step": 11933 }, { "epoch": 1.4911674165285342, "grad_norm": 2.125, "learning_rate": 2.8839025910859364e-06, "loss": 0.5955, "step": 11934 }, { "epoch": 1.4912937090535954, "grad_norm": 2.0, "learning_rate": 2.882500086534069e-06, "loss": 0.4365, "step": 11935 }, { "epoch": 1.4914200015786565, "grad_norm": 1.9609375, "learning_rate": 2.8810978656686038e-06, "loss": 0.5504, "step": 11936 }, { "epoch": 1.4915462941037179, "grad_norm": 2.078125, "learning_rate": 2.8796959285454305e-06, "loss": 0.5396, "step": 11937 }, { "epoch": 1.4916725866287788, "grad_norm": 2.046875, "learning_rate": 2.8782942752204277e-06, "loss": 0.4664, "step": 11938 }, { "epoch": 1.4917988791538401, "grad_norm": 2.015625, "learning_rate": 2.876892905749461e-06, "loss": 0.4556, "step": 11939 }, { "epoch": 1.4919251716789013, "grad_norm": 1.9765625, "learning_rate": 2.8754918201883863e-06, "loss": 0.5157, "step": 11940 }, { "epoch": 1.4920514642039624, "grad_norm": 1.7265625, "learning_rate": 2.874091018593047e-06, "loss": 0.4234, "step": 11941 }, { "epoch": 1.4921777567290235, "grad_norm": 1.90625, "learning_rate": 2.872690501019276e-06, "loss": 0.3816, "step": 11942 }, { "epoch": 1.4923040492540847, "grad_norm": 2.0625, "learning_rate": 2.871290267522895e-06, "loss": 0.5602, "step": 11943 }, { "epoch": 1.492430341779146, "grad_norm": 2.03125, "learning_rate": 2.869890318159713e-06, "loss": 0.5179, "step": 11944 }, { "epoch": 1.4925566343042072, "grad_norm": 2.015625, "learning_rate": 2.8684906529855285e-06, "loss": 0.5155, "step": 11945 }, { "epoch": 1.4926829268292683, "grad_norm": 1.921875, "learning_rate": 2.8670912720561296e-06, "loss": 0.4452, "step": 11946 }, { "epoch": 1.4928092193543294, "grad_norm": 2.109375, "learning_rate": 2.8656921754272913e-06, "loss": 0.5374, "step": 11947 }, { "epoch": 1.4929355118793906, "grad_norm": 1.9375, "learning_rate": 2.8642933631547766e-06, "loss": 0.427, "step": 11948 }, { "epoch": 1.493061804404452, "grad_norm": 2.03125, "learning_rate": 2.862894835294344e-06, "loss": 0.5083, "step": 11949 }, { "epoch": 1.493188096929513, "grad_norm": 1.9140625, "learning_rate": 2.8614965919017323e-06, "loss": 0.4749, "step": 11950 }, { "epoch": 1.4933143894545742, "grad_norm": 2.015625, "learning_rate": 2.8600986330326717e-06, "loss": 0.4424, "step": 11951 }, { "epoch": 1.4934406819796353, "grad_norm": 1.8984375, "learning_rate": 2.8587009587428828e-06, "loss": 0.4597, "step": 11952 }, { "epoch": 1.4935669745046964, "grad_norm": 2.140625, "learning_rate": 2.8573035690880735e-06, "loss": 0.5723, "step": 11953 }, { "epoch": 1.4936932670297578, "grad_norm": 1.9140625, "learning_rate": 2.8559064641239385e-06, "loss": 0.5225, "step": 11954 }, { "epoch": 1.493819559554819, "grad_norm": 1.859375, "learning_rate": 2.8545096439061638e-06, "loss": 0.4131, "step": 11955 }, { "epoch": 1.49394585207988, "grad_norm": 1.953125, "learning_rate": 2.8531131084904253e-06, "loss": 0.4326, "step": 11956 }, { "epoch": 1.4940721446049412, "grad_norm": 1.953125, "learning_rate": 2.8517168579323827e-06, "loss": 0.4946, "step": 11957 }, { "epoch": 1.4941984371300023, "grad_norm": 2.0, "learning_rate": 2.850320892287689e-06, "loss": 0.4588, "step": 11958 }, { "epoch": 1.4943247296550635, "grad_norm": 2.03125, "learning_rate": 2.8489252116119826e-06, "loss": 0.4591, "step": 11959 }, { "epoch": 1.4944510221801246, "grad_norm": 1.953125, "learning_rate": 2.8475298159608933e-06, "loss": 0.4686, "step": 11960 }, { "epoch": 1.494577314705186, "grad_norm": 2.03125, "learning_rate": 2.846134705390038e-06, "loss": 0.5398, "step": 11961 }, { "epoch": 1.494703607230247, "grad_norm": 1.921875, "learning_rate": 2.844739879955022e-06, "loss": 0.4681, "step": 11962 }, { "epoch": 1.4948298997553082, "grad_norm": 2.09375, "learning_rate": 2.8433453397114397e-06, "loss": 0.4851, "step": 11963 }, { "epoch": 1.4949561922803694, "grad_norm": 1.96875, "learning_rate": 2.8419510847148747e-06, "loss": 0.537, "step": 11964 }, { "epoch": 1.4950824848054305, "grad_norm": 2.046875, "learning_rate": 2.8405571150208975e-06, "loss": 0.524, "step": 11965 }, { "epoch": 1.4952087773304918, "grad_norm": 2.078125, "learning_rate": 2.8391634306850667e-06, "loss": 0.5023, "step": 11966 }, { "epoch": 1.495335069855553, "grad_norm": 1.890625, "learning_rate": 2.8377700317629365e-06, "loss": 0.4945, "step": 11967 }, { "epoch": 1.4954613623806141, "grad_norm": 1.8359375, "learning_rate": 2.836376918310042e-06, "loss": 0.4519, "step": 11968 }, { "epoch": 1.4955876549056752, "grad_norm": 2.1875, "learning_rate": 2.834984090381908e-06, "loss": 0.5255, "step": 11969 }, { "epoch": 1.4957139474307364, "grad_norm": 1.9609375, "learning_rate": 2.8335915480340504e-06, "loss": 0.462, "step": 11970 }, { "epoch": 1.4958402399557977, "grad_norm": 1.96875, "learning_rate": 2.832199291321973e-06, "loss": 0.4691, "step": 11971 }, { "epoch": 1.4959665324808589, "grad_norm": 2.09375, "learning_rate": 2.8308073203011667e-06, "loss": 0.505, "step": 11972 }, { "epoch": 1.49609282500592, "grad_norm": 1.984375, "learning_rate": 2.829415635027112e-06, "loss": 0.5224, "step": 11973 }, { "epoch": 1.4962191175309811, "grad_norm": 2.0, "learning_rate": 2.828024235555279e-06, "loss": 0.4679, "step": 11974 }, { "epoch": 1.4963454100560423, "grad_norm": 2.015625, "learning_rate": 2.8266331219411247e-06, "loss": 0.4875, "step": 11975 }, { "epoch": 1.4964717025811034, "grad_norm": 2.03125, "learning_rate": 2.825242294240097e-06, "loss": 0.5384, "step": 11976 }, { "epoch": 1.4965979951061645, "grad_norm": 1.9609375, "learning_rate": 2.823851752507629e-06, "loss": 0.4639, "step": 11977 }, { "epoch": 1.496724287631226, "grad_norm": 1.890625, "learning_rate": 2.822461496799146e-06, "loss": 0.3967, "step": 11978 }, { "epoch": 1.496850580156287, "grad_norm": 1.9140625, "learning_rate": 2.8210715271700594e-06, "loss": 0.5574, "step": 11979 }, { "epoch": 1.4969768726813482, "grad_norm": 1.9296875, "learning_rate": 2.81968184367577e-06, "loss": 0.5105, "step": 11980 }, { "epoch": 1.4971031652064093, "grad_norm": 2.0625, "learning_rate": 2.8182924463716676e-06, "loss": 0.5482, "step": 11981 }, { "epoch": 1.4972294577314704, "grad_norm": 2.03125, "learning_rate": 2.8169033353131303e-06, "loss": 0.5058, "step": 11982 }, { "epoch": 1.4973557502565318, "grad_norm": 1.90625, "learning_rate": 2.815514510555524e-06, "loss": 0.4931, "step": 11983 }, { "epoch": 1.497482042781593, "grad_norm": 1.8984375, "learning_rate": 2.8141259721542016e-06, "loss": 0.4957, "step": 11984 }, { "epoch": 1.497608335306654, "grad_norm": 2.1875, "learning_rate": 2.8127377201645135e-06, "loss": 0.7112, "step": 11985 }, { "epoch": 1.4977346278317152, "grad_norm": 2.03125, "learning_rate": 2.8113497546417866e-06, "loss": 0.4797, "step": 11986 }, { "epoch": 1.4978609203567763, "grad_norm": 1.859375, "learning_rate": 2.809962075641345e-06, "loss": 0.498, "step": 11987 }, { "epoch": 1.4979872128818377, "grad_norm": 2.046875, "learning_rate": 2.808574683218496e-06, "loss": 0.5071, "step": 11988 }, { "epoch": 1.4981135054068988, "grad_norm": 2.015625, "learning_rate": 2.8071875774285382e-06, "loss": 0.522, "step": 11989 }, { "epoch": 1.49823979793196, "grad_norm": 2.15625, "learning_rate": 2.805800758326759e-06, "loss": 0.5444, "step": 11990 }, { "epoch": 1.498366090457021, "grad_norm": 1.875, "learning_rate": 2.8044142259684336e-06, "loss": 0.4434, "step": 11991 }, { "epoch": 1.4984923829820822, "grad_norm": 1.890625, "learning_rate": 2.8030279804088255e-06, "loss": 0.4711, "step": 11992 }, { "epoch": 1.4986186755071433, "grad_norm": 1.84375, "learning_rate": 2.801642021703187e-06, "loss": 0.4418, "step": 11993 }, { "epoch": 1.4987449680322045, "grad_norm": 2.0, "learning_rate": 2.800256349906758e-06, "loss": 0.536, "step": 11994 }, { "epoch": 1.4988712605572658, "grad_norm": 1.9296875, "learning_rate": 2.79887096507477e-06, "loss": 0.4359, "step": 11995 }, { "epoch": 1.498997553082327, "grad_norm": 1.921875, "learning_rate": 2.7974858672624417e-06, "loss": 0.4409, "step": 11996 }, { "epoch": 1.499123845607388, "grad_norm": 2.015625, "learning_rate": 2.796101056524977e-06, "loss": 0.4379, "step": 11997 }, { "epoch": 1.4992501381324492, "grad_norm": 2.09375, "learning_rate": 2.794716532917573e-06, "loss": 0.4712, "step": 11998 }, { "epoch": 1.4993764306575104, "grad_norm": 1.96875, "learning_rate": 2.7933322964954125e-06, "loss": 0.5067, "step": 11999 }, { "epoch": 1.4995027231825717, "grad_norm": 2.0625, "learning_rate": 2.7919483473136678e-06, "loss": 0.5535, "step": 12000 }, { "epoch": 1.4996290157076329, "grad_norm": 1.875, "learning_rate": 2.7905646854275015e-06, "loss": 0.4467, "step": 12001 }, { "epoch": 1.499755308232694, "grad_norm": 2.046875, "learning_rate": 2.7891813108920583e-06, "loss": 0.4926, "step": 12002 }, { "epoch": 1.4998816007577551, "grad_norm": 2.25, "learning_rate": 2.7877982237624824e-06, "loss": 0.6097, "step": 12003 }, { "epoch": 1.5000078932828163, "grad_norm": 1.90625, "learning_rate": 2.786415424093898e-06, "loss": 0.4814, "step": 12004 }, { "epoch": 1.5001341858078776, "grad_norm": 1.796875, "learning_rate": 2.78503291194142e-06, "loss": 0.4778, "step": 12005 }, { "epoch": 1.5002604783329385, "grad_norm": 1.96875, "learning_rate": 2.783650687360152e-06, "loss": 0.5193, "step": 12006 }, { "epoch": 1.5003867708579999, "grad_norm": 1.828125, "learning_rate": 2.782268750405185e-06, "loss": 0.4336, "step": 12007 }, { "epoch": 1.500513063383061, "grad_norm": 1.9765625, "learning_rate": 2.7808871011316006e-06, "loss": 0.5296, "step": 12008 }, { "epoch": 1.5006393559081221, "grad_norm": 1.921875, "learning_rate": 2.7795057395944693e-06, "loss": 0.4504, "step": 12009 }, { "epoch": 1.5007656484331835, "grad_norm": 1.9765625, "learning_rate": 2.778124665848847e-06, "loss": 0.5428, "step": 12010 }, { "epoch": 1.5008919409582444, "grad_norm": 1.84375, "learning_rate": 2.7767438799497802e-06, "loss": 0.4948, "step": 12011 }, { "epoch": 1.5010182334833058, "grad_norm": 1.890625, "learning_rate": 2.775363381952304e-06, "loss": 0.4942, "step": 12012 }, { "epoch": 1.501144526008367, "grad_norm": 2.0, "learning_rate": 2.773983171911442e-06, "loss": 0.5407, "step": 12013 }, { "epoch": 1.501270818533428, "grad_norm": 2.21875, "learning_rate": 2.7726032498822053e-06, "loss": 0.5546, "step": 12014 }, { "epoch": 1.5013971110584894, "grad_norm": 2.09375, "learning_rate": 2.771223615919596e-06, "loss": 0.487, "step": 12015 }, { "epoch": 1.5015234035835503, "grad_norm": 1.96875, "learning_rate": 2.769844270078601e-06, "loss": 0.5005, "step": 12016 }, { "epoch": 1.5016496961086117, "grad_norm": 1.953125, "learning_rate": 2.7684652124141977e-06, "loss": 0.4699, "step": 12017 }, { "epoch": 1.5017759886336728, "grad_norm": 1.8046875, "learning_rate": 2.7670864429813536e-06, "loss": 0.46, "step": 12018 }, { "epoch": 1.501902281158734, "grad_norm": 1.953125, "learning_rate": 2.7657079618350225e-06, "loss": 0.5211, "step": 12019 }, { "epoch": 1.502028573683795, "grad_norm": 2.015625, "learning_rate": 2.764329769030144e-06, "loss": 0.5039, "step": 12020 }, { "epoch": 1.5021548662088562, "grad_norm": 1.984375, "learning_rate": 2.7629518646216556e-06, "loss": 0.4584, "step": 12021 }, { "epoch": 1.5022811587339175, "grad_norm": 1.90625, "learning_rate": 2.7615742486644746e-06, "loss": 0.4423, "step": 12022 }, { "epoch": 1.5024074512589785, "grad_norm": 2.171875, "learning_rate": 2.7601969212135095e-06, "loss": 0.5453, "step": 12023 }, { "epoch": 1.5025337437840398, "grad_norm": 2.09375, "learning_rate": 2.7588198823236556e-06, "loss": 0.5662, "step": 12024 }, { "epoch": 1.502660036309101, "grad_norm": 2.046875, "learning_rate": 2.7574431320498007e-06, "loss": 0.4782, "step": 12025 }, { "epoch": 1.502786328834162, "grad_norm": 1.96875, "learning_rate": 2.7560666704468177e-06, "loss": 0.4189, "step": 12026 }, { "epoch": 1.5029126213592234, "grad_norm": 2.03125, "learning_rate": 2.754690497569569e-06, "loss": 0.5464, "step": 12027 }, { "epoch": 1.5030389138842843, "grad_norm": 2.015625, "learning_rate": 2.753314613472906e-06, "loss": 0.4985, "step": 12028 }, { "epoch": 1.5031652064093457, "grad_norm": 1.984375, "learning_rate": 2.7519390182116678e-06, "loss": 0.4895, "step": 12029 }, { "epoch": 1.5032914989344068, "grad_norm": 2.84375, "learning_rate": 2.750563711840681e-06, "loss": 0.4662, "step": 12030 }, { "epoch": 1.503417791459468, "grad_norm": 1.8984375, "learning_rate": 2.749188694414765e-06, "loss": 0.4231, "step": 12031 }, { "epoch": 1.5035440839845293, "grad_norm": 1.9921875, "learning_rate": 2.7478139659887216e-06, "loss": 0.5546, "step": 12032 }, { "epoch": 1.5036703765095902, "grad_norm": 2.25, "learning_rate": 2.746439526617346e-06, "loss": 0.5762, "step": 12033 }, { "epoch": 1.5037966690346516, "grad_norm": 1.890625, "learning_rate": 2.74506537635542e-06, "loss": 0.4901, "step": 12034 }, { "epoch": 1.5039229615597127, "grad_norm": 1.890625, "learning_rate": 2.743691515257714e-06, "loss": 0.4689, "step": 12035 }, { "epoch": 1.5040492540847739, "grad_norm": 2.1875, "learning_rate": 2.742317943378985e-06, "loss": 0.4842, "step": 12036 }, { "epoch": 1.504175546609835, "grad_norm": 2.078125, "learning_rate": 2.740944660773982e-06, "loss": 0.5075, "step": 12037 }, { "epoch": 1.5043018391348961, "grad_norm": 2.03125, "learning_rate": 2.7395716674974373e-06, "loss": 0.4721, "step": 12038 }, { "epoch": 1.5044281316599575, "grad_norm": 1.9453125, "learning_rate": 2.7381989636040806e-06, "loss": 0.53, "step": 12039 }, { "epoch": 1.5045544241850184, "grad_norm": 1.9921875, "learning_rate": 2.7368265491486236e-06, "loss": 0.4491, "step": 12040 }, { "epoch": 1.5046807167100797, "grad_norm": 2.0625, "learning_rate": 2.735454424185764e-06, "loss": 0.4697, "step": 12041 }, { "epoch": 1.5048070092351409, "grad_norm": 2.046875, "learning_rate": 2.734082588770195e-06, "loss": 0.4805, "step": 12042 }, { "epoch": 1.504933301760202, "grad_norm": 1.9765625, "learning_rate": 2.7327110429565917e-06, "loss": 0.4711, "step": 12043 }, { "epoch": 1.5050595942852634, "grad_norm": 1.921875, "learning_rate": 2.731339786799623e-06, "loss": 0.4886, "step": 12044 }, { "epoch": 1.5051858868103243, "grad_norm": 2.0625, "learning_rate": 2.729968820353941e-06, "loss": 0.5496, "step": 12045 }, { "epoch": 1.5053121793353856, "grad_norm": 2.03125, "learning_rate": 2.7285981436741926e-06, "loss": 0.4685, "step": 12046 }, { "epoch": 1.5054384718604468, "grad_norm": 1.8125, "learning_rate": 2.7272277568150073e-06, "loss": 0.4254, "step": 12047 }, { "epoch": 1.505564764385508, "grad_norm": 2.046875, "learning_rate": 2.725857659831006e-06, "loss": 0.507, "step": 12048 }, { "epoch": 1.5056910569105693, "grad_norm": 2.046875, "learning_rate": 2.724487852776797e-06, "loss": 0.4521, "step": 12049 }, { "epoch": 1.5058173494356302, "grad_norm": 2.15625, "learning_rate": 2.723118335706979e-06, "loss": 0.4612, "step": 12050 }, { "epoch": 1.5059436419606915, "grad_norm": 2.0, "learning_rate": 2.721749108676136e-06, "loss": 0.4652, "step": 12051 }, { "epoch": 1.5060699344857527, "grad_norm": 2.03125, "learning_rate": 2.7203801717388423e-06, "loss": 0.5073, "step": 12052 }, { "epoch": 1.5061962270108138, "grad_norm": 2.0625, "learning_rate": 2.719011524949662e-06, "loss": 0.4888, "step": 12053 }, { "epoch": 1.506322519535875, "grad_norm": 1.9609375, "learning_rate": 2.7176431683631445e-06, "loss": 0.5065, "step": 12054 }, { "epoch": 1.506448812060936, "grad_norm": 1.875, "learning_rate": 2.7162751020338295e-06, "loss": 0.4665, "step": 12055 }, { "epoch": 1.5065751045859974, "grad_norm": 1.90625, "learning_rate": 2.7149073260162416e-06, "loss": 0.4571, "step": 12056 }, { "epoch": 1.5067013971110583, "grad_norm": 1.90625, "learning_rate": 2.713539840364905e-06, "loss": 0.4794, "step": 12057 }, { "epoch": 1.5068276896361197, "grad_norm": 2.0, "learning_rate": 2.7121726451343176e-06, "loss": 0.4857, "step": 12058 }, { "epoch": 1.5069539821611808, "grad_norm": 1.8046875, "learning_rate": 2.7108057403789765e-06, "loss": 0.4223, "step": 12059 }, { "epoch": 1.507080274686242, "grad_norm": 2.0625, "learning_rate": 2.7094391261533605e-06, "loss": 0.5537, "step": 12060 }, { "epoch": 1.5072065672113033, "grad_norm": 1.828125, "learning_rate": 2.7080728025119405e-06, "loss": 0.3879, "step": 12061 }, { "epoch": 1.5073328597363642, "grad_norm": 1.96875, "learning_rate": 2.7067067695091762e-06, "loss": 0.4688, "step": 12062 }, { "epoch": 1.5074591522614256, "grad_norm": 1.984375, "learning_rate": 2.705341027199512e-06, "loss": 0.5149, "step": 12063 }, { "epoch": 1.5075854447864867, "grad_norm": 1.9765625, "learning_rate": 2.703975575637384e-06, "loss": 0.4943, "step": 12064 }, { "epoch": 1.5077117373115478, "grad_norm": 2.171875, "learning_rate": 2.702610414877217e-06, "loss": 0.4939, "step": 12065 }, { "epoch": 1.5078380298366092, "grad_norm": 2.1875, "learning_rate": 2.701245544973421e-06, "loss": 0.5646, "step": 12066 }, { "epoch": 1.50796432236167, "grad_norm": 2.0, "learning_rate": 2.699880965980397e-06, "loss": 0.4533, "step": 12067 }, { "epoch": 1.5080906148867315, "grad_norm": 2.140625, "learning_rate": 2.698516677952535e-06, "loss": 0.5195, "step": 12068 }, { "epoch": 1.5082169074117926, "grad_norm": 2.078125, "learning_rate": 2.697152680944212e-06, "loss": 0.4807, "step": 12069 }, { "epoch": 1.5083431999368537, "grad_norm": 1.9296875, "learning_rate": 2.695788975009792e-06, "loss": 0.4788, "step": 12070 }, { "epoch": 1.5084694924619149, "grad_norm": 2.03125, "learning_rate": 2.6944255602036305e-06, "loss": 0.5158, "step": 12071 }, { "epoch": 1.508595784986976, "grad_norm": 2.203125, "learning_rate": 2.6930624365800704e-06, "loss": 0.5893, "step": 12072 }, { "epoch": 1.5087220775120374, "grad_norm": 1.9453125, "learning_rate": 2.691699604193442e-06, "loss": 0.4295, "step": 12073 }, { "epoch": 1.5088483700370983, "grad_norm": 2.140625, "learning_rate": 2.6903370630980595e-06, "loss": 0.501, "step": 12074 }, { "epoch": 1.5089746625621596, "grad_norm": 2.34375, "learning_rate": 2.68897481334824e-06, "loss": 0.6095, "step": 12075 }, { "epoch": 1.5091009550872208, "grad_norm": 1.9453125, "learning_rate": 2.6876128549982725e-06, "loss": 0.4765, "step": 12076 }, { "epoch": 1.5092272476122819, "grad_norm": 2.03125, "learning_rate": 2.6862511881024456e-06, "loss": 0.5075, "step": 12077 }, { "epoch": 1.5093535401373432, "grad_norm": 2.15625, "learning_rate": 2.684889812715029e-06, "loss": 0.522, "step": 12078 }, { "epoch": 1.5094798326624042, "grad_norm": 2.046875, "learning_rate": 2.6835287288902857e-06, "loss": 0.5174, "step": 12079 }, { "epoch": 1.5096061251874655, "grad_norm": 2.0, "learning_rate": 2.682167936682464e-06, "loss": 0.4728, "step": 12080 }, { "epoch": 1.5097324177125266, "grad_norm": 2.234375, "learning_rate": 2.6808074361458024e-06, "loss": 0.6081, "step": 12081 }, { "epoch": 1.5098587102375878, "grad_norm": 1.9921875, "learning_rate": 2.6794472273345272e-06, "loss": 0.5, "step": 12082 }, { "epoch": 1.5099850027626491, "grad_norm": 1.8984375, "learning_rate": 2.6780873103028526e-06, "loss": 0.4757, "step": 12083 }, { "epoch": 1.51011129528771, "grad_norm": 2.0, "learning_rate": 2.6767276851049818e-06, "loss": 0.4795, "step": 12084 }, { "epoch": 1.5102375878127714, "grad_norm": 1.890625, "learning_rate": 2.6753683517951055e-06, "loss": 0.4535, "step": 12085 }, { "epoch": 1.5103638803378325, "grad_norm": 2.3125, "learning_rate": 2.6740093104274055e-06, "loss": 0.654, "step": 12086 }, { "epoch": 1.5104901728628937, "grad_norm": 1.984375, "learning_rate": 2.672650561056048e-06, "loss": 0.5571, "step": 12087 }, { "epoch": 1.5106164653879548, "grad_norm": 1.875, "learning_rate": 2.6712921037351903e-06, "loss": 0.5326, "step": 12088 }, { "epoch": 1.510742757913016, "grad_norm": 2.09375, "learning_rate": 2.6699339385189772e-06, "loss": 0.5915, "step": 12089 }, { "epoch": 1.5108690504380773, "grad_norm": 2.140625, "learning_rate": 2.6685760654615424e-06, "loss": 0.5043, "step": 12090 }, { "epoch": 1.5109953429631384, "grad_norm": 2.0, "learning_rate": 2.6672184846170057e-06, "loss": 0.4416, "step": 12091 }, { "epoch": 1.5111216354881996, "grad_norm": 1.953125, "learning_rate": 2.665861196039475e-06, "loss": 0.4941, "step": 12092 }, { "epoch": 1.5112479280132607, "grad_norm": 2.015625, "learning_rate": 2.6645041997830546e-06, "loss": 0.5543, "step": 12093 }, { "epoch": 1.5113742205383218, "grad_norm": 2.21875, "learning_rate": 2.6631474959018288e-06, "loss": 0.5838, "step": 12094 }, { "epoch": 1.5115005130633832, "grad_norm": 2.0625, "learning_rate": 2.661791084449872e-06, "loss": 0.4852, "step": 12095 }, { "epoch": 1.511626805588444, "grad_norm": 1.9921875, "learning_rate": 2.6604349654812465e-06, "loss": 0.5064, "step": 12096 }, { "epoch": 1.5117530981135054, "grad_norm": 2.078125, "learning_rate": 2.6590791390500058e-06, "loss": 0.4893, "step": 12097 }, { "epoch": 1.5118793906385666, "grad_norm": 2.15625, "learning_rate": 2.6577236052101886e-06, "loss": 0.4954, "step": 12098 }, { "epoch": 1.5120056831636277, "grad_norm": 2.015625, "learning_rate": 2.656368364015823e-06, "loss": 0.5131, "step": 12099 }, { "epoch": 1.512131975688689, "grad_norm": 2.484375, "learning_rate": 2.6550134155209274e-06, "loss": 0.5758, "step": 12100 }, { "epoch": 1.51225826821375, "grad_norm": 1.90625, "learning_rate": 2.6536587597795038e-06, "loss": 0.4527, "step": 12101 }, { "epoch": 1.5123845607388113, "grad_norm": 1.84375, "learning_rate": 2.652304396845549e-06, "loss": 0.44, "step": 12102 }, { "epoch": 1.5125108532638725, "grad_norm": 1.953125, "learning_rate": 2.6509503267730418e-06, "loss": 0.4379, "step": 12103 }, { "epoch": 1.5126371457889336, "grad_norm": 1.796875, "learning_rate": 2.6495965496159536e-06, "loss": 0.4245, "step": 12104 }, { "epoch": 1.5127634383139947, "grad_norm": 1.953125, "learning_rate": 2.6482430654282422e-06, "loss": 0.5044, "step": 12105 }, { "epoch": 1.5128897308390559, "grad_norm": 2.046875, "learning_rate": 2.6468898742638536e-06, "loss": 0.4489, "step": 12106 }, { "epoch": 1.5130160233641172, "grad_norm": 2.203125, "learning_rate": 2.6455369761767247e-06, "loss": 0.5675, "step": 12107 }, { "epoch": 1.5131423158891784, "grad_norm": 2.25, "learning_rate": 2.644184371220778e-06, "loss": 0.4609, "step": 12108 }, { "epoch": 1.5132686084142395, "grad_norm": 2.140625, "learning_rate": 2.6428320594499234e-06, "loss": 0.4678, "step": 12109 }, { "epoch": 1.5133949009393006, "grad_norm": 1.9375, "learning_rate": 2.6414800409180597e-06, "loss": 0.4709, "step": 12110 }, { "epoch": 1.5135211934643618, "grad_norm": 2.0625, "learning_rate": 2.64012831567908e-06, "loss": 0.5204, "step": 12111 }, { "epoch": 1.5136474859894231, "grad_norm": 1.96875, "learning_rate": 2.63877688378686e-06, "loss": 0.4604, "step": 12112 }, { "epoch": 1.513773778514484, "grad_norm": 1.921875, "learning_rate": 2.6374257452952613e-06, "loss": 0.4249, "step": 12113 }, { "epoch": 1.5139000710395454, "grad_norm": 1.9921875, "learning_rate": 2.636074900258139e-06, "loss": 0.4761, "step": 12114 }, { "epoch": 1.5140263635646065, "grad_norm": 2.109375, "learning_rate": 2.634724348729333e-06, "loss": 0.4718, "step": 12115 }, { "epoch": 1.5141526560896676, "grad_norm": 2.09375, "learning_rate": 2.6333740907626747e-06, "loss": 0.5055, "step": 12116 }, { "epoch": 1.514278948614729, "grad_norm": 1.984375, "learning_rate": 2.6320241264119816e-06, "loss": 0.4868, "step": 12117 }, { "epoch": 1.51440524113979, "grad_norm": 1.8828125, "learning_rate": 2.6306744557310604e-06, "loss": 0.5028, "step": 12118 }, { "epoch": 1.5145315336648513, "grad_norm": 2.03125, "learning_rate": 2.6293250787737046e-06, "loss": 0.4454, "step": 12119 }, { "epoch": 1.5146578261899124, "grad_norm": 1.9765625, "learning_rate": 2.627975995593698e-06, "loss": 0.5001, "step": 12120 }, { "epoch": 1.5147841187149735, "grad_norm": 2.03125, "learning_rate": 2.6266272062448118e-06, "loss": 0.467, "step": 12121 }, { "epoch": 1.514910411240035, "grad_norm": 1.984375, "learning_rate": 2.6252787107808053e-06, "loss": 0.4917, "step": 12122 }, { "epoch": 1.5150367037650958, "grad_norm": 2.203125, "learning_rate": 2.6239305092554258e-06, "loss": 0.7206, "step": 12123 }, { "epoch": 1.5151629962901572, "grad_norm": 1.96875, "learning_rate": 2.62258260172241e-06, "loss": 0.4808, "step": 12124 }, { "epoch": 1.5152892888152183, "grad_norm": 1.9140625, "learning_rate": 2.6212349882354826e-06, "loss": 0.5159, "step": 12125 }, { "epoch": 1.5154155813402794, "grad_norm": 2.140625, "learning_rate": 2.6198876688483554e-06, "loss": 0.5198, "step": 12126 }, { "epoch": 1.5155418738653406, "grad_norm": 1.9609375, "learning_rate": 2.618540643614729e-06, "loss": 0.5169, "step": 12127 }, { "epoch": 1.5156681663904017, "grad_norm": 1.9921875, "learning_rate": 2.6171939125882907e-06, "loss": 0.5202, "step": 12128 }, { "epoch": 1.515794458915463, "grad_norm": 1.9765625, "learning_rate": 2.6158474758227235e-06, "loss": 0.4814, "step": 12129 }, { "epoch": 1.515920751440524, "grad_norm": 2.015625, "learning_rate": 2.6145013333716897e-06, "loss": 0.4352, "step": 12130 }, { "epoch": 1.5160470439655853, "grad_norm": 1.9921875, "learning_rate": 2.613155485288844e-06, "loss": 0.4602, "step": 12131 }, { "epoch": 1.5161733364906465, "grad_norm": 2.1875, "learning_rate": 2.6118099316278287e-06, "loss": 0.4995, "step": 12132 }, { "epoch": 1.5162996290157076, "grad_norm": 2.046875, "learning_rate": 2.610464672442273e-06, "loss": 0.4588, "step": 12133 }, { "epoch": 1.516425921540769, "grad_norm": 2.03125, "learning_rate": 2.6091197077857977e-06, "loss": 0.5665, "step": 12134 }, { "epoch": 1.5165522140658299, "grad_norm": 1.96875, "learning_rate": 2.607775037712008e-06, "loss": 0.477, "step": 12135 }, { "epoch": 1.5166785065908912, "grad_norm": 2.0, "learning_rate": 2.6064306622745007e-06, "loss": 0.4318, "step": 12136 }, { "epoch": 1.5168047991159523, "grad_norm": 2.0, "learning_rate": 2.6050865815268576e-06, "loss": 0.5086, "step": 12137 }, { "epoch": 1.5169310916410135, "grad_norm": 1.8984375, "learning_rate": 2.6037427955226524e-06, "loss": 0.4119, "step": 12138 }, { "epoch": 1.5170573841660748, "grad_norm": 2.0625, "learning_rate": 2.602399304315444e-06, "loss": 0.4977, "step": 12139 }, { "epoch": 1.5171836766911357, "grad_norm": 2.15625, "learning_rate": 2.6010561079587817e-06, "loss": 0.5351, "step": 12140 }, { "epoch": 1.517309969216197, "grad_norm": 1.84375, "learning_rate": 2.5997132065062e-06, "loss": 0.447, "step": 12141 }, { "epoch": 1.5174362617412582, "grad_norm": 1.9453125, "learning_rate": 2.5983706000112275e-06, "loss": 0.4963, "step": 12142 }, { "epoch": 1.5175625542663194, "grad_norm": 1.8671875, "learning_rate": 2.5970282885273733e-06, "loss": 0.4867, "step": 12143 }, { "epoch": 1.5176888467913805, "grad_norm": 1.9609375, "learning_rate": 2.59568627210814e-06, "loss": 0.4994, "step": 12144 }, { "epoch": 1.5178151393164416, "grad_norm": 2.140625, "learning_rate": 2.5943445508070186e-06, "loss": 0.5013, "step": 12145 }, { "epoch": 1.517941431841503, "grad_norm": 2.109375, "learning_rate": 2.5930031246774823e-06, "loss": 0.5465, "step": 12146 }, { "epoch": 1.518067724366564, "grad_norm": 1.9453125, "learning_rate": 2.5916619937730037e-06, "loss": 0.4885, "step": 12147 }, { "epoch": 1.5181940168916253, "grad_norm": 1.90625, "learning_rate": 2.5903211581470346e-06, "loss": 0.4594, "step": 12148 }, { "epoch": 1.5183203094166864, "grad_norm": 2.0625, "learning_rate": 2.588980617853015e-06, "loss": 0.6177, "step": 12149 }, { "epoch": 1.5184466019417475, "grad_norm": 2.28125, "learning_rate": 2.587640372944379e-06, "loss": 0.5296, "step": 12150 }, { "epoch": 1.5185728944668089, "grad_norm": 2.015625, "learning_rate": 2.5863004234745435e-06, "loss": 0.4609, "step": 12151 }, { "epoch": 1.5186991869918698, "grad_norm": 1.953125, "learning_rate": 2.584960769496916e-06, "loss": 0.4121, "step": 12152 }, { "epoch": 1.5188254795169311, "grad_norm": 2.046875, "learning_rate": 2.5836214110648916e-06, "loss": 0.4849, "step": 12153 }, { "epoch": 1.5189517720419923, "grad_norm": 2.109375, "learning_rate": 2.5822823482318547e-06, "loss": 0.5062, "step": 12154 }, { "epoch": 1.5190780645670534, "grad_norm": 2.078125, "learning_rate": 2.5809435810511775e-06, "loss": 0.4142, "step": 12155 }, { "epoch": 1.5192043570921148, "grad_norm": 1.9296875, "learning_rate": 2.5796051095762175e-06, "loss": 0.4279, "step": 12156 }, { "epoch": 1.5193306496171757, "grad_norm": 1.921875, "learning_rate": 2.578266933860326e-06, "loss": 0.542, "step": 12157 }, { "epoch": 1.519456942142237, "grad_norm": 2.078125, "learning_rate": 2.576929053956838e-06, "loss": 0.5089, "step": 12158 }, { "epoch": 1.5195832346672982, "grad_norm": 1.9296875, "learning_rate": 2.5755914699190776e-06, "loss": 0.5234, "step": 12159 }, { "epoch": 1.5197095271923593, "grad_norm": 1.9140625, "learning_rate": 2.5742541818003595e-06, "loss": 0.4677, "step": 12160 }, { "epoch": 1.5198358197174204, "grad_norm": 2.0625, "learning_rate": 2.5729171896539827e-06, "loss": 0.4715, "step": 12161 }, { "epoch": 1.5199621122424816, "grad_norm": 1.84375, "learning_rate": 2.5715804935332377e-06, "loss": 0.4506, "step": 12162 }, { "epoch": 1.520088404767543, "grad_norm": 2.0625, "learning_rate": 2.570244093491402e-06, "loss": 0.5027, "step": 12163 }, { "epoch": 1.5202146972926038, "grad_norm": 1.984375, "learning_rate": 2.568907989581738e-06, "loss": 0.4568, "step": 12164 }, { "epoch": 1.5203409898176652, "grad_norm": 1.9453125, "learning_rate": 2.567572181857506e-06, "loss": 0.4637, "step": 12165 }, { "epoch": 1.5204672823427263, "grad_norm": 1.921875, "learning_rate": 2.5662366703719444e-06, "loss": 0.4323, "step": 12166 }, { "epoch": 1.5205935748677875, "grad_norm": 1.9140625, "learning_rate": 2.5649014551782836e-06, "loss": 0.5266, "step": 12167 }, { "epoch": 1.5207198673928488, "grad_norm": 1.8671875, "learning_rate": 2.5635665363297424e-06, "loss": 0.5012, "step": 12168 }, { "epoch": 1.5208461599179097, "grad_norm": 2.015625, "learning_rate": 2.562231913879527e-06, "loss": 0.5255, "step": 12169 }, { "epoch": 1.520972452442971, "grad_norm": 2.203125, "learning_rate": 2.5608975878808327e-06, "loss": 0.4875, "step": 12170 }, { "epoch": 1.5210987449680322, "grad_norm": 2.0, "learning_rate": 2.5595635583868427e-06, "loss": 0.5084, "step": 12171 }, { "epoch": 1.5212250374930933, "grad_norm": 1.984375, "learning_rate": 2.5582298254507274e-06, "loss": 0.4714, "step": 12172 }, { "epoch": 1.5213513300181547, "grad_norm": 2.109375, "learning_rate": 2.556896389125646e-06, "loss": 0.5032, "step": 12173 }, { "epoch": 1.5214776225432156, "grad_norm": 1.890625, "learning_rate": 2.5555632494647476e-06, "loss": 0.4864, "step": 12174 }, { "epoch": 1.521603915068277, "grad_norm": 1.9453125, "learning_rate": 2.5542304065211675e-06, "loss": 0.4907, "step": 12175 }, { "epoch": 1.521730207593338, "grad_norm": 1.78125, "learning_rate": 2.552897860348028e-06, "loss": 0.4215, "step": 12176 }, { "epoch": 1.5218565001183992, "grad_norm": 1.984375, "learning_rate": 2.5515656109984433e-06, "loss": 0.5031, "step": 12177 }, { "epoch": 1.5219827926434604, "grad_norm": 2.0625, "learning_rate": 2.5502336585255116e-06, "loss": 0.404, "step": 12178 }, { "epoch": 1.5221090851685215, "grad_norm": 2.078125, "learning_rate": 2.548902002982323e-06, "loss": 0.5255, "step": 12179 }, { "epoch": 1.5222353776935829, "grad_norm": 1.8984375, "learning_rate": 2.5475706444219505e-06, "loss": 0.454, "step": 12180 }, { "epoch": 1.5223616702186438, "grad_norm": 2.046875, "learning_rate": 2.546239582897465e-06, "loss": 0.5263, "step": 12181 }, { "epoch": 1.5224879627437051, "grad_norm": 2.265625, "learning_rate": 2.5449088184619163e-06, "loss": 0.6469, "step": 12182 }, { "epoch": 1.5226142552687663, "grad_norm": 1.8828125, "learning_rate": 2.5435783511683444e-06, "loss": 0.4738, "step": 12183 }, { "epoch": 1.5227405477938274, "grad_norm": 2.078125, "learning_rate": 2.542248181069781e-06, "loss": 0.4661, "step": 12184 }, { "epoch": 1.5228668403188887, "grad_norm": 1.890625, "learning_rate": 2.54091830821924e-06, "loss": 0.4369, "step": 12185 }, { "epoch": 1.5229931328439497, "grad_norm": 2.03125, "learning_rate": 2.539588732669731e-06, "loss": 0.5142, "step": 12186 }, { "epoch": 1.523119425369011, "grad_norm": 1.8359375, "learning_rate": 2.5382594544742444e-06, "loss": 0.5046, "step": 12187 }, { "epoch": 1.5232457178940721, "grad_norm": 2.171875, "learning_rate": 2.5369304736857635e-06, "loss": 0.6055, "step": 12188 }, { "epoch": 1.5233720104191333, "grad_norm": 1.9140625, "learning_rate": 2.5356017903572584e-06, "loss": 0.4667, "step": 12189 }, { "epoch": 1.5234983029441946, "grad_norm": 1.8828125, "learning_rate": 2.534273404541686e-06, "loss": 0.4775, "step": 12190 }, { "epoch": 1.5236245954692555, "grad_norm": 2.0, "learning_rate": 2.532945316291994e-06, "loss": 0.5242, "step": 12191 }, { "epoch": 1.523750887994317, "grad_norm": 1.8359375, "learning_rate": 2.531617525661115e-06, "loss": 0.4866, "step": 12192 }, { "epoch": 1.523877180519378, "grad_norm": 1.859375, "learning_rate": 2.5302900327019743e-06, "loss": 0.4764, "step": 12193 }, { "epoch": 1.5240034730444392, "grad_norm": 1.9453125, "learning_rate": 2.52896283746748e-06, "loss": 0.4445, "step": 12194 }, { "epoch": 1.5241297655695003, "grad_norm": 1.9453125, "learning_rate": 2.527635940010532e-06, "loss": 0.4702, "step": 12195 }, { "epoch": 1.5242560580945614, "grad_norm": 2.03125, "learning_rate": 2.5263093403840145e-06, "loss": 0.4489, "step": 12196 }, { "epoch": 1.5243823506196228, "grad_norm": 2.109375, "learning_rate": 2.524983038640808e-06, "loss": 0.5341, "step": 12197 }, { "epoch": 1.524508643144684, "grad_norm": 2.25, "learning_rate": 2.523657034833773e-06, "loss": 0.513, "step": 12198 }, { "epoch": 1.524634935669745, "grad_norm": 2.0, "learning_rate": 2.5223313290157603e-06, "loss": 0.5407, "step": 12199 }, { "epoch": 1.5247612281948062, "grad_norm": 2.015625, "learning_rate": 2.52100592123961e-06, "loss": 0.5407, "step": 12200 }, { "epoch": 1.5248875207198673, "grad_norm": 2.03125, "learning_rate": 2.5196808115581483e-06, "loss": 0.4766, "step": 12201 }, { "epoch": 1.5250138132449287, "grad_norm": 2.09375, "learning_rate": 2.5183560000241938e-06, "loss": 0.5324, "step": 12202 }, { "epoch": 1.5251401057699896, "grad_norm": 2.09375, "learning_rate": 2.5170314866905477e-06, "loss": 0.5663, "step": 12203 }, { "epoch": 1.525266398295051, "grad_norm": 1.8984375, "learning_rate": 2.5157072716100028e-06, "loss": 0.4593, "step": 12204 }, { "epoch": 1.525392690820112, "grad_norm": 2.109375, "learning_rate": 2.514383354835338e-06, "loss": 0.5733, "step": 12205 }, { "epoch": 1.5255189833451732, "grad_norm": 2.078125, "learning_rate": 2.5130597364193234e-06, "loss": 0.4983, "step": 12206 }, { "epoch": 1.5256452758702346, "grad_norm": 1.953125, "learning_rate": 2.511736416414714e-06, "loss": 0.4341, "step": 12207 }, { "epoch": 1.5257715683952955, "grad_norm": 1.9765625, "learning_rate": 2.5104133948742547e-06, "loss": 0.5486, "step": 12208 }, { "epoch": 1.5258978609203568, "grad_norm": 1.953125, "learning_rate": 2.509090671850678e-06, "loss": 0.5374, "step": 12209 }, { "epoch": 1.526024153445418, "grad_norm": 1.984375, "learning_rate": 2.5077682473967037e-06, "loss": 0.5142, "step": 12210 }, { "epoch": 1.526150445970479, "grad_norm": 1.953125, "learning_rate": 2.5064461215650406e-06, "loss": 0.4792, "step": 12211 }, { "epoch": 1.5262767384955402, "grad_norm": 1.9765625, "learning_rate": 2.505124294408383e-06, "loss": 0.4685, "step": 12212 }, { "epoch": 1.5264030310206014, "grad_norm": 1.96875, "learning_rate": 2.503802765979422e-06, "loss": 0.4472, "step": 12213 }, { "epoch": 1.5265293235456627, "grad_norm": 1.9140625, "learning_rate": 2.502481536330826e-06, "loss": 0.4984, "step": 12214 }, { "epoch": 1.5266556160707239, "grad_norm": 2.1875, "learning_rate": 2.501160605515258e-06, "loss": 0.4656, "step": 12215 }, { "epoch": 1.526781908595785, "grad_norm": 1.9296875, "learning_rate": 2.4998399735853652e-06, "loss": 0.4931, "step": 12216 }, { "epoch": 1.5269082011208461, "grad_norm": 2.015625, "learning_rate": 2.498519640593786e-06, "loss": 0.4502, "step": 12217 }, { "epoch": 1.5270344936459073, "grad_norm": 2.015625, "learning_rate": 2.497199606593147e-06, "loss": 0.5255, "step": 12218 }, { "epoch": 1.5271607861709686, "grad_norm": 1.9453125, "learning_rate": 2.4958798716360587e-06, "loss": 0.4836, "step": 12219 }, { "epoch": 1.5272870786960295, "grad_norm": 2.0, "learning_rate": 2.494560435775124e-06, "loss": 0.4957, "step": 12220 }, { "epoch": 1.5274133712210909, "grad_norm": 1.9140625, "learning_rate": 2.4932412990629317e-06, "loss": 0.4669, "step": 12221 }, { "epoch": 1.527539663746152, "grad_norm": 1.7734375, "learning_rate": 2.4919224615520608e-06, "loss": 0.4199, "step": 12222 }, { "epoch": 1.5276659562712132, "grad_norm": 1.96875, "learning_rate": 2.490603923295076e-06, "loss": 0.4715, "step": 12223 }, { "epoch": 1.5277922487962745, "grad_norm": 1.921875, "learning_rate": 2.489285684344532e-06, "loss": 0.5252, "step": 12224 }, { "epoch": 1.5279185413213354, "grad_norm": 2.03125, "learning_rate": 2.487967744752969e-06, "loss": 0.4794, "step": 12225 }, { "epoch": 1.5280448338463968, "grad_norm": 2.3125, "learning_rate": 2.4866501045729184e-06, "loss": 0.5511, "step": 12226 }, { "epoch": 1.528171126371458, "grad_norm": 1.9921875, "learning_rate": 2.485332763856898e-06, "loss": 0.5275, "step": 12227 }, { "epoch": 1.528297418896519, "grad_norm": 2.046875, "learning_rate": 2.48401572265741e-06, "loss": 0.4636, "step": 12228 }, { "epoch": 1.5284237114215804, "grad_norm": 2.0, "learning_rate": 2.482698981026955e-06, "loss": 0.5327, "step": 12229 }, { "epoch": 1.5285500039466413, "grad_norm": 2.1875, "learning_rate": 2.4813825390180113e-06, "loss": 0.5008, "step": 12230 }, { "epoch": 1.5286762964717027, "grad_norm": 2.15625, "learning_rate": 2.4800663966830506e-06, "loss": 0.5301, "step": 12231 }, { "epoch": 1.5288025889967638, "grad_norm": 1.9765625, "learning_rate": 2.4787505540745303e-06, "loss": 0.5301, "step": 12232 }, { "epoch": 1.528928881521825, "grad_norm": 1.7578125, "learning_rate": 2.4774350112448974e-06, "loss": 0.4227, "step": 12233 }, { "epoch": 1.529055174046886, "grad_norm": 1.9140625, "learning_rate": 2.4761197682465844e-06, "loss": 0.4542, "step": 12234 }, { "epoch": 1.5291814665719472, "grad_norm": 2.28125, "learning_rate": 2.474804825132017e-06, "loss": 0.4861, "step": 12235 }, { "epoch": 1.5293077590970086, "grad_norm": 1.9140625, "learning_rate": 2.4734901819536015e-06, "loss": 0.4882, "step": 12236 }, { "epoch": 1.5294340516220695, "grad_norm": 2.078125, "learning_rate": 2.4721758387637396e-06, "loss": 0.4808, "step": 12237 }, { "epoch": 1.5295603441471308, "grad_norm": 2.03125, "learning_rate": 2.4708617956148175e-06, "loss": 0.48, "step": 12238 }, { "epoch": 1.529686636672192, "grad_norm": 1.90625, "learning_rate": 2.4695480525592087e-06, "loss": 0.5068, "step": 12239 }, { "epoch": 1.529812929197253, "grad_norm": 1.8984375, "learning_rate": 2.468234609649276e-06, "loss": 0.4604, "step": 12240 }, { "epoch": 1.5299392217223144, "grad_norm": 1.9375, "learning_rate": 2.4669214669373696e-06, "loss": 0.4832, "step": 12241 }, { "epoch": 1.5300655142473754, "grad_norm": 1.90625, "learning_rate": 2.46560862447583e-06, "loss": 0.4633, "step": 12242 }, { "epoch": 1.5301918067724367, "grad_norm": 1.96875, "learning_rate": 2.4642960823169824e-06, "loss": 0.4791, "step": 12243 }, { "epoch": 1.5303180992974978, "grad_norm": 2.0625, "learning_rate": 2.46298384051314e-06, "loss": 0.5289, "step": 12244 }, { "epoch": 1.530444391822559, "grad_norm": 2.09375, "learning_rate": 2.46167189911661e-06, "loss": 0.453, "step": 12245 }, { "epoch": 1.5305706843476203, "grad_norm": 2.078125, "learning_rate": 2.4603602581796813e-06, "loss": 0.5059, "step": 12246 }, { "epoch": 1.5306969768726812, "grad_norm": 2.046875, "learning_rate": 2.459048917754632e-06, "loss": 0.5346, "step": 12247 }, { "epoch": 1.5308232693977426, "grad_norm": 1.90625, "learning_rate": 2.45773787789373e-06, "loss": 0.4487, "step": 12248 }, { "epoch": 1.5309495619228037, "grad_norm": 2.0625, "learning_rate": 2.4564271386492278e-06, "loss": 0.6141, "step": 12249 }, { "epoch": 1.5310758544478649, "grad_norm": 1.984375, "learning_rate": 2.4551167000733713e-06, "loss": 0.5289, "step": 12250 }, { "epoch": 1.531202146972926, "grad_norm": 2.140625, "learning_rate": 2.4538065622183905e-06, "loss": 0.442, "step": 12251 }, { "epoch": 1.5313284394979871, "grad_norm": 2.03125, "learning_rate": 2.452496725136503e-06, "loss": 0.5035, "step": 12252 }, { "epoch": 1.5314547320230485, "grad_norm": 1.984375, "learning_rate": 2.4511871888799178e-06, "loss": 0.5032, "step": 12253 }, { "epoch": 1.5315810245481094, "grad_norm": 1.9453125, "learning_rate": 2.4498779535008277e-06, "loss": 0.4376, "step": 12254 }, { "epoch": 1.5317073170731708, "grad_norm": 1.921875, "learning_rate": 2.4485690190514175e-06, "loss": 0.484, "step": 12255 }, { "epoch": 1.531833609598232, "grad_norm": 1.921875, "learning_rate": 2.4472603855838573e-06, "loss": 0.5557, "step": 12256 }, { "epoch": 1.531959902123293, "grad_norm": 1.8359375, "learning_rate": 2.4459520531503056e-06, "loss": 0.4198, "step": 12257 }, { "epoch": 1.5320861946483544, "grad_norm": 1.8359375, "learning_rate": 2.444644021802911e-06, "loss": 0.4867, "step": 12258 }, { "epoch": 1.5322124871734153, "grad_norm": 1.984375, "learning_rate": 2.4433362915938065e-06, "loss": 0.5346, "step": 12259 }, { "epoch": 1.5323387796984766, "grad_norm": 1.8984375, "learning_rate": 2.4420288625751133e-06, "loss": 0.4415, "step": 12260 }, { "epoch": 1.5324650722235378, "grad_norm": 1.9609375, "learning_rate": 2.4407217347989486e-06, "loss": 0.4416, "step": 12261 }, { "epoch": 1.532591364748599, "grad_norm": 1.890625, "learning_rate": 2.439414908317408e-06, "loss": 0.465, "step": 12262 }, { "epoch": 1.5327176572736603, "grad_norm": 2.15625, "learning_rate": 2.438108383182578e-06, "loss": 0.5018, "step": 12263 }, { "epoch": 1.5328439497987212, "grad_norm": 1.96875, "learning_rate": 2.436802159446534e-06, "loss": 0.4377, "step": 12264 }, { "epoch": 1.5329702423237825, "grad_norm": 2.109375, "learning_rate": 2.4354962371613378e-06, "loss": 0.4681, "step": 12265 }, { "epoch": 1.5330965348488437, "grad_norm": 2.15625, "learning_rate": 2.4341906163790428e-06, "loss": 0.4999, "step": 12266 }, { "epoch": 1.5332228273739048, "grad_norm": 1.96875, "learning_rate": 2.4328852971516858e-06, "loss": 0.5014, "step": 12267 }, { "epoch": 1.533349119898966, "grad_norm": 1.84375, "learning_rate": 2.431580279531295e-06, "loss": 0.4306, "step": 12268 }, { "epoch": 1.533475412424027, "grad_norm": 2.03125, "learning_rate": 2.4302755635698827e-06, "loss": 0.4836, "step": 12269 }, { "epoch": 1.5336017049490884, "grad_norm": 2.109375, "learning_rate": 2.4289711493194557e-06, "loss": 0.4881, "step": 12270 }, { "epoch": 1.5337279974741493, "grad_norm": 1.8984375, "learning_rate": 2.427667036832001e-06, "loss": 0.4825, "step": 12271 }, { "epoch": 1.5338542899992107, "grad_norm": 1.796875, "learning_rate": 2.4263632261595005e-06, "loss": 0.4532, "step": 12272 }, { "epoch": 1.5339805825242718, "grad_norm": 1.890625, "learning_rate": 2.4250597173539193e-06, "loss": 0.4523, "step": 12273 }, { "epoch": 1.534106875049333, "grad_norm": 1.96875, "learning_rate": 2.4237565104672123e-06, "loss": 0.4558, "step": 12274 }, { "epoch": 1.5342331675743943, "grad_norm": 2.109375, "learning_rate": 2.4224536055513213e-06, "loss": 0.5615, "step": 12275 }, { "epoch": 1.5343594600994552, "grad_norm": 1.859375, "learning_rate": 2.4211510026581765e-06, "loss": 0.4582, "step": 12276 }, { "epoch": 1.5344857526245166, "grad_norm": 2.03125, "learning_rate": 2.4198487018397e-06, "loss": 0.5229, "step": 12277 }, { "epoch": 1.5346120451495777, "grad_norm": 2.140625, "learning_rate": 2.418546703147796e-06, "loss": 0.5253, "step": 12278 }, { "epoch": 1.5347383376746389, "grad_norm": 2.0, "learning_rate": 2.41724500663436e-06, "loss": 0.4852, "step": 12279 }, { "epoch": 1.5348646301997002, "grad_norm": 2.15625, "learning_rate": 2.4159436123512737e-06, "loss": 0.5107, "step": 12280 }, { "epoch": 1.5349909227247611, "grad_norm": 1.8671875, "learning_rate": 2.4146425203504076e-06, "loss": 0.4093, "step": 12281 }, { "epoch": 1.5351172152498225, "grad_norm": 2.015625, "learning_rate": 2.4133417306836206e-06, "loss": 0.518, "step": 12282 }, { "epoch": 1.5352435077748836, "grad_norm": 1.9296875, "learning_rate": 2.4120412434027575e-06, "loss": 0.5105, "step": 12283 }, { "epoch": 1.5353698002999447, "grad_norm": 1.828125, "learning_rate": 2.4107410585596547e-06, "loss": 0.4531, "step": 12284 }, { "epoch": 1.5354960928250059, "grad_norm": 2.046875, "learning_rate": 2.409441176206133e-06, "loss": 0.5082, "step": 12285 }, { "epoch": 1.535622385350067, "grad_norm": 1.96875, "learning_rate": 2.4081415963940024e-06, "loss": 0.5165, "step": 12286 }, { "epoch": 1.5357486778751284, "grad_norm": 1.953125, "learning_rate": 2.406842319175062e-06, "loss": 0.4861, "step": 12287 }, { "epoch": 1.5358749704001893, "grad_norm": 1.9453125, "learning_rate": 2.405543344601098e-06, "loss": 0.4684, "step": 12288 }, { "epoch": 1.5360012629252506, "grad_norm": 1.9140625, "learning_rate": 2.4042446727238846e-06, "loss": 0.5261, "step": 12289 }, { "epoch": 1.5361275554503118, "grad_norm": 2.078125, "learning_rate": 2.4029463035951815e-06, "loss": 0.5335, "step": 12290 }, { "epoch": 1.536253847975373, "grad_norm": 1.9296875, "learning_rate": 2.4016482372667404e-06, "loss": 0.4037, "step": 12291 }, { "epoch": 1.5363801405004343, "grad_norm": 1.9375, "learning_rate": 2.400350473790296e-06, "loss": 0.4515, "step": 12292 }, { "epoch": 1.5365064330254952, "grad_norm": 2.09375, "learning_rate": 2.3990530132175805e-06, "loss": 0.4466, "step": 12293 }, { "epoch": 1.5366327255505565, "grad_norm": 2.046875, "learning_rate": 2.3977558556003035e-06, "loss": 0.5036, "step": 12294 }, { "epoch": 1.5367590180756177, "grad_norm": 2.296875, "learning_rate": 2.396459000990167e-06, "loss": 0.5955, "step": 12295 }, { "epoch": 1.5368853106006788, "grad_norm": 1.8515625, "learning_rate": 2.395162449438859e-06, "loss": 0.4525, "step": 12296 }, { "epoch": 1.5370116031257401, "grad_norm": 1.9140625, "learning_rate": 2.3938662009980596e-06, "loss": 0.4439, "step": 12297 }, { "epoch": 1.537137895650801, "grad_norm": 1.9921875, "learning_rate": 2.392570255719433e-06, "loss": 0.5052, "step": 12298 }, { "epoch": 1.5372641881758624, "grad_norm": 1.96875, "learning_rate": 2.391274613654633e-06, "loss": 0.437, "step": 12299 }, { "epoch": 1.5373904807009235, "grad_norm": 2.125, "learning_rate": 2.3899792748552987e-06, "loss": 0.4406, "step": 12300 }, { "epoch": 1.5375167732259847, "grad_norm": 2.15625, "learning_rate": 2.3886842393730615e-06, "loss": 0.5139, "step": 12301 }, { "epoch": 1.5376430657510458, "grad_norm": 1.9921875, "learning_rate": 2.3873895072595364e-06, "loss": 0.4609, "step": 12302 }, { "epoch": 1.537769358276107, "grad_norm": 2.25, "learning_rate": 2.3860950785663305e-06, "loss": 0.558, "step": 12303 }, { "epoch": 1.5378956508011683, "grad_norm": 2.09375, "learning_rate": 2.3848009533450355e-06, "loss": 0.4734, "step": 12304 }, { "epoch": 1.5380219433262292, "grad_norm": 1.9453125, "learning_rate": 2.3835071316472313e-06, "loss": 0.509, "step": 12305 }, { "epoch": 1.5381482358512906, "grad_norm": 2.03125, "learning_rate": 2.3822136135244887e-06, "loss": 0.4772, "step": 12306 }, { "epoch": 1.5382745283763517, "grad_norm": 1.8828125, "learning_rate": 2.3809203990283616e-06, "loss": 0.5335, "step": 12307 }, { "epoch": 1.5384008209014128, "grad_norm": 1.875, "learning_rate": 2.3796274882103964e-06, "loss": 0.4246, "step": 12308 }, { "epoch": 1.5385271134264742, "grad_norm": 1.9375, "learning_rate": 2.3783348811221218e-06, "loss": 0.4628, "step": 12309 }, { "epoch": 1.538653405951535, "grad_norm": 2.015625, "learning_rate": 2.3770425778150643e-06, "loss": 0.4881, "step": 12310 }, { "epoch": 1.5387796984765965, "grad_norm": 2.09375, "learning_rate": 2.3757505783407273e-06, "loss": 0.4988, "step": 12311 }, { "epoch": 1.5389059910016576, "grad_norm": 1.9453125, "learning_rate": 2.374458882750609e-06, "loss": 0.433, "step": 12312 }, { "epoch": 1.5390322835267187, "grad_norm": 2.078125, "learning_rate": 2.3731674910961924e-06, "loss": 0.5341, "step": 12313 }, { "epoch": 1.53915857605178, "grad_norm": 2.09375, "learning_rate": 2.3718764034289487e-06, "loss": 0.4832, "step": 12314 }, { "epoch": 1.539284868576841, "grad_norm": 2.0, "learning_rate": 2.3705856198003384e-06, "loss": 0.4698, "step": 12315 }, { "epoch": 1.5394111611019023, "grad_norm": 1.9296875, "learning_rate": 2.3692951402618082e-06, "loss": 0.5611, "step": 12316 }, { "epoch": 1.5395374536269635, "grad_norm": 2.03125, "learning_rate": 2.368004964864794e-06, "loss": 0.5638, "step": 12317 }, { "epoch": 1.5396637461520246, "grad_norm": 1.9765625, "learning_rate": 2.3667150936607197e-06, "loss": 0.5008, "step": 12318 }, { "epoch": 1.5397900386770857, "grad_norm": 2.078125, "learning_rate": 2.3654255267009963e-06, "loss": 0.5151, "step": 12319 }, { "epoch": 1.5399163312021469, "grad_norm": 1.9140625, "learning_rate": 2.3641362640370215e-06, "loss": 0.4517, "step": 12320 }, { "epoch": 1.5400426237272082, "grad_norm": 2.03125, "learning_rate": 2.362847305720183e-06, "loss": 0.4891, "step": 12321 }, { "epoch": 1.5401689162522694, "grad_norm": 2.421875, "learning_rate": 2.3615586518018565e-06, "loss": 0.5708, "step": 12322 }, { "epoch": 1.5402952087773305, "grad_norm": 2.015625, "learning_rate": 2.360270302333403e-06, "loss": 0.448, "step": 12323 }, { "epoch": 1.5404215013023916, "grad_norm": 2.03125, "learning_rate": 2.3589822573661737e-06, "loss": 0.5314, "step": 12324 }, { "epoch": 1.5405477938274528, "grad_norm": 1.8515625, "learning_rate": 2.3576945169515076e-06, "loss": 0.502, "step": 12325 }, { "epoch": 1.5406740863525141, "grad_norm": 1.984375, "learning_rate": 2.35640708114073e-06, "loss": 0.4929, "step": 12326 }, { "epoch": 1.540800378877575, "grad_norm": 2.09375, "learning_rate": 2.355119949985153e-06, "loss": 0.4867, "step": 12327 }, { "epoch": 1.5409266714026364, "grad_norm": 2.0625, "learning_rate": 2.3538331235360836e-06, "loss": 0.5484, "step": 12328 }, { "epoch": 1.5410529639276975, "grad_norm": 1.9921875, "learning_rate": 2.3525466018448094e-06, "loss": 0.5123, "step": 12329 }, { "epoch": 1.5411792564527587, "grad_norm": 1.9921875, "learning_rate": 2.351260384962607e-06, "loss": 0.4713, "step": 12330 }, { "epoch": 1.54130554897782, "grad_norm": 1.8515625, "learning_rate": 2.3499744729407424e-06, "loss": 0.4148, "step": 12331 }, { "epoch": 1.541431841502881, "grad_norm": 1.9140625, "learning_rate": 2.3486888658304694e-06, "loss": 0.4919, "step": 12332 }, { "epoch": 1.5415581340279423, "grad_norm": 2.125, "learning_rate": 2.3474035636830284e-06, "loss": 0.4537, "step": 12333 }, { "epoch": 1.5416844265530034, "grad_norm": 2.0, "learning_rate": 2.34611856654965e-06, "loss": 0.4972, "step": 12334 }, { "epoch": 1.5418107190780646, "grad_norm": 2.109375, "learning_rate": 2.344833874481549e-06, "loss": 0.4615, "step": 12335 }, { "epoch": 1.5419370116031257, "grad_norm": 1.9375, "learning_rate": 2.3435494875299315e-06, "loss": 0.4638, "step": 12336 }, { "epoch": 1.5420633041281868, "grad_norm": 2.3125, "learning_rate": 2.3422654057459893e-06, "loss": 0.5494, "step": 12337 }, { "epoch": 1.5421895966532482, "grad_norm": 2.15625, "learning_rate": 2.340981629180904e-06, "loss": 0.6247, "step": 12338 }, { "epoch": 1.5423158891783093, "grad_norm": 1.9453125, "learning_rate": 2.3396981578858425e-06, "loss": 0.4139, "step": 12339 }, { "epoch": 1.5424421817033704, "grad_norm": 2.0625, "learning_rate": 2.3384149919119616e-06, "loss": 0.441, "step": 12340 }, { "epoch": 1.5425684742284316, "grad_norm": 1.859375, "learning_rate": 2.3371321313104056e-06, "loss": 0.4635, "step": 12341 }, { "epoch": 1.5426947667534927, "grad_norm": 1.9140625, "learning_rate": 2.3358495761323053e-06, "loss": 0.4435, "step": 12342 }, { "epoch": 1.542821059278554, "grad_norm": 2.0, "learning_rate": 2.3345673264287794e-06, "loss": 0.5587, "step": 12343 }, { "epoch": 1.542947351803615, "grad_norm": 2.03125, "learning_rate": 2.3332853822509385e-06, "loss": 0.4706, "step": 12344 }, { "epoch": 1.5430736443286763, "grad_norm": 1.921875, "learning_rate": 2.332003743649871e-06, "loss": 0.46, "step": 12345 }, { "epoch": 1.5431999368537375, "grad_norm": 1.984375, "learning_rate": 2.330722410676669e-06, "loss": 0.4515, "step": 12346 }, { "epoch": 1.5433262293787986, "grad_norm": 2.0, "learning_rate": 2.3294413833823993e-06, "loss": 0.516, "step": 12347 }, { "epoch": 1.54345252190386, "grad_norm": 1.8671875, "learning_rate": 2.32816066181812e-06, "loss": 0.4463, "step": 12348 }, { "epoch": 1.5435788144289209, "grad_norm": 1.9765625, "learning_rate": 2.3268802460348773e-06, "loss": 0.53, "step": 12349 }, { "epoch": 1.5437051069539822, "grad_norm": 2.078125, "learning_rate": 2.3256001360837066e-06, "loss": 0.5054, "step": 12350 }, { "epoch": 1.5438313994790434, "grad_norm": 1.7890625, "learning_rate": 2.3243203320156294e-06, "loss": 0.4562, "step": 12351 }, { "epoch": 1.5439576920041045, "grad_norm": 1.9609375, "learning_rate": 2.3230408338816557e-06, "loss": 0.4902, "step": 12352 }, { "epoch": 1.5440839845291658, "grad_norm": 1.9609375, "learning_rate": 2.3217616417327826e-06, "loss": 0.4846, "step": 12353 }, { "epoch": 1.5442102770542268, "grad_norm": 2.21875, "learning_rate": 2.320482755619997e-06, "loss": 0.5478, "step": 12354 }, { "epoch": 1.544336569579288, "grad_norm": 2.078125, "learning_rate": 2.3192041755942717e-06, "loss": 0.5913, "step": 12355 }, { "epoch": 1.5444628621043492, "grad_norm": 2.109375, "learning_rate": 2.317925901706568e-06, "loss": 0.482, "step": 12356 }, { "epoch": 1.5445891546294104, "grad_norm": 1.9453125, "learning_rate": 2.316647934007833e-06, "loss": 0.4891, "step": 12357 }, { "epoch": 1.5447154471544715, "grad_norm": 2.046875, "learning_rate": 2.3153702725490057e-06, "loss": 0.4965, "step": 12358 }, { "epoch": 1.5448417396795326, "grad_norm": 2.109375, "learning_rate": 2.31409291738101e-06, "loss": 0.5567, "step": 12359 }, { "epoch": 1.544968032204594, "grad_norm": 2.015625, "learning_rate": 2.3128158685547575e-06, "loss": 0.4692, "step": 12360 }, { "epoch": 1.545094324729655, "grad_norm": 2.03125, "learning_rate": 2.3115391261211495e-06, "loss": 0.5427, "step": 12361 }, { "epoch": 1.5452206172547163, "grad_norm": 1.9140625, "learning_rate": 2.310262690131072e-06, "loss": 0.4817, "step": 12362 }, { "epoch": 1.5453469097797774, "grad_norm": 1.9765625, "learning_rate": 2.3089865606354e-06, "loss": 0.4658, "step": 12363 }, { "epoch": 1.5454732023048385, "grad_norm": 1.9296875, "learning_rate": 2.3077107376850005e-06, "loss": 0.4599, "step": 12364 }, { "epoch": 1.5455994948299, "grad_norm": 2.140625, "learning_rate": 2.306435221330724e-06, "loss": 0.4834, "step": 12365 }, { "epoch": 1.5457257873549608, "grad_norm": 1.90625, "learning_rate": 2.3051600116234072e-06, "loss": 0.477, "step": 12366 }, { "epoch": 1.5458520798800222, "grad_norm": 1.9609375, "learning_rate": 2.3038851086138794e-06, "loss": 0.5311, "step": 12367 }, { "epoch": 1.5459783724050833, "grad_norm": 2.0, "learning_rate": 2.302610512352953e-06, "loss": 0.4978, "step": 12368 }, { "epoch": 1.5461046649301444, "grad_norm": 2.09375, "learning_rate": 2.3013362228914317e-06, "loss": 0.5178, "step": 12369 }, { "epoch": 1.5462309574552058, "grad_norm": 2.171875, "learning_rate": 2.3000622402801054e-06, "loss": 0.5297, "step": 12370 }, { "epoch": 1.5463572499802667, "grad_norm": 2.03125, "learning_rate": 2.2987885645697507e-06, "loss": 0.514, "step": 12371 }, { "epoch": 1.546483542505328, "grad_norm": 1.9140625, "learning_rate": 2.297515195811135e-06, "loss": 0.4811, "step": 12372 }, { "epoch": 1.5466098350303892, "grad_norm": 2.0625, "learning_rate": 2.2962421340550113e-06, "loss": 0.58, "step": 12373 }, { "epoch": 1.5467361275554503, "grad_norm": 1.875, "learning_rate": 2.29496937935212e-06, "loss": 0.5068, "step": 12374 }, { "epoch": 1.5468624200805114, "grad_norm": 2.21875, "learning_rate": 2.293696931753191e-06, "loss": 0.5775, "step": 12375 }, { "epoch": 1.5469887126055726, "grad_norm": 1.953125, "learning_rate": 2.2924247913089405e-06, "loss": 0.5292, "step": 12376 }, { "epoch": 1.547115005130634, "grad_norm": 2.109375, "learning_rate": 2.291152958070072e-06, "loss": 0.4999, "step": 12377 }, { "epoch": 1.5472412976556948, "grad_norm": 1.9453125, "learning_rate": 2.28988143208728e-06, "loss": 0.5594, "step": 12378 }, { "epoch": 1.5473675901807562, "grad_norm": 2.015625, "learning_rate": 2.288610213411242e-06, "loss": 0.5955, "step": 12379 }, { "epoch": 1.5474938827058173, "grad_norm": 2.03125, "learning_rate": 2.287339302092627e-06, "loss": 0.4488, "step": 12380 }, { "epoch": 1.5476201752308785, "grad_norm": 1.9921875, "learning_rate": 2.2860686981820877e-06, "loss": 0.4913, "step": 12381 }, { "epoch": 1.5477464677559398, "grad_norm": 2.140625, "learning_rate": 2.284798401730274e-06, "loss": 0.4627, "step": 12382 }, { "epoch": 1.5478727602810007, "grad_norm": 2.078125, "learning_rate": 2.283528412787811e-06, "loss": 0.567, "step": 12383 }, { "epoch": 1.547999052806062, "grad_norm": 2.078125, "learning_rate": 2.28225873140532e-06, "loss": 0.4923, "step": 12384 }, { "epoch": 1.5481253453311232, "grad_norm": 2.40625, "learning_rate": 2.2809893576334063e-06, "loss": 0.534, "step": 12385 }, { "epoch": 1.5482516378561844, "grad_norm": 1.8359375, "learning_rate": 2.2797202915226646e-06, "loss": 0.4482, "step": 12386 }, { "epoch": 1.5483779303812457, "grad_norm": 1.9609375, "learning_rate": 2.2784515331236767e-06, "loss": 0.5144, "step": 12387 }, { "epoch": 1.5485042229063066, "grad_norm": 2.125, "learning_rate": 2.277183082487011e-06, "loss": 0.4558, "step": 12388 }, { "epoch": 1.548630515431368, "grad_norm": 1.890625, "learning_rate": 2.2759149396632274e-06, "loss": 0.4741, "step": 12389 }, { "epoch": 1.5487568079564291, "grad_norm": 2.125, "learning_rate": 2.2746471047028696e-06, "loss": 0.4626, "step": 12390 }, { "epoch": 1.5488831004814902, "grad_norm": 1.9921875, "learning_rate": 2.2733795776564705e-06, "loss": 0.5563, "step": 12391 }, { "epoch": 1.5490093930065514, "grad_norm": 1.9375, "learning_rate": 2.272112358574551e-06, "loss": 0.5004, "step": 12392 }, { "epoch": 1.5491356855316125, "grad_norm": 2.125, "learning_rate": 2.270845447507618e-06, "loss": 0.4248, "step": 12393 }, { "epoch": 1.5492619780566739, "grad_norm": 2.015625, "learning_rate": 2.2695788445061697e-06, "loss": 0.5487, "step": 12394 }, { "epoch": 1.5493882705817348, "grad_norm": 1.9765625, "learning_rate": 2.268312549620688e-06, "loss": 0.4771, "step": 12395 }, { "epoch": 1.5495145631067961, "grad_norm": 2.0, "learning_rate": 2.2670465629016447e-06, "loss": 0.5061, "step": 12396 }, { "epoch": 1.5496408556318573, "grad_norm": 1.921875, "learning_rate": 2.265780884399501e-06, "loss": 0.4854, "step": 12397 }, { "epoch": 1.5497671481569184, "grad_norm": 1.9453125, "learning_rate": 2.264515514164701e-06, "loss": 0.5033, "step": 12398 }, { "epoch": 1.5498934406819798, "grad_norm": 2.0625, "learning_rate": 2.263250452247678e-06, "loss": 0.4647, "step": 12399 }, { "epoch": 1.5500197332070407, "grad_norm": 1.84375, "learning_rate": 2.26198569869886e-06, "loss": 0.4128, "step": 12400 }, { "epoch": 1.550146025732102, "grad_norm": 2.046875, "learning_rate": 2.2607212535686527e-06, "loss": 0.5189, "step": 12401 }, { "epoch": 1.5502723182571632, "grad_norm": 1.8671875, "learning_rate": 2.2594571169074565e-06, "loss": 0.461, "step": 12402 }, { "epoch": 1.5503986107822243, "grad_norm": 1.921875, "learning_rate": 2.2581932887656543e-06, "loss": 0.4815, "step": 12403 }, { "epoch": 1.5505249033072857, "grad_norm": 1.9921875, "learning_rate": 2.256929769193621e-06, "loss": 0.4763, "step": 12404 }, { "epoch": 1.5506511958323466, "grad_norm": 2.0625, "learning_rate": 2.255666558241717e-06, "loss": 0.5114, "step": 12405 }, { "epoch": 1.550777488357408, "grad_norm": 2.015625, "learning_rate": 2.25440365596029e-06, "loss": 0.4174, "step": 12406 }, { "epoch": 1.550903780882469, "grad_norm": 2.03125, "learning_rate": 2.2531410623996765e-06, "loss": 0.4901, "step": 12407 }, { "epoch": 1.5510300734075302, "grad_norm": 2.046875, "learning_rate": 2.2518787776102012e-06, "loss": 0.52, "step": 12408 }, { "epoch": 1.5511563659325913, "grad_norm": 2.234375, "learning_rate": 2.250616801642176e-06, "loss": 0.5234, "step": 12409 }, { "epoch": 1.5512826584576525, "grad_norm": 2.078125, "learning_rate": 2.2493551345458984e-06, "loss": 0.4364, "step": 12410 }, { "epoch": 1.5514089509827138, "grad_norm": 2.0, "learning_rate": 2.248093776371657e-06, "loss": 0.4956, "step": 12411 }, { "epoch": 1.5515352435077747, "grad_norm": 2.203125, "learning_rate": 2.2468327271697256e-06, "loss": 0.5197, "step": 12412 }, { "epoch": 1.551661536032836, "grad_norm": 1.9765625, "learning_rate": 2.2455719869903682e-06, "loss": 0.4887, "step": 12413 }, { "epoch": 1.5517878285578972, "grad_norm": 1.9609375, "learning_rate": 2.2443115558838322e-06, "loss": 0.4759, "step": 12414 }, { "epoch": 1.5519141210829583, "grad_norm": 2.125, "learning_rate": 2.243051433900357e-06, "loss": 0.4921, "step": 12415 }, { "epoch": 1.5520404136080197, "grad_norm": 2.0625, "learning_rate": 2.241791621090169e-06, "loss": 0.4888, "step": 12416 }, { "epoch": 1.5521667061330806, "grad_norm": 1.9921875, "learning_rate": 2.2405321175034767e-06, "loss": 0.4682, "step": 12417 }, { "epoch": 1.552292998658142, "grad_norm": 1.9296875, "learning_rate": 2.2392729231904876e-06, "loss": 0.5223, "step": 12418 }, { "epoch": 1.552419291183203, "grad_norm": 1.9296875, "learning_rate": 2.2380140382013858e-06, "loss": 0.4855, "step": 12419 }, { "epoch": 1.5525455837082642, "grad_norm": 1.84375, "learning_rate": 2.2367554625863496e-06, "loss": 0.469, "step": 12420 }, { "epoch": 1.5526718762333256, "grad_norm": 2.015625, "learning_rate": 2.235497196395542e-06, "loss": 0.6035, "step": 12421 }, { "epoch": 1.5527981687583865, "grad_norm": 2.09375, "learning_rate": 2.2342392396791136e-06, "loss": 0.5019, "step": 12422 }, { "epoch": 1.5529244612834479, "grad_norm": 1.9921875, "learning_rate": 2.2329815924872034e-06, "loss": 0.4814, "step": 12423 }, { "epoch": 1.553050753808509, "grad_norm": 2.171875, "learning_rate": 2.231724254869939e-06, "loss": 0.5529, "step": 12424 }, { "epoch": 1.5531770463335701, "grad_norm": 1.9609375, "learning_rate": 2.2304672268774363e-06, "loss": 0.4614, "step": 12425 }, { "epoch": 1.5533033388586313, "grad_norm": 1.90625, "learning_rate": 2.229210508559795e-06, "loss": 0.4909, "step": 12426 }, { "epoch": 1.5534296313836924, "grad_norm": 2.0, "learning_rate": 2.2279540999671047e-06, "loss": 0.507, "step": 12427 }, { "epoch": 1.5535559239087537, "grad_norm": 2.140625, "learning_rate": 2.226698001149444e-06, "loss": 0.5099, "step": 12428 }, { "epoch": 1.5536822164338149, "grad_norm": 1.9375, "learning_rate": 2.2254422121568795e-06, "loss": 0.5332, "step": 12429 }, { "epoch": 1.553808508958876, "grad_norm": 2.265625, "learning_rate": 2.2241867330394606e-06, "loss": 0.544, "step": 12430 }, { "epoch": 1.5539348014839371, "grad_norm": 2.015625, "learning_rate": 2.2229315638472305e-06, "loss": 0.4963, "step": 12431 }, { "epoch": 1.5540610940089983, "grad_norm": 2.0, "learning_rate": 2.2216767046302146e-06, "loss": 0.5027, "step": 12432 }, { "epoch": 1.5541873865340596, "grad_norm": 1.953125, "learning_rate": 2.220422155438431e-06, "loss": 0.4763, "step": 12433 }, { "epoch": 1.5543136790591205, "grad_norm": 2.125, "learning_rate": 2.2191679163218814e-06, "loss": 0.5091, "step": 12434 }, { "epoch": 1.554439971584182, "grad_norm": 2.0, "learning_rate": 2.2179139873305555e-06, "loss": 0.4922, "step": 12435 }, { "epoch": 1.554566264109243, "grad_norm": 1.953125, "learning_rate": 2.2166603685144363e-06, "loss": 0.5795, "step": 12436 }, { "epoch": 1.5546925566343042, "grad_norm": 1.9140625, "learning_rate": 2.215407059923487e-06, "loss": 0.4427, "step": 12437 }, { "epoch": 1.5548188491593655, "grad_norm": 2.0, "learning_rate": 2.214154061607662e-06, "loss": 0.5254, "step": 12438 }, { "epoch": 1.5549451416844264, "grad_norm": 2.046875, "learning_rate": 2.2129013736169024e-06, "loss": 0.5053, "step": 12439 }, { "epoch": 1.5550714342094878, "grad_norm": 1.9140625, "learning_rate": 2.211648996001138e-06, "loss": 0.4757, "step": 12440 }, { "epoch": 1.555197726734549, "grad_norm": 2.125, "learning_rate": 2.2103969288102856e-06, "loss": 0.4879, "step": 12441 }, { "epoch": 1.55532401925961, "grad_norm": 2.125, "learning_rate": 2.209145172094248e-06, "loss": 0.5188, "step": 12442 }, { "epoch": 1.5554503117846712, "grad_norm": 2.0, "learning_rate": 2.20789372590292e-06, "loss": 0.522, "step": 12443 }, { "epoch": 1.5555766043097323, "grad_norm": 1.953125, "learning_rate": 2.206642590286179e-06, "loss": 0.4835, "step": 12444 }, { "epoch": 1.5557028968347937, "grad_norm": 2.09375, "learning_rate": 2.205391765293892e-06, "loss": 0.5103, "step": 12445 }, { "epoch": 1.5558291893598548, "grad_norm": 2.078125, "learning_rate": 2.204141250975915e-06, "loss": 0.4411, "step": 12446 }, { "epoch": 1.555955481884916, "grad_norm": 2.125, "learning_rate": 2.202891047382091e-06, "loss": 0.5571, "step": 12447 }, { "epoch": 1.556081774409977, "grad_norm": 1.921875, "learning_rate": 2.2016411545622497e-06, "loss": 0.4189, "step": 12448 }, { "epoch": 1.5562080669350382, "grad_norm": 2.078125, "learning_rate": 2.2003915725662073e-06, "loss": 0.4809, "step": 12449 }, { "epoch": 1.5563343594600996, "grad_norm": 1.9453125, "learning_rate": 2.1991423014437706e-06, "loss": 0.4333, "step": 12450 }, { "epoch": 1.5564606519851605, "grad_norm": 2.1875, "learning_rate": 2.1978933412447324e-06, "loss": 0.5721, "step": 12451 }, { "epoch": 1.5565869445102218, "grad_norm": 2.171875, "learning_rate": 2.1966446920188734e-06, "loss": 0.558, "step": 12452 }, { "epoch": 1.556713237035283, "grad_norm": 1.8984375, "learning_rate": 2.1953963538159574e-06, "loss": 0.496, "step": 12453 }, { "epoch": 1.556839529560344, "grad_norm": 1.96875, "learning_rate": 2.1941483266857478e-06, "loss": 0.4766, "step": 12454 }, { "epoch": 1.5569658220854055, "grad_norm": 2.0, "learning_rate": 2.1929006106779848e-06, "loss": 0.4836, "step": 12455 }, { "epoch": 1.5570921146104664, "grad_norm": 2.0625, "learning_rate": 2.1916532058423977e-06, "loss": 0.5614, "step": 12456 }, { "epoch": 1.5572184071355277, "grad_norm": 2.171875, "learning_rate": 2.1904061122287078e-06, "loss": 0.5533, "step": 12457 }, { "epoch": 1.5573446996605889, "grad_norm": 2.1875, "learning_rate": 2.189159329886619e-06, "loss": 0.5527, "step": 12458 }, { "epoch": 1.55747099218565, "grad_norm": 1.828125, "learning_rate": 2.187912858865825e-06, "loss": 0.5061, "step": 12459 }, { "epoch": 1.5575972847107113, "grad_norm": 2.078125, "learning_rate": 2.186666699216008e-06, "loss": 0.5363, "step": 12460 }, { "epoch": 1.5577235772357723, "grad_norm": 2.078125, "learning_rate": 2.185420850986838e-06, "loss": 0.4844, "step": 12461 }, { "epoch": 1.5578498697608336, "grad_norm": 2.3125, "learning_rate": 2.1841753142279686e-06, "loss": 0.5417, "step": 12462 }, { "epoch": 1.5579761622858948, "grad_norm": 2.03125, "learning_rate": 2.182930088989047e-06, "loss": 0.4361, "step": 12463 }, { "epoch": 1.5581024548109559, "grad_norm": 1.8359375, "learning_rate": 2.1816851753197023e-06, "loss": 0.4436, "step": 12464 }, { "epoch": 1.558228747336017, "grad_norm": 2.1875, "learning_rate": 2.1804405732695556e-06, "loss": 0.5307, "step": 12465 }, { "epoch": 1.5583550398610782, "grad_norm": 1.8671875, "learning_rate": 2.179196282888213e-06, "loss": 0.484, "step": 12466 }, { "epoch": 1.5584813323861395, "grad_norm": 1.9765625, "learning_rate": 2.177952304225269e-06, "loss": 0.4777, "step": 12467 }, { "epoch": 1.5586076249112004, "grad_norm": 2.046875, "learning_rate": 2.1767086373303057e-06, "loss": 0.5539, "step": 12468 }, { "epoch": 1.5587339174362618, "grad_norm": 2.125, "learning_rate": 2.175465282252892e-06, "loss": 0.497, "step": 12469 }, { "epoch": 1.558860209961323, "grad_norm": 1.9375, "learning_rate": 2.174222239042586e-06, "loss": 0.4418, "step": 12470 }, { "epoch": 1.558986502486384, "grad_norm": 1.84375, "learning_rate": 2.1729795077489293e-06, "loss": 0.4377, "step": 12471 }, { "epoch": 1.5591127950114454, "grad_norm": 1.90625, "learning_rate": 2.1717370884214596e-06, "loss": 0.4538, "step": 12472 }, { "epoch": 1.5592390875365063, "grad_norm": 2.0, "learning_rate": 2.1704949811096943e-06, "loss": 0.4968, "step": 12473 }, { "epoch": 1.5593653800615677, "grad_norm": 2.15625, "learning_rate": 2.1692531858631393e-06, "loss": 0.5129, "step": 12474 }, { "epoch": 1.5594916725866288, "grad_norm": 2.015625, "learning_rate": 2.168011702731292e-06, "loss": 0.5113, "step": 12475 }, { "epoch": 1.55961796511169, "grad_norm": 2.015625, "learning_rate": 2.1667705317636333e-06, "loss": 0.5234, "step": 12476 }, { "epoch": 1.5597442576367513, "grad_norm": 2.125, "learning_rate": 2.165529673009633e-06, "loss": 0.5591, "step": 12477 }, { "epoch": 1.5598705501618122, "grad_norm": 1.96875, "learning_rate": 2.1642891265187504e-06, "loss": 0.4752, "step": 12478 }, { "epoch": 1.5599968426868736, "grad_norm": 2.140625, "learning_rate": 2.163048892340429e-06, "loss": 0.5178, "step": 12479 }, { "epoch": 1.5601231352119347, "grad_norm": 2.0625, "learning_rate": 2.161808970524103e-06, "loss": 0.4727, "step": 12480 }, { "epoch": 1.5602494277369958, "grad_norm": 1.984375, "learning_rate": 2.1605693611191913e-06, "loss": 0.5079, "step": 12481 }, { "epoch": 1.560375720262057, "grad_norm": 2.546875, "learning_rate": 2.1593300641751038e-06, "loss": 0.5397, "step": 12482 }, { "epoch": 1.560502012787118, "grad_norm": 2.046875, "learning_rate": 2.1580910797412323e-06, "loss": 0.5194, "step": 12483 }, { "epoch": 1.5606283053121794, "grad_norm": 2.09375, "learning_rate": 2.156852407866964e-06, "loss": 0.5032, "step": 12484 }, { "epoch": 1.5607545978372404, "grad_norm": 2.1875, "learning_rate": 2.1556140486016664e-06, "loss": 0.4792, "step": 12485 }, { "epoch": 1.5608808903623017, "grad_norm": 2.21875, "learning_rate": 2.154376001994698e-06, "loss": 0.4744, "step": 12486 }, { "epoch": 1.5610071828873628, "grad_norm": 2.140625, "learning_rate": 2.153138268095405e-06, "loss": 0.4654, "step": 12487 }, { "epoch": 1.561133475412424, "grad_norm": 2.0, "learning_rate": 2.1519008469531213e-06, "loss": 0.5069, "step": 12488 }, { "epoch": 1.5612597679374853, "grad_norm": 1.984375, "learning_rate": 2.1506637386171626e-06, "loss": 0.4911, "step": 12489 }, { "epoch": 1.5613860604625462, "grad_norm": 2.03125, "learning_rate": 2.1494269431368442e-06, "loss": 0.5146, "step": 12490 }, { "epoch": 1.5615123529876076, "grad_norm": 2.09375, "learning_rate": 2.148190460561458e-06, "loss": 0.5071, "step": 12491 }, { "epoch": 1.5616386455126687, "grad_norm": 1.8984375, "learning_rate": 2.1469542909402887e-06, "loss": 0.4984, "step": 12492 }, { "epoch": 1.5617649380377299, "grad_norm": 1.875, "learning_rate": 2.1457184343226054e-06, "loss": 0.5146, "step": 12493 }, { "epoch": 1.5618912305627912, "grad_norm": 2.109375, "learning_rate": 2.1444828907576675e-06, "loss": 0.4928, "step": 12494 }, { "epoch": 1.5620175230878521, "grad_norm": 2.015625, "learning_rate": 2.1432476602947206e-06, "loss": 0.5053, "step": 12495 }, { "epoch": 1.5621438156129135, "grad_norm": 1.9296875, "learning_rate": 2.1420127429829972e-06, "loss": 0.5421, "step": 12496 }, { "epoch": 1.5622701081379746, "grad_norm": 2.109375, "learning_rate": 2.1407781388717185e-06, "loss": 0.4544, "step": 12497 }, { "epoch": 1.5623964006630358, "grad_norm": 2.296875, "learning_rate": 2.139543848010094e-06, "loss": 0.5897, "step": 12498 }, { "epoch": 1.562522693188097, "grad_norm": 1.8984375, "learning_rate": 2.138309870447317e-06, "loss": 0.5175, "step": 12499 }, { "epoch": 1.562648985713158, "grad_norm": 2.09375, "learning_rate": 2.1370762062325746e-06, "loss": 0.5002, "step": 12500 }, { "epoch": 1.5627752782382194, "grad_norm": 2.0625, "learning_rate": 2.1358428554150344e-06, "loss": 0.51, "step": 12501 }, { "epoch": 1.5629015707632803, "grad_norm": 2.015625, "learning_rate": 2.1346098180438557e-06, "loss": 0.4417, "step": 12502 }, { "epoch": 1.5630278632883416, "grad_norm": 2.0, "learning_rate": 2.1333770941681855e-06, "loss": 0.5017, "step": 12503 }, { "epoch": 1.5631541558134028, "grad_norm": 2.21875, "learning_rate": 2.132144683837155e-06, "loss": 0.5332, "step": 12504 }, { "epoch": 1.563280448338464, "grad_norm": 1.9296875, "learning_rate": 2.1309125870998883e-06, "loss": 0.4991, "step": 12505 }, { "epoch": 1.5634067408635253, "grad_norm": 1.8515625, "learning_rate": 2.1296808040054904e-06, "loss": 0.3914, "step": 12506 }, { "epoch": 1.5635330333885862, "grad_norm": 2.015625, "learning_rate": 2.1284493346030577e-06, "loss": 0.4951, "step": 12507 }, { "epoch": 1.5636593259136475, "grad_norm": 1.8984375, "learning_rate": 2.1272181789416756e-06, "loss": 0.4724, "step": 12508 }, { "epoch": 1.5637856184387087, "grad_norm": 1.8828125, "learning_rate": 2.1259873370704153e-06, "loss": 0.5296, "step": 12509 }, { "epoch": 1.5639119109637698, "grad_norm": 1.8671875, "learning_rate": 2.124756809038334e-06, "loss": 0.4627, "step": 12510 }, { "epoch": 1.5640382034888312, "grad_norm": 2.03125, "learning_rate": 2.123526594894477e-06, "loss": 0.5061, "step": 12511 }, { "epoch": 1.564164496013892, "grad_norm": 2.625, "learning_rate": 2.1222966946878785e-06, "loss": 0.65, "step": 12512 }, { "epoch": 1.5642907885389534, "grad_norm": 2.3125, "learning_rate": 2.1210671084675592e-06, "loss": 0.5766, "step": 12513 }, { "epoch": 1.5644170810640146, "grad_norm": 1.9296875, "learning_rate": 2.119837836282528e-06, "loss": 0.4657, "step": 12514 }, { "epoch": 1.5645433735890757, "grad_norm": 2.28125, "learning_rate": 2.1186088781817794e-06, "loss": 0.5473, "step": 12515 }, { "epoch": 1.5646696661141368, "grad_norm": 2.015625, "learning_rate": 2.1173802342142967e-06, "loss": 0.4963, "step": 12516 }, { "epoch": 1.564795958639198, "grad_norm": 2.109375, "learning_rate": 2.1161519044290524e-06, "loss": 0.4901, "step": 12517 }, { "epoch": 1.5649222511642593, "grad_norm": 2.09375, "learning_rate": 2.1149238888750034e-06, "loss": 0.5821, "step": 12518 }, { "epoch": 1.5650485436893202, "grad_norm": 1.953125, "learning_rate": 2.1136961876010965e-06, "loss": 0.4954, "step": 12519 }, { "epoch": 1.5651748362143816, "grad_norm": 2.0, "learning_rate": 2.1124688006562633e-06, "loss": 0.5177, "step": 12520 }, { "epoch": 1.5653011287394427, "grad_norm": 1.96875, "learning_rate": 2.111241728089426e-06, "loss": 0.4735, "step": 12521 }, { "epoch": 1.5654274212645038, "grad_norm": 1.9921875, "learning_rate": 2.110014969949492e-06, "loss": 0.5097, "step": 12522 }, { "epoch": 1.5655537137895652, "grad_norm": 2.359375, "learning_rate": 2.108788526285357e-06, "loss": 0.5649, "step": 12523 }, { "epoch": 1.5656800063146261, "grad_norm": 1.96875, "learning_rate": 2.1075623971459038e-06, "loss": 0.466, "step": 12524 }, { "epoch": 1.5658062988396875, "grad_norm": 2.046875, "learning_rate": 2.1063365825800007e-06, "loss": 0.4669, "step": 12525 }, { "epoch": 1.5659325913647486, "grad_norm": 2.1875, "learning_rate": 2.1051110826365107e-06, "loss": 0.4881, "step": 12526 }, { "epoch": 1.5660588838898097, "grad_norm": 2.21875, "learning_rate": 2.1038858973642782e-06, "loss": 0.5163, "step": 12527 }, { "epoch": 1.566185176414871, "grad_norm": 2.046875, "learning_rate": 2.102661026812134e-06, "loss": 0.4678, "step": 12528 }, { "epoch": 1.566311468939932, "grad_norm": 2.09375, "learning_rate": 2.1014364710288992e-06, "loss": 0.5125, "step": 12529 }, { "epoch": 1.5664377614649934, "grad_norm": 2.046875, "learning_rate": 2.100212230063382e-06, "loss": 0.5559, "step": 12530 }, { "epoch": 1.5665640539900545, "grad_norm": 2.046875, "learning_rate": 2.0989883039643777e-06, "loss": 0.4192, "step": 12531 }, { "epoch": 1.5666903465151156, "grad_norm": 2.140625, "learning_rate": 2.0977646927806682e-06, "loss": 0.431, "step": 12532 }, { "epoch": 1.5668166390401768, "grad_norm": 1.984375, "learning_rate": 2.0965413965610246e-06, "loss": 0.5409, "step": 12533 }, { "epoch": 1.566942931565238, "grad_norm": 2.078125, "learning_rate": 2.095318415354205e-06, "loss": 0.4924, "step": 12534 }, { "epoch": 1.5670692240902993, "grad_norm": 1.890625, "learning_rate": 2.094095749208953e-06, "loss": 0.4366, "step": 12535 }, { "epoch": 1.5671955166153602, "grad_norm": 2.015625, "learning_rate": 2.092873398174001e-06, "loss": 0.4853, "step": 12536 }, { "epoch": 1.5673218091404215, "grad_norm": 2.1875, "learning_rate": 2.0916513622980715e-06, "loss": 0.5584, "step": 12537 }, { "epoch": 1.5674481016654827, "grad_norm": 2.09375, "learning_rate": 2.0904296416298696e-06, "loss": 0.5099, "step": 12538 }, { "epoch": 1.5675743941905438, "grad_norm": 2.125, "learning_rate": 2.089208236218091e-06, "loss": 0.5685, "step": 12539 }, { "epoch": 1.5677006867156051, "grad_norm": 1.9375, "learning_rate": 2.0879871461114178e-06, "loss": 0.5317, "step": 12540 }, { "epoch": 1.567826979240666, "grad_norm": 1.921875, "learning_rate": 2.0867663713585196e-06, "loss": 0.4792, "step": 12541 }, { "epoch": 1.5679532717657274, "grad_norm": 2.0, "learning_rate": 2.085545912008052e-06, "loss": 0.4904, "step": 12542 }, { "epoch": 1.5680795642907885, "grad_norm": 1.9921875, "learning_rate": 2.0843257681086603e-06, "loss": 0.4666, "step": 12543 }, { "epoch": 1.5682058568158497, "grad_norm": 2.046875, "learning_rate": 2.08310593970898e-06, "loss": 0.4523, "step": 12544 }, { "epoch": 1.568332149340911, "grad_norm": 2.15625, "learning_rate": 2.0818864268576277e-06, "loss": 0.5623, "step": 12545 }, { "epoch": 1.568458441865972, "grad_norm": 2.3125, "learning_rate": 2.0806672296032106e-06, "loss": 0.5902, "step": 12546 }, { "epoch": 1.5685847343910333, "grad_norm": 2.203125, "learning_rate": 2.0794483479943228e-06, "loss": 0.6668, "step": 12547 }, { "epoch": 1.5687110269160944, "grad_norm": 1.953125, "learning_rate": 2.0782297820795462e-06, "loss": 0.4557, "step": 12548 }, { "epoch": 1.5688373194411556, "grad_norm": 2.015625, "learning_rate": 2.077011531907449e-06, "loss": 0.5571, "step": 12549 }, { "epoch": 1.5689636119662167, "grad_norm": 2.140625, "learning_rate": 2.0757935975265886e-06, "loss": 0.5649, "step": 12550 }, { "epoch": 1.5690899044912778, "grad_norm": 1.8828125, "learning_rate": 2.074575978985509e-06, "loss": 0.4353, "step": 12551 }, { "epoch": 1.5692161970163392, "grad_norm": 2.078125, "learning_rate": 2.0733586763327407e-06, "loss": 0.4918, "step": 12552 }, { "epoch": 1.5693424895414003, "grad_norm": 1.9609375, "learning_rate": 2.0721416896168034e-06, "loss": 0.4373, "step": 12553 }, { "epoch": 1.5694687820664615, "grad_norm": 1.921875, "learning_rate": 2.070925018886203e-06, "loss": 0.4966, "step": 12554 }, { "epoch": 1.5695950745915226, "grad_norm": 2.046875, "learning_rate": 2.0697086641894338e-06, "loss": 0.5354, "step": 12555 }, { "epoch": 1.5697213671165837, "grad_norm": 1.84375, "learning_rate": 2.068492625574975e-06, "loss": 0.5023, "step": 12556 }, { "epoch": 1.569847659641645, "grad_norm": 1.84375, "learning_rate": 2.067276903091295e-06, "loss": 0.4261, "step": 12557 }, { "epoch": 1.569973952166706, "grad_norm": 2.0625, "learning_rate": 2.0660614967868518e-06, "loss": 0.5082, "step": 12558 }, { "epoch": 1.5701002446917673, "grad_norm": 2.1875, "learning_rate": 2.064846406710087e-06, "loss": 0.5649, "step": 12559 }, { "epoch": 1.5702265372168285, "grad_norm": 1.890625, "learning_rate": 2.0636316329094317e-06, "loss": 0.4317, "step": 12560 }, { "epoch": 1.5703528297418896, "grad_norm": 2.0, "learning_rate": 2.062417175433301e-06, "loss": 0.4606, "step": 12561 }, { "epoch": 1.570479122266951, "grad_norm": 1.9921875, "learning_rate": 2.061203034330105e-06, "loss": 0.5225, "step": 12562 }, { "epoch": 1.5706054147920119, "grad_norm": 1.9375, "learning_rate": 2.059989209648234e-06, "loss": 0.5387, "step": 12563 }, { "epoch": 1.5707317073170732, "grad_norm": 2.015625, "learning_rate": 2.0587757014360686e-06, "loss": 0.4985, "step": 12564 }, { "epoch": 1.5708579998421344, "grad_norm": 1.890625, "learning_rate": 2.057562509741977e-06, "loss": 0.5364, "step": 12565 }, { "epoch": 1.5709842923671955, "grad_norm": 2.078125, "learning_rate": 2.056349634614312e-06, "loss": 0.4396, "step": 12566 }, { "epoch": 1.5711105848922566, "grad_norm": 1.984375, "learning_rate": 2.0551370761014176e-06, "loss": 0.5006, "step": 12567 }, { "epoch": 1.5712368774173178, "grad_norm": 2.03125, "learning_rate": 2.0539248342516226e-06, "loss": 0.4374, "step": 12568 }, { "epoch": 1.5713631699423791, "grad_norm": 1.9765625, "learning_rate": 2.052712909113245e-06, "loss": 0.5011, "step": 12569 }, { "epoch": 1.5714894624674403, "grad_norm": 1.9296875, "learning_rate": 2.0515013007345887e-06, "loss": 0.4276, "step": 12570 }, { "epoch": 1.5716157549925014, "grad_norm": 1.9453125, "learning_rate": 2.0502900091639442e-06, "loss": 0.4877, "step": 12571 }, { "epoch": 1.5717420475175625, "grad_norm": 2.03125, "learning_rate": 2.0490790344495923e-06, "loss": 0.5278, "step": 12572 }, { "epoch": 1.5718683400426237, "grad_norm": 2.03125, "learning_rate": 2.0478683766398e-06, "loss": 0.4444, "step": 12573 }, { "epoch": 1.571994632567685, "grad_norm": 2.21875, "learning_rate": 2.046658035782819e-06, "loss": 0.6574, "step": 12574 }, { "epoch": 1.572120925092746, "grad_norm": 2.09375, "learning_rate": 2.0454480119268926e-06, "loss": 0.406, "step": 12575 }, { "epoch": 1.5722472176178073, "grad_norm": 1.8125, "learning_rate": 2.0442383051202497e-06, "loss": 0.4985, "step": 12576 }, { "epoch": 1.5723735101428684, "grad_norm": 2.140625, "learning_rate": 2.0430289154111038e-06, "loss": 0.5231, "step": 12577 }, { "epoch": 1.5724998026679295, "grad_norm": 2.109375, "learning_rate": 2.04181984284766e-06, "loss": 0.4825, "step": 12578 }, { "epoch": 1.572626095192991, "grad_norm": 2.015625, "learning_rate": 2.040611087478107e-06, "loss": 0.4893, "step": 12579 }, { "epoch": 1.5727523877180518, "grad_norm": 1.9375, "learning_rate": 2.039402649350626e-06, "loss": 0.5018, "step": 12580 }, { "epoch": 1.5728786802431132, "grad_norm": 2.140625, "learning_rate": 2.038194528513382e-06, "loss": 0.5517, "step": 12581 }, { "epoch": 1.5730049727681743, "grad_norm": 2.25, "learning_rate": 2.0369867250145268e-06, "loss": 0.5708, "step": 12582 }, { "epoch": 1.5731312652932354, "grad_norm": 2.234375, "learning_rate": 2.0357792389022e-06, "loss": 0.4741, "step": 12583 }, { "epoch": 1.5732575578182968, "grad_norm": 1.984375, "learning_rate": 2.0345720702245307e-06, "loss": 0.4971, "step": 12584 }, { "epoch": 1.5733838503433577, "grad_norm": 2.15625, "learning_rate": 2.0333652190296326e-06, "loss": 0.5036, "step": 12585 }, { "epoch": 1.573510142868419, "grad_norm": 1.9296875, "learning_rate": 2.032158685365607e-06, "loss": 0.4804, "step": 12586 }, { "epoch": 1.5736364353934802, "grad_norm": 2.078125, "learning_rate": 2.030952469280546e-06, "loss": 0.4797, "step": 12587 }, { "epoch": 1.5737627279185413, "grad_norm": 2.265625, "learning_rate": 2.029746570822524e-06, "loss": 0.4208, "step": 12588 }, { "epoch": 1.5738890204436025, "grad_norm": 1.90625, "learning_rate": 2.028540990039606e-06, "loss": 0.4328, "step": 12589 }, { "epoch": 1.5740153129686636, "grad_norm": 2.0, "learning_rate": 2.0273357269798433e-06, "loss": 0.5442, "step": 12590 }, { "epoch": 1.574141605493725, "grad_norm": 2.21875, "learning_rate": 2.0261307816912756e-06, "loss": 0.572, "step": 12591 }, { "epoch": 1.5742678980187859, "grad_norm": 1.984375, "learning_rate": 2.024926154221929e-06, "loss": 0.4869, "step": 12592 }, { "epoch": 1.5743941905438472, "grad_norm": 1.9609375, "learning_rate": 2.023721844619817e-06, "loss": 0.4968, "step": 12593 }, { "epoch": 1.5745204830689083, "grad_norm": 1.9921875, "learning_rate": 2.022517852932939e-06, "loss": 0.4773, "step": 12594 }, { "epoch": 1.5746467755939695, "grad_norm": 2.078125, "learning_rate": 2.021314179209285e-06, "loss": 0.5177, "step": 12595 }, { "epoch": 1.5747730681190308, "grad_norm": 2.046875, "learning_rate": 2.0201108234968305e-06, "loss": 0.5379, "step": 12596 }, { "epoch": 1.5748993606440918, "grad_norm": 2.09375, "learning_rate": 2.0189077858435347e-06, "loss": 0.4844, "step": 12597 }, { "epoch": 1.575025653169153, "grad_norm": 2.265625, "learning_rate": 2.017705066297354e-06, "loss": 0.5699, "step": 12598 }, { "epoch": 1.5751519456942142, "grad_norm": 1.9453125, "learning_rate": 2.0165026649062215e-06, "loss": 0.4896, "step": 12599 }, { "epoch": 1.5752782382192754, "grad_norm": 1.90625, "learning_rate": 2.015300581718065e-06, "loss": 0.4644, "step": 12600 }, { "epoch": 1.5754045307443367, "grad_norm": 1.890625, "learning_rate": 2.014098816780794e-06, "loss": 0.4333, "step": 12601 }, { "epoch": 1.5755308232693976, "grad_norm": 2.109375, "learning_rate": 2.012897370142308e-06, "loss": 0.5184, "step": 12602 }, { "epoch": 1.575657115794459, "grad_norm": 1.9375, "learning_rate": 2.011696241850496e-06, "loss": 0.4657, "step": 12603 }, { "epoch": 1.5757834083195201, "grad_norm": 1.9296875, "learning_rate": 2.010495431953231e-06, "loss": 0.5104, "step": 12604 }, { "epoch": 1.5759097008445813, "grad_norm": 1.84375, "learning_rate": 2.009294940498374e-06, "loss": 0.4592, "step": 12605 }, { "epoch": 1.5760359933696424, "grad_norm": 2.0625, "learning_rate": 2.0080947675337735e-06, "loss": 0.4536, "step": 12606 }, { "epoch": 1.5761622858947035, "grad_norm": 1.9765625, "learning_rate": 2.0068949131072656e-06, "loss": 0.5151, "step": 12607 }, { "epoch": 1.5762885784197649, "grad_norm": 1.8671875, "learning_rate": 2.005695377266674e-06, "loss": 0.4227, "step": 12608 }, { "epoch": 1.5764148709448258, "grad_norm": 2.015625, "learning_rate": 2.004496160059809e-06, "loss": 0.4608, "step": 12609 }, { "epoch": 1.5765411634698872, "grad_norm": 2.140625, "learning_rate": 2.0032972615344694e-06, "loss": 0.4971, "step": 12610 }, { "epoch": 1.5766674559949483, "grad_norm": 1.9921875, "learning_rate": 2.002098681738439e-06, "loss": 0.5271, "step": 12611 }, { "epoch": 1.5767937485200094, "grad_norm": 2.234375, "learning_rate": 2.0009004207194914e-06, "loss": 0.4997, "step": 12612 }, { "epoch": 1.5769200410450708, "grad_norm": 2.03125, "learning_rate": 1.999702478525386e-06, "loss": 0.4362, "step": 12613 }, { "epoch": 1.5770463335701317, "grad_norm": 2.0, "learning_rate": 1.9985048552038698e-06, "loss": 0.5327, "step": 12614 }, { "epoch": 1.577172626095193, "grad_norm": 2.109375, "learning_rate": 1.997307550802674e-06, "loss": 0.5502, "step": 12615 }, { "epoch": 1.5772989186202542, "grad_norm": 2.0625, "learning_rate": 1.996110565369527e-06, "loss": 0.4799, "step": 12616 }, { "epoch": 1.5774252111453153, "grad_norm": 1.984375, "learning_rate": 1.9949138989521345e-06, "loss": 0.5726, "step": 12617 }, { "epoch": 1.5775515036703767, "grad_norm": 2.03125, "learning_rate": 1.993717551598191e-06, "loss": 0.4662, "step": 12618 }, { "epoch": 1.5776777961954376, "grad_norm": 1.9453125, "learning_rate": 1.992521523355383e-06, "loss": 0.4041, "step": 12619 }, { "epoch": 1.577804088720499, "grad_norm": 1.984375, "learning_rate": 1.9913258142713798e-06, "loss": 0.5215, "step": 12620 }, { "epoch": 1.57793038124556, "grad_norm": 2.03125, "learning_rate": 1.990130424393839e-06, "loss": 0.5397, "step": 12621 }, { "epoch": 1.5780566737706212, "grad_norm": 1.984375, "learning_rate": 1.988935353770407e-06, "loss": 0.5704, "step": 12622 }, { "epoch": 1.5781829662956823, "grad_norm": 1.984375, "learning_rate": 1.9877406024487154e-06, "loss": 0.4946, "step": 12623 }, { "epoch": 1.5783092588207435, "grad_norm": 2.0, "learning_rate": 1.9865461704763844e-06, "loss": 0.5257, "step": 12624 }, { "epoch": 1.5784355513458048, "grad_norm": 1.9765625, "learning_rate": 1.985352057901022e-06, "loss": 0.4595, "step": 12625 }, { "epoch": 1.5785618438708657, "grad_norm": 2.03125, "learning_rate": 1.9841582647702217e-06, "loss": 0.5132, "step": 12626 }, { "epoch": 1.578688136395927, "grad_norm": 1.8359375, "learning_rate": 1.9829647911315664e-06, "loss": 0.4773, "step": 12627 }, { "epoch": 1.5788144289209882, "grad_norm": 1.9765625, "learning_rate": 1.9817716370326236e-06, "loss": 0.4794, "step": 12628 }, { "epoch": 1.5789407214460494, "grad_norm": 2.015625, "learning_rate": 1.9805788025209504e-06, "loss": 0.5637, "step": 12629 }, { "epoch": 1.5790670139711107, "grad_norm": 1.875, "learning_rate": 1.9793862876440906e-06, "loss": 0.4443, "step": 12630 }, { "epoch": 1.5791933064961716, "grad_norm": 2.265625, "learning_rate": 1.9781940924495735e-06, "loss": 0.5857, "step": 12631 }, { "epoch": 1.579319599021233, "grad_norm": 2.0, "learning_rate": 1.977002216984919e-06, "loss": 0.5644, "step": 12632 }, { "epoch": 1.5794458915462941, "grad_norm": 1.90625, "learning_rate": 1.975810661297628e-06, "loss": 0.4788, "step": 12633 }, { "epoch": 1.5795721840713552, "grad_norm": 1.90625, "learning_rate": 1.9746194254352e-06, "loss": 0.4728, "step": 12634 }, { "epoch": 1.5796984765964166, "grad_norm": 1.9296875, "learning_rate": 1.973428509445111e-06, "loss": 0.4995, "step": 12635 }, { "epoch": 1.5798247691214775, "grad_norm": 2.015625, "learning_rate": 1.972237913374829e-06, "loss": 0.4699, "step": 12636 }, { "epoch": 1.5799510616465389, "grad_norm": 2.03125, "learning_rate": 1.9710476372718067e-06, "loss": 0.4865, "step": 12637 }, { "epoch": 1.5800773541716, "grad_norm": 1.828125, "learning_rate": 1.9698576811834878e-06, "loss": 0.407, "step": 12638 }, { "epoch": 1.5802036466966611, "grad_norm": 1.8515625, "learning_rate": 1.9686680451572994e-06, "loss": 0.4133, "step": 12639 }, { "epoch": 1.5803299392217223, "grad_norm": 2.03125, "learning_rate": 1.967478729240658e-06, "loss": 0.461, "step": 12640 }, { "epoch": 1.5804562317467834, "grad_norm": 1.875, "learning_rate": 1.9662897334809674e-06, "loss": 0.4949, "step": 12641 }, { "epoch": 1.5805825242718448, "grad_norm": 2.0625, "learning_rate": 1.965101057925617e-06, "loss": 0.5381, "step": 12642 }, { "epoch": 1.5807088167969057, "grad_norm": 1.9140625, "learning_rate": 1.9639127026219853e-06, "loss": 0.5011, "step": 12643 }, { "epoch": 1.580835109321967, "grad_norm": 2.046875, "learning_rate": 1.9627246676174363e-06, "loss": 0.5048, "step": 12644 }, { "epoch": 1.5809614018470282, "grad_norm": 2.09375, "learning_rate": 1.961536952959324e-06, "loss": 0.5309, "step": 12645 }, { "epoch": 1.5810876943720893, "grad_norm": 2.125, "learning_rate": 1.9603495586949872e-06, "loss": 0.5234, "step": 12646 }, { "epoch": 1.5812139868971506, "grad_norm": 1.9140625, "learning_rate": 1.9591624848717516e-06, "loss": 0.4636, "step": 12647 }, { "epoch": 1.5813402794222116, "grad_norm": 1.84375, "learning_rate": 1.957975731536932e-06, "loss": 0.4411, "step": 12648 }, { "epoch": 1.581466571947273, "grad_norm": 2.265625, "learning_rate": 1.9567892987378288e-06, "loss": 0.5317, "step": 12649 }, { "epoch": 1.581592864472334, "grad_norm": 2.171875, "learning_rate": 1.955603186521732e-06, "loss": 0.5689, "step": 12650 }, { "epoch": 1.5817191569973952, "grad_norm": 1.9765625, "learning_rate": 1.9544173949359126e-06, "loss": 0.4608, "step": 12651 }, { "epoch": 1.5818454495224565, "grad_norm": 2.046875, "learning_rate": 1.95323192402764e-06, "loss": 0.4767, "step": 12652 }, { "epoch": 1.5819717420475174, "grad_norm": 2.015625, "learning_rate": 1.9520467738441605e-06, "loss": 0.5485, "step": 12653 }, { "epoch": 1.5820980345725788, "grad_norm": 1.9296875, "learning_rate": 1.9508619444327125e-06, "loss": 0.5339, "step": 12654 }, { "epoch": 1.58222432709764, "grad_norm": 1.9765625, "learning_rate": 1.949677435840519e-06, "loss": 0.4652, "step": 12655 }, { "epoch": 1.582350619622701, "grad_norm": 2.125, "learning_rate": 1.9484932481147934e-06, "loss": 0.5407, "step": 12656 }, { "epoch": 1.5824769121477622, "grad_norm": 2.078125, "learning_rate": 1.9473093813027334e-06, "loss": 0.5521, "step": 12657 }, { "epoch": 1.5826032046728233, "grad_norm": 1.9453125, "learning_rate": 1.9461258354515246e-06, "loss": 0.4681, "step": 12658 }, { "epoch": 1.5827294971978847, "grad_norm": 2.109375, "learning_rate": 1.944942610608341e-06, "loss": 0.4734, "step": 12659 }, { "epoch": 1.5828557897229458, "grad_norm": 2.046875, "learning_rate": 1.943759706820344e-06, "loss": 0.4707, "step": 12660 }, { "epoch": 1.582982082248007, "grad_norm": 1.9375, "learning_rate": 1.9425771241346802e-06, "loss": 0.5133, "step": 12661 }, { "epoch": 1.583108374773068, "grad_norm": 1.96875, "learning_rate": 1.9413948625984846e-06, "loss": 0.4662, "step": 12662 }, { "epoch": 1.5832346672981292, "grad_norm": 1.859375, "learning_rate": 1.9402129222588797e-06, "loss": 0.4644, "step": 12663 }, { "epoch": 1.5833609598231906, "grad_norm": 1.9296875, "learning_rate": 1.9390313031629735e-06, "loss": 0.4996, "step": 12664 }, { "epoch": 1.5834872523482515, "grad_norm": 2.0625, "learning_rate": 1.937850005357864e-06, "loss": 0.4949, "step": 12665 }, { "epoch": 1.5836135448733129, "grad_norm": 2.0, "learning_rate": 1.936669028890635e-06, "loss": 0.4973, "step": 12666 }, { "epoch": 1.583739837398374, "grad_norm": 2.015625, "learning_rate": 1.935488373808356e-06, "loss": 0.4715, "step": 12667 }, { "epoch": 1.5838661299234351, "grad_norm": 1.9375, "learning_rate": 1.934308040158087e-06, "loss": 0.5159, "step": 12668 }, { "epoch": 1.5839924224484965, "grad_norm": 2.046875, "learning_rate": 1.9331280279868682e-06, "loss": 0.6359, "step": 12669 }, { "epoch": 1.5841187149735574, "grad_norm": 2.0625, "learning_rate": 1.9319483373417392e-06, "loss": 0.4744, "step": 12670 }, { "epoch": 1.5842450074986187, "grad_norm": 1.9921875, "learning_rate": 1.9307689682697163e-06, "loss": 0.5858, "step": 12671 }, { "epoch": 1.5843713000236799, "grad_norm": 1.9921875, "learning_rate": 1.929589920817806e-06, "loss": 0.5432, "step": 12672 }, { "epoch": 1.584497592548741, "grad_norm": 2.0, "learning_rate": 1.928411195033003e-06, "loss": 0.5201, "step": 12673 }, { "epoch": 1.5846238850738021, "grad_norm": 1.9921875, "learning_rate": 1.927232790962288e-06, "loss": 0.4754, "step": 12674 }, { "epoch": 1.5847501775988633, "grad_norm": 1.875, "learning_rate": 1.9260547086526292e-06, "loss": 0.4683, "step": 12675 }, { "epoch": 1.5848764701239246, "grad_norm": 1.890625, "learning_rate": 1.9248769481509834e-06, "loss": 0.488, "step": 12676 }, { "epoch": 1.5850027626489858, "grad_norm": 2.0, "learning_rate": 1.923699509504291e-06, "loss": 0.4649, "step": 12677 }, { "epoch": 1.585129055174047, "grad_norm": 2.09375, "learning_rate": 1.9225223927594838e-06, "loss": 0.5485, "step": 12678 }, { "epoch": 1.585255347699108, "grad_norm": 2.171875, "learning_rate": 1.921345597963478e-06, "loss": 0.5756, "step": 12679 }, { "epoch": 1.5853816402241692, "grad_norm": 2.0625, "learning_rate": 1.9201691251631783e-06, "loss": 0.5486, "step": 12680 }, { "epoch": 1.5855079327492305, "grad_norm": 2.03125, "learning_rate": 1.918992974405476e-06, "loss": 0.4614, "step": 12681 }, { "epoch": 1.5856342252742914, "grad_norm": 2.0, "learning_rate": 1.9178171457372485e-06, "loss": 0.5165, "step": 12682 }, { "epoch": 1.5857605177993528, "grad_norm": 2.1875, "learning_rate": 1.9166416392053622e-06, "loss": 0.4422, "step": 12683 }, { "epoch": 1.585886810324414, "grad_norm": 2.015625, "learning_rate": 1.9154664548566715e-06, "loss": 0.5797, "step": 12684 }, { "epoch": 1.586013102849475, "grad_norm": 1.96875, "learning_rate": 1.9142915927380148e-06, "loss": 0.5208, "step": 12685 }, { "epoch": 1.5861393953745364, "grad_norm": 1.859375, "learning_rate": 1.9131170528962184e-06, "loss": 0.4808, "step": 12686 }, { "epoch": 1.5862656878995973, "grad_norm": 2.21875, "learning_rate": 1.9119428353780968e-06, "loss": 0.4771, "step": 12687 }, { "epoch": 1.5863919804246587, "grad_norm": 2.125, "learning_rate": 1.910768940230454e-06, "loss": 0.4902, "step": 12688 }, { "epoch": 1.5865182729497198, "grad_norm": 1.84375, "learning_rate": 1.909595367500078e-06, "loss": 0.3984, "step": 12689 }, { "epoch": 1.586644565474781, "grad_norm": 2.15625, "learning_rate": 1.9084221172337424e-06, "loss": 0.5408, "step": 12690 }, { "epoch": 1.5867708579998423, "grad_norm": 1.9296875, "learning_rate": 1.9072491894782119e-06, "loss": 0.4927, "step": 12691 }, { "epoch": 1.5868971505249032, "grad_norm": 2.171875, "learning_rate": 1.9060765842802364e-06, "loss": 0.5236, "step": 12692 }, { "epoch": 1.5870234430499646, "grad_norm": 2.28125, "learning_rate": 1.9049043016865532e-06, "loss": 0.5179, "step": 12693 }, { "epoch": 1.5871497355750257, "grad_norm": 1.9296875, "learning_rate": 1.9037323417438857e-06, "loss": 0.5138, "step": 12694 }, { "epoch": 1.5872760281000868, "grad_norm": 1.9375, "learning_rate": 1.9025607044989458e-06, "loss": 0.4232, "step": 12695 }, { "epoch": 1.587402320625148, "grad_norm": 1.9375, "learning_rate": 1.9013893899984325e-06, "loss": 0.4628, "step": 12696 }, { "epoch": 1.587528613150209, "grad_norm": 2.046875, "learning_rate": 1.900218398289032e-06, "loss": 0.4803, "step": 12697 }, { "epoch": 1.5876549056752705, "grad_norm": 2.15625, "learning_rate": 1.899047729417416e-06, "loss": 0.4757, "step": 12698 }, { "epoch": 1.5877811982003314, "grad_norm": 1.9609375, "learning_rate": 1.8978773834302456e-06, "loss": 0.527, "step": 12699 }, { "epoch": 1.5879074907253927, "grad_norm": 1.8828125, "learning_rate": 1.896707360374167e-06, "loss": 0.4414, "step": 12700 }, { "epoch": 1.5880337832504539, "grad_norm": 2.328125, "learning_rate": 1.8955376602958141e-06, "loss": 0.5492, "step": 12701 }, { "epoch": 1.588160075775515, "grad_norm": 2.0625, "learning_rate": 1.8943682832418108e-06, "loss": 0.4885, "step": 12702 }, { "epoch": 1.5882863683005763, "grad_norm": 1.96875, "learning_rate": 1.893199229258763e-06, "loss": 0.5285, "step": 12703 }, { "epoch": 1.5884126608256373, "grad_norm": 2.03125, "learning_rate": 1.892030498393269e-06, "loss": 0.4937, "step": 12704 }, { "epoch": 1.5885389533506986, "grad_norm": 2.109375, "learning_rate": 1.8908620906919085e-06, "loss": 0.5873, "step": 12705 }, { "epoch": 1.5886652458757597, "grad_norm": 1.9296875, "learning_rate": 1.8896940062012514e-06, "loss": 0.5036, "step": 12706 }, { "epoch": 1.5887915384008209, "grad_norm": 2.03125, "learning_rate": 1.8885262449678587e-06, "loss": 0.5, "step": 12707 }, { "epoch": 1.5889178309258822, "grad_norm": 2.0, "learning_rate": 1.887358807038272e-06, "loss": 0.582, "step": 12708 }, { "epoch": 1.5890441234509431, "grad_norm": 2.0, "learning_rate": 1.8861916924590217e-06, "loss": 0.4422, "step": 12709 }, { "epoch": 1.5891704159760045, "grad_norm": 2.21875, "learning_rate": 1.8850249012766286e-06, "loss": 0.5363, "step": 12710 }, { "epoch": 1.5892967085010656, "grad_norm": 2.234375, "learning_rate": 1.8838584335375965e-06, "loss": 0.5156, "step": 12711 }, { "epoch": 1.5894230010261268, "grad_norm": 2.140625, "learning_rate": 1.882692289288418e-06, "loss": 0.5226, "step": 12712 }, { "epoch": 1.589549293551188, "grad_norm": 1.8828125, "learning_rate": 1.881526468575573e-06, "loss": 0.4698, "step": 12713 }, { "epoch": 1.589675586076249, "grad_norm": 1.7890625, "learning_rate": 1.8803609714455296e-06, "loss": 0.4067, "step": 12714 }, { "epoch": 1.5898018786013104, "grad_norm": 2.1875, "learning_rate": 1.8791957979447395e-06, "loss": 0.6864, "step": 12715 }, { "epoch": 1.5899281711263713, "grad_norm": 1.921875, "learning_rate": 1.8780309481196446e-06, "loss": 0.4733, "step": 12716 }, { "epoch": 1.5900544636514327, "grad_norm": 1.859375, "learning_rate": 1.8768664220166731e-06, "loss": 0.4647, "step": 12717 }, { "epoch": 1.5901807561764938, "grad_norm": 2.078125, "learning_rate": 1.8757022196822406e-06, "loss": 0.5315, "step": 12718 }, { "epoch": 1.590307048701555, "grad_norm": 2.0625, "learning_rate": 1.8745383411627483e-06, "loss": 0.4909, "step": 12719 }, { "epoch": 1.5904333412266163, "grad_norm": 1.9765625, "learning_rate": 1.873374786504587e-06, "loss": 0.4858, "step": 12720 }, { "epoch": 1.5905596337516772, "grad_norm": 1.890625, "learning_rate": 1.872211555754132e-06, "loss": 0.4822, "step": 12721 }, { "epoch": 1.5906859262767385, "grad_norm": 2.265625, "learning_rate": 1.8710486489577484e-06, "loss": 0.5233, "step": 12722 }, { "epoch": 1.5908122188017997, "grad_norm": 2.109375, "learning_rate": 1.8698860661617845e-06, "loss": 0.497, "step": 12723 }, { "epoch": 1.5909385113268608, "grad_norm": 2.1875, "learning_rate": 1.868723807412578e-06, "loss": 0.6474, "step": 12724 }, { "epoch": 1.5910648038519222, "grad_norm": 2.046875, "learning_rate": 1.867561872756457e-06, "loss": 0.6649, "step": 12725 }, { "epoch": 1.591191096376983, "grad_norm": 2.109375, "learning_rate": 1.8664002622397325e-06, "loss": 0.5036, "step": 12726 }, { "epoch": 1.5913173889020444, "grad_norm": 1.8203125, "learning_rate": 1.865238975908702e-06, "loss": 0.4688, "step": 12727 }, { "epoch": 1.5914436814271056, "grad_norm": 1.9375, "learning_rate": 1.8640780138096515e-06, "loss": 0.4537, "step": 12728 }, { "epoch": 1.5915699739521667, "grad_norm": 1.984375, "learning_rate": 1.8629173759888553e-06, "loss": 0.459, "step": 12729 }, { "epoch": 1.5916962664772278, "grad_norm": 2.0625, "learning_rate": 1.861757062492573e-06, "loss": 0.4919, "step": 12730 }, { "epoch": 1.591822559002289, "grad_norm": 2.0, "learning_rate": 1.8605970733670519e-06, "loss": 0.5237, "step": 12731 }, { "epoch": 1.5919488515273503, "grad_norm": 2.046875, "learning_rate": 1.8594374086585265e-06, "loss": 0.4854, "step": 12732 }, { "epoch": 1.5920751440524112, "grad_norm": 2.109375, "learning_rate": 1.8582780684132184e-06, "loss": 0.5173, "step": 12733 }, { "epoch": 1.5922014365774726, "grad_norm": 1.96875, "learning_rate": 1.8571190526773352e-06, "loss": 0.4681, "step": 12734 }, { "epoch": 1.5923277291025337, "grad_norm": 1.9765625, "learning_rate": 1.8559603614970745e-06, "loss": 0.4847, "step": 12735 }, { "epoch": 1.5924540216275949, "grad_norm": 1.9296875, "learning_rate": 1.8548019949186168e-06, "loss": 0.4838, "step": 12736 }, { "epoch": 1.5925803141526562, "grad_norm": 2.078125, "learning_rate": 1.8536439529881322e-06, "loss": 0.4848, "step": 12737 }, { "epoch": 1.5927066066777171, "grad_norm": 1.9140625, "learning_rate": 1.8524862357517792e-06, "loss": 0.5016, "step": 12738 }, { "epoch": 1.5928328992027785, "grad_norm": 1.9296875, "learning_rate": 1.8513288432556996e-06, "loss": 0.5631, "step": 12739 }, { "epoch": 1.5929591917278396, "grad_norm": 1.8828125, "learning_rate": 1.8501717755460246e-06, "loss": 0.4913, "step": 12740 }, { "epoch": 1.5930854842529008, "grad_norm": 2.140625, "learning_rate": 1.8490150326688739e-06, "loss": 0.4837, "step": 12741 }, { "epoch": 1.593211776777962, "grad_norm": 2.265625, "learning_rate": 1.847858614670347e-06, "loss": 0.7144, "step": 12742 }, { "epoch": 1.593338069303023, "grad_norm": 1.921875, "learning_rate": 1.8467025215965428e-06, "loss": 0.4412, "step": 12743 }, { "epoch": 1.5934643618280844, "grad_norm": 1.8125, "learning_rate": 1.845546753493539e-06, "loss": 0.4101, "step": 12744 }, { "epoch": 1.5935906543531455, "grad_norm": 1.8828125, "learning_rate": 1.8443913104073984e-06, "loss": 0.5274, "step": 12745 }, { "epoch": 1.5937169468782066, "grad_norm": 2.0625, "learning_rate": 1.843236192384178e-06, "loss": 0.5422, "step": 12746 }, { "epoch": 1.5938432394032678, "grad_norm": 1.859375, "learning_rate": 1.842081399469915e-06, "loss": 0.4908, "step": 12747 }, { "epoch": 1.593969531928329, "grad_norm": 2.0, "learning_rate": 1.8409269317106382e-06, "loss": 0.4643, "step": 12748 }, { "epoch": 1.5940958244533903, "grad_norm": 2.03125, "learning_rate": 1.839772789152362e-06, "loss": 0.4762, "step": 12749 }, { "epoch": 1.5942221169784512, "grad_norm": 1.9375, "learning_rate": 1.8386189718410863e-06, "loss": 0.5221, "step": 12750 }, { "epoch": 1.5943484095035125, "grad_norm": 1.9765625, "learning_rate": 1.8374654798228008e-06, "loss": 0.4661, "step": 12751 }, { "epoch": 1.5944747020285737, "grad_norm": 2.125, "learning_rate": 1.8363123131434812e-06, "loss": 0.5999, "step": 12752 }, { "epoch": 1.5946009945536348, "grad_norm": 2.09375, "learning_rate": 1.8351594718490894e-06, "loss": 0.4417, "step": 12753 }, { "epoch": 1.5947272870786962, "grad_norm": 1.90625, "learning_rate": 1.8340069559855744e-06, "loss": 0.4802, "step": 12754 }, { "epoch": 1.594853579603757, "grad_norm": 2.0, "learning_rate": 1.8328547655988726e-06, "loss": 0.4379, "step": 12755 }, { "epoch": 1.5949798721288184, "grad_norm": 1.9296875, "learning_rate": 1.8317029007349086e-06, "loss": 0.5329, "step": 12756 }, { "epoch": 1.5951061646538796, "grad_norm": 1.875, "learning_rate": 1.830551361439592e-06, "loss": 0.4561, "step": 12757 }, { "epoch": 1.5952324571789407, "grad_norm": 1.8515625, "learning_rate": 1.8294001477588208e-06, "loss": 0.4259, "step": 12758 }, { "epoch": 1.595358749704002, "grad_norm": 2.046875, "learning_rate": 1.8282492597384804e-06, "loss": 0.5619, "step": 12759 }, { "epoch": 1.595485042229063, "grad_norm": 2.015625, "learning_rate": 1.8270986974244388e-06, "loss": 0.4885, "step": 12760 }, { "epoch": 1.5956113347541243, "grad_norm": 1.9140625, "learning_rate": 1.8259484608625588e-06, "loss": 0.4778, "step": 12761 }, { "epoch": 1.5957376272791854, "grad_norm": 2.109375, "learning_rate": 1.8247985500986864e-06, "loss": 0.5077, "step": 12762 }, { "epoch": 1.5958639198042466, "grad_norm": 1.953125, "learning_rate": 1.8236489651786516e-06, "loss": 0.5075, "step": 12763 }, { "epoch": 1.5959902123293077, "grad_norm": 2.015625, "learning_rate": 1.8224997061482751e-06, "loss": 0.5486, "step": 12764 }, { "epoch": 1.5961165048543688, "grad_norm": 2.015625, "learning_rate": 1.8213507730533642e-06, "loss": 0.4678, "step": 12765 }, { "epoch": 1.5962427973794302, "grad_norm": 1.8828125, "learning_rate": 1.8202021659397107e-06, "loss": 0.5152, "step": 12766 }, { "epoch": 1.5963690899044911, "grad_norm": 1.953125, "learning_rate": 1.819053884853098e-06, "loss": 0.5271, "step": 12767 }, { "epoch": 1.5964953824295525, "grad_norm": 1.953125, "learning_rate": 1.8179059298392921e-06, "loss": 0.4881, "step": 12768 }, { "epoch": 1.5966216749546136, "grad_norm": 1.84375, "learning_rate": 1.816758300944047e-06, "loss": 0.4031, "step": 12769 }, { "epoch": 1.5967479674796747, "grad_norm": 1.9921875, "learning_rate": 1.8156109982131064e-06, "loss": 0.4512, "step": 12770 }, { "epoch": 1.596874260004736, "grad_norm": 1.953125, "learning_rate": 1.8144640216921983e-06, "loss": 0.4145, "step": 12771 }, { "epoch": 1.597000552529797, "grad_norm": 2.28125, "learning_rate": 1.8133173714270368e-06, "loss": 0.5328, "step": 12772 }, { "epoch": 1.5971268450548584, "grad_norm": 1.9140625, "learning_rate": 1.812171047463327e-06, "loss": 0.4786, "step": 12773 }, { "epoch": 1.5972531375799195, "grad_norm": 2.0625, "learning_rate": 1.8110250498467585e-06, "loss": 0.5426, "step": 12774 }, { "epoch": 1.5973794301049806, "grad_norm": 2.015625, "learning_rate": 1.8098793786230063e-06, "loss": 0.585, "step": 12775 }, { "epoch": 1.597505722630042, "grad_norm": 1.9375, "learning_rate": 1.8087340338377347e-06, "loss": 0.4703, "step": 12776 }, { "epoch": 1.597632015155103, "grad_norm": 2.140625, "learning_rate": 1.8075890155365938e-06, "loss": 0.5365, "step": 12777 }, { "epoch": 1.5977583076801642, "grad_norm": 2.0625, "learning_rate": 1.8064443237652207e-06, "loss": 0.5847, "step": 12778 }, { "epoch": 1.5978846002052254, "grad_norm": 1.96875, "learning_rate": 1.8052999585692444e-06, "loss": 0.5047, "step": 12779 }, { "epoch": 1.5980108927302865, "grad_norm": 1.921875, "learning_rate": 1.8041559199942726e-06, "loss": 0.4203, "step": 12780 }, { "epoch": 1.5981371852553476, "grad_norm": 2.015625, "learning_rate": 1.8030122080859058e-06, "loss": 0.4722, "step": 12781 }, { "epoch": 1.5982634777804088, "grad_norm": 2.140625, "learning_rate": 1.8018688228897275e-06, "loss": 0.5481, "step": 12782 }, { "epoch": 1.5983897703054701, "grad_norm": 2.046875, "learning_rate": 1.8007257644513121e-06, "loss": 0.5056, "step": 12783 }, { "epoch": 1.5985160628305313, "grad_norm": 1.9375, "learning_rate": 1.799583032816219e-06, "loss": 0.4775, "step": 12784 }, { "epoch": 1.5986423553555924, "grad_norm": 2.203125, "learning_rate": 1.7984406280299937e-06, "loss": 0.6065, "step": 12785 }, { "epoch": 1.5987686478806535, "grad_norm": 2.078125, "learning_rate": 1.79729855013817e-06, "loss": 0.4511, "step": 12786 }, { "epoch": 1.5988949404057147, "grad_norm": 2.0, "learning_rate": 1.7961567991862695e-06, "loss": 0.4447, "step": 12787 }, { "epoch": 1.599021232930776, "grad_norm": 2.203125, "learning_rate": 1.7950153752197984e-06, "loss": 0.4793, "step": 12788 }, { "epoch": 1.599147525455837, "grad_norm": 2.09375, "learning_rate": 1.793874278284251e-06, "loss": 0.5463, "step": 12789 }, { "epoch": 1.5992738179808983, "grad_norm": 1.921875, "learning_rate": 1.7927335084251095e-06, "loss": 0.4492, "step": 12790 }, { "epoch": 1.5994001105059594, "grad_norm": 2.234375, "learning_rate": 1.7915930656878423e-06, "loss": 0.5256, "step": 12791 }, { "epoch": 1.5995264030310206, "grad_norm": 2.140625, "learning_rate": 1.7904529501179047e-06, "loss": 0.4911, "step": 12792 }, { "epoch": 1.599652695556082, "grad_norm": 1.859375, "learning_rate": 1.7893131617607374e-06, "loss": 0.4585, "step": 12793 }, { "epoch": 1.5997789880811428, "grad_norm": 1.9375, "learning_rate": 1.7881737006617716e-06, "loss": 0.4671, "step": 12794 }, { "epoch": 1.5999052806062042, "grad_norm": 2.046875, "learning_rate": 1.7870345668664224e-06, "loss": 0.4991, "step": 12795 }, { "epoch": 1.6000315731312653, "grad_norm": 2.09375, "learning_rate": 1.78589576042009e-06, "loss": 0.4884, "step": 12796 }, { "epoch": 1.6001578656563265, "grad_norm": 1.765625, "learning_rate": 1.7847572813681713e-06, "loss": 0.4644, "step": 12797 }, { "epoch": 1.6002841581813876, "grad_norm": 2.296875, "learning_rate": 1.783619129756039e-06, "loss": 0.6051, "step": 12798 }, { "epoch": 1.6004104507064487, "grad_norm": 2.015625, "learning_rate": 1.782481305629058e-06, "loss": 0.5015, "step": 12799 }, { "epoch": 1.60053674323151, "grad_norm": 1.890625, "learning_rate": 1.7813438090325796e-06, "loss": 0.4635, "step": 12800 }, { "epoch": 1.6006630357565712, "grad_norm": 1.9609375, "learning_rate": 1.78020664001194e-06, "loss": 0.4512, "step": 12801 }, { "epoch": 1.6007893282816323, "grad_norm": 2.25, "learning_rate": 1.7790697986124662e-06, "loss": 0.4798, "step": 12802 }, { "epoch": 1.6009156208066935, "grad_norm": 2.046875, "learning_rate": 1.7779332848794684e-06, "loss": 0.5356, "step": 12803 }, { "epoch": 1.6010419133317546, "grad_norm": 1.8359375, "learning_rate": 1.7767970988582461e-06, "loss": 0.5263, "step": 12804 }, { "epoch": 1.601168205856816, "grad_norm": 1.9609375, "learning_rate": 1.7756612405940843e-06, "loss": 0.4561, "step": 12805 }, { "epoch": 1.6012944983818769, "grad_norm": 2.109375, "learning_rate": 1.7745257101322555e-06, "loss": 0.4905, "step": 12806 }, { "epoch": 1.6014207909069382, "grad_norm": 2.1875, "learning_rate": 1.7733905075180202e-06, "loss": 0.547, "step": 12807 }, { "epoch": 1.6015470834319994, "grad_norm": 1.890625, "learning_rate": 1.7722556327966234e-06, "loss": 0.4894, "step": 12808 }, { "epoch": 1.6016733759570605, "grad_norm": 2.140625, "learning_rate": 1.7711210860132998e-06, "loss": 0.5409, "step": 12809 }, { "epoch": 1.6017996684821219, "grad_norm": 2.046875, "learning_rate": 1.7699868672132692e-06, "loss": 0.5527, "step": 12810 }, { "epoch": 1.6019259610071828, "grad_norm": 2.203125, "learning_rate": 1.7688529764417395e-06, "loss": 0.5088, "step": 12811 }, { "epoch": 1.6020522535322441, "grad_norm": 2.140625, "learning_rate": 1.7677194137439036e-06, "loss": 0.4871, "step": 12812 }, { "epoch": 1.6021785460573053, "grad_norm": 1.9375, "learning_rate": 1.766586179164943e-06, "loss": 0.4721, "step": 12813 }, { "epoch": 1.6023048385823664, "grad_norm": 1.8984375, "learning_rate": 1.765453272750024e-06, "loss": 0.4871, "step": 12814 }, { "epoch": 1.6024311311074277, "grad_norm": 1.890625, "learning_rate": 1.7643206945443058e-06, "loss": 0.4654, "step": 12815 }, { "epoch": 1.6025574236324887, "grad_norm": 2.125, "learning_rate": 1.763188444592927e-06, "loss": 0.507, "step": 12816 }, { "epoch": 1.60268371615755, "grad_norm": 1.9921875, "learning_rate": 1.7620565229410181e-06, "loss": 0.4752, "step": 12817 }, { "epoch": 1.6028100086826111, "grad_norm": 1.859375, "learning_rate": 1.7609249296336927e-06, "loss": 0.4524, "step": 12818 }, { "epoch": 1.6029363012076723, "grad_norm": 2.015625, "learning_rate": 1.7597936647160562e-06, "loss": 0.5079, "step": 12819 }, { "epoch": 1.6030625937327334, "grad_norm": 2.0625, "learning_rate": 1.758662728233196e-06, "loss": 0.5581, "step": 12820 }, { "epoch": 1.6031888862577945, "grad_norm": 1.90625, "learning_rate": 1.7575321202301875e-06, "loss": 0.5159, "step": 12821 }, { "epoch": 1.603315178782856, "grad_norm": 2.0625, "learning_rate": 1.7564018407520966e-06, "loss": 0.4779, "step": 12822 }, { "epoch": 1.6034414713079168, "grad_norm": 2.03125, "learning_rate": 1.7552718898439726e-06, "loss": 0.4475, "step": 12823 }, { "epoch": 1.6035677638329782, "grad_norm": 1.875, "learning_rate": 1.7541422675508524e-06, "loss": 0.4605, "step": 12824 }, { "epoch": 1.6036940563580393, "grad_norm": 2.375, "learning_rate": 1.7530129739177593e-06, "loss": 0.5582, "step": 12825 }, { "epoch": 1.6038203488831004, "grad_norm": 2.234375, "learning_rate": 1.751884008989706e-06, "loss": 0.4677, "step": 12826 }, { "epoch": 1.6039466414081618, "grad_norm": 2.09375, "learning_rate": 1.7507553728116877e-06, "loss": 0.5278, "step": 12827 }, { "epoch": 1.6040729339332227, "grad_norm": 1.9609375, "learning_rate": 1.7496270654286918e-06, "loss": 0.4635, "step": 12828 }, { "epoch": 1.604199226458284, "grad_norm": 2.0, "learning_rate": 1.7484990868856887e-06, "loss": 0.4997, "step": 12829 }, { "epoch": 1.6043255189833452, "grad_norm": 2.015625, "learning_rate": 1.747371437227634e-06, "loss": 0.4693, "step": 12830 }, { "epoch": 1.6044518115084063, "grad_norm": 1.9296875, "learning_rate": 1.7462441164994793e-06, "loss": 0.3884, "step": 12831 }, { "epoch": 1.6045781040334677, "grad_norm": 1.9609375, "learning_rate": 1.745117124746154e-06, "loss": 0.5368, "step": 12832 }, { "epoch": 1.6047043965585286, "grad_norm": 2.046875, "learning_rate": 1.7439904620125758e-06, "loss": 0.5092, "step": 12833 }, { "epoch": 1.60483068908359, "grad_norm": 2.078125, "learning_rate": 1.742864128343653e-06, "loss": 0.4903, "step": 12834 }, { "epoch": 1.604956981608651, "grad_norm": 2.125, "learning_rate": 1.7417381237842768e-06, "loss": 0.4535, "step": 12835 }, { "epoch": 1.6050832741337122, "grad_norm": 2.09375, "learning_rate": 1.7406124483793286e-06, "loss": 0.4044, "step": 12836 }, { "epoch": 1.6052095666587733, "grad_norm": 1.9921875, "learning_rate": 1.7394871021736725e-06, "loss": 0.5218, "step": 12837 }, { "epoch": 1.6053358591838345, "grad_norm": 1.90625, "learning_rate": 1.7383620852121652e-06, "loss": 0.4303, "step": 12838 }, { "epoch": 1.6054621517088958, "grad_norm": 2.0, "learning_rate": 1.737237397539646e-06, "loss": 0.5769, "step": 12839 }, { "epoch": 1.6055884442339567, "grad_norm": 1.9765625, "learning_rate": 1.7361130392009407e-06, "loss": 0.4834, "step": 12840 }, { "epoch": 1.605714736759018, "grad_norm": 2.046875, "learning_rate": 1.7349890102408662e-06, "loss": 0.5286, "step": 12841 }, { "epoch": 1.6058410292840792, "grad_norm": 1.859375, "learning_rate": 1.7338653107042213e-06, "loss": 0.4997, "step": 12842 }, { "epoch": 1.6059673218091404, "grad_norm": 2.09375, "learning_rate": 1.7327419406357959e-06, "loss": 0.5041, "step": 12843 }, { "epoch": 1.6060936143342017, "grad_norm": 1.9140625, "learning_rate": 1.731618900080363e-06, "loss": 0.4993, "step": 12844 }, { "epoch": 1.6062199068592626, "grad_norm": 2.109375, "learning_rate": 1.7304961890826844e-06, "loss": 0.4997, "step": 12845 }, { "epoch": 1.606346199384324, "grad_norm": 1.96875, "learning_rate": 1.7293738076875077e-06, "loss": 0.4713, "step": 12846 }, { "epoch": 1.6064724919093851, "grad_norm": 2.078125, "learning_rate": 1.7282517559395728e-06, "loss": 0.5093, "step": 12847 }, { "epoch": 1.6065987844344463, "grad_norm": 2.15625, "learning_rate": 1.7271300338835995e-06, "loss": 0.4993, "step": 12848 }, { "epoch": 1.6067250769595076, "grad_norm": 1.9375, "learning_rate": 1.7260086415642952e-06, "loss": 0.4288, "step": 12849 }, { "epoch": 1.6068513694845685, "grad_norm": 1.953125, "learning_rate": 1.7248875790263585e-06, "loss": 0.5057, "step": 12850 }, { "epoch": 1.6069776620096299, "grad_norm": 2.125, "learning_rate": 1.7237668463144696e-06, "loss": 0.4812, "step": 12851 }, { "epoch": 1.607103954534691, "grad_norm": 1.9296875, "learning_rate": 1.7226464434733015e-06, "loss": 0.4648, "step": 12852 }, { "epoch": 1.6072302470597521, "grad_norm": 1.859375, "learning_rate": 1.7215263705475072e-06, "loss": 0.4759, "step": 12853 }, { "epoch": 1.6073565395848133, "grad_norm": 2.0625, "learning_rate": 1.7204066275817332e-06, "loss": 0.5569, "step": 12854 }, { "epoch": 1.6074828321098744, "grad_norm": 1.984375, "learning_rate": 1.7192872146206075e-06, "loss": 0.4822, "step": 12855 }, { "epoch": 1.6076091246349358, "grad_norm": 2.15625, "learning_rate": 1.7181681317087485e-06, "loss": 0.5965, "step": 12856 }, { "epoch": 1.6077354171599967, "grad_norm": 1.890625, "learning_rate": 1.7170493788907593e-06, "loss": 0.409, "step": 12857 }, { "epoch": 1.607861709685058, "grad_norm": 1.796875, "learning_rate": 1.7159309562112314e-06, "loss": 0.4141, "step": 12858 }, { "epoch": 1.6079880022101192, "grad_norm": 2.015625, "learning_rate": 1.7148128637147421e-06, "loss": 0.4602, "step": 12859 }, { "epoch": 1.6081142947351803, "grad_norm": 1.75, "learning_rate": 1.7136951014458569e-06, "loss": 0.4112, "step": 12860 }, { "epoch": 1.6082405872602417, "grad_norm": 1.7890625, "learning_rate": 1.7125776694491247e-06, "loss": 0.4385, "step": 12861 }, { "epoch": 1.6083668797853026, "grad_norm": 1.8515625, "learning_rate": 1.7114605677690833e-06, "loss": 0.5136, "step": 12862 }, { "epoch": 1.608493172310364, "grad_norm": 1.9375, "learning_rate": 1.7103437964502611e-06, "loss": 0.489, "step": 12863 }, { "epoch": 1.608619464835425, "grad_norm": 2.03125, "learning_rate": 1.7092273555371698e-06, "loss": 0.4535, "step": 12864 }, { "epoch": 1.6087457573604862, "grad_norm": 2.046875, "learning_rate": 1.7081112450743054e-06, "loss": 0.5447, "step": 12865 }, { "epoch": 1.6088720498855476, "grad_norm": 2.125, "learning_rate": 1.706995465106155e-06, "loss": 0.5585, "step": 12866 }, { "epoch": 1.6089983424106085, "grad_norm": 2.0, "learning_rate": 1.7058800156771904e-06, "loss": 0.5261, "step": 12867 }, { "epoch": 1.6091246349356698, "grad_norm": 1.921875, "learning_rate": 1.7047648968318697e-06, "loss": 0.5012, "step": 12868 }, { "epoch": 1.609250927460731, "grad_norm": 2.015625, "learning_rate": 1.7036501086146418e-06, "loss": 0.54, "step": 12869 }, { "epoch": 1.609377219985792, "grad_norm": 1.953125, "learning_rate": 1.7025356510699364e-06, "loss": 0.5099, "step": 12870 }, { "epoch": 1.6095035125108532, "grad_norm": 2.140625, "learning_rate": 1.7014215242421738e-06, "loss": 0.4895, "step": 12871 }, { "epoch": 1.6096298050359144, "grad_norm": 1.9140625, "learning_rate": 1.7003077281757618e-06, "loss": 0.4656, "step": 12872 }, { "epoch": 1.6097560975609757, "grad_norm": 2.078125, "learning_rate": 1.6991942629150915e-06, "loss": 0.5199, "step": 12873 }, { "epoch": 1.6098823900860366, "grad_norm": 1.9375, "learning_rate": 1.6980811285045452e-06, "loss": 0.4745, "step": 12874 }, { "epoch": 1.610008682611098, "grad_norm": 2.21875, "learning_rate": 1.6969683249884884e-06, "loss": 0.5533, "step": 12875 }, { "epoch": 1.610134975136159, "grad_norm": 1.984375, "learning_rate": 1.6958558524112757e-06, "loss": 0.4915, "step": 12876 }, { "epoch": 1.6102612676612202, "grad_norm": 2.15625, "learning_rate": 1.694743710817246e-06, "loss": 0.5811, "step": 12877 }, { "epoch": 1.6103875601862816, "grad_norm": 1.75, "learning_rate": 1.693631900250725e-06, "loss": 0.4215, "step": 12878 }, { "epoch": 1.6105138527113425, "grad_norm": 2.03125, "learning_rate": 1.692520420756033e-06, "loss": 0.5218, "step": 12879 }, { "epoch": 1.6106401452364039, "grad_norm": 1.96875, "learning_rate": 1.6914092723774656e-06, "loss": 0.5074, "step": 12880 }, { "epoch": 1.610766437761465, "grad_norm": 1.9140625, "learning_rate": 1.6902984551593127e-06, "loss": 0.4367, "step": 12881 }, { "epoch": 1.6108927302865261, "grad_norm": 2.015625, "learning_rate": 1.689187969145849e-06, "loss": 0.4992, "step": 12882 }, { "epoch": 1.6110190228115875, "grad_norm": 2.0, "learning_rate": 1.6880778143813348e-06, "loss": 0.4364, "step": 12883 }, { "epoch": 1.6111453153366484, "grad_norm": 1.890625, "learning_rate": 1.686967990910018e-06, "loss": 0.4234, "step": 12884 }, { "epoch": 1.6112716078617098, "grad_norm": 2.046875, "learning_rate": 1.6858584987761327e-06, "loss": 0.5371, "step": 12885 }, { "epoch": 1.6113979003867709, "grad_norm": 1.75, "learning_rate": 1.6847493380239032e-06, "loss": 0.4497, "step": 12886 }, { "epoch": 1.611524192911832, "grad_norm": 2.0625, "learning_rate": 1.6836405086975372e-06, "loss": 0.4563, "step": 12887 }, { "epoch": 1.6116504854368932, "grad_norm": 2.078125, "learning_rate": 1.6825320108412279e-06, "loss": 0.6084, "step": 12888 }, { "epoch": 1.6117767779619543, "grad_norm": 2.0625, "learning_rate": 1.6814238444991593e-06, "loss": 0.4885, "step": 12889 }, { "epoch": 1.6119030704870156, "grad_norm": 2.03125, "learning_rate": 1.6803160097155001e-06, "loss": 0.5008, "step": 12890 }, { "epoch": 1.6120293630120768, "grad_norm": 1.953125, "learning_rate": 1.6792085065344044e-06, "loss": 0.4302, "step": 12891 }, { "epoch": 1.612155655537138, "grad_norm": 2.1875, "learning_rate": 1.6781013350000165e-06, "loss": 0.5083, "step": 12892 }, { "epoch": 1.612281948062199, "grad_norm": 2.125, "learning_rate": 1.6769944951564654e-06, "loss": 0.4858, "step": 12893 }, { "epoch": 1.6124082405872602, "grad_norm": 2.078125, "learning_rate": 1.6758879870478627e-06, "loss": 0.5376, "step": 12894 }, { "epoch": 1.6125345331123215, "grad_norm": 2.125, "learning_rate": 1.674781810718319e-06, "loss": 0.4641, "step": 12895 }, { "epoch": 1.6126608256373824, "grad_norm": 2.015625, "learning_rate": 1.6736759662119183e-06, "loss": 0.4754, "step": 12896 }, { "epoch": 1.6127871181624438, "grad_norm": 2.03125, "learning_rate": 1.6725704535727395e-06, "loss": 0.5202, "step": 12897 }, { "epoch": 1.612913410687505, "grad_norm": 2.109375, "learning_rate": 1.6714652728448445e-06, "loss": 0.5032, "step": 12898 }, { "epoch": 1.613039703212566, "grad_norm": 2.25, "learning_rate": 1.6703604240722825e-06, "loss": 0.5415, "step": 12899 }, { "epoch": 1.6131659957376274, "grad_norm": 1.9609375, "learning_rate": 1.669255907299091e-06, "loss": 0.4503, "step": 12900 }, { "epoch": 1.6132922882626883, "grad_norm": 1.8515625, "learning_rate": 1.6681517225692934e-06, "loss": 0.4229, "step": 12901 }, { "epoch": 1.6134185807877497, "grad_norm": 2.0, "learning_rate": 1.6670478699268988e-06, "loss": 0.5219, "step": 12902 }, { "epoch": 1.6135448733128108, "grad_norm": 1.984375, "learning_rate": 1.6659443494159055e-06, "loss": 0.4971, "step": 12903 }, { "epoch": 1.613671165837872, "grad_norm": 1.8125, "learning_rate": 1.6648411610802973e-06, "loss": 0.4568, "step": 12904 }, { "epoch": 1.613797458362933, "grad_norm": 1.859375, "learning_rate": 1.6637383049640432e-06, "loss": 0.4332, "step": 12905 }, { "epoch": 1.6139237508879942, "grad_norm": 2.0, "learning_rate": 1.6626357811111016e-06, "loss": 0.5405, "step": 12906 }, { "epoch": 1.6140500434130556, "grad_norm": 1.96875, "learning_rate": 1.661533589565415e-06, "loss": 0.453, "step": 12907 }, { "epoch": 1.6141763359381167, "grad_norm": 1.984375, "learning_rate": 1.660431730370916e-06, "loss": 0.5082, "step": 12908 }, { "epoch": 1.6143026284631778, "grad_norm": 1.90625, "learning_rate": 1.6593302035715208e-06, "loss": 0.5137, "step": 12909 }, { "epoch": 1.614428920988239, "grad_norm": 1.90625, "learning_rate": 1.658229009211132e-06, "loss": 0.4284, "step": 12910 }, { "epoch": 1.6145552135133001, "grad_norm": 2.125, "learning_rate": 1.6571281473336443e-06, "loss": 0.5013, "step": 12911 }, { "epoch": 1.6146815060383615, "grad_norm": 1.8125, "learning_rate": 1.6560276179829337e-06, "loss": 0.4502, "step": 12912 }, { "epoch": 1.6148077985634224, "grad_norm": 2.15625, "learning_rate": 1.6549274212028643e-06, "loss": 0.5242, "step": 12913 }, { "epoch": 1.6149340910884837, "grad_norm": 1.9609375, "learning_rate": 1.6538275570372875e-06, "loss": 0.498, "step": 12914 }, { "epoch": 1.6150603836135449, "grad_norm": 1.9453125, "learning_rate": 1.6527280255300414e-06, "loss": 0.4585, "step": 12915 }, { "epoch": 1.615186676138606, "grad_norm": 1.953125, "learning_rate": 1.6516288267249513e-06, "loss": 0.4775, "step": 12916 }, { "epoch": 1.6153129686636674, "grad_norm": 2.15625, "learning_rate": 1.6505299606658265e-06, "loss": 0.5068, "step": 12917 }, { "epoch": 1.6154392611887283, "grad_norm": 2.078125, "learning_rate": 1.6494314273964673e-06, "loss": 0.4926, "step": 12918 }, { "epoch": 1.6155655537137896, "grad_norm": 2.140625, "learning_rate": 1.6483332269606578e-06, "loss": 0.4779, "step": 12919 }, { "epoch": 1.6156918462388508, "grad_norm": 1.8515625, "learning_rate": 1.6472353594021695e-06, "loss": 0.4504, "step": 12920 }, { "epoch": 1.615818138763912, "grad_norm": 2.046875, "learning_rate": 1.6461378247647608e-06, "loss": 0.5117, "step": 12921 }, { "epoch": 1.6159444312889732, "grad_norm": 1.8828125, "learning_rate": 1.6450406230921767e-06, "loss": 0.4942, "step": 12922 }, { "epoch": 1.6160707238140342, "grad_norm": 2.046875, "learning_rate": 1.6439437544281501e-06, "loss": 0.4978, "step": 12923 }, { "epoch": 1.6161970163390955, "grad_norm": 2.0625, "learning_rate": 1.642847218816398e-06, "loss": 0.4418, "step": 12924 }, { "epoch": 1.6163233088641566, "grad_norm": 1.875, "learning_rate": 1.6417510163006256e-06, "loss": 0.4399, "step": 12925 }, { "epoch": 1.6164496013892178, "grad_norm": 1.96875, "learning_rate": 1.6406551469245248e-06, "loss": 0.5184, "step": 12926 }, { "epoch": 1.616575893914279, "grad_norm": 1.9609375, "learning_rate": 1.6395596107317757e-06, "loss": 0.481, "step": 12927 }, { "epoch": 1.61670218643934, "grad_norm": 2.109375, "learning_rate": 1.638464407766044e-06, "loss": 0.5033, "step": 12928 }, { "epoch": 1.6168284789644014, "grad_norm": 2.015625, "learning_rate": 1.637369538070981e-06, "loss": 0.4924, "step": 12929 }, { "epoch": 1.6169547714894623, "grad_norm": 2.125, "learning_rate": 1.6362750016902261e-06, "loss": 0.5224, "step": 12930 }, { "epoch": 1.6170810640145237, "grad_norm": 1.890625, "learning_rate": 1.635180798667404e-06, "loss": 0.4185, "step": 12931 }, { "epoch": 1.6172073565395848, "grad_norm": 2.03125, "learning_rate": 1.634086929046127e-06, "loss": 0.4594, "step": 12932 }, { "epoch": 1.617333649064646, "grad_norm": 1.9375, "learning_rate": 1.6329933928699937e-06, "loss": 0.4746, "step": 12933 }, { "epoch": 1.6174599415897073, "grad_norm": 2.171875, "learning_rate": 1.6319001901825916e-06, "loss": 0.4666, "step": 12934 }, { "epoch": 1.6175862341147682, "grad_norm": 2.09375, "learning_rate": 1.6308073210274923e-06, "loss": 0.6105, "step": 12935 }, { "epoch": 1.6177125266398296, "grad_norm": 1.984375, "learning_rate": 1.6297147854482543e-06, "loss": 0.5026, "step": 12936 }, { "epoch": 1.6178388191648907, "grad_norm": 1.9609375, "learning_rate": 1.628622583488424e-06, "loss": 0.4838, "step": 12937 }, { "epoch": 1.6179651116899518, "grad_norm": 2.078125, "learning_rate": 1.6275307151915332e-06, "loss": 0.4521, "step": 12938 }, { "epoch": 1.6180914042150132, "grad_norm": 1.859375, "learning_rate": 1.6264391806011027e-06, "loss": 0.5295, "step": 12939 }, { "epoch": 1.618217696740074, "grad_norm": 1.84375, "learning_rate": 1.6253479797606364e-06, "loss": 0.387, "step": 12940 }, { "epoch": 1.6183439892651355, "grad_norm": 1.9453125, "learning_rate": 1.6242571127136286e-06, "loss": 0.4647, "step": 12941 }, { "epoch": 1.6184702817901966, "grad_norm": 2.09375, "learning_rate": 1.6231665795035556e-06, "loss": 0.461, "step": 12942 }, { "epoch": 1.6185965743152577, "grad_norm": 2.015625, "learning_rate": 1.6220763801738882e-06, "loss": 0.441, "step": 12943 }, { "epoch": 1.6187228668403189, "grad_norm": 2.421875, "learning_rate": 1.620986514768077e-06, "loss": 0.541, "step": 12944 }, { "epoch": 1.61884915936538, "grad_norm": 1.875, "learning_rate": 1.6198969833295607e-06, "loss": 0.4763, "step": 12945 }, { "epoch": 1.6189754518904413, "grad_norm": 2.0625, "learning_rate": 1.618807785901767e-06, "loss": 0.5355, "step": 12946 }, { "epoch": 1.6191017444155023, "grad_norm": 2.0, "learning_rate": 1.6177189225281064e-06, "loss": 0.523, "step": 12947 }, { "epoch": 1.6192280369405636, "grad_norm": 2.078125, "learning_rate": 1.61663039325198e-06, "loss": 0.5307, "step": 12948 }, { "epoch": 1.6193543294656247, "grad_norm": 1.953125, "learning_rate": 1.615542198116774e-06, "loss": 0.5344, "step": 12949 }, { "epoch": 1.6194806219906859, "grad_norm": 2.046875, "learning_rate": 1.6144543371658605e-06, "loss": 0.4836, "step": 12950 }, { "epoch": 1.6196069145157472, "grad_norm": 1.9921875, "learning_rate": 1.6133668104425992e-06, "loss": 0.4751, "step": 12951 }, { "epoch": 1.6197332070408081, "grad_norm": 2.046875, "learning_rate": 1.6122796179903355e-06, "loss": 0.5085, "step": 12952 }, { "epoch": 1.6198594995658695, "grad_norm": 2.0625, "learning_rate": 1.6111927598524046e-06, "loss": 0.455, "step": 12953 }, { "epoch": 1.6199857920909306, "grad_norm": 1.9296875, "learning_rate": 1.6101062360721232e-06, "loss": 0.497, "step": 12954 }, { "epoch": 1.6201120846159918, "grad_norm": 1.8828125, "learning_rate": 1.6090200466927997e-06, "loss": 0.4628, "step": 12955 }, { "epoch": 1.6202383771410531, "grad_norm": 2.046875, "learning_rate": 1.6079341917577262e-06, "loss": 0.5436, "step": 12956 }, { "epoch": 1.620364669666114, "grad_norm": 1.9296875, "learning_rate": 1.6068486713101816e-06, "loss": 0.5648, "step": 12957 }, { "epoch": 1.6204909621911754, "grad_norm": 1.96875, "learning_rate": 1.6057634853934313e-06, "loss": 0.4342, "step": 12958 }, { "epoch": 1.6206172547162365, "grad_norm": 2.0, "learning_rate": 1.6046786340507314e-06, "loss": 0.4528, "step": 12959 }, { "epoch": 1.6207435472412977, "grad_norm": 2.03125, "learning_rate": 1.60359411732532e-06, "loss": 0.4749, "step": 12960 }, { "epoch": 1.6208698397663588, "grad_norm": 2.171875, "learning_rate": 1.6025099352604235e-06, "loss": 0.4933, "step": 12961 }, { "epoch": 1.62099613229142, "grad_norm": 1.953125, "learning_rate": 1.6014260878992537e-06, "loss": 0.4613, "step": 12962 }, { "epoch": 1.6211224248164813, "grad_norm": 1.953125, "learning_rate": 1.600342575285011e-06, "loss": 0.4514, "step": 12963 }, { "epoch": 1.6212487173415422, "grad_norm": 2.09375, "learning_rate": 1.5992593974608817e-06, "loss": 0.4895, "step": 12964 }, { "epoch": 1.6213750098666035, "grad_norm": 2.0625, "learning_rate": 1.598176554470038e-06, "loss": 0.4837, "step": 12965 }, { "epoch": 1.6215013023916647, "grad_norm": 2.046875, "learning_rate": 1.5970940463556407e-06, "loss": 0.4766, "step": 12966 }, { "epoch": 1.6216275949167258, "grad_norm": 1.8515625, "learning_rate": 1.5960118731608342e-06, "loss": 0.4383, "step": 12967 }, { "epoch": 1.6217538874417872, "grad_norm": 1.921875, "learning_rate": 1.5949300349287522e-06, "loss": 0.5269, "step": 12968 }, { "epoch": 1.621880179966848, "grad_norm": 1.9375, "learning_rate": 1.593848531702513e-06, "loss": 0.4757, "step": 12969 }, { "epoch": 1.6220064724919094, "grad_norm": 2.21875, "learning_rate": 1.5927673635252262e-06, "loss": 0.5291, "step": 12970 }, { "epoch": 1.6221327650169706, "grad_norm": 2.296875, "learning_rate": 1.5916865304399809e-06, "loss": 0.5927, "step": 12971 }, { "epoch": 1.6222590575420317, "grad_norm": 1.875, "learning_rate": 1.5906060324898575e-06, "loss": 0.4578, "step": 12972 }, { "epoch": 1.622385350067093, "grad_norm": 2.25, "learning_rate": 1.5895258697179228e-06, "loss": 0.5215, "step": 12973 }, { "epoch": 1.622511642592154, "grad_norm": 1.984375, "learning_rate": 1.5884460421672287e-06, "loss": 0.5351, "step": 12974 }, { "epoch": 1.6226379351172153, "grad_norm": 1.9765625, "learning_rate": 1.5873665498808155e-06, "loss": 0.5218, "step": 12975 }, { "epoch": 1.6227642276422765, "grad_norm": 1.8984375, "learning_rate": 1.5862873929017053e-06, "loss": 0.4968, "step": 12976 }, { "epoch": 1.6228905201673376, "grad_norm": 2.15625, "learning_rate": 1.5852085712729171e-06, "loss": 0.5468, "step": 12977 }, { "epoch": 1.6230168126923987, "grad_norm": 2.078125, "learning_rate": 1.5841300850374465e-06, "loss": 0.5095, "step": 12978 }, { "epoch": 1.6231431052174599, "grad_norm": 2.0, "learning_rate": 1.58305193423828e-06, "loss": 0.5328, "step": 12979 }, { "epoch": 1.6232693977425212, "grad_norm": 1.984375, "learning_rate": 1.5819741189183902e-06, "loss": 0.5313, "step": 12980 }, { "epoch": 1.6233956902675821, "grad_norm": 2.109375, "learning_rate": 1.5808966391207358e-06, "loss": 0.4411, "step": 12981 }, { "epoch": 1.6235219827926435, "grad_norm": 2.40625, "learning_rate": 1.5798194948882617e-06, "loss": 0.5258, "step": 12982 }, { "epoch": 1.6236482753177046, "grad_norm": 1.84375, "learning_rate": 1.578742686263902e-06, "loss": 0.4535, "step": 12983 }, { "epoch": 1.6237745678427657, "grad_norm": 1.8515625, "learning_rate": 1.5776662132905752e-06, "loss": 0.4618, "step": 12984 }, { "epoch": 1.623900860367827, "grad_norm": 1.6953125, "learning_rate": 1.5765900760111862e-06, "loss": 0.3972, "step": 12985 }, { "epoch": 1.624027152892888, "grad_norm": 1.9921875, "learning_rate": 1.575514274468628e-06, "loss": 0.459, "step": 12986 }, { "epoch": 1.6241534454179494, "grad_norm": 2.125, "learning_rate": 1.5744388087057793e-06, "loss": 0.5441, "step": 12987 }, { "epoch": 1.6242797379430105, "grad_norm": 1.8359375, "learning_rate": 1.573363678765506e-06, "loss": 0.4223, "step": 12988 }, { "epoch": 1.6244060304680716, "grad_norm": 1.890625, "learning_rate": 1.572288884690658e-06, "loss": 0.443, "step": 12989 }, { "epoch": 1.624532322993133, "grad_norm": 1.921875, "learning_rate": 1.571214426524077e-06, "loss": 0.4374, "step": 12990 }, { "epoch": 1.624658615518194, "grad_norm": 2.046875, "learning_rate": 1.570140304308586e-06, "loss": 0.5213, "step": 12991 }, { "epoch": 1.6247849080432553, "grad_norm": 1.9609375, "learning_rate": 1.569066518086999e-06, "loss": 0.5197, "step": 12992 }, { "epoch": 1.6249112005683164, "grad_norm": 2.125, "learning_rate": 1.5679930679021128e-06, "loss": 0.5172, "step": 12993 }, { "epoch": 1.6250374930933775, "grad_norm": 1.9375, "learning_rate": 1.5669199537967106e-06, "loss": 0.5449, "step": 12994 }, { "epoch": 1.6251637856184387, "grad_norm": 1.9296875, "learning_rate": 1.5658471758135697e-06, "loss": 0.4594, "step": 12995 }, { "epoch": 1.6252900781434998, "grad_norm": 1.8203125, "learning_rate": 1.5647747339954445e-06, "loss": 0.4411, "step": 12996 }, { "epoch": 1.6254163706685612, "grad_norm": 1.953125, "learning_rate": 1.563702628385082e-06, "loss": 0.4778, "step": 12997 }, { "epoch": 1.625542663193622, "grad_norm": 1.9921875, "learning_rate": 1.5626308590252114e-06, "loss": 0.432, "step": 12998 }, { "epoch": 1.6256689557186834, "grad_norm": 2.203125, "learning_rate": 1.5615594259585532e-06, "loss": 0.5462, "step": 12999 }, { "epoch": 1.6257952482437446, "grad_norm": 2.125, "learning_rate": 1.5604883292278106e-06, "loss": 0.5228, "step": 13000 }, { "epoch": 1.6259215407688057, "grad_norm": 2.03125, "learning_rate": 1.5594175688756752e-06, "loss": 0.5163, "step": 13001 }, { "epoch": 1.626047833293867, "grad_norm": 1.9921875, "learning_rate": 1.5583471449448251e-06, "loss": 0.4658, "step": 13002 }, { "epoch": 1.626174125818928, "grad_norm": 2.359375, "learning_rate": 1.557277057477925e-06, "loss": 0.6222, "step": 13003 }, { "epoch": 1.6263004183439893, "grad_norm": 1.8984375, "learning_rate": 1.5562073065176253e-06, "loss": 0.4758, "step": 13004 }, { "epoch": 1.6264267108690504, "grad_norm": 1.953125, "learning_rate": 1.5551378921065652e-06, "loss": 0.5135, "step": 13005 }, { "epoch": 1.6265530033941116, "grad_norm": 1.9609375, "learning_rate": 1.5540688142873672e-06, "loss": 0.4588, "step": 13006 }, { "epoch": 1.626679295919173, "grad_norm": 2.015625, "learning_rate": 1.553000073102643e-06, "loss": 0.402, "step": 13007 }, { "epoch": 1.6268055884442338, "grad_norm": 2.015625, "learning_rate": 1.5519316685949903e-06, "loss": 0.5955, "step": 13008 }, { "epoch": 1.6269318809692952, "grad_norm": 1.9140625, "learning_rate": 1.5508636008069932e-06, "loss": 0.4582, "step": 13009 }, { "epoch": 1.6270581734943563, "grad_norm": 2.046875, "learning_rate": 1.549795869781221e-06, "loss": 0.5025, "step": 13010 }, { "epoch": 1.6271844660194175, "grad_norm": 2.015625, "learning_rate": 1.5487284755602328e-06, "loss": 0.5056, "step": 13011 }, { "epoch": 1.6273107585444786, "grad_norm": 1.9765625, "learning_rate": 1.547661418186568e-06, "loss": 0.4084, "step": 13012 }, { "epoch": 1.6274370510695397, "grad_norm": 2.125, "learning_rate": 1.5465946977027634e-06, "loss": 0.5151, "step": 13013 }, { "epoch": 1.627563343594601, "grad_norm": 2.25, "learning_rate": 1.5455283141513333e-06, "loss": 0.5727, "step": 13014 }, { "epoch": 1.6276896361196622, "grad_norm": 1.875, "learning_rate": 1.54446226757478e-06, "loss": 0.4264, "step": 13015 }, { "epoch": 1.6278159286447234, "grad_norm": 1.8203125, "learning_rate": 1.543396558015594e-06, "loss": 0.4806, "step": 13016 }, { "epoch": 1.6279422211697845, "grad_norm": 1.9375, "learning_rate": 1.5423311855162526e-06, "loss": 0.4354, "step": 13017 }, { "epoch": 1.6280685136948456, "grad_norm": 2.0625, "learning_rate": 1.5412661501192184e-06, "loss": 0.4729, "step": 13018 }, { "epoch": 1.628194806219907, "grad_norm": 1.90625, "learning_rate": 1.5402014518669418e-06, "loss": 0.4648, "step": 13019 }, { "epoch": 1.6283210987449679, "grad_norm": 2.03125, "learning_rate": 1.5391370908018589e-06, "loss": 0.5368, "step": 13020 }, { "epoch": 1.6284473912700292, "grad_norm": 1.8828125, "learning_rate": 1.5380730669663912e-06, "loss": 0.4708, "step": 13021 }, { "epoch": 1.6285736837950904, "grad_norm": 2.21875, "learning_rate": 1.5370093804029495e-06, "loss": 0.4844, "step": 13022 }, { "epoch": 1.6286999763201515, "grad_norm": 1.828125, "learning_rate": 1.5359460311539298e-06, "loss": 0.4349, "step": 13023 }, { "epoch": 1.6288262688452129, "grad_norm": 2.109375, "learning_rate": 1.5348830192617148e-06, "loss": 0.4671, "step": 13024 }, { "epoch": 1.6289525613702738, "grad_norm": 1.9921875, "learning_rate": 1.5338203447686717e-06, "loss": 0.4841, "step": 13025 }, { "epoch": 1.6290788538953351, "grad_norm": 2.046875, "learning_rate": 1.5327580077171589e-06, "loss": 0.5219, "step": 13026 }, { "epoch": 1.6292051464203963, "grad_norm": 1.9375, "learning_rate": 1.531696008149517e-06, "loss": 0.4569, "step": 13027 }, { "epoch": 1.6293314389454574, "grad_norm": 1.90625, "learning_rate": 1.5306343461080753e-06, "loss": 0.4794, "step": 13028 }, { "epoch": 1.6294577314705185, "grad_norm": 2.0625, "learning_rate": 1.5295730216351479e-06, "loss": 0.5015, "step": 13029 }, { "epoch": 1.6295840239955797, "grad_norm": 1.96875, "learning_rate": 1.5285120347730354e-06, "loss": 0.4746, "step": 13030 }, { "epoch": 1.629710316520641, "grad_norm": 2.0, "learning_rate": 1.5274513855640315e-06, "loss": 0.4485, "step": 13031 }, { "epoch": 1.6298366090457022, "grad_norm": 2.21875, "learning_rate": 1.5263910740504073e-06, "loss": 0.5617, "step": 13032 }, { "epoch": 1.6299629015707633, "grad_norm": 1.9296875, "learning_rate": 1.5253311002744253e-06, "loss": 0.4218, "step": 13033 }, { "epoch": 1.6300891940958244, "grad_norm": 2.09375, "learning_rate": 1.524271464278333e-06, "loss": 0.5194, "step": 13034 }, { "epoch": 1.6302154866208856, "grad_norm": 2.015625, "learning_rate": 1.5232121661043652e-06, "loss": 0.5564, "step": 13035 }, { "epoch": 1.630341779145947, "grad_norm": 1.9609375, "learning_rate": 1.522153205794742e-06, "loss": 0.5056, "step": 13036 }, { "epoch": 1.6304680716710078, "grad_norm": 2.046875, "learning_rate": 1.5210945833916723e-06, "loss": 0.4954, "step": 13037 }, { "epoch": 1.6305943641960692, "grad_norm": 2.0, "learning_rate": 1.5200362989373508e-06, "loss": 0.5618, "step": 13038 }, { "epoch": 1.6307206567211303, "grad_norm": 2.046875, "learning_rate": 1.5189783524739554e-06, "loss": 0.5257, "step": 13039 }, { "epoch": 1.6308469492461914, "grad_norm": 2.125, "learning_rate": 1.517920744043656e-06, "loss": 0.5368, "step": 13040 }, { "epoch": 1.6309732417712528, "grad_norm": 1.8671875, "learning_rate": 1.5168634736886056e-06, "loss": 0.5227, "step": 13041 }, { "epoch": 1.6310995342963137, "grad_norm": 2.0, "learning_rate": 1.5158065414509437e-06, "loss": 0.5147, "step": 13042 }, { "epoch": 1.631225826821375, "grad_norm": 2.09375, "learning_rate": 1.5147499473727977e-06, "loss": 0.5843, "step": 13043 }, { "epoch": 1.6313521193464362, "grad_norm": 1.9765625, "learning_rate": 1.5136936914962818e-06, "loss": 0.5787, "step": 13044 }, { "epoch": 1.6314784118714973, "grad_norm": 1.8671875, "learning_rate": 1.5126377738634934e-06, "loss": 0.4789, "step": 13045 }, { "epoch": 1.6316047043965587, "grad_norm": 2.140625, "learning_rate": 1.511582194516521e-06, "loss": 0.5199, "step": 13046 }, { "epoch": 1.6317309969216196, "grad_norm": 2.0, "learning_rate": 1.5105269534974364e-06, "loss": 0.4936, "step": 13047 }, { "epoch": 1.631857289446681, "grad_norm": 2.328125, "learning_rate": 1.5094720508482973e-06, "loss": 0.493, "step": 13048 }, { "epoch": 1.631983581971742, "grad_norm": 2.171875, "learning_rate": 1.5084174866111535e-06, "loss": 0.6314, "step": 13049 }, { "epoch": 1.6321098744968032, "grad_norm": 1.828125, "learning_rate": 1.5073632608280354e-06, "loss": 0.4598, "step": 13050 }, { "epoch": 1.6322361670218644, "grad_norm": 2.046875, "learning_rate": 1.5063093735409617e-06, "loss": 0.5123, "step": 13051 }, { "epoch": 1.6323624595469255, "grad_norm": 2.0625, "learning_rate": 1.505255824791939e-06, "loss": 0.4462, "step": 13052 }, { "epoch": 1.6324887520719868, "grad_norm": 1.9609375, "learning_rate": 1.504202614622957e-06, "loss": 0.5352, "step": 13053 }, { "epoch": 1.6326150445970478, "grad_norm": 2.046875, "learning_rate": 1.5031497430759966e-06, "loss": 0.5613, "step": 13054 }, { "epoch": 1.6327413371221091, "grad_norm": 2.078125, "learning_rate": 1.5020972101930208e-06, "loss": 0.4838, "step": 13055 }, { "epoch": 1.6328676296471702, "grad_norm": 2.71875, "learning_rate": 1.5010450160159817e-06, "loss": 0.5522, "step": 13056 }, { "epoch": 1.6329939221722314, "grad_norm": 1.9296875, "learning_rate": 1.4999931605868168e-06, "loss": 0.4316, "step": 13057 }, { "epoch": 1.6331202146972927, "grad_norm": 2.15625, "learning_rate": 1.4989416439474514e-06, "loss": 0.4865, "step": 13058 }, { "epoch": 1.6332465072223536, "grad_norm": 1.890625, "learning_rate": 1.4978904661397965e-06, "loss": 0.4818, "step": 13059 }, { "epoch": 1.633372799747415, "grad_norm": 1.8984375, "learning_rate": 1.4968396272057484e-06, "loss": 0.4689, "step": 13060 }, { "epoch": 1.6334990922724761, "grad_norm": 1.9375, "learning_rate": 1.4957891271871915e-06, "loss": 0.472, "step": 13061 }, { "epoch": 1.6336253847975373, "grad_norm": 2.0625, "learning_rate": 1.494738966125997e-06, "loss": 0.5627, "step": 13062 }, { "epoch": 1.6337516773225986, "grad_norm": 1.9609375, "learning_rate": 1.493689144064021e-06, "loss": 0.4585, "step": 13063 }, { "epoch": 1.6338779698476595, "grad_norm": 2.0625, "learning_rate": 1.492639661043106e-06, "loss": 0.4831, "step": 13064 }, { "epoch": 1.634004262372721, "grad_norm": 2.3125, "learning_rate": 1.4915905171050838e-06, "loss": 0.5002, "step": 13065 }, { "epoch": 1.634130554897782, "grad_norm": 2.078125, "learning_rate": 1.4905417122917676e-06, "loss": 0.5122, "step": 13066 }, { "epoch": 1.6342568474228432, "grad_norm": 2.046875, "learning_rate": 1.489493246644964e-06, "loss": 0.5029, "step": 13067 }, { "epoch": 1.6343831399479043, "grad_norm": 1.984375, "learning_rate": 1.4884451202064609e-06, "loss": 0.5384, "step": 13068 }, { "epoch": 1.6345094324729654, "grad_norm": 2.15625, "learning_rate": 1.4873973330180347e-06, "loss": 0.6159, "step": 13069 }, { "epoch": 1.6346357249980268, "grad_norm": 2.046875, "learning_rate": 1.4863498851214464e-06, "loss": 0.5097, "step": 13070 }, { "epoch": 1.6347620175230877, "grad_norm": 2.09375, "learning_rate": 1.485302776558445e-06, "loss": 0.4621, "step": 13071 }, { "epoch": 1.634888310048149, "grad_norm": 2.03125, "learning_rate": 1.4842560073707668e-06, "loss": 0.507, "step": 13072 }, { "epoch": 1.6350146025732102, "grad_norm": 2.171875, "learning_rate": 1.4832095776001321e-06, "loss": 0.4835, "step": 13073 }, { "epoch": 1.6351408950982713, "grad_norm": 2.234375, "learning_rate": 1.4821634872882495e-06, "loss": 0.5404, "step": 13074 }, { "epoch": 1.6352671876233327, "grad_norm": 2.015625, "learning_rate": 1.4811177364768138e-06, "loss": 0.4604, "step": 13075 }, { "epoch": 1.6353934801483936, "grad_norm": 2.1875, "learning_rate": 1.4800723252075067e-06, "loss": 0.5337, "step": 13076 }, { "epoch": 1.635519772673455, "grad_norm": 1.921875, "learning_rate": 1.479027253521994e-06, "loss": 0.5124, "step": 13077 }, { "epoch": 1.635646065198516, "grad_norm": 1.9375, "learning_rate": 1.4779825214619325e-06, "loss": 0.4349, "step": 13078 }, { "epoch": 1.6357723577235772, "grad_norm": 1.8828125, "learning_rate": 1.4769381290689598e-06, "loss": 0.4212, "step": 13079 }, { "epoch": 1.6358986502486386, "grad_norm": 1.8359375, "learning_rate": 1.4758940763847052e-06, "loss": 0.4541, "step": 13080 }, { "epoch": 1.6360249427736995, "grad_norm": 2.15625, "learning_rate": 1.4748503634507804e-06, "loss": 0.4706, "step": 13081 }, { "epoch": 1.6361512352987608, "grad_norm": 2.0, "learning_rate": 1.473806990308786e-06, "loss": 0.4687, "step": 13082 }, { "epoch": 1.636277527823822, "grad_norm": 1.9296875, "learning_rate": 1.4727639570003093e-06, "loss": 0.5299, "step": 13083 }, { "epoch": 1.636403820348883, "grad_norm": 1.9609375, "learning_rate": 1.4717212635669208e-06, "loss": 0.4926, "step": 13084 }, { "epoch": 1.6365301128739442, "grad_norm": 2.0, "learning_rate": 1.4706789100501794e-06, "loss": 0.4503, "step": 13085 }, { "epoch": 1.6366564053990054, "grad_norm": 2.015625, "learning_rate": 1.4696368964916352e-06, "loss": 0.5083, "step": 13086 }, { "epoch": 1.6367826979240667, "grad_norm": 1.921875, "learning_rate": 1.468595222932817e-06, "loss": 0.4431, "step": 13087 }, { "epoch": 1.6369089904491276, "grad_norm": 1.90625, "learning_rate": 1.4675538894152442e-06, "loss": 0.5043, "step": 13088 }, { "epoch": 1.637035282974189, "grad_norm": 2.015625, "learning_rate": 1.466512895980422e-06, "loss": 0.5228, "step": 13089 }, { "epoch": 1.6371615754992501, "grad_norm": 2.109375, "learning_rate": 1.465472242669841e-06, "loss": 0.4985, "step": 13090 }, { "epoch": 1.6372878680243113, "grad_norm": 2.015625, "learning_rate": 1.4644319295249798e-06, "loss": 0.4664, "step": 13091 }, { "epoch": 1.6374141605493726, "grad_norm": 1.9375, "learning_rate": 1.4633919565873033e-06, "loss": 0.4871, "step": 13092 }, { "epoch": 1.6375404530744335, "grad_norm": 2.015625, "learning_rate": 1.4623523238982618e-06, "loss": 0.5142, "step": 13093 }, { "epoch": 1.6376667455994949, "grad_norm": 2.0625, "learning_rate": 1.4613130314992919e-06, "loss": 0.4898, "step": 13094 }, { "epoch": 1.637793038124556, "grad_norm": 2.0, "learning_rate": 1.4602740794318182e-06, "loss": 0.5377, "step": 13095 }, { "epoch": 1.6379193306496171, "grad_norm": 1.8515625, "learning_rate": 1.45923546773725e-06, "loss": 0.4869, "step": 13096 }, { "epoch": 1.6380456231746785, "grad_norm": 2.0625, "learning_rate": 1.458197196456984e-06, "loss": 0.5995, "step": 13097 }, { "epoch": 1.6381719156997394, "grad_norm": 1.96875, "learning_rate": 1.457159265632404e-06, "loss": 0.4494, "step": 13098 }, { "epoch": 1.6382982082248008, "grad_norm": 1.9765625, "learning_rate": 1.456121675304879e-06, "loss": 0.4786, "step": 13099 }, { "epoch": 1.638424500749862, "grad_norm": 2.0, "learning_rate": 1.4550844255157647e-06, "loss": 0.5309, "step": 13100 }, { "epoch": 1.638550793274923, "grad_norm": 2.09375, "learning_rate": 1.4540475163064038e-06, "loss": 0.4777, "step": 13101 }, { "epoch": 1.6386770857999842, "grad_norm": 2.125, "learning_rate": 1.4530109477181254e-06, "loss": 0.5195, "step": 13102 }, { "epoch": 1.6388033783250453, "grad_norm": 1.96875, "learning_rate": 1.4519747197922406e-06, "loss": 0.4683, "step": 13103 }, { "epoch": 1.6389296708501067, "grad_norm": 1.9765625, "learning_rate": 1.4509388325700557e-06, "loss": 0.5032, "step": 13104 }, { "epoch": 1.6390559633751676, "grad_norm": 2.3125, "learning_rate": 1.4499032860928587e-06, "loss": 0.526, "step": 13105 }, { "epoch": 1.639182255900229, "grad_norm": 1.9375, "learning_rate": 1.4488680804019216e-06, "loss": 0.4208, "step": 13106 }, { "epoch": 1.63930854842529, "grad_norm": 2.015625, "learning_rate": 1.4478332155385067e-06, "loss": 0.5067, "step": 13107 }, { "epoch": 1.6394348409503512, "grad_norm": 1.9140625, "learning_rate": 1.4467986915438602e-06, "loss": 0.4762, "step": 13108 }, { "epoch": 1.6395611334754125, "grad_norm": 1.9296875, "learning_rate": 1.4457645084592164e-06, "loss": 0.4559, "step": 13109 }, { "epoch": 1.6396874260004735, "grad_norm": 1.8828125, "learning_rate": 1.4447306663257953e-06, "loss": 0.4792, "step": 13110 }, { "epoch": 1.6398137185255348, "grad_norm": 2.046875, "learning_rate": 1.4436971651848031e-06, "loss": 0.4929, "step": 13111 }, { "epoch": 1.639940011050596, "grad_norm": 2.015625, "learning_rate": 1.4426640050774332e-06, "loss": 0.5162, "step": 13112 }, { "epoch": 1.640066303575657, "grad_norm": 1.890625, "learning_rate": 1.4416311860448639e-06, "loss": 0.4524, "step": 13113 }, { "epoch": 1.6401925961007184, "grad_norm": 2.0625, "learning_rate": 1.4405987081282613e-06, "loss": 0.5735, "step": 13114 }, { "epoch": 1.6403188886257793, "grad_norm": 1.953125, "learning_rate": 1.439566571368778e-06, "loss": 0.4407, "step": 13115 }, { "epoch": 1.6404451811508407, "grad_norm": 2.140625, "learning_rate": 1.4385347758075518e-06, "loss": 0.5566, "step": 13116 }, { "epoch": 1.6405714736759018, "grad_norm": 1.90625, "learning_rate": 1.437503321485708e-06, "loss": 0.4755, "step": 13117 }, { "epoch": 1.640697766200963, "grad_norm": 1.890625, "learning_rate": 1.4364722084443572e-06, "loss": 0.4656, "step": 13118 }, { "epoch": 1.640824058726024, "grad_norm": 2.109375, "learning_rate": 1.4354414367245984e-06, "loss": 0.415, "step": 13119 }, { "epoch": 1.6409503512510852, "grad_norm": 1.984375, "learning_rate": 1.4344110063675143e-06, "loss": 0.4822, "step": 13120 }, { "epoch": 1.6410766437761466, "grad_norm": 2.03125, "learning_rate": 1.4333809174141733e-06, "loss": 0.4743, "step": 13121 }, { "epoch": 1.6412029363012077, "grad_norm": 2.0625, "learning_rate": 1.4323511699056381e-06, "loss": 0.4922, "step": 13122 }, { "epoch": 1.6413292288262689, "grad_norm": 1.84375, "learning_rate": 1.431321763882949e-06, "loss": 0.4725, "step": 13123 }, { "epoch": 1.64145552135133, "grad_norm": 1.9375, "learning_rate": 1.4302926993871347e-06, "loss": 0.4889, "step": 13124 }, { "epoch": 1.6415818138763911, "grad_norm": 2.0625, "learning_rate": 1.429263976459212e-06, "loss": 0.5261, "step": 13125 }, { "epoch": 1.6417081064014525, "grad_norm": 1.84375, "learning_rate": 1.428235595140184e-06, "loss": 0.4364, "step": 13126 }, { "epoch": 1.6418343989265134, "grad_norm": 1.90625, "learning_rate": 1.4272075554710374e-06, "loss": 0.5349, "step": 13127 }, { "epoch": 1.6419606914515747, "grad_norm": 1.8046875, "learning_rate": 1.4261798574927499e-06, "loss": 0.4712, "step": 13128 }, { "epoch": 1.6420869839766359, "grad_norm": 1.8671875, "learning_rate": 1.425152501246282e-06, "loss": 0.4006, "step": 13129 }, { "epoch": 1.642213276501697, "grad_norm": 1.78125, "learning_rate": 1.4241254867725806e-06, "loss": 0.4258, "step": 13130 }, { "epoch": 1.6423395690267584, "grad_norm": 1.8671875, "learning_rate": 1.4230988141125822e-06, "loss": 0.4265, "step": 13131 }, { "epoch": 1.6424658615518193, "grad_norm": 2.015625, "learning_rate": 1.4220724833072052e-06, "loss": 0.522, "step": 13132 }, { "epoch": 1.6425921540768806, "grad_norm": 1.9921875, "learning_rate": 1.421046494397359e-06, "loss": 0.4786, "step": 13133 }, { "epoch": 1.6427184466019418, "grad_norm": 1.96875, "learning_rate": 1.4200208474239352e-06, "loss": 0.5401, "step": 13134 }, { "epoch": 1.642844739127003, "grad_norm": 1.9609375, "learning_rate": 1.4189955424278145e-06, "loss": 0.4816, "step": 13135 }, { "epoch": 1.642971031652064, "grad_norm": 2.125, "learning_rate": 1.417970579449862e-06, "loss": 0.4754, "step": 13136 }, { "epoch": 1.6430973241771252, "grad_norm": 2.078125, "learning_rate": 1.4169459585309319e-06, "loss": 0.4993, "step": 13137 }, { "epoch": 1.6432236167021865, "grad_norm": 2.171875, "learning_rate": 1.4159216797118624e-06, "loss": 0.5146, "step": 13138 }, { "epoch": 1.6433499092272477, "grad_norm": 2.03125, "learning_rate": 1.4148977430334753e-06, "loss": 0.4204, "step": 13139 }, { "epoch": 1.6434762017523088, "grad_norm": 2.046875, "learning_rate": 1.4138741485365892e-06, "loss": 0.5007, "step": 13140 }, { "epoch": 1.64360249427737, "grad_norm": 2.125, "learning_rate": 1.412850896261998e-06, "loss": 0.525, "step": 13141 }, { "epoch": 1.643728786802431, "grad_norm": 2.1875, "learning_rate": 1.411827986250487e-06, "loss": 0.5052, "step": 13142 }, { "epoch": 1.6438550793274924, "grad_norm": 2.15625, "learning_rate": 1.4108054185428256e-06, "loss": 0.4678, "step": 13143 }, { "epoch": 1.6439813718525533, "grad_norm": 1.9453125, "learning_rate": 1.4097831931797734e-06, "loss": 0.4576, "step": 13144 }, { "epoch": 1.6441076643776147, "grad_norm": 1.875, "learning_rate": 1.4087613102020714e-06, "loss": 0.4429, "step": 13145 }, { "epoch": 1.6442339569026758, "grad_norm": 1.84375, "learning_rate": 1.4077397696504502e-06, "loss": 0.4584, "step": 13146 }, { "epoch": 1.644360249427737, "grad_norm": 2.109375, "learning_rate": 1.406718571565626e-06, "loss": 0.584, "step": 13147 }, { "epoch": 1.6444865419527983, "grad_norm": 1.8125, "learning_rate": 1.4056977159883011e-06, "loss": 0.4227, "step": 13148 }, { "epoch": 1.6446128344778592, "grad_norm": 1.9453125, "learning_rate": 1.4046772029591648e-06, "loss": 0.4666, "step": 13149 }, { "epoch": 1.6447391270029206, "grad_norm": 1.828125, "learning_rate": 1.4036570325188926e-06, "loss": 0.4828, "step": 13150 }, { "epoch": 1.6448654195279817, "grad_norm": 2.03125, "learning_rate": 1.402637204708145e-06, "loss": 0.4541, "step": 13151 }, { "epoch": 1.6449917120530428, "grad_norm": 1.9609375, "learning_rate": 1.4016177195675695e-06, "loss": 0.4178, "step": 13152 }, { "epoch": 1.6451180045781042, "grad_norm": 2.1875, "learning_rate": 1.4005985771378027e-06, "loss": 0.491, "step": 13153 }, { "epoch": 1.645244297103165, "grad_norm": 1.9453125, "learning_rate": 1.3995797774594632e-06, "loss": 0.5067, "step": 13154 }, { "epoch": 1.6453705896282265, "grad_norm": 2.109375, "learning_rate": 1.3985613205731585e-06, "loss": 0.5367, "step": 13155 }, { "epoch": 1.6454968821532876, "grad_norm": 2.09375, "learning_rate": 1.3975432065194804e-06, "loss": 0.5729, "step": 13156 }, { "epoch": 1.6456231746783487, "grad_norm": 2.03125, "learning_rate": 1.396525435339009e-06, "loss": 0.4395, "step": 13157 }, { "epoch": 1.6457494672034099, "grad_norm": 2.078125, "learning_rate": 1.3955080070723126e-06, "loss": 0.5096, "step": 13158 }, { "epoch": 1.645875759728471, "grad_norm": 1.921875, "learning_rate": 1.3944909217599422e-06, "loss": 0.4935, "step": 13159 }, { "epoch": 1.6460020522535324, "grad_norm": 2.046875, "learning_rate": 1.3934741794424355e-06, "loss": 0.6275, "step": 13160 }, { "epoch": 1.6461283447785933, "grad_norm": 2.046875, "learning_rate": 1.392457780160319e-06, "loss": 0.5108, "step": 13161 }, { "epoch": 1.6462546373036546, "grad_norm": 2.25, "learning_rate": 1.391441723954101e-06, "loss": 0.6904, "step": 13162 }, { "epoch": 1.6463809298287158, "grad_norm": 2.015625, "learning_rate": 1.390426010864282e-06, "loss": 0.4813, "step": 13163 }, { "epoch": 1.646507222353777, "grad_norm": 1.9609375, "learning_rate": 1.3894106409313445e-06, "loss": 0.4359, "step": 13164 }, { "epoch": 1.6466335148788382, "grad_norm": 2.125, "learning_rate": 1.38839561419576e-06, "loss": 0.4377, "step": 13165 }, { "epoch": 1.6467598074038992, "grad_norm": 2.015625, "learning_rate": 1.3873809306979824e-06, "loss": 0.5195, "step": 13166 }, { "epoch": 1.6468860999289605, "grad_norm": 1.8828125, "learning_rate": 1.3863665904784573e-06, "loss": 0.4684, "step": 13167 }, { "epoch": 1.6470123924540216, "grad_norm": 2.0, "learning_rate": 1.3853525935776112e-06, "loss": 0.4846, "step": 13168 }, { "epoch": 1.6471386849790828, "grad_norm": 1.8359375, "learning_rate": 1.384338940035862e-06, "loss": 0.4193, "step": 13169 }, { "epoch": 1.6472649775041441, "grad_norm": 2.0625, "learning_rate": 1.3833256298936105e-06, "loss": 0.4717, "step": 13170 }, { "epoch": 1.647391270029205, "grad_norm": 2.25, "learning_rate": 1.3823126631912454e-06, "loss": 0.5526, "step": 13171 }, { "epoch": 1.6475175625542664, "grad_norm": 2.109375, "learning_rate": 1.38130003996914e-06, "loss": 0.5007, "step": 13172 }, { "epoch": 1.6476438550793275, "grad_norm": 2.265625, "learning_rate": 1.3802877602676556e-06, "loss": 0.5985, "step": 13173 }, { "epoch": 1.6477701476043887, "grad_norm": 2.15625, "learning_rate": 1.3792758241271386e-06, "loss": 0.6092, "step": 13174 }, { "epoch": 1.6478964401294498, "grad_norm": 2.109375, "learning_rate": 1.3782642315879213e-06, "loss": 0.5611, "step": 13175 }, { "epoch": 1.648022732654511, "grad_norm": 1.90625, "learning_rate": 1.377252982690327e-06, "loss": 0.5203, "step": 13176 }, { "epoch": 1.6481490251795723, "grad_norm": 1.9765625, "learning_rate": 1.3762420774746598e-06, "loss": 0.5277, "step": 13177 }, { "epoch": 1.6482753177046332, "grad_norm": 1.8515625, "learning_rate": 1.3752315159812123e-06, "loss": 0.4564, "step": 13178 }, { "epoch": 1.6484016102296946, "grad_norm": 2.015625, "learning_rate": 1.3742212982502613e-06, "loss": 0.4337, "step": 13179 }, { "epoch": 1.6485279027547557, "grad_norm": 1.921875, "learning_rate": 1.373211424322074e-06, "loss": 0.4665, "step": 13180 }, { "epoch": 1.6486541952798168, "grad_norm": 1.9453125, "learning_rate": 1.3722018942368997e-06, "loss": 0.4497, "step": 13181 }, { "epoch": 1.6487804878048782, "grad_norm": 2.015625, "learning_rate": 1.3711927080349762e-06, "loss": 0.506, "step": 13182 }, { "epoch": 1.648906780329939, "grad_norm": 2.015625, "learning_rate": 1.3701838657565282e-06, "loss": 0.4994, "step": 13183 }, { "epoch": 1.6490330728550004, "grad_norm": 2.015625, "learning_rate": 1.3691753674417652e-06, "loss": 0.5296, "step": 13184 }, { "epoch": 1.6491593653800616, "grad_norm": 2.03125, "learning_rate": 1.3681672131308832e-06, "loss": 0.5462, "step": 13185 }, { "epoch": 1.6492856579051227, "grad_norm": 2.03125, "learning_rate": 1.3671594028640656e-06, "loss": 0.4885, "step": 13186 }, { "epoch": 1.649411950430184, "grad_norm": 1.90625, "learning_rate": 1.3661519366814802e-06, "loss": 0.4985, "step": 13187 }, { "epoch": 1.649538242955245, "grad_norm": 2.015625, "learning_rate": 1.3651448146232816e-06, "loss": 0.4932, "step": 13188 }, { "epoch": 1.6496645354803063, "grad_norm": 1.875, "learning_rate": 1.3641380367296142e-06, "loss": 0.4174, "step": 13189 }, { "epoch": 1.6497908280053675, "grad_norm": 2.140625, "learning_rate": 1.3631316030406028e-06, "loss": 0.5846, "step": 13190 }, { "epoch": 1.6499171205304286, "grad_norm": 1.9375, "learning_rate": 1.3621255135963628e-06, "loss": 0.4778, "step": 13191 }, { "epoch": 1.6500434130554897, "grad_norm": 1.9296875, "learning_rate": 1.3611197684369937e-06, "loss": 0.4268, "step": 13192 }, { "epoch": 1.6501697055805509, "grad_norm": 1.8671875, "learning_rate": 1.3601143676025808e-06, "loss": 0.4632, "step": 13193 }, { "epoch": 1.6502959981056122, "grad_norm": 2.15625, "learning_rate": 1.359109311133201e-06, "loss": 0.4911, "step": 13194 }, { "epoch": 1.6504222906306731, "grad_norm": 1.9375, "learning_rate": 1.3581045990689112e-06, "loss": 0.522, "step": 13195 }, { "epoch": 1.6505485831557345, "grad_norm": 2.109375, "learning_rate": 1.357100231449756e-06, "loss": 0.4736, "step": 13196 }, { "epoch": 1.6506748756807956, "grad_norm": 1.890625, "learning_rate": 1.3560962083157693e-06, "loss": 0.4895, "step": 13197 }, { "epoch": 1.6508011682058568, "grad_norm": 1.9453125, "learning_rate": 1.3550925297069662e-06, "loss": 0.5075, "step": 13198 }, { "epoch": 1.6509274607309181, "grad_norm": 1.875, "learning_rate": 1.3540891956633528e-06, "loss": 0.4627, "step": 13199 }, { "epoch": 1.651053753255979, "grad_norm": 1.9609375, "learning_rate": 1.3530862062249194e-06, "loss": 0.4819, "step": 13200 }, { "epoch": 1.6511800457810404, "grad_norm": 1.9140625, "learning_rate": 1.352083561431643e-06, "loss": 0.4641, "step": 13201 }, { "epoch": 1.6513063383061015, "grad_norm": 1.7890625, "learning_rate": 1.3510812613234848e-06, "loss": 0.446, "step": 13202 }, { "epoch": 1.6514326308311627, "grad_norm": 1.96875, "learning_rate": 1.350079305940396e-06, "loss": 0.4909, "step": 13203 }, { "epoch": 1.651558923356224, "grad_norm": 1.9921875, "learning_rate": 1.3490776953223107e-06, "loss": 0.5212, "step": 13204 }, { "epoch": 1.651685215881285, "grad_norm": 1.8359375, "learning_rate": 1.348076429509152e-06, "loss": 0.4617, "step": 13205 }, { "epoch": 1.6518115084063463, "grad_norm": 1.9296875, "learning_rate": 1.3470755085408272e-06, "loss": 0.4995, "step": 13206 }, { "epoch": 1.6519378009314074, "grad_norm": 1.9609375, "learning_rate": 1.346074932457231e-06, "loss": 0.4551, "step": 13207 }, { "epoch": 1.6520640934564685, "grad_norm": 1.9765625, "learning_rate": 1.3450747012982435e-06, "loss": 0.4821, "step": 13208 }, { "epoch": 1.6521903859815297, "grad_norm": 2.0625, "learning_rate": 1.3440748151037319e-06, "loss": 0.4766, "step": 13209 }, { "epoch": 1.6523166785065908, "grad_norm": 1.9453125, "learning_rate": 1.3430752739135488e-06, "loss": 0.5152, "step": 13210 }, { "epoch": 1.6524429710316522, "grad_norm": 2.21875, "learning_rate": 1.3420760777675312e-06, "loss": 0.6299, "step": 13211 }, { "epoch": 1.652569263556713, "grad_norm": 2.15625, "learning_rate": 1.3410772267055093e-06, "loss": 0.5841, "step": 13212 }, { "epoch": 1.6526955560817744, "grad_norm": 1.9375, "learning_rate": 1.3400787207672939e-06, "loss": 0.4868, "step": 13213 }, { "epoch": 1.6528218486068356, "grad_norm": 1.9375, "learning_rate": 1.3390805599926814e-06, "loss": 0.5266, "step": 13214 }, { "epoch": 1.6529481411318967, "grad_norm": 2.0625, "learning_rate": 1.3380827444214572e-06, "loss": 0.5237, "step": 13215 }, { "epoch": 1.653074433656958, "grad_norm": 2.046875, "learning_rate": 1.3370852740933905e-06, "loss": 0.5177, "step": 13216 }, { "epoch": 1.653200726182019, "grad_norm": 1.796875, "learning_rate": 1.336088149048239e-06, "loss": 0.4507, "step": 13217 }, { "epoch": 1.6533270187070803, "grad_norm": 2.15625, "learning_rate": 1.335091369325746e-06, "loss": 0.5071, "step": 13218 }, { "epoch": 1.6534533112321415, "grad_norm": 2.0625, "learning_rate": 1.3340949349656395e-06, "loss": 0.4992, "step": 13219 }, { "epoch": 1.6535796037572026, "grad_norm": 1.96875, "learning_rate": 1.3330988460076367e-06, "loss": 0.5313, "step": 13220 }, { "epoch": 1.653705896282264, "grad_norm": 1.96875, "learning_rate": 1.3321031024914377e-06, "loss": 0.4979, "step": 13221 }, { "epoch": 1.6538321888073249, "grad_norm": 1.9375, "learning_rate": 1.331107704456731e-06, "loss": 0.554, "step": 13222 }, { "epoch": 1.6539584813323862, "grad_norm": 2.0625, "learning_rate": 1.3301126519431907e-06, "loss": 0.5174, "step": 13223 }, { "epoch": 1.6540847738574473, "grad_norm": 1.9140625, "learning_rate": 1.329117944990478e-06, "loss": 0.5045, "step": 13224 }, { "epoch": 1.6542110663825085, "grad_norm": 2.0, "learning_rate": 1.3281235836382388e-06, "loss": 0.4391, "step": 13225 }, { "epoch": 1.6543373589075696, "grad_norm": 2.09375, "learning_rate": 1.3271295679261053e-06, "loss": 0.5651, "step": 13226 }, { "epoch": 1.6544636514326307, "grad_norm": 1.9296875, "learning_rate": 1.3261358978936979e-06, "loss": 0.4319, "step": 13227 }, { "epoch": 1.654589943957692, "grad_norm": 2.09375, "learning_rate": 1.3251425735806212e-06, "loss": 0.5782, "step": 13228 }, { "epoch": 1.654716236482753, "grad_norm": 2.0, "learning_rate": 1.3241495950264649e-06, "loss": 0.4591, "step": 13229 }, { "epoch": 1.6548425290078144, "grad_norm": 1.9140625, "learning_rate": 1.3231569622708107e-06, "loss": 0.564, "step": 13230 }, { "epoch": 1.6549688215328755, "grad_norm": 2.15625, "learning_rate": 1.3221646753532202e-06, "loss": 0.5028, "step": 13231 }, { "epoch": 1.6550951140579366, "grad_norm": 1.90625, "learning_rate": 1.3211727343132441e-06, "loss": 0.4465, "step": 13232 }, { "epoch": 1.655221406582998, "grad_norm": 1.96875, "learning_rate": 1.3201811391904185e-06, "loss": 0.4733, "step": 13233 }, { "epoch": 1.655347699108059, "grad_norm": 2.125, "learning_rate": 1.3191898900242662e-06, "loss": 0.5518, "step": 13234 }, { "epoch": 1.6554739916331203, "grad_norm": 1.9375, "learning_rate": 1.3181989868542955e-06, "loss": 0.4549, "step": 13235 }, { "epoch": 1.6556002841581814, "grad_norm": 2.03125, "learning_rate": 1.3172084297200028e-06, "loss": 0.5237, "step": 13236 }, { "epoch": 1.6557265766832425, "grad_norm": 1.9921875, "learning_rate": 1.3162182186608674e-06, "loss": 0.4945, "step": 13237 }, { "epoch": 1.6558528692083039, "grad_norm": 1.828125, "learning_rate": 1.315228353716357e-06, "loss": 0.4577, "step": 13238 }, { "epoch": 1.6559791617333648, "grad_norm": 2.015625, "learning_rate": 1.3142388349259261e-06, "loss": 0.4696, "step": 13239 }, { "epoch": 1.6561054542584261, "grad_norm": 1.9765625, "learning_rate": 1.3132496623290148e-06, "loss": 0.5047, "step": 13240 }, { "epoch": 1.6562317467834873, "grad_norm": 1.8203125, "learning_rate": 1.312260835965049e-06, "loss": 0.4067, "step": 13241 }, { "epoch": 1.6563580393085484, "grad_norm": 2.296875, "learning_rate": 1.3112723558734397e-06, "loss": 0.598, "step": 13242 }, { "epoch": 1.6564843318336095, "grad_norm": 1.984375, "learning_rate": 1.3102842220935863e-06, "loss": 0.4547, "step": 13243 }, { "epoch": 1.6566106243586707, "grad_norm": 1.84375, "learning_rate": 1.309296434664874e-06, "loss": 0.5036, "step": 13244 }, { "epoch": 1.656736916883732, "grad_norm": 1.8515625, "learning_rate": 1.3083089936266724e-06, "loss": 0.448, "step": 13245 }, { "epoch": 1.6568632094087932, "grad_norm": 2.015625, "learning_rate": 1.307321899018339e-06, "loss": 0.4656, "step": 13246 }, { "epoch": 1.6569895019338543, "grad_norm": 1.921875, "learning_rate": 1.3063351508792144e-06, "loss": 0.5051, "step": 13247 }, { "epoch": 1.6571157944589154, "grad_norm": 2.140625, "learning_rate": 1.305348749248634e-06, "loss": 0.4783, "step": 13248 }, { "epoch": 1.6572420869839766, "grad_norm": 2.140625, "learning_rate": 1.3043626941659093e-06, "loss": 0.5216, "step": 13249 }, { "epoch": 1.657368379509038, "grad_norm": 2.09375, "learning_rate": 1.3033769856703437e-06, "loss": 0.506, "step": 13250 }, { "epoch": 1.6574946720340988, "grad_norm": 2.046875, "learning_rate": 1.3023916238012235e-06, "loss": 0.5106, "step": 13251 }, { "epoch": 1.6576209645591602, "grad_norm": 1.875, "learning_rate": 1.3014066085978238e-06, "loss": 0.4559, "step": 13252 }, { "epoch": 1.6577472570842213, "grad_norm": 2.0625, "learning_rate": 1.3004219400994044e-06, "loss": 0.5072, "step": 13253 }, { "epoch": 1.6578735496092825, "grad_norm": 2.09375, "learning_rate": 1.2994376183452128e-06, "loss": 0.4939, "step": 13254 }, { "epoch": 1.6579998421343438, "grad_norm": 2.0625, "learning_rate": 1.298453643374481e-06, "loss": 0.5311, "step": 13255 }, { "epoch": 1.6581261346594047, "grad_norm": 2.171875, "learning_rate": 1.2974700152264287e-06, "loss": 0.4897, "step": 13256 }, { "epoch": 1.658252427184466, "grad_norm": 1.984375, "learning_rate": 1.2964867339402587e-06, "loss": 0.5075, "step": 13257 }, { "epoch": 1.6583787197095272, "grad_norm": 1.9609375, "learning_rate": 1.2955037995551645e-06, "loss": 0.459, "step": 13258 }, { "epoch": 1.6585050122345883, "grad_norm": 1.9453125, "learning_rate": 1.2945212121103234e-06, "loss": 0.4715, "step": 13259 }, { "epoch": 1.6586313047596495, "grad_norm": 1.9765625, "learning_rate": 1.2935389716448976e-06, "loss": 0.4642, "step": 13260 }, { "epoch": 1.6587575972847106, "grad_norm": 1.953125, "learning_rate": 1.2925570781980379e-06, "loss": 0.4881, "step": 13261 }, { "epoch": 1.658883889809772, "grad_norm": 1.9609375, "learning_rate": 1.2915755318088795e-06, "loss": 0.4792, "step": 13262 }, { "epoch": 1.659010182334833, "grad_norm": 2.09375, "learning_rate": 1.2905943325165448e-06, "loss": 0.525, "step": 13263 }, { "epoch": 1.6591364748598942, "grad_norm": 1.984375, "learning_rate": 1.2896134803601424e-06, "loss": 0.464, "step": 13264 }, { "epoch": 1.6592627673849554, "grad_norm": 1.8828125, "learning_rate": 1.288632975378764e-06, "loss": 0.4334, "step": 13265 }, { "epoch": 1.6593890599100165, "grad_norm": 2.015625, "learning_rate": 1.2876528176114956e-06, "loss": 0.5002, "step": 13266 }, { "epoch": 1.6595153524350779, "grad_norm": 2.109375, "learning_rate": 1.2866730070973998e-06, "loss": 0.4851, "step": 13267 }, { "epoch": 1.6596416449601388, "grad_norm": 1.921875, "learning_rate": 1.2856935438755314e-06, "loss": 0.4373, "step": 13268 }, { "epoch": 1.6597679374852001, "grad_norm": 1.9453125, "learning_rate": 1.2847144279849277e-06, "loss": 0.5265, "step": 13269 }, { "epoch": 1.6598942300102613, "grad_norm": 1.90625, "learning_rate": 1.2837356594646155e-06, "loss": 0.4626, "step": 13270 }, { "epoch": 1.6600205225353224, "grad_norm": 2.078125, "learning_rate": 1.2827572383536057e-06, "loss": 0.571, "step": 13271 }, { "epoch": 1.6601468150603838, "grad_norm": 2.109375, "learning_rate": 1.281779164690895e-06, "loss": 0.4575, "step": 13272 }, { "epoch": 1.6602731075854447, "grad_norm": 2.1875, "learning_rate": 1.2808014385154676e-06, "loss": 0.5118, "step": 13273 }, { "epoch": 1.660399400110506, "grad_norm": 1.8828125, "learning_rate": 1.2798240598662936e-06, "loss": 0.4616, "step": 13274 }, { "epoch": 1.6605256926355672, "grad_norm": 1.8671875, "learning_rate": 1.2788470287823284e-06, "loss": 0.4624, "step": 13275 }, { "epoch": 1.6606519851606283, "grad_norm": 2.015625, "learning_rate": 1.2778703453025155e-06, "loss": 0.4978, "step": 13276 }, { "epoch": 1.6607782776856896, "grad_norm": 1.984375, "learning_rate": 1.276894009465781e-06, "loss": 0.4912, "step": 13277 }, { "epoch": 1.6609045702107506, "grad_norm": 2.171875, "learning_rate": 1.27591802131104e-06, "loss": 0.5321, "step": 13278 }, { "epoch": 1.661030862735812, "grad_norm": 1.9765625, "learning_rate": 1.2749423808771944e-06, "loss": 0.5093, "step": 13279 }, { "epoch": 1.661157155260873, "grad_norm": 2.015625, "learning_rate": 1.2739670882031296e-06, "loss": 0.4634, "step": 13280 }, { "epoch": 1.6612834477859342, "grad_norm": 1.890625, "learning_rate": 1.2729921433277182e-06, "loss": 0.4528, "step": 13281 }, { "epoch": 1.6614097403109953, "grad_norm": 2.15625, "learning_rate": 1.27201754628982e-06, "loss": 0.4902, "step": 13282 }, { "epoch": 1.6615360328360564, "grad_norm": 2.25, "learning_rate": 1.2710432971282783e-06, "loss": 0.5119, "step": 13283 }, { "epoch": 1.6616623253611178, "grad_norm": 2.0625, "learning_rate": 1.2700693958819266e-06, "loss": 0.5078, "step": 13284 }, { "epoch": 1.6617886178861787, "grad_norm": 2.25, "learning_rate": 1.2690958425895828e-06, "loss": 0.6077, "step": 13285 }, { "epoch": 1.66191491041124, "grad_norm": 1.9921875, "learning_rate": 1.2681226372900478e-06, "loss": 0.4888, "step": 13286 }, { "epoch": 1.6620412029363012, "grad_norm": 1.984375, "learning_rate": 1.2671497800221133e-06, "loss": 0.497, "step": 13287 }, { "epoch": 1.6621674954613623, "grad_norm": 2.046875, "learning_rate": 1.2661772708245535e-06, "loss": 0.5293, "step": 13288 }, { "epoch": 1.6622937879864237, "grad_norm": 2.140625, "learning_rate": 1.2652051097361317e-06, "loss": 0.4998, "step": 13289 }, { "epoch": 1.6624200805114846, "grad_norm": 2.078125, "learning_rate": 1.2642332967955939e-06, "loss": 0.5598, "step": 13290 }, { "epoch": 1.662546373036546, "grad_norm": 1.859375, "learning_rate": 1.2632618320416766e-06, "loss": 0.4601, "step": 13291 }, { "epoch": 1.662672665561607, "grad_norm": 2.15625, "learning_rate": 1.262290715513098e-06, "loss": 0.5383, "step": 13292 }, { "epoch": 1.6627989580866682, "grad_norm": 2.109375, "learning_rate": 1.261319947248567e-06, "loss": 0.5014, "step": 13293 }, { "epoch": 1.6629252506117296, "grad_norm": 2.046875, "learning_rate": 1.2603495272867728e-06, "loss": 0.5058, "step": 13294 }, { "epoch": 1.6630515431367905, "grad_norm": 2.078125, "learning_rate": 1.2593794556663963e-06, "loss": 0.5, "step": 13295 }, { "epoch": 1.6631778356618518, "grad_norm": 2.171875, "learning_rate": 1.2584097324261025e-06, "loss": 0.5297, "step": 13296 }, { "epoch": 1.663304128186913, "grad_norm": 1.8515625, "learning_rate": 1.2574403576045401e-06, "loss": 0.4605, "step": 13297 }, { "epoch": 1.6634304207119741, "grad_norm": 2.109375, "learning_rate": 1.2564713312403475e-06, "loss": 0.6024, "step": 13298 }, { "epoch": 1.6635567132370352, "grad_norm": 1.9375, "learning_rate": 1.255502653372147e-06, "loss": 0.4907, "step": 13299 }, { "epoch": 1.6636830057620964, "grad_norm": 1.9453125, "learning_rate": 1.25453432403855e-06, "loss": 0.4742, "step": 13300 }, { "epoch": 1.6638092982871577, "grad_norm": 1.96875, "learning_rate": 1.2535663432781465e-06, "loss": 0.5274, "step": 13301 }, { "epoch": 1.6639355908122186, "grad_norm": 1.8515625, "learning_rate": 1.2525987111295234e-06, "loss": 0.4586, "step": 13302 }, { "epoch": 1.66406188333728, "grad_norm": 1.984375, "learning_rate": 1.2516314276312468e-06, "loss": 0.4877, "step": 13303 }, { "epoch": 1.6641881758623411, "grad_norm": 1.96875, "learning_rate": 1.2506644928218702e-06, "loss": 0.4668, "step": 13304 }, { "epoch": 1.6643144683874023, "grad_norm": 2.09375, "learning_rate": 1.249697906739933e-06, "loss": 0.5419, "step": 13305 }, { "epoch": 1.6644407609124636, "grad_norm": 1.9453125, "learning_rate": 1.2487316694239593e-06, "loss": 0.4737, "step": 13306 }, { "epoch": 1.6645670534375245, "grad_norm": 2.015625, "learning_rate": 1.2477657809124632e-06, "loss": 0.5161, "step": 13307 }, { "epoch": 1.664693345962586, "grad_norm": 2.09375, "learning_rate": 1.2468002412439427e-06, "loss": 0.6197, "step": 13308 }, { "epoch": 1.664819638487647, "grad_norm": 1.859375, "learning_rate": 1.2458350504568806e-06, "loss": 0.4404, "step": 13309 }, { "epoch": 1.6649459310127082, "grad_norm": 2.078125, "learning_rate": 1.244870208589748e-06, "loss": 0.4991, "step": 13310 }, { "epoch": 1.6650722235377695, "grad_norm": 1.953125, "learning_rate": 1.2439057156809998e-06, "loss": 0.4505, "step": 13311 }, { "epoch": 1.6651985160628304, "grad_norm": 1.84375, "learning_rate": 1.2429415717690806e-06, "loss": 0.457, "step": 13312 }, { "epoch": 1.6653248085878918, "grad_norm": 1.8515625, "learning_rate": 1.2419777768924167e-06, "loss": 0.4759, "step": 13313 }, { "epoch": 1.665451101112953, "grad_norm": 2.140625, "learning_rate": 1.241014331089424e-06, "loss": 0.4837, "step": 13314 }, { "epoch": 1.665577393638014, "grad_norm": 1.875, "learning_rate": 1.240051234398504e-06, "loss": 0.4206, "step": 13315 }, { "epoch": 1.6657036861630752, "grad_norm": 2.0, "learning_rate": 1.23908848685804e-06, "loss": 0.4642, "step": 13316 }, { "epoch": 1.6658299786881363, "grad_norm": 1.9609375, "learning_rate": 1.2381260885064084e-06, "loss": 0.4552, "step": 13317 }, { "epoch": 1.6659562712131977, "grad_norm": 2.140625, "learning_rate": 1.2371640393819662e-06, "loss": 0.5153, "step": 13318 }, { "epoch": 1.6660825637382586, "grad_norm": 2.015625, "learning_rate": 1.236202339523056e-06, "loss": 0.4924, "step": 13319 }, { "epoch": 1.66620885626332, "grad_norm": 2.0, "learning_rate": 1.235240988968014e-06, "loss": 0.4551, "step": 13320 }, { "epoch": 1.666335148788381, "grad_norm": 2.03125, "learning_rate": 1.2342799877551558e-06, "loss": 0.5036, "step": 13321 }, { "epoch": 1.6664614413134422, "grad_norm": 1.9140625, "learning_rate": 1.2333193359227836e-06, "loss": 0.4901, "step": 13322 }, { "epoch": 1.6665877338385036, "grad_norm": 1.953125, "learning_rate": 1.2323590335091862e-06, "loss": 0.4987, "step": 13323 }, { "epoch": 1.6667140263635645, "grad_norm": 2.015625, "learning_rate": 1.2313990805526388e-06, "loss": 0.4904, "step": 13324 }, { "epoch": 1.6668403188886258, "grad_norm": 1.984375, "learning_rate": 1.2304394770914053e-06, "loss": 0.5209, "step": 13325 }, { "epoch": 1.666966611413687, "grad_norm": 1.921875, "learning_rate": 1.2294802231637304e-06, "loss": 0.4541, "step": 13326 }, { "epoch": 1.667092903938748, "grad_norm": 2.140625, "learning_rate": 1.2285213188078482e-06, "loss": 0.4801, "step": 13327 }, { "epoch": 1.6672191964638094, "grad_norm": 1.921875, "learning_rate": 1.2275627640619803e-06, "loss": 0.4229, "step": 13328 }, { "epoch": 1.6673454889888704, "grad_norm": 2.0625, "learning_rate": 1.2266045589643295e-06, "loss": 0.5654, "step": 13329 }, { "epoch": 1.6674717815139317, "grad_norm": 1.890625, "learning_rate": 1.2256467035530895e-06, "loss": 0.4771, "step": 13330 }, { "epoch": 1.6675980740389929, "grad_norm": 1.96875, "learning_rate": 1.2246891978664376e-06, "loss": 0.5517, "step": 13331 }, { "epoch": 1.667724366564054, "grad_norm": 1.9140625, "learning_rate": 1.2237320419425369e-06, "loss": 0.4684, "step": 13332 }, { "epoch": 1.6678506590891151, "grad_norm": 1.8828125, "learning_rate": 1.222775235819539e-06, "loss": 0.4802, "step": 13333 }, { "epoch": 1.6679769516141763, "grad_norm": 1.8984375, "learning_rate": 1.2218187795355784e-06, "loss": 0.4987, "step": 13334 }, { "epoch": 1.6681032441392376, "grad_norm": 2.078125, "learning_rate": 1.2208626731287787e-06, "loss": 0.5289, "step": 13335 }, { "epoch": 1.6682295366642985, "grad_norm": 2.015625, "learning_rate": 1.2199069166372456e-06, "loss": 0.5343, "step": 13336 }, { "epoch": 1.6683558291893599, "grad_norm": 2.046875, "learning_rate": 1.2189515100990734e-06, "loss": 0.477, "step": 13337 }, { "epoch": 1.668482121714421, "grad_norm": 1.9453125, "learning_rate": 1.2179964535523447e-06, "loss": 0.4865, "step": 13338 }, { "epoch": 1.6686084142394821, "grad_norm": 2.140625, "learning_rate": 1.217041747035126e-06, "loss": 0.5773, "step": 13339 }, { "epoch": 1.6687347067645435, "grad_norm": 1.9140625, "learning_rate": 1.2160873905854665e-06, "loss": 0.5042, "step": 13340 }, { "epoch": 1.6688609992896044, "grad_norm": 1.90625, "learning_rate": 1.215133384241407e-06, "loss": 0.4785, "step": 13341 }, { "epoch": 1.6689872918146658, "grad_norm": 1.953125, "learning_rate": 1.2141797280409717e-06, "loss": 0.4433, "step": 13342 }, { "epoch": 1.669113584339727, "grad_norm": 1.9609375, "learning_rate": 1.21322642202217e-06, "loss": 0.5439, "step": 13343 }, { "epoch": 1.669239876864788, "grad_norm": 2.03125, "learning_rate": 1.2122734662229985e-06, "loss": 0.4799, "step": 13344 }, { "epoch": 1.6693661693898494, "grad_norm": 2.0625, "learning_rate": 1.2113208606814408e-06, "loss": 0.5128, "step": 13345 }, { "epoch": 1.6694924619149103, "grad_norm": 1.984375, "learning_rate": 1.2103686054354635e-06, "loss": 0.498, "step": 13346 }, { "epoch": 1.6696187544399717, "grad_norm": 2.03125, "learning_rate": 1.209416700523024e-06, "loss": 0.4833, "step": 13347 }, { "epoch": 1.6697450469650328, "grad_norm": 2.0625, "learning_rate": 1.2084651459820606e-06, "loss": 0.5057, "step": 13348 }, { "epoch": 1.669871339490094, "grad_norm": 1.921875, "learning_rate": 1.207513941850501e-06, "loss": 0.4974, "step": 13349 }, { "epoch": 1.669997632015155, "grad_norm": 1.984375, "learning_rate": 1.206563088166257e-06, "loss": 0.4309, "step": 13350 }, { "epoch": 1.6701239245402162, "grad_norm": 1.96875, "learning_rate": 1.2056125849672285e-06, "loss": 0.5363, "step": 13351 }, { "epoch": 1.6702502170652775, "grad_norm": 1.875, "learning_rate": 1.2046624322912993e-06, "loss": 0.4258, "step": 13352 }, { "epoch": 1.6703765095903387, "grad_norm": 2.0, "learning_rate": 1.2037126301763413e-06, "loss": 0.4752, "step": 13353 }, { "epoch": 1.6705028021153998, "grad_norm": 1.8125, "learning_rate": 1.20276317866021e-06, "loss": 0.4323, "step": 13354 }, { "epoch": 1.670629094640461, "grad_norm": 2.078125, "learning_rate": 1.2018140777807473e-06, "loss": 0.485, "step": 13355 }, { "epoch": 1.670755387165522, "grad_norm": 1.8203125, "learning_rate": 1.2008653275757854e-06, "loss": 0.4431, "step": 13356 }, { "epoch": 1.6708816796905834, "grad_norm": 2.171875, "learning_rate": 1.199916928083138e-06, "loss": 0.5511, "step": 13357 }, { "epoch": 1.6710079722156443, "grad_norm": 2.0625, "learning_rate": 1.198968879340605e-06, "loss": 0.4825, "step": 13358 }, { "epoch": 1.6711342647407057, "grad_norm": 1.9609375, "learning_rate": 1.1980211813859744e-06, "loss": 0.4321, "step": 13359 }, { "epoch": 1.6712605572657668, "grad_norm": 2.046875, "learning_rate": 1.1970738342570187e-06, "loss": 0.5237, "step": 13360 }, { "epoch": 1.671386849790828, "grad_norm": 2.09375, "learning_rate": 1.1961268379914958e-06, "loss": 0.4961, "step": 13361 }, { "epoch": 1.6715131423158893, "grad_norm": 1.9921875, "learning_rate": 1.1951801926271533e-06, "loss": 0.5205, "step": 13362 }, { "epoch": 1.6716394348409502, "grad_norm": 2.0625, "learning_rate": 1.1942338982017189e-06, "loss": 0.4786, "step": 13363 }, { "epoch": 1.6717657273660116, "grad_norm": 2.15625, "learning_rate": 1.193287954752912e-06, "loss": 0.5183, "step": 13364 }, { "epoch": 1.6718920198910727, "grad_norm": 1.921875, "learning_rate": 1.192342362318435e-06, "loss": 0.5017, "step": 13365 }, { "epoch": 1.6720183124161339, "grad_norm": 2.203125, "learning_rate": 1.1913971209359776e-06, "loss": 0.5171, "step": 13366 }, { "epoch": 1.672144604941195, "grad_norm": 1.953125, "learning_rate": 1.1904522306432131e-06, "loss": 0.4413, "step": 13367 }, { "epoch": 1.6722708974662561, "grad_norm": 2.109375, "learning_rate": 1.1895076914778037e-06, "loss": 0.5197, "step": 13368 }, { "epoch": 1.6723971899913175, "grad_norm": 2.15625, "learning_rate": 1.1885635034773969e-06, "loss": 0.5041, "step": 13369 }, { "epoch": 1.6725234825163786, "grad_norm": 2.140625, "learning_rate": 1.1876196666796235e-06, "loss": 0.5121, "step": 13370 }, { "epoch": 1.6726497750414397, "grad_norm": 2.171875, "learning_rate": 1.186676181122106e-06, "loss": 0.5167, "step": 13371 }, { "epoch": 1.6727760675665009, "grad_norm": 1.9765625, "learning_rate": 1.1857330468424466e-06, "loss": 0.4991, "step": 13372 }, { "epoch": 1.672902360091562, "grad_norm": 1.8125, "learning_rate": 1.184790263878236e-06, "loss": 0.4941, "step": 13373 }, { "epoch": 1.6730286526166234, "grad_norm": 2.21875, "learning_rate": 1.1838478322670554e-06, "loss": 0.4879, "step": 13374 }, { "epoch": 1.6731549451416843, "grad_norm": 2.703125, "learning_rate": 1.1829057520464638e-06, "loss": 0.5323, "step": 13375 }, { "epoch": 1.6732812376667456, "grad_norm": 2.0625, "learning_rate": 1.181964023254013e-06, "loss": 0.4779, "step": 13376 }, { "epoch": 1.6734075301918068, "grad_norm": 2.15625, "learning_rate": 1.1810226459272356e-06, "loss": 0.5578, "step": 13377 }, { "epoch": 1.673533822716868, "grad_norm": 2.0625, "learning_rate": 1.1800816201036535e-06, "loss": 0.55, "step": 13378 }, { "epoch": 1.6736601152419293, "grad_norm": 2.015625, "learning_rate": 1.1791409458207748e-06, "loss": 0.513, "step": 13379 }, { "epoch": 1.6737864077669902, "grad_norm": 1.921875, "learning_rate": 1.1782006231160914e-06, "loss": 0.4566, "step": 13380 }, { "epoch": 1.6739127002920515, "grad_norm": 2.015625, "learning_rate": 1.1772606520270823e-06, "loss": 0.5141, "step": 13381 }, { "epoch": 1.6740389928171127, "grad_norm": 1.84375, "learning_rate": 1.1763210325912132e-06, "loss": 0.4546, "step": 13382 }, { "epoch": 1.6741652853421738, "grad_norm": 1.8046875, "learning_rate": 1.1753817648459343e-06, "loss": 0.4809, "step": 13383 }, { "epoch": 1.6742915778672351, "grad_norm": 2.015625, "learning_rate": 1.174442848828683e-06, "loss": 0.4432, "step": 13384 }, { "epoch": 1.674417870392296, "grad_norm": 1.9921875, "learning_rate": 1.173504284576882e-06, "loss": 0.4692, "step": 13385 }, { "epoch": 1.6745441629173574, "grad_norm": 2.015625, "learning_rate": 1.1725660721279396e-06, "loss": 0.496, "step": 13386 }, { "epoch": 1.6746704554424185, "grad_norm": 1.9765625, "learning_rate": 1.1716282115192512e-06, "loss": 0.4797, "step": 13387 }, { "epoch": 1.6747967479674797, "grad_norm": 2.21875, "learning_rate": 1.1706907027881987e-06, "loss": 0.4975, "step": 13388 }, { "epoch": 1.6749230404925408, "grad_norm": 1.9453125, "learning_rate": 1.1697535459721477e-06, "loss": 0.4711, "step": 13389 }, { "epoch": 1.675049333017602, "grad_norm": 1.90625, "learning_rate": 1.1688167411084518e-06, "loss": 0.4024, "step": 13390 }, { "epoch": 1.6751756255426633, "grad_norm": 2.015625, "learning_rate": 1.1678802882344464e-06, "loss": 0.5721, "step": 13391 }, { "epoch": 1.6753019180677242, "grad_norm": 2.125, "learning_rate": 1.166944187387462e-06, "loss": 0.534, "step": 13392 }, { "epoch": 1.6754282105927856, "grad_norm": 1.9609375, "learning_rate": 1.1660084386048066e-06, "loss": 0.465, "step": 13393 }, { "epoch": 1.6755545031178467, "grad_norm": 2.140625, "learning_rate": 1.1650730419237765e-06, "loss": 0.505, "step": 13394 }, { "epoch": 1.6756807956429078, "grad_norm": 1.953125, "learning_rate": 1.1641379973816547e-06, "loss": 0.4658, "step": 13395 }, { "epoch": 1.6758070881679692, "grad_norm": 2.0625, "learning_rate": 1.1632033050157099e-06, "loss": 0.5458, "step": 13396 }, { "epoch": 1.67593338069303, "grad_norm": 1.734375, "learning_rate": 1.1622689648631968e-06, "loss": 0.4218, "step": 13397 }, { "epoch": 1.6760596732180915, "grad_norm": 2.0, "learning_rate": 1.1613349769613557e-06, "loss": 0.5124, "step": 13398 }, { "epoch": 1.6761859657431526, "grad_norm": 2.234375, "learning_rate": 1.1604013413474136e-06, "loss": 0.4995, "step": 13399 }, { "epoch": 1.6763122582682137, "grad_norm": 2.03125, "learning_rate": 1.1594680580585815e-06, "loss": 0.5496, "step": 13400 }, { "epoch": 1.676438550793275, "grad_norm": 1.8828125, "learning_rate": 1.1585351271320588e-06, "loss": 0.4651, "step": 13401 }, { "epoch": 1.676564843318336, "grad_norm": 2.40625, "learning_rate": 1.1576025486050313e-06, "loss": 0.5265, "step": 13402 }, { "epoch": 1.6766911358433974, "grad_norm": 1.8359375, "learning_rate": 1.156670322514667e-06, "loss": 0.4338, "step": 13403 }, { "epoch": 1.6768174283684585, "grad_norm": 1.9296875, "learning_rate": 1.1557384488981227e-06, "loss": 0.4934, "step": 13404 }, { "epoch": 1.6769437208935196, "grad_norm": 1.921875, "learning_rate": 1.1548069277925422e-06, "loss": 0.5322, "step": 13405 }, { "epoch": 1.6770700134185808, "grad_norm": 2.046875, "learning_rate": 1.1538757592350514e-06, "loss": 0.4462, "step": 13406 }, { "epoch": 1.6771963059436419, "grad_norm": 2.109375, "learning_rate": 1.152944943262767e-06, "loss": 0.5161, "step": 13407 }, { "epoch": 1.6773225984687032, "grad_norm": 1.90625, "learning_rate": 1.1520144799127863e-06, "loss": 0.4706, "step": 13408 }, { "epoch": 1.6774488909937642, "grad_norm": 1.8359375, "learning_rate": 1.1510843692221952e-06, "loss": 0.4675, "step": 13409 }, { "epoch": 1.6775751835188255, "grad_norm": 2.015625, "learning_rate": 1.1501546112280693e-06, "loss": 0.5044, "step": 13410 }, { "epoch": 1.6777014760438866, "grad_norm": 2.015625, "learning_rate": 1.1492252059674647e-06, "loss": 0.4906, "step": 13411 }, { "epoch": 1.6778277685689478, "grad_norm": 1.8671875, "learning_rate": 1.148296153477425e-06, "loss": 0.4952, "step": 13412 }, { "epoch": 1.6779540610940091, "grad_norm": 1.8828125, "learning_rate": 1.1473674537949798e-06, "loss": 0.44, "step": 13413 }, { "epoch": 1.67808035361907, "grad_norm": 1.9375, "learning_rate": 1.1464391069571446e-06, "loss": 0.4565, "step": 13414 }, { "epoch": 1.6782066461441314, "grad_norm": 2.0, "learning_rate": 1.1455111130009223e-06, "loss": 0.5283, "step": 13415 }, { "epoch": 1.6783329386691925, "grad_norm": 1.9296875, "learning_rate": 1.1445834719632987e-06, "loss": 0.4434, "step": 13416 }, { "epoch": 1.6784592311942537, "grad_norm": 1.8828125, "learning_rate": 1.1436561838812488e-06, "loss": 0.5265, "step": 13417 }, { "epoch": 1.678585523719315, "grad_norm": 1.9375, "learning_rate": 1.1427292487917307e-06, "loss": 0.5473, "step": 13418 }, { "epoch": 1.678711816244376, "grad_norm": 2.0, "learning_rate": 1.1418026667316918e-06, "loss": 0.4719, "step": 13419 }, { "epoch": 1.6788381087694373, "grad_norm": 1.84375, "learning_rate": 1.140876437738062e-06, "loss": 0.4716, "step": 13420 }, { "epoch": 1.6789644012944984, "grad_norm": 2.09375, "learning_rate": 1.139950561847759e-06, "loss": 0.5662, "step": 13421 }, { "epoch": 1.6790906938195596, "grad_norm": 2.046875, "learning_rate": 1.1390250390976855e-06, "loss": 0.5378, "step": 13422 }, { "epoch": 1.6792169863446207, "grad_norm": 2.015625, "learning_rate": 1.1380998695247314e-06, "loss": 0.5018, "step": 13423 }, { "epoch": 1.6793432788696818, "grad_norm": 2.015625, "learning_rate": 1.13717505316577e-06, "loss": 0.5221, "step": 13424 }, { "epoch": 1.6794695713947432, "grad_norm": 1.9921875, "learning_rate": 1.1362505900576649e-06, "loss": 0.4906, "step": 13425 }, { "epoch": 1.679595863919804, "grad_norm": 1.8671875, "learning_rate": 1.135326480237261e-06, "loss": 0.4368, "step": 13426 }, { "epoch": 1.6797221564448654, "grad_norm": 2.171875, "learning_rate": 1.13440272374139e-06, "loss": 0.549, "step": 13427 }, { "epoch": 1.6798484489699266, "grad_norm": 2.0, "learning_rate": 1.1334793206068739e-06, "loss": 0.5091, "step": 13428 }, { "epoch": 1.6799747414949877, "grad_norm": 1.9453125, "learning_rate": 1.1325562708705162e-06, "loss": 0.5551, "step": 13429 }, { "epoch": 1.680101034020049, "grad_norm": 1.765625, "learning_rate": 1.1316335745691065e-06, "loss": 0.4208, "step": 13430 }, { "epoch": 1.68022732654511, "grad_norm": 1.890625, "learning_rate": 1.130711231739422e-06, "loss": 0.4698, "step": 13431 }, { "epoch": 1.6803536190701713, "grad_norm": 1.921875, "learning_rate": 1.1297892424182243e-06, "loss": 0.4831, "step": 13432 }, { "epoch": 1.6804799115952325, "grad_norm": 1.84375, "learning_rate": 1.1288676066422633e-06, "loss": 0.5306, "step": 13433 }, { "epoch": 1.6806062041202936, "grad_norm": 2.109375, "learning_rate": 1.1279463244482714e-06, "loss": 0.5322, "step": 13434 }, { "epoch": 1.680732496645355, "grad_norm": 1.8828125, "learning_rate": 1.1270253958729694e-06, "loss": 0.4484, "step": 13435 }, { "epoch": 1.6808587891704159, "grad_norm": 1.9375, "learning_rate": 1.1261048209530635e-06, "loss": 0.4259, "step": 13436 }, { "epoch": 1.6809850816954772, "grad_norm": 2.125, "learning_rate": 1.1251845997252442e-06, "loss": 0.5088, "step": 13437 }, { "epoch": 1.6811113742205384, "grad_norm": 1.890625, "learning_rate": 1.1242647322261912e-06, "loss": 0.4784, "step": 13438 }, { "epoch": 1.6812376667455995, "grad_norm": 2.40625, "learning_rate": 1.1233452184925675e-06, "loss": 0.5797, "step": 13439 }, { "epoch": 1.6813639592706606, "grad_norm": 2.03125, "learning_rate": 1.1224260585610224e-06, "loss": 0.4813, "step": 13440 }, { "epoch": 1.6814902517957218, "grad_norm": 2.125, "learning_rate": 1.1215072524681913e-06, "loss": 0.5342, "step": 13441 }, { "epoch": 1.6816165443207831, "grad_norm": 2.0, "learning_rate": 1.1205888002506971e-06, "loss": 0.4274, "step": 13442 }, { "epoch": 1.681742836845844, "grad_norm": 2.015625, "learning_rate": 1.1196707019451447e-06, "loss": 0.5219, "step": 13443 }, { "epoch": 1.6818691293709054, "grad_norm": 2.140625, "learning_rate": 1.1187529575881283e-06, "loss": 0.4771, "step": 13444 }, { "epoch": 1.6819954218959665, "grad_norm": 2.109375, "learning_rate": 1.1178355672162266e-06, "loss": 0.5367, "step": 13445 }, { "epoch": 1.6821217144210276, "grad_norm": 2.03125, "learning_rate": 1.1169185308660058e-06, "loss": 0.449, "step": 13446 }, { "epoch": 1.682248006946089, "grad_norm": 2.0625, "learning_rate": 1.1160018485740176e-06, "loss": 0.4781, "step": 13447 }, { "epoch": 1.68237429947115, "grad_norm": 2.125, "learning_rate": 1.1150855203767964e-06, "loss": 0.5225, "step": 13448 }, { "epoch": 1.6825005919962113, "grad_norm": 1.953125, "learning_rate": 1.1141695463108648e-06, "loss": 0.4489, "step": 13449 }, { "epoch": 1.6826268845212724, "grad_norm": 1.9765625, "learning_rate": 1.113253926412734e-06, "loss": 0.4456, "step": 13450 }, { "epoch": 1.6827531770463335, "grad_norm": 2.015625, "learning_rate": 1.1123386607188957e-06, "loss": 0.444, "step": 13451 }, { "epoch": 1.682879469571395, "grad_norm": 1.9296875, "learning_rate": 1.1114237492658308e-06, "loss": 0.4783, "step": 13452 }, { "epoch": 1.6830057620964558, "grad_norm": 2.0, "learning_rate": 1.1105091920900058e-06, "loss": 0.4699, "step": 13453 }, { "epoch": 1.6831320546215172, "grad_norm": 1.953125, "learning_rate": 1.1095949892278735e-06, "loss": 0.4458, "step": 13454 }, { "epoch": 1.6832583471465783, "grad_norm": 1.9921875, "learning_rate": 1.1086811407158704e-06, "loss": 0.5277, "step": 13455 }, { "epoch": 1.6833846396716394, "grad_norm": 1.9765625, "learning_rate": 1.1077676465904209e-06, "loss": 0.5609, "step": 13456 }, { "epoch": 1.6835109321967006, "grad_norm": 1.953125, "learning_rate": 1.1068545068879354e-06, "loss": 0.5234, "step": 13457 }, { "epoch": 1.6836372247217617, "grad_norm": 2.140625, "learning_rate": 1.1059417216448075e-06, "loss": 0.5206, "step": 13458 }, { "epoch": 1.683763517246823, "grad_norm": 2.09375, "learning_rate": 1.1050292908974214e-06, "loss": 0.5088, "step": 13459 }, { "epoch": 1.683889809771884, "grad_norm": 2.03125, "learning_rate": 1.1041172146821422e-06, "loss": 0.4124, "step": 13460 }, { "epoch": 1.6840161022969453, "grad_norm": 2.171875, "learning_rate": 1.1032054930353232e-06, "loss": 0.4767, "step": 13461 }, { "epoch": 1.6841423948220064, "grad_norm": 1.765625, "learning_rate": 1.1022941259933051e-06, "loss": 0.4324, "step": 13462 }, { "epoch": 1.6842686873470676, "grad_norm": 2.34375, "learning_rate": 1.1013831135924091e-06, "loss": 0.5968, "step": 13463 }, { "epoch": 1.684394979872129, "grad_norm": 2.0625, "learning_rate": 1.1004724558689506e-06, "loss": 0.5254, "step": 13464 }, { "epoch": 1.6845212723971899, "grad_norm": 2.015625, "learning_rate": 1.0995621528592238e-06, "loss": 0.5337, "step": 13465 }, { "epoch": 1.6846475649222512, "grad_norm": 1.9765625, "learning_rate": 1.098652204599512e-06, "loss": 0.4219, "step": 13466 }, { "epoch": 1.6847738574473123, "grad_norm": 1.96875, "learning_rate": 1.0977426111260825e-06, "loss": 0.4629, "step": 13467 }, { "epoch": 1.6849001499723735, "grad_norm": 1.9921875, "learning_rate": 1.09683337247519e-06, "loss": 0.4567, "step": 13468 }, { "epoch": 1.6850264424974348, "grad_norm": 1.90625, "learning_rate": 1.0959244886830756e-06, "loss": 0.4286, "step": 13469 }, { "epoch": 1.6851527350224957, "grad_norm": 1.9765625, "learning_rate": 1.0950159597859632e-06, "loss": 0.5635, "step": 13470 }, { "epoch": 1.685279027547557, "grad_norm": 2.203125, "learning_rate": 1.0941077858200666e-06, "loss": 0.4495, "step": 13471 }, { "epoch": 1.6854053200726182, "grad_norm": 2.078125, "learning_rate": 1.093199966821582e-06, "loss": 0.5382, "step": 13472 }, { "epoch": 1.6855316125976794, "grad_norm": 1.859375, "learning_rate": 1.092292502826694e-06, "loss": 0.4521, "step": 13473 }, { "epoch": 1.6856579051227405, "grad_norm": 1.9921875, "learning_rate": 1.0913853938715701e-06, "loss": 0.5297, "step": 13474 }, { "epoch": 1.6857841976478016, "grad_norm": 2.03125, "learning_rate": 1.0904786399923672e-06, "loss": 0.5065, "step": 13475 }, { "epoch": 1.685910490172863, "grad_norm": 1.984375, "learning_rate": 1.0895722412252264e-06, "loss": 0.4987, "step": 13476 }, { "epoch": 1.6860367826979241, "grad_norm": 2.109375, "learning_rate": 1.088666197606274e-06, "loss": 0.5173, "step": 13477 }, { "epoch": 1.6861630752229853, "grad_norm": 2.078125, "learning_rate": 1.0877605091716214e-06, "loss": 0.4677, "step": 13478 }, { "epoch": 1.6862893677480464, "grad_norm": 1.9296875, "learning_rate": 1.0868551759573698e-06, "loss": 0.4365, "step": 13479 }, { "epoch": 1.6864156602731075, "grad_norm": 2.1875, "learning_rate": 1.085950197999599e-06, "loss": 0.5518, "step": 13480 }, { "epoch": 1.6865419527981689, "grad_norm": 2.109375, "learning_rate": 1.0850455753343847e-06, "loss": 0.5207, "step": 13481 }, { "epoch": 1.6866682453232298, "grad_norm": 2.046875, "learning_rate": 1.0841413079977815e-06, "loss": 0.4644, "step": 13482 }, { "epoch": 1.6867945378482911, "grad_norm": 2.078125, "learning_rate": 1.0832373960258303e-06, "loss": 0.5236, "step": 13483 }, { "epoch": 1.6869208303733523, "grad_norm": 1.921875, "learning_rate": 1.082333839454559e-06, "loss": 0.4908, "step": 13484 }, { "epoch": 1.6870471228984134, "grad_norm": 2.03125, "learning_rate": 1.0814306383199812e-06, "loss": 0.5021, "step": 13485 }, { "epoch": 1.6871734154234748, "grad_norm": 2.09375, "learning_rate": 1.080527792658096e-06, "loss": 0.4538, "step": 13486 }, { "epoch": 1.6872997079485357, "grad_norm": 1.8984375, "learning_rate": 1.0796253025048898e-06, "loss": 0.4298, "step": 13487 }, { "epoch": 1.687426000473597, "grad_norm": 2.15625, "learning_rate": 1.0787231678963328e-06, "loss": 0.5523, "step": 13488 }, { "epoch": 1.6875522929986582, "grad_norm": 2.03125, "learning_rate": 1.077821388868382e-06, "loss": 0.5074, "step": 13489 }, { "epoch": 1.6876785855237193, "grad_norm": 2.078125, "learning_rate": 1.0769199654569805e-06, "loss": 0.5067, "step": 13490 }, { "epoch": 1.6878048780487804, "grad_norm": 2.046875, "learning_rate": 1.0760188976980568e-06, "loss": 0.5068, "step": 13491 }, { "epoch": 1.6879311705738416, "grad_norm": 1.921875, "learning_rate": 1.0751181856275238e-06, "loss": 0.4812, "step": 13492 }, { "epoch": 1.688057463098903, "grad_norm": 1.9609375, "learning_rate": 1.0742178292812844e-06, "loss": 0.4967, "step": 13493 }, { "epoch": 1.688183755623964, "grad_norm": 2.171875, "learning_rate": 1.0733178286952229e-06, "loss": 0.5579, "step": 13494 }, { "epoch": 1.6883100481490252, "grad_norm": 2.0625, "learning_rate": 1.0724181839052117e-06, "loss": 0.4967, "step": 13495 }, { "epoch": 1.6884363406740863, "grad_norm": 2.078125, "learning_rate": 1.0715188949471056e-06, "loss": 0.5754, "step": 13496 }, { "epoch": 1.6885626331991475, "grad_norm": 2.0, "learning_rate": 1.0706199618567525e-06, "loss": 0.5327, "step": 13497 }, { "epoch": 1.6886889257242088, "grad_norm": 1.8671875, "learning_rate": 1.0697213846699806e-06, "loss": 0.4129, "step": 13498 }, { "epoch": 1.6888152182492697, "grad_norm": 1.953125, "learning_rate": 1.0688231634226043e-06, "loss": 0.5161, "step": 13499 }, { "epoch": 1.688941510774331, "grad_norm": 1.984375, "learning_rate": 1.067925298150425e-06, "loss": 0.4964, "step": 13500 }, { "epoch": 1.6890678032993922, "grad_norm": 2.0625, "learning_rate": 1.0670277888892278e-06, "loss": 0.5738, "step": 13501 }, { "epoch": 1.6891940958244533, "grad_norm": 1.8125, "learning_rate": 1.066130635674788e-06, "loss": 0.4475, "step": 13502 }, { "epoch": 1.6893203883495147, "grad_norm": 1.9375, "learning_rate": 1.0652338385428618e-06, "loss": 0.4983, "step": 13503 }, { "epoch": 1.6894466808745756, "grad_norm": 2.125, "learning_rate": 1.0643373975291937e-06, "loss": 0.4507, "step": 13504 }, { "epoch": 1.689572973399637, "grad_norm": 2.0625, "learning_rate": 1.0634413126695143e-06, "loss": 0.4804, "step": 13505 }, { "epoch": 1.689699265924698, "grad_norm": 1.828125, "learning_rate": 1.0625455839995392e-06, "loss": 0.4819, "step": 13506 }, { "epoch": 1.6898255584497592, "grad_norm": 2.03125, "learning_rate": 1.06165021155497e-06, "loss": 0.5197, "step": 13507 }, { "epoch": 1.6899518509748206, "grad_norm": 2.046875, "learning_rate": 1.0607551953714945e-06, "loss": 0.537, "step": 13508 }, { "epoch": 1.6900781434998815, "grad_norm": 1.9921875, "learning_rate": 1.0598605354847846e-06, "loss": 0.4659, "step": 13509 }, { "epoch": 1.6902044360249429, "grad_norm": 1.8828125, "learning_rate": 1.0589662319305006e-06, "loss": 0.4504, "step": 13510 }, { "epoch": 1.690330728550004, "grad_norm": 2.03125, "learning_rate": 1.0580722847442859e-06, "loss": 0.4777, "step": 13511 }, { "epoch": 1.6904570210750651, "grad_norm": 2.0625, "learning_rate": 1.057178693961771e-06, "loss": 0.517, "step": 13512 }, { "epoch": 1.6905833136001263, "grad_norm": 1.890625, "learning_rate": 1.0562854596185744e-06, "loss": 0.5184, "step": 13513 }, { "epoch": 1.6907096061251874, "grad_norm": 1.8828125, "learning_rate": 1.0553925817502975e-06, "loss": 0.4661, "step": 13514 }, { "epoch": 1.6908358986502487, "grad_norm": 2.0625, "learning_rate": 1.0545000603925282e-06, "loss": 0.5189, "step": 13515 }, { "epoch": 1.6909621911753097, "grad_norm": 1.984375, "learning_rate": 1.0536078955808394e-06, "loss": 0.4503, "step": 13516 }, { "epoch": 1.691088483700371, "grad_norm": 2.09375, "learning_rate": 1.0527160873507914e-06, "loss": 0.4949, "step": 13517 }, { "epoch": 1.6912147762254321, "grad_norm": 1.921875, "learning_rate": 1.051824635737929e-06, "loss": 0.5273, "step": 13518 }, { "epoch": 1.6913410687504933, "grad_norm": 1.953125, "learning_rate": 1.0509335407777843e-06, "loss": 0.4563, "step": 13519 }, { "epoch": 1.6914673612755546, "grad_norm": 2.015625, "learning_rate": 1.0500428025058728e-06, "loss": 0.516, "step": 13520 }, { "epoch": 1.6915936538006155, "grad_norm": 1.9375, "learning_rate": 1.0491524209576976e-06, "loss": 0.5297, "step": 13521 }, { "epoch": 1.691719946325677, "grad_norm": 2.15625, "learning_rate": 1.0482623961687478e-06, "loss": 0.5188, "step": 13522 }, { "epoch": 1.691846238850738, "grad_norm": 2.0625, "learning_rate": 1.0473727281744971e-06, "loss": 0.5663, "step": 13523 }, { "epoch": 1.6919725313757992, "grad_norm": 1.9921875, "learning_rate": 1.0464834170104066e-06, "loss": 0.5401, "step": 13524 }, { "epoch": 1.6920988239008605, "grad_norm": 2.15625, "learning_rate": 1.0455944627119208e-06, "loss": 0.5088, "step": 13525 }, { "epoch": 1.6922251164259214, "grad_norm": 1.984375, "learning_rate": 1.0447058653144704e-06, "loss": 0.5069, "step": 13526 }, { "epoch": 1.6923514089509828, "grad_norm": 1.96875, "learning_rate": 1.043817624853476e-06, "loss": 0.5018, "step": 13527 }, { "epoch": 1.692477701476044, "grad_norm": 2.046875, "learning_rate": 1.0429297413643358e-06, "loss": 0.5183, "step": 13528 }, { "epoch": 1.692603994001105, "grad_norm": 2.09375, "learning_rate": 1.0420422148824427e-06, "loss": 0.4231, "step": 13529 }, { "epoch": 1.6927302865261662, "grad_norm": 1.8984375, "learning_rate": 1.0411550454431718e-06, "loss": 0.5181, "step": 13530 }, { "epoch": 1.6928565790512273, "grad_norm": 1.8984375, "learning_rate": 1.0402682330818815e-06, "loss": 0.4562, "step": 13531 }, { "epoch": 1.6929828715762887, "grad_norm": 1.9921875, "learning_rate": 1.0393817778339188e-06, "loss": 0.3914, "step": 13532 }, { "epoch": 1.6931091641013496, "grad_norm": 1.9609375, "learning_rate": 1.0384956797346147e-06, "loss": 0.4455, "step": 13533 }, { "epoch": 1.693235456626411, "grad_norm": 2.171875, "learning_rate": 1.0376099388192885e-06, "loss": 0.5459, "step": 13534 }, { "epoch": 1.693361749151472, "grad_norm": 1.8125, "learning_rate": 1.0367245551232418e-06, "loss": 0.4797, "step": 13535 }, { "epoch": 1.6934880416765332, "grad_norm": 2.0625, "learning_rate": 1.0358395286817657e-06, "loss": 0.4867, "step": 13536 }, { "epoch": 1.6936143342015946, "grad_norm": 1.921875, "learning_rate": 1.0349548595301351e-06, "loss": 0.4535, "step": 13537 }, { "epoch": 1.6937406267266555, "grad_norm": 2.03125, "learning_rate": 1.0340705477036095e-06, "loss": 0.5346, "step": 13538 }, { "epoch": 1.6938669192517168, "grad_norm": 1.953125, "learning_rate": 1.0331865932374353e-06, "loss": 0.4805, "step": 13539 }, { "epoch": 1.693993211776778, "grad_norm": 2.015625, "learning_rate": 1.0323029961668463e-06, "loss": 0.5097, "step": 13540 }, { "epoch": 1.694119504301839, "grad_norm": 1.9375, "learning_rate": 1.03141975652706e-06, "loss": 0.4498, "step": 13541 }, { "epoch": 1.6942457968269005, "grad_norm": 2.046875, "learning_rate": 1.0305368743532795e-06, "loss": 0.5263, "step": 13542 }, { "epoch": 1.6943720893519614, "grad_norm": 1.859375, "learning_rate": 1.0296543496806955e-06, "loss": 0.4638, "step": 13543 }, { "epoch": 1.6944983818770227, "grad_norm": 1.90625, "learning_rate": 1.0287721825444807e-06, "loss": 0.4506, "step": 13544 }, { "epoch": 1.6946246744020839, "grad_norm": 2.0625, "learning_rate": 1.0278903729797996e-06, "loss": 0.5046, "step": 13545 }, { "epoch": 1.694750966927145, "grad_norm": 2.140625, "learning_rate": 1.0270089210217981e-06, "loss": 0.6163, "step": 13546 }, { "epoch": 1.6948772594522061, "grad_norm": 1.9765625, "learning_rate": 1.0261278267056074e-06, "loss": 0.4615, "step": 13547 }, { "epoch": 1.6950035519772673, "grad_norm": 1.84375, "learning_rate": 1.025247090066348e-06, "loss": 0.5113, "step": 13548 }, { "epoch": 1.6951298445023286, "grad_norm": 1.8828125, "learning_rate": 1.0243667111391209e-06, "loss": 0.5377, "step": 13549 }, { "epoch": 1.6952561370273895, "grad_norm": 1.84375, "learning_rate": 1.023486689959019e-06, "loss": 0.4889, "step": 13550 }, { "epoch": 1.6953824295524509, "grad_norm": 2.03125, "learning_rate": 1.0226070265611155e-06, "loss": 0.4674, "step": 13551 }, { "epoch": 1.695508722077512, "grad_norm": 2.1875, "learning_rate": 1.021727720980472e-06, "loss": 0.4944, "step": 13552 }, { "epoch": 1.6956350146025732, "grad_norm": 2.0, "learning_rate": 1.0208487732521365e-06, "loss": 0.4713, "step": 13553 }, { "epoch": 1.6957613071276345, "grad_norm": 1.9453125, "learning_rate": 1.0199701834111419e-06, "loss": 0.5078, "step": 13554 }, { "epoch": 1.6958875996526954, "grad_norm": 1.96875, "learning_rate": 1.0190919514925057e-06, "loss": 0.533, "step": 13555 }, { "epoch": 1.6960138921777568, "grad_norm": 2.078125, "learning_rate": 1.0182140775312321e-06, "loss": 0.4655, "step": 13556 }, { "epoch": 1.696140184702818, "grad_norm": 2.0, "learning_rate": 1.017336561562312e-06, "loss": 0.461, "step": 13557 }, { "epoch": 1.696266477227879, "grad_norm": 1.9765625, "learning_rate": 1.0164594036207199e-06, "loss": 0.4565, "step": 13558 }, { "epoch": 1.6963927697529404, "grad_norm": 2.03125, "learning_rate": 1.0155826037414174e-06, "loss": 0.489, "step": 13559 }, { "epoch": 1.6965190622780013, "grad_norm": 2.140625, "learning_rate": 1.0147061619593512e-06, "loss": 0.4992, "step": 13560 }, { "epoch": 1.6966453548030627, "grad_norm": 1.9453125, "learning_rate": 1.0138300783094556e-06, "loss": 0.5065, "step": 13561 }, { "epoch": 1.6967716473281238, "grad_norm": 2.140625, "learning_rate": 1.0129543528266494e-06, "loss": 0.5975, "step": 13562 }, { "epoch": 1.696897939853185, "grad_norm": 1.9765625, "learning_rate": 1.0120789855458358e-06, "loss": 0.4892, "step": 13563 }, { "epoch": 1.697024232378246, "grad_norm": 1.7890625, "learning_rate": 1.0112039765019055e-06, "loss": 0.4268, "step": 13564 }, { "epoch": 1.6971505249033072, "grad_norm": 2.140625, "learning_rate": 1.0103293257297331e-06, "loss": 0.5369, "step": 13565 }, { "epoch": 1.6972768174283686, "grad_norm": 2.03125, "learning_rate": 1.0094550332641805e-06, "loss": 0.5869, "step": 13566 }, { "epoch": 1.6974031099534295, "grad_norm": 2.15625, "learning_rate": 1.0085810991400958e-06, "loss": 0.5651, "step": 13567 }, { "epoch": 1.6975294024784908, "grad_norm": 2.015625, "learning_rate": 1.0077075233923118e-06, "loss": 0.4833, "step": 13568 }, { "epoch": 1.697655695003552, "grad_norm": 2.140625, "learning_rate": 1.0068343060556462e-06, "loss": 0.5008, "step": 13569 }, { "epoch": 1.697781987528613, "grad_norm": 2.0625, "learning_rate": 1.0059614471649038e-06, "loss": 0.5504, "step": 13570 }, { "epoch": 1.6979082800536744, "grad_norm": 1.921875, "learning_rate": 1.0050889467548741e-06, "loss": 0.4596, "step": 13571 }, { "epoch": 1.6980345725787354, "grad_norm": 2.03125, "learning_rate": 1.004216804860334e-06, "loss": 0.4583, "step": 13572 }, { "epoch": 1.6981608651037967, "grad_norm": 2.09375, "learning_rate": 1.0033450215160433e-06, "loss": 0.5511, "step": 13573 }, { "epoch": 1.6982871576288578, "grad_norm": 1.828125, "learning_rate": 1.0024735967567512e-06, "loss": 0.3981, "step": 13574 }, { "epoch": 1.698413450153919, "grad_norm": 1.9609375, "learning_rate": 1.0016025306171894e-06, "loss": 0.4669, "step": 13575 }, { "epoch": 1.6985397426789803, "grad_norm": 1.9921875, "learning_rate": 1.000731823132075e-06, "loss": 0.5753, "step": 13576 }, { "epoch": 1.6986660352040412, "grad_norm": 1.96875, "learning_rate": 9.998614743361168e-07, "loss": 0.4655, "step": 13577 }, { "epoch": 1.6987923277291026, "grad_norm": 2.0, "learning_rate": 9.989914842640004e-07, "loss": 0.5438, "step": 13578 }, { "epoch": 1.6989186202541637, "grad_norm": 2.046875, "learning_rate": 9.981218529504045e-07, "loss": 0.5416, "step": 13579 }, { "epoch": 1.6990449127792249, "grad_norm": 2.015625, "learning_rate": 9.972525804299881e-07, "loss": 0.5158, "step": 13580 }, { "epoch": 1.699171205304286, "grad_norm": 1.890625, "learning_rate": 9.963836667373993e-07, "loss": 0.4336, "step": 13581 }, { "epoch": 1.6992974978293471, "grad_norm": 1.8125, "learning_rate": 9.955151119072715e-07, "loss": 0.4795, "step": 13582 }, { "epoch": 1.6994237903544085, "grad_norm": 2.125, "learning_rate": 9.946469159742234e-07, "loss": 0.4542, "step": 13583 }, { "epoch": 1.6995500828794696, "grad_norm": 2.015625, "learning_rate": 9.937790789728574e-07, "loss": 0.4459, "step": 13584 }, { "epoch": 1.6996763754045308, "grad_norm": 1.9375, "learning_rate": 9.929116009377648e-07, "loss": 0.5303, "step": 13585 }, { "epoch": 1.699802667929592, "grad_norm": 2.0625, "learning_rate": 9.920444819035202e-07, "loss": 0.5299, "step": 13586 }, { "epoch": 1.699928960454653, "grad_norm": 2.15625, "learning_rate": 9.91177721904687e-07, "loss": 0.5051, "step": 13587 }, { "epoch": 1.7000552529797144, "grad_norm": 1.78125, "learning_rate": 9.903113209758098e-07, "loss": 0.4499, "step": 13588 }, { "epoch": 1.7001815455047753, "grad_norm": 2.0, "learning_rate": 9.89445279151422e-07, "loss": 0.5522, "step": 13589 }, { "epoch": 1.7003078380298366, "grad_norm": 2.125, "learning_rate": 9.885795964660416e-07, "loss": 0.5628, "step": 13590 }, { "epoch": 1.7004341305548978, "grad_norm": 1.9609375, "learning_rate": 9.877142729541743e-07, "loss": 0.4901, "step": 13591 }, { "epoch": 1.700560423079959, "grad_norm": 2.015625, "learning_rate": 9.868493086503061e-07, "loss": 0.5578, "step": 13592 }, { "epoch": 1.7006867156050203, "grad_norm": 2.140625, "learning_rate": 9.859847035889169e-07, "loss": 0.5973, "step": 13593 }, { "epoch": 1.7008130081300812, "grad_norm": 1.9765625, "learning_rate": 9.85120457804466e-07, "loss": 0.4986, "step": 13594 }, { "epoch": 1.7009393006551425, "grad_norm": 2.21875, "learning_rate": 9.842565713314001e-07, "loss": 0.6015, "step": 13595 }, { "epoch": 1.7010655931802037, "grad_norm": 1.9296875, "learning_rate": 9.833930442041506e-07, "loss": 0.4912, "step": 13596 }, { "epoch": 1.7011918857052648, "grad_norm": 2.171875, "learning_rate": 9.825298764571368e-07, "loss": 0.533, "step": 13597 }, { "epoch": 1.701318178230326, "grad_norm": 2.0, "learning_rate": 9.816670681247631e-07, "loss": 0.5355, "step": 13598 }, { "epoch": 1.701444470755387, "grad_norm": 1.875, "learning_rate": 9.808046192414166e-07, "loss": 0.4485, "step": 13599 }, { "epoch": 1.7015707632804484, "grad_norm": 1.9453125, "learning_rate": 9.799425298414755e-07, "loss": 0.4699, "step": 13600 }, { "epoch": 1.7016970558055096, "grad_norm": 1.9765625, "learning_rate": 9.790807999592976e-07, "loss": 0.5261, "step": 13601 }, { "epoch": 1.7018233483305707, "grad_norm": 1.8984375, "learning_rate": 9.782194296292314e-07, "loss": 0.4491, "step": 13602 }, { "epoch": 1.7019496408556318, "grad_norm": 2.21875, "learning_rate": 9.773584188856088e-07, "loss": 0.4741, "step": 13603 }, { "epoch": 1.702075933380693, "grad_norm": 1.9921875, "learning_rate": 9.764977677627463e-07, "loss": 0.5175, "step": 13604 }, { "epoch": 1.7022022259057543, "grad_norm": 1.9609375, "learning_rate": 9.756374762949494e-07, "loss": 0.5754, "step": 13605 }, { "epoch": 1.7023285184308152, "grad_norm": 2.234375, "learning_rate": 9.747775445165053e-07, "loss": 0.5687, "step": 13606 }, { "epoch": 1.7024548109558766, "grad_norm": 1.96875, "learning_rate": 9.739179724616898e-07, "loss": 0.4694, "step": 13607 }, { "epoch": 1.7025811034809377, "grad_norm": 2.015625, "learning_rate": 9.730587601647612e-07, "loss": 0.4959, "step": 13608 }, { "epoch": 1.7027073960059989, "grad_norm": 2.03125, "learning_rate": 9.721999076599697e-07, "loss": 0.5315, "step": 13609 }, { "epoch": 1.7028336885310602, "grad_norm": 1.921875, "learning_rate": 9.713414149815458e-07, "loss": 0.4115, "step": 13610 }, { "epoch": 1.7029599810561211, "grad_norm": 2.046875, "learning_rate": 9.704832821637056e-07, "loss": 0.5338, "step": 13611 }, { "epoch": 1.7030862735811825, "grad_norm": 1.84375, "learning_rate": 9.696255092406525e-07, "loss": 0.4251, "step": 13612 }, { "epoch": 1.7032125661062436, "grad_norm": 2.21875, "learning_rate": 9.687680962465762e-07, "loss": 0.5101, "step": 13613 }, { "epoch": 1.7033388586313047, "grad_norm": 2.125, "learning_rate": 9.679110432156502e-07, "loss": 0.5297, "step": 13614 }, { "epoch": 1.703465151156366, "grad_norm": 2.0625, "learning_rate": 9.67054350182035e-07, "loss": 0.4642, "step": 13615 }, { "epoch": 1.703591443681427, "grad_norm": 2.03125, "learning_rate": 9.661980171798758e-07, "loss": 0.484, "step": 13616 }, { "epoch": 1.7037177362064884, "grad_norm": 2.03125, "learning_rate": 9.653420442433049e-07, "loss": 0.4895, "step": 13617 }, { "epoch": 1.7038440287315495, "grad_norm": 1.90625, "learning_rate": 9.644864314064384e-07, "loss": 0.4428, "step": 13618 }, { "epoch": 1.7039703212566106, "grad_norm": 2.0625, "learning_rate": 9.636311787033791e-07, "loss": 0.5569, "step": 13619 }, { "epoch": 1.7040966137816718, "grad_norm": 1.96875, "learning_rate": 9.627762861682155e-07, "loss": 0.419, "step": 13620 }, { "epoch": 1.704222906306733, "grad_norm": 1.9609375, "learning_rate": 9.619217538350223e-07, "loss": 0.4163, "step": 13621 }, { "epoch": 1.7043491988317943, "grad_norm": 2.078125, "learning_rate": 9.610675817378578e-07, "loss": 0.4694, "step": 13622 }, { "epoch": 1.7044754913568552, "grad_norm": 2.140625, "learning_rate": 9.602137699107684e-07, "loss": 0.528, "step": 13623 }, { "epoch": 1.7046017838819165, "grad_norm": 1.9765625, "learning_rate": 9.593603183877843e-07, "loss": 0.5829, "step": 13624 }, { "epoch": 1.7047280764069777, "grad_norm": 2.0625, "learning_rate": 9.585072272029227e-07, "loss": 0.4747, "step": 13625 }, { "epoch": 1.7048543689320388, "grad_norm": 1.8984375, "learning_rate": 9.576544963901824e-07, "loss": 0.4695, "step": 13626 }, { "epoch": 1.7049806614571001, "grad_norm": 2.03125, "learning_rate": 9.568021259835559e-07, "loss": 0.4906, "step": 13627 }, { "epoch": 1.705106953982161, "grad_norm": 1.984375, "learning_rate": 9.55950116017015e-07, "loss": 0.4674, "step": 13628 }, { "epoch": 1.7052332465072224, "grad_norm": 2.03125, "learning_rate": 9.55098466524519e-07, "loss": 0.505, "step": 13629 }, { "epoch": 1.7053595390322835, "grad_norm": 1.96875, "learning_rate": 9.54247177540012e-07, "loss": 0.4735, "step": 13630 }, { "epoch": 1.7054858315573447, "grad_norm": 1.984375, "learning_rate": 9.533962490974236e-07, "loss": 0.4032, "step": 13631 }, { "epoch": 1.705612124082406, "grad_norm": 1.96875, "learning_rate": 9.52545681230671e-07, "loss": 0.527, "step": 13632 }, { "epoch": 1.705738416607467, "grad_norm": 2.1875, "learning_rate": 9.516954739736561e-07, "loss": 0.5057, "step": 13633 }, { "epoch": 1.7058647091325283, "grad_norm": 1.9765625, "learning_rate": 9.508456273602651e-07, "loss": 0.6098, "step": 13634 }, { "epoch": 1.7059910016575894, "grad_norm": 1.9921875, "learning_rate": 9.499961414243708e-07, "loss": 0.5487, "step": 13635 }, { "epoch": 1.7061172941826506, "grad_norm": 1.9609375, "learning_rate": 9.491470161998317e-07, "loss": 0.4739, "step": 13636 }, { "epoch": 1.7062435867077117, "grad_norm": 1.9765625, "learning_rate": 9.482982517204931e-07, "loss": 0.4542, "step": 13637 }, { "epoch": 1.7063698792327728, "grad_norm": 1.9140625, "learning_rate": 9.474498480201822e-07, "loss": 0.5175, "step": 13638 }, { "epoch": 1.7064961717578342, "grad_norm": 2.03125, "learning_rate": 9.466018051327175e-07, "loss": 0.444, "step": 13639 }, { "epoch": 1.706622464282895, "grad_norm": 1.8984375, "learning_rate": 9.457541230918976e-07, "loss": 0.4939, "step": 13640 }, { "epoch": 1.7067487568079565, "grad_norm": 1.9375, "learning_rate": 9.449068019315099e-07, "loss": 0.4299, "step": 13641 }, { "epoch": 1.7068750493330176, "grad_norm": 1.9296875, "learning_rate": 9.440598416853275e-07, "loss": 0.4561, "step": 13642 }, { "epoch": 1.7070013418580787, "grad_norm": 1.8984375, "learning_rate": 9.432132423871054e-07, "loss": 0.4776, "step": 13643 }, { "epoch": 1.70712763438314, "grad_norm": 2.1875, "learning_rate": 9.42367004070589e-07, "loss": 0.4617, "step": 13644 }, { "epoch": 1.707253926908201, "grad_norm": 2.15625, "learning_rate": 9.415211267695079e-07, "loss": 0.543, "step": 13645 }, { "epoch": 1.7073802194332623, "grad_norm": 1.9765625, "learning_rate": 9.406756105175751e-07, "loss": 0.4782, "step": 13646 }, { "epoch": 1.7075065119583235, "grad_norm": 2.046875, "learning_rate": 9.398304553484927e-07, "loss": 0.5302, "step": 13647 }, { "epoch": 1.7076328044833846, "grad_norm": 2.09375, "learning_rate": 9.389856612959458e-07, "loss": 0.5483, "step": 13648 }, { "epoch": 1.707759097008446, "grad_norm": 2.09375, "learning_rate": 9.381412283936053e-07, "loss": 0.5364, "step": 13649 }, { "epoch": 1.7078853895335069, "grad_norm": 2.046875, "learning_rate": 9.372971566751287e-07, "loss": 0.5165, "step": 13650 }, { "epoch": 1.7080116820585682, "grad_norm": 1.8984375, "learning_rate": 9.364534461741581e-07, "loss": 0.4328, "step": 13651 }, { "epoch": 1.7081379745836294, "grad_norm": 1.9921875, "learning_rate": 9.356100969243231e-07, "loss": 0.4541, "step": 13652 }, { "epoch": 1.7082642671086905, "grad_norm": 1.9375, "learning_rate": 9.347671089592358e-07, "loss": 0.4924, "step": 13653 }, { "epoch": 1.7083905596337516, "grad_norm": 2.109375, "learning_rate": 9.339244823124971e-07, "loss": 0.4724, "step": 13654 }, { "epoch": 1.7085168521588128, "grad_norm": 2.09375, "learning_rate": 9.330822170176912e-07, "loss": 0.4695, "step": 13655 }, { "epoch": 1.7086431446838741, "grad_norm": 2.078125, "learning_rate": 9.32240313108389e-07, "loss": 0.4825, "step": 13656 }, { "epoch": 1.708769437208935, "grad_norm": 1.875, "learning_rate": 9.313987706181471e-07, "loss": 0.5064, "step": 13657 }, { "epoch": 1.7088957297339964, "grad_norm": 1.8984375, "learning_rate": 9.305575895805075e-07, "loss": 0.4847, "step": 13658 }, { "epoch": 1.7090220222590575, "grad_norm": 2.0625, "learning_rate": 9.297167700289966e-07, "loss": 0.4395, "step": 13659 }, { "epoch": 1.7091483147841187, "grad_norm": 2.15625, "learning_rate": 9.288763119971289e-07, "loss": 0.4991, "step": 13660 }, { "epoch": 1.70927460730918, "grad_norm": 2.015625, "learning_rate": 9.28036215518402e-07, "loss": 0.4574, "step": 13661 }, { "epoch": 1.709400899834241, "grad_norm": 1.9375, "learning_rate": 9.271964806262979e-07, "loss": 0.4593, "step": 13662 }, { "epoch": 1.7095271923593023, "grad_norm": 1.9140625, "learning_rate": 9.26357107354291e-07, "loss": 0.4314, "step": 13663 }, { "epoch": 1.7096534848843634, "grad_norm": 1.921875, "learning_rate": 9.255180957358345e-07, "loss": 0.5812, "step": 13664 }, { "epoch": 1.7097797774094246, "grad_norm": 2.0625, "learning_rate": 9.246794458043695e-07, "loss": 0.4535, "step": 13665 }, { "epoch": 1.709906069934486, "grad_norm": 2.109375, "learning_rate": 9.238411575933226e-07, "loss": 0.4927, "step": 13666 }, { "epoch": 1.7100323624595468, "grad_norm": 2.109375, "learning_rate": 9.230032311361059e-07, "loss": 0.6025, "step": 13667 }, { "epoch": 1.7101586549846082, "grad_norm": 1.96875, "learning_rate": 9.221656664661161e-07, "loss": 0.4648, "step": 13668 }, { "epoch": 1.7102849475096693, "grad_norm": 2.0625, "learning_rate": 9.213284636167385e-07, "loss": 0.4753, "step": 13669 }, { "epoch": 1.7104112400347304, "grad_norm": 2.125, "learning_rate": 9.204916226213411e-07, "loss": 0.5135, "step": 13670 }, { "epoch": 1.7105375325597916, "grad_norm": 2.1875, "learning_rate": 9.196551435132783e-07, "loss": 0.5609, "step": 13671 }, { "epoch": 1.7106638250848527, "grad_norm": 2.0, "learning_rate": 9.188190263258901e-07, "loss": 0.4651, "step": 13672 }, { "epoch": 1.710790117609914, "grad_norm": 2.546875, "learning_rate": 9.179832710925018e-07, "loss": 0.6079, "step": 13673 }, { "epoch": 1.710916410134975, "grad_norm": 2.03125, "learning_rate": 9.171478778464249e-07, "loss": 0.5262, "step": 13674 }, { "epoch": 1.7110427026600363, "grad_norm": 2.125, "learning_rate": 9.163128466209559e-07, "loss": 0.5153, "step": 13675 }, { "epoch": 1.7111689951850975, "grad_norm": 1.8828125, "learning_rate": 9.15478177449377e-07, "loss": 0.4525, "step": 13676 }, { "epoch": 1.7112952877101586, "grad_norm": 2.34375, "learning_rate": 9.146438703649574e-07, "loss": 0.5743, "step": 13677 }, { "epoch": 1.71142158023522, "grad_norm": 2.3125, "learning_rate": 9.138099254009503e-07, "loss": 0.5355, "step": 13678 }, { "epoch": 1.7115478727602809, "grad_norm": 2.15625, "learning_rate": 9.129763425905924e-07, "loss": 0.4989, "step": 13679 }, { "epoch": 1.7116741652853422, "grad_norm": 1.828125, "learning_rate": 9.121431219671096e-07, "loss": 0.4367, "step": 13680 }, { "epoch": 1.7118004578104034, "grad_norm": 2.09375, "learning_rate": 9.113102635637139e-07, "loss": 0.4665, "step": 13681 }, { "epoch": 1.7119267503354645, "grad_norm": 2.0, "learning_rate": 9.104777674135989e-07, "loss": 0.5838, "step": 13682 }, { "epoch": 1.7120530428605258, "grad_norm": 1.9921875, "learning_rate": 9.09645633549947e-07, "loss": 0.4968, "step": 13683 }, { "epoch": 1.7121793353855868, "grad_norm": 2.09375, "learning_rate": 9.088138620059251e-07, "loss": 0.4601, "step": 13684 }, { "epoch": 1.712305627910648, "grad_norm": 1.9765625, "learning_rate": 9.079824528146852e-07, "loss": 0.4666, "step": 13685 }, { "epoch": 1.7124319204357092, "grad_norm": 2.140625, "learning_rate": 9.071514060093645e-07, "loss": 0.5057, "step": 13686 }, { "epoch": 1.7125582129607704, "grad_norm": 1.90625, "learning_rate": 9.063207216230884e-07, "loss": 0.5419, "step": 13687 }, { "epoch": 1.7126845054858315, "grad_norm": 1.90625, "learning_rate": 9.054903996889642e-07, "loss": 0.4964, "step": 13688 }, { "epoch": 1.7128107980108926, "grad_norm": 2.0625, "learning_rate": 9.046604402400871e-07, "loss": 0.5199, "step": 13689 }, { "epoch": 1.712937090535954, "grad_norm": 1.953125, "learning_rate": 9.038308433095367e-07, "loss": 0.514, "step": 13690 }, { "epoch": 1.713063383061015, "grad_norm": 2.28125, "learning_rate": 9.030016089303805e-07, "loss": 0.5321, "step": 13691 }, { "epoch": 1.7131896755860763, "grad_norm": 2.03125, "learning_rate": 9.021727371356681e-07, "loss": 0.5588, "step": 13692 }, { "epoch": 1.7133159681111374, "grad_norm": 1.953125, "learning_rate": 9.013442279584372e-07, "loss": 0.5084, "step": 13693 }, { "epoch": 1.7134422606361985, "grad_norm": 2.09375, "learning_rate": 9.005160814317093e-07, "loss": 0.4592, "step": 13694 }, { "epoch": 1.7135685531612599, "grad_norm": 1.9921875, "learning_rate": 8.996882975884936e-07, "loss": 0.432, "step": 13695 }, { "epoch": 1.7136948456863208, "grad_norm": 2.15625, "learning_rate": 8.988608764617823e-07, "loss": 0.5598, "step": 13696 }, { "epoch": 1.7138211382113822, "grad_norm": 1.953125, "learning_rate": 8.98033818084556e-07, "loss": 0.4489, "step": 13697 }, { "epoch": 1.7139474307364433, "grad_norm": 1.890625, "learning_rate": 8.97207122489775e-07, "loss": 0.3932, "step": 13698 }, { "epoch": 1.7140737232615044, "grad_norm": 1.9609375, "learning_rate": 8.96380789710396e-07, "loss": 0.5482, "step": 13699 }, { "epoch": 1.7142000157865658, "grad_norm": 1.8984375, "learning_rate": 8.955548197793496e-07, "loss": 0.435, "step": 13700 }, { "epoch": 1.7143263083116267, "grad_norm": 2.015625, "learning_rate": 8.947292127295603e-07, "loss": 0.515, "step": 13701 }, { "epoch": 1.714452600836688, "grad_norm": 1.96875, "learning_rate": 8.939039685939321e-07, "loss": 0.4928, "step": 13702 }, { "epoch": 1.7145788933617492, "grad_norm": 2.203125, "learning_rate": 8.930790874053585e-07, "loss": 0.5519, "step": 13703 }, { "epoch": 1.7147051858868103, "grad_norm": 1.890625, "learning_rate": 8.922545691967178e-07, "loss": 0.4879, "step": 13704 }, { "epoch": 1.7148314784118714, "grad_norm": 2.03125, "learning_rate": 8.914304140008711e-07, "loss": 0.4843, "step": 13705 }, { "epoch": 1.7149577709369326, "grad_norm": 1.8828125, "learning_rate": 8.906066218506704e-07, "loss": 0.446, "step": 13706 }, { "epoch": 1.715084063461994, "grad_norm": 2.046875, "learning_rate": 8.897831927789469e-07, "loss": 0.525, "step": 13707 }, { "epoch": 1.715210355987055, "grad_norm": 1.921875, "learning_rate": 8.889601268185233e-07, "loss": 0.4865, "step": 13708 }, { "epoch": 1.7153366485121162, "grad_norm": 2.015625, "learning_rate": 8.881374240022034e-07, "loss": 0.5115, "step": 13709 }, { "epoch": 1.7154629410371773, "grad_norm": 1.9609375, "learning_rate": 8.873150843627776e-07, "loss": 0.51, "step": 13710 }, { "epoch": 1.7155892335622385, "grad_norm": 1.875, "learning_rate": 8.864931079330241e-07, "loss": 0.5363, "step": 13711 }, { "epoch": 1.7157155260872998, "grad_norm": 2.03125, "learning_rate": 8.856714947457034e-07, "loss": 0.5193, "step": 13712 }, { "epoch": 1.7158418186123607, "grad_norm": 2.09375, "learning_rate": 8.848502448335649e-07, "loss": 0.5398, "step": 13713 }, { "epoch": 1.715968111137422, "grad_norm": 1.8984375, "learning_rate": 8.840293582293392e-07, "loss": 0.4481, "step": 13714 }, { "epoch": 1.7160944036624832, "grad_norm": 1.921875, "learning_rate": 8.832088349657463e-07, "loss": 0.4676, "step": 13715 }, { "epoch": 1.7162206961875444, "grad_norm": 1.96875, "learning_rate": 8.823886750754884e-07, "loss": 0.5356, "step": 13716 }, { "epoch": 1.7163469887126057, "grad_norm": 2.0625, "learning_rate": 8.815688785912591e-07, "loss": 0.5493, "step": 13717 }, { "epoch": 1.7164732812376666, "grad_norm": 1.96875, "learning_rate": 8.80749445545731e-07, "loss": 0.4741, "step": 13718 }, { "epoch": 1.716599573762728, "grad_norm": 1.859375, "learning_rate": 8.799303759715649e-07, "loss": 0.4807, "step": 13719 }, { "epoch": 1.7167258662877891, "grad_norm": 1.9921875, "learning_rate": 8.791116699014057e-07, "loss": 0.4999, "step": 13720 }, { "epoch": 1.7168521588128502, "grad_norm": 1.8671875, "learning_rate": 8.782933273678884e-07, "loss": 0.4405, "step": 13721 }, { "epoch": 1.7169784513379114, "grad_norm": 1.8359375, "learning_rate": 8.774753484036269e-07, "loss": 0.4184, "step": 13722 }, { "epoch": 1.7171047438629725, "grad_norm": 2.140625, "learning_rate": 8.766577330412252e-07, "loss": 0.5146, "step": 13723 }, { "epoch": 1.7172310363880339, "grad_norm": 1.875, "learning_rate": 8.758404813132704e-07, "loss": 0.4095, "step": 13724 }, { "epoch": 1.717357328913095, "grad_norm": 1.953125, "learning_rate": 8.750235932523388e-07, "loss": 0.4659, "step": 13725 }, { "epoch": 1.7174836214381561, "grad_norm": 1.984375, "learning_rate": 8.742070688909865e-07, "loss": 0.6381, "step": 13726 }, { "epoch": 1.7176099139632173, "grad_norm": 1.921875, "learning_rate": 8.733909082617598e-07, "loss": 0.4632, "step": 13727 }, { "epoch": 1.7177362064882784, "grad_norm": 2.234375, "learning_rate": 8.725751113971891e-07, "loss": 0.5051, "step": 13728 }, { "epoch": 1.7178624990133398, "grad_norm": 2.109375, "learning_rate": 8.7175967832979e-07, "loss": 0.4937, "step": 13729 }, { "epoch": 1.7179887915384007, "grad_norm": 1.9921875, "learning_rate": 8.709446090920626e-07, "loss": 0.4973, "step": 13730 }, { "epoch": 1.718115084063462, "grad_norm": 2.140625, "learning_rate": 8.701299037164946e-07, "loss": 0.5369, "step": 13731 }, { "epoch": 1.7182413765885232, "grad_norm": 2.0625, "learning_rate": 8.693155622355587e-07, "loss": 0.453, "step": 13732 }, { "epoch": 1.7183676691135843, "grad_norm": 1.9609375, "learning_rate": 8.685015846817113e-07, "loss": 0.4815, "step": 13733 }, { "epoch": 1.7184939616386457, "grad_norm": 1.90625, "learning_rate": 8.676879710873943e-07, "loss": 0.4895, "step": 13734 }, { "epoch": 1.7186202541637066, "grad_norm": 1.8984375, "learning_rate": 8.668747214850392e-07, "loss": 0.4726, "step": 13735 }, { "epoch": 1.718746546688768, "grad_norm": 2.046875, "learning_rate": 8.660618359070605e-07, "loss": 0.552, "step": 13736 }, { "epoch": 1.718872839213829, "grad_norm": 2.265625, "learning_rate": 8.652493143858553e-07, "loss": 0.5859, "step": 13737 }, { "epoch": 1.7189991317388902, "grad_norm": 2.0625, "learning_rate": 8.644371569538112e-07, "loss": 0.5336, "step": 13738 }, { "epoch": 1.7191254242639515, "grad_norm": 2.203125, "learning_rate": 8.636253636432967e-07, "loss": 0.4708, "step": 13739 }, { "epoch": 1.7192517167890125, "grad_norm": 2.03125, "learning_rate": 8.628139344866704e-07, "loss": 0.48, "step": 13740 }, { "epoch": 1.7193780093140738, "grad_norm": 1.96875, "learning_rate": 8.620028695162708e-07, "loss": 0.5051, "step": 13741 }, { "epoch": 1.719504301839135, "grad_norm": 2.03125, "learning_rate": 8.611921687644276e-07, "loss": 0.5013, "step": 13742 }, { "epoch": 1.719630594364196, "grad_norm": 2.09375, "learning_rate": 8.603818322634516e-07, "loss": 0.4834, "step": 13743 }, { "epoch": 1.7197568868892572, "grad_norm": 2.109375, "learning_rate": 8.595718600456415e-07, "loss": 0.5254, "step": 13744 }, { "epoch": 1.7198831794143183, "grad_norm": 1.9765625, "learning_rate": 8.587622521432815e-07, "loss": 0.4447, "step": 13745 }, { "epoch": 1.7200094719393797, "grad_norm": 2.171875, "learning_rate": 8.5795300858864e-07, "loss": 0.5286, "step": 13746 }, { "epoch": 1.7201357644644406, "grad_norm": 1.9140625, "learning_rate": 8.571441294139715e-07, "loss": 0.4883, "step": 13747 }, { "epoch": 1.720262056989502, "grad_norm": 1.921875, "learning_rate": 8.563356146515156e-07, "loss": 0.4912, "step": 13748 }, { "epoch": 1.720388349514563, "grad_norm": 1.9765625, "learning_rate": 8.555274643334988e-07, "loss": 0.5119, "step": 13749 }, { "epoch": 1.7205146420396242, "grad_norm": 2.03125, "learning_rate": 8.547196784921319e-07, "loss": 0.4956, "step": 13750 }, { "epoch": 1.7206409345646856, "grad_norm": 2.09375, "learning_rate": 8.539122571596093e-07, "loss": 0.5396, "step": 13751 }, { "epoch": 1.7207672270897465, "grad_norm": 2.234375, "learning_rate": 8.531052003681139e-07, "loss": 0.5471, "step": 13752 }, { "epoch": 1.7208935196148079, "grad_norm": 1.921875, "learning_rate": 8.522985081498148e-07, "loss": 0.468, "step": 13753 }, { "epoch": 1.721019812139869, "grad_norm": 1.921875, "learning_rate": 8.514921805368637e-07, "loss": 0.4893, "step": 13754 }, { "epoch": 1.7211461046649301, "grad_norm": 2.03125, "learning_rate": 8.506862175613983e-07, "loss": 0.4727, "step": 13755 }, { "epoch": 1.7212723971899915, "grad_norm": 1.9765625, "learning_rate": 8.498806192555431e-07, "loss": 0.4763, "step": 13756 }, { "epoch": 1.7213986897150524, "grad_norm": 1.8359375, "learning_rate": 8.490753856514077e-07, "loss": 0.46, "step": 13757 }, { "epoch": 1.7215249822401137, "grad_norm": 2.015625, "learning_rate": 8.482705167810856e-07, "loss": 0.4733, "step": 13758 }, { "epoch": 1.7216512747651749, "grad_norm": 2.0625, "learning_rate": 8.474660126766565e-07, "loss": 0.5414, "step": 13759 }, { "epoch": 1.721777567290236, "grad_norm": 1.9609375, "learning_rate": 8.466618733701881e-07, "loss": 0.4864, "step": 13760 }, { "epoch": 1.7219038598152971, "grad_norm": 2.15625, "learning_rate": 8.458580988937293e-07, "loss": 0.5772, "step": 13761 }, { "epoch": 1.7220301523403583, "grad_norm": 1.875, "learning_rate": 8.450546892793176e-07, "loss": 0.4273, "step": 13762 }, { "epoch": 1.7221564448654196, "grad_norm": 2.015625, "learning_rate": 8.442516445589755e-07, "loss": 0.5777, "step": 13763 }, { "epoch": 1.7222827373904805, "grad_norm": 1.984375, "learning_rate": 8.434489647647093e-07, "loss": 0.4684, "step": 13764 }, { "epoch": 1.722409029915542, "grad_norm": 1.9296875, "learning_rate": 8.426466499285135e-07, "loss": 0.498, "step": 13765 }, { "epoch": 1.722535322440603, "grad_norm": 1.875, "learning_rate": 8.418447000823638e-07, "loss": 0.388, "step": 13766 }, { "epoch": 1.7226616149656642, "grad_norm": 2.078125, "learning_rate": 8.410431152582266e-07, "loss": 0.48, "step": 13767 }, { "epoch": 1.7227879074907255, "grad_norm": 1.8671875, "learning_rate": 8.402418954880497e-07, "loss": 0.488, "step": 13768 }, { "epoch": 1.7229142000157864, "grad_norm": 1.9765625, "learning_rate": 8.394410408037678e-07, "loss": 0.4366, "step": 13769 }, { "epoch": 1.7230404925408478, "grad_norm": 1.9765625, "learning_rate": 8.386405512373008e-07, "loss": 0.447, "step": 13770 }, { "epoch": 1.723166785065909, "grad_norm": 1.984375, "learning_rate": 8.378404268205564e-07, "loss": 0.4669, "step": 13771 }, { "epoch": 1.72329307759097, "grad_norm": 2.0, "learning_rate": 8.370406675854237e-07, "loss": 0.471, "step": 13772 }, { "epoch": 1.7234193701160314, "grad_norm": 2.0, "learning_rate": 8.362412735637792e-07, "loss": 0.4612, "step": 13773 }, { "epoch": 1.7235456626410923, "grad_norm": 2.203125, "learning_rate": 8.354422447874855e-07, "loss": 0.5586, "step": 13774 }, { "epoch": 1.7236719551661537, "grad_norm": 1.90625, "learning_rate": 8.346435812883902e-07, "loss": 0.4482, "step": 13775 }, { "epoch": 1.7237982476912148, "grad_norm": 1.921875, "learning_rate": 8.338452830983257e-07, "loss": 0.4466, "step": 13776 }, { "epoch": 1.723924540216276, "grad_norm": 2.046875, "learning_rate": 8.330473502491099e-07, "loss": 0.4475, "step": 13777 }, { "epoch": 1.724050832741337, "grad_norm": 2.078125, "learning_rate": 8.322497827725461e-07, "loss": 0.5495, "step": 13778 }, { "epoch": 1.7241771252663982, "grad_norm": 1.9140625, "learning_rate": 8.314525807004248e-07, "loss": 0.5032, "step": 13779 }, { "epoch": 1.7243034177914596, "grad_norm": 2.046875, "learning_rate": 8.30655744064519e-07, "loss": 0.5259, "step": 13780 }, { "epoch": 1.7244297103165205, "grad_norm": 2.0625, "learning_rate": 8.298592728965904e-07, "loss": 0.5658, "step": 13781 }, { "epoch": 1.7245560028415818, "grad_norm": 1.9140625, "learning_rate": 8.290631672283833e-07, "loss": 0.5593, "step": 13782 }, { "epoch": 1.724682295366643, "grad_norm": 1.9609375, "learning_rate": 8.28267427091628e-07, "loss": 0.4556, "step": 13783 }, { "epoch": 1.724808587891704, "grad_norm": 2.078125, "learning_rate": 8.274720525180425e-07, "loss": 0.5085, "step": 13784 }, { "epoch": 1.7249348804167655, "grad_norm": 2.1875, "learning_rate": 8.266770435393279e-07, "loss": 0.5135, "step": 13785 }, { "epoch": 1.7250611729418264, "grad_norm": 2.09375, "learning_rate": 8.258824001871701e-07, "loss": 0.556, "step": 13786 }, { "epoch": 1.7251874654668877, "grad_norm": 1.796875, "learning_rate": 8.250881224932428e-07, "loss": 0.4103, "step": 13787 }, { "epoch": 1.7253137579919489, "grad_norm": 1.9140625, "learning_rate": 8.242942104892015e-07, "loss": 0.466, "step": 13788 }, { "epoch": 1.72544005051701, "grad_norm": 1.96875, "learning_rate": 8.235006642066945e-07, "loss": 0.4659, "step": 13789 }, { "epoch": 1.7255663430420713, "grad_norm": 1.8359375, "learning_rate": 8.227074836773474e-07, "loss": 0.4639, "step": 13790 }, { "epoch": 1.7256926355671323, "grad_norm": 1.8984375, "learning_rate": 8.219146689327762e-07, "loss": 0.4343, "step": 13791 }, { "epoch": 1.7258189280921936, "grad_norm": 2.046875, "learning_rate": 8.211222200045787e-07, "loss": 0.4719, "step": 13792 }, { "epoch": 1.7259452206172547, "grad_norm": 2.09375, "learning_rate": 8.203301369243411e-07, "loss": 0.5454, "step": 13793 }, { "epoch": 1.7260715131423159, "grad_norm": 1.8828125, "learning_rate": 8.195384197236334e-07, "loss": 0.4668, "step": 13794 }, { "epoch": 1.726197805667377, "grad_norm": 1.9296875, "learning_rate": 8.187470684340115e-07, "loss": 0.5407, "step": 13795 }, { "epoch": 1.7263240981924382, "grad_norm": 1.9140625, "learning_rate": 8.17956083087017e-07, "loss": 0.4509, "step": 13796 }, { "epoch": 1.7264503907174995, "grad_norm": 1.984375, "learning_rate": 8.171654637141768e-07, "loss": 0.5292, "step": 13797 }, { "epoch": 1.7265766832425604, "grad_norm": 1.9609375, "learning_rate": 8.163752103470036e-07, "loss": 0.445, "step": 13798 }, { "epoch": 1.7267029757676218, "grad_norm": 2.03125, "learning_rate": 8.15585323016993e-07, "loss": 0.4527, "step": 13799 }, { "epoch": 1.726829268292683, "grad_norm": 1.8671875, "learning_rate": 8.147958017556301e-07, "loss": 0.4332, "step": 13800 }, { "epoch": 1.726955560817744, "grad_norm": 2.0, "learning_rate": 8.14006646594383e-07, "loss": 0.4986, "step": 13801 }, { "epoch": 1.7270818533428054, "grad_norm": 2.09375, "learning_rate": 8.132178575647054e-07, "loss": 0.4972, "step": 13802 }, { "epoch": 1.7272081458678663, "grad_norm": 2.0625, "learning_rate": 8.124294346980355e-07, "loss": 0.4608, "step": 13803 }, { "epoch": 1.7273344383929277, "grad_norm": 2.09375, "learning_rate": 8.116413780257992e-07, "loss": 0.5071, "step": 13804 }, { "epoch": 1.7274607309179888, "grad_norm": 1.78125, "learning_rate": 8.108536875794048e-07, "loss": 0.4147, "step": 13805 }, { "epoch": 1.72758702344305, "grad_norm": 2.25, "learning_rate": 8.100663633902483e-07, "loss": 0.5558, "step": 13806 }, { "epoch": 1.7277133159681113, "grad_norm": 2.0, "learning_rate": 8.092794054897135e-07, "loss": 0.4973, "step": 13807 }, { "epoch": 1.7278396084931722, "grad_norm": 1.8828125, "learning_rate": 8.08492813909163e-07, "loss": 0.4685, "step": 13808 }, { "epoch": 1.7279659010182336, "grad_norm": 2.0, "learning_rate": 8.077065886799507e-07, "loss": 0.4532, "step": 13809 }, { "epoch": 1.7280921935432947, "grad_norm": 1.9921875, "learning_rate": 8.069207298334125e-07, "loss": 0.5205, "step": 13810 }, { "epoch": 1.7282184860683558, "grad_norm": 2.046875, "learning_rate": 8.061352374008713e-07, "loss": 0.5094, "step": 13811 }, { "epoch": 1.728344778593417, "grad_norm": 1.9375, "learning_rate": 8.053501114136342e-07, "loss": 0.48, "step": 13812 }, { "epoch": 1.728471071118478, "grad_norm": 2.109375, "learning_rate": 8.04565351902995e-07, "loss": 0.4703, "step": 13813 }, { "epoch": 1.7285973636435394, "grad_norm": 1.8359375, "learning_rate": 8.037809589002332e-07, "loss": 0.4158, "step": 13814 }, { "epoch": 1.7287236561686006, "grad_norm": 2.125, "learning_rate": 8.029969324366105e-07, "loss": 0.5259, "step": 13815 }, { "epoch": 1.7288499486936617, "grad_norm": 1.9296875, "learning_rate": 8.022132725433784e-07, "loss": 0.4342, "step": 13816 }, { "epoch": 1.7289762412187228, "grad_norm": 1.953125, "learning_rate": 8.01429979251771e-07, "loss": 0.4866, "step": 13817 }, { "epoch": 1.729102533743784, "grad_norm": 2.09375, "learning_rate": 8.006470525930088e-07, "loss": 0.5047, "step": 13818 }, { "epoch": 1.7292288262688453, "grad_norm": 1.8359375, "learning_rate": 7.998644925982968e-07, "loss": 0.4684, "step": 13819 }, { "epoch": 1.7293551187939062, "grad_norm": 1.8828125, "learning_rate": 7.990822992988267e-07, "loss": 0.4188, "step": 13820 }, { "epoch": 1.7294814113189676, "grad_norm": 2.046875, "learning_rate": 7.983004727257737e-07, "loss": 0.5142, "step": 13821 }, { "epoch": 1.7296077038440287, "grad_norm": 2.078125, "learning_rate": 7.975190129103005e-07, "loss": 0.5269, "step": 13822 }, { "epoch": 1.7297339963690899, "grad_norm": 1.8046875, "learning_rate": 7.967379198835535e-07, "loss": 0.4657, "step": 13823 }, { "epoch": 1.7298602888941512, "grad_norm": 1.96875, "learning_rate": 7.959571936766642e-07, "loss": 0.5165, "step": 13824 }, { "epoch": 1.7299865814192121, "grad_norm": 1.890625, "learning_rate": 7.951768343207533e-07, "loss": 0.4713, "step": 13825 }, { "epoch": 1.7301128739442735, "grad_norm": 2.0, "learning_rate": 7.943968418469228e-07, "loss": 0.4651, "step": 13826 }, { "epoch": 1.7302391664693346, "grad_norm": 1.9140625, "learning_rate": 7.93617216286261e-07, "loss": 0.4773, "step": 13827 }, { "epoch": 1.7303654589943958, "grad_norm": 2.078125, "learning_rate": 7.928379576698419e-07, "loss": 0.5331, "step": 13828 }, { "epoch": 1.7304917515194569, "grad_norm": 2.140625, "learning_rate": 7.920590660287242e-07, "loss": 0.4649, "step": 13829 }, { "epoch": 1.730618044044518, "grad_norm": 1.9765625, "learning_rate": 7.91280541393955e-07, "loss": 0.4628, "step": 13830 }, { "epoch": 1.7307443365695794, "grad_norm": 2.0, "learning_rate": 7.90502383796562e-07, "loss": 0.5297, "step": 13831 }, { "epoch": 1.7308706290946405, "grad_norm": 1.953125, "learning_rate": 7.897245932675612e-07, "loss": 0.5777, "step": 13832 }, { "epoch": 1.7309969216197016, "grad_norm": 2.09375, "learning_rate": 7.889471698379547e-07, "loss": 0.486, "step": 13833 }, { "epoch": 1.7311232141447628, "grad_norm": 2.125, "learning_rate": 7.881701135387265e-07, "loss": 0.4877, "step": 13834 }, { "epoch": 1.731249506669824, "grad_norm": 2.03125, "learning_rate": 7.873934244008508e-07, "loss": 0.5326, "step": 13835 }, { "epoch": 1.7313757991948853, "grad_norm": 2.0625, "learning_rate": 7.866171024552827e-07, "loss": 0.5037, "step": 13836 }, { "epoch": 1.7315020917199462, "grad_norm": 2.21875, "learning_rate": 7.858411477329653e-07, "loss": 0.5847, "step": 13837 }, { "epoch": 1.7316283842450075, "grad_norm": 2.171875, "learning_rate": 7.85065560264825e-07, "loss": 0.5994, "step": 13838 }, { "epoch": 1.7317546767700687, "grad_norm": 2.140625, "learning_rate": 7.84290340081777e-07, "loss": 0.4678, "step": 13839 }, { "epoch": 1.7318809692951298, "grad_norm": 1.8828125, "learning_rate": 7.835154872147188e-07, "loss": 0.4593, "step": 13840 }, { "epoch": 1.7320072618201912, "grad_norm": 2.265625, "learning_rate": 7.827410016945336e-07, "loss": 0.5197, "step": 13841 }, { "epoch": 1.732133554345252, "grad_norm": 1.96875, "learning_rate": 7.819668835520888e-07, "loss": 0.5378, "step": 13842 }, { "epoch": 1.7322598468703134, "grad_norm": 1.96875, "learning_rate": 7.811931328182431e-07, "loss": 0.4461, "step": 13843 }, { "epoch": 1.7323861393953746, "grad_norm": 1.984375, "learning_rate": 7.804197495238341e-07, "loss": 0.5308, "step": 13844 }, { "epoch": 1.7325124319204357, "grad_norm": 2.078125, "learning_rate": 7.796467336996871e-07, "loss": 0.549, "step": 13845 }, { "epoch": 1.732638724445497, "grad_norm": 2.1875, "learning_rate": 7.788740853766131e-07, "loss": 0.4887, "step": 13846 }, { "epoch": 1.732765016970558, "grad_norm": 2.015625, "learning_rate": 7.781018045854072e-07, "loss": 0.5233, "step": 13847 }, { "epoch": 1.7328913094956193, "grad_norm": 1.921875, "learning_rate": 7.773298913568506e-07, "loss": 0.4806, "step": 13848 }, { "epoch": 1.7330176020206804, "grad_norm": 2.03125, "learning_rate": 7.765583457217107e-07, "loss": 0.4396, "step": 13849 }, { "epoch": 1.7331438945457416, "grad_norm": 1.8828125, "learning_rate": 7.757871677107398e-07, "loss": 0.4374, "step": 13850 }, { "epoch": 1.7332701870708027, "grad_norm": 2.140625, "learning_rate": 7.750163573546743e-07, "loss": 0.5323, "step": 13851 }, { "epoch": 1.7333964795958638, "grad_norm": 1.8828125, "learning_rate": 7.742459146842362e-07, "loss": 0.4408, "step": 13852 }, { "epoch": 1.7335227721209252, "grad_norm": 2.234375, "learning_rate": 7.734758397301345e-07, "loss": 0.6209, "step": 13853 }, { "epoch": 1.7336490646459861, "grad_norm": 2.046875, "learning_rate": 7.727061325230633e-07, "loss": 0.5255, "step": 13854 }, { "epoch": 1.7337753571710475, "grad_norm": 2.296875, "learning_rate": 7.719367930936994e-07, "loss": 0.4847, "step": 13855 }, { "epoch": 1.7339016496961086, "grad_norm": 2.03125, "learning_rate": 7.711678214727081e-07, "loss": 0.426, "step": 13856 }, { "epoch": 1.7340279422211697, "grad_norm": 1.921875, "learning_rate": 7.703992176907382e-07, "loss": 0.3959, "step": 13857 }, { "epoch": 1.734154234746231, "grad_norm": 1.9296875, "learning_rate": 7.696309817784242e-07, "loss": 0.4236, "step": 13858 }, { "epoch": 1.734280527271292, "grad_norm": 1.9140625, "learning_rate": 7.688631137663871e-07, "loss": 0.4667, "step": 13859 }, { "epoch": 1.7344068197963534, "grad_norm": 1.78125, "learning_rate": 7.680956136852302e-07, "loss": 0.416, "step": 13860 }, { "epoch": 1.7345331123214145, "grad_norm": 1.8984375, "learning_rate": 7.673284815655469e-07, "loss": 0.5212, "step": 13861 }, { "epoch": 1.7346594048464756, "grad_norm": 1.9765625, "learning_rate": 7.665617174379125e-07, "loss": 0.4609, "step": 13862 }, { "epoch": 1.734785697371537, "grad_norm": 1.96875, "learning_rate": 7.657953213328872e-07, "loss": 0.3992, "step": 13863 }, { "epoch": 1.734911989896598, "grad_norm": 2.140625, "learning_rate": 7.650292932810188e-07, "loss": 0.5332, "step": 13864 }, { "epoch": 1.7350382824216593, "grad_norm": 2.09375, "learning_rate": 7.642636333128395e-07, "loss": 0.5865, "step": 13865 }, { "epoch": 1.7351645749467204, "grad_norm": 2.078125, "learning_rate": 7.634983414588659e-07, "loss": 0.4696, "step": 13866 }, { "epoch": 1.7352908674717815, "grad_norm": 2.109375, "learning_rate": 7.627334177496004e-07, "loss": 0.5276, "step": 13867 }, { "epoch": 1.7354171599968427, "grad_norm": 1.96875, "learning_rate": 7.619688622155319e-07, "loss": 0.4627, "step": 13868 }, { "epoch": 1.7355434525219038, "grad_norm": 2.140625, "learning_rate": 7.612046748871327e-07, "loss": 0.4925, "step": 13869 }, { "epoch": 1.7356697450469651, "grad_norm": 2.0625, "learning_rate": 7.604408557948628e-07, "loss": 0.531, "step": 13870 }, { "epoch": 1.735796037572026, "grad_norm": 1.8984375, "learning_rate": 7.596774049691658e-07, "loss": 0.5169, "step": 13871 }, { "epoch": 1.7359223300970874, "grad_norm": 1.796875, "learning_rate": 7.589143224404693e-07, "loss": 0.4023, "step": 13872 }, { "epoch": 1.7360486226221485, "grad_norm": 2.125, "learning_rate": 7.581516082391905e-07, "loss": 0.5299, "step": 13873 }, { "epoch": 1.7361749151472097, "grad_norm": 1.90625, "learning_rate": 7.573892623957279e-07, "loss": 0.4293, "step": 13874 }, { "epoch": 1.736301207672271, "grad_norm": 1.890625, "learning_rate": 7.566272849404676e-07, "loss": 0.4138, "step": 13875 }, { "epoch": 1.736427500197332, "grad_norm": 1.9765625, "learning_rate": 7.558656759037796e-07, "loss": 0.5557, "step": 13876 }, { "epoch": 1.7365537927223933, "grad_norm": 1.7578125, "learning_rate": 7.551044353160197e-07, "loss": 0.366, "step": 13877 }, { "epoch": 1.7366800852474544, "grad_norm": 1.9921875, "learning_rate": 7.543435632075291e-07, "loss": 0.5255, "step": 13878 }, { "epoch": 1.7368063777725156, "grad_norm": 1.953125, "learning_rate": 7.535830596086324e-07, "loss": 0.4522, "step": 13879 }, { "epoch": 1.736932670297577, "grad_norm": 2.125, "learning_rate": 7.528229245496455e-07, "loss": 0.5096, "step": 13880 }, { "epoch": 1.7370589628226378, "grad_norm": 1.765625, "learning_rate": 7.520631580608639e-07, "loss": 0.4214, "step": 13881 }, { "epoch": 1.7371852553476992, "grad_norm": 1.796875, "learning_rate": 7.513037601725704e-07, "loss": 0.4511, "step": 13882 }, { "epoch": 1.7373115478727603, "grad_norm": 1.9375, "learning_rate": 7.505447309150304e-07, "loss": 0.48, "step": 13883 }, { "epoch": 1.7374378403978215, "grad_norm": 2.09375, "learning_rate": 7.497860703185e-07, "loss": 0.5341, "step": 13884 }, { "epoch": 1.7375641329228826, "grad_norm": 2.1875, "learning_rate": 7.49027778413216e-07, "loss": 0.5053, "step": 13885 }, { "epoch": 1.7376904254479437, "grad_norm": 2.046875, "learning_rate": 7.482698552294032e-07, "loss": 0.5011, "step": 13886 }, { "epoch": 1.737816717973005, "grad_norm": 2.109375, "learning_rate": 7.475123007972685e-07, "loss": 0.4669, "step": 13887 }, { "epoch": 1.737943010498066, "grad_norm": 1.984375, "learning_rate": 7.467551151470076e-07, "loss": 0.5289, "step": 13888 }, { "epoch": 1.7380693030231273, "grad_norm": 1.921875, "learning_rate": 7.45998298308801e-07, "loss": 0.4667, "step": 13889 }, { "epoch": 1.7381955955481885, "grad_norm": 2.078125, "learning_rate": 7.452418503128111e-07, "loss": 0.708, "step": 13890 }, { "epoch": 1.7383218880732496, "grad_norm": 1.90625, "learning_rate": 7.444857711891906e-07, "loss": 0.4924, "step": 13891 }, { "epoch": 1.738448180598311, "grad_norm": 2.078125, "learning_rate": 7.437300609680742e-07, "loss": 0.5838, "step": 13892 }, { "epoch": 1.7385744731233719, "grad_norm": 1.9609375, "learning_rate": 7.429747196795823e-07, "loss": 0.5597, "step": 13893 }, { "epoch": 1.7387007656484332, "grad_norm": 2.0625, "learning_rate": 7.422197473538206e-07, "loss": 0.5133, "step": 13894 }, { "epoch": 1.7388270581734944, "grad_norm": 1.9140625, "learning_rate": 7.414651440208808e-07, "loss": 0.4412, "step": 13895 }, { "epoch": 1.7389533506985555, "grad_norm": 1.8203125, "learning_rate": 7.4071090971084e-07, "loss": 0.4349, "step": 13896 }, { "epoch": 1.7390796432236169, "grad_norm": 1.921875, "learning_rate": 7.399570444537584e-07, "loss": 0.5166, "step": 13897 }, { "epoch": 1.7392059357486778, "grad_norm": 1.9765625, "learning_rate": 7.392035482796867e-07, "loss": 0.4948, "step": 13898 }, { "epoch": 1.7393322282737391, "grad_norm": 1.9765625, "learning_rate": 7.384504212186549e-07, "loss": 0.4845, "step": 13899 }, { "epoch": 1.7394585207988003, "grad_norm": 2.109375, "learning_rate": 7.376976633006828e-07, "loss": 0.4867, "step": 13900 }, { "epoch": 1.7395848133238614, "grad_norm": 1.9453125, "learning_rate": 7.369452745557715e-07, "loss": 0.4886, "step": 13901 }, { "epoch": 1.7397111058489225, "grad_norm": 1.8828125, "learning_rate": 7.361932550139095e-07, "loss": 0.4415, "step": 13902 }, { "epoch": 1.7398373983739837, "grad_norm": 1.9453125, "learning_rate": 7.354416047050716e-07, "loss": 0.4816, "step": 13903 }, { "epoch": 1.739963690899045, "grad_norm": 2.03125, "learning_rate": 7.346903236592162e-07, "loss": 0.5007, "step": 13904 }, { "epoch": 1.740089983424106, "grad_norm": 2.015625, "learning_rate": 7.33939411906287e-07, "loss": 0.5669, "step": 13905 }, { "epoch": 1.7402162759491673, "grad_norm": 2.0, "learning_rate": 7.331888694762146e-07, "loss": 0.5116, "step": 13906 }, { "epoch": 1.7403425684742284, "grad_norm": 1.9921875, "learning_rate": 7.324386963989138e-07, "loss": 0.5166, "step": 13907 }, { "epoch": 1.7404688609992895, "grad_norm": 1.96875, "learning_rate": 7.316888927042842e-07, "loss": 0.5176, "step": 13908 }, { "epoch": 1.740595153524351, "grad_norm": 1.953125, "learning_rate": 7.309394584222107e-07, "loss": 0.4846, "step": 13909 }, { "epoch": 1.7407214460494118, "grad_norm": 1.9375, "learning_rate": 7.301903935825649e-07, "loss": 0.5314, "step": 13910 }, { "epoch": 1.7408477385744732, "grad_norm": 1.9140625, "learning_rate": 7.29441698215202e-07, "loss": 0.5254, "step": 13911 }, { "epoch": 1.7409740310995343, "grad_norm": 2.0, "learning_rate": 7.286933723499634e-07, "loss": 0.463, "step": 13912 }, { "epoch": 1.7411003236245954, "grad_norm": 1.875, "learning_rate": 7.279454160166755e-07, "loss": 0.5014, "step": 13913 }, { "epoch": 1.7412266161496568, "grad_norm": 1.9296875, "learning_rate": 7.271978292451498e-07, "loss": 0.4428, "step": 13914 }, { "epoch": 1.7413529086747177, "grad_norm": 2.0625, "learning_rate": 7.264506120651826e-07, "loss": 0.4616, "step": 13915 }, { "epoch": 1.741479201199779, "grad_norm": 2.046875, "learning_rate": 7.25703764506559e-07, "loss": 0.5128, "step": 13916 }, { "epoch": 1.7416054937248402, "grad_norm": 1.921875, "learning_rate": 7.249572865990451e-07, "loss": 0.4734, "step": 13917 }, { "epoch": 1.7417317862499013, "grad_norm": 2.0, "learning_rate": 7.242111783723926e-07, "loss": 0.5247, "step": 13918 }, { "epoch": 1.7418580787749625, "grad_norm": 2.015625, "learning_rate": 7.234654398563412e-07, "loss": 0.4795, "step": 13919 }, { "epoch": 1.7419843713000236, "grad_norm": 1.8984375, "learning_rate": 7.227200710806126e-07, "loss": 0.4519, "step": 13920 }, { "epoch": 1.742110663825085, "grad_norm": 1.9765625, "learning_rate": 7.219750720749174e-07, "loss": 0.5094, "step": 13921 }, { "epoch": 1.7422369563501459, "grad_norm": 1.9375, "learning_rate": 7.212304428689476e-07, "loss": 0.4816, "step": 13922 }, { "epoch": 1.7423632488752072, "grad_norm": 2.296875, "learning_rate": 7.204861834923827e-07, "loss": 0.4892, "step": 13923 }, { "epoch": 1.7424895414002683, "grad_norm": 1.9921875, "learning_rate": 7.197422939748877e-07, "loss": 0.5001, "step": 13924 }, { "epoch": 1.7426158339253295, "grad_norm": 2.046875, "learning_rate": 7.189987743461124e-07, "loss": 0.4674, "step": 13925 }, { "epoch": 1.7427421264503908, "grad_norm": 2.21875, "learning_rate": 7.182556246356909e-07, "loss": 0.5805, "step": 13926 }, { "epoch": 1.7428684189754517, "grad_norm": 1.875, "learning_rate": 7.175128448732438e-07, "loss": 0.4453, "step": 13927 }, { "epoch": 1.742994711500513, "grad_norm": 2.125, "learning_rate": 7.167704350883764e-07, "loss": 0.5348, "step": 13928 }, { "epoch": 1.7431210040255742, "grad_norm": 1.890625, "learning_rate": 7.160283953106783e-07, "loss": 0.4305, "step": 13929 }, { "epoch": 1.7432472965506354, "grad_norm": 2.09375, "learning_rate": 7.152867255697282e-07, "loss": 0.481, "step": 13930 }, { "epoch": 1.7433735890756967, "grad_norm": 1.9296875, "learning_rate": 7.145454258950846e-07, "loss": 0.5426, "step": 13931 }, { "epoch": 1.7434998816007576, "grad_norm": 1.984375, "learning_rate": 7.13804496316296e-07, "loss": 0.4556, "step": 13932 }, { "epoch": 1.743626174125819, "grad_norm": 2.03125, "learning_rate": 7.1306393686289e-07, "loss": 0.4562, "step": 13933 }, { "epoch": 1.7437524666508801, "grad_norm": 1.90625, "learning_rate": 7.123237475643885e-07, "loss": 0.4294, "step": 13934 }, { "epoch": 1.7438787591759413, "grad_norm": 2.109375, "learning_rate": 7.115839284502924e-07, "loss": 0.4981, "step": 13935 }, { "epoch": 1.7440050517010024, "grad_norm": 2.0, "learning_rate": 7.10844479550088e-07, "loss": 0.4975, "step": 13936 }, { "epoch": 1.7441313442260635, "grad_norm": 2.140625, "learning_rate": 7.101054008932473e-07, "loss": 0.5256, "step": 13937 }, { "epoch": 1.7442576367511249, "grad_norm": 1.984375, "learning_rate": 7.093666925092302e-07, "loss": 0.5049, "step": 13938 }, { "epoch": 1.744383929276186, "grad_norm": 2.03125, "learning_rate": 7.086283544274786e-07, "loss": 0.4576, "step": 13939 }, { "epoch": 1.7445102218012472, "grad_norm": 1.9765625, "learning_rate": 7.078903866774212e-07, "loss": 0.4964, "step": 13940 }, { "epoch": 1.7446365143263083, "grad_norm": 2.03125, "learning_rate": 7.071527892884711e-07, "loss": 0.4621, "step": 13941 }, { "epoch": 1.7447628068513694, "grad_norm": 2.0, "learning_rate": 7.064155622900281e-07, "loss": 0.5039, "step": 13942 }, { "epoch": 1.7448890993764308, "grad_norm": 2.015625, "learning_rate": 7.056787057114756e-07, "loss": 0.5328, "step": 13943 }, { "epoch": 1.7450153919014917, "grad_norm": 1.8984375, "learning_rate": 7.049422195821831e-07, "loss": 0.3988, "step": 13944 }, { "epoch": 1.745141684426553, "grad_norm": 2.015625, "learning_rate": 7.042061039315062e-07, "loss": 0.4903, "step": 13945 }, { "epoch": 1.7452679769516142, "grad_norm": 1.9453125, "learning_rate": 7.034703587887826e-07, "loss": 0.4432, "step": 13946 }, { "epoch": 1.7453942694766753, "grad_norm": 1.96875, "learning_rate": 7.027349841833386e-07, "loss": 0.4718, "step": 13947 }, { "epoch": 1.7455205620017367, "grad_norm": 2.0625, "learning_rate": 7.019999801444844e-07, "loss": 0.482, "step": 13948 }, { "epoch": 1.7456468545267976, "grad_norm": 1.9921875, "learning_rate": 7.012653467015152e-07, "loss": 0.4358, "step": 13949 }, { "epoch": 1.745773147051859, "grad_norm": 1.9765625, "learning_rate": 7.00531083883711e-07, "loss": 0.5217, "step": 13950 }, { "epoch": 1.74589943957692, "grad_norm": 1.9453125, "learning_rate": 6.997971917203383e-07, "loss": 0.4856, "step": 13951 }, { "epoch": 1.7460257321019812, "grad_norm": 1.9375, "learning_rate": 6.990636702406495e-07, "loss": 0.481, "step": 13952 }, { "epoch": 1.7461520246270423, "grad_norm": 2.0, "learning_rate": 6.98330519473881e-07, "loss": 0.6075, "step": 13953 }, { "epoch": 1.7462783171521035, "grad_norm": 1.9375, "learning_rate": 6.975977394492528e-07, "loss": 0.4068, "step": 13954 }, { "epoch": 1.7464046096771648, "grad_norm": 1.9609375, "learning_rate": 6.968653301959727e-07, "loss": 0.4974, "step": 13955 }, { "epoch": 1.746530902202226, "grad_norm": 1.890625, "learning_rate": 6.961332917432318e-07, "loss": 0.4494, "step": 13956 }, { "epoch": 1.746657194727287, "grad_norm": 1.8671875, "learning_rate": 6.954016241202088e-07, "loss": 0.4866, "step": 13957 }, { "epoch": 1.7467834872523482, "grad_norm": 2.484375, "learning_rate": 6.94670327356065e-07, "loss": 0.5223, "step": 13958 }, { "epoch": 1.7469097797774094, "grad_norm": 1.96875, "learning_rate": 6.939394014799494e-07, "loss": 0.5365, "step": 13959 }, { "epoch": 1.7470360723024707, "grad_norm": 1.875, "learning_rate": 6.932088465209941e-07, "loss": 0.4876, "step": 13960 }, { "epoch": 1.7471623648275316, "grad_norm": 2.09375, "learning_rate": 6.924786625083169e-07, "loss": 0.46, "step": 13961 }, { "epoch": 1.747288657352593, "grad_norm": 2.3125, "learning_rate": 6.917488494710223e-07, "loss": 0.4551, "step": 13962 }, { "epoch": 1.7474149498776541, "grad_norm": 1.8671875, "learning_rate": 6.910194074381971e-07, "loss": 0.4395, "step": 13963 }, { "epoch": 1.7475412424027152, "grad_norm": 2.140625, "learning_rate": 6.902903364389179e-07, "loss": 0.5687, "step": 13964 }, { "epoch": 1.7476675349277766, "grad_norm": 1.921875, "learning_rate": 6.895616365022407e-07, "loss": 0.5056, "step": 13965 }, { "epoch": 1.7477938274528375, "grad_norm": 2.015625, "learning_rate": 6.888333076572118e-07, "loss": 0.4695, "step": 13966 }, { "epoch": 1.7479201199778989, "grad_norm": 1.859375, "learning_rate": 6.881053499328594e-07, "loss": 0.4509, "step": 13967 }, { "epoch": 1.74804641250296, "grad_norm": 1.7890625, "learning_rate": 6.873777633581991e-07, "loss": 0.4936, "step": 13968 }, { "epoch": 1.7481727050280211, "grad_norm": 1.9453125, "learning_rate": 6.866505479622276e-07, "loss": 0.5125, "step": 13969 }, { "epoch": 1.7482989975530825, "grad_norm": 1.90625, "learning_rate": 6.859237037739353e-07, "loss": 0.5035, "step": 13970 }, { "epoch": 1.7484252900781434, "grad_norm": 2.109375, "learning_rate": 6.851972308222899e-07, "loss": 0.4731, "step": 13971 }, { "epoch": 1.7485515826032048, "grad_norm": 2.0, "learning_rate": 6.84471129136246e-07, "loss": 0.4661, "step": 13972 }, { "epoch": 1.748677875128266, "grad_norm": 1.9375, "learning_rate": 6.837453987447463e-07, "loss": 0.4821, "step": 13973 }, { "epoch": 1.748804167653327, "grad_norm": 2.203125, "learning_rate": 6.830200396767139e-07, "loss": 0.5675, "step": 13974 }, { "epoch": 1.7489304601783882, "grad_norm": 1.8671875, "learning_rate": 6.822950519610627e-07, "loss": 0.4798, "step": 13975 }, { "epoch": 1.7490567527034493, "grad_norm": 2.15625, "learning_rate": 6.815704356266861e-07, "loss": 0.4884, "step": 13976 }, { "epoch": 1.7491830452285106, "grad_norm": 1.9453125, "learning_rate": 6.808461907024688e-07, "loss": 0.4934, "step": 13977 }, { "epoch": 1.7493093377535716, "grad_norm": 1.9453125, "learning_rate": 6.801223172172743e-07, "loss": 0.4576, "step": 13978 }, { "epoch": 1.749435630278633, "grad_norm": 1.90625, "learning_rate": 6.793988151999565e-07, "loss": 0.4865, "step": 13979 }, { "epoch": 1.749561922803694, "grad_norm": 2.015625, "learning_rate": 6.786756846793518e-07, "loss": 0.4966, "step": 13980 }, { "epoch": 1.7496882153287552, "grad_norm": 2.0, "learning_rate": 6.779529256842832e-07, "loss": 0.5133, "step": 13981 }, { "epoch": 1.7498145078538165, "grad_norm": 2.03125, "learning_rate": 6.772305382435563e-07, "loss": 0.5456, "step": 13982 }, { "epoch": 1.7499408003788774, "grad_norm": 1.9609375, "learning_rate": 6.765085223859658e-07, "loss": 0.4606, "step": 13983 }, { "epoch": 1.7500670929039388, "grad_norm": 1.8671875, "learning_rate": 6.757868781402876e-07, "loss": 0.4445, "step": 13984 }, { "epoch": 1.750193385429, "grad_norm": 2.0625, "learning_rate": 6.750656055352867e-07, "loss": 0.4992, "step": 13985 }, { "epoch": 1.750319677954061, "grad_norm": 1.9609375, "learning_rate": 6.743447045997098e-07, "loss": 0.6212, "step": 13986 }, { "epoch": 1.7504459704791224, "grad_norm": 2.203125, "learning_rate": 6.736241753622885e-07, "loss": 0.5755, "step": 13987 }, { "epoch": 1.7505722630041833, "grad_norm": 2.0, "learning_rate": 6.729040178517454e-07, "loss": 0.4823, "step": 13988 }, { "epoch": 1.7506985555292447, "grad_norm": 2.046875, "learning_rate": 6.721842320967831e-07, "loss": 0.5638, "step": 13989 }, { "epoch": 1.7508248480543058, "grad_norm": 1.84375, "learning_rate": 6.714648181260897e-07, "loss": 0.4487, "step": 13990 }, { "epoch": 1.750951140579367, "grad_norm": 1.9921875, "learning_rate": 6.707457759683389e-07, "loss": 0.4767, "step": 13991 }, { "epoch": 1.751077433104428, "grad_norm": 1.734375, "learning_rate": 6.700271056521912e-07, "loss": 0.3971, "step": 13992 }, { "epoch": 1.7512037256294892, "grad_norm": 2.078125, "learning_rate": 6.693088072062903e-07, "loss": 0.5542, "step": 13993 }, { "epoch": 1.7513300181545506, "grad_norm": 2.171875, "learning_rate": 6.685908806592655e-07, "loss": 0.4999, "step": 13994 }, { "epoch": 1.7514563106796115, "grad_norm": 2.171875, "learning_rate": 6.678733260397329e-07, "loss": 0.6826, "step": 13995 }, { "epoch": 1.7515826032046728, "grad_norm": 2.046875, "learning_rate": 6.671561433762908e-07, "loss": 0.5229, "step": 13996 }, { "epoch": 1.751708895729734, "grad_norm": 2.109375, "learning_rate": 6.664393326975261e-07, "loss": 0.5343, "step": 13997 }, { "epoch": 1.7518351882547951, "grad_norm": 2.125, "learning_rate": 6.657228940320071e-07, "loss": 0.5344, "step": 13998 }, { "epoch": 1.7519614807798565, "grad_norm": 1.7265625, "learning_rate": 6.650068274082922e-07, "loss": 0.4334, "step": 13999 }, { "epoch": 1.7520877733049174, "grad_norm": 1.9375, "learning_rate": 6.642911328549195e-07, "loss": 0.4182, "step": 14000 }, { "epoch": 1.7522140658299787, "grad_norm": 2.03125, "learning_rate": 6.635758104004152e-07, "loss": 0.5651, "step": 14001 }, { "epoch": 1.7523403583550399, "grad_norm": 2.125, "learning_rate": 6.628608600732922e-07, "loss": 0.4837, "step": 14002 }, { "epoch": 1.752466650880101, "grad_norm": 2.09375, "learning_rate": 6.621462819020441e-07, "loss": 0.4885, "step": 14003 }, { "epoch": 1.7525929434051624, "grad_norm": 2.078125, "learning_rate": 6.614320759151538e-07, "loss": 0.509, "step": 14004 }, { "epoch": 1.7527192359302233, "grad_norm": 1.921875, "learning_rate": 6.607182421410874e-07, "loss": 0.3941, "step": 14005 }, { "epoch": 1.7528455284552846, "grad_norm": 1.828125, "learning_rate": 6.600047806082965e-07, "loss": 0.4201, "step": 14006 }, { "epoch": 1.7529718209803458, "grad_norm": 1.890625, "learning_rate": 6.592916913452196e-07, "loss": 0.4599, "step": 14007 }, { "epoch": 1.753098113505407, "grad_norm": 2.0, "learning_rate": 6.585789743802773e-07, "loss": 0.4939, "step": 14008 }, { "epoch": 1.753224406030468, "grad_norm": 1.96875, "learning_rate": 6.578666297418757e-07, "loss": 0.5236, "step": 14009 }, { "epoch": 1.7533506985555292, "grad_norm": 1.9609375, "learning_rate": 6.571546574584098e-07, "loss": 0.4772, "step": 14010 }, { "epoch": 1.7534769910805905, "grad_norm": 2.046875, "learning_rate": 6.564430575582547e-07, "loss": 0.4361, "step": 14011 }, { "epoch": 1.7536032836056514, "grad_norm": 2.234375, "learning_rate": 6.557318300697735e-07, "loss": 0.5858, "step": 14012 }, { "epoch": 1.7537295761307128, "grad_norm": 2.1875, "learning_rate": 6.550209750213154e-07, "loss": 0.4543, "step": 14013 }, { "epoch": 1.753855868655774, "grad_norm": 1.859375, "learning_rate": 6.543104924412124e-07, "loss": 0.4784, "step": 14014 }, { "epoch": 1.753982161180835, "grad_norm": 2.046875, "learning_rate": 6.536003823577819e-07, "loss": 0.5298, "step": 14015 }, { "epoch": 1.7541084537058964, "grad_norm": 2.234375, "learning_rate": 6.528906447993289e-07, "loss": 0.5747, "step": 14016 }, { "epoch": 1.7542347462309573, "grad_norm": 2.0625, "learning_rate": 6.521812797941407e-07, "loss": 0.5029, "step": 14017 }, { "epoch": 1.7543610387560187, "grad_norm": 1.8203125, "learning_rate": 6.514722873704904e-07, "loss": 0.4938, "step": 14018 }, { "epoch": 1.7544873312810798, "grad_norm": 1.9609375, "learning_rate": 6.507636675566375e-07, "loss": 0.4757, "step": 14019 }, { "epoch": 1.754613623806141, "grad_norm": 1.8671875, "learning_rate": 6.500554203808262e-07, "loss": 0.4421, "step": 14020 }, { "epoch": 1.7547399163312023, "grad_norm": 1.9453125, "learning_rate": 6.49347545871285e-07, "loss": 0.4558, "step": 14021 }, { "epoch": 1.7548662088562632, "grad_norm": 2.125, "learning_rate": 6.48640044056228e-07, "loss": 0.5099, "step": 14022 }, { "epoch": 1.7549925013813246, "grad_norm": 2.09375, "learning_rate": 6.479329149638535e-07, "loss": 0.4683, "step": 14023 }, { "epoch": 1.7551187939063857, "grad_norm": 1.90625, "learning_rate": 6.472261586223483e-07, "loss": 0.5131, "step": 14024 }, { "epoch": 1.7552450864314468, "grad_norm": 1.9375, "learning_rate": 6.465197750598806e-07, "loss": 0.4833, "step": 14025 }, { "epoch": 1.755371378956508, "grad_norm": 2.171875, "learning_rate": 6.458137643046058e-07, "loss": 0.5081, "step": 14026 }, { "epoch": 1.755497671481569, "grad_norm": 2.09375, "learning_rate": 6.451081263846637e-07, "loss": 0.5052, "step": 14027 }, { "epoch": 1.7556239640066305, "grad_norm": 2.109375, "learning_rate": 6.444028613281783e-07, "loss": 0.5423, "step": 14028 }, { "epoch": 1.7557502565316914, "grad_norm": 1.875, "learning_rate": 6.436979691632605e-07, "loss": 0.5014, "step": 14029 }, { "epoch": 1.7558765490567527, "grad_norm": 1.9609375, "learning_rate": 6.429934499180057e-07, "loss": 0.4699, "step": 14030 }, { "epoch": 1.7560028415818139, "grad_norm": 1.9453125, "learning_rate": 6.422893036204946e-07, "loss": 0.4802, "step": 14031 }, { "epoch": 1.756129134106875, "grad_norm": 1.8359375, "learning_rate": 6.415855302987928e-07, "loss": 0.5017, "step": 14032 }, { "epoch": 1.7562554266319363, "grad_norm": 2.109375, "learning_rate": 6.408821299809498e-07, "loss": 0.5108, "step": 14033 }, { "epoch": 1.7563817191569973, "grad_norm": 1.953125, "learning_rate": 6.401791026950022e-07, "loss": 0.4421, "step": 14034 }, { "epoch": 1.7565080116820586, "grad_norm": 1.953125, "learning_rate": 6.39476448468972e-07, "loss": 0.4339, "step": 14035 }, { "epoch": 1.7566343042071197, "grad_norm": 1.8359375, "learning_rate": 6.387741673308634e-07, "loss": 0.4337, "step": 14036 }, { "epoch": 1.7567605967321809, "grad_norm": 1.9921875, "learning_rate": 6.380722593086686e-07, "loss": 0.4359, "step": 14037 }, { "epoch": 1.7568868892572422, "grad_norm": 2.09375, "learning_rate": 6.373707244303639e-07, "loss": 0.5734, "step": 14038 }, { "epoch": 1.7570131817823031, "grad_norm": 2.125, "learning_rate": 6.366695627239116e-07, "loss": 0.5371, "step": 14039 }, { "epoch": 1.7571394743073645, "grad_norm": 2.09375, "learning_rate": 6.359687742172571e-07, "loss": 0.4638, "step": 14040 }, { "epoch": 1.7572657668324256, "grad_norm": 2.109375, "learning_rate": 6.352683589383302e-07, "loss": 0.5266, "step": 14041 }, { "epoch": 1.7573920593574868, "grad_norm": 1.8515625, "learning_rate": 6.34568316915053e-07, "loss": 0.4331, "step": 14042 }, { "epoch": 1.757518351882548, "grad_norm": 1.8984375, "learning_rate": 6.338686481753242e-07, "loss": 0.4322, "step": 14043 }, { "epoch": 1.757644644407609, "grad_norm": 1.8828125, "learning_rate": 6.331693527470306e-07, "loss": 0.4403, "step": 14044 }, { "epoch": 1.7577709369326704, "grad_norm": 1.9609375, "learning_rate": 6.324704306580454e-07, "loss": 0.4937, "step": 14045 }, { "epoch": 1.7578972294577315, "grad_norm": 2.078125, "learning_rate": 6.317718819362261e-07, "loss": 0.4806, "step": 14046 }, { "epoch": 1.7580235219827927, "grad_norm": 2.5, "learning_rate": 6.310737066094152e-07, "loss": 0.6069, "step": 14047 }, { "epoch": 1.7581498145078538, "grad_norm": 2.09375, "learning_rate": 6.303759047054392e-07, "loss": 0.4611, "step": 14048 }, { "epoch": 1.758276107032915, "grad_norm": 2.03125, "learning_rate": 6.296784762521124e-07, "loss": 0.5405, "step": 14049 }, { "epoch": 1.7584023995579763, "grad_norm": 2.0625, "learning_rate": 6.289814212772305e-07, "loss": 0.5256, "step": 14050 }, { "epoch": 1.7585286920830372, "grad_norm": 1.75, "learning_rate": 6.282847398085779e-07, "loss": 0.4124, "step": 14051 }, { "epoch": 1.7586549846080985, "grad_norm": 1.953125, "learning_rate": 6.275884318739234e-07, "loss": 0.5119, "step": 14052 }, { "epoch": 1.7587812771331597, "grad_norm": 2.0625, "learning_rate": 6.268924975010182e-07, "loss": 0.5004, "step": 14053 }, { "epoch": 1.7589075696582208, "grad_norm": 1.90625, "learning_rate": 6.261969367176013e-07, "loss": 0.4347, "step": 14054 }, { "epoch": 1.7590338621832822, "grad_norm": 1.984375, "learning_rate": 6.25501749551397e-07, "loss": 0.4924, "step": 14055 }, { "epoch": 1.759160154708343, "grad_norm": 2.09375, "learning_rate": 6.248069360301123e-07, "loss": 0.543, "step": 14056 }, { "epoch": 1.7592864472334044, "grad_norm": 1.9921875, "learning_rate": 6.241124961814426e-07, "loss": 0.4701, "step": 14057 }, { "epoch": 1.7594127397584656, "grad_norm": 2.0625, "learning_rate": 6.234184300330648e-07, "loss": 0.4937, "step": 14058 }, { "epoch": 1.7595390322835267, "grad_norm": 2.109375, "learning_rate": 6.227247376126421e-07, "loss": 0.4727, "step": 14059 }, { "epoch": 1.7596653248085878, "grad_norm": 2.03125, "learning_rate": 6.22031418947826e-07, "loss": 0.4574, "step": 14060 }, { "epoch": 1.759791617333649, "grad_norm": 2.046875, "learning_rate": 6.213384740662498e-07, "loss": 0.5142, "step": 14061 }, { "epoch": 1.7599179098587103, "grad_norm": 1.9296875, "learning_rate": 6.206459029955314e-07, "loss": 0.4406, "step": 14062 }, { "epoch": 1.7600442023837715, "grad_norm": 2.0625, "learning_rate": 6.199537057632765e-07, "loss": 0.4359, "step": 14063 }, { "epoch": 1.7601704949088326, "grad_norm": 1.9609375, "learning_rate": 6.192618823970731e-07, "loss": 0.4807, "step": 14064 }, { "epoch": 1.7602967874338937, "grad_norm": 2.171875, "learning_rate": 6.185704329244969e-07, "loss": 0.5306, "step": 14065 }, { "epoch": 1.7604230799589549, "grad_norm": 1.8984375, "learning_rate": 6.178793573731056e-07, "loss": 0.4759, "step": 14066 }, { "epoch": 1.7605493724840162, "grad_norm": 1.828125, "learning_rate": 6.171886557704454e-07, "loss": 0.481, "step": 14067 }, { "epoch": 1.7606756650090771, "grad_norm": 1.9921875, "learning_rate": 6.164983281440462e-07, "loss": 0.5089, "step": 14068 }, { "epoch": 1.7608019575341385, "grad_norm": 2.34375, "learning_rate": 6.158083745214205e-07, "loss": 0.608, "step": 14069 }, { "epoch": 1.7609282500591996, "grad_norm": 1.8984375, "learning_rate": 6.151187949300708e-07, "loss": 0.5024, "step": 14070 }, { "epoch": 1.7610545425842608, "grad_norm": 2.3125, "learning_rate": 6.144295893974806e-07, "loss": 0.509, "step": 14071 }, { "epoch": 1.761180835109322, "grad_norm": 1.9921875, "learning_rate": 6.137407579511212e-07, "loss": 0.5065, "step": 14072 }, { "epoch": 1.761307127634383, "grad_norm": 2.109375, "learning_rate": 6.130523006184464e-07, "loss": 0.5356, "step": 14073 }, { "epoch": 1.7614334201594444, "grad_norm": 1.8515625, "learning_rate": 6.123642174268962e-07, "loss": 0.4956, "step": 14074 }, { "epoch": 1.7615597126845055, "grad_norm": 2.0, "learning_rate": 6.116765084038978e-07, "loss": 0.5097, "step": 14075 }, { "epoch": 1.7616860052095666, "grad_norm": 1.953125, "learning_rate": 6.109891735768603e-07, "loss": 0.5297, "step": 14076 }, { "epoch": 1.761812297734628, "grad_norm": 1.9609375, "learning_rate": 6.103022129731784e-07, "loss": 0.4843, "step": 14077 }, { "epoch": 1.761938590259689, "grad_norm": 1.890625, "learning_rate": 6.096156266202347e-07, "loss": 0.4153, "step": 14078 }, { "epoch": 1.7620648827847503, "grad_norm": 1.9609375, "learning_rate": 6.089294145453939e-07, "loss": 0.4088, "step": 14079 }, { "epoch": 1.7621911753098114, "grad_norm": 1.9921875, "learning_rate": 6.082435767760075e-07, "loss": 0.5914, "step": 14080 }, { "epoch": 1.7623174678348725, "grad_norm": 1.9375, "learning_rate": 6.075581133394103e-07, "loss": 0.3955, "step": 14081 }, { "epoch": 1.7624437603599337, "grad_norm": 2.109375, "learning_rate": 6.068730242629228e-07, "loss": 0.503, "step": 14082 }, { "epoch": 1.7625700528849948, "grad_norm": 2.015625, "learning_rate": 6.06188309573853e-07, "loss": 0.5756, "step": 14083 }, { "epoch": 1.7626963454100562, "grad_norm": 1.859375, "learning_rate": 6.055039692994902e-07, "loss": 0.4696, "step": 14084 }, { "epoch": 1.762822637935117, "grad_norm": 1.921875, "learning_rate": 6.048200034671114e-07, "loss": 0.5026, "step": 14085 }, { "epoch": 1.7629489304601784, "grad_norm": 1.875, "learning_rate": 6.041364121039772e-07, "loss": 0.4937, "step": 14086 }, { "epoch": 1.7630752229852396, "grad_norm": 1.953125, "learning_rate": 6.034531952373346e-07, "loss": 0.4664, "step": 14087 }, { "epoch": 1.7632015155103007, "grad_norm": 1.9765625, "learning_rate": 6.02770352894414e-07, "loss": 0.5292, "step": 14088 }, { "epoch": 1.763327808035362, "grad_norm": 2.109375, "learning_rate": 6.020878851024336e-07, "loss": 0.4802, "step": 14089 }, { "epoch": 1.763454100560423, "grad_norm": 2.0625, "learning_rate": 6.01405791888593e-07, "loss": 0.5054, "step": 14090 }, { "epoch": 1.7635803930854843, "grad_norm": 1.984375, "learning_rate": 6.00724073280079e-07, "loss": 0.4798, "step": 14091 }, { "epoch": 1.7637066856105454, "grad_norm": 1.984375, "learning_rate": 6.000427293040645e-07, "loss": 0.4962, "step": 14092 }, { "epoch": 1.7638329781356066, "grad_norm": 2.0, "learning_rate": 5.993617599877066e-07, "loss": 0.4889, "step": 14093 }, { "epoch": 1.763959270660668, "grad_norm": 2.015625, "learning_rate": 5.986811653581447e-07, "loss": 0.4873, "step": 14094 }, { "epoch": 1.7640855631857288, "grad_norm": 2.046875, "learning_rate": 5.980009454425062e-07, "loss": 0.5403, "step": 14095 }, { "epoch": 1.7642118557107902, "grad_norm": 2.078125, "learning_rate": 5.973211002679046e-07, "loss": 0.5362, "step": 14096 }, { "epoch": 1.7643381482358513, "grad_norm": 2.15625, "learning_rate": 5.966416298614374e-07, "loss": 0.4841, "step": 14097 }, { "epoch": 1.7644644407609125, "grad_norm": 3.203125, "learning_rate": 5.959625342501851e-07, "loss": 0.4964, "step": 14098 }, { "epoch": 1.7645907332859736, "grad_norm": 1.90625, "learning_rate": 5.952838134612149e-07, "loss": 0.469, "step": 14099 }, { "epoch": 1.7647170258110347, "grad_norm": 2.0625, "learning_rate": 5.946054675215785e-07, "loss": 0.5201, "step": 14100 }, { "epoch": 1.764843318336096, "grad_norm": 2.046875, "learning_rate": 5.939274964583153e-07, "loss": 0.4708, "step": 14101 }, { "epoch": 1.764969610861157, "grad_norm": 1.8671875, "learning_rate": 5.932499002984449e-07, "loss": 0.4732, "step": 14102 }, { "epoch": 1.7650959033862184, "grad_norm": 1.90625, "learning_rate": 5.925726790689768e-07, "loss": 0.4575, "step": 14103 }, { "epoch": 1.7652221959112795, "grad_norm": 2.21875, "learning_rate": 5.918958327969015e-07, "loss": 0.5476, "step": 14104 }, { "epoch": 1.7653484884363406, "grad_norm": 2.15625, "learning_rate": 5.912193615091976e-07, "loss": 0.4678, "step": 14105 }, { "epoch": 1.765474780961402, "grad_norm": 2.234375, "learning_rate": 5.90543265232828e-07, "loss": 0.4794, "step": 14106 }, { "epoch": 1.765601073486463, "grad_norm": 1.890625, "learning_rate": 5.898675439947399e-07, "loss": 0.4473, "step": 14107 }, { "epoch": 1.7657273660115242, "grad_norm": 2.21875, "learning_rate": 5.891921978218662e-07, "loss": 0.4885, "step": 14108 }, { "epoch": 1.7658536585365854, "grad_norm": 1.9140625, "learning_rate": 5.885172267411232e-07, "loss": 0.4719, "step": 14109 }, { "epoch": 1.7659799510616465, "grad_norm": 2.03125, "learning_rate": 5.878426307794149e-07, "loss": 0.5463, "step": 14110 }, { "epoch": 1.7661062435867079, "grad_norm": 2.03125, "learning_rate": 5.871684099636288e-07, "loss": 0.4906, "step": 14111 }, { "epoch": 1.7662325361117688, "grad_norm": 2.15625, "learning_rate": 5.864945643206376e-07, "loss": 0.455, "step": 14112 }, { "epoch": 1.7663588286368301, "grad_norm": 2.046875, "learning_rate": 5.858210938772968e-07, "loss": 0.5454, "step": 14113 }, { "epoch": 1.7664851211618913, "grad_norm": 2.03125, "learning_rate": 5.851479986604547e-07, "loss": 0.4423, "step": 14114 }, { "epoch": 1.7666114136869524, "grad_norm": 2.09375, "learning_rate": 5.844752786969354e-07, "loss": 0.5496, "step": 14115 }, { "epoch": 1.7667377062120135, "grad_norm": 2.046875, "learning_rate": 5.838029340135531e-07, "loss": 0.4678, "step": 14116 }, { "epoch": 1.7668639987370747, "grad_norm": 1.859375, "learning_rate": 5.831309646371053e-07, "loss": 0.4141, "step": 14117 }, { "epoch": 1.766990291262136, "grad_norm": 2.4375, "learning_rate": 5.82459370594376e-07, "loss": 0.5616, "step": 14118 }, { "epoch": 1.767116583787197, "grad_norm": 2.15625, "learning_rate": 5.817881519121315e-07, "loss": 0.4853, "step": 14119 }, { "epoch": 1.7672428763122583, "grad_norm": 1.9609375, "learning_rate": 5.811173086171274e-07, "loss": 0.5153, "step": 14120 }, { "epoch": 1.7673691688373194, "grad_norm": 1.8984375, "learning_rate": 5.804468407360996e-07, "loss": 0.4234, "step": 14121 }, { "epoch": 1.7674954613623806, "grad_norm": 1.9921875, "learning_rate": 5.797767482957717e-07, "loss": 0.5028, "step": 14122 }, { "epoch": 1.767621753887442, "grad_norm": 2.078125, "learning_rate": 5.791070313228542e-07, "loss": 0.4666, "step": 14123 }, { "epoch": 1.7677480464125028, "grad_norm": 1.96875, "learning_rate": 5.78437689844038e-07, "loss": 0.4667, "step": 14124 }, { "epoch": 1.7678743389375642, "grad_norm": 1.90625, "learning_rate": 5.777687238860019e-07, "loss": 0.462, "step": 14125 }, { "epoch": 1.7680006314626253, "grad_norm": 1.9140625, "learning_rate": 5.771001334754101e-07, "loss": 0.3752, "step": 14126 }, { "epoch": 1.7681269239876864, "grad_norm": 1.9375, "learning_rate": 5.7643191863891e-07, "loss": 0.4225, "step": 14127 }, { "epoch": 1.7682532165127478, "grad_norm": 1.8671875, "learning_rate": 5.757640794031361e-07, "loss": 0.4455, "step": 14128 }, { "epoch": 1.7683795090378087, "grad_norm": 2.046875, "learning_rate": 5.750966157947058e-07, "loss": 0.534, "step": 14129 }, { "epoch": 1.76850580156287, "grad_norm": 2.125, "learning_rate": 5.744295278402235e-07, "loss": 0.5357, "step": 14130 }, { "epoch": 1.7686320940879312, "grad_norm": 2.40625, "learning_rate": 5.737628155662755e-07, "loss": 0.496, "step": 14131 }, { "epoch": 1.7687583866129923, "grad_norm": 2.203125, "learning_rate": 5.730964789994386e-07, "loss": 0.5563, "step": 14132 }, { "epoch": 1.7688846791380535, "grad_norm": 1.8828125, "learning_rate": 5.724305181662704e-07, "loss": 0.4437, "step": 14133 }, { "epoch": 1.7690109716631146, "grad_norm": 2.09375, "learning_rate": 5.71764933093314e-07, "loss": 0.5226, "step": 14134 }, { "epoch": 1.769137264188176, "grad_norm": 2.0625, "learning_rate": 5.710997238070981e-07, "loss": 0.4574, "step": 14135 }, { "epoch": 1.7692635567132369, "grad_norm": 2.078125, "learning_rate": 5.704348903341361e-07, "loss": 0.5116, "step": 14136 }, { "epoch": 1.7693898492382982, "grad_norm": 2.03125, "learning_rate": 5.697704327009268e-07, "loss": 0.4568, "step": 14137 }, { "epoch": 1.7695161417633594, "grad_norm": 1.875, "learning_rate": 5.691063509339534e-07, "loss": 0.4421, "step": 14138 }, { "epoch": 1.7696424342884205, "grad_norm": 2.203125, "learning_rate": 5.684426450596858e-07, "loss": 0.569, "step": 14139 }, { "epoch": 1.7697687268134819, "grad_norm": 1.8984375, "learning_rate": 5.677793151045763e-07, "loss": 0.4969, "step": 14140 }, { "epoch": 1.7698950193385428, "grad_norm": 2.046875, "learning_rate": 5.671163610950647e-07, "loss": 0.4426, "step": 14141 }, { "epoch": 1.7700213118636041, "grad_norm": 2.015625, "learning_rate": 5.664537830575756e-07, "loss": 0.4852, "step": 14142 }, { "epoch": 1.7701476043886653, "grad_norm": 2.0, "learning_rate": 5.657915810185155e-07, "loss": 0.5317, "step": 14143 }, { "epoch": 1.7702738969137264, "grad_norm": 2.015625, "learning_rate": 5.651297550042789e-07, "loss": 0.523, "step": 14144 }, { "epoch": 1.7704001894387877, "grad_norm": 2.15625, "learning_rate": 5.644683050412458e-07, "loss": 0.5358, "step": 14145 }, { "epoch": 1.7705264819638487, "grad_norm": 1.84375, "learning_rate": 5.638072311557785e-07, "loss": 0.5096, "step": 14146 }, { "epoch": 1.77065277448891, "grad_norm": 2.078125, "learning_rate": 5.631465333742247e-07, "loss": 0.4238, "step": 14147 }, { "epoch": 1.7707790670139711, "grad_norm": 2.015625, "learning_rate": 5.624862117229224e-07, "loss": 0.4824, "step": 14148 }, { "epoch": 1.7709053595390323, "grad_norm": 1.9921875, "learning_rate": 5.618262662281882e-07, "loss": 0.4894, "step": 14149 }, { "epoch": 1.7710316520640934, "grad_norm": 1.9921875, "learning_rate": 5.611666969163243e-07, "loss": 0.4567, "step": 14150 }, { "epoch": 1.7711579445891545, "grad_norm": 1.859375, "learning_rate": 5.60507503813622e-07, "loss": 0.4409, "step": 14151 }, { "epoch": 1.771284237114216, "grad_norm": 2.078125, "learning_rate": 5.598486869463549e-07, "loss": 0.4881, "step": 14152 }, { "epoch": 1.7714105296392768, "grad_norm": 2.09375, "learning_rate": 5.591902463407794e-07, "loss": 0.5343, "step": 14153 }, { "epoch": 1.7715368221643382, "grad_norm": 1.9609375, "learning_rate": 5.585321820231426e-07, "loss": 0.521, "step": 14154 }, { "epoch": 1.7716631146893993, "grad_norm": 1.9609375, "learning_rate": 5.578744940196712e-07, "loss": 0.5279, "step": 14155 }, { "epoch": 1.7717894072144604, "grad_norm": 2.0, "learning_rate": 5.572171823565797e-07, "loss": 0.557, "step": 14156 }, { "epoch": 1.7719156997395218, "grad_norm": 1.859375, "learning_rate": 5.565602470600662e-07, "loss": 0.4923, "step": 14157 }, { "epoch": 1.7720419922645827, "grad_norm": 2.140625, "learning_rate": 5.559036881563162e-07, "loss": 0.5437, "step": 14158 }, { "epoch": 1.772168284789644, "grad_norm": 2.03125, "learning_rate": 5.552475056714979e-07, "loss": 0.621, "step": 14159 }, { "epoch": 1.7722945773147052, "grad_norm": 1.890625, "learning_rate": 5.545916996317646e-07, "loss": 0.4807, "step": 14160 }, { "epoch": 1.7724208698397663, "grad_norm": 2.25, "learning_rate": 5.539362700632545e-07, "loss": 0.5197, "step": 14161 }, { "epoch": 1.7725471623648277, "grad_norm": 2.203125, "learning_rate": 5.532812169920942e-07, "loss": 0.4646, "step": 14162 }, { "epoch": 1.7726734548898886, "grad_norm": 1.8515625, "learning_rate": 5.526265404443876e-07, "loss": 0.4661, "step": 14163 }, { "epoch": 1.77279974741495, "grad_norm": 2.015625, "learning_rate": 5.519722404462335e-07, "loss": 0.5074, "step": 14164 }, { "epoch": 1.772926039940011, "grad_norm": 1.953125, "learning_rate": 5.513183170237102e-07, "loss": 0.514, "step": 14165 }, { "epoch": 1.7730523324650722, "grad_norm": 1.859375, "learning_rate": 5.506647702028789e-07, "loss": 0.468, "step": 14166 }, { "epoch": 1.7731786249901333, "grad_norm": 2.0625, "learning_rate": 5.5001160000979e-07, "loss": 0.54, "step": 14167 }, { "epoch": 1.7733049175151945, "grad_norm": 2.0625, "learning_rate": 5.493588064704769e-07, "loss": 0.591, "step": 14168 }, { "epoch": 1.7734312100402558, "grad_norm": 1.8359375, "learning_rate": 5.48706389610959e-07, "loss": 0.4898, "step": 14169 }, { "epoch": 1.773557502565317, "grad_norm": 2.15625, "learning_rate": 5.480543494572387e-07, "loss": 0.5346, "step": 14170 }, { "epoch": 1.773683795090378, "grad_norm": 1.953125, "learning_rate": 5.474026860353066e-07, "loss": 0.4557, "step": 14171 }, { "epoch": 1.7738100876154392, "grad_norm": 2.125, "learning_rate": 5.467513993711349e-07, "loss": 0.4786, "step": 14172 }, { "epoch": 1.7739363801405004, "grad_norm": 1.84375, "learning_rate": 5.461004894906829e-07, "loss": 0.461, "step": 14173 }, { "epoch": 1.7740626726655617, "grad_norm": 2.125, "learning_rate": 5.454499564198935e-07, "loss": 0.5028, "step": 14174 }, { "epoch": 1.7741889651906226, "grad_norm": 2.078125, "learning_rate": 5.447998001846977e-07, "loss": 0.4421, "step": 14175 }, { "epoch": 1.774315257715684, "grad_norm": 1.96875, "learning_rate": 5.441500208110062e-07, "loss": 0.4462, "step": 14176 }, { "epoch": 1.7744415502407451, "grad_norm": 1.984375, "learning_rate": 5.435006183247194e-07, "loss": 0.5537, "step": 14177 }, { "epoch": 1.7745678427658063, "grad_norm": 1.984375, "learning_rate": 5.428515927517209e-07, "loss": 0.5678, "step": 14178 }, { "epoch": 1.7746941352908676, "grad_norm": 2.09375, "learning_rate": 5.422029441178767e-07, "loss": 0.5139, "step": 14179 }, { "epoch": 1.7748204278159285, "grad_norm": 1.8984375, "learning_rate": 5.41554672449045e-07, "loss": 0.463, "step": 14180 }, { "epoch": 1.7749467203409899, "grad_norm": 2.078125, "learning_rate": 5.409067777710619e-07, "loss": 0.5538, "step": 14181 }, { "epoch": 1.775073012866051, "grad_norm": 1.8203125, "learning_rate": 5.4025926010975e-07, "loss": 0.4135, "step": 14182 }, { "epoch": 1.7751993053911121, "grad_norm": 1.84375, "learning_rate": 5.396121194909199e-07, "loss": 0.4449, "step": 14183 }, { "epoch": 1.7753255979161733, "grad_norm": 1.9140625, "learning_rate": 5.389653559403629e-07, "loss": 0.445, "step": 14184 }, { "epoch": 1.7754518904412344, "grad_norm": 1.890625, "learning_rate": 5.383189694838597e-07, "loss": 0.4392, "step": 14185 }, { "epoch": 1.7755781829662958, "grad_norm": 1.8359375, "learning_rate": 5.37672960147172e-07, "loss": 0.4881, "step": 14186 }, { "epoch": 1.775704475491357, "grad_norm": 1.9375, "learning_rate": 5.370273279560478e-07, "loss": 0.452, "step": 14187 }, { "epoch": 1.775830768016418, "grad_norm": 1.9375, "learning_rate": 5.363820729362224e-07, "loss": 0.4889, "step": 14188 }, { "epoch": 1.7759570605414792, "grad_norm": 1.9609375, "learning_rate": 5.357371951134127e-07, "loss": 0.4341, "step": 14189 }, { "epoch": 1.7760833530665403, "grad_norm": 1.9296875, "learning_rate": 5.350926945133228e-07, "loss": 0.4439, "step": 14190 }, { "epoch": 1.7762096455916017, "grad_norm": 1.8046875, "learning_rate": 5.344485711616399e-07, "loss": 0.414, "step": 14191 }, { "epoch": 1.7763359381166626, "grad_norm": 1.9453125, "learning_rate": 5.338048250840377e-07, "loss": 0.5301, "step": 14192 }, { "epoch": 1.776462230641724, "grad_norm": 2.078125, "learning_rate": 5.331614563061738e-07, "loss": 0.6185, "step": 14193 }, { "epoch": 1.776588523166785, "grad_norm": 1.8203125, "learning_rate": 5.32518464853693e-07, "loss": 0.4758, "step": 14194 }, { "epoch": 1.7767148156918462, "grad_norm": 1.8515625, "learning_rate": 5.318758507522203e-07, "loss": 0.5061, "step": 14195 }, { "epoch": 1.7768411082169075, "grad_norm": 1.7734375, "learning_rate": 5.312336140273721e-07, "loss": 0.4417, "step": 14196 }, { "epoch": 1.7769674007419685, "grad_norm": 2.265625, "learning_rate": 5.305917547047457e-07, "loss": 0.6677, "step": 14197 }, { "epoch": 1.7770936932670298, "grad_norm": 1.7734375, "learning_rate": 5.299502728099226e-07, "loss": 0.4691, "step": 14198 }, { "epoch": 1.777219985792091, "grad_norm": 2.015625, "learning_rate": 5.293091683684725e-07, "loss": 0.45, "step": 14199 }, { "epoch": 1.777346278317152, "grad_norm": 1.8828125, "learning_rate": 5.286684414059473e-07, "loss": 0.5143, "step": 14200 }, { "epoch": 1.7774725708422134, "grad_norm": 1.8984375, "learning_rate": 5.280280919478842e-07, "loss": 0.5177, "step": 14201 }, { "epoch": 1.7775988633672744, "grad_norm": 2.046875, "learning_rate": 5.273881200198073e-07, "loss": 0.5473, "step": 14202 }, { "epoch": 1.7777251558923357, "grad_norm": 2.078125, "learning_rate": 5.267485256472238e-07, "loss": 0.5207, "step": 14203 }, { "epoch": 1.7778514484173968, "grad_norm": 2.03125, "learning_rate": 5.261093088556257e-07, "loss": 0.5209, "step": 14204 }, { "epoch": 1.777977740942458, "grad_norm": 2.15625, "learning_rate": 5.254704696704915e-07, "loss": 0.4936, "step": 14205 }, { "epoch": 1.778104033467519, "grad_norm": 2.0625, "learning_rate": 5.24832008117283e-07, "loss": 0.5056, "step": 14206 }, { "epoch": 1.7782303259925802, "grad_norm": 1.96875, "learning_rate": 5.241939242214489e-07, "loss": 0.5168, "step": 14207 }, { "epoch": 1.7783566185176416, "grad_norm": 1.890625, "learning_rate": 5.235562180084219e-07, "loss": 0.4555, "step": 14208 }, { "epoch": 1.7784829110427025, "grad_norm": 2.125, "learning_rate": 5.229188895036174e-07, "loss": 0.4862, "step": 14209 }, { "epoch": 1.7786092035677639, "grad_norm": 1.9296875, "learning_rate": 5.222819387324396e-07, "loss": 0.4697, "step": 14210 }, { "epoch": 1.778735496092825, "grad_norm": 2.03125, "learning_rate": 5.216453657202736e-07, "loss": 0.5302, "step": 14211 }, { "epoch": 1.7788617886178861, "grad_norm": 2.015625, "learning_rate": 5.210091704924947e-07, "loss": 0.4696, "step": 14212 }, { "epoch": 1.7789880811429475, "grad_norm": 2.0625, "learning_rate": 5.203733530744581e-07, "loss": 0.5601, "step": 14213 }, { "epoch": 1.7791143736680084, "grad_norm": 2.0, "learning_rate": 5.19737913491507e-07, "loss": 0.4389, "step": 14214 }, { "epoch": 1.7792406661930698, "grad_norm": 1.984375, "learning_rate": 5.191028517689677e-07, "loss": 0.4587, "step": 14215 }, { "epoch": 1.7793669587181309, "grad_norm": 1.875, "learning_rate": 5.184681679321534e-07, "loss": 0.4301, "step": 14216 }, { "epoch": 1.779493251243192, "grad_norm": 1.875, "learning_rate": 5.178338620063595e-07, "loss": 0.4324, "step": 14217 }, { "epoch": 1.7796195437682534, "grad_norm": 2.09375, "learning_rate": 5.171999340168687e-07, "loss": 0.4906, "step": 14218 }, { "epoch": 1.7797458362933143, "grad_norm": 2.09375, "learning_rate": 5.165663839889479e-07, "loss": 0.5786, "step": 14219 }, { "epoch": 1.7798721288183756, "grad_norm": 1.8046875, "learning_rate": 5.159332119478488e-07, "loss": 0.4168, "step": 14220 }, { "epoch": 1.7799984213434368, "grad_norm": 2.03125, "learning_rate": 5.153004179188082e-07, "loss": 0.5251, "step": 14221 }, { "epoch": 1.780124713868498, "grad_norm": 1.84375, "learning_rate": 5.146680019270478e-07, "loss": 0.4682, "step": 14222 }, { "epoch": 1.780251006393559, "grad_norm": 1.8828125, "learning_rate": 5.140359639977732e-07, "loss": 0.4533, "step": 14223 }, { "epoch": 1.7803772989186202, "grad_norm": 1.984375, "learning_rate": 5.134043041561776e-07, "loss": 0.4881, "step": 14224 }, { "epoch": 1.7805035914436815, "grad_norm": 1.921875, "learning_rate": 5.127730224274363e-07, "loss": 0.5692, "step": 14225 }, { "epoch": 1.7806298839687424, "grad_norm": 2.015625, "learning_rate": 5.121421188367104e-07, "loss": 0.5831, "step": 14226 }, { "epoch": 1.7807561764938038, "grad_norm": 1.96875, "learning_rate": 5.115115934091452e-07, "loss": 0.4828, "step": 14227 }, { "epoch": 1.780882469018865, "grad_norm": 1.96875, "learning_rate": 5.108814461698752e-07, "loss": 0.4511, "step": 14228 }, { "epoch": 1.781008761543926, "grad_norm": 2.015625, "learning_rate": 5.102516771440147e-07, "loss": 0.4493, "step": 14229 }, { "epoch": 1.7811350540689874, "grad_norm": 2.15625, "learning_rate": 5.096222863566658e-07, "loss": 0.5618, "step": 14230 }, { "epoch": 1.7812613465940483, "grad_norm": 2.1875, "learning_rate": 5.089932738329129e-07, "loss": 0.4743, "step": 14231 }, { "epoch": 1.7813876391191097, "grad_norm": 1.953125, "learning_rate": 5.08364639597827e-07, "loss": 0.5112, "step": 14232 }, { "epoch": 1.7815139316441708, "grad_norm": 1.9375, "learning_rate": 5.077363836764648e-07, "loss": 0.407, "step": 14233 }, { "epoch": 1.781640224169232, "grad_norm": 1.859375, "learning_rate": 5.071085060938674e-07, "loss": 0.4114, "step": 14234 }, { "epoch": 1.7817665166942933, "grad_norm": 1.9609375, "learning_rate": 5.064810068750592e-07, "loss": 0.4518, "step": 14235 }, { "epoch": 1.7818928092193542, "grad_norm": 1.9453125, "learning_rate": 5.058538860450513e-07, "loss": 0.4952, "step": 14236 }, { "epoch": 1.7820191017444156, "grad_norm": 1.9296875, "learning_rate": 5.052271436288403e-07, "loss": 0.4594, "step": 14237 }, { "epoch": 1.7821453942694767, "grad_norm": 1.875, "learning_rate": 5.046007796514052e-07, "loss": 0.4507, "step": 14238 }, { "epoch": 1.7822716867945378, "grad_norm": 1.9765625, "learning_rate": 5.039747941377116e-07, "loss": 0.4846, "step": 14239 }, { "epoch": 1.782397979319599, "grad_norm": 1.875, "learning_rate": 5.033491871127105e-07, "loss": 0.4501, "step": 14240 }, { "epoch": 1.7825242718446601, "grad_norm": 1.9921875, "learning_rate": 5.027239586013366e-07, "loss": 0.5887, "step": 14241 }, { "epoch": 1.7826505643697215, "grad_norm": 2.34375, "learning_rate": 5.020991086285099e-07, "loss": 0.5247, "step": 14242 }, { "epoch": 1.7827768568947824, "grad_norm": 2.21875, "learning_rate": 5.014746372191348e-07, "loss": 0.4938, "step": 14243 }, { "epoch": 1.7829031494198437, "grad_norm": 1.9765625, "learning_rate": 5.008505443981038e-07, "loss": 0.4792, "step": 14244 }, { "epoch": 1.7830294419449049, "grad_norm": 1.984375, "learning_rate": 5.00226830190289e-07, "loss": 0.509, "step": 14245 }, { "epoch": 1.783155734469966, "grad_norm": 1.984375, "learning_rate": 4.996034946205508e-07, "loss": 0.4666, "step": 14246 }, { "epoch": 1.7832820269950274, "grad_norm": 1.9609375, "learning_rate": 4.989805377137347e-07, "loss": 0.4937, "step": 14247 }, { "epoch": 1.7834083195200883, "grad_norm": 2.0, "learning_rate": 4.983579594946697e-07, "loss": 0.4592, "step": 14248 }, { "epoch": 1.7835346120451496, "grad_norm": 1.9375, "learning_rate": 4.977357599881705e-07, "loss": 0.4659, "step": 14249 }, { "epoch": 1.7836609045702108, "grad_norm": 2.078125, "learning_rate": 4.971139392190361e-07, "loss": 0.5667, "step": 14250 }, { "epoch": 1.783787197095272, "grad_norm": 1.8828125, "learning_rate": 4.96492497212051e-07, "loss": 0.4582, "step": 14251 }, { "epoch": 1.7839134896203332, "grad_norm": 2.140625, "learning_rate": 4.958714339919845e-07, "loss": 0.5965, "step": 14252 }, { "epoch": 1.7840397821453942, "grad_norm": 2.234375, "learning_rate": 4.952507495835901e-07, "loss": 0.5909, "step": 14253 }, { "epoch": 1.7841660746704555, "grad_norm": 2.125, "learning_rate": 4.946304440116078e-07, "loss": 0.5674, "step": 14254 }, { "epoch": 1.7842923671955166, "grad_norm": 2.03125, "learning_rate": 4.940105173007603e-07, "loss": 0.5269, "step": 14255 }, { "epoch": 1.7844186597205778, "grad_norm": 2.046875, "learning_rate": 4.933909694757577e-07, "loss": 0.4956, "step": 14256 }, { "epoch": 1.784544952245639, "grad_norm": 2.109375, "learning_rate": 4.927718005612925e-07, "loss": 0.474, "step": 14257 }, { "epoch": 1.7846712447707, "grad_norm": 1.9453125, "learning_rate": 4.921530105820426e-07, "loss": 0.4766, "step": 14258 }, { "epoch": 1.7847975372957614, "grad_norm": 1.9453125, "learning_rate": 4.91534599562673e-07, "loss": 0.4211, "step": 14259 }, { "epoch": 1.7849238298208223, "grad_norm": 2.109375, "learning_rate": 4.909165675278327e-07, "loss": 0.4708, "step": 14260 }, { "epoch": 1.7850501223458837, "grad_norm": 1.8125, "learning_rate": 4.902989145021531e-07, "loss": 0.3979, "step": 14261 }, { "epoch": 1.7851764148709448, "grad_norm": 2.046875, "learning_rate": 4.896816405102533e-07, "loss": 0.5025, "step": 14262 }, { "epoch": 1.785302707396006, "grad_norm": 1.84375, "learning_rate": 4.890647455767372e-07, "loss": 0.4333, "step": 14263 }, { "epoch": 1.7854289999210673, "grad_norm": 2.0, "learning_rate": 4.884482297261916e-07, "loss": 0.4818, "step": 14264 }, { "epoch": 1.7855552924461282, "grad_norm": 1.8984375, "learning_rate": 4.878320929831892e-07, "loss": 0.4704, "step": 14265 }, { "epoch": 1.7856815849711896, "grad_norm": 2.15625, "learning_rate": 4.872163353722881e-07, "loss": 0.5148, "step": 14266 }, { "epoch": 1.7858078774962507, "grad_norm": 2.0625, "learning_rate": 4.866009569180307e-07, "loss": 0.4947, "step": 14267 }, { "epoch": 1.7859341700213118, "grad_norm": 1.734375, "learning_rate": 4.859859576449444e-07, "loss": 0.4025, "step": 14268 }, { "epoch": 1.7860604625463732, "grad_norm": 2.0, "learning_rate": 4.853713375775426e-07, "loss": 0.441, "step": 14269 }, { "epoch": 1.786186755071434, "grad_norm": 1.8125, "learning_rate": 4.847570967403214e-07, "loss": 0.4354, "step": 14270 }, { "epoch": 1.7863130475964955, "grad_norm": 2.140625, "learning_rate": 4.841432351577646e-07, "loss": 0.5556, "step": 14271 }, { "epoch": 1.7864393401215566, "grad_norm": 1.8671875, "learning_rate": 4.835297528543371e-07, "loss": 0.4304, "step": 14272 }, { "epoch": 1.7865656326466177, "grad_norm": 1.8125, "learning_rate": 4.829166498544913e-07, "loss": 0.4162, "step": 14273 }, { "epoch": 1.7866919251716789, "grad_norm": 2.109375, "learning_rate": 4.823039261826656e-07, "loss": 0.5225, "step": 14274 }, { "epoch": 1.78681821769674, "grad_norm": 1.8828125, "learning_rate": 4.816915818632795e-07, "loss": 0.5032, "step": 14275 }, { "epoch": 1.7869445102218013, "grad_norm": 1.953125, "learning_rate": 4.810796169207398e-07, "loss": 0.5193, "step": 14276 }, { "epoch": 1.7870708027468625, "grad_norm": 1.8359375, "learning_rate": 4.804680313794408e-07, "loss": 0.4047, "step": 14277 }, { "epoch": 1.7871970952719236, "grad_norm": 2.0625, "learning_rate": 4.798568252637559e-07, "loss": 0.5098, "step": 14278 }, { "epoch": 1.7873233877969847, "grad_norm": 1.875, "learning_rate": 4.79245998598048e-07, "loss": 0.4643, "step": 14279 }, { "epoch": 1.7874496803220459, "grad_norm": 1.8515625, "learning_rate": 4.786355514066621e-07, "loss": 0.4382, "step": 14280 }, { "epoch": 1.7875759728471072, "grad_norm": 2.03125, "learning_rate": 4.780254837139298e-07, "loss": 0.497, "step": 14281 }, { "epoch": 1.7877022653721681, "grad_norm": 2.203125, "learning_rate": 4.77415795544166e-07, "loss": 0.4793, "step": 14282 }, { "epoch": 1.7878285578972295, "grad_norm": 2.09375, "learning_rate": 4.7680648692167154e-07, "loss": 0.5142, "step": 14283 }, { "epoch": 1.7879548504222906, "grad_norm": 1.84375, "learning_rate": 4.7619755787073343e-07, "loss": 0.4489, "step": 14284 }, { "epoch": 1.7880811429473518, "grad_norm": 1.8984375, "learning_rate": 4.7558900841562116e-07, "loss": 0.463, "step": 14285 }, { "epoch": 1.7882074354724131, "grad_norm": 1.8828125, "learning_rate": 4.7498083858058984e-07, "loss": 0.4214, "step": 14286 }, { "epoch": 1.788333727997474, "grad_norm": 2.03125, "learning_rate": 4.743730483898801e-07, "loss": 0.472, "step": 14287 }, { "epoch": 1.7884600205225354, "grad_norm": 2.015625, "learning_rate": 4.737656378677158e-07, "loss": 0.4252, "step": 14288 }, { "epoch": 1.7885863130475965, "grad_norm": 2.0, "learning_rate": 4.731586070383087e-07, "loss": 0.5577, "step": 14289 }, { "epoch": 1.7887126055726577, "grad_norm": 2.03125, "learning_rate": 4.725519559258518e-07, "loss": 0.4718, "step": 14290 }, { "epoch": 1.7888388980977188, "grad_norm": 1.90625, "learning_rate": 4.7194568455452674e-07, "loss": 0.4723, "step": 14291 }, { "epoch": 1.78896519062278, "grad_norm": 1.890625, "learning_rate": 4.713397929484964e-07, "loss": 0.4604, "step": 14292 }, { "epoch": 1.7890914831478413, "grad_norm": 2.0625, "learning_rate": 4.7073428113191045e-07, "loss": 0.5624, "step": 14293 }, { "epoch": 1.7892177756729024, "grad_norm": 2.015625, "learning_rate": 4.701291491289028e-07, "loss": 0.5015, "step": 14294 }, { "epoch": 1.7893440681979635, "grad_norm": 1.8984375, "learning_rate": 4.695243969635932e-07, "loss": 0.5305, "step": 14295 }, { "epoch": 1.7894703607230247, "grad_norm": 1.875, "learning_rate": 4.6892002466008666e-07, "loss": 0.4477, "step": 14296 }, { "epoch": 1.7895966532480858, "grad_norm": 2.078125, "learning_rate": 4.683160322424707e-07, "loss": 0.4905, "step": 14297 }, { "epoch": 1.7897229457731472, "grad_norm": 2.0625, "learning_rate": 4.677124197348193e-07, "loss": 0.4833, "step": 14298 }, { "epoch": 1.789849238298208, "grad_norm": 2.015625, "learning_rate": 4.671091871611899e-07, "loss": 0.4887, "step": 14299 }, { "epoch": 1.7899755308232694, "grad_norm": 2.0625, "learning_rate": 4.6650633454562776e-07, "loss": 0.4566, "step": 14300 }, { "epoch": 1.7901018233483306, "grad_norm": 2.125, "learning_rate": 4.659038619121603e-07, "loss": 0.5557, "step": 14301 }, { "epoch": 1.7902281158733917, "grad_norm": 1.9296875, "learning_rate": 4.653017692848005e-07, "loss": 0.4622, "step": 14302 }, { "epoch": 1.790354408398453, "grad_norm": 1.9296875, "learning_rate": 4.6470005668754704e-07, "loss": 0.4911, "step": 14303 }, { "epoch": 1.790480700923514, "grad_norm": 2.109375, "learning_rate": 4.6409872414438064e-07, "loss": 0.4993, "step": 14304 }, { "epoch": 1.7906069934485753, "grad_norm": 1.890625, "learning_rate": 4.6349777167927103e-07, "loss": 0.4613, "step": 14305 }, { "epoch": 1.7907332859736365, "grad_norm": 1.984375, "learning_rate": 4.628971993161713e-07, "loss": 0.5102, "step": 14306 }, { "epoch": 1.7908595784986976, "grad_norm": 2.03125, "learning_rate": 4.622970070790167e-07, "loss": 0.4509, "step": 14307 }, { "epoch": 1.790985871023759, "grad_norm": 2.28125, "learning_rate": 4.6169719499173037e-07, "loss": 0.5409, "step": 14308 }, { "epoch": 1.7911121635488199, "grad_norm": 1.9609375, "learning_rate": 4.6109776307821873e-07, "loss": 0.5109, "step": 14309 }, { "epoch": 1.7912384560738812, "grad_norm": 2.140625, "learning_rate": 4.604987113623749e-07, "loss": 0.5032, "step": 14310 }, { "epoch": 1.7913647485989423, "grad_norm": 1.8984375, "learning_rate": 4.599000398680753e-07, "loss": 0.4584, "step": 14311 }, { "epoch": 1.7914910411240035, "grad_norm": 1.8984375, "learning_rate": 4.5930174861917974e-07, "loss": 0.4698, "step": 14312 }, { "epoch": 1.7916173336490646, "grad_norm": 2.203125, "learning_rate": 4.587038376395381e-07, "loss": 0.4792, "step": 14313 }, { "epoch": 1.7917436261741257, "grad_norm": 2.046875, "learning_rate": 4.581063069529801e-07, "loss": 0.511, "step": 14314 }, { "epoch": 1.791869918699187, "grad_norm": 2.046875, "learning_rate": 4.575091565833212e-07, "loss": 0.4968, "step": 14315 }, { "epoch": 1.791996211224248, "grad_norm": 1.9765625, "learning_rate": 4.569123865543623e-07, "loss": 0.4972, "step": 14316 }, { "epoch": 1.7921225037493094, "grad_norm": 1.953125, "learning_rate": 4.5631599688989004e-07, "loss": 0.5032, "step": 14317 }, { "epoch": 1.7922487962743705, "grad_norm": 2.03125, "learning_rate": 4.5571998761367533e-07, "loss": 0.4963, "step": 14318 }, { "epoch": 1.7923750887994316, "grad_norm": 1.828125, "learning_rate": 4.551243587494725e-07, "loss": 0.4073, "step": 14319 }, { "epoch": 1.792501381324493, "grad_norm": 1.890625, "learning_rate": 4.5452911032102145e-07, "loss": 0.4792, "step": 14320 }, { "epoch": 1.792627673849554, "grad_norm": 2.125, "learning_rate": 4.5393424235204984e-07, "loss": 0.5486, "step": 14321 }, { "epoch": 1.7927539663746153, "grad_norm": 1.9609375, "learning_rate": 4.533397548662655e-07, "loss": 0.5104, "step": 14322 }, { "epoch": 1.7928802588996764, "grad_norm": 1.953125, "learning_rate": 4.5274564788736486e-07, "loss": 0.5147, "step": 14323 }, { "epoch": 1.7930065514247375, "grad_norm": 2.0625, "learning_rate": 4.5215192143902577e-07, "loss": 0.5246, "step": 14324 }, { "epoch": 1.7931328439497989, "grad_norm": 2.015625, "learning_rate": 4.515585755449137e-07, "loss": 0.5056, "step": 14325 }, { "epoch": 1.7932591364748598, "grad_norm": 2.296875, "learning_rate": 4.509656102286786e-07, "loss": 0.5308, "step": 14326 }, { "epoch": 1.7933854289999211, "grad_norm": 1.9453125, "learning_rate": 4.503730255139527e-07, "loss": 0.4569, "step": 14327 }, { "epoch": 1.7935117215249823, "grad_norm": 2.0625, "learning_rate": 4.497808214243571e-07, "loss": 0.4853, "step": 14328 }, { "epoch": 1.7936380140500434, "grad_norm": 2.046875, "learning_rate": 4.491889979834951e-07, "loss": 0.4875, "step": 14329 }, { "epoch": 1.7937643065751046, "grad_norm": 1.984375, "learning_rate": 4.485975552149535e-07, "loss": 0.4305, "step": 14330 }, { "epoch": 1.7938905991001657, "grad_norm": 2.0625, "learning_rate": 4.4800649314230893e-07, "loss": 0.5569, "step": 14331 }, { "epoch": 1.794016891625227, "grad_norm": 1.859375, "learning_rate": 4.474158117891181e-07, "loss": 0.4353, "step": 14332 }, { "epoch": 1.794143184150288, "grad_norm": 2.078125, "learning_rate": 4.468255111789233e-07, "loss": 0.4902, "step": 14333 }, { "epoch": 1.7942694766753493, "grad_norm": 2.078125, "learning_rate": 4.4623559133525453e-07, "loss": 0.4133, "step": 14334 }, { "epoch": 1.7943957692004104, "grad_norm": 1.921875, "learning_rate": 4.4564605228162193e-07, "loss": 0.4468, "step": 14335 }, { "epoch": 1.7945220617254716, "grad_norm": 2.171875, "learning_rate": 4.4505689404152563e-07, "loss": 0.5531, "step": 14336 }, { "epoch": 1.794648354250533, "grad_norm": 2.0625, "learning_rate": 4.444681166384468e-07, "loss": 0.5493, "step": 14337 }, { "epoch": 1.7947746467755938, "grad_norm": 1.890625, "learning_rate": 4.4387972009585334e-07, "loss": 0.4495, "step": 14338 }, { "epoch": 1.7949009393006552, "grad_norm": 2.09375, "learning_rate": 4.432917044371965e-07, "loss": 0.5385, "step": 14339 }, { "epoch": 1.7950272318257163, "grad_norm": 4.21875, "learning_rate": 4.4270406968591416e-07, "loss": 0.5436, "step": 14340 }, { "epoch": 1.7951535243507775, "grad_norm": 2.15625, "learning_rate": 4.4211681586542656e-07, "loss": 0.5288, "step": 14341 }, { "epoch": 1.7952798168758388, "grad_norm": 2.03125, "learning_rate": 4.4152994299914155e-07, "loss": 0.3973, "step": 14342 }, { "epoch": 1.7954061094008997, "grad_norm": 2.171875, "learning_rate": 4.4094345111044934e-07, "loss": 0.5922, "step": 14343 }, { "epoch": 1.795532401925961, "grad_norm": 2.171875, "learning_rate": 4.403573402227268e-07, "loss": 0.4102, "step": 14344 }, { "epoch": 1.7956586944510222, "grad_norm": 2.03125, "learning_rate": 4.3977161035933524e-07, "loss": 0.5043, "step": 14345 }, { "epoch": 1.7957849869760834, "grad_norm": 2.015625, "learning_rate": 4.3918626154362044e-07, "loss": 0.4317, "step": 14346 }, { "epoch": 1.7959112795011445, "grad_norm": 1.8671875, "learning_rate": 4.386012937989126e-07, "loss": 0.4722, "step": 14347 }, { "epoch": 1.7960375720262056, "grad_norm": 2.109375, "learning_rate": 4.380167071485253e-07, "loss": 0.5144, "step": 14348 }, { "epoch": 1.796163864551267, "grad_norm": 2.046875, "learning_rate": 4.37432501615761e-07, "loss": 0.5063, "step": 14349 }, { "epoch": 1.7962901570763279, "grad_norm": 1.9140625, "learning_rate": 4.3684867722390555e-07, "loss": 0.4983, "step": 14350 }, { "epoch": 1.7964164496013892, "grad_norm": 1.8671875, "learning_rate": 4.36265233996227e-07, "loss": 0.4379, "step": 14351 }, { "epoch": 1.7965427421264504, "grad_norm": 1.875, "learning_rate": 4.3568217195598117e-07, "loss": 0.516, "step": 14352 }, { "epoch": 1.7966690346515115, "grad_norm": 1.8671875, "learning_rate": 4.3509949112640615e-07, "loss": 0.4758, "step": 14353 }, { "epoch": 1.7967953271765729, "grad_norm": 1.8984375, "learning_rate": 4.345171915307278e-07, "loss": 0.4774, "step": 14354 }, { "epoch": 1.7969216197016338, "grad_norm": 1.9609375, "learning_rate": 4.339352731921531e-07, "loss": 0.5334, "step": 14355 }, { "epoch": 1.7970479122266951, "grad_norm": 1.84375, "learning_rate": 4.33353736133878e-07, "loss": 0.4334, "step": 14356 }, { "epoch": 1.7971742047517563, "grad_norm": 2.015625, "learning_rate": 4.3277258037907945e-07, "loss": 0.4545, "step": 14357 }, { "epoch": 1.7973004972768174, "grad_norm": 1.84375, "learning_rate": 4.3219180595092227e-07, "loss": 0.4756, "step": 14358 }, { "epoch": 1.7974267898018788, "grad_norm": 1.9140625, "learning_rate": 4.316114128725546e-07, "loss": 0.4962, "step": 14359 }, { "epoch": 1.7975530823269397, "grad_norm": 2.125, "learning_rate": 4.310314011671091e-07, "loss": 0.5204, "step": 14360 }, { "epoch": 1.797679374852001, "grad_norm": 2.125, "learning_rate": 4.304517708577027e-07, "loss": 0.5087, "step": 14361 }, { "epoch": 1.7978056673770622, "grad_norm": 2.125, "learning_rate": 4.298725219674393e-07, "loss": 0.5221, "step": 14362 }, { "epoch": 1.7979319599021233, "grad_norm": 2.375, "learning_rate": 4.2929365451940597e-07, "loss": 0.6104, "step": 14363 }, { "epoch": 1.7980582524271844, "grad_norm": 1.9140625, "learning_rate": 4.287151685366764e-07, "loss": 0.4459, "step": 14364 }, { "epoch": 1.7981845449522456, "grad_norm": 2.0, "learning_rate": 4.281370640423044e-07, "loss": 0.4767, "step": 14365 }, { "epoch": 1.798310837477307, "grad_norm": 2.125, "learning_rate": 4.275593410593337e-07, "loss": 0.5038, "step": 14366 }, { "epoch": 1.7984371300023678, "grad_norm": 1.9609375, "learning_rate": 4.2698199961079157e-07, "loss": 0.4579, "step": 14367 }, { "epoch": 1.7985634225274292, "grad_norm": 2.03125, "learning_rate": 4.264050397196895e-07, "loss": 0.6049, "step": 14368 }, { "epoch": 1.7986897150524903, "grad_norm": 1.953125, "learning_rate": 4.2582846140902355e-07, "loss": 0.4639, "step": 14369 }, { "epoch": 1.7988160075775514, "grad_norm": 2.15625, "learning_rate": 4.252522647017732e-07, "loss": 0.6025, "step": 14370 }, { "epoch": 1.7989423001026128, "grad_norm": 2.078125, "learning_rate": 4.246764496209055e-07, "loss": 0.5524, "step": 14371 }, { "epoch": 1.7990685926276737, "grad_norm": 2.265625, "learning_rate": 4.241010161893711e-07, "loss": 0.5748, "step": 14372 }, { "epoch": 1.799194885152735, "grad_norm": 1.890625, "learning_rate": 4.2352596443010597e-07, "loss": 0.4844, "step": 14373 }, { "epoch": 1.7993211776777962, "grad_norm": 2.0625, "learning_rate": 4.2295129436602855e-07, "loss": 0.4871, "step": 14374 }, { "epoch": 1.7994474702028573, "grad_norm": 1.859375, "learning_rate": 4.223770060200449e-07, "loss": 0.4589, "step": 14375 }, { "epoch": 1.7995737627279187, "grad_norm": 2.140625, "learning_rate": 4.218030994150446e-07, "loss": 0.5312, "step": 14376 }, { "epoch": 1.7997000552529796, "grad_norm": 1.859375, "learning_rate": 4.212295745739026e-07, "loss": 0.5007, "step": 14377 }, { "epoch": 1.799826347778041, "grad_norm": 2.109375, "learning_rate": 4.2065643151947724e-07, "loss": 0.508, "step": 14378 }, { "epoch": 1.799952640303102, "grad_norm": 1.9453125, "learning_rate": 4.200836702746136e-07, "loss": 0.4548, "step": 14379 }, { "epoch": 1.8000789328281632, "grad_norm": 1.953125, "learning_rate": 4.1951129086214015e-07, "loss": 0.4927, "step": 14380 }, { "epoch": 1.8002052253532244, "grad_norm": 2.1875, "learning_rate": 4.189392933048708e-07, "loss": 0.4748, "step": 14381 }, { "epoch": 1.8003315178782855, "grad_norm": 2.125, "learning_rate": 4.1836767762560403e-07, "loss": 0.4873, "step": 14382 }, { "epoch": 1.8004578104033468, "grad_norm": 2.0625, "learning_rate": 4.177964438471227e-07, "loss": 0.4803, "step": 14383 }, { "epoch": 1.8005841029284078, "grad_norm": 2.03125, "learning_rate": 4.1722559199219304e-07, "loss": 0.4878, "step": 14384 }, { "epoch": 1.8007103954534691, "grad_norm": 1.8125, "learning_rate": 4.166551220835713e-07, "loss": 0.4351, "step": 14385 }, { "epoch": 1.8008366879785302, "grad_norm": 2.171875, "learning_rate": 4.160850341439937e-07, "loss": 0.5217, "step": 14386 }, { "epoch": 1.8009629805035914, "grad_norm": 1.875, "learning_rate": 4.1551532819618215e-07, "loss": 0.388, "step": 14387 }, { "epoch": 1.8010892730286527, "grad_norm": 2.09375, "learning_rate": 4.149460042628439e-07, "loss": 0.5462, "step": 14388 }, { "epoch": 1.8012155655537136, "grad_norm": 2.203125, "learning_rate": 4.1437706236667095e-07, "loss": 0.5447, "step": 14389 }, { "epoch": 1.801341858078775, "grad_norm": 1.9140625, "learning_rate": 4.1380850253033955e-07, "loss": 0.4494, "step": 14390 }, { "epoch": 1.8014681506038361, "grad_norm": 1.9453125, "learning_rate": 4.1324032477651155e-07, "loss": 0.5038, "step": 14391 }, { "epoch": 1.8015944431288973, "grad_norm": 2.125, "learning_rate": 4.126725291278333e-07, "loss": 0.4755, "step": 14392 }, { "epoch": 1.8017207356539586, "grad_norm": 1.9609375, "learning_rate": 4.1210511560693555e-07, "loss": 0.4537, "step": 14393 }, { "epoch": 1.8018470281790195, "grad_norm": 2.03125, "learning_rate": 4.115380842364336e-07, "loss": 0.5265, "step": 14394 }, { "epoch": 1.801973320704081, "grad_norm": 2.03125, "learning_rate": 4.109714350389293e-07, "loss": 0.4424, "step": 14395 }, { "epoch": 1.802099613229142, "grad_norm": 2.09375, "learning_rate": 4.104051680370058e-07, "loss": 0.5232, "step": 14396 }, { "epoch": 1.8022259057542032, "grad_norm": 1.8984375, "learning_rate": 4.0983928325323497e-07, "loss": 0.4499, "step": 14397 }, { "epoch": 1.8023521982792643, "grad_norm": 2.0, "learning_rate": 4.0927378071017096e-07, "loss": 0.5135, "step": 14398 }, { "epoch": 1.8024784908043254, "grad_norm": 2.0, "learning_rate": 4.0870866043035253e-07, "loss": 0.5218, "step": 14399 }, { "epoch": 1.8026047833293868, "grad_norm": 1.8828125, "learning_rate": 4.0814392243630597e-07, "loss": 0.4406, "step": 14400 }, { "epoch": 1.802731075854448, "grad_norm": 2.078125, "learning_rate": 4.075795667505389e-07, "loss": 0.4014, "step": 14401 }, { "epoch": 1.802857368379509, "grad_norm": 2.265625, "learning_rate": 4.070155933955433e-07, "loss": 0.5753, "step": 14402 }, { "epoch": 1.8029836609045702, "grad_norm": 2.09375, "learning_rate": 4.0645200239380235e-07, "loss": 0.5553, "step": 14403 }, { "epoch": 1.8031099534296313, "grad_norm": 2.0, "learning_rate": 4.058887937677769e-07, "loss": 0.5024, "step": 14404 }, { "epoch": 1.8032362459546927, "grad_norm": 2.1875, "learning_rate": 4.053259675399157e-07, "loss": 0.5312, "step": 14405 }, { "epoch": 1.8033625384797536, "grad_norm": 1.828125, "learning_rate": 4.047635237326497e-07, "loss": 0.446, "step": 14406 }, { "epoch": 1.803488831004815, "grad_norm": 1.859375, "learning_rate": 4.0420146236839987e-07, "loss": 0.4276, "step": 14407 }, { "epoch": 1.803615123529876, "grad_norm": 2.09375, "learning_rate": 4.03639783469566e-07, "loss": 0.482, "step": 14408 }, { "epoch": 1.8037414160549372, "grad_norm": 2.0, "learning_rate": 4.030784870585358e-07, "loss": 0.5585, "step": 14409 }, { "epoch": 1.8038677085799986, "grad_norm": 1.9609375, "learning_rate": 4.0251757315768134e-07, "loss": 0.4656, "step": 14410 }, { "epoch": 1.8039940011050595, "grad_norm": 2.046875, "learning_rate": 4.019570417893604e-07, "loss": 0.5406, "step": 14411 }, { "epoch": 1.8041202936301208, "grad_norm": 1.953125, "learning_rate": 4.0139689297591265e-07, "loss": 0.4231, "step": 14412 }, { "epoch": 1.804246586155182, "grad_norm": 1.984375, "learning_rate": 4.008371267396649e-07, "loss": 0.4675, "step": 14413 }, { "epoch": 1.804372878680243, "grad_norm": 2.0625, "learning_rate": 4.002777431029281e-07, "loss": 0.5184, "step": 14414 }, { "epoch": 1.8044991712053042, "grad_norm": 1.859375, "learning_rate": 3.9971874208799886e-07, "loss": 0.4402, "step": 14415 }, { "epoch": 1.8046254637303654, "grad_norm": 2.015625, "learning_rate": 3.991601237171572e-07, "loss": 0.4654, "step": 14416 }, { "epoch": 1.8047517562554267, "grad_norm": 1.953125, "learning_rate": 3.986018880126663e-07, "loss": 0.4945, "step": 14417 }, { "epoch": 1.8048780487804879, "grad_norm": 2.140625, "learning_rate": 3.980440349967796e-07, "loss": 0.5402, "step": 14418 }, { "epoch": 1.805004341305549, "grad_norm": 1.8984375, "learning_rate": 3.974865646917292e-07, "loss": 0.4927, "step": 14419 }, { "epoch": 1.8051306338306101, "grad_norm": 2.0, "learning_rate": 3.9692947711973293e-07, "loss": 0.5261, "step": 14420 }, { "epoch": 1.8052569263556713, "grad_norm": 1.953125, "learning_rate": 3.9637277230299977e-07, "loss": 0.5032, "step": 14421 }, { "epoch": 1.8053832188807326, "grad_norm": 1.8515625, "learning_rate": 3.958164502637152e-07, "loss": 0.4766, "step": 14422 }, { "epoch": 1.8055095114057935, "grad_norm": 1.828125, "learning_rate": 3.9526051102405484e-07, "loss": 0.4405, "step": 14423 }, { "epoch": 1.8056358039308549, "grad_norm": 1.9453125, "learning_rate": 3.947049546061754e-07, "loss": 0.4863, "step": 14424 }, { "epoch": 1.805762096455916, "grad_norm": 1.765625, "learning_rate": 3.9414978103222036e-07, "loss": 0.4436, "step": 14425 }, { "epoch": 1.8058883889809771, "grad_norm": 1.9765625, "learning_rate": 3.935949903243186e-07, "loss": 0.5096, "step": 14426 }, { "epoch": 1.8060146815060385, "grad_norm": 2.1875, "learning_rate": 3.930405825045813e-07, "loss": 0.4332, "step": 14427 }, { "epoch": 1.8061409740310994, "grad_norm": 2.296875, "learning_rate": 3.9248655759510755e-07, "loss": 0.57, "step": 14428 }, { "epoch": 1.8062672665561608, "grad_norm": 2.109375, "learning_rate": 3.9193291561797854e-07, "loss": 0.4423, "step": 14429 }, { "epoch": 1.806393559081222, "grad_norm": 2.015625, "learning_rate": 3.913796565952599e-07, "loss": 0.4354, "step": 14430 }, { "epoch": 1.806519851606283, "grad_norm": 2.0, "learning_rate": 3.908267805490051e-07, "loss": 0.5559, "step": 14431 }, { "epoch": 1.8066461441313444, "grad_norm": 2.03125, "learning_rate": 3.9027428750124885e-07, "loss": 0.4988, "step": 14432 }, { "epoch": 1.8067724366564053, "grad_norm": 1.9609375, "learning_rate": 3.8972217747401455e-07, "loss": 0.5178, "step": 14433 }, { "epoch": 1.8068987291814667, "grad_norm": 2.125, "learning_rate": 3.891704504893046e-07, "loss": 0.54, "step": 14434 }, { "epoch": 1.8070250217065278, "grad_norm": 2.015625, "learning_rate": 3.886191065691125e-07, "loss": 0.535, "step": 14435 }, { "epoch": 1.807151314231589, "grad_norm": 2.078125, "learning_rate": 3.8806814573541185e-07, "loss": 0.5322, "step": 14436 }, { "epoch": 1.80727760675665, "grad_norm": 1.9921875, "learning_rate": 3.8751756801016396e-07, "loss": 0.5457, "step": 14437 }, { "epoch": 1.8074038992817112, "grad_norm": 1.7890625, "learning_rate": 3.869673734153112e-07, "loss": 0.4709, "step": 14438 }, { "epoch": 1.8075301918067725, "grad_norm": 2.015625, "learning_rate": 3.8641756197278503e-07, "loss": 0.6168, "step": 14439 }, { "epoch": 1.8076564843318335, "grad_norm": 1.953125, "learning_rate": 3.8586813370450007e-07, "loss": 0.4282, "step": 14440 }, { "epoch": 1.8077827768568948, "grad_norm": 1.9765625, "learning_rate": 3.8531908863235323e-07, "loss": 0.4464, "step": 14441 }, { "epoch": 1.807909069381956, "grad_norm": 2.078125, "learning_rate": 3.847704267782293e-07, "loss": 0.5387, "step": 14442 }, { "epoch": 1.808035361907017, "grad_norm": 2.046875, "learning_rate": 3.8422214816399626e-07, "loss": 0.5012, "step": 14443 }, { "epoch": 1.8081616544320784, "grad_norm": 2.0, "learning_rate": 3.836742528115078e-07, "loss": 0.475, "step": 14444 }, { "epoch": 1.8082879469571393, "grad_norm": 1.9375, "learning_rate": 3.8312674074260204e-07, "loss": 0.4193, "step": 14445 }, { "epoch": 1.8084142394822007, "grad_norm": 1.96875, "learning_rate": 3.825796119790992e-07, "loss": 0.4996, "step": 14446 }, { "epoch": 1.8085405320072618, "grad_norm": 2.03125, "learning_rate": 3.820328665428086e-07, "loss": 0.4623, "step": 14447 }, { "epoch": 1.808666824532323, "grad_norm": 2.0625, "learning_rate": 3.814865044555216e-07, "loss": 0.4659, "step": 14448 }, { "epoch": 1.8087931170573843, "grad_norm": 1.8515625, "learning_rate": 3.809405257390153e-07, "loss": 0.4622, "step": 14449 }, { "epoch": 1.8089194095824452, "grad_norm": 2.109375, "learning_rate": 3.8039493041505003e-07, "loss": 0.4392, "step": 14450 }, { "epoch": 1.8090457021075066, "grad_norm": 2.078125, "learning_rate": 3.798497185053729e-07, "loss": 0.4855, "step": 14451 }, { "epoch": 1.8091719946325677, "grad_norm": 2.203125, "learning_rate": 3.793048900317142e-07, "loss": 0.5317, "step": 14452 }, { "epoch": 1.8092982871576289, "grad_norm": 2.15625, "learning_rate": 3.7876044501579003e-07, "loss": 0.4919, "step": 14453 }, { "epoch": 1.80942457968269, "grad_norm": 2.109375, "learning_rate": 3.7821638347930065e-07, "loss": 0.5376, "step": 14454 }, { "epoch": 1.8095508722077511, "grad_norm": 1.9609375, "learning_rate": 3.7767270544392995e-07, "loss": 0.47, "step": 14455 }, { "epoch": 1.8096771647328125, "grad_norm": 2.1875, "learning_rate": 3.7712941093134726e-07, "loss": 0.5611, "step": 14456 }, { "epoch": 1.8098034572578734, "grad_norm": 1.953125, "learning_rate": 3.765864999632096e-07, "loss": 0.4295, "step": 14457 }, { "epoch": 1.8099297497829347, "grad_norm": 2.3125, "learning_rate": 3.760439725611542e-07, "loss": 0.5055, "step": 14458 }, { "epoch": 1.8100560423079959, "grad_norm": 2.140625, "learning_rate": 3.75501828746806e-07, "loss": 0.5514, "step": 14459 }, { "epoch": 1.810182334833057, "grad_norm": 1.8828125, "learning_rate": 3.74960068541772e-07, "loss": 0.511, "step": 14460 }, { "epoch": 1.8103086273581184, "grad_norm": 1.9375, "learning_rate": 3.7441869196764737e-07, "loss": 0.472, "step": 14461 }, { "epoch": 1.8104349198831793, "grad_norm": 2.09375, "learning_rate": 3.73877699046008e-07, "loss": 0.5276, "step": 14462 }, { "epoch": 1.8105612124082406, "grad_norm": 2.0625, "learning_rate": 3.7333708979841787e-07, "loss": 0.5515, "step": 14463 }, { "epoch": 1.8106875049333018, "grad_norm": 1.875, "learning_rate": 3.7279686424642413e-07, "loss": 0.4547, "step": 14464 }, { "epoch": 1.810813797458363, "grad_norm": 1.9921875, "learning_rate": 3.7225702241155957e-07, "loss": 0.4863, "step": 14465 }, { "epoch": 1.8109400899834243, "grad_norm": 2.046875, "learning_rate": 3.7171756431533925e-07, "loss": 0.4794, "step": 14466 }, { "epoch": 1.8110663825084852, "grad_norm": 1.9453125, "learning_rate": 3.7117848997926585e-07, "loss": 0.4708, "step": 14467 }, { "epoch": 1.8111926750335465, "grad_norm": 2.640625, "learning_rate": 3.706397994248256e-07, "loss": 0.5883, "step": 14468 }, { "epoch": 1.8113189675586077, "grad_norm": 2.03125, "learning_rate": 3.7010149267348804e-07, "loss": 0.4969, "step": 14469 }, { "epoch": 1.8114452600836688, "grad_norm": 2.015625, "learning_rate": 3.695635697467115e-07, "loss": 0.5339, "step": 14470 }, { "epoch": 1.81157155260873, "grad_norm": 1.9765625, "learning_rate": 3.6902603066593435e-07, "loss": 0.52, "step": 14471 }, { "epoch": 1.811697845133791, "grad_norm": 1.953125, "learning_rate": 3.684888754525817e-07, "loss": 0.4677, "step": 14472 }, { "epoch": 1.8118241376588524, "grad_norm": 2.046875, "learning_rate": 3.6795210412806314e-07, "loss": 0.5362, "step": 14473 }, { "epoch": 1.8119504301839133, "grad_norm": 2.15625, "learning_rate": 3.674157167137726e-07, "loss": 0.4812, "step": 14474 }, { "epoch": 1.8120767227089747, "grad_norm": 2.03125, "learning_rate": 3.6687971323109084e-07, "loss": 0.5221, "step": 14475 }, { "epoch": 1.8122030152340358, "grad_norm": 2.078125, "learning_rate": 3.663440937013818e-07, "loss": 0.5379, "step": 14476 }, { "epoch": 1.812329307759097, "grad_norm": 1.9375, "learning_rate": 3.6580885814599286e-07, "loss": 0.5129, "step": 14477 }, { "epoch": 1.8124556002841583, "grad_norm": 2.1875, "learning_rate": 3.65274006586257e-07, "loss": 0.4888, "step": 14478 }, { "epoch": 1.8125818928092192, "grad_norm": 1.7734375, "learning_rate": 3.647395390434927e-07, "loss": 0.4152, "step": 14479 }, { "epoch": 1.8127081853342806, "grad_norm": 2.03125, "learning_rate": 3.642054555390029e-07, "loss": 0.4961, "step": 14480 }, { "epoch": 1.8128344778593417, "grad_norm": 2.0625, "learning_rate": 3.636717560940739e-07, "loss": 0.4799, "step": 14481 }, { "epoch": 1.8129607703844028, "grad_norm": 2.015625, "learning_rate": 3.631384407299776e-07, "loss": 0.5826, "step": 14482 }, { "epoch": 1.8130870629094642, "grad_norm": 2.15625, "learning_rate": 3.6260550946797147e-07, "loss": 0.4792, "step": 14483 }, { "epoch": 1.813213355434525, "grad_norm": 2.015625, "learning_rate": 3.6207296232929736e-07, "loss": 0.4742, "step": 14484 }, { "epoch": 1.8133396479595865, "grad_norm": 2.015625, "learning_rate": 3.615407993351805e-07, "loss": 0.5468, "step": 14485 }, { "epoch": 1.8134659404846476, "grad_norm": 2.15625, "learning_rate": 3.610090205068306e-07, "loss": 0.4348, "step": 14486 }, { "epoch": 1.8135922330097087, "grad_norm": 2.203125, "learning_rate": 3.604776258654452e-07, "loss": 0.5031, "step": 14487 }, { "epoch": 1.8137185255347699, "grad_norm": 1.828125, "learning_rate": 3.599466154322029e-07, "loss": 0.4584, "step": 14488 }, { "epoch": 1.813844818059831, "grad_norm": 2.171875, "learning_rate": 3.594159892282689e-07, "loss": 0.5617, "step": 14489 }, { "epoch": 1.8139711105848924, "grad_norm": 2.296875, "learning_rate": 3.588857472747931e-07, "loss": 0.5476, "step": 14490 }, { "epoch": 1.8140974031099533, "grad_norm": 2.015625, "learning_rate": 3.5835588959290957e-07, "loss": 0.4833, "step": 14491 }, { "epoch": 1.8142236956350146, "grad_norm": 1.9609375, "learning_rate": 3.578264162037348e-07, "loss": 0.5218, "step": 14492 }, { "epoch": 1.8143499881600758, "grad_norm": 1.9375, "learning_rate": 3.572973271283764e-07, "loss": 0.5247, "step": 14493 }, { "epoch": 1.8144762806851369, "grad_norm": 1.8671875, "learning_rate": 3.567686223879208e-07, "loss": 0.4433, "step": 14494 }, { "epoch": 1.8146025732101982, "grad_norm": 1.96875, "learning_rate": 3.5624030200343996e-07, "loss": 0.4385, "step": 14495 }, { "epoch": 1.8147288657352592, "grad_norm": 1.9609375, "learning_rate": 3.557123659959927e-07, "loss": 0.4596, "step": 14496 }, { "epoch": 1.8148551582603205, "grad_norm": 2.0625, "learning_rate": 3.55184814386621e-07, "loss": 0.482, "step": 14497 }, { "epoch": 1.8149814507853816, "grad_norm": 1.875, "learning_rate": 3.546576471963514e-07, "loss": 0.4666, "step": 14498 }, { "epoch": 1.8151077433104428, "grad_norm": 2.09375, "learning_rate": 3.5413086444619606e-07, "loss": 0.568, "step": 14499 }, { "epoch": 1.8152340358355041, "grad_norm": 1.8828125, "learning_rate": 3.536044661571503e-07, "loss": 0.4799, "step": 14500 }, { "epoch": 1.815360328360565, "grad_norm": 2.0625, "learning_rate": 3.530784523501962e-07, "loss": 0.5678, "step": 14501 }, { "epoch": 1.8154866208856264, "grad_norm": 1.984375, "learning_rate": 3.5255282304629934e-07, "loss": 0.4444, "step": 14502 }, { "epoch": 1.8156129134106875, "grad_norm": 2.03125, "learning_rate": 3.5202757826640844e-07, "loss": 0.5288, "step": 14503 }, { "epoch": 1.8157392059357487, "grad_norm": 2.109375, "learning_rate": 3.515027180314601e-07, "loss": 0.4864, "step": 14504 }, { "epoch": 1.8158654984608098, "grad_norm": 1.8046875, "learning_rate": 3.509782423623742e-07, "loss": 0.4884, "step": 14505 }, { "epoch": 1.815991790985871, "grad_norm": 2.125, "learning_rate": 3.504541512800541e-07, "loss": 0.5272, "step": 14506 }, { "epoch": 1.8161180835109323, "grad_norm": 2.078125, "learning_rate": 3.499304448053886e-07, "loss": 0.504, "step": 14507 }, { "epoch": 1.8162443760359934, "grad_norm": 2.03125, "learning_rate": 3.4940712295925215e-07, "loss": 0.5528, "step": 14508 }, { "epoch": 1.8163706685610546, "grad_norm": 2.25, "learning_rate": 3.488841857625036e-07, "loss": 0.5482, "step": 14509 }, { "epoch": 1.8164969610861157, "grad_norm": 2.09375, "learning_rate": 3.4836163323598293e-07, "loss": 0.4993, "step": 14510 }, { "epoch": 1.8166232536111768, "grad_norm": 2.171875, "learning_rate": 3.478394654005224e-07, "loss": 0.5441, "step": 14511 }, { "epoch": 1.8167495461362382, "grad_norm": 2.0625, "learning_rate": 3.473176822769309e-07, "loss": 0.5516, "step": 14512 }, { "epoch": 1.816875838661299, "grad_norm": 2.296875, "learning_rate": 3.467962838860073e-07, "loss": 0.5778, "step": 14513 }, { "epoch": 1.8170021311863604, "grad_norm": 2.171875, "learning_rate": 3.462752702485328e-07, "loss": 0.5537, "step": 14514 }, { "epoch": 1.8171284237114216, "grad_norm": 2.25, "learning_rate": 3.45754641385273e-07, "loss": 0.5553, "step": 14515 }, { "epoch": 1.8172547162364827, "grad_norm": 1.9140625, "learning_rate": 3.4523439731698026e-07, "loss": 0.5214, "step": 14516 }, { "epoch": 1.817381008761544, "grad_norm": 2.09375, "learning_rate": 3.4471453806438903e-07, "loss": 0.4826, "step": 14517 }, { "epoch": 1.817507301286605, "grad_norm": 1.84375, "learning_rate": 3.441950636482205e-07, "loss": 0.4325, "step": 14518 }, { "epoch": 1.8176335938116663, "grad_norm": 1.9453125, "learning_rate": 3.436759740891782e-07, "loss": 0.4552, "step": 14519 }, { "epoch": 1.8177598863367275, "grad_norm": 1.9765625, "learning_rate": 3.4315726940795436e-07, "loss": 0.4794, "step": 14520 }, { "epoch": 1.8178861788617886, "grad_norm": 2.03125, "learning_rate": 3.4263894962522027e-07, "loss": 0.4746, "step": 14521 }, { "epoch": 1.8180124713868497, "grad_norm": 1.8828125, "learning_rate": 3.4212101476163717e-07, "loss": 0.4557, "step": 14522 }, { "epoch": 1.8181387639119109, "grad_norm": 2.0625, "learning_rate": 3.416034648378486e-07, "loss": 0.4258, "step": 14523 }, { "epoch": 1.8182650564369722, "grad_norm": 2.078125, "learning_rate": 3.410862998744813e-07, "loss": 0.5157, "step": 14524 }, { "epoch": 1.8183913489620334, "grad_norm": 2.046875, "learning_rate": 3.4056951989214993e-07, "loss": 0.5078, "step": 14525 }, { "epoch": 1.8185176414870945, "grad_norm": 1.8515625, "learning_rate": 3.4005312491145026e-07, "loss": 0.4734, "step": 14526 }, { "epoch": 1.8186439340121556, "grad_norm": 2.125, "learning_rate": 3.395371149529658e-07, "loss": 0.5376, "step": 14527 }, { "epoch": 1.8187702265372168, "grad_norm": 2.015625, "learning_rate": 3.390214900372635e-07, "loss": 0.4793, "step": 14528 }, { "epoch": 1.8188965190622781, "grad_norm": 1.9921875, "learning_rate": 3.3850625018489457e-07, "loss": 0.4637, "step": 14529 }, { "epoch": 1.819022811587339, "grad_norm": 1.8359375, "learning_rate": 3.3799139541639494e-07, "loss": 0.4817, "step": 14530 }, { "epoch": 1.8191491041124004, "grad_norm": 1.859375, "learning_rate": 3.3747692575228696e-07, "loss": 0.4657, "step": 14531 }, { "epoch": 1.8192753966374615, "grad_norm": 2.03125, "learning_rate": 3.369628412130743e-07, "loss": 0.5342, "step": 14532 }, { "epoch": 1.8194016891625227, "grad_norm": 2.09375, "learning_rate": 3.364491418192484e-07, "loss": 0.5576, "step": 14533 }, { "epoch": 1.819527981687584, "grad_norm": 1.9296875, "learning_rate": 3.3593582759128276e-07, "loss": 0.4851, "step": 14534 }, { "epoch": 1.819654274212645, "grad_norm": 1.828125, "learning_rate": 3.3542289854963883e-07, "loss": 0.4446, "step": 14535 }, { "epoch": 1.8197805667377063, "grad_norm": 2.015625, "learning_rate": 3.349103547147592e-07, "loss": 0.5023, "step": 14536 }, { "epoch": 1.8199068592627674, "grad_norm": 1.8984375, "learning_rate": 3.34398196107073e-07, "loss": 0.459, "step": 14537 }, { "epoch": 1.8200331517878285, "grad_norm": 2.140625, "learning_rate": 3.338864227469929e-07, "loss": 0.5468, "step": 14538 }, { "epoch": 1.82015944431289, "grad_norm": 2.0625, "learning_rate": 3.3337503465491806e-07, "loss": 0.4895, "step": 14539 }, { "epoch": 1.8202857368379508, "grad_norm": 1.859375, "learning_rate": 3.3286403185123104e-07, "loss": 0.4588, "step": 14540 }, { "epoch": 1.8204120293630122, "grad_norm": 2.265625, "learning_rate": 3.32353414356299e-07, "loss": 0.5463, "step": 14541 }, { "epoch": 1.8205383218880733, "grad_norm": 1.8828125, "learning_rate": 3.318431821904744e-07, "loss": 0.5071, "step": 14542 }, { "epoch": 1.8206646144131344, "grad_norm": 2.046875, "learning_rate": 3.3133333537409217e-07, "loss": 0.5169, "step": 14543 }, { "epoch": 1.8207909069381956, "grad_norm": 2.28125, "learning_rate": 3.3082387392747605e-07, "loss": 0.4621, "step": 14544 }, { "epoch": 1.8209171994632567, "grad_norm": 1.9609375, "learning_rate": 3.303147978709298e-07, "loss": 0.3807, "step": 14545 }, { "epoch": 1.821043491988318, "grad_norm": 2.015625, "learning_rate": 3.298061072247438e-07, "loss": 0.4607, "step": 14546 }, { "epoch": 1.821169784513379, "grad_norm": 2.046875, "learning_rate": 3.2929780200919527e-07, "loss": 0.5383, "step": 14547 }, { "epoch": 1.8212960770384403, "grad_norm": 2.140625, "learning_rate": 3.2878988224454346e-07, "loss": 0.4733, "step": 14548 }, { "epoch": 1.8214223695635015, "grad_norm": 1.8671875, "learning_rate": 3.282823479510322e-07, "loss": 0.481, "step": 14549 }, { "epoch": 1.8215486620885626, "grad_norm": 2.046875, "learning_rate": 3.277751991488898e-07, "loss": 0.4962, "step": 14550 }, { "epoch": 1.821674954613624, "grad_norm": 1.9453125, "learning_rate": 3.2726843585833224e-07, "loss": 0.5253, "step": 14551 }, { "epoch": 1.8218012471386849, "grad_norm": 1.984375, "learning_rate": 3.2676205809955564e-07, "loss": 0.4608, "step": 14552 }, { "epoch": 1.8219275396637462, "grad_norm": 2.015625, "learning_rate": 3.2625606589274496e-07, "loss": 0.5431, "step": 14553 }, { "epoch": 1.8220538321888073, "grad_norm": 1.921875, "learning_rate": 3.2575045925806626e-07, "loss": 0.4887, "step": 14554 }, { "epoch": 1.8221801247138685, "grad_norm": 1.921875, "learning_rate": 3.2524523821567234e-07, "loss": 0.4054, "step": 14555 }, { "epoch": 1.8223064172389298, "grad_norm": 1.8203125, "learning_rate": 3.247404027856993e-07, "loss": 0.4047, "step": 14556 }, { "epoch": 1.8224327097639907, "grad_norm": 2.046875, "learning_rate": 3.2423595298827106e-07, "loss": 0.5129, "step": 14557 }, { "epoch": 1.822559002289052, "grad_norm": 2.203125, "learning_rate": 3.2373188884349147e-07, "loss": 0.5585, "step": 14558 }, { "epoch": 1.8226852948141132, "grad_norm": 2.609375, "learning_rate": 3.2322821037145125e-07, "loss": 0.5557, "step": 14559 }, { "epoch": 1.8228115873391744, "grad_norm": 2.15625, "learning_rate": 3.2272491759222756e-07, "loss": 0.4524, "step": 14560 }, { "epoch": 1.8229378798642355, "grad_norm": 1.9765625, "learning_rate": 3.2222201052588e-07, "loss": 0.495, "step": 14561 }, { "epoch": 1.8230641723892966, "grad_norm": 1.9140625, "learning_rate": 3.2171948919245134e-07, "loss": 0.4502, "step": 14562 }, { "epoch": 1.823190464914358, "grad_norm": 2.078125, "learning_rate": 3.2121735361197336e-07, "loss": 0.4965, "step": 14563 }, { "epoch": 1.823316757439419, "grad_norm": 2.0, "learning_rate": 3.2071560380445674e-07, "loss": 0.4908, "step": 14564 }, { "epoch": 1.8234430499644803, "grad_norm": 1.96875, "learning_rate": 3.202142397899044e-07, "loss": 0.4828, "step": 14565 }, { "epoch": 1.8235693424895414, "grad_norm": 2.015625, "learning_rate": 3.1971326158829696e-07, "loss": 0.4749, "step": 14566 }, { "epoch": 1.8236956350146025, "grad_norm": 2.171875, "learning_rate": 3.192126692196018e-07, "loss": 0.4929, "step": 14567 }, { "epoch": 1.8238219275396639, "grad_norm": 2.84375, "learning_rate": 3.187124627037719e-07, "loss": 0.5681, "step": 14568 }, { "epoch": 1.8239482200647248, "grad_norm": 2.078125, "learning_rate": 3.1821264206074565e-07, "loss": 0.4947, "step": 14569 }, { "epoch": 1.8240745125897861, "grad_norm": 1.9765625, "learning_rate": 3.1771320731044164e-07, "loss": 0.445, "step": 14570 }, { "epoch": 1.8242008051148473, "grad_norm": 2.140625, "learning_rate": 3.172141584727695e-07, "loss": 0.513, "step": 14571 }, { "epoch": 1.8243270976399084, "grad_norm": 2.0, "learning_rate": 3.167154955676177e-07, "loss": 0.4096, "step": 14572 }, { "epoch": 1.8244533901649698, "grad_norm": 2.0625, "learning_rate": 3.162172186148627e-07, "loss": 0.5688, "step": 14573 }, { "epoch": 1.8245796826900307, "grad_norm": 2.015625, "learning_rate": 3.1571932763436507e-07, "loss": 0.5077, "step": 14574 }, { "epoch": 1.824705975215092, "grad_norm": 2.078125, "learning_rate": 3.152218226459691e-07, "loss": 0.5174, "step": 14575 }, { "epoch": 1.8248322677401532, "grad_norm": 2.078125, "learning_rate": 3.147247036695034e-07, "loss": 0.571, "step": 14576 }, { "epoch": 1.8249585602652143, "grad_norm": 1.921875, "learning_rate": 3.14227970724782e-07, "loss": 0.4458, "step": 14577 }, { "epoch": 1.8250848527902754, "grad_norm": 1.9375, "learning_rate": 3.137316238316046e-07, "loss": 0.487, "step": 14578 }, { "epoch": 1.8252111453153366, "grad_norm": 2.046875, "learning_rate": 3.132356630097544e-07, "loss": 0.5531, "step": 14579 }, { "epoch": 1.825337437840398, "grad_norm": 2.0, "learning_rate": 3.127400882789988e-07, "loss": 0.4959, "step": 14580 }, { "epoch": 1.8254637303654588, "grad_norm": 2.078125, "learning_rate": 3.1224489965908876e-07, "loss": 0.5281, "step": 14581 }, { "epoch": 1.8255900228905202, "grad_norm": 1.9453125, "learning_rate": 3.117500971697629e-07, "loss": 0.4103, "step": 14582 }, { "epoch": 1.8257163154155813, "grad_norm": 2.0, "learning_rate": 3.112556808307421e-07, "loss": 0.5174, "step": 14583 }, { "epoch": 1.8258426079406425, "grad_norm": 1.921875, "learning_rate": 3.107616506617339e-07, "loss": 0.4313, "step": 14584 }, { "epoch": 1.8259689004657038, "grad_norm": 2.109375, "learning_rate": 3.102680066824282e-07, "loss": 0.5091, "step": 14585 }, { "epoch": 1.8260951929907647, "grad_norm": 1.9609375, "learning_rate": 3.0977474891250136e-07, "loss": 0.455, "step": 14586 }, { "epoch": 1.826221485515826, "grad_norm": 2.125, "learning_rate": 3.092818773716122e-07, "loss": 0.5127, "step": 14587 }, { "epoch": 1.8263477780408872, "grad_norm": 2.296875, "learning_rate": 3.0878939207940496e-07, "loss": 0.663, "step": 14588 }, { "epoch": 1.8264740705659483, "grad_norm": 2.078125, "learning_rate": 3.0829729305551057e-07, "loss": 0.5333, "step": 14589 }, { "epoch": 1.8266003630910097, "grad_norm": 2.0, "learning_rate": 3.0780558031954234e-07, "loss": 0.5283, "step": 14590 }, { "epoch": 1.8267266556160706, "grad_norm": 2.015625, "learning_rate": 3.073142538910978e-07, "loss": 0.5434, "step": 14591 }, { "epoch": 1.826852948141132, "grad_norm": 1.984375, "learning_rate": 3.068233137897614e-07, "loss": 0.4905, "step": 14592 }, { "epoch": 1.826979240666193, "grad_norm": 2.078125, "learning_rate": 3.063327600351007e-07, "loss": 0.5705, "step": 14593 }, { "epoch": 1.8271055331912542, "grad_norm": 2.296875, "learning_rate": 3.0584259264666685e-07, "loss": 0.5141, "step": 14594 }, { "epoch": 1.8272318257163154, "grad_norm": 1.7890625, "learning_rate": 3.0535281164399745e-07, "loss": 0.4352, "step": 14595 }, { "epoch": 1.8273581182413765, "grad_norm": 2.046875, "learning_rate": 3.048634170466136e-07, "loss": 0.4898, "step": 14596 }, { "epoch": 1.8274844107664379, "grad_norm": 2.03125, "learning_rate": 3.0437440887402304e-07, "loss": 0.5348, "step": 14597 }, { "epoch": 1.8276107032914988, "grad_norm": 2.109375, "learning_rate": 3.038857871457135e-07, "loss": 0.5563, "step": 14598 }, { "epoch": 1.8277369958165601, "grad_norm": 1.9765625, "learning_rate": 3.033975518811627e-07, "loss": 0.5329, "step": 14599 }, { "epoch": 1.8278632883416213, "grad_norm": 1.8125, "learning_rate": 3.029097030998296e-07, "loss": 0.4393, "step": 14600 }, { "epoch": 1.8279895808666824, "grad_norm": 2.359375, "learning_rate": 3.024222408211586e-07, "loss": 0.4686, "step": 14601 }, { "epoch": 1.8281158733917438, "grad_norm": 1.8984375, "learning_rate": 3.0193516506457964e-07, "loss": 0.5298, "step": 14602 }, { "epoch": 1.8282421659168047, "grad_norm": 1.96875, "learning_rate": 3.014484758495051e-07, "loss": 0.5012, "step": 14603 }, { "epoch": 1.828368458441866, "grad_norm": 1.75, "learning_rate": 3.0096217319533386e-07, "loss": 0.3937, "step": 14604 }, { "epoch": 1.8284947509669272, "grad_norm": 2.0625, "learning_rate": 3.0047625712144925e-07, "loss": 0.5222, "step": 14605 }, { "epoch": 1.8286210434919883, "grad_norm": 1.9453125, "learning_rate": 2.9999072764721916e-07, "loss": 0.4394, "step": 14606 }, { "epoch": 1.8287473360170496, "grad_norm": 1.9921875, "learning_rate": 2.9950558479199363e-07, "loss": 0.5731, "step": 14607 }, { "epoch": 1.8288736285421106, "grad_norm": 1.9609375, "learning_rate": 2.990208285751106e-07, "loss": 0.4552, "step": 14608 }, { "epoch": 1.828999921067172, "grad_norm": 1.9453125, "learning_rate": 2.9853645901589123e-07, "loss": 0.5281, "step": 14609 }, { "epoch": 1.829126213592233, "grad_norm": 2.171875, "learning_rate": 2.980524761336401e-07, "loss": 0.5117, "step": 14610 }, { "epoch": 1.8292525061172942, "grad_norm": 1.9140625, "learning_rate": 2.975688799476495e-07, "loss": 0.4709, "step": 14611 }, { "epoch": 1.8293787986423553, "grad_norm": 1.953125, "learning_rate": 2.9708567047719405e-07, "loss": 0.4773, "step": 14612 }, { "epoch": 1.8295050911674164, "grad_norm": 2.171875, "learning_rate": 2.966028477415328e-07, "loss": 0.5298, "step": 14613 }, { "epoch": 1.8296313836924778, "grad_norm": 2.125, "learning_rate": 2.9612041175990925e-07, "loss": 0.4852, "step": 14614 }, { "epoch": 1.8297576762175387, "grad_norm": 1.8515625, "learning_rate": 2.9563836255155356e-07, "loss": 0.435, "step": 14615 }, { "epoch": 1.8298839687426, "grad_norm": 1.9609375, "learning_rate": 2.95156700135677e-07, "loss": 0.4072, "step": 14616 }, { "epoch": 1.8300102612676612, "grad_norm": 2.125, "learning_rate": 2.946754245314798e-07, "loss": 0.5206, "step": 14617 }, { "epoch": 1.8301365537927223, "grad_norm": 1.921875, "learning_rate": 2.941945357581422e-07, "loss": 0.4505, "step": 14618 }, { "epoch": 1.8302628463177837, "grad_norm": 2.0, "learning_rate": 2.937140338348343e-07, "loss": 0.5651, "step": 14619 }, { "epoch": 1.8303891388428446, "grad_norm": 2.03125, "learning_rate": 2.932339187807043e-07, "loss": 0.4719, "step": 14620 }, { "epoch": 1.830515431367906, "grad_norm": 1.9296875, "learning_rate": 2.927541906148912e-07, "loss": 0.5011, "step": 14621 }, { "epoch": 1.830641723892967, "grad_norm": 2.015625, "learning_rate": 2.922748493565153e-07, "loss": 0.488, "step": 14622 }, { "epoch": 1.8307680164180282, "grad_norm": 1.8828125, "learning_rate": 2.917958950246802e-07, "loss": 0.4521, "step": 14623 }, { "epoch": 1.8308943089430896, "grad_norm": 2.015625, "learning_rate": 2.913173276384773e-07, "loss": 0.5295, "step": 14624 }, { "epoch": 1.8310206014681505, "grad_norm": 1.84375, "learning_rate": 2.9083914721698136e-07, "loss": 0.4116, "step": 14625 }, { "epoch": 1.8311468939932118, "grad_norm": 1.90625, "learning_rate": 2.9036135377925046e-07, "loss": 0.4933, "step": 14626 }, { "epoch": 1.831273186518273, "grad_norm": 2.046875, "learning_rate": 2.8988394734432935e-07, "loss": 0.4857, "step": 14627 }, { "epoch": 1.831399479043334, "grad_norm": 1.7734375, "learning_rate": 2.8940692793124615e-07, "loss": 0.3903, "step": 14628 }, { "epoch": 1.8315257715683952, "grad_norm": 2.0, "learning_rate": 2.889302955590123e-07, "loss": 0.5272, "step": 14629 }, { "epoch": 1.8316520640934564, "grad_norm": 1.984375, "learning_rate": 2.884540502466271e-07, "loss": 0.5885, "step": 14630 }, { "epoch": 1.8317783566185177, "grad_norm": 2.0625, "learning_rate": 2.8797819201307085e-07, "loss": 0.6378, "step": 14631 }, { "epoch": 1.8319046491435789, "grad_norm": 1.96875, "learning_rate": 2.875027208773118e-07, "loss": 0.4961, "step": 14632 }, { "epoch": 1.83203094166864, "grad_norm": 1.84375, "learning_rate": 2.870276368583003e-07, "loss": 0.4798, "step": 14633 }, { "epoch": 1.8321572341937011, "grad_norm": 1.984375, "learning_rate": 2.8655293997497225e-07, "loss": 0.4859, "step": 14634 }, { "epoch": 1.8322835267187623, "grad_norm": 1.953125, "learning_rate": 2.86078630246247e-07, "loss": 0.5335, "step": 14635 }, { "epoch": 1.8324098192438236, "grad_norm": 1.96875, "learning_rate": 2.8560470769102953e-07, "loss": 0.5394, "step": 14636 }, { "epoch": 1.8325361117688845, "grad_norm": 1.8828125, "learning_rate": 2.851311723282102e-07, "loss": 0.4772, "step": 14637 }, { "epoch": 1.832662404293946, "grad_norm": 2.125, "learning_rate": 2.846580241766628e-07, "loss": 0.5069, "step": 14638 }, { "epoch": 1.832788696819007, "grad_norm": 1.90625, "learning_rate": 2.8418526325524663e-07, "loss": 0.5149, "step": 14639 }, { "epoch": 1.8329149893440682, "grad_norm": 2.046875, "learning_rate": 2.837128895828034e-07, "loss": 0.5383, "step": 14640 }, { "epoch": 1.8330412818691295, "grad_norm": 1.9765625, "learning_rate": 2.8324090317816023e-07, "loss": 0.4758, "step": 14641 }, { "epoch": 1.8331675743941904, "grad_norm": 2.0625, "learning_rate": 2.8276930406013203e-07, "loss": 0.4561, "step": 14642 }, { "epoch": 1.8332938669192518, "grad_norm": 2.046875, "learning_rate": 2.822980922475127e-07, "loss": 0.49, "step": 14643 }, { "epoch": 1.833420159444313, "grad_norm": 1.875, "learning_rate": 2.81827267759085e-07, "loss": 0.4474, "step": 14644 }, { "epoch": 1.833546451969374, "grad_norm": 2.34375, "learning_rate": 2.81356830613615e-07, "loss": 0.5712, "step": 14645 }, { "epoch": 1.8336727444944352, "grad_norm": 1.984375, "learning_rate": 2.808867808298532e-07, "loss": 0.5677, "step": 14646 }, { "epoch": 1.8337990370194963, "grad_norm": 1.9921875, "learning_rate": 2.8041711842653365e-07, "loss": 0.4291, "step": 14647 }, { "epoch": 1.8339253295445577, "grad_norm": 1.8671875, "learning_rate": 2.7994784342237793e-07, "loss": 0.4391, "step": 14648 }, { "epoch": 1.8340516220696188, "grad_norm": 2.109375, "learning_rate": 2.794789558360877e-07, "loss": 0.4945, "step": 14649 }, { "epoch": 1.83417791459468, "grad_norm": 2.0625, "learning_rate": 2.790104556863538e-07, "loss": 0.4753, "step": 14650 }, { "epoch": 1.834304207119741, "grad_norm": 2.09375, "learning_rate": 2.7854234299184766e-07, "loss": 0.5206, "step": 14651 }, { "epoch": 1.8344304996448022, "grad_norm": 2.0, "learning_rate": 2.78074617771229e-07, "loss": 0.5037, "step": 14652 }, { "epoch": 1.8345567921698636, "grad_norm": 2.078125, "learning_rate": 2.7760728004313954e-07, "loss": 0.5468, "step": 14653 }, { "epoch": 1.8346830846949245, "grad_norm": 2.1875, "learning_rate": 2.7714032982620434e-07, "loss": 0.575, "step": 14654 }, { "epoch": 1.8348093772199858, "grad_norm": 1.8984375, "learning_rate": 2.766737671390374e-07, "loss": 0.4942, "step": 14655 }, { "epoch": 1.834935669745047, "grad_norm": 2.15625, "learning_rate": 2.7620759200023496e-07, "loss": 0.4717, "step": 14656 }, { "epoch": 1.835061962270108, "grad_norm": 2.046875, "learning_rate": 2.757418044283755e-07, "loss": 0.4703, "step": 14657 }, { "epoch": 1.8351882547951694, "grad_norm": 2.140625, "learning_rate": 2.752764044420264e-07, "loss": 0.5447, "step": 14658 }, { "epoch": 1.8353145473202304, "grad_norm": 1.96875, "learning_rate": 2.748113920597362e-07, "loss": 0.4854, "step": 14659 }, { "epoch": 1.8354408398452917, "grad_norm": 2.203125, "learning_rate": 2.7434676730003886e-07, "loss": 0.4995, "step": 14660 }, { "epoch": 1.8355671323703528, "grad_norm": 2.0, "learning_rate": 2.738825301814541e-07, "loss": 0.4483, "step": 14661 }, { "epoch": 1.835693424895414, "grad_norm": 1.8203125, "learning_rate": 2.734186807224848e-07, "loss": 0.4518, "step": 14662 }, { "epoch": 1.8358197174204753, "grad_norm": 1.96875, "learning_rate": 2.7295521894161846e-07, "loss": 0.5008, "step": 14663 }, { "epoch": 1.8359460099455363, "grad_norm": 2.078125, "learning_rate": 2.724921448573281e-07, "loss": 0.458, "step": 14664 }, { "epoch": 1.8360723024705976, "grad_norm": 2.046875, "learning_rate": 2.7202945848807116e-07, "loss": 0.5285, "step": 14665 }, { "epoch": 1.8361985949956587, "grad_norm": 1.890625, "learning_rate": 2.715671598522884e-07, "loss": 0.4647, "step": 14666 }, { "epoch": 1.8363248875207199, "grad_norm": 1.9765625, "learning_rate": 2.711052489684074e-07, "loss": 0.5714, "step": 14667 }, { "epoch": 1.836451180045781, "grad_norm": 1.9765625, "learning_rate": 2.7064372585483667e-07, "loss": 0.4699, "step": 14668 }, { "epoch": 1.8365774725708421, "grad_norm": 2.03125, "learning_rate": 2.7018259052997155e-07, "loss": 0.4855, "step": 14669 }, { "epoch": 1.8367037650959035, "grad_norm": 2.03125, "learning_rate": 2.6972184301219396e-07, "loss": 0.5554, "step": 14670 }, { "epoch": 1.8368300576209644, "grad_norm": 1.8671875, "learning_rate": 2.692614833198659e-07, "loss": 0.4934, "step": 14671 }, { "epoch": 1.8369563501460258, "grad_norm": 2.109375, "learning_rate": 2.6880151147133826e-07, "loss": 0.4737, "step": 14672 }, { "epoch": 1.837082642671087, "grad_norm": 1.96875, "learning_rate": 2.683419274849419e-07, "loss": 0.4347, "step": 14673 }, { "epoch": 1.837208935196148, "grad_norm": 2.078125, "learning_rate": 2.678827313789967e-07, "loss": 0.4655, "step": 14674 }, { "epoch": 1.8373352277212094, "grad_norm": 2.0, "learning_rate": 2.674239231718045e-07, "loss": 0.4432, "step": 14675 }, { "epoch": 1.8374615202462703, "grad_norm": 2.03125, "learning_rate": 2.6696550288165315e-07, "loss": 0.4575, "step": 14676 }, { "epoch": 1.8375878127713317, "grad_norm": 2.09375, "learning_rate": 2.6650747052681225e-07, "loss": 0.5474, "step": 14677 }, { "epoch": 1.8377141052963928, "grad_norm": 1.8671875, "learning_rate": 2.6604982612553953e-07, "loss": 0.4366, "step": 14678 }, { "epoch": 1.837840397821454, "grad_norm": 2.09375, "learning_rate": 2.6559256969607483e-07, "loss": 0.6286, "step": 14679 }, { "epoch": 1.8379666903465153, "grad_norm": 1.953125, "learning_rate": 2.6513570125664466e-07, "loss": 0.4667, "step": 14680 }, { "epoch": 1.8380929828715762, "grad_norm": 1.9140625, "learning_rate": 2.6467922082545673e-07, "loss": 0.5284, "step": 14681 }, { "epoch": 1.8382192753966375, "grad_norm": 2.078125, "learning_rate": 2.642231284207053e-07, "loss": 0.4857, "step": 14682 }, { "epoch": 1.8383455679216987, "grad_norm": 2.046875, "learning_rate": 2.6376742406057144e-07, "loss": 0.5827, "step": 14683 }, { "epoch": 1.8384718604467598, "grad_norm": 1.9453125, "learning_rate": 2.6331210776321614e-07, "loss": 0.4301, "step": 14684 }, { "epoch": 1.838598152971821, "grad_norm": 2.015625, "learning_rate": 2.6285717954678713e-07, "loss": 0.4618, "step": 14685 }, { "epoch": 1.838724445496882, "grad_norm": 2.015625, "learning_rate": 2.6240263942941877e-07, "loss": 0.4759, "step": 14686 }, { "epoch": 1.8388507380219434, "grad_norm": 1.7578125, "learning_rate": 2.6194848742922553e-07, "loss": 0.4217, "step": 14687 }, { "epoch": 1.8389770305470043, "grad_norm": 2.078125, "learning_rate": 2.614947235643106e-07, "loss": 0.5245, "step": 14688 }, { "epoch": 1.8391033230720657, "grad_norm": 1.9453125, "learning_rate": 2.610413478527596e-07, "loss": 0.4598, "step": 14689 }, { "epoch": 1.8392296155971268, "grad_norm": 1.90625, "learning_rate": 2.6058836031264247e-07, "loss": 0.4965, "step": 14690 }, { "epoch": 1.839355908122188, "grad_norm": 2.203125, "learning_rate": 2.6013576096201367e-07, "loss": 0.4591, "step": 14691 }, { "epoch": 1.8394822006472493, "grad_norm": 2.125, "learning_rate": 2.5968354981891433e-07, "loss": 0.4445, "step": 14692 }, { "epoch": 1.8396084931723102, "grad_norm": 1.9375, "learning_rate": 2.5923172690136777e-07, "loss": 0.4704, "step": 14693 }, { "epoch": 1.8397347856973716, "grad_norm": 1.953125, "learning_rate": 2.5878029222738297e-07, "loss": 0.4878, "step": 14694 }, { "epoch": 1.8398610782224327, "grad_norm": 1.9375, "learning_rate": 2.5832924581495223e-07, "loss": 0.419, "step": 14695 }, { "epoch": 1.8399873707474939, "grad_norm": 1.984375, "learning_rate": 2.578785876820522e-07, "loss": 0.4536, "step": 14696 }, { "epoch": 1.8401136632725552, "grad_norm": 1.984375, "learning_rate": 2.574283178466475e-07, "loss": 0.4012, "step": 14697 }, { "epoch": 1.8402399557976161, "grad_norm": 2.171875, "learning_rate": 2.569784363266836e-07, "loss": 0.5204, "step": 14698 }, { "epoch": 1.8403662483226775, "grad_norm": 2.046875, "learning_rate": 2.565289431400919e-07, "loss": 0.5706, "step": 14699 }, { "epoch": 1.8404925408477386, "grad_norm": 1.875, "learning_rate": 2.560798383047869e-07, "loss": 0.426, "step": 14700 }, { "epoch": 1.8406188333727997, "grad_norm": 1.8203125, "learning_rate": 2.556311218386709e-07, "loss": 0.4257, "step": 14701 }, { "epoch": 1.8407451258978609, "grad_norm": 1.796875, "learning_rate": 2.551827937596274e-07, "loss": 0.416, "step": 14702 }, { "epoch": 1.840871418422922, "grad_norm": 1.90625, "learning_rate": 2.5473485408552543e-07, "loss": 0.4614, "step": 14703 }, { "epoch": 1.8409977109479834, "grad_norm": 2.0625, "learning_rate": 2.5428730283421966e-07, "loss": 0.5255, "step": 14704 }, { "epoch": 1.8411240034730443, "grad_norm": 1.953125, "learning_rate": 2.5384014002354686e-07, "loss": 0.5217, "step": 14705 }, { "epoch": 1.8412502959981056, "grad_norm": 1.9140625, "learning_rate": 2.533933656713328e-07, "loss": 0.4701, "step": 14706 }, { "epoch": 1.8413765885231668, "grad_norm": 2.046875, "learning_rate": 2.52946979795381e-07, "loss": 0.4731, "step": 14707 }, { "epoch": 1.841502881048228, "grad_norm": 2.0, "learning_rate": 2.525009824134872e-07, "loss": 0.5161, "step": 14708 }, { "epoch": 1.8416291735732893, "grad_norm": 1.984375, "learning_rate": 2.5205537354342393e-07, "loss": 0.485, "step": 14709 }, { "epoch": 1.8417554660983502, "grad_norm": 1.9609375, "learning_rate": 2.516101532029547e-07, "loss": 0.5409, "step": 14710 }, { "epoch": 1.8418817586234115, "grad_norm": 2.09375, "learning_rate": 2.511653214098253e-07, "loss": 0.4445, "step": 14711 }, { "epoch": 1.8420080511484727, "grad_norm": 2.109375, "learning_rate": 2.507208781817638e-07, "loss": 0.5362, "step": 14712 }, { "epoch": 1.8421343436735338, "grad_norm": 2.046875, "learning_rate": 2.5027682353648606e-07, "loss": 0.4779, "step": 14713 }, { "epoch": 1.8422606361985951, "grad_norm": 1.9609375, "learning_rate": 2.498331574916901e-07, "loss": 0.4738, "step": 14714 }, { "epoch": 1.842386928723656, "grad_norm": 2.0, "learning_rate": 2.4938988006506073e-07, "loss": 0.5069, "step": 14715 }, { "epoch": 1.8425132212487174, "grad_norm": 1.90625, "learning_rate": 2.489469912742637e-07, "loss": 0.4498, "step": 14716 }, { "epoch": 1.8426395137737785, "grad_norm": 1.984375, "learning_rate": 2.4850449113695275e-07, "loss": 0.5554, "step": 14717 }, { "epoch": 1.8427658062988397, "grad_norm": 2.015625, "learning_rate": 2.48062379670766e-07, "loss": 0.5047, "step": 14718 }, { "epoch": 1.8428920988239008, "grad_norm": 2.03125, "learning_rate": 2.476206568933226e-07, "loss": 0.4932, "step": 14719 }, { "epoch": 1.843018391348962, "grad_norm": 2.125, "learning_rate": 2.4717932282222965e-07, "loss": 0.493, "step": 14720 }, { "epoch": 1.8431446838740233, "grad_norm": 1.8046875, "learning_rate": 2.467383774750787e-07, "loss": 0.4777, "step": 14721 }, { "epoch": 1.8432709763990842, "grad_norm": 2.078125, "learning_rate": 2.4629782086944333e-07, "loss": 0.4596, "step": 14722 }, { "epoch": 1.8433972689241456, "grad_norm": 2.046875, "learning_rate": 2.45857653022884e-07, "loss": 0.4908, "step": 14723 }, { "epoch": 1.8435235614492067, "grad_norm": 1.8828125, "learning_rate": 2.4541787395294335e-07, "loss": 0.4983, "step": 14724 }, { "epoch": 1.8436498539742678, "grad_norm": 1.953125, "learning_rate": 2.449784836771518e-07, "loss": 0.5149, "step": 14725 }, { "epoch": 1.8437761464993292, "grad_norm": 1.9375, "learning_rate": 2.445394822130209e-07, "loss": 0.5385, "step": 14726 }, { "epoch": 1.84390243902439, "grad_norm": 2.0, "learning_rate": 2.441008695780478e-07, "loss": 0.5558, "step": 14727 }, { "epoch": 1.8440287315494515, "grad_norm": 1.9375, "learning_rate": 2.436626457897162e-07, "loss": 0.5038, "step": 14728 }, { "epoch": 1.8441550240745126, "grad_norm": 2.046875, "learning_rate": 2.432248108654922e-07, "loss": 0.4494, "step": 14729 }, { "epoch": 1.8442813165995737, "grad_norm": 2.140625, "learning_rate": 2.427873648228274e-07, "loss": 0.5111, "step": 14730 }, { "epoch": 1.844407609124635, "grad_norm": 1.859375, "learning_rate": 2.4235030767915557e-07, "loss": 0.4689, "step": 14731 }, { "epoch": 1.844533901649696, "grad_norm": 2.09375, "learning_rate": 2.4191363945189726e-07, "loss": 0.5216, "step": 14732 }, { "epoch": 1.8446601941747574, "grad_norm": 1.9140625, "learning_rate": 2.4147736015845855e-07, "loss": 0.4558, "step": 14733 }, { "epoch": 1.8447864866998185, "grad_norm": 2.296875, "learning_rate": 2.4104146981622665e-07, "loss": 0.52, "step": 14734 }, { "epoch": 1.8449127792248796, "grad_norm": 1.9453125, "learning_rate": 2.4060596844257654e-07, "loss": 0.5045, "step": 14735 }, { "epoch": 1.8450390717499408, "grad_norm": 2.15625, "learning_rate": 2.4017085605486433e-07, "loss": 0.4498, "step": 14736 }, { "epoch": 1.8451653642750019, "grad_norm": 1.96875, "learning_rate": 2.3973613267043505e-07, "loss": 0.49, "step": 14737 }, { "epoch": 1.8452916568000632, "grad_norm": 1.9921875, "learning_rate": 2.393017983066137e-07, "loss": 0.4761, "step": 14738 }, { "epoch": 1.8454179493251244, "grad_norm": 1.9765625, "learning_rate": 2.3886785298071313e-07, "loss": 0.4451, "step": 14739 }, { "epoch": 1.8455442418501855, "grad_norm": 1.9921875, "learning_rate": 2.384342967100284e-07, "loss": 0.4896, "step": 14740 }, { "epoch": 1.8456705343752466, "grad_norm": 2.09375, "learning_rate": 2.3800112951184007e-07, "loss": 0.5875, "step": 14741 }, { "epoch": 1.8457968269003078, "grad_norm": 2.21875, "learning_rate": 2.375683514034144e-07, "loss": 0.5066, "step": 14742 }, { "epoch": 1.8459231194253691, "grad_norm": 2.015625, "learning_rate": 2.3713596240199975e-07, "loss": 0.471, "step": 14743 }, { "epoch": 1.84604941195043, "grad_norm": 2.09375, "learning_rate": 2.3670396252483018e-07, "loss": 0.5169, "step": 14744 }, { "epoch": 1.8461757044754914, "grad_norm": 1.9765625, "learning_rate": 2.36272351789123e-07, "loss": 0.4302, "step": 14745 }, { "epoch": 1.8463019970005525, "grad_norm": 2.078125, "learning_rate": 2.3584113021208333e-07, "loss": 0.4841, "step": 14746 }, { "epoch": 1.8464282895256137, "grad_norm": 1.9765625, "learning_rate": 2.3541029781089742e-07, "loss": 0.4701, "step": 14747 }, { "epoch": 1.846554582050675, "grad_norm": 2.09375, "learning_rate": 2.349798546027382e-07, "loss": 0.5213, "step": 14748 }, { "epoch": 1.846680874575736, "grad_norm": 1.84375, "learning_rate": 2.3454980060476194e-07, "loss": 0.4689, "step": 14749 }, { "epoch": 1.8468071671007973, "grad_norm": 2.046875, "learning_rate": 2.341201358341083e-07, "loss": 0.5372, "step": 14750 }, { "epoch": 1.8469334596258584, "grad_norm": 2.03125, "learning_rate": 2.3369086030790356e-07, "loss": 0.5106, "step": 14751 }, { "epoch": 1.8470597521509196, "grad_norm": 1.9140625, "learning_rate": 2.3326197404325734e-07, "loss": 0.4361, "step": 14752 }, { "epoch": 1.8471860446759807, "grad_norm": 2.296875, "learning_rate": 2.328334770572649e-07, "loss": 0.4155, "step": 14753 }, { "epoch": 1.8473123372010418, "grad_norm": 2.15625, "learning_rate": 2.3240536936700364e-07, "loss": 0.5269, "step": 14754 }, { "epoch": 1.8474386297261032, "grad_norm": 2.109375, "learning_rate": 2.3197765098953773e-07, "loss": 0.5171, "step": 14755 }, { "epoch": 1.8475649222511643, "grad_norm": 2.015625, "learning_rate": 2.3155032194191573e-07, "loss": 0.535, "step": 14756 }, { "epoch": 1.8476912147762254, "grad_norm": 1.8984375, "learning_rate": 2.3112338224116736e-07, "loss": 0.5395, "step": 14757 }, { "epoch": 1.8478175073012866, "grad_norm": 2.078125, "learning_rate": 2.3069683190431235e-07, "loss": 0.4939, "step": 14758 }, { "epoch": 1.8479437998263477, "grad_norm": 2.28125, "learning_rate": 2.3027067094835154e-07, "loss": 0.5437, "step": 14759 }, { "epoch": 1.848070092351409, "grad_norm": 1.8359375, "learning_rate": 2.29844899390268e-07, "loss": 0.4741, "step": 14760 }, { "epoch": 1.84819638487647, "grad_norm": 2.015625, "learning_rate": 2.2941951724703594e-07, "loss": 0.534, "step": 14761 }, { "epoch": 1.8483226774015313, "grad_norm": 1.9453125, "learning_rate": 2.289945245356062e-07, "loss": 0.4656, "step": 14762 }, { "epoch": 1.8484489699265925, "grad_norm": 1.96875, "learning_rate": 2.2856992127291978e-07, "loss": 0.4994, "step": 14763 }, { "epoch": 1.8485752624516536, "grad_norm": 2.03125, "learning_rate": 2.281457074759008e-07, "loss": 0.5696, "step": 14764 }, { "epoch": 1.848701554976715, "grad_norm": 1.9375, "learning_rate": 2.2772188316145804e-07, "loss": 0.4155, "step": 14765 }, { "epoch": 1.8488278475017759, "grad_norm": 2.09375, "learning_rate": 2.272984483464813e-07, "loss": 0.4725, "step": 14766 }, { "epoch": 1.8489541400268372, "grad_norm": 1.953125, "learning_rate": 2.2687540304785038e-07, "loss": 0.3886, "step": 14767 }, { "epoch": 1.8490804325518984, "grad_norm": 2.046875, "learning_rate": 2.2645274728242628e-07, "loss": 0.413, "step": 14768 }, { "epoch": 1.8492067250769595, "grad_norm": 1.84375, "learning_rate": 2.2603048106705326e-07, "loss": 0.5118, "step": 14769 }, { "epoch": 1.8493330176020208, "grad_norm": 2.046875, "learning_rate": 2.2560860441856457e-07, "loss": 0.5407, "step": 14770 }, { "epoch": 1.8494593101270818, "grad_norm": 2.0, "learning_rate": 2.251871173537734e-07, "loss": 0.493, "step": 14771 }, { "epoch": 1.8495856026521431, "grad_norm": 1.9296875, "learning_rate": 2.2476601988947965e-07, "loss": 0.4987, "step": 14772 }, { "epoch": 1.8497118951772042, "grad_norm": 1.8515625, "learning_rate": 2.2434531204246656e-07, "loss": 0.4705, "step": 14773 }, { "epoch": 1.8498381877022654, "grad_norm": 1.8359375, "learning_rate": 2.2392499382950407e-07, "loss": 0.4701, "step": 14774 }, { "epoch": 1.8499644802273265, "grad_norm": 2.109375, "learning_rate": 2.2350506526734317e-07, "loss": 0.5502, "step": 14775 }, { "epoch": 1.8500907727523876, "grad_norm": 1.90625, "learning_rate": 2.2308552637272273e-07, "loss": 0.5083, "step": 14776 }, { "epoch": 1.850217065277449, "grad_norm": 2.09375, "learning_rate": 2.2266637716236384e-07, "loss": 0.6099, "step": 14777 }, { "epoch": 1.85034335780251, "grad_norm": 1.9921875, "learning_rate": 2.222476176529742e-07, "loss": 0.5073, "step": 14778 }, { "epoch": 1.8504696503275713, "grad_norm": 1.9140625, "learning_rate": 2.218292478612416e-07, "loss": 0.5269, "step": 14779 }, { "epoch": 1.8505959428526324, "grad_norm": 1.9375, "learning_rate": 2.214112678038438e-07, "loss": 0.4465, "step": 14780 }, { "epoch": 1.8507222353776935, "grad_norm": 2.1875, "learning_rate": 2.2099367749743862e-07, "loss": 0.4639, "step": 14781 }, { "epoch": 1.850848527902755, "grad_norm": 1.9921875, "learning_rate": 2.205764769586727e-07, "loss": 0.4707, "step": 14782 }, { "epoch": 1.8509748204278158, "grad_norm": 2.09375, "learning_rate": 2.2015966620417273e-07, "loss": 0.5071, "step": 14783 }, { "epoch": 1.8511011129528772, "grad_norm": 2.03125, "learning_rate": 2.1974324525055213e-07, "loss": 0.4931, "step": 14784 }, { "epoch": 1.8512274054779383, "grad_norm": 2.03125, "learning_rate": 2.193272141144087e-07, "loss": 0.4931, "step": 14785 }, { "epoch": 1.8513536980029994, "grad_norm": 2.09375, "learning_rate": 2.1891157281232366e-07, "loss": 0.4982, "step": 14786 }, { "epoch": 1.8514799905280608, "grad_norm": 1.9921875, "learning_rate": 2.1849632136086486e-07, "loss": 0.5333, "step": 14787 }, { "epoch": 1.8516062830531217, "grad_norm": 2.046875, "learning_rate": 2.1808145977658236e-07, "loss": 0.4827, "step": 14788 }, { "epoch": 1.851732575578183, "grad_norm": 1.9609375, "learning_rate": 2.1766698807601183e-07, "loss": 0.4393, "step": 14789 }, { "epoch": 1.8518588681032442, "grad_norm": 1.9921875, "learning_rate": 2.172529062756723e-07, "loss": 0.4978, "step": 14790 }, { "epoch": 1.8519851606283053, "grad_norm": 1.953125, "learning_rate": 2.1683921439206946e-07, "loss": 0.4716, "step": 14791 }, { "epoch": 1.8521114531533664, "grad_norm": 2.28125, "learning_rate": 2.164259124416901e-07, "loss": 0.5068, "step": 14792 }, { "epoch": 1.8522377456784276, "grad_norm": 2.03125, "learning_rate": 2.1601300044100992e-07, "loss": 0.489, "step": 14793 }, { "epoch": 1.852364038203489, "grad_norm": 1.8671875, "learning_rate": 2.156004784064847e-07, "loss": 0.4385, "step": 14794 }, { "epoch": 1.8524903307285498, "grad_norm": 2.09375, "learning_rate": 2.1518834635455787e-07, "loss": 0.4856, "step": 14795 }, { "epoch": 1.8526166232536112, "grad_norm": 1.984375, "learning_rate": 2.1477660430165415e-07, "loss": 0.5288, "step": 14796 }, { "epoch": 1.8527429157786723, "grad_norm": 1.9375, "learning_rate": 2.143652522641859e-07, "loss": 0.4437, "step": 14797 }, { "epoch": 1.8528692083037335, "grad_norm": 2.046875, "learning_rate": 2.1395429025854898e-07, "loss": 0.5263, "step": 14798 }, { "epoch": 1.8529955008287948, "grad_norm": 1.9375, "learning_rate": 2.1354371830112353e-07, "loss": 0.4902, "step": 14799 }, { "epoch": 1.8531217933538557, "grad_norm": 1.9375, "learning_rate": 2.1313353640827207e-07, "loss": 0.5124, "step": 14800 }, { "epoch": 1.853248085878917, "grad_norm": 2.1875, "learning_rate": 2.1272374459634591e-07, "loss": 0.6232, "step": 14801 }, { "epoch": 1.8533743784039782, "grad_norm": 2.015625, "learning_rate": 2.1231434288167652e-07, "loss": 0.5347, "step": 14802 }, { "epoch": 1.8535006709290394, "grad_norm": 2.09375, "learning_rate": 2.119053312805819e-07, "loss": 0.5074, "step": 14803 }, { "epoch": 1.8536269634541007, "grad_norm": 1.9453125, "learning_rate": 2.1149670980936566e-07, "loss": 0.519, "step": 14804 }, { "epoch": 1.8537532559791616, "grad_norm": 1.9140625, "learning_rate": 2.1108847848431259e-07, "loss": 0.4845, "step": 14805 }, { "epoch": 1.853879548504223, "grad_norm": 1.9765625, "learning_rate": 2.1068063732169518e-07, "loss": 0.4788, "step": 14806 }, { "epoch": 1.8540058410292841, "grad_norm": 2.015625, "learning_rate": 2.1027318633776938e-07, "loss": 0.5352, "step": 14807 }, { "epoch": 1.8541321335543453, "grad_norm": 2.0, "learning_rate": 2.0986612554877328e-07, "loss": 0.4621, "step": 14808 }, { "epoch": 1.8542584260794064, "grad_norm": 2.15625, "learning_rate": 2.0945945497093277e-07, "loss": 0.5215, "step": 14809 }, { "epoch": 1.8543847186044675, "grad_norm": 2.109375, "learning_rate": 2.0905317462045604e-07, "loss": 0.5104, "step": 14810 }, { "epoch": 1.8545110111295289, "grad_norm": 2.015625, "learning_rate": 2.0864728451353678e-07, "loss": 0.4748, "step": 14811 }, { "epoch": 1.8546373036545898, "grad_norm": 2.046875, "learning_rate": 2.0824178466635315e-07, "loss": 0.5222, "step": 14812 }, { "epoch": 1.8547635961796511, "grad_norm": 1.984375, "learning_rate": 2.0783667509506555e-07, "loss": 0.486, "step": 14813 }, { "epoch": 1.8548898887047123, "grad_norm": 1.890625, "learning_rate": 2.0743195581582333e-07, "loss": 0.4612, "step": 14814 }, { "epoch": 1.8550161812297734, "grad_norm": 2.109375, "learning_rate": 2.0702762684475685e-07, "loss": 0.4806, "step": 14815 }, { "epoch": 1.8551424737548348, "grad_norm": 1.96875, "learning_rate": 2.0662368819798108e-07, "loss": 0.4817, "step": 14816 }, { "epoch": 1.8552687662798957, "grad_norm": 1.9453125, "learning_rate": 2.062201398915964e-07, "loss": 0.4035, "step": 14817 }, { "epoch": 1.855395058804957, "grad_norm": 1.984375, "learning_rate": 2.0581698194168664e-07, "loss": 0.4369, "step": 14818 }, { "epoch": 1.8555213513300182, "grad_norm": 2.09375, "learning_rate": 2.054142143643223e-07, "loss": 0.5289, "step": 14819 }, { "epoch": 1.8556476438550793, "grad_norm": 1.9375, "learning_rate": 2.0501183717555494e-07, "loss": 0.5197, "step": 14820 }, { "epoch": 1.8557739363801407, "grad_norm": 1.90625, "learning_rate": 2.046098503914229e-07, "loss": 0.4477, "step": 14821 }, { "epoch": 1.8559002289052016, "grad_norm": 2.125, "learning_rate": 2.0420825402794885e-07, "loss": 0.5167, "step": 14822 }, { "epoch": 1.856026521430263, "grad_norm": 2.203125, "learning_rate": 2.0380704810113895e-07, "loss": 0.466, "step": 14823 }, { "epoch": 1.856152813955324, "grad_norm": 1.890625, "learning_rate": 2.0340623262698478e-07, "loss": 0.4823, "step": 14824 }, { "epoch": 1.8562791064803852, "grad_norm": 2.1875, "learning_rate": 2.0300580762146138e-07, "loss": 0.558, "step": 14825 }, { "epoch": 1.8564053990054463, "grad_norm": 1.953125, "learning_rate": 2.026057731005282e-07, "loss": 0.516, "step": 14826 }, { "epoch": 1.8565316915305075, "grad_norm": 1.984375, "learning_rate": 2.0220612908013138e-07, "loss": 0.5031, "step": 14827 }, { "epoch": 1.8566579840555688, "grad_norm": 1.734375, "learning_rate": 2.0180687557619816e-07, "loss": 0.4476, "step": 14828 }, { "epoch": 1.8567842765806297, "grad_norm": 1.890625, "learning_rate": 2.014080126046425e-07, "loss": 0.4368, "step": 14829 }, { "epoch": 1.856910569105691, "grad_norm": 2.0625, "learning_rate": 2.0100954018136277e-07, "loss": 0.4311, "step": 14830 }, { "epoch": 1.8570368616307522, "grad_norm": 2.0, "learning_rate": 2.006114583222396e-07, "loss": 0.4805, "step": 14831 }, { "epoch": 1.8571631541558133, "grad_norm": 2.015625, "learning_rate": 2.0021376704314034e-07, "loss": 0.5015, "step": 14832 }, { "epoch": 1.8572894466808747, "grad_norm": 2.171875, "learning_rate": 1.998164663599167e-07, "loss": 0.5076, "step": 14833 }, { "epoch": 1.8574157392059356, "grad_norm": 1.9296875, "learning_rate": 1.994195562884038e-07, "loss": 0.5059, "step": 14834 }, { "epoch": 1.857542031730997, "grad_norm": 1.984375, "learning_rate": 1.9902303684442013e-07, "loss": 0.4925, "step": 14835 }, { "epoch": 1.857668324256058, "grad_norm": 2.0625, "learning_rate": 1.9862690804377194e-07, "loss": 0.4345, "step": 14836 }, { "epoch": 1.8577946167811192, "grad_norm": 1.9375, "learning_rate": 1.9823116990224766e-07, "loss": 0.4769, "step": 14837 }, { "epoch": 1.8579209093061806, "grad_norm": 2.15625, "learning_rate": 1.9783582243561916e-07, "loss": 0.5764, "step": 14838 }, { "epoch": 1.8580472018312415, "grad_norm": 1.9765625, "learning_rate": 1.974408656596438e-07, "loss": 0.4503, "step": 14839 }, { "epoch": 1.8581734943563029, "grad_norm": 1.953125, "learning_rate": 1.970462995900657e-07, "loss": 0.4749, "step": 14840 }, { "epoch": 1.858299786881364, "grad_norm": 2.09375, "learning_rate": 1.9665212424261004e-07, "loss": 0.5027, "step": 14841 }, { "epoch": 1.8584260794064251, "grad_norm": 2.109375, "learning_rate": 1.9625833963298867e-07, "loss": 0.4623, "step": 14842 }, { "epoch": 1.8585523719314863, "grad_norm": 2.09375, "learning_rate": 1.9586494577689462e-07, "loss": 0.5428, "step": 14843 }, { "epoch": 1.8586786644565474, "grad_norm": 2.1875, "learning_rate": 1.954719426900098e-07, "loss": 0.5014, "step": 14844 }, { "epoch": 1.8588049569816087, "grad_norm": 2.0625, "learning_rate": 1.950793303879972e-07, "loss": 0.4852, "step": 14845 }, { "epoch": 1.8589312495066697, "grad_norm": 1.9375, "learning_rate": 1.9468710888650545e-07, "loss": 0.4917, "step": 14846 }, { "epoch": 1.859057542031731, "grad_norm": 1.9296875, "learning_rate": 1.9429527820116978e-07, "loss": 0.4716, "step": 14847 }, { "epoch": 1.8591838345567921, "grad_norm": 2.046875, "learning_rate": 1.939038383476044e-07, "loss": 0.4993, "step": 14848 }, { "epoch": 1.8593101270818533, "grad_norm": 2.0, "learning_rate": 1.9351278934141237e-07, "loss": 0.5803, "step": 14849 }, { "epoch": 1.8594364196069146, "grad_norm": 2.140625, "learning_rate": 1.9312213119818125e-07, "loss": 0.5866, "step": 14850 }, { "epoch": 1.8595627121319755, "grad_norm": 1.9296875, "learning_rate": 1.9273186393347964e-07, "loss": 0.4914, "step": 14851 }, { "epoch": 1.859689004657037, "grad_norm": 2.140625, "learning_rate": 1.9234198756286514e-07, "loss": 0.4833, "step": 14852 }, { "epoch": 1.859815297182098, "grad_norm": 1.8671875, "learning_rate": 1.919525021018742e-07, "loss": 0.4498, "step": 14853 }, { "epoch": 1.8599415897071592, "grad_norm": 2.09375, "learning_rate": 1.9156340756603331e-07, "loss": 0.5055, "step": 14854 }, { "epoch": 1.8600678822322205, "grad_norm": 2.1875, "learning_rate": 1.911747039708489e-07, "loss": 0.5378, "step": 14855 }, { "epoch": 1.8601941747572814, "grad_norm": 2.0, "learning_rate": 1.9078639133181532e-07, "loss": 0.4675, "step": 14856 }, { "epoch": 1.8603204672823428, "grad_norm": 2.046875, "learning_rate": 1.9039846966441012e-07, "loss": 0.5183, "step": 14857 }, { "epoch": 1.860446759807404, "grad_norm": 1.8828125, "learning_rate": 1.9001093898409318e-07, "loss": 0.4643, "step": 14858 }, { "epoch": 1.860573052332465, "grad_norm": 2.15625, "learning_rate": 1.8962379930631103e-07, "loss": 0.5364, "step": 14859 }, { "epoch": 1.8606993448575262, "grad_norm": 2.0, "learning_rate": 1.8923705064649468e-07, "loss": 0.5312, "step": 14860 }, { "epoch": 1.8608256373825873, "grad_norm": 2.03125, "learning_rate": 1.8885069302005843e-07, "loss": 0.4957, "step": 14861 }, { "epoch": 1.8609519299076487, "grad_norm": 1.984375, "learning_rate": 1.884647264424022e-07, "loss": 0.4919, "step": 14862 }, { "epoch": 1.8610782224327098, "grad_norm": 1.9609375, "learning_rate": 1.8807915092890928e-07, "loss": 0.5375, "step": 14863 }, { "epoch": 1.861204514957771, "grad_norm": 2.15625, "learning_rate": 1.8769396649494843e-07, "loss": 0.5545, "step": 14864 }, { "epoch": 1.861330807482832, "grad_norm": 2.09375, "learning_rate": 1.8730917315587182e-07, "loss": 0.4846, "step": 14865 }, { "epoch": 1.8614571000078932, "grad_norm": 2.0625, "learning_rate": 1.869247709270161e-07, "loss": 0.5357, "step": 14866 }, { "epoch": 1.8615833925329546, "grad_norm": 2.265625, "learning_rate": 1.8654075982370235e-07, "loss": 0.5576, "step": 14867 }, { "epoch": 1.8617096850580155, "grad_norm": 1.8671875, "learning_rate": 1.8615713986123607e-07, "loss": 0.4656, "step": 14868 }, { "epoch": 1.8618359775830768, "grad_norm": 2.21875, "learning_rate": 1.857739110549095e-07, "loss": 0.5743, "step": 14869 }, { "epoch": 1.861962270108138, "grad_norm": 1.890625, "learning_rate": 1.8539107341999485e-07, "loss": 0.4915, "step": 14870 }, { "epoch": 1.862088562633199, "grad_norm": 2.0, "learning_rate": 1.8500862697175215e-07, "loss": 0.542, "step": 14871 }, { "epoch": 1.8622148551582605, "grad_norm": 2.15625, "learning_rate": 1.8462657172542475e-07, "loss": 0.4759, "step": 14872 }, { "epoch": 1.8623411476833214, "grad_norm": 2.078125, "learning_rate": 1.842449076962416e-07, "loss": 0.5215, "step": 14873 }, { "epoch": 1.8624674402083827, "grad_norm": 2.03125, "learning_rate": 1.8386363489941162e-07, "loss": 0.4879, "step": 14874 }, { "epoch": 1.8625937327334439, "grad_norm": 2.0625, "learning_rate": 1.8348275335013487e-07, "loss": 0.467, "step": 14875 }, { "epoch": 1.862720025258505, "grad_norm": 1.9765625, "learning_rate": 1.8310226306359036e-07, "loss": 0.4499, "step": 14876 }, { "epoch": 1.8628463177835661, "grad_norm": 1.90625, "learning_rate": 1.827221640549437e-07, "loss": 0.4821, "step": 14877 }, { "epoch": 1.8629726103086273, "grad_norm": 1.9140625, "learning_rate": 1.8234245633934611e-07, "loss": 0.4486, "step": 14878 }, { "epoch": 1.8630989028336886, "grad_norm": 1.9765625, "learning_rate": 1.8196313993193103e-07, "loss": 0.4859, "step": 14879 }, { "epoch": 1.8632251953587498, "grad_norm": 1.890625, "learning_rate": 1.815842148478164e-07, "loss": 0.4212, "step": 14880 }, { "epoch": 1.8633514878838109, "grad_norm": 2.046875, "learning_rate": 1.812056811021068e-07, "loss": 0.5066, "step": 14881 }, { "epoch": 1.863477780408872, "grad_norm": 1.90625, "learning_rate": 1.8082753870988789e-07, "loss": 0.4346, "step": 14882 }, { "epoch": 1.8636040729339332, "grad_norm": 1.9453125, "learning_rate": 1.804497876862321e-07, "loss": 0.4509, "step": 14883 }, { "epoch": 1.8637303654589945, "grad_norm": 2.046875, "learning_rate": 1.800724280461963e-07, "loss": 0.5759, "step": 14884 }, { "epoch": 1.8638566579840554, "grad_norm": 1.859375, "learning_rate": 1.796954598048206e-07, "loss": 0.445, "step": 14885 }, { "epoch": 1.8639829505091168, "grad_norm": 1.8203125, "learning_rate": 1.7931888297712973e-07, "loss": 0.498, "step": 14886 }, { "epoch": 1.864109243034178, "grad_norm": 1.921875, "learning_rate": 1.7894269757813498e-07, "loss": 0.4651, "step": 14887 }, { "epoch": 1.864235535559239, "grad_norm": 2.078125, "learning_rate": 1.785669036228277e-07, "loss": 0.6012, "step": 14888 }, { "epoch": 1.8643618280843004, "grad_norm": 1.875, "learning_rate": 1.78191501126187e-07, "loss": 0.441, "step": 14889 }, { "epoch": 1.8644881206093613, "grad_norm": 2.0, "learning_rate": 1.778164901031765e-07, "loss": 0.5226, "step": 14890 }, { "epoch": 1.8646144131344227, "grad_norm": 2.265625, "learning_rate": 1.7744187056874195e-07, "loss": 0.4489, "step": 14891 }, { "epoch": 1.8647407056594838, "grad_norm": 1.9453125, "learning_rate": 1.7706764253781595e-07, "loss": 0.4382, "step": 14892 }, { "epoch": 1.864866998184545, "grad_norm": 1.984375, "learning_rate": 1.7669380602531317e-07, "loss": 0.4901, "step": 14893 }, { "epoch": 1.8649932907096063, "grad_norm": 2.09375, "learning_rate": 1.7632036104613393e-07, "loss": 0.5207, "step": 14894 }, { "epoch": 1.8651195832346672, "grad_norm": 2.078125, "learning_rate": 1.759473076151641e-07, "loss": 0.5457, "step": 14895 }, { "epoch": 1.8652458757597286, "grad_norm": 2.078125, "learning_rate": 1.7557464574727178e-07, "loss": 0.53, "step": 14896 }, { "epoch": 1.8653721682847897, "grad_norm": 2.046875, "learning_rate": 1.7520237545731067e-07, "loss": 0.4795, "step": 14897 }, { "epoch": 1.8654984608098508, "grad_norm": 2.0625, "learning_rate": 1.7483049676011777e-07, "loss": 0.5113, "step": 14898 }, { "epoch": 1.865624753334912, "grad_norm": 2.015625, "learning_rate": 1.7445900967051567e-07, "loss": 0.4693, "step": 14899 }, { "epoch": 1.865751045859973, "grad_norm": 1.8984375, "learning_rate": 1.740879142033125e-07, "loss": 0.4754, "step": 14900 }, { "epoch": 1.8658773383850344, "grad_norm": 2.0625, "learning_rate": 1.7371721037329649e-07, "loss": 0.488, "step": 14901 }, { "epoch": 1.8660036309100954, "grad_norm": 1.9609375, "learning_rate": 1.7334689819524464e-07, "loss": 0.4618, "step": 14902 }, { "epoch": 1.8661299234351567, "grad_norm": 1.9921875, "learning_rate": 1.729769776839174e-07, "loss": 0.5082, "step": 14903 }, { "epoch": 1.8662562159602178, "grad_norm": 1.984375, "learning_rate": 1.7260744885405746e-07, "loss": 0.5133, "step": 14904 }, { "epoch": 1.866382508485279, "grad_norm": 2.03125, "learning_rate": 1.7223831172039297e-07, "loss": 0.5118, "step": 14905 }, { "epoch": 1.8665088010103403, "grad_norm": 2.078125, "learning_rate": 1.7186956629763886e-07, "loss": 0.5707, "step": 14906 }, { "epoch": 1.8666350935354012, "grad_norm": 1.9921875, "learning_rate": 1.7150121260049114e-07, "loss": 0.4119, "step": 14907 }, { "epoch": 1.8667613860604626, "grad_norm": 2.015625, "learning_rate": 1.711332506436314e-07, "loss": 0.5633, "step": 14908 }, { "epoch": 1.8668876785855237, "grad_norm": 2.015625, "learning_rate": 1.707656804417246e-07, "loss": 0.5404, "step": 14909 }, { "epoch": 1.8670139711105849, "grad_norm": 2.109375, "learning_rate": 1.7039850200942455e-07, "loss": 0.4805, "step": 14910 }, { "epoch": 1.8671402636356462, "grad_norm": 2.265625, "learning_rate": 1.7003171536136286e-07, "loss": 0.4561, "step": 14911 }, { "epoch": 1.8672665561607071, "grad_norm": 1.8984375, "learning_rate": 1.6966532051216122e-07, "loss": 0.4457, "step": 14912 }, { "epoch": 1.8673928486857685, "grad_norm": 1.921875, "learning_rate": 1.6929931747642125e-07, "loss": 0.4659, "step": 14913 }, { "epoch": 1.8675191412108296, "grad_norm": 2.109375, "learning_rate": 1.689337062687324e-07, "loss": 0.6048, "step": 14914 }, { "epoch": 1.8676454337358908, "grad_norm": 2.03125, "learning_rate": 1.6856848690366522e-07, "loss": 0.5398, "step": 14915 }, { "epoch": 1.867771726260952, "grad_norm": 1.875, "learning_rate": 1.682036593957792e-07, "loss": 0.4312, "step": 14916 }, { "epoch": 1.867898018786013, "grad_norm": 1.8984375, "learning_rate": 1.678392237596127e-07, "loss": 0.5216, "step": 14917 }, { "epoch": 1.8680243113110744, "grad_norm": 1.765625, "learning_rate": 1.67475180009693e-07, "loss": 0.4706, "step": 14918 }, { "epoch": 1.8681506038361353, "grad_norm": 2.0, "learning_rate": 1.6711152816052955e-07, "loss": 0.5321, "step": 14919 }, { "epoch": 1.8682768963611966, "grad_norm": 1.890625, "learning_rate": 1.667482682266175e-07, "loss": 0.4323, "step": 14920 }, { "epoch": 1.8684031888862578, "grad_norm": 1.890625, "learning_rate": 1.6638540022243298e-07, "loss": 0.4761, "step": 14921 }, { "epoch": 1.868529481411319, "grad_norm": 2.015625, "learning_rate": 1.6602292416244114e-07, "loss": 0.4877, "step": 14922 }, { "epoch": 1.8686557739363803, "grad_norm": 1.9765625, "learning_rate": 1.6566084006108928e-07, "loss": 0.4857, "step": 14923 }, { "epoch": 1.8687820664614412, "grad_norm": 1.9921875, "learning_rate": 1.652991479328092e-07, "loss": 0.4925, "step": 14924 }, { "epoch": 1.8689083589865025, "grad_norm": 2.03125, "learning_rate": 1.6493784779201495e-07, "loss": 0.4988, "step": 14925 }, { "epoch": 1.8690346515115637, "grad_norm": 2.046875, "learning_rate": 1.645769396531105e-07, "loss": 0.5258, "step": 14926 }, { "epoch": 1.8691609440366248, "grad_norm": 2.171875, "learning_rate": 1.6421642353047883e-07, "loss": 0.4723, "step": 14927 }, { "epoch": 1.8692872365616862, "grad_norm": 1.9453125, "learning_rate": 1.6385629943848956e-07, "loss": 0.4649, "step": 14928 }, { "epoch": 1.869413529086747, "grad_norm": 1.921875, "learning_rate": 1.6349656739149676e-07, "loss": 0.4519, "step": 14929 }, { "epoch": 1.8695398216118084, "grad_norm": 1.9453125, "learning_rate": 1.6313722740383674e-07, "loss": 0.4058, "step": 14930 }, { "epoch": 1.8696661141368696, "grad_norm": 2.046875, "learning_rate": 1.6277827948983472e-07, "loss": 0.4899, "step": 14931 }, { "epoch": 1.8697924066619307, "grad_norm": 1.953125, "learning_rate": 1.624197236637959e-07, "loss": 0.4945, "step": 14932 }, { "epoch": 1.8699186991869918, "grad_norm": 1.9609375, "learning_rate": 1.6206155994001104e-07, "loss": 0.5301, "step": 14933 }, { "epoch": 1.870044991712053, "grad_norm": 2.125, "learning_rate": 1.6170378833275658e-07, "loss": 0.5495, "step": 14934 }, { "epoch": 1.8701712842371143, "grad_norm": 2.28125, "learning_rate": 1.6134640885629104e-07, "loss": 0.5876, "step": 14935 }, { "epoch": 1.8702975767621752, "grad_norm": 1.890625, "learning_rate": 1.6098942152486085e-07, "loss": 0.4305, "step": 14936 }, { "epoch": 1.8704238692872366, "grad_norm": 2.21875, "learning_rate": 1.606328263526935e-07, "loss": 0.5305, "step": 14937 }, { "epoch": 1.8705501618122977, "grad_norm": 1.9375, "learning_rate": 1.6027662335400208e-07, "loss": 0.4438, "step": 14938 }, { "epoch": 1.8706764543373589, "grad_norm": 1.8046875, "learning_rate": 1.599208125429841e-07, "loss": 0.4259, "step": 14939 }, { "epoch": 1.8708027468624202, "grad_norm": 2.078125, "learning_rate": 1.5956539393382043e-07, "loss": 0.4902, "step": 14940 }, { "epoch": 1.8709290393874811, "grad_norm": 1.953125, "learning_rate": 1.5921036754067864e-07, "loss": 0.5057, "step": 14941 }, { "epoch": 1.8710553319125425, "grad_norm": 2.078125, "learning_rate": 1.5885573337770855e-07, "loss": 0.4625, "step": 14942 }, { "epoch": 1.8711816244376036, "grad_norm": 2.015625, "learning_rate": 1.5850149145904438e-07, "loss": 0.4935, "step": 14943 }, { "epoch": 1.8713079169626647, "grad_norm": 1.9765625, "learning_rate": 1.5814764179880592e-07, "loss": 0.4549, "step": 14944 }, { "epoch": 1.871434209487726, "grad_norm": 2.046875, "learning_rate": 1.577941844110975e-07, "loss": 0.4757, "step": 14945 }, { "epoch": 1.871560502012787, "grad_norm": 2.078125, "learning_rate": 1.5744111931000672e-07, "loss": 0.5327, "step": 14946 }, { "epoch": 1.8716867945378484, "grad_norm": 2.03125, "learning_rate": 1.5708844650960563e-07, "loss": 0.4768, "step": 14947 }, { "epoch": 1.8718130870629095, "grad_norm": 1.9609375, "learning_rate": 1.567361660239508e-07, "loss": 0.4743, "step": 14948 }, { "epoch": 1.8719393795879706, "grad_norm": 2.171875, "learning_rate": 1.563842778670832e-07, "loss": 0.5752, "step": 14949 }, { "epoch": 1.8720656721130318, "grad_norm": 1.90625, "learning_rate": 1.560327820530283e-07, "loss": 0.4635, "step": 14950 }, { "epoch": 1.872191964638093, "grad_norm": 1.921875, "learning_rate": 1.5568167859579596e-07, "loss": 0.4871, "step": 14951 }, { "epoch": 1.8723182571631543, "grad_norm": 1.90625, "learning_rate": 1.553309675093817e-07, "loss": 0.5026, "step": 14952 }, { "epoch": 1.8724445496882152, "grad_norm": 2.125, "learning_rate": 1.5498064880776098e-07, "loss": 0.5169, "step": 14953 }, { "epoch": 1.8725708422132765, "grad_norm": 2.0625, "learning_rate": 1.546307225048993e-07, "loss": 0.4398, "step": 14954 }, { "epoch": 1.8726971347383377, "grad_norm": 1.84375, "learning_rate": 1.542811886147433e-07, "loss": 0.4325, "step": 14955 }, { "epoch": 1.8728234272633988, "grad_norm": 1.953125, "learning_rate": 1.539320471512229e-07, "loss": 0.4808, "step": 14956 }, { "epoch": 1.8729497197884601, "grad_norm": 1.8671875, "learning_rate": 1.535832981282559e-07, "loss": 0.5216, "step": 14957 }, { "epoch": 1.873076012313521, "grad_norm": 1.96875, "learning_rate": 1.5323494155974227e-07, "loss": 0.4156, "step": 14958 }, { "epoch": 1.8732023048385824, "grad_norm": 2.21875, "learning_rate": 1.528869774595665e-07, "loss": 0.5018, "step": 14959 }, { "epoch": 1.8733285973636435, "grad_norm": 2.0, "learning_rate": 1.5253940584159854e-07, "loss": 0.5065, "step": 14960 }, { "epoch": 1.8734548898887047, "grad_norm": 1.984375, "learning_rate": 1.5219222671968847e-07, "loss": 0.5071, "step": 14961 }, { "epoch": 1.873581182413766, "grad_norm": 2.0625, "learning_rate": 1.5184544010767742e-07, "loss": 0.5128, "step": 14962 }, { "epoch": 1.873707474938827, "grad_norm": 1.90625, "learning_rate": 1.5149904601938658e-07, "loss": 0.4641, "step": 14963 }, { "epoch": 1.8738337674638883, "grad_norm": 2.109375, "learning_rate": 1.5115304446862268e-07, "loss": 0.4544, "step": 14964 }, { "epoch": 1.8739600599889494, "grad_norm": 1.890625, "learning_rate": 1.5080743546917464e-07, "loss": 0.439, "step": 14965 }, { "epoch": 1.8740863525140106, "grad_norm": 2.015625, "learning_rate": 1.504622190348204e-07, "loss": 0.4843, "step": 14966 }, { "epoch": 1.8742126450390717, "grad_norm": 1.9140625, "learning_rate": 1.5011739517931666e-07, "loss": 0.4911, "step": 14967 }, { "epoch": 1.8743389375641328, "grad_norm": 2.0625, "learning_rate": 1.4977296391641026e-07, "loss": 0.4568, "step": 14968 }, { "epoch": 1.8744652300891942, "grad_norm": 2.078125, "learning_rate": 1.4942892525982577e-07, "loss": 0.4405, "step": 14969 }, { "epoch": 1.8745915226142553, "grad_norm": 1.9296875, "learning_rate": 1.4908527922327886e-07, "loss": 0.4518, "step": 14970 }, { "epoch": 1.8747178151393165, "grad_norm": 2.140625, "learning_rate": 1.4874202582046526e-07, "loss": 0.528, "step": 14971 }, { "epoch": 1.8748441076643776, "grad_norm": 1.9296875, "learning_rate": 1.4839916506506623e-07, "loss": 0.4137, "step": 14972 }, { "epoch": 1.8749704001894387, "grad_norm": 1.8515625, "learning_rate": 1.4805669697074753e-07, "loss": 0.4198, "step": 14973 }, { "epoch": 1.8750966927145, "grad_norm": 1.875, "learning_rate": 1.4771462155115933e-07, "loss": 0.4188, "step": 14974 }, { "epoch": 1.875222985239561, "grad_norm": 1.875, "learning_rate": 1.473729388199352e-07, "loss": 0.459, "step": 14975 }, { "epoch": 1.8753492777646223, "grad_norm": 1.90625, "learning_rate": 1.470316487906942e-07, "loss": 0.4793, "step": 14976 }, { "epoch": 1.8754755702896835, "grad_norm": 2.203125, "learning_rate": 1.4669075147703881e-07, "loss": 0.5714, "step": 14977 }, { "epoch": 1.8756018628147446, "grad_norm": 2.140625, "learning_rate": 1.4635024689255705e-07, "loss": 0.4994, "step": 14978 }, { "epoch": 1.875728155339806, "grad_norm": 2.109375, "learning_rate": 1.460101350508203e-07, "loss": 0.4579, "step": 14979 }, { "epoch": 1.8758544478648669, "grad_norm": 2.078125, "learning_rate": 1.4567041596538545e-07, "loss": 0.5555, "step": 14980 }, { "epoch": 1.8759807403899282, "grad_norm": 2.0625, "learning_rate": 1.453310896497917e-07, "loss": 0.5056, "step": 14981 }, { "epoch": 1.8761070329149894, "grad_norm": 1.9609375, "learning_rate": 1.449921561175649e-07, "loss": 0.4794, "step": 14982 }, { "epoch": 1.8762333254400505, "grad_norm": 2.046875, "learning_rate": 1.44653615382212e-07, "loss": 0.5139, "step": 14983 }, { "epoch": 1.8763596179651116, "grad_norm": 2.09375, "learning_rate": 1.4431546745722892e-07, "loss": 0.4856, "step": 14984 }, { "epoch": 1.8764859104901728, "grad_norm": 2.0625, "learning_rate": 1.4397771235609258e-07, "loss": 0.5043, "step": 14985 }, { "epoch": 1.8766122030152341, "grad_norm": 2.109375, "learning_rate": 1.436403500922656e-07, "loss": 0.5725, "step": 14986 }, { "epoch": 1.8767384955402953, "grad_norm": 2.140625, "learning_rate": 1.433033806791928e-07, "loss": 0.4573, "step": 14987 }, { "epoch": 1.8768647880653564, "grad_norm": 1.8359375, "learning_rate": 1.429668041303056e-07, "loss": 0.5013, "step": 14988 }, { "epoch": 1.8769910805904175, "grad_norm": 2.171875, "learning_rate": 1.4263062045902e-07, "loss": 0.556, "step": 14989 }, { "epoch": 1.8771173731154787, "grad_norm": 2.140625, "learning_rate": 1.422948296787341e-07, "loss": 0.5131, "step": 14990 }, { "epoch": 1.87724366564054, "grad_norm": 2.078125, "learning_rate": 1.419594318028339e-07, "loss": 0.5865, "step": 14991 }, { "epoch": 1.877369958165601, "grad_norm": 1.9375, "learning_rate": 1.4162442684468535e-07, "loss": 0.4761, "step": 14992 }, { "epoch": 1.8774962506906623, "grad_norm": 2.046875, "learning_rate": 1.4128981481764115e-07, "loss": 0.5014, "step": 14993 }, { "epoch": 1.8776225432157234, "grad_norm": 2.203125, "learning_rate": 1.4095559573503946e-07, "loss": 0.5247, "step": 14994 }, { "epoch": 1.8777488357407845, "grad_norm": 1.90625, "learning_rate": 1.406217696102008e-07, "loss": 0.4806, "step": 14995 }, { "epoch": 1.877875128265846, "grad_norm": 1.90625, "learning_rate": 1.4028833645643113e-07, "loss": 0.4464, "step": 14996 }, { "epoch": 1.8780014207909068, "grad_norm": 2.046875, "learning_rate": 1.3995529628701764e-07, "loss": 0.5383, "step": 14997 }, { "epoch": 1.8781277133159682, "grad_norm": 2.09375, "learning_rate": 1.396226491152386e-07, "loss": 0.5048, "step": 14998 }, { "epoch": 1.8782540058410293, "grad_norm": 2.515625, "learning_rate": 1.3929039495435003e-07, "loss": 0.5624, "step": 14999 }, { "epoch": 1.8783802983660904, "grad_norm": 1.9921875, "learning_rate": 1.389585338175947e-07, "loss": 0.5015, "step": 15000 }, { "epoch": 1.8785065908911518, "grad_norm": 1.984375, "learning_rate": 1.3862706571820094e-07, "loss": 0.4573, "step": 15001 }, { "epoch": 1.8786328834162127, "grad_norm": 1.953125, "learning_rate": 1.3829599066938039e-07, "loss": 0.4271, "step": 15002 }, { "epoch": 1.878759175941274, "grad_norm": 2.234375, "learning_rate": 1.3796530868432801e-07, "loss": 0.5133, "step": 15003 }, { "epoch": 1.8788854684663352, "grad_norm": 1.9375, "learning_rate": 1.3763501977622329e-07, "loss": 0.5059, "step": 15004 }, { "epoch": 1.8790117609913963, "grad_norm": 1.984375, "learning_rate": 1.3730512395823238e-07, "loss": 0.4721, "step": 15005 }, { "epoch": 1.8791380535164575, "grad_norm": 2.015625, "learning_rate": 1.369756212435036e-07, "loss": 0.4807, "step": 15006 }, { "epoch": 1.8792643460415186, "grad_norm": 2.109375, "learning_rate": 1.3664651164516985e-07, "loss": 0.5298, "step": 15007 }, { "epoch": 1.87939063856658, "grad_norm": 2.015625, "learning_rate": 1.3631779517634836e-07, "loss": 0.4403, "step": 15008 }, { "epoch": 1.8795169310916409, "grad_norm": 2.015625, "learning_rate": 1.3598947185014198e-07, "loss": 0.6555, "step": 15009 }, { "epoch": 1.8796432236167022, "grad_norm": 2.640625, "learning_rate": 1.3566154167963585e-07, "loss": 0.5997, "step": 15010 }, { "epoch": 1.8797695161417634, "grad_norm": 1.8359375, "learning_rate": 1.3533400467790059e-07, "loss": 0.4956, "step": 15011 }, { "epoch": 1.8798958086668245, "grad_norm": 1.828125, "learning_rate": 1.3500686085799242e-07, "loss": 0.4914, "step": 15012 }, { "epoch": 1.8800221011918858, "grad_norm": 1.9140625, "learning_rate": 1.3468011023294869e-07, "loss": 0.4979, "step": 15013 }, { "epoch": 1.8801483937169468, "grad_norm": 1.9140625, "learning_rate": 1.3435375281579343e-07, "loss": 0.4892, "step": 15014 }, { "epoch": 1.880274686242008, "grad_norm": 2.109375, "learning_rate": 1.3402778861953513e-07, "loss": 0.4733, "step": 15015 }, { "epoch": 1.8804009787670692, "grad_norm": 1.8203125, "learning_rate": 1.337022176571645e-07, "loss": 0.4221, "step": 15016 }, { "epoch": 1.8805272712921304, "grad_norm": 1.8671875, "learning_rate": 1.3337703994166008e-07, "loss": 0.4872, "step": 15017 }, { "epoch": 1.8806535638171917, "grad_norm": 2.265625, "learning_rate": 1.3305225548598144e-07, "loss": 0.4808, "step": 15018 }, { "epoch": 1.8807798563422526, "grad_norm": 2.078125, "learning_rate": 1.3272786430307382e-07, "loss": 0.4584, "step": 15019 }, { "epoch": 1.880906148867314, "grad_norm": 1.9375, "learning_rate": 1.324038664058669e-07, "loss": 0.4878, "step": 15020 }, { "epoch": 1.8810324413923751, "grad_norm": 2.046875, "learning_rate": 1.3208026180727473e-07, "loss": 0.4961, "step": 15021 }, { "epoch": 1.8811587339174363, "grad_norm": 1.90625, "learning_rate": 1.3175705052019372e-07, "loss": 0.4571, "step": 15022 }, { "epoch": 1.8812850264424974, "grad_norm": 2.0625, "learning_rate": 1.314342325575091e-07, "loss": 0.4705, "step": 15023 }, { "epoch": 1.8814113189675585, "grad_norm": 1.9453125, "learning_rate": 1.31111807932085e-07, "loss": 0.4823, "step": 15024 }, { "epoch": 1.8815376114926199, "grad_norm": 1.9765625, "learning_rate": 1.307897766567734e-07, "loss": 0.458, "step": 15025 }, { "epoch": 1.8816639040176808, "grad_norm": 1.921875, "learning_rate": 1.3046813874440956e-07, "loss": 0.5109, "step": 15026 }, { "epoch": 1.8817901965427422, "grad_norm": 2.078125, "learning_rate": 1.3014689420781434e-07, "loss": 0.4546, "step": 15027 }, { "epoch": 1.8819164890678033, "grad_norm": 1.9765625, "learning_rate": 1.2982604305979085e-07, "loss": 0.5015, "step": 15028 }, { "epoch": 1.8820427815928644, "grad_norm": 2.03125, "learning_rate": 1.295055853131266e-07, "loss": 0.5371, "step": 15029 }, { "epoch": 1.8821690741179258, "grad_norm": 2.203125, "learning_rate": 1.291855209805959e-07, "loss": 0.6233, "step": 15030 }, { "epoch": 1.8822953666429867, "grad_norm": 1.921875, "learning_rate": 1.288658500749551e-07, "loss": 0.4326, "step": 15031 }, { "epoch": 1.882421659168048, "grad_norm": 1.9296875, "learning_rate": 1.285465726089452e-07, "loss": 0.5468, "step": 15032 }, { "epoch": 1.8825479516931092, "grad_norm": 2.15625, "learning_rate": 1.2822768859529045e-07, "loss": 0.4844, "step": 15033 }, { "epoch": 1.8826742442181703, "grad_norm": 2.296875, "learning_rate": 1.2790919804670398e-07, "loss": 0.5064, "step": 15034 }, { "epoch": 1.8828005367432317, "grad_norm": 2.0625, "learning_rate": 1.275911009758779e-07, "loss": 0.4941, "step": 15035 }, { "epoch": 1.8829268292682926, "grad_norm": 1.9453125, "learning_rate": 1.2727339739549206e-07, "loss": 0.4314, "step": 15036 }, { "epoch": 1.883053121793354, "grad_norm": 1.9765625, "learning_rate": 1.2695608731820853e-07, "loss": 0.4812, "step": 15037 }, { "epoch": 1.883179414318415, "grad_norm": 2.0625, "learning_rate": 1.2663917075667386e-07, "loss": 0.4913, "step": 15038 }, { "epoch": 1.8833057068434762, "grad_norm": 2.0625, "learning_rate": 1.2632264772352133e-07, "loss": 0.4452, "step": 15039 }, { "epoch": 1.8834319993685373, "grad_norm": 1.9921875, "learning_rate": 1.2600651823136522e-07, "loss": 0.4746, "step": 15040 }, { "epoch": 1.8835582918935985, "grad_norm": 1.8984375, "learning_rate": 1.2569078229280662e-07, "loss": 0.5188, "step": 15041 }, { "epoch": 1.8836845844186598, "grad_norm": 2.0625, "learning_rate": 1.253754399204299e-07, "loss": 0.6385, "step": 15042 }, { "epoch": 1.8838108769437207, "grad_norm": 1.875, "learning_rate": 1.2506049112680273e-07, "loss": 0.4657, "step": 15043 }, { "epoch": 1.883937169468782, "grad_norm": 2.203125, "learning_rate": 1.2474593592447958e-07, "loss": 0.4902, "step": 15044 }, { "epoch": 1.8840634619938432, "grad_norm": 2.046875, "learning_rate": 1.2443177432599817e-07, "loss": 0.5292, "step": 15045 }, { "epoch": 1.8841897545189044, "grad_norm": 1.9453125, "learning_rate": 1.2411800634387848e-07, "loss": 0.4764, "step": 15046 }, { "epoch": 1.8843160470439657, "grad_norm": 1.9453125, "learning_rate": 1.238046319906272e-07, "loss": 0.4763, "step": 15047 }, { "epoch": 1.8844423395690266, "grad_norm": 1.953125, "learning_rate": 1.2349165127873542e-07, "loss": 0.512, "step": 15048 }, { "epoch": 1.884568632094088, "grad_norm": 2.125, "learning_rate": 1.2317906422067761e-07, "loss": 0.5279, "step": 15049 }, { "epoch": 1.8846949246191491, "grad_norm": 2.03125, "learning_rate": 1.2286687082891268e-07, "loss": 0.4763, "step": 15050 }, { "epoch": 1.8848212171442102, "grad_norm": 2.078125, "learning_rate": 1.2255507111588294e-07, "loss": 0.4542, "step": 15051 }, { "epoch": 1.8849475096692716, "grad_norm": 1.953125, "learning_rate": 1.2224366509401732e-07, "loss": 0.5139, "step": 15052 }, { "epoch": 1.8850738021943325, "grad_norm": 2.203125, "learning_rate": 1.2193265277572698e-07, "loss": 0.499, "step": 15053 }, { "epoch": 1.8852000947193939, "grad_norm": 2.125, "learning_rate": 1.2162203417340757e-07, "loss": 0.4908, "step": 15054 }, { "epoch": 1.885326387244455, "grad_norm": 1.96875, "learning_rate": 1.213118092994414e-07, "loss": 0.5047, "step": 15055 }, { "epoch": 1.8854526797695161, "grad_norm": 1.875, "learning_rate": 1.2100197816619198e-07, "loss": 0.4889, "step": 15056 }, { "epoch": 1.8855789722945773, "grad_norm": 2.140625, "learning_rate": 1.2069254078600823e-07, "loss": 0.4918, "step": 15057 }, { "epoch": 1.8857052648196384, "grad_norm": 2.078125, "learning_rate": 1.2038349717122478e-07, "loss": 0.4668, "step": 15058 }, { "epoch": 1.8858315573446998, "grad_norm": 1.9921875, "learning_rate": 1.200748473341573e-07, "loss": 0.4692, "step": 15059 }, { "epoch": 1.8859578498697607, "grad_norm": 1.890625, "learning_rate": 1.1976659128711044e-07, "loss": 0.426, "step": 15060 }, { "epoch": 1.886084142394822, "grad_norm": 2.140625, "learning_rate": 1.1945872904236878e-07, "loss": 0.5785, "step": 15061 }, { "epoch": 1.8862104349198832, "grad_norm": 1.96875, "learning_rate": 1.191512606122025e-07, "loss": 0.4723, "step": 15062 }, { "epoch": 1.8863367274449443, "grad_norm": 2.15625, "learning_rate": 1.1884418600886849e-07, "loss": 0.4845, "step": 15063 }, { "epoch": 1.8864630199700057, "grad_norm": 1.9375, "learning_rate": 1.1853750524460472e-07, "loss": 0.4743, "step": 15064 }, { "epoch": 1.8865893124950666, "grad_norm": 2.03125, "learning_rate": 1.1823121833163475e-07, "loss": 0.4266, "step": 15065 }, { "epoch": 1.886715605020128, "grad_norm": 2.046875, "learning_rate": 1.1792532528216771e-07, "loss": 0.4928, "step": 15066 }, { "epoch": 1.886841897545189, "grad_norm": 2.171875, "learning_rate": 1.1761982610839385e-07, "loss": 0.5482, "step": 15067 }, { "epoch": 1.8869681900702502, "grad_norm": 2.09375, "learning_rate": 1.1731472082249007e-07, "loss": 0.4915, "step": 15068 }, { "epoch": 1.8870944825953115, "grad_norm": 2.1875, "learning_rate": 1.1701000943661889e-07, "loss": 0.541, "step": 15069 }, { "epoch": 1.8872207751203725, "grad_norm": 2.015625, "learning_rate": 1.167056919629217e-07, "loss": 0.5511, "step": 15070 }, { "epoch": 1.8873470676454338, "grad_norm": 2.078125, "learning_rate": 1.1640176841353212e-07, "loss": 0.4349, "step": 15071 }, { "epoch": 1.887473360170495, "grad_norm": 2.265625, "learning_rate": 1.160982388005616e-07, "loss": 0.5647, "step": 15072 }, { "epoch": 1.887599652695556, "grad_norm": 2.359375, "learning_rate": 1.157951031361082e-07, "loss": 0.6142, "step": 15073 }, { "epoch": 1.8877259452206172, "grad_norm": 1.984375, "learning_rate": 1.1549236143225562e-07, "loss": 0.5196, "step": 15074 }, { "epoch": 1.8878522377456783, "grad_norm": 1.9375, "learning_rate": 1.1519001370106753e-07, "loss": 0.4308, "step": 15075 }, { "epoch": 1.8879785302707397, "grad_norm": 2.234375, "learning_rate": 1.1488805995459761e-07, "loss": 0.5025, "step": 15076 }, { "epoch": 1.8881048227958006, "grad_norm": 1.875, "learning_rate": 1.145865002048785e-07, "loss": 0.4371, "step": 15077 }, { "epoch": 1.888231115320862, "grad_norm": 1.9453125, "learning_rate": 1.1428533446393274e-07, "loss": 0.4979, "step": 15078 }, { "epoch": 1.888357407845923, "grad_norm": 1.859375, "learning_rate": 1.139845627437608e-07, "loss": 0.516, "step": 15079 }, { "epoch": 1.8884837003709842, "grad_norm": 2.0625, "learning_rate": 1.1368418505635303e-07, "loss": 0.4858, "step": 15080 }, { "epoch": 1.8886099928960456, "grad_norm": 2.265625, "learning_rate": 1.1338420141368101e-07, "loss": 0.5519, "step": 15081 }, { "epoch": 1.8887362854211065, "grad_norm": 2.265625, "learning_rate": 1.1308461182770069e-07, "loss": 0.5379, "step": 15082 }, { "epoch": 1.8888625779461679, "grad_norm": 2.140625, "learning_rate": 1.1278541631035367e-07, "loss": 0.5089, "step": 15083 }, { "epoch": 1.888988870471229, "grad_norm": 2.140625, "learning_rate": 1.1248661487356593e-07, "loss": 0.6165, "step": 15084 }, { "epoch": 1.8891151629962901, "grad_norm": 2.0625, "learning_rate": 1.1218820752924575e-07, "loss": 0.5277, "step": 15085 }, { "epoch": 1.8892414555213515, "grad_norm": 1.9765625, "learning_rate": 1.1189019428928693e-07, "loss": 0.4762, "step": 15086 }, { "epoch": 1.8893677480464124, "grad_norm": 1.8984375, "learning_rate": 1.1159257516556776e-07, "loss": 0.5054, "step": 15087 }, { "epoch": 1.8894940405714737, "grad_norm": 2.078125, "learning_rate": 1.1129535016995097e-07, "loss": 0.5572, "step": 15088 }, { "epoch": 1.8896203330965349, "grad_norm": 2.046875, "learning_rate": 1.1099851931428374e-07, "loss": 0.5495, "step": 15089 }, { "epoch": 1.889746625621596, "grad_norm": 1.875, "learning_rate": 1.107020826103955e-07, "loss": 0.45, "step": 15090 }, { "epoch": 1.8898729181466571, "grad_norm": 1.9375, "learning_rate": 1.1040604007010347e-07, "loss": 0.4605, "step": 15091 }, { "epoch": 1.8899992106717183, "grad_norm": 2.421875, "learning_rate": 1.1011039170520488e-07, "loss": 0.5107, "step": 15092 }, { "epoch": 1.8901255031967796, "grad_norm": 1.9140625, "learning_rate": 1.0981513752748585e-07, "loss": 0.4468, "step": 15093 }, { "epoch": 1.8902517957218408, "grad_norm": 2.0, "learning_rate": 1.0952027754871253e-07, "loss": 0.4936, "step": 15094 }, { "epoch": 1.890378088246902, "grad_norm": 2.046875, "learning_rate": 1.0922581178063884e-07, "loss": 0.5368, "step": 15095 }, { "epoch": 1.890504380771963, "grad_norm": 2.078125, "learning_rate": 1.0893174023500098e-07, "loss": 0.5464, "step": 15096 }, { "epoch": 1.8906306732970242, "grad_norm": 2.015625, "learning_rate": 1.0863806292351953e-07, "loss": 0.5453, "step": 15097 }, { "epoch": 1.8907569658220855, "grad_norm": 2.109375, "learning_rate": 1.083447798578996e-07, "loss": 0.5091, "step": 15098 }, { "epoch": 1.8908832583471464, "grad_norm": 1.9453125, "learning_rate": 1.0805189104983183e-07, "loss": 0.4428, "step": 15099 }, { "epoch": 1.8910095508722078, "grad_norm": 2.328125, "learning_rate": 1.0775939651099021e-07, "loss": 0.5004, "step": 15100 }, { "epoch": 1.891135843397269, "grad_norm": 1.9296875, "learning_rate": 1.0746729625303098e-07, "loss": 0.457, "step": 15101 }, { "epoch": 1.89126213592233, "grad_norm": 2.046875, "learning_rate": 1.0717559028759706e-07, "loss": 0.5057, "step": 15102 }, { "epoch": 1.8913884284473914, "grad_norm": 1.9375, "learning_rate": 1.068842786263169e-07, "loss": 0.4887, "step": 15103 }, { "epoch": 1.8915147209724523, "grad_norm": 2.0, "learning_rate": 1.0659336128080011e-07, "loss": 0.5494, "step": 15104 }, { "epoch": 1.8916410134975137, "grad_norm": 2.15625, "learning_rate": 1.0630283826264187e-07, "loss": 0.5723, "step": 15105 }, { "epoch": 1.8917673060225748, "grad_norm": 2.140625, "learning_rate": 1.060127095834218e-07, "loss": 0.5177, "step": 15106 }, { "epoch": 1.891893598547636, "grad_norm": 2.0, "learning_rate": 1.0572297525470399e-07, "loss": 0.4328, "step": 15107 }, { "epoch": 1.892019891072697, "grad_norm": 2.0, "learning_rate": 1.0543363528803696e-07, "loss": 0.4351, "step": 15108 }, { "epoch": 1.8921461835977582, "grad_norm": 2.09375, "learning_rate": 1.0514468969495262e-07, "loss": 0.4971, "step": 15109 }, { "epoch": 1.8922724761228196, "grad_norm": 1.890625, "learning_rate": 1.0485613848696841e-07, "loss": 0.4347, "step": 15110 }, { "epoch": 1.8923987686478807, "grad_norm": 2.125, "learning_rate": 1.0456798167558401e-07, "loss": 0.5059, "step": 15111 }, { "epoch": 1.8925250611729418, "grad_norm": 1.9375, "learning_rate": 1.0428021927228471e-07, "loss": 0.5204, "step": 15112 }, { "epoch": 1.892651353698003, "grad_norm": 1.953125, "learning_rate": 1.0399285128854131e-07, "loss": 0.4554, "step": 15113 }, { "epoch": 1.892777646223064, "grad_norm": 1.9765625, "learning_rate": 1.0370587773580576e-07, "loss": 0.5159, "step": 15114 }, { "epoch": 1.8929039387481255, "grad_norm": 1.8984375, "learning_rate": 1.0341929862551781e-07, "loss": 0.4512, "step": 15115 }, { "epoch": 1.8930302312731864, "grad_norm": 1.75, "learning_rate": 1.0313311396909942e-07, "loss": 0.4533, "step": 15116 }, { "epoch": 1.8931565237982477, "grad_norm": 1.8125, "learning_rate": 1.0284732377795703e-07, "loss": 0.4302, "step": 15117 }, { "epoch": 1.8932828163233089, "grad_norm": 1.9453125, "learning_rate": 1.0256192806348153e-07, "loss": 0.479, "step": 15118 }, { "epoch": 1.89340910884837, "grad_norm": 2.140625, "learning_rate": 1.0227692683704826e-07, "loss": 0.4563, "step": 15119 }, { "epoch": 1.8935354013734313, "grad_norm": 1.953125, "learning_rate": 1.0199232011001592e-07, "loss": 0.5156, "step": 15120 }, { "epoch": 1.8936616938984923, "grad_norm": 2.0, "learning_rate": 1.0170810789372875e-07, "loss": 0.5196, "step": 15121 }, { "epoch": 1.8937879864235536, "grad_norm": 1.984375, "learning_rate": 1.014242901995155e-07, "loss": 0.5112, "step": 15122 }, { "epoch": 1.8939142789486147, "grad_norm": 2.03125, "learning_rate": 1.0114086703868819e-07, "loss": 0.51, "step": 15123 }, { "epoch": 1.8940405714736759, "grad_norm": 1.96875, "learning_rate": 1.0085783842254115e-07, "loss": 0.4508, "step": 15124 }, { "epoch": 1.8941668639987372, "grad_norm": 2.046875, "learning_rate": 1.0057520436235868e-07, "loss": 0.5335, "step": 15125 }, { "epoch": 1.8942931565237981, "grad_norm": 2.03125, "learning_rate": 1.0029296486940399e-07, "loss": 0.5501, "step": 15126 }, { "epoch": 1.8944194490488595, "grad_norm": 1.90625, "learning_rate": 1.0001111995492696e-07, "loss": 0.4862, "step": 15127 }, { "epoch": 1.8945457415739206, "grad_norm": 1.96875, "learning_rate": 9.972966963016084e-08, "loss": 0.4702, "step": 15128 }, { "epoch": 1.8946720340989818, "grad_norm": 1.9453125, "learning_rate": 9.944861390632333e-08, "loss": 0.4567, "step": 15129 }, { "epoch": 1.894798326624043, "grad_norm": 1.8515625, "learning_rate": 9.916795279461766e-08, "loss": 0.3949, "step": 15130 }, { "epoch": 1.894924619149104, "grad_norm": 1.921875, "learning_rate": 9.888768630622936e-08, "loss": 0.4476, "step": 15131 }, { "epoch": 1.8950509116741654, "grad_norm": 1.8671875, "learning_rate": 9.860781445233058e-08, "loss": 0.6226, "step": 15132 }, { "epoch": 1.8951772041992263, "grad_norm": 2.34375, "learning_rate": 9.832833724407465e-08, "loss": 0.6145, "step": 15133 }, { "epoch": 1.8953034967242877, "grad_norm": 2.09375, "learning_rate": 9.804925469260152e-08, "loss": 0.5022, "step": 15134 }, { "epoch": 1.8954297892493488, "grad_norm": 1.9609375, "learning_rate": 9.777056680903452e-08, "loss": 0.4432, "step": 15135 }, { "epoch": 1.89555608177441, "grad_norm": 2.078125, "learning_rate": 9.749227360448143e-08, "loss": 0.5472, "step": 15136 }, { "epoch": 1.8956823742994713, "grad_norm": 1.9453125, "learning_rate": 9.721437509003562e-08, "loss": 0.4844, "step": 15137 }, { "epoch": 1.8958086668245322, "grad_norm": 1.9921875, "learning_rate": 9.693687127677153e-08, "loss": 0.5015, "step": 15138 }, { "epoch": 1.8959349593495936, "grad_norm": 2.0, "learning_rate": 9.665976217575147e-08, "loss": 0.5377, "step": 15139 }, { "epoch": 1.8960612518746547, "grad_norm": 2.078125, "learning_rate": 9.638304779801877e-08, "loss": 0.4805, "step": 15140 }, { "epoch": 1.8961875443997158, "grad_norm": 2.0, "learning_rate": 9.610672815460464e-08, "loss": 0.4877, "step": 15141 }, { "epoch": 1.8963138369247772, "grad_norm": 2.015625, "learning_rate": 9.583080325652028e-08, "loss": 0.4424, "step": 15142 }, { "epoch": 1.896440129449838, "grad_norm": 1.890625, "learning_rate": 9.555527311476465e-08, "loss": 0.5225, "step": 15143 }, { "epoch": 1.8965664219748994, "grad_norm": 2.015625, "learning_rate": 9.528013774031897e-08, "loss": 0.5705, "step": 15144 }, { "epoch": 1.8966927144999606, "grad_norm": 1.8203125, "learning_rate": 9.500539714415113e-08, "loss": 0.4429, "step": 15145 }, { "epoch": 1.8968190070250217, "grad_norm": 2.0625, "learning_rate": 9.473105133720906e-08, "loss": 0.531, "step": 15146 }, { "epoch": 1.8969452995500828, "grad_norm": 2.125, "learning_rate": 9.445710033043065e-08, "loss": 0.5325, "step": 15147 }, { "epoch": 1.897071592075144, "grad_norm": 2.3125, "learning_rate": 9.418354413473163e-08, "loss": 0.5174, "step": 15148 }, { "epoch": 1.8971978846002053, "grad_norm": 2.15625, "learning_rate": 9.391038276101771e-08, "loss": 0.5063, "step": 15149 }, { "epoch": 1.8973241771252662, "grad_norm": 2.0625, "learning_rate": 9.363761622017575e-08, "loss": 0.5219, "step": 15150 }, { "epoch": 1.8974504696503276, "grad_norm": 1.953125, "learning_rate": 9.336524452307704e-08, "loss": 0.471, "step": 15151 }, { "epoch": 1.8975767621753887, "grad_norm": 2.1875, "learning_rate": 9.309326768057847e-08, "loss": 0.5138, "step": 15152 }, { "epoch": 1.8977030547004499, "grad_norm": 1.9765625, "learning_rate": 9.282168570352024e-08, "loss": 0.4774, "step": 15153 }, { "epoch": 1.8978293472255112, "grad_norm": 2.0625, "learning_rate": 9.255049860272592e-08, "loss": 0.5709, "step": 15154 }, { "epoch": 1.8979556397505721, "grad_norm": 1.890625, "learning_rate": 9.227970638900574e-08, "loss": 0.5858, "step": 15155 }, { "epoch": 1.8980819322756335, "grad_norm": 2.03125, "learning_rate": 9.20093090731522e-08, "loss": 0.5057, "step": 15156 }, { "epoch": 1.8982082248006946, "grad_norm": 1.953125, "learning_rate": 9.173930666594222e-08, "loss": 0.4124, "step": 15157 }, { "epoch": 1.8983345173257558, "grad_norm": 1.96875, "learning_rate": 9.14696991781383e-08, "loss": 0.4592, "step": 15158 }, { "epoch": 1.898460809850817, "grad_norm": 2.09375, "learning_rate": 9.12004866204852e-08, "loss": 0.5432, "step": 15159 }, { "epoch": 1.898587102375878, "grad_norm": 2.0, "learning_rate": 9.09316690037143e-08, "loss": 0.4613, "step": 15160 }, { "epoch": 1.8987133949009394, "grad_norm": 2.109375, "learning_rate": 9.066324633853928e-08, "loss": 0.5959, "step": 15161 }, { "epoch": 1.8988396874260005, "grad_norm": 2.125, "learning_rate": 9.039521863566047e-08, "loss": 0.6048, "step": 15162 }, { "epoch": 1.8989659799510616, "grad_norm": 1.9140625, "learning_rate": 9.01275859057582e-08, "loss": 0.4719, "step": 15163 }, { "epoch": 1.8990922724761228, "grad_norm": 1.90625, "learning_rate": 8.986034815950173e-08, "loss": 0.5085, "step": 15164 }, { "epoch": 1.899218565001184, "grad_norm": 2.15625, "learning_rate": 8.95935054075403e-08, "loss": 0.5505, "step": 15165 }, { "epoch": 1.8993448575262453, "grad_norm": 2.125, "learning_rate": 8.932705766051208e-08, "loss": 0.4728, "step": 15166 }, { "epoch": 1.8994711500513062, "grad_norm": 1.8828125, "learning_rate": 8.906100492903636e-08, "loss": 0.4761, "step": 15167 }, { "epoch": 1.8995974425763675, "grad_norm": 2.171875, "learning_rate": 8.879534722371686e-08, "loss": 0.4858, "step": 15168 }, { "epoch": 1.8997237351014287, "grad_norm": 2.03125, "learning_rate": 8.853008455514178e-08, "loss": 0.4759, "step": 15169 }, { "epoch": 1.8998500276264898, "grad_norm": 1.953125, "learning_rate": 8.82652169338849e-08, "loss": 0.531, "step": 15170 }, { "epoch": 1.8999763201515512, "grad_norm": 1.90625, "learning_rate": 8.80007443705011e-08, "loss": 0.4289, "step": 15171 }, { "epoch": 1.900102612676612, "grad_norm": 2.125, "learning_rate": 8.773666687553417e-08, "loss": 0.4972, "step": 15172 }, { "epoch": 1.9002289052016734, "grad_norm": 1.890625, "learning_rate": 8.747298445950902e-08, "loss": 0.4605, "step": 15173 }, { "epoch": 1.9003551977267346, "grad_norm": 1.8984375, "learning_rate": 8.720969713293392e-08, "loss": 0.5058, "step": 15174 }, { "epoch": 1.9004814902517957, "grad_norm": 1.890625, "learning_rate": 8.694680490630381e-08, "loss": 0.4482, "step": 15175 }, { "epoch": 1.900607782776857, "grad_norm": 1.8984375, "learning_rate": 8.668430779009807e-08, "loss": 0.4766, "step": 15176 }, { "epoch": 1.900734075301918, "grad_norm": 2.0625, "learning_rate": 8.642220579477723e-08, "loss": 0.5204, "step": 15177 }, { "epoch": 1.9008603678269793, "grad_norm": 2.0625, "learning_rate": 8.616049893078848e-08, "loss": 0.5099, "step": 15178 }, { "epoch": 1.9009866603520404, "grad_norm": 2.28125, "learning_rate": 8.589918720856349e-08, "loss": 0.6072, "step": 15179 }, { "epoch": 1.9011129528771016, "grad_norm": 1.90625, "learning_rate": 8.563827063851837e-08, "loss": 0.4925, "step": 15180 }, { "epoch": 1.9012392454021627, "grad_norm": 1.9921875, "learning_rate": 8.537774923105035e-08, "loss": 0.51, "step": 15181 }, { "epoch": 1.9013655379272238, "grad_norm": 2.21875, "learning_rate": 8.511762299654447e-08, "loss": 0.6195, "step": 15182 }, { "epoch": 1.9014918304522852, "grad_norm": 1.890625, "learning_rate": 8.485789194536908e-08, "loss": 0.4517, "step": 15183 }, { "epoch": 1.9016181229773461, "grad_norm": 2.03125, "learning_rate": 8.459855608787593e-08, "loss": 0.5252, "step": 15184 }, { "epoch": 1.9017444155024075, "grad_norm": 2.03125, "learning_rate": 8.433961543440228e-08, "loss": 0.5122, "step": 15185 }, { "epoch": 1.9018707080274686, "grad_norm": 1.96875, "learning_rate": 8.408106999526767e-08, "loss": 0.4637, "step": 15186 }, { "epoch": 1.9019970005525297, "grad_norm": 1.7578125, "learning_rate": 8.382291978077716e-08, "loss": 0.4289, "step": 15187 }, { "epoch": 1.902123293077591, "grad_norm": 2.03125, "learning_rate": 8.356516480122034e-08, "loss": 0.5013, "step": 15188 }, { "epoch": 1.902249585602652, "grad_norm": 2.046875, "learning_rate": 8.33078050668712e-08, "loss": 0.5181, "step": 15189 }, { "epoch": 1.9023758781277134, "grad_norm": 1.8984375, "learning_rate": 8.305084058798707e-08, "loss": 0.4634, "step": 15190 }, { "epoch": 1.9025021706527745, "grad_norm": 2.0, "learning_rate": 8.279427137481089e-08, "loss": 0.5154, "step": 15191 }, { "epoch": 1.9026284631778356, "grad_norm": 2.046875, "learning_rate": 8.253809743756669e-08, "loss": 0.5232, "step": 15192 }, { "epoch": 1.902754755702897, "grad_norm": 2.171875, "learning_rate": 8.22823187864663e-08, "loss": 0.468, "step": 15193 }, { "epoch": 1.902881048227958, "grad_norm": 2.078125, "learning_rate": 8.202693543170493e-08, "loss": 0.4946, "step": 15194 }, { "epoch": 1.9030073407530192, "grad_norm": 1.8828125, "learning_rate": 8.177194738346106e-08, "loss": 0.4704, "step": 15195 }, { "epoch": 1.9031336332780804, "grad_norm": 1.9375, "learning_rate": 8.151735465189658e-08, "loss": 0.4935, "step": 15196 }, { "epoch": 1.9032599258031415, "grad_norm": 2.21875, "learning_rate": 8.126315724716116e-08, "loss": 0.583, "step": 15197 }, { "epoch": 1.9033862183282027, "grad_norm": 2.03125, "learning_rate": 8.100935517938558e-08, "loss": 0.4889, "step": 15198 }, { "epoch": 1.9035125108532638, "grad_norm": 2.140625, "learning_rate": 8.075594845868618e-08, "loss": 0.5468, "step": 15199 }, { "epoch": 1.9036388033783251, "grad_norm": 1.9765625, "learning_rate": 8.050293709516154e-08, "loss": 0.4891, "step": 15200 }, { "epoch": 1.9037650959033863, "grad_norm": 2.015625, "learning_rate": 8.025032109889808e-08, "loss": 0.494, "step": 15201 }, { "epoch": 1.9038913884284474, "grad_norm": 2.078125, "learning_rate": 7.999810047996326e-08, "loss": 0.4494, "step": 15202 }, { "epoch": 1.9040176809535085, "grad_norm": 1.90625, "learning_rate": 7.974627524841017e-08, "loss": 0.513, "step": 15203 }, { "epoch": 1.9041439734785697, "grad_norm": 2.078125, "learning_rate": 7.949484541427633e-08, "loss": 0.4671, "step": 15204 }, { "epoch": 1.904270266003631, "grad_norm": 2.03125, "learning_rate": 7.92438109875826e-08, "loss": 0.4905, "step": 15205 }, { "epoch": 1.904396558528692, "grad_norm": 2.0, "learning_rate": 7.899317197833545e-08, "loss": 0.4513, "step": 15206 }, { "epoch": 1.9045228510537533, "grad_norm": 1.890625, "learning_rate": 7.874292839652464e-08, "loss": 0.4319, "step": 15207 }, { "epoch": 1.9046491435788144, "grad_norm": 2.09375, "learning_rate": 7.849308025212333e-08, "loss": 0.4926, "step": 15208 }, { "epoch": 1.9047754361038756, "grad_norm": 2.140625, "learning_rate": 7.824362755509018e-08, "loss": 0.5234, "step": 15209 }, { "epoch": 1.904901728628937, "grad_norm": 1.96875, "learning_rate": 7.799457031536839e-08, "loss": 0.5707, "step": 15210 }, { "epoch": 1.9050280211539978, "grad_norm": 2.078125, "learning_rate": 7.774590854288333e-08, "loss": 0.5462, "step": 15211 }, { "epoch": 1.9051543136790592, "grad_norm": 1.9140625, "learning_rate": 7.749764224754819e-08, "loss": 0.397, "step": 15212 }, { "epoch": 1.9052806062041203, "grad_norm": 2.109375, "learning_rate": 7.724977143925727e-08, "loss": 0.4568, "step": 15213 }, { "epoch": 1.9054068987291815, "grad_norm": 1.96875, "learning_rate": 7.700229612788823e-08, "loss": 0.4995, "step": 15214 }, { "epoch": 1.9055331912542426, "grad_norm": 1.96875, "learning_rate": 7.675521632330873e-08, "loss": 0.4298, "step": 15215 }, { "epoch": 1.9056594837793037, "grad_norm": 1.9921875, "learning_rate": 7.650853203536312e-08, "loss": 0.461, "step": 15216 }, { "epoch": 1.905785776304365, "grad_norm": 1.96875, "learning_rate": 7.626224327388687e-08, "loss": 0.5698, "step": 15217 }, { "epoch": 1.9059120688294262, "grad_norm": 2.140625, "learning_rate": 7.601635004869323e-08, "loss": 0.4497, "step": 15218 }, { "epoch": 1.9060383613544873, "grad_norm": 1.90625, "learning_rate": 7.577085236958548e-08, "loss": 0.4733, "step": 15219 }, { "epoch": 1.9061646538795485, "grad_norm": 2.0625, "learning_rate": 7.55257502463469e-08, "loss": 0.5313, "step": 15220 }, { "epoch": 1.9062909464046096, "grad_norm": 1.9375, "learning_rate": 7.528104368874856e-08, "loss": 0.429, "step": 15221 }, { "epoch": 1.906417238929671, "grad_norm": 1.9765625, "learning_rate": 7.503673270654155e-08, "loss": 0.4754, "step": 15222 }, { "epoch": 1.9065435314547319, "grad_norm": 2.015625, "learning_rate": 7.479281730946474e-08, "loss": 0.5072, "step": 15223 }, { "epoch": 1.9066698239797932, "grad_norm": 1.859375, "learning_rate": 7.454929750723927e-08, "loss": 0.4764, "step": 15224 }, { "epoch": 1.9067961165048544, "grad_norm": 2.109375, "learning_rate": 7.430617330957291e-08, "loss": 0.5803, "step": 15225 }, { "epoch": 1.9069224090299155, "grad_norm": 1.90625, "learning_rate": 7.406344472615457e-08, "loss": 0.4651, "step": 15226 }, { "epoch": 1.9070487015549769, "grad_norm": 1.953125, "learning_rate": 7.382111176665873e-08, "loss": 0.5603, "step": 15227 }, { "epoch": 1.9071749940800378, "grad_norm": 1.953125, "learning_rate": 7.357917444074548e-08, "loss": 0.4865, "step": 15228 }, { "epoch": 1.9073012866050991, "grad_norm": 1.9296875, "learning_rate": 7.333763275805705e-08, "loss": 0.5305, "step": 15229 }, { "epoch": 1.9074275791301603, "grad_norm": 2.046875, "learning_rate": 7.309648672822023e-08, "loss": 0.5046, "step": 15230 }, { "epoch": 1.9075538716552214, "grad_norm": 2.265625, "learning_rate": 7.285573636084731e-08, "loss": 0.5025, "step": 15231 }, { "epoch": 1.9076801641802827, "grad_norm": 2.09375, "learning_rate": 7.261538166553283e-08, "loss": 0.4973, "step": 15232 }, { "epoch": 1.9078064567053437, "grad_norm": 1.9140625, "learning_rate": 7.237542265185803e-08, "loss": 0.4727, "step": 15233 }, { "epoch": 1.907932749230405, "grad_norm": 2.140625, "learning_rate": 7.21358593293875e-08, "loss": 0.5243, "step": 15234 }, { "epoch": 1.9080590417554661, "grad_norm": 1.984375, "learning_rate": 7.1896691707668e-08, "loss": 0.4462, "step": 15235 }, { "epoch": 1.9081853342805273, "grad_norm": 2.09375, "learning_rate": 7.165791979623305e-08, "loss": 0.5544, "step": 15236 }, { "epoch": 1.9083116268055884, "grad_norm": 1.9765625, "learning_rate": 7.141954360460057e-08, "loss": 0.5002, "step": 15237 }, { "epoch": 1.9084379193306495, "grad_norm": 2.09375, "learning_rate": 7.118156314226965e-08, "loss": 0.5614, "step": 15238 }, { "epoch": 1.908564211855711, "grad_norm": 1.9453125, "learning_rate": 7.094397841872713e-08, "loss": 0.495, "step": 15239 }, { "epoch": 1.9086905043807718, "grad_norm": 1.828125, "learning_rate": 7.0706789443441e-08, "loss": 0.4882, "step": 15240 }, { "epoch": 1.9088167969058332, "grad_norm": 1.8984375, "learning_rate": 7.046999622586703e-08, "loss": 0.5059, "step": 15241 }, { "epoch": 1.9089430894308943, "grad_norm": 1.8828125, "learning_rate": 7.023359877544211e-08, "loss": 0.4958, "step": 15242 }, { "epoch": 1.9090693819559554, "grad_norm": 2.0, "learning_rate": 6.999759710158871e-08, "loss": 0.5264, "step": 15243 }, { "epoch": 1.9091956744810168, "grad_norm": 2.0, "learning_rate": 6.976199121371263e-08, "loss": 0.4036, "step": 15244 }, { "epoch": 1.9093219670060777, "grad_norm": 2.015625, "learning_rate": 6.952678112120525e-08, "loss": 0.4987, "step": 15245 }, { "epoch": 1.909448259531139, "grad_norm": 1.8984375, "learning_rate": 6.92919668334413e-08, "loss": 0.4848, "step": 15246 }, { "epoch": 1.9095745520562002, "grad_norm": 2.03125, "learning_rate": 6.905754835977995e-08, "loss": 0.5468, "step": 15247 }, { "epoch": 1.9097008445812613, "grad_norm": 1.84375, "learning_rate": 6.882352570956485e-08, "loss": 0.4472, "step": 15248 }, { "epoch": 1.9098271371063227, "grad_norm": 2.046875, "learning_rate": 6.858989889212298e-08, "loss": 0.5888, "step": 15249 }, { "epoch": 1.9099534296313836, "grad_norm": 1.953125, "learning_rate": 6.835666791676576e-08, "loss": 0.4911, "step": 15250 }, { "epoch": 1.910079722156445, "grad_norm": 1.984375, "learning_rate": 6.812383279279133e-08, "loss": 0.474, "step": 15251 }, { "epoch": 1.910206014681506, "grad_norm": 2.046875, "learning_rate": 6.789139352947783e-08, "loss": 0.5039, "step": 15252 }, { "epoch": 1.9103323072065672, "grad_norm": 2.078125, "learning_rate": 6.765935013609004e-08, "loss": 0.503, "step": 15253 }, { "epoch": 1.9104585997316283, "grad_norm": 2.0, "learning_rate": 6.742770262187726e-08, "loss": 0.4997, "step": 15254 }, { "epoch": 1.9105848922566895, "grad_norm": 2.015625, "learning_rate": 6.719645099607208e-08, "loss": 0.4758, "step": 15255 }, { "epoch": 1.9107111847817508, "grad_norm": 1.9296875, "learning_rate": 6.69655952678927e-08, "loss": 0.5025, "step": 15256 }, { "epoch": 1.9108374773068117, "grad_norm": 2.328125, "learning_rate": 6.673513544653843e-08, "loss": 0.5585, "step": 15257 }, { "epoch": 1.910963769831873, "grad_norm": 2.0, "learning_rate": 6.650507154119634e-08, "loss": 0.5486, "step": 15258 }, { "epoch": 1.9110900623569342, "grad_norm": 1.875, "learning_rate": 6.627540356103579e-08, "loss": 0.5172, "step": 15259 }, { "epoch": 1.9112163548819954, "grad_norm": 2.125, "learning_rate": 6.604613151521055e-08, "loss": 0.5108, "step": 15260 }, { "epoch": 1.9113426474070567, "grad_norm": 2.375, "learning_rate": 6.581725541285888e-08, "loss": 0.6076, "step": 15261 }, { "epoch": 1.9114689399321176, "grad_norm": 1.8359375, "learning_rate": 6.558877526310348e-08, "loss": 0.4688, "step": 15262 }, { "epoch": 1.911595232457179, "grad_norm": 1.921875, "learning_rate": 6.53606910750515e-08, "loss": 0.4505, "step": 15263 }, { "epoch": 1.9117215249822401, "grad_norm": 2.046875, "learning_rate": 6.513300285779234e-08, "loss": 0.5401, "step": 15264 }, { "epoch": 1.9118478175073013, "grad_norm": 2.15625, "learning_rate": 6.490571062040319e-08, "loss": 0.5545, "step": 15265 }, { "epoch": 1.9119741100323626, "grad_norm": 2.0625, "learning_rate": 6.467881437194124e-08, "loss": 0.492, "step": 15266 }, { "epoch": 1.9121004025574235, "grad_norm": 2.046875, "learning_rate": 6.445231412145148e-08, "loss": 0.4719, "step": 15267 }, { "epoch": 1.9122266950824849, "grad_norm": 1.9609375, "learning_rate": 6.422620987796114e-08, "loss": 0.5174, "step": 15268 }, { "epoch": 1.912352987607546, "grad_norm": 2.03125, "learning_rate": 6.400050165048188e-08, "loss": 0.4889, "step": 15269 }, { "epoch": 1.9124792801326072, "grad_norm": 2.0625, "learning_rate": 6.377518944801098e-08, "loss": 0.4581, "step": 15270 }, { "epoch": 1.9126055726576683, "grad_norm": 2.109375, "learning_rate": 6.35502732795279e-08, "loss": 0.5392, "step": 15271 }, { "epoch": 1.9127318651827294, "grad_norm": 1.953125, "learning_rate": 6.33257531539977e-08, "loss": 0.4105, "step": 15272 }, { "epoch": 1.9128581577077908, "grad_norm": 1.90625, "learning_rate": 6.310162908036876e-08, "loss": 0.5162, "step": 15273 }, { "epoch": 1.9129844502328517, "grad_norm": 1.921875, "learning_rate": 6.287790106757396e-08, "loss": 0.487, "step": 15274 }, { "epoch": 1.913110742757913, "grad_norm": 1.8359375, "learning_rate": 6.265456912453172e-08, "loss": 0.484, "step": 15275 }, { "epoch": 1.9132370352829742, "grad_norm": 1.9453125, "learning_rate": 6.243163326014268e-08, "loss": 0.4808, "step": 15276 }, { "epoch": 1.9133633278080353, "grad_norm": 2.171875, "learning_rate": 6.220909348329196e-08, "loss": 0.4936, "step": 15277 }, { "epoch": 1.9134896203330967, "grad_norm": 2.21875, "learning_rate": 6.198694980285025e-08, "loss": 0.5924, "step": 15278 }, { "epoch": 1.9136159128581576, "grad_norm": 2.15625, "learning_rate": 6.176520222767268e-08, "loss": 0.5172, "step": 15279 }, { "epoch": 1.913742205383219, "grad_norm": 2.09375, "learning_rate": 6.154385076659442e-08, "loss": 0.5003, "step": 15280 }, { "epoch": 1.91386849790828, "grad_norm": 1.875, "learning_rate": 6.132289542844172e-08, "loss": 0.4568, "step": 15281 }, { "epoch": 1.9139947904333412, "grad_norm": 1.953125, "learning_rate": 6.110233622201867e-08, "loss": 0.4722, "step": 15282 }, { "epoch": 1.9141210829584026, "grad_norm": 2.203125, "learning_rate": 6.088217315611711e-08, "loss": 0.542, "step": 15283 }, { "epoch": 1.9142473754834635, "grad_norm": 1.953125, "learning_rate": 6.066240623951336e-08, "loss": 0.5147, "step": 15284 }, { "epoch": 1.9143736680085248, "grad_norm": 1.9296875, "learning_rate": 6.044303548096486e-08, "loss": 0.4519, "step": 15285 }, { "epoch": 1.914499960533586, "grad_norm": 2.078125, "learning_rate": 6.022406088921573e-08, "loss": 0.5475, "step": 15286 }, { "epoch": 1.914626253058647, "grad_norm": 2.078125, "learning_rate": 6.000548247299454e-08, "loss": 0.5349, "step": 15287 }, { "epoch": 1.9147525455837082, "grad_norm": 1.921875, "learning_rate": 5.978730024101209e-08, "loss": 0.4332, "step": 15288 }, { "epoch": 1.9148788381087694, "grad_norm": 2.109375, "learning_rate": 5.956951420196588e-08, "loss": 0.5192, "step": 15289 }, { "epoch": 1.9150051306338307, "grad_norm": 2.1875, "learning_rate": 5.935212436453674e-08, "loss": 0.5011, "step": 15290 }, { "epoch": 1.9151314231588916, "grad_norm": 2.015625, "learning_rate": 5.913513073738775e-08, "loss": 0.5052, "step": 15291 }, { "epoch": 1.915257715683953, "grad_norm": 2.09375, "learning_rate": 5.8918533329167525e-08, "loss": 0.5031, "step": 15292 }, { "epoch": 1.915384008209014, "grad_norm": 1.9765625, "learning_rate": 5.8702332148510285e-08, "loss": 0.4216, "step": 15293 }, { "epoch": 1.9155103007340752, "grad_norm": 2.015625, "learning_rate": 5.8486527204033585e-08, "loss": 0.4361, "step": 15294 }, { "epoch": 1.9156365932591366, "grad_norm": 2.15625, "learning_rate": 5.827111850433831e-08, "loss": 0.503, "step": 15295 }, { "epoch": 1.9157628857841975, "grad_norm": 2.03125, "learning_rate": 5.805610605800982e-08, "loss": 0.5502, "step": 15296 }, { "epoch": 1.9158891783092589, "grad_norm": 1.96875, "learning_rate": 5.784148987361793e-08, "loss": 0.483, "step": 15297 }, { "epoch": 1.91601547083432, "grad_norm": 1.9609375, "learning_rate": 5.7627269959718015e-08, "loss": 0.5171, "step": 15298 }, { "epoch": 1.9161417633593811, "grad_norm": 2.0625, "learning_rate": 5.741344632484658e-08, "loss": 0.488, "step": 15299 }, { "epoch": 1.9162680558844425, "grad_norm": 2.109375, "learning_rate": 5.7200018977526805e-08, "loss": 0.4374, "step": 15300 }, { "epoch": 1.9163943484095034, "grad_norm": 1.9765625, "learning_rate": 5.6986987926266333e-08, "loss": 0.4992, "step": 15301 }, { "epoch": 1.9165206409345648, "grad_norm": 2.0, "learning_rate": 5.677435317955504e-08, "loss": 0.4863, "step": 15302 }, { "epoch": 1.916646933459626, "grad_norm": 1.875, "learning_rate": 5.656211474586726e-08, "loss": 0.4444, "step": 15303 }, { "epoch": 1.916773225984687, "grad_norm": 2.125, "learning_rate": 5.6350272633664e-08, "loss": 0.5175, "step": 15304 }, { "epoch": 1.9168995185097482, "grad_norm": 1.921875, "learning_rate": 5.61388268513885e-08, "loss": 0.4572, "step": 15305 }, { "epoch": 1.9170258110348093, "grad_norm": 2.046875, "learning_rate": 5.592777740746735e-08, "loss": 0.4924, "step": 15306 }, { "epoch": 1.9171521035598706, "grad_norm": 1.9375, "learning_rate": 5.5717124310313844e-08, "loss": 0.4857, "step": 15307 }, { "epoch": 1.9172783960849316, "grad_norm": 2.046875, "learning_rate": 5.550686756832346e-08, "loss": 0.6472, "step": 15308 }, { "epoch": 1.917404688609993, "grad_norm": 2.109375, "learning_rate": 5.529700718987619e-08, "loss": 0.5047, "step": 15309 }, { "epoch": 1.917530981135054, "grad_norm": 1.8359375, "learning_rate": 5.5087543183336423e-08, "loss": 0.4879, "step": 15310 }, { "epoch": 1.9176572736601152, "grad_norm": 1.9609375, "learning_rate": 5.487847555705417e-08, "loss": 0.4324, "step": 15311 }, { "epoch": 1.9177835661851765, "grad_norm": 1.9609375, "learning_rate": 5.466980431936164e-08, "loss": 0.4479, "step": 15312 }, { "epoch": 1.9179098587102374, "grad_norm": 1.9765625, "learning_rate": 5.446152947857553e-08, "loss": 0.4851, "step": 15313 }, { "epoch": 1.9180361512352988, "grad_norm": 2.125, "learning_rate": 5.4253651042996956e-08, "loss": 0.5524, "step": 15314 }, { "epoch": 1.91816244376036, "grad_norm": 2.03125, "learning_rate": 5.4046169020912645e-08, "loss": 0.5057, "step": 15315 }, { "epoch": 1.918288736285421, "grad_norm": 1.7890625, "learning_rate": 5.383908342059263e-08, "loss": 0.4889, "step": 15316 }, { "epoch": 1.9184150288104824, "grad_norm": 1.9453125, "learning_rate": 5.36323942502881e-08, "loss": 0.4601, "step": 15317 }, { "epoch": 1.9185413213355433, "grad_norm": 1.921875, "learning_rate": 5.3426101518240235e-08, "loss": 0.5523, "step": 15318 }, { "epoch": 1.9186676138606047, "grad_norm": 2.03125, "learning_rate": 5.322020523267024e-08, "loss": 0.5061, "step": 15319 }, { "epoch": 1.9187939063856658, "grad_norm": 1.9453125, "learning_rate": 5.301470540178377e-08, "loss": 0.524, "step": 15320 }, { "epoch": 1.918920198910727, "grad_norm": 1.984375, "learning_rate": 5.280960203377206e-08, "loss": 0.5355, "step": 15321 }, { "epoch": 1.919046491435788, "grad_norm": 1.9453125, "learning_rate": 5.260489513681077e-08, "loss": 0.4729, "step": 15322 }, { "epoch": 1.9191727839608492, "grad_norm": 2.125, "learning_rate": 5.2400584719058955e-08, "loss": 0.5062, "step": 15323 }, { "epoch": 1.9192990764859106, "grad_norm": 2.078125, "learning_rate": 5.219667078865898e-08, "loss": 0.4978, "step": 15324 }, { "epoch": 1.9194253690109717, "grad_norm": 2.0, "learning_rate": 5.199315335373878e-08, "loss": 0.4797, "step": 15325 }, { "epoch": 1.9195516615360328, "grad_norm": 1.9375, "learning_rate": 5.1790032422410764e-08, "loss": 0.4915, "step": 15326 }, { "epoch": 1.919677954061094, "grad_norm": 1.96875, "learning_rate": 5.1587308002770674e-08, "loss": 0.5529, "step": 15327 }, { "epoch": 1.9198042465861551, "grad_norm": 2.03125, "learning_rate": 5.138498010289761e-08, "loss": 0.4315, "step": 15328 }, { "epoch": 1.9199305391112165, "grad_norm": 2.015625, "learning_rate": 5.118304873085622e-08, "loss": 0.4323, "step": 15329 }, { "epoch": 1.9200568316362774, "grad_norm": 2.203125, "learning_rate": 5.0981513894695637e-08, "loss": 0.5496, "step": 15330 }, { "epoch": 1.9201831241613387, "grad_norm": 2.3125, "learning_rate": 5.0780375602448305e-08, "loss": 0.5929, "step": 15331 }, { "epoch": 1.9203094166863999, "grad_norm": 2.0625, "learning_rate": 5.057963386213116e-08, "loss": 0.5356, "step": 15332 }, { "epoch": 1.920435709211461, "grad_norm": 1.875, "learning_rate": 5.037928868174446e-08, "loss": 0.4794, "step": 15333 }, { "epoch": 1.9205620017365224, "grad_norm": 2.09375, "learning_rate": 5.0179340069275163e-08, "loss": 0.5008, "step": 15334 }, { "epoch": 1.9206882942615833, "grad_norm": 1.890625, "learning_rate": 4.997978803269132e-08, "loss": 0.4882, "step": 15335 }, { "epoch": 1.9208145867866446, "grad_norm": 1.9140625, "learning_rate": 4.978063257994659e-08, "loss": 0.4487, "step": 15336 }, { "epoch": 1.9209408793117058, "grad_norm": 1.9375, "learning_rate": 4.9581873718979045e-08, "loss": 0.4089, "step": 15337 }, { "epoch": 1.921067171836767, "grad_norm": 1.953125, "learning_rate": 4.9383511457711254e-08, "loss": 0.4079, "step": 15338 }, { "epoch": 1.921193464361828, "grad_norm": 1.796875, "learning_rate": 4.918554580404911e-08, "loss": 0.416, "step": 15339 }, { "epoch": 1.9213197568868892, "grad_norm": 1.921875, "learning_rate": 4.898797676588296e-08, "loss": 0.4372, "step": 15340 }, { "epoch": 1.9214460494119505, "grad_norm": 1.9765625, "learning_rate": 4.8790804351087626e-08, "loss": 0.5124, "step": 15341 }, { "epoch": 1.9215723419370117, "grad_norm": 2.078125, "learning_rate": 4.859402856752238e-08, "loss": 0.5062, "step": 15342 }, { "epoch": 1.9216986344620728, "grad_norm": 2.203125, "learning_rate": 4.839764942302871e-08, "loss": 0.5311, "step": 15343 }, { "epoch": 1.921824926987134, "grad_norm": 2.0625, "learning_rate": 4.820166692543593e-08, "loss": 0.4692, "step": 15344 }, { "epoch": 1.921951219512195, "grad_norm": 1.96875, "learning_rate": 4.8006081082553336e-08, "loss": 0.5043, "step": 15345 }, { "epoch": 1.9220775120372564, "grad_norm": 2.046875, "learning_rate": 4.781089190217803e-08, "loss": 0.4771, "step": 15346 }, { "epoch": 1.9222038045623173, "grad_norm": 1.828125, "learning_rate": 4.761609939208933e-08, "loss": 0.4692, "step": 15347 }, { "epoch": 1.9223300970873787, "grad_norm": 1.7734375, "learning_rate": 4.7421703560051045e-08, "loss": 0.4318, "step": 15348 }, { "epoch": 1.9224563896124398, "grad_norm": 2.0, "learning_rate": 4.7227704413810306e-08, "loss": 0.479, "step": 15349 }, { "epoch": 1.922582682137501, "grad_norm": 2.109375, "learning_rate": 4.703410196110203e-08, "loss": 0.5693, "step": 15350 }, { "epoch": 1.9227089746625623, "grad_norm": 1.8046875, "learning_rate": 4.6840896209640055e-08, "loss": 0.4426, "step": 15351 }, { "epoch": 1.9228352671876232, "grad_norm": 2.265625, "learning_rate": 4.66480871671271e-08, "loss": 0.4185, "step": 15352 }, { "epoch": 1.9229615597126846, "grad_norm": 1.8203125, "learning_rate": 4.645567484124702e-08, "loss": 0.4301, "step": 15353 }, { "epoch": 1.9230878522377457, "grad_norm": 2.09375, "learning_rate": 4.6263659239669247e-08, "loss": 0.5297, "step": 15354 }, { "epoch": 1.9232141447628068, "grad_norm": 1.859375, "learning_rate": 4.6072040370046536e-08, "loss": 0.4664, "step": 15355 }, { "epoch": 1.9233404372878682, "grad_norm": 2.390625, "learning_rate": 4.588081824001611e-08, "loss": 0.5331, "step": 15356 }, { "epoch": 1.923466729812929, "grad_norm": 2.171875, "learning_rate": 4.568999285720188e-08, "loss": 0.5094, "step": 15357 }, { "epoch": 1.9235930223379905, "grad_norm": 2.109375, "learning_rate": 4.549956422920665e-08, "loss": 0.5736, "step": 15358 }, { "epoch": 1.9237193148630516, "grad_norm": 2.09375, "learning_rate": 4.530953236362212e-08, "loss": 0.4874, "step": 15359 }, { "epoch": 1.9238456073881127, "grad_norm": 1.8515625, "learning_rate": 4.5119897268023347e-08, "loss": 0.5179, "step": 15360 }, { "epoch": 1.9239718999131739, "grad_norm": 2.078125, "learning_rate": 4.493065894996651e-08, "loss": 0.4846, "step": 15361 }, { "epoch": 1.924098192438235, "grad_norm": 1.9375, "learning_rate": 4.474181741699557e-08, "loss": 0.5219, "step": 15362 }, { "epoch": 1.9242244849632963, "grad_norm": 2.046875, "learning_rate": 4.455337267663784e-08, "loss": 0.5072, "step": 15363 }, { "epoch": 1.9243507774883573, "grad_norm": 2.09375, "learning_rate": 4.436532473640287e-08, "loss": 0.4909, "step": 15364 }, { "epoch": 1.9244770700134186, "grad_norm": 1.96875, "learning_rate": 4.4177673603785776e-08, "loss": 0.439, "step": 15365 }, { "epoch": 1.9246033625384797, "grad_norm": 2.109375, "learning_rate": 4.3990419286267235e-08, "loss": 0.4948, "step": 15366 }, { "epoch": 1.9247296550635409, "grad_norm": 1.984375, "learning_rate": 4.380356179131018e-08, "loss": 0.5057, "step": 15367 }, { "epoch": 1.9248559475886022, "grad_norm": 1.9765625, "learning_rate": 4.361710112636197e-08, "loss": 0.4992, "step": 15368 }, { "epoch": 1.9249822401136631, "grad_norm": 1.859375, "learning_rate": 4.343103729885445e-08, "loss": 0.4858, "step": 15369 }, { "epoch": 1.9251085326387245, "grad_norm": 2.015625, "learning_rate": 4.3245370316203904e-08, "loss": 0.4859, "step": 15370 }, { "epoch": 1.9252348251637856, "grad_norm": 1.984375, "learning_rate": 4.3060100185811085e-08, "loss": 0.4855, "step": 15371 }, { "epoch": 1.9253611176888468, "grad_norm": 2.015625, "learning_rate": 4.287522691505897e-08, "loss": 0.5208, "step": 15372 }, { "epoch": 1.9254874102139081, "grad_norm": 1.96875, "learning_rate": 4.269075051131722e-08, "loss": 0.5275, "step": 15373 }, { "epoch": 1.925613702738969, "grad_norm": 1.890625, "learning_rate": 4.250667098193884e-08, "loss": 0.4727, "step": 15374 }, { "epoch": 1.9257399952640304, "grad_norm": 2.078125, "learning_rate": 4.232298833426018e-08, "loss": 0.4925, "step": 15375 }, { "epoch": 1.9258662877890915, "grad_norm": 2.078125, "learning_rate": 4.213970257560318e-08, "loss": 0.4761, "step": 15376 }, { "epoch": 1.9259925803141527, "grad_norm": 1.8984375, "learning_rate": 4.195681371327198e-08, "loss": 0.4552, "step": 15377 }, { "epoch": 1.9261188728392138, "grad_norm": 2.0, "learning_rate": 4.1774321754557424e-08, "loss": 0.574, "step": 15378 }, { "epoch": 1.926245165364275, "grad_norm": 1.921875, "learning_rate": 4.159222670673147e-08, "loss": 0.4589, "step": 15379 }, { "epoch": 1.9263714578893363, "grad_norm": 2.0625, "learning_rate": 4.1410528577053856e-08, "loss": 0.5186, "step": 15380 }, { "epoch": 1.9264977504143972, "grad_norm": 2.015625, "learning_rate": 4.122922737276547e-08, "loss": 0.5097, "step": 15381 }, { "epoch": 1.9266240429394585, "grad_norm": 1.921875, "learning_rate": 4.1048323101092745e-08, "loss": 0.4474, "step": 15382 }, { "epoch": 1.9267503354645197, "grad_norm": 1.8828125, "learning_rate": 4.0867815769246586e-08, "loss": 0.4895, "step": 15383 }, { "epoch": 1.9268766279895808, "grad_norm": 1.9453125, "learning_rate": 4.068770538442124e-08, "loss": 0.5703, "step": 15384 }, { "epoch": 1.9270029205146422, "grad_norm": 1.8125, "learning_rate": 4.0507991953795404e-08, "loss": 0.4853, "step": 15385 }, { "epoch": 1.927129213039703, "grad_norm": 2.15625, "learning_rate": 4.032867548453223e-08, "loss": 0.5571, "step": 15386 }, { "epoch": 1.9272555055647644, "grad_norm": 1.9609375, "learning_rate": 4.014975598377824e-08, "loss": 0.4646, "step": 15387 }, { "epoch": 1.9273817980898256, "grad_norm": 2.171875, "learning_rate": 3.9971233458665495e-08, "loss": 0.6018, "step": 15388 }, { "epoch": 1.9275080906148867, "grad_norm": 2.078125, "learning_rate": 3.979310791630941e-08, "loss": 0.5147, "step": 15389 }, { "epoch": 1.927634383139948, "grad_norm": 1.8984375, "learning_rate": 3.961537936380877e-08, "loss": 0.4969, "step": 15390 }, { "epoch": 1.927760675665009, "grad_norm": 1.953125, "learning_rate": 3.9438047808247895e-08, "loss": 0.492, "step": 15391 }, { "epoch": 1.9278869681900703, "grad_norm": 2.03125, "learning_rate": 3.9261113256695574e-08, "loss": 0.5187, "step": 15392 }, { "epoch": 1.9280132607151315, "grad_norm": 1.890625, "learning_rate": 3.9084575716202835e-08, "loss": 0.5324, "step": 15393 }, { "epoch": 1.9281395532401926, "grad_norm": 1.875, "learning_rate": 3.890843519380738e-08, "loss": 0.4389, "step": 15394 }, { "epoch": 1.9282658457652537, "grad_norm": 2.015625, "learning_rate": 3.873269169652805e-08, "loss": 0.5404, "step": 15395 }, { "epoch": 1.9283921382903149, "grad_norm": 1.9609375, "learning_rate": 3.855734523137034e-08, "loss": 0.4392, "step": 15396 }, { "epoch": 1.9285184308153762, "grad_norm": 1.9453125, "learning_rate": 3.83823958053231e-08, "loss": 0.5301, "step": 15397 }, { "epoch": 1.9286447233404371, "grad_norm": 1.921875, "learning_rate": 3.820784342536077e-08, "loss": 0.4732, "step": 15398 }, { "epoch": 1.9287710158654985, "grad_norm": 2.09375, "learning_rate": 3.803368809843777e-08, "loss": 0.4829, "step": 15399 }, { "epoch": 1.9288973083905596, "grad_norm": 2.09375, "learning_rate": 3.785992983149855e-08, "loss": 0.5081, "step": 15400 }, { "epoch": 1.9290236009156208, "grad_norm": 1.953125, "learning_rate": 3.768656863146647e-08, "loss": 0.5392, "step": 15401 }, { "epoch": 1.929149893440682, "grad_norm": 2.171875, "learning_rate": 3.751360450525155e-08, "loss": 0.5024, "step": 15402 }, { "epoch": 1.929276185965743, "grad_norm": 2.015625, "learning_rate": 3.734103745974827e-08, "loss": 0.5152, "step": 15403 }, { "epoch": 1.9294024784908044, "grad_norm": 1.921875, "learning_rate": 3.716886750183557e-08, "loss": 0.4626, "step": 15404 }, { "epoch": 1.9295287710158655, "grad_norm": 2.359375, "learning_rate": 3.6997094638373535e-08, "loss": 0.4878, "step": 15405 }, { "epoch": 1.9296550635409266, "grad_norm": 1.8671875, "learning_rate": 3.682571887621111e-08, "loss": 0.4899, "step": 15406 }, { "epoch": 1.929781356065988, "grad_norm": 2.09375, "learning_rate": 3.66547402221773e-08, "loss": 0.4993, "step": 15407 }, { "epoch": 1.929907648591049, "grad_norm": 2.0625, "learning_rate": 3.6484158683087743e-08, "loss": 0.4798, "step": 15408 }, { "epoch": 1.9300339411161103, "grad_norm": 1.9609375, "learning_rate": 3.631397426574035e-08, "loss": 0.4291, "step": 15409 }, { "epoch": 1.9301602336411714, "grad_norm": 1.9453125, "learning_rate": 3.614418697691857e-08, "loss": 0.451, "step": 15410 }, { "epoch": 1.9302865261662325, "grad_norm": 2.015625, "learning_rate": 3.597479682339145e-08, "loss": 0.5179, "step": 15411 }, { "epoch": 1.9304128186912937, "grad_norm": 2.015625, "learning_rate": 3.5805803811906904e-08, "loss": 0.4503, "step": 15412 }, { "epoch": 1.9305391112163548, "grad_norm": 2.0, "learning_rate": 3.5637207949204e-08, "loss": 0.4984, "step": 15413 }, { "epoch": 1.9306654037414162, "grad_norm": 2.125, "learning_rate": 3.5469009242001804e-08, "loss": 0.5414, "step": 15414 }, { "epoch": 1.930791696266477, "grad_norm": 1.890625, "learning_rate": 3.530120769700385e-08, "loss": 0.4603, "step": 15415 }, { "epoch": 1.9309179887915384, "grad_norm": 1.984375, "learning_rate": 3.5133803320897e-08, "loss": 0.5448, "step": 15416 }, { "epoch": 1.9310442813165996, "grad_norm": 1.984375, "learning_rate": 3.4966796120355917e-08, "loss": 0.4961, "step": 15417 }, { "epoch": 1.9311705738416607, "grad_norm": 2.125, "learning_rate": 3.480018610203528e-08, "loss": 0.513, "step": 15418 }, { "epoch": 1.931296866366722, "grad_norm": 1.75, "learning_rate": 3.463397327257756e-08, "loss": 0.4014, "step": 15419 }, { "epoch": 1.931423158891783, "grad_norm": 2.046875, "learning_rate": 3.446815763860634e-08, "loss": 0.4251, "step": 15420 }, { "epoch": 1.9315494514168443, "grad_norm": 2.046875, "learning_rate": 3.430273920673077e-08, "loss": 0.5253, "step": 15421 }, { "epoch": 1.9316757439419054, "grad_norm": 1.9375, "learning_rate": 3.4137717983543375e-08, "loss": 0.4949, "step": 15422 }, { "epoch": 1.9318020364669666, "grad_norm": 1.9765625, "learning_rate": 3.397309397562332e-08, "loss": 0.4813, "step": 15423 }, { "epoch": 1.931928328992028, "grad_norm": 2.0625, "learning_rate": 3.3808867189529805e-08, "loss": 0.4715, "step": 15424 }, { "epoch": 1.9320546215170888, "grad_norm": 1.8984375, "learning_rate": 3.364503763181093e-08, "loss": 0.4884, "step": 15425 }, { "epoch": 1.9321809140421502, "grad_norm": 2.078125, "learning_rate": 3.3481605308994805e-08, "loss": 0.5042, "step": 15426 }, { "epoch": 1.9323072065672113, "grad_norm": 2.078125, "learning_rate": 3.3318570227596215e-08, "loss": 0.5612, "step": 15427 }, { "epoch": 1.9324334990922725, "grad_norm": 1.84375, "learning_rate": 3.31559323941133e-08, "loss": 0.4444, "step": 15428 }, { "epoch": 1.9325597916173336, "grad_norm": 1.9609375, "learning_rate": 3.299369181502865e-08, "loss": 0.5105, "step": 15429 }, { "epoch": 1.9326860841423947, "grad_norm": 2.078125, "learning_rate": 3.2831848496807095e-08, "loss": 0.5699, "step": 15430 }, { "epoch": 1.932812376667456, "grad_norm": 1.8671875, "learning_rate": 3.267040244590125e-08, "loss": 0.4484, "step": 15431 }, { "epoch": 1.9329386691925172, "grad_norm": 2.0625, "learning_rate": 3.250935366874597e-08, "loss": 0.504, "step": 15432 }, { "epoch": 1.9330649617175784, "grad_norm": 1.953125, "learning_rate": 3.234870217175834e-08, "loss": 0.4319, "step": 15433 }, { "epoch": 1.9331912542426395, "grad_norm": 1.9765625, "learning_rate": 3.218844796134435e-08, "loss": 0.4656, "step": 15434 }, { "epoch": 1.9333175467677006, "grad_norm": 1.90625, "learning_rate": 3.2028591043888894e-08, "loss": 0.5141, "step": 15435 }, { "epoch": 1.933443839292762, "grad_norm": 1.9765625, "learning_rate": 3.186913142576464e-08, "loss": 0.4754, "step": 15436 }, { "epoch": 1.933570131817823, "grad_norm": 1.84375, "learning_rate": 3.171006911332764e-08, "loss": 0.4427, "step": 15437 }, { "epoch": 1.9336964243428842, "grad_norm": 2.09375, "learning_rate": 3.155140411291613e-08, "loss": 0.4462, "step": 15438 }, { "epoch": 1.9338227168679454, "grad_norm": 2.078125, "learning_rate": 3.139313643085617e-08, "loss": 0.4592, "step": 15439 }, { "epoch": 1.9339490093930065, "grad_norm": 2.046875, "learning_rate": 3.1235266073453844e-08, "loss": 0.5136, "step": 15440 }, { "epoch": 1.9340753019180679, "grad_norm": 2.140625, "learning_rate": 3.107779304700298e-08, "loss": 0.5307, "step": 15441 }, { "epoch": 1.9342015944431288, "grad_norm": 2.15625, "learning_rate": 3.0920717357779684e-08, "loss": 0.5635, "step": 15442 }, { "epoch": 1.9343278869681901, "grad_norm": 2.0, "learning_rate": 3.076403901204561e-08, "loss": 0.5778, "step": 15443 }, { "epoch": 1.9344541794932513, "grad_norm": 2.15625, "learning_rate": 3.0607758016043546e-08, "loss": 0.4737, "step": 15444 }, { "epoch": 1.9345804720183124, "grad_norm": 2.046875, "learning_rate": 3.045187437600294e-08, "loss": 0.4871, "step": 15445 }, { "epoch": 1.9347067645433735, "grad_norm": 2.09375, "learning_rate": 3.029638809813773e-08, "loss": 0.5337, "step": 15446 }, { "epoch": 1.9348330570684347, "grad_norm": 1.953125, "learning_rate": 3.014129918864517e-08, "loss": 0.5412, "step": 15447 }, { "epoch": 1.934959349593496, "grad_norm": 2.078125, "learning_rate": 2.998660765370698e-08, "loss": 0.4949, "step": 15448 }, { "epoch": 1.9350856421185572, "grad_norm": 1.8046875, "learning_rate": 2.983231349948712e-08, "loss": 0.4208, "step": 15449 }, { "epoch": 1.9352119346436183, "grad_norm": 1.875, "learning_rate": 2.9678416732137338e-08, "loss": 0.482, "step": 15450 }, { "epoch": 1.9353382271686794, "grad_norm": 1.9609375, "learning_rate": 2.9524917357791615e-08, "loss": 0.4355, "step": 15451 }, { "epoch": 1.9354645196937406, "grad_norm": 1.9375, "learning_rate": 2.937181538256617e-08, "loss": 0.44, "step": 15452 }, { "epoch": 1.935590812218802, "grad_norm": 2.078125, "learning_rate": 2.921911081256501e-08, "loss": 0.4952, "step": 15453 }, { "epoch": 1.9357171047438628, "grad_norm": 2.0, "learning_rate": 2.9066803653873266e-08, "loss": 0.481, "step": 15454 }, { "epoch": 1.9358433972689242, "grad_norm": 2.015625, "learning_rate": 2.8914893912562748e-08, "loss": 0.4568, "step": 15455 }, { "epoch": 1.9359696897939853, "grad_norm": 2.09375, "learning_rate": 2.8763381594687502e-08, "loss": 0.4948, "step": 15456 }, { "epoch": 1.9360959823190464, "grad_norm": 1.8828125, "learning_rate": 2.861226670628714e-08, "loss": 0.5254, "step": 15457 }, { "epoch": 1.9362222748441078, "grad_norm": 2.140625, "learning_rate": 2.8461549253383513e-08, "loss": 0.5387, "step": 15458 }, { "epoch": 1.9363485673691687, "grad_norm": 2.15625, "learning_rate": 2.8311229241985148e-08, "loss": 0.5097, "step": 15459 }, { "epoch": 1.93647485989423, "grad_norm": 2.234375, "learning_rate": 2.816130667808281e-08, "loss": 0.4615, "step": 15460 }, { "epoch": 1.9366011524192912, "grad_norm": 2.15625, "learning_rate": 2.801178156765172e-08, "loss": 0.5235, "step": 15461 }, { "epoch": 1.9367274449443523, "grad_norm": 1.875, "learning_rate": 2.786265391665155e-08, "loss": 0.4915, "step": 15462 }, { "epoch": 1.9368537374694137, "grad_norm": 1.9765625, "learning_rate": 2.7713923731027548e-08, "loss": 0.5133, "step": 15463 }, { "epoch": 1.9369800299944746, "grad_norm": 1.9140625, "learning_rate": 2.7565591016706083e-08, "loss": 0.4159, "step": 15464 }, { "epoch": 1.937106322519536, "grad_norm": 1.953125, "learning_rate": 2.7417655779600205e-08, "loss": 0.4401, "step": 15465 }, { "epoch": 1.937232615044597, "grad_norm": 2.03125, "learning_rate": 2.7270118025606306e-08, "loss": 0.455, "step": 15466 }, { "epoch": 1.9373589075696582, "grad_norm": 1.9765625, "learning_rate": 2.712297776060524e-08, "loss": 0.4607, "step": 15467 }, { "epoch": 1.9374852000947194, "grad_norm": 1.9921875, "learning_rate": 2.6976234990460095e-08, "loss": 0.5054, "step": 15468 }, { "epoch": 1.9376114926197805, "grad_norm": 1.9296875, "learning_rate": 2.682988972102174e-08, "loss": 0.4649, "step": 15469 }, { "epoch": 1.9377377851448419, "grad_norm": 2.03125, "learning_rate": 2.6683941958121074e-08, "loss": 0.5026, "step": 15470 }, { "epoch": 1.9378640776699028, "grad_norm": 1.9453125, "learning_rate": 2.6538391707576773e-08, "loss": 0.4483, "step": 15471 }, { "epoch": 1.9379903701949641, "grad_norm": 2.125, "learning_rate": 2.639323897518975e-08, "loss": 0.4972, "step": 15472 }, { "epoch": 1.9381166627200253, "grad_norm": 2.09375, "learning_rate": 2.624848376674538e-08, "loss": 0.587, "step": 15473 }, { "epoch": 1.9382429552450864, "grad_norm": 1.90625, "learning_rate": 2.610412608801238e-08, "loss": 0.5514, "step": 15474 }, { "epoch": 1.9383692477701477, "grad_norm": 2.03125, "learning_rate": 2.596016594474615e-08, "loss": 0.4952, "step": 15475 }, { "epoch": 1.9384955402952087, "grad_norm": 1.984375, "learning_rate": 2.581660334268321e-08, "loss": 0.4776, "step": 15476 }, { "epoch": 1.93862183282027, "grad_norm": 2.046875, "learning_rate": 2.5673438287545648e-08, "loss": 0.5808, "step": 15477 }, { "epoch": 1.9387481253453311, "grad_norm": 1.8515625, "learning_rate": 2.5530670785041123e-08, "loss": 0.4695, "step": 15478 }, { "epoch": 1.9388744178703923, "grad_norm": 2.046875, "learning_rate": 2.5388300840858416e-08, "loss": 0.4856, "step": 15479 }, { "epoch": 1.9390007103954536, "grad_norm": 2.046875, "learning_rate": 2.5246328460672985e-08, "loss": 0.4906, "step": 15480 }, { "epoch": 1.9391270029205145, "grad_norm": 2.046875, "learning_rate": 2.5104753650142534e-08, "loss": 0.489, "step": 15481 }, { "epoch": 1.939253295445576, "grad_norm": 1.875, "learning_rate": 2.4963576414911428e-08, "loss": 0.4571, "step": 15482 }, { "epoch": 1.939379587970637, "grad_norm": 2.015625, "learning_rate": 2.482279676060517e-08, "loss": 0.4741, "step": 15483 }, { "epoch": 1.9395058804956982, "grad_norm": 2.0, "learning_rate": 2.4682414692835945e-08, "loss": 0.4923, "step": 15484 }, { "epoch": 1.9396321730207593, "grad_norm": 1.984375, "learning_rate": 2.4542430217198154e-08, "loss": 0.4988, "step": 15485 }, { "epoch": 1.9397584655458204, "grad_norm": 2.015625, "learning_rate": 2.4402843339272896e-08, "loss": 0.4717, "step": 15486 }, { "epoch": 1.9398847580708818, "grad_norm": 2.046875, "learning_rate": 2.426365406462128e-08, "loss": 0.5863, "step": 15487 }, { "epoch": 1.9400110505959427, "grad_norm": 1.7890625, "learning_rate": 2.4124862398793304e-08, "loss": 0.401, "step": 15488 }, { "epoch": 1.940137343121004, "grad_norm": 2.125, "learning_rate": 2.3986468347318992e-08, "loss": 0.5528, "step": 15489 }, { "epoch": 1.9402636356460652, "grad_norm": 2.125, "learning_rate": 2.3848471915716153e-08, "loss": 0.5597, "step": 15490 }, { "epoch": 1.9403899281711263, "grad_norm": 2.25, "learning_rate": 2.3710873109483722e-08, "loss": 0.5327, "step": 15491 }, { "epoch": 1.9405162206961877, "grad_norm": 2.234375, "learning_rate": 2.35736719341062e-08, "loss": 0.5672, "step": 15492 }, { "epoch": 1.9406425132212486, "grad_norm": 1.875, "learning_rate": 2.3436868395052548e-08, "loss": 0.4887, "step": 15493 }, { "epoch": 1.94076880574631, "grad_norm": 2.03125, "learning_rate": 2.3300462497775067e-08, "loss": 0.4659, "step": 15494 }, { "epoch": 1.940895098271371, "grad_norm": 1.96875, "learning_rate": 2.3164454247711633e-08, "loss": 0.4921, "step": 15495 }, { "epoch": 1.9410213907964322, "grad_norm": 1.8828125, "learning_rate": 2.302884365028124e-08, "loss": 0.427, "step": 15496 }, { "epoch": 1.9411476833214936, "grad_norm": 2.125, "learning_rate": 2.289363071089068e-08, "loss": 0.4525, "step": 15497 }, { "epoch": 1.9412739758465545, "grad_norm": 2.109375, "learning_rate": 2.275881543492897e-08, "loss": 0.5673, "step": 15498 }, { "epoch": 1.9414002683716158, "grad_norm": 2.0, "learning_rate": 2.2624397827768485e-08, "loss": 0.4777, "step": 15499 }, { "epoch": 1.941526560896677, "grad_norm": 1.8984375, "learning_rate": 2.2490377894768266e-08, "loss": 0.5042, "step": 15500 }, { "epoch": 1.941652853421738, "grad_norm": 1.84375, "learning_rate": 2.2356755641268492e-08, "loss": 0.4551, "step": 15501 }, { "epoch": 1.9417791459467992, "grad_norm": 1.9296875, "learning_rate": 2.2223531072596005e-08, "loss": 0.4768, "step": 15502 }, { "epoch": 1.9419054384718604, "grad_norm": 2.046875, "learning_rate": 2.209070419406101e-08, "loss": 0.4551, "step": 15503 }, { "epoch": 1.9420317309969217, "grad_norm": 1.953125, "learning_rate": 2.195827501095704e-08, "loss": 0.4833, "step": 15504 }, { "epoch": 1.9421580235219826, "grad_norm": 2.109375, "learning_rate": 2.1826243528562106e-08, "loss": 0.4765, "step": 15505 }, { "epoch": 1.942284316047044, "grad_norm": 1.8671875, "learning_rate": 2.169460975213977e-08, "loss": 0.4418, "step": 15506 }, { "epoch": 1.9424106085721051, "grad_norm": 2.0, "learning_rate": 2.156337368693584e-08, "loss": 0.4882, "step": 15507 }, { "epoch": 1.9425369010971663, "grad_norm": 1.921875, "learning_rate": 2.1432535338180572e-08, "loss": 0.481, "step": 15508 }, { "epoch": 1.9426631936222276, "grad_norm": 1.8046875, "learning_rate": 2.1302094711090904e-08, "loss": 0.4999, "step": 15509 }, { "epoch": 1.9427894861472885, "grad_norm": 2.25, "learning_rate": 2.1172051810863792e-08, "loss": 0.5056, "step": 15510 }, { "epoch": 1.9429157786723499, "grad_norm": 2.0625, "learning_rate": 2.1042406642682865e-08, "loss": 0.5187, "step": 15511 }, { "epoch": 1.943042071197411, "grad_norm": 1.9140625, "learning_rate": 2.0913159211716217e-08, "loss": 0.4795, "step": 15512 }, { "epoch": 1.9431683637224721, "grad_norm": 2.078125, "learning_rate": 2.0784309523114166e-08, "loss": 0.5159, "step": 15513 }, { "epoch": 1.9432946562475335, "grad_norm": 1.921875, "learning_rate": 2.0655857582013716e-08, "loss": 0.4799, "step": 15514 }, { "epoch": 1.9434209487725944, "grad_norm": 1.890625, "learning_rate": 2.0527803393534108e-08, "loss": 0.4974, "step": 15515 }, { "epoch": 1.9435472412976558, "grad_norm": 1.8984375, "learning_rate": 2.0400146962779034e-08, "loss": 0.4646, "step": 15516 }, { "epoch": 1.943673533822717, "grad_norm": 2.0625, "learning_rate": 2.0272888294837756e-08, "loss": 0.4612, "step": 15517 }, { "epoch": 1.943799826347778, "grad_norm": 1.9296875, "learning_rate": 2.0146027394779553e-08, "loss": 0.5633, "step": 15518 }, { "epoch": 1.9439261188728392, "grad_norm": 1.8515625, "learning_rate": 2.0019564267663715e-08, "loss": 0.4288, "step": 15519 }, { "epoch": 1.9440524113979003, "grad_norm": 2.296875, "learning_rate": 1.9893498918530652e-08, "loss": 0.5188, "step": 15520 }, { "epoch": 1.9441787039229617, "grad_norm": 2.1875, "learning_rate": 1.976783135240301e-08, "loss": 0.5587, "step": 15521 }, { "epoch": 1.9443049964480226, "grad_norm": 1.890625, "learning_rate": 1.9642561574291232e-08, "loss": 0.4784, "step": 15522 }, { "epoch": 1.944431288973084, "grad_norm": 2.0, "learning_rate": 1.9517689589186873e-08, "loss": 0.4598, "step": 15523 }, { "epoch": 1.944557581498145, "grad_norm": 1.984375, "learning_rate": 1.9393215402068176e-08, "loss": 0.4781, "step": 15524 }, { "epoch": 1.9446838740232062, "grad_norm": 2.25, "learning_rate": 1.926913901789673e-08, "loss": 0.4806, "step": 15525 }, { "epoch": 1.9448101665482675, "grad_norm": 2.09375, "learning_rate": 1.914546044161747e-08, "loss": 0.5218, "step": 15526 }, { "epoch": 1.9449364590733285, "grad_norm": 2.109375, "learning_rate": 1.902217967815867e-08, "loss": 0.5628, "step": 15527 }, { "epoch": 1.9450627515983898, "grad_norm": 1.90625, "learning_rate": 1.889929673243529e-08, "loss": 0.4413, "step": 15528 }, { "epoch": 1.945189044123451, "grad_norm": 1.984375, "learning_rate": 1.8776811609345635e-08, "loss": 0.5058, "step": 15529 }, { "epoch": 1.945315336648512, "grad_norm": 2.078125, "learning_rate": 1.8654724313771357e-08, "loss": 0.4738, "step": 15530 }, { "epoch": 1.9454416291735734, "grad_norm": 1.90625, "learning_rate": 1.8533034850577448e-08, "loss": 0.4756, "step": 15531 }, { "epoch": 1.9455679216986344, "grad_norm": 2.015625, "learning_rate": 1.8411743224614477e-08, "loss": 0.479, "step": 15532 }, { "epoch": 1.9456942142236957, "grad_norm": 2.125, "learning_rate": 1.829084944071857e-08, "loss": 0.5296, "step": 15533 }, { "epoch": 1.9458205067487568, "grad_norm": 2.25, "learning_rate": 1.8170353503705883e-08, "loss": 0.6511, "step": 15534 }, { "epoch": 1.945946799273818, "grad_norm": 1.9296875, "learning_rate": 1.805025541838035e-08, "loss": 0.4549, "step": 15535 }, { "epoch": 1.946073091798879, "grad_norm": 1.9921875, "learning_rate": 1.7930555189528132e-08, "loss": 0.5073, "step": 15536 }, { "epoch": 1.9461993843239402, "grad_norm": 2.078125, "learning_rate": 1.7811252821922087e-08, "loss": 0.4686, "step": 15537 }, { "epoch": 1.9463256768490016, "grad_norm": 1.8984375, "learning_rate": 1.7692348320315077e-08, "loss": 0.5033, "step": 15538 }, { "epoch": 1.9464519693740625, "grad_norm": 2.0625, "learning_rate": 1.7573841689446648e-08, "loss": 0.5298, "step": 15539 }, { "epoch": 1.9465782618991239, "grad_norm": 1.859375, "learning_rate": 1.7455732934041904e-08, "loss": 0.4711, "step": 15540 }, { "epoch": 1.946704554424185, "grad_norm": 2.265625, "learning_rate": 1.733802205880597e-08, "loss": 0.5302, "step": 15541 }, { "epoch": 1.9468308469492461, "grad_norm": 2.203125, "learning_rate": 1.722070906843176e-08, "loss": 0.5793, "step": 15542 }, { "epoch": 1.9469571394743075, "grad_norm": 1.9765625, "learning_rate": 1.710379396759554e-08, "loss": 0.5208, "step": 15543 }, { "epoch": 1.9470834319993684, "grad_norm": 2.109375, "learning_rate": 1.6987276760956907e-08, "loss": 0.5019, "step": 15544 }, { "epoch": 1.9472097245244298, "grad_norm": 1.921875, "learning_rate": 1.6871157453159927e-08, "loss": 0.504, "step": 15545 }, { "epoch": 1.9473360170494909, "grad_norm": 2.03125, "learning_rate": 1.675543604883201e-08, "loss": 0.4537, "step": 15546 }, { "epoch": 1.947462309574552, "grad_norm": 2.0, "learning_rate": 1.664011255258724e-08, "loss": 0.4773, "step": 15547 }, { "epoch": 1.9475886020996134, "grad_norm": 2.1875, "learning_rate": 1.6525186969020833e-08, "loss": 0.509, "step": 15548 }, { "epoch": 1.9477148946246743, "grad_norm": 2.015625, "learning_rate": 1.6410659302713572e-08, "loss": 0.4672, "step": 15549 }, { "epoch": 1.9478411871497356, "grad_norm": 2.1875, "learning_rate": 1.629652955823069e-08, "loss": 0.4746, "step": 15550 }, { "epoch": 1.9479674796747968, "grad_norm": 2.234375, "learning_rate": 1.618279774011966e-08, "loss": 0.4997, "step": 15551 }, { "epoch": 1.948093772199858, "grad_norm": 2.0, "learning_rate": 1.6069463852915745e-08, "loss": 0.495, "step": 15552 }, { "epoch": 1.948220064724919, "grad_norm": 1.890625, "learning_rate": 1.5956527901135334e-08, "loss": 0.4731, "step": 15553 }, { "epoch": 1.9483463572499802, "grad_norm": 2.15625, "learning_rate": 1.5843989889280376e-08, "loss": 0.5327, "step": 15554 }, { "epoch": 1.9484726497750415, "grad_norm": 2.046875, "learning_rate": 1.5731849821833955e-08, "loss": 0.5488, "step": 15555 }, { "epoch": 1.9485989423001027, "grad_norm": 2.03125, "learning_rate": 1.562010770326916e-08, "loss": 0.5242, "step": 15556 }, { "epoch": 1.9487252348251638, "grad_norm": 2.09375, "learning_rate": 1.5508763538036874e-08, "loss": 0.5101, "step": 15557 }, { "epoch": 1.948851527350225, "grad_norm": 1.9609375, "learning_rate": 1.539781733057577e-08, "loss": 0.5014, "step": 15558 }, { "epoch": 1.948977819875286, "grad_norm": 1.796875, "learning_rate": 1.5287269085308976e-08, "loss": 0.4972, "step": 15559 }, { "epoch": 1.9491041124003474, "grad_norm": 2.1875, "learning_rate": 1.5177118806641856e-08, "loss": 0.5498, "step": 15560 }, { "epoch": 1.9492304049254083, "grad_norm": 1.9453125, "learning_rate": 1.5067366498964232e-08, "loss": 0.4691, "step": 15561 }, { "epoch": 1.9493566974504697, "grad_norm": 2.125, "learning_rate": 1.4958012166651494e-08, "loss": 0.4764, "step": 15562 }, { "epoch": 1.9494829899755308, "grad_norm": 2.046875, "learning_rate": 1.4849055814062374e-08, "loss": 0.5273, "step": 15563 }, { "epoch": 1.949609282500592, "grad_norm": 2.0, "learning_rate": 1.474049744553896e-08, "loss": 0.5833, "step": 15564 }, { "epoch": 1.9497355750256533, "grad_norm": 1.9140625, "learning_rate": 1.4632337065407786e-08, "loss": 0.4569, "step": 15565 }, { "epoch": 1.9498618675507142, "grad_norm": 1.9375, "learning_rate": 1.4524574677980963e-08, "loss": 0.5035, "step": 15566 }, { "epoch": 1.9499881600757756, "grad_norm": 2.03125, "learning_rate": 1.441721028755283e-08, "loss": 0.5422, "step": 15567 }, { "epoch": 1.9501144526008367, "grad_norm": 2.0625, "learning_rate": 1.4310243898403297e-08, "loss": 0.4877, "step": 15568 }, { "epoch": 1.9502407451258978, "grad_norm": 2.09375, "learning_rate": 1.4203675514794512e-08, "loss": 0.5211, "step": 15569 }, { "epoch": 1.950367037650959, "grad_norm": 1.875, "learning_rate": 1.409750514097641e-08, "loss": 0.4786, "step": 15570 }, { "epoch": 1.9504933301760201, "grad_norm": 2.203125, "learning_rate": 1.3991732781177824e-08, "loss": 0.4693, "step": 15571 }, { "epoch": 1.9506196227010815, "grad_norm": 1.9140625, "learning_rate": 1.3886358439616498e-08, "loss": 0.4868, "step": 15572 }, { "epoch": 1.9507459152261426, "grad_norm": 2.03125, "learning_rate": 1.3781382120491294e-08, "loss": 0.4757, "step": 15573 }, { "epoch": 1.9508722077512037, "grad_norm": 2.0, "learning_rate": 1.367680382798775e-08, "loss": 0.5327, "step": 15574 }, { "epoch": 1.9509985002762649, "grad_norm": 1.921875, "learning_rate": 1.3572623566272535e-08, "loss": 0.43, "step": 15575 }, { "epoch": 1.951124792801326, "grad_norm": 1.9609375, "learning_rate": 1.3468841339498995e-08, "loss": 0.489, "step": 15576 }, { "epoch": 1.9512510853263874, "grad_norm": 1.859375, "learning_rate": 1.3365457151803818e-08, "loss": 0.4777, "step": 15577 }, { "epoch": 1.9513773778514483, "grad_norm": 2.015625, "learning_rate": 1.3262471007307043e-08, "loss": 0.5717, "step": 15578 }, { "epoch": 1.9515036703765096, "grad_norm": 1.984375, "learning_rate": 1.3159882910113164e-08, "loss": 0.4436, "step": 15579 }, { "epoch": 1.9516299629015708, "grad_norm": 2.03125, "learning_rate": 1.3057692864311134e-08, "loss": 0.5424, "step": 15580 }, { "epoch": 1.951756255426632, "grad_norm": 2.0625, "learning_rate": 1.2955900873975469e-08, "loss": 0.49, "step": 15581 }, { "epoch": 1.9518825479516932, "grad_norm": 1.96875, "learning_rate": 1.2854506943161814e-08, "loss": 0.4882, "step": 15582 }, { "epoch": 1.9520088404767542, "grad_norm": 2.109375, "learning_rate": 1.2753511075912495e-08, "loss": 0.4919, "step": 15583 }, { "epoch": 1.9521351330018155, "grad_norm": 1.96875, "learning_rate": 1.2652913276250956e-08, "loss": 0.5471, "step": 15584 }, { "epoch": 1.9522614255268766, "grad_norm": 1.9765625, "learning_rate": 1.2552713548189544e-08, "loss": 0.5329, "step": 15585 }, { "epoch": 1.9523877180519378, "grad_norm": 1.9609375, "learning_rate": 1.2452911895720622e-08, "loss": 0.4542, "step": 15586 }, { "epoch": 1.9525140105769991, "grad_norm": 2.0, "learning_rate": 1.2353508322822116e-08, "loss": 0.501, "step": 15587 }, { "epoch": 1.95264030310206, "grad_norm": 1.890625, "learning_rate": 1.2254502833455307e-08, "loss": 0.4839, "step": 15588 }, { "epoch": 1.9527665956271214, "grad_norm": 2.078125, "learning_rate": 1.2155895431568143e-08, "loss": 0.4984, "step": 15589 }, { "epoch": 1.9528928881521825, "grad_norm": 2.09375, "learning_rate": 1.2057686121088596e-08, "loss": 0.5284, "step": 15590 }, { "epoch": 1.9530191806772437, "grad_norm": 1.796875, "learning_rate": 1.1959874905932423e-08, "loss": 0.4028, "step": 15591 }, { "epoch": 1.9531454732023048, "grad_norm": 1.921875, "learning_rate": 1.1862461789998725e-08, "loss": 0.5429, "step": 15592 }, { "epoch": 1.953271765727366, "grad_norm": 2.046875, "learning_rate": 1.1765446777168842e-08, "loss": 0.5075, "step": 15593 }, { "epoch": 1.9533980582524273, "grad_norm": 1.859375, "learning_rate": 1.1668829871309684e-08, "loss": 0.4928, "step": 15594 }, { "epoch": 1.9535243507774882, "grad_norm": 2.1875, "learning_rate": 1.1572611076272611e-08, "loss": 0.583, "step": 15595 }, { "epoch": 1.9536506433025496, "grad_norm": 1.9609375, "learning_rate": 1.1476790395893444e-08, "loss": 0.4773, "step": 15596 }, { "epoch": 1.9537769358276107, "grad_norm": 2.203125, "learning_rate": 1.1381367833990242e-08, "loss": 0.5458, "step": 15597 }, { "epoch": 1.9539032283526718, "grad_norm": 2.078125, "learning_rate": 1.1286343394366628e-08, "loss": 0.5356, "step": 15598 }, { "epoch": 1.9540295208777332, "grad_norm": 1.9140625, "learning_rate": 1.1191717080809571e-08, "loss": 0.5321, "step": 15599 }, { "epoch": 1.954155813402794, "grad_norm": 2.234375, "learning_rate": 1.1097488897091613e-08, "loss": 0.5157, "step": 15600 }, { "epoch": 1.9542821059278555, "grad_norm": 2.03125, "learning_rate": 1.1003658846967525e-08, "loss": 0.5611, "step": 15601 }, { "epoch": 1.9544083984529166, "grad_norm": 2.265625, "learning_rate": 1.0910226934177648e-08, "loss": 0.5515, "step": 15602 }, { "epoch": 1.9545346909779777, "grad_norm": 2.015625, "learning_rate": 1.0817193162446782e-08, "loss": 0.426, "step": 15603 }, { "epoch": 1.954660983503039, "grad_norm": 1.9375, "learning_rate": 1.072455753548085e-08, "loss": 0.4897, "step": 15604 }, { "epoch": 1.9547872760281, "grad_norm": 1.8671875, "learning_rate": 1.0632320056974677e-08, "loss": 0.4402, "step": 15605 }, { "epoch": 1.9549135685531613, "grad_norm": 1.890625, "learning_rate": 1.0540480730601987e-08, "loss": 0.5279, "step": 15606 }, { "epoch": 1.9550398610782225, "grad_norm": 2.140625, "learning_rate": 1.0449039560025409e-08, "loss": 0.5798, "step": 15607 }, { "epoch": 1.9551661536032836, "grad_norm": 2.0, "learning_rate": 1.0357996548888693e-08, "loss": 0.5489, "step": 15608 }, { "epoch": 1.9552924461283447, "grad_norm": 2.15625, "learning_rate": 1.026735170082116e-08, "loss": 0.5724, "step": 15609 }, { "epoch": 1.9554187386534059, "grad_norm": 2.109375, "learning_rate": 1.0177105019434363e-08, "loss": 0.6067, "step": 15610 }, { "epoch": 1.9555450311784672, "grad_norm": 2.015625, "learning_rate": 1.0087256508326538e-08, "loss": 0.4652, "step": 15611 }, { "epoch": 1.9556713237035281, "grad_norm": 2.1875, "learning_rate": 9.99780617107815e-09, "loss": 0.5157, "step": 15612 }, { "epoch": 1.9557976162285895, "grad_norm": 2.0625, "learning_rate": 9.908754011255239e-09, "loss": 0.4753, "step": 15613 }, { "epoch": 1.9559239087536506, "grad_norm": 1.9375, "learning_rate": 9.820100032406078e-09, "loss": 0.478, "step": 15614 }, { "epoch": 1.9560502012787118, "grad_norm": 1.765625, "learning_rate": 9.731844238065613e-09, "loss": 0.4097, "step": 15615 }, { "epoch": 1.9561764938037731, "grad_norm": 2.203125, "learning_rate": 9.643986631749924e-09, "loss": 0.4819, "step": 15616 }, { "epoch": 1.956302786328834, "grad_norm": 2.34375, "learning_rate": 9.556527216962874e-09, "loss": 0.5376, "step": 15617 }, { "epoch": 1.9564290788538954, "grad_norm": 2.21875, "learning_rate": 9.469465997188344e-09, "loss": 0.4851, "step": 15618 }, { "epoch": 1.9565553713789565, "grad_norm": 1.828125, "learning_rate": 9.382802975896887e-09, "loss": 0.4885, "step": 15619 }, { "epoch": 1.9566816639040177, "grad_norm": 2.015625, "learning_rate": 9.296538156543523e-09, "loss": 0.4858, "step": 15620 }, { "epoch": 1.956807956429079, "grad_norm": 1.9453125, "learning_rate": 9.210671542565497e-09, "loss": 0.5037, "step": 15621 }, { "epoch": 1.95693424895414, "grad_norm": 2.328125, "learning_rate": 9.12520313738674e-09, "loss": 0.5422, "step": 15622 }, { "epoch": 1.9570605414792013, "grad_norm": 1.921875, "learning_rate": 9.040132944412306e-09, "loss": 0.4963, "step": 15623 }, { "epoch": 1.9571868340042624, "grad_norm": 2.046875, "learning_rate": 8.955460967033924e-09, "loss": 0.5987, "step": 15624 }, { "epoch": 1.9573131265293235, "grad_norm": 1.9375, "learning_rate": 8.871187208624454e-09, "loss": 0.465, "step": 15625 }, { "epoch": 1.9574394190543847, "grad_norm": 2.15625, "learning_rate": 8.78731167254565e-09, "loss": 0.5496, "step": 15626 }, { "epoch": 1.9575657115794458, "grad_norm": 2.0, "learning_rate": 8.703834362139285e-09, "loss": 0.4648, "step": 15627 }, { "epoch": 1.9576920041045072, "grad_norm": 1.9921875, "learning_rate": 8.620755280731585e-09, "loss": 0.4585, "step": 15628 }, { "epoch": 1.957818296629568, "grad_norm": 2.078125, "learning_rate": 8.538074431635457e-09, "loss": 0.4738, "step": 15629 }, { "epoch": 1.9579445891546294, "grad_norm": 1.9609375, "learning_rate": 8.455791818146043e-09, "loss": 0.4552, "step": 15630 }, { "epoch": 1.9580708816796906, "grad_norm": 1.9296875, "learning_rate": 8.37390744354183e-09, "loss": 0.4299, "step": 15631 }, { "epoch": 1.9581971742047517, "grad_norm": 1.90625, "learning_rate": 8.292421311087983e-09, "loss": 0.4626, "step": 15632 }, { "epoch": 1.958323466729813, "grad_norm": 2.09375, "learning_rate": 8.211333424031908e-09, "loss": 0.5042, "step": 15633 }, { "epoch": 1.958449759254874, "grad_norm": 1.921875, "learning_rate": 8.130643785604353e-09, "loss": 0.4819, "step": 15634 }, { "epoch": 1.9585760517799353, "grad_norm": 2.015625, "learning_rate": 8.050352399023853e-09, "loss": 0.5898, "step": 15635 }, { "epoch": 1.9587023443049965, "grad_norm": 1.859375, "learning_rate": 7.970459267487852e-09, "loss": 0.4641, "step": 15636 }, { "epoch": 1.9588286368300576, "grad_norm": 1.8984375, "learning_rate": 7.890964394182688e-09, "loss": 0.4544, "step": 15637 }, { "epoch": 1.958954929355119, "grad_norm": 1.890625, "learning_rate": 7.811867782275829e-09, "loss": 0.4161, "step": 15638 }, { "epoch": 1.9590812218801799, "grad_norm": 2.0625, "learning_rate": 7.733169434920306e-09, "loss": 0.4921, "step": 15639 }, { "epoch": 1.9592075144052412, "grad_norm": 1.90625, "learning_rate": 7.654869355252503e-09, "loss": 0.4146, "step": 15640 }, { "epoch": 1.9593338069303023, "grad_norm": 2.09375, "learning_rate": 7.576967546394365e-09, "loss": 0.4773, "step": 15641 }, { "epoch": 1.9594600994553635, "grad_norm": 2.140625, "learning_rate": 7.499464011450074e-09, "loss": 0.5062, "step": 15642 }, { "epoch": 1.9595863919804246, "grad_norm": 2.015625, "learning_rate": 7.422358753507164e-09, "loss": 0.4834, "step": 15643 }, { "epoch": 1.9597126845054857, "grad_norm": 1.765625, "learning_rate": 7.3456517756420596e-09, "loss": 0.4399, "step": 15644 }, { "epoch": 1.959838977030547, "grad_norm": 1.921875, "learning_rate": 7.2693430809100965e-09, "loss": 0.4558, "step": 15645 }, { "epoch": 1.959965269555608, "grad_norm": 2.09375, "learning_rate": 7.193432672352174e-09, "loss": 0.5163, "step": 15646 }, { "epoch": 1.9600915620806694, "grad_norm": 1.9140625, "learning_rate": 7.117920552995872e-09, "loss": 0.4609, "step": 15647 }, { "epoch": 1.9602178546057305, "grad_norm": 2.109375, "learning_rate": 7.042806725849893e-09, "loss": 0.6095, "step": 15648 }, { "epoch": 1.9603441471307916, "grad_norm": 2.0, "learning_rate": 6.9680911939074004e-09, "loss": 0.5405, "step": 15649 }, { "epoch": 1.960470439655853, "grad_norm": 2.015625, "learning_rate": 6.893773960147121e-09, "loss": 0.4782, "step": 15650 }, { "epoch": 1.960596732180914, "grad_norm": 1.984375, "learning_rate": 6.81985502753113e-09, "loss": 0.4016, "step": 15651 }, { "epoch": 1.9607230247059753, "grad_norm": 1.8125, "learning_rate": 6.74633439900596e-09, "loss": 0.4309, "step": 15652 }, { "epoch": 1.9608493172310364, "grad_norm": 2.078125, "learning_rate": 6.67321207750149e-09, "loss": 0.6151, "step": 15653 }, { "epoch": 1.9609756097560975, "grad_norm": 1.8671875, "learning_rate": 6.600488065933164e-09, "loss": 0.5004, "step": 15654 }, { "epoch": 1.9611019022811589, "grad_norm": 2.203125, "learning_rate": 6.528162367197555e-09, "loss": 0.4937, "step": 15655 }, { "epoch": 1.9612281948062198, "grad_norm": 1.921875, "learning_rate": 6.4562349841790215e-09, "loss": 0.5039, "step": 15656 }, { "epoch": 1.9613544873312811, "grad_norm": 1.8515625, "learning_rate": 6.3847059197441605e-09, "loss": 0.513, "step": 15657 }, { "epoch": 1.9614807798563423, "grad_norm": 2.109375, "learning_rate": 6.3135751767440244e-09, "loss": 0.5581, "step": 15658 }, { "epoch": 1.9616070723814034, "grad_norm": 2.125, "learning_rate": 6.242842758013012e-09, "loss": 0.5199, "step": 15659 }, { "epoch": 1.9617333649064645, "grad_norm": 2.0, "learning_rate": 6.172508666371091e-09, "loss": 0.5696, "step": 15660 }, { "epoch": 1.9618596574315257, "grad_norm": 2.015625, "learning_rate": 6.102572904620463e-09, "loss": 0.5127, "step": 15661 }, { "epoch": 1.961985949956587, "grad_norm": 2.078125, "learning_rate": 6.033035475551119e-09, "loss": 0.5071, "step": 15662 }, { "epoch": 1.9621122424816482, "grad_norm": 1.9609375, "learning_rate": 5.963896381931955e-09, "loss": 0.4702, "step": 15663 }, { "epoch": 1.9622385350067093, "grad_norm": 2.0625, "learning_rate": 5.895155626519655e-09, "loss": 0.5573, "step": 15664 }, { "epoch": 1.9623648275317704, "grad_norm": 1.8828125, "learning_rate": 5.82681321205425e-09, "loss": 0.4343, "step": 15665 }, { "epoch": 1.9624911200568316, "grad_norm": 1.859375, "learning_rate": 5.758869141259116e-09, "loss": 0.4491, "step": 15666 }, { "epoch": 1.962617412581893, "grad_norm": 1.9296875, "learning_rate": 5.691323416844307e-09, "loss": 0.5586, "step": 15667 }, { "epoch": 1.9627437051069538, "grad_norm": 1.8984375, "learning_rate": 5.6241760414987856e-09, "loss": 0.5102, "step": 15668 }, { "epoch": 1.9628699976320152, "grad_norm": 1.953125, "learning_rate": 5.557427017901518e-09, "loss": 0.4883, "step": 15669 }, { "epoch": 1.9629962901570763, "grad_norm": 2.03125, "learning_rate": 5.491076348712598e-09, "loss": 0.4553, "step": 15670 }, { "epoch": 1.9631225826821375, "grad_norm": 1.921875, "learning_rate": 5.425124036575469e-09, "loss": 0.4505, "step": 15671 }, { "epoch": 1.9632488752071988, "grad_norm": 1.984375, "learning_rate": 5.359570084119136e-09, "loss": 0.4478, "step": 15672 }, { "epoch": 1.9633751677322597, "grad_norm": 2.0625, "learning_rate": 5.2944144939570675e-09, "loss": 0.4877, "step": 15673 }, { "epoch": 1.963501460257321, "grad_norm": 2.0625, "learning_rate": 5.229657268686072e-09, "loss": 0.4934, "step": 15674 }, { "epoch": 1.9636277527823822, "grad_norm": 1.9765625, "learning_rate": 5.16529841088742e-09, "loss": 0.5179, "step": 15675 }, { "epoch": 1.9637540453074434, "grad_norm": 2.09375, "learning_rate": 5.101337923124616e-09, "loss": 0.568, "step": 15676 }, { "epoch": 1.9638803378325045, "grad_norm": 1.9765625, "learning_rate": 5.0377758079500625e-09, "loss": 0.4387, "step": 15677 }, { "epoch": 1.9640066303575656, "grad_norm": 1.9453125, "learning_rate": 4.974612067893958e-09, "loss": 0.4821, "step": 15678 }, { "epoch": 1.964132922882627, "grad_norm": 2.046875, "learning_rate": 4.911846705475398e-09, "loss": 0.5207, "step": 15679 }, { "epoch": 1.964259215407688, "grad_norm": 1.921875, "learning_rate": 4.849479723196826e-09, "loss": 0.4484, "step": 15680 }, { "epoch": 1.9643855079327492, "grad_norm": 2.078125, "learning_rate": 4.78751112354181e-09, "loss": 0.5414, "step": 15681 }, { "epoch": 1.9645118004578104, "grad_norm": 2.15625, "learning_rate": 4.725940908982818e-09, "loss": 0.5324, "step": 15682 }, { "epoch": 1.9646380929828715, "grad_norm": 1.8671875, "learning_rate": 4.664769081972331e-09, "loss": 0.3901, "step": 15683 }, { "epoch": 1.9647643855079329, "grad_norm": 2.140625, "learning_rate": 4.6039956449484e-09, "loss": 0.4427, "step": 15684 }, { "epoch": 1.9648906780329938, "grad_norm": 2.015625, "learning_rate": 4.5436206003346416e-09, "loss": 0.5231, "step": 15685 }, { "epoch": 1.9650169705580551, "grad_norm": 2.0625, "learning_rate": 4.4836439505357984e-09, "loss": 0.4666, "step": 15686 }, { "epoch": 1.9651432630831163, "grad_norm": 2.0625, "learning_rate": 4.424065697943291e-09, "loss": 0.4444, "step": 15687 }, { "epoch": 1.9652695556081774, "grad_norm": 1.96875, "learning_rate": 4.364885844931887e-09, "loss": 0.5214, "step": 15688 }, { "epoch": 1.9653958481332388, "grad_norm": 1.7421875, "learning_rate": 4.30610439386081e-09, "loss": 0.4154, "step": 15689 }, { "epoch": 1.9655221406582997, "grad_norm": 1.9375, "learning_rate": 4.247721347071521e-09, "loss": 0.4821, "step": 15690 }, { "epoch": 1.965648433183361, "grad_norm": 2.46875, "learning_rate": 4.189736706892156e-09, "loss": 0.5919, "step": 15691 }, { "epoch": 1.9657747257084222, "grad_norm": 1.8671875, "learning_rate": 4.13215047563309e-09, "loss": 0.4667, "step": 15692 }, { "epoch": 1.9659010182334833, "grad_norm": 2.09375, "learning_rate": 4.074962655590264e-09, "loss": 0.4555, "step": 15693 }, { "epoch": 1.9660273107585446, "grad_norm": 2.015625, "learning_rate": 4.018173249041857e-09, "loss": 0.5177, "step": 15694 }, { "epoch": 1.9661536032836056, "grad_norm": 1.9296875, "learning_rate": 3.9617822582527225e-09, "loss": 0.4867, "step": 15695 }, { "epoch": 1.966279895808667, "grad_norm": 2.34375, "learning_rate": 3.905789685471062e-09, "loss": 0.5565, "step": 15696 }, { "epoch": 1.966406188333728, "grad_norm": 1.9140625, "learning_rate": 3.850195532926204e-09, "loss": 0.4833, "step": 15697 }, { "epoch": 1.9665324808587892, "grad_norm": 2.0, "learning_rate": 3.794999802835264e-09, "loss": 0.5628, "step": 15698 }, { "epoch": 1.9666587733838503, "grad_norm": 1.8125, "learning_rate": 3.740202497398704e-09, "loss": 0.427, "step": 15699 }, { "epoch": 1.9667850659089114, "grad_norm": 1.9765625, "learning_rate": 3.685803618800332e-09, "loss": 0.4907, "step": 15700 }, { "epoch": 1.9669113584339728, "grad_norm": 1.96875, "learning_rate": 3.6318031692084143e-09, "loss": 0.4847, "step": 15701 }, { "epoch": 1.9670376509590337, "grad_norm": 1.9296875, "learning_rate": 3.5782011507745627e-09, "loss": 0.4921, "step": 15702 }, { "epoch": 1.967163943484095, "grad_norm": 1.9921875, "learning_rate": 3.524997565635957e-09, "loss": 0.4783, "step": 15703 }, { "epoch": 1.9672902360091562, "grad_norm": 1.9375, "learning_rate": 3.4721924159131225e-09, "loss": 0.4523, "step": 15704 }, { "epoch": 1.9674165285342173, "grad_norm": 2.015625, "learning_rate": 3.4197857037099325e-09, "loss": 0.4906, "step": 15705 }, { "epoch": 1.9675428210592787, "grad_norm": 1.8984375, "learning_rate": 3.367777431115826e-09, "loss": 0.4801, "step": 15706 }, { "epoch": 1.9676691135843396, "grad_norm": 2.03125, "learning_rate": 3.3161676002047005e-09, "loss": 0.5222, "step": 15707 }, { "epoch": 1.967795406109401, "grad_norm": 2.125, "learning_rate": 3.2649562130315782e-09, "loss": 0.5079, "step": 15708 }, { "epoch": 1.967921698634462, "grad_norm": 1.9296875, "learning_rate": 3.2141432716392694e-09, "loss": 0.5237, "step": 15709 }, { "epoch": 1.9680479911595232, "grad_norm": 1.859375, "learning_rate": 3.163728778051711e-09, "loss": 0.4559, "step": 15710 }, { "epoch": 1.9681742836845846, "grad_norm": 1.859375, "learning_rate": 3.113712734279517e-09, "loss": 0.4386, "step": 15711 }, { "epoch": 1.9683005762096455, "grad_norm": 1.953125, "learning_rate": 3.0640951423155375e-09, "loss": 0.4726, "step": 15712 }, { "epoch": 1.9684268687347068, "grad_norm": 1.9296875, "learning_rate": 3.0148760041381896e-09, "loss": 0.4386, "step": 15713 }, { "epoch": 1.968553161259768, "grad_norm": 2.328125, "learning_rate": 2.9660553217081278e-09, "loss": 0.5963, "step": 15714 }, { "epoch": 1.9686794537848291, "grad_norm": 2.21875, "learning_rate": 2.917633096971573e-09, "loss": 0.5973, "step": 15715 }, { "epoch": 1.9688057463098902, "grad_norm": 1.8984375, "learning_rate": 2.8696093318592023e-09, "loss": 0.4623, "step": 15716 }, { "epoch": 1.9689320388349514, "grad_norm": 2.078125, "learning_rate": 2.8219840282839305e-09, "loss": 0.4907, "step": 15717 }, { "epoch": 1.9690583313600127, "grad_norm": 1.96875, "learning_rate": 2.7747571881453495e-09, "loss": 0.4271, "step": 15718 }, { "epoch": 1.9691846238850736, "grad_norm": 2.265625, "learning_rate": 2.7279288133252867e-09, "loss": 0.5437, "step": 15719 }, { "epoch": 1.969310916410135, "grad_norm": 1.96875, "learning_rate": 2.6814989056889172e-09, "loss": 0.4807, "step": 15720 }, { "epoch": 1.9694372089351961, "grad_norm": 2.140625, "learning_rate": 2.6354674670892034e-09, "loss": 0.476, "step": 15721 }, { "epoch": 1.9695635014602573, "grad_norm": 2.0625, "learning_rate": 2.589834499359123e-09, "loss": 0.4744, "step": 15722 }, { "epoch": 1.9696897939853186, "grad_norm": 1.953125, "learning_rate": 2.5446000043172216e-09, "loss": 0.4823, "step": 15723 }, { "epoch": 1.9698160865103795, "grad_norm": 1.9921875, "learning_rate": 2.4997639837687217e-09, "loss": 0.4266, "step": 15724 }, { "epoch": 1.969942379035441, "grad_norm": 2.21875, "learning_rate": 2.4553264394977515e-09, "loss": 0.5161, "step": 15725 }, { "epoch": 1.970068671560502, "grad_norm": 1.9375, "learning_rate": 2.411287373277338e-09, "loss": 0.475, "step": 15726 }, { "epoch": 1.9701949640855632, "grad_norm": 2.125, "learning_rate": 2.3676467868627427e-09, "loss": 0.5396, "step": 15727 }, { "epoch": 1.9703212566106245, "grad_norm": 2.0625, "learning_rate": 2.3244046819925756e-09, "loss": 0.5529, "step": 15728 }, { "epoch": 1.9704475491356854, "grad_norm": 2.015625, "learning_rate": 2.281561060389903e-09, "loss": 0.542, "step": 15729 }, { "epoch": 1.9705738416607468, "grad_norm": 2.09375, "learning_rate": 2.2391159237644676e-09, "loss": 0.5076, "step": 15730 }, { "epoch": 1.970700134185808, "grad_norm": 1.9453125, "learning_rate": 2.1970692738060296e-09, "loss": 0.5058, "step": 15731 }, { "epoch": 1.970826426710869, "grad_norm": 2.21875, "learning_rate": 2.155421112191025e-09, "loss": 0.5619, "step": 15732 }, { "epoch": 1.9709527192359302, "grad_norm": 1.9765625, "learning_rate": 2.114171440580348e-09, "loss": 0.4919, "step": 15733 }, { "epoch": 1.9710790117609913, "grad_norm": 2.0625, "learning_rate": 2.073320260617129e-09, "loss": 0.5395, "step": 15734 }, { "epoch": 1.9712053042860527, "grad_norm": 1.984375, "learning_rate": 2.032867573930064e-09, "loss": 0.4836, "step": 15735 }, { "epoch": 1.9713315968111136, "grad_norm": 2.140625, "learning_rate": 1.9928133821311978e-09, "loss": 0.5781, "step": 15736 }, { "epoch": 1.971457889336175, "grad_norm": 2.015625, "learning_rate": 1.9531576868159206e-09, "loss": 0.5028, "step": 15737 }, { "epoch": 1.971584181861236, "grad_norm": 1.9453125, "learning_rate": 1.9139004895674107e-09, "loss": 0.5217, "step": 15738 }, { "epoch": 1.9717104743862972, "grad_norm": 1.984375, "learning_rate": 1.8750417919488616e-09, "loss": 0.4823, "step": 15739 }, { "epoch": 1.9718367669113586, "grad_norm": 1.9921875, "learning_rate": 1.8365815955079247e-09, "loss": 0.5351, "step": 15740 }, { "epoch": 1.9719630594364195, "grad_norm": 1.984375, "learning_rate": 1.7985199017800382e-09, "loss": 0.5113, "step": 15741 }, { "epoch": 1.9720893519614808, "grad_norm": 2.09375, "learning_rate": 1.7608567122806564e-09, "loss": 0.5641, "step": 15742 }, { "epoch": 1.972215644486542, "grad_norm": 2.078125, "learning_rate": 1.723592028510801e-09, "loss": 0.6017, "step": 15743 }, { "epoch": 1.972341937011603, "grad_norm": 1.96875, "learning_rate": 1.6867258519570607e-09, "loss": 0.4535, "step": 15744 }, { "epoch": 1.9724682295366645, "grad_norm": 2.296875, "learning_rate": 1.65025818408715e-09, "loss": 0.523, "step": 15745 }, { "epoch": 1.9725945220617254, "grad_norm": 2.125, "learning_rate": 1.6141890263554617e-09, "loss": 0.4732, "step": 15746 }, { "epoch": 1.9727208145867867, "grad_norm": 2.125, "learning_rate": 1.578518380199734e-09, "loss": 0.5952, "step": 15747 }, { "epoch": 1.9728471071118479, "grad_norm": 1.90625, "learning_rate": 1.5432462470421627e-09, "loss": 0.429, "step": 15748 }, { "epoch": 1.972973399636909, "grad_norm": 1.9375, "learning_rate": 1.5083726282871802e-09, "loss": 0.4995, "step": 15749 }, { "epoch": 1.9730996921619701, "grad_norm": 2.03125, "learning_rate": 1.4738975253258958e-09, "loss": 0.4951, "step": 15750 }, { "epoch": 1.9732259846870313, "grad_norm": 1.8984375, "learning_rate": 1.4398209395316555e-09, "loss": 0.4414, "step": 15751 }, { "epoch": 1.9733522772120926, "grad_norm": 2.125, "learning_rate": 1.406142872263372e-09, "loss": 0.5644, "step": 15752 }, { "epoch": 1.9734785697371535, "grad_norm": 2.109375, "learning_rate": 1.372863324863305e-09, "loss": 0.5061, "step": 15753 }, { "epoch": 1.9736048622622149, "grad_norm": 1.984375, "learning_rate": 1.3399822986581713e-09, "loss": 0.5269, "step": 15754 }, { "epoch": 1.973731154787276, "grad_norm": 2.015625, "learning_rate": 1.3074997949569234e-09, "loss": 0.484, "step": 15755 }, { "epoch": 1.9738574473123371, "grad_norm": 1.9140625, "learning_rate": 1.2754158150563023e-09, "loss": 0.4996, "step": 15756 }, { "epoch": 1.9739837398373985, "grad_norm": 1.84375, "learning_rate": 1.2437303602341744e-09, "loss": 0.4718, "step": 15757 }, { "epoch": 1.9741100323624594, "grad_norm": 1.8828125, "learning_rate": 1.2124434317528633e-09, "loss": 0.5509, "step": 15758 }, { "epoch": 1.9742363248875208, "grad_norm": 1.9296875, "learning_rate": 1.1815550308602598e-09, "loss": 0.5104, "step": 15759 }, { "epoch": 1.974362617412582, "grad_norm": 2.046875, "learning_rate": 1.151065158787601e-09, "loss": 0.452, "step": 15760 }, { "epoch": 1.974488909937643, "grad_norm": 2.015625, "learning_rate": 1.1209738167483608e-09, "loss": 0.4487, "step": 15761 }, { "epoch": 1.9746152024627044, "grad_norm": 2.0625, "learning_rate": 1.0912810059449108e-09, "loss": 0.4775, "step": 15762 }, { "epoch": 1.9747414949877653, "grad_norm": 1.953125, "learning_rate": 1.0619867275585283e-09, "loss": 0.4422, "step": 15763 }, { "epoch": 1.9748677875128267, "grad_norm": 1.9453125, "learning_rate": 1.0330909827571677e-09, "loss": 0.421, "step": 15764 }, { "epoch": 1.9749940800378878, "grad_norm": 2.0, "learning_rate": 1.004593772693241e-09, "loss": 0.5038, "step": 15765 }, { "epoch": 1.975120372562949, "grad_norm": 1.890625, "learning_rate": 9.764950985013954e-10, "loss": 0.5847, "step": 15766 }, { "epoch": 1.97524666508801, "grad_norm": 2.109375, "learning_rate": 9.487949613029569e-10, "loss": 0.5489, "step": 15767 }, { "epoch": 1.9753729576130712, "grad_norm": 2.15625, "learning_rate": 9.214933622014866e-10, "loss": 0.544, "step": 15768 }, { "epoch": 1.9754992501381325, "grad_norm": 2.078125, "learning_rate": 8.945903022838931e-10, "loss": 0.5242, "step": 15769 }, { "epoch": 1.9756255426631935, "grad_norm": 2.109375, "learning_rate": 8.680857826248724e-10, "loss": 0.4946, "step": 15770 }, { "epoch": 1.9757518351882548, "grad_norm": 1.890625, "learning_rate": 8.419798042791361e-10, "loss": 0.4602, "step": 15771 }, { "epoch": 1.975878127713316, "grad_norm": 2.03125, "learning_rate": 8.162723682869634e-10, "loss": 0.4217, "step": 15772 }, { "epoch": 1.976004420238377, "grad_norm": 2.03125, "learning_rate": 7.909634756742002e-10, "loss": 0.4694, "step": 15773 }, { "epoch": 1.9761307127634384, "grad_norm": 1.984375, "learning_rate": 7.660531274489291e-10, "loss": 0.4194, "step": 15774 }, { "epoch": 1.9762570052884993, "grad_norm": 2.0, "learning_rate": 7.415413246036895e-10, "loss": 0.4763, "step": 15775 }, { "epoch": 1.9763832978135607, "grad_norm": 1.9765625, "learning_rate": 7.17428068116588e-10, "loss": 0.4636, "step": 15776 }, { "epoch": 1.9765095903386218, "grad_norm": 2.03125, "learning_rate": 6.937133589479672e-10, "loss": 0.4488, "step": 15777 }, { "epoch": 1.976635882863683, "grad_norm": 1.984375, "learning_rate": 6.703971980426271e-10, "loss": 0.5562, "step": 15778 }, { "epoch": 1.9767621753887443, "grad_norm": 1.9296875, "learning_rate": 6.474795863309347e-10, "loss": 0.4526, "step": 15779 }, { "epoch": 1.9768884679138052, "grad_norm": 1.8359375, "learning_rate": 6.24960524725493e-10, "loss": 0.451, "step": 15780 }, { "epoch": 1.9770147604388666, "grad_norm": 1.84375, "learning_rate": 6.028400141233626e-10, "loss": 0.4563, "step": 15781 }, { "epoch": 1.9771410529639277, "grad_norm": 1.921875, "learning_rate": 5.811180554082807e-10, "loss": 0.476, "step": 15782 }, { "epoch": 1.9772673454889889, "grad_norm": 1.9921875, "learning_rate": 5.59794649444001e-10, "loss": 0.4629, "step": 15783 }, { "epoch": 1.97739363801405, "grad_norm": 1.921875, "learning_rate": 5.388697970809542e-10, "loss": 0.5208, "step": 15784 }, { "epoch": 1.9775199305391111, "grad_norm": 2.109375, "learning_rate": 5.183434991540282e-10, "loss": 0.4689, "step": 15785 }, { "epoch": 1.9776462230641725, "grad_norm": 1.9609375, "learning_rate": 4.982157564792367e-10, "loss": 0.4628, "step": 15786 }, { "epoch": 1.9777725155892336, "grad_norm": 1.9921875, "learning_rate": 4.784865698614916e-10, "loss": 0.5321, "step": 15787 }, { "epoch": 1.9778988081142947, "grad_norm": 1.7734375, "learning_rate": 4.591559400857204e-10, "loss": 0.404, "step": 15788 }, { "epoch": 1.9780251006393559, "grad_norm": 2.046875, "learning_rate": 4.40223867922418e-10, "loss": 0.4827, "step": 15789 }, { "epoch": 1.978151393164417, "grad_norm": 2.03125, "learning_rate": 4.2169035412653605e-10, "loss": 0.4903, "step": 15790 }, { "epoch": 1.9782776856894784, "grad_norm": 1.9375, "learning_rate": 4.0355539943637276e-10, "loss": 0.4823, "step": 15791 }, { "epoch": 1.9784039782145393, "grad_norm": 2.03125, "learning_rate": 3.858190045757937e-10, "loss": 0.5455, "step": 15792 }, { "epoch": 1.9785302707396006, "grad_norm": 1.984375, "learning_rate": 3.6848117024979034e-10, "loss": 0.51, "step": 15793 }, { "epoch": 1.9786565632646618, "grad_norm": 2.0625, "learning_rate": 3.5154189715114194e-10, "loss": 0.539, "step": 15794 }, { "epoch": 1.978782855789723, "grad_norm": 2.265625, "learning_rate": 3.3500118595375386e-10, "loss": 0.5552, "step": 15795 }, { "epoch": 1.9789091483147843, "grad_norm": 1.9609375, "learning_rate": 3.188590373182088e-10, "loss": 0.4841, "step": 15796 }, { "epoch": 1.9790354408398452, "grad_norm": 2.046875, "learning_rate": 3.031154518873258e-10, "loss": 0.4846, "step": 15797 }, { "epoch": 1.9791617333649065, "grad_norm": 1.9921875, "learning_rate": 2.877704302872708e-10, "loss": 0.4687, "step": 15798 }, { "epoch": 1.9792880258899677, "grad_norm": 2.015625, "learning_rate": 2.7282397313199707e-10, "loss": 0.4285, "step": 15799 }, { "epoch": 1.9794143184150288, "grad_norm": 2.140625, "learning_rate": 2.582760810154739e-10, "loss": 0.4835, "step": 15800 }, { "epoch": 1.97954061094009, "grad_norm": 1.8671875, "learning_rate": 2.441267545194581e-10, "loss": 0.4439, "step": 15801 }, { "epoch": 1.979666903465151, "grad_norm": 1.984375, "learning_rate": 2.3037599420572265e-10, "loss": 0.5538, "step": 15802 }, { "epoch": 1.9797931959902124, "grad_norm": 1.8046875, "learning_rate": 2.1702380062271766e-10, "loss": 0.4183, "step": 15803 }, { "epoch": 1.9799194885152736, "grad_norm": 1.8984375, "learning_rate": 2.040701743033502e-10, "loss": 0.5094, "step": 15804 }, { "epoch": 1.9800457810403347, "grad_norm": 1.8828125, "learning_rate": 1.9151511576387393e-10, "loss": 0.4471, "step": 15805 }, { "epoch": 1.9801720735653958, "grad_norm": 2.0, "learning_rate": 1.793586255049995e-10, "loss": 0.5088, "step": 15806 }, { "epoch": 1.980298366090457, "grad_norm": 2.09375, "learning_rate": 1.676007040107841e-10, "loss": 0.5407, "step": 15807 }, { "epoch": 1.9804246586155183, "grad_norm": 2.21875, "learning_rate": 1.5624135174974186e-10, "loss": 0.535, "step": 15808 }, { "epoch": 1.9805509511405792, "grad_norm": 2.09375, "learning_rate": 1.4528056917373357e-10, "loss": 0.4678, "step": 15809 }, { "epoch": 1.9806772436656406, "grad_norm": 2.0625, "learning_rate": 1.3471835672240752e-10, "loss": 0.4888, "step": 15810 }, { "epoch": 1.9808035361907017, "grad_norm": 1.953125, "learning_rate": 1.245547148143178e-10, "loss": 0.464, "step": 15811 }, { "epoch": 1.9809298287157628, "grad_norm": 2.046875, "learning_rate": 1.1478964385469582e-10, "loss": 0.4949, "step": 15812 }, { "epoch": 1.9810561212408242, "grad_norm": 1.7578125, "learning_rate": 1.0542314423322986e-10, "loss": 0.4489, "step": 15813 }, { "epoch": 1.981182413765885, "grad_norm": 1.9453125, "learning_rate": 9.645521632406507e-11, "loss": 0.4649, "step": 15814 }, { "epoch": 1.9813087062909465, "grad_norm": 1.984375, "learning_rate": 8.788586048358305e-11, "loss": 0.4813, "step": 15815 }, { "epoch": 1.9814349988160076, "grad_norm": 1.9375, "learning_rate": 7.97150770537325e-11, "loss": 0.4443, "step": 15816 }, { "epoch": 1.9815612913410687, "grad_norm": 2.21875, "learning_rate": 7.194286636091896e-11, "loss": 0.5719, "step": 15817 }, { "epoch": 1.98168758386613, "grad_norm": 1.9296875, "learning_rate": 6.456922871378446e-11, "loss": 0.4605, "step": 15818 }, { "epoch": 1.981813876391191, "grad_norm": 2.1875, "learning_rate": 5.759416440542787e-11, "loss": 0.4799, "step": 15819 }, { "epoch": 1.9819401689162524, "grad_norm": 1.9296875, "learning_rate": 5.1017673715625384e-11, "loss": 0.4708, "step": 15820 }, { "epoch": 1.9820664614413135, "grad_norm": 1.921875, "learning_rate": 4.483975690638964e-11, "loss": 0.4878, "step": 15821 }, { "epoch": 1.9821927539663746, "grad_norm": 1.7578125, "learning_rate": 3.9060414223079934e-11, "loss": 0.4617, "step": 15822 }, { "epoch": 1.9823190464914358, "grad_norm": 2.359375, "learning_rate": 3.367964589662265e-11, "loss": 0.5286, "step": 15823 }, { "epoch": 1.9824453390164969, "grad_norm": 1.9453125, "learning_rate": 2.8697452141290828e-11, "loss": 0.547, "step": 15824 }, { "epoch": 1.9825716315415582, "grad_norm": 2.15625, "learning_rate": 2.411383315470417e-11, "loss": 0.4917, "step": 15825 }, { "epoch": 1.9826979240666192, "grad_norm": 1.9296875, "learning_rate": 1.9928789121159698e-11, "loss": 0.457, "step": 15826 }, { "epoch": 1.9828242165916805, "grad_norm": 2.015625, "learning_rate": 1.6142320207190866e-11, "loss": 0.4501, "step": 15827 }, { "epoch": 1.9829505091167416, "grad_norm": 2.125, "learning_rate": 1.275442656267778e-11, "loss": 0.4958, "step": 15828 }, { "epoch": 1.9830768016418028, "grad_norm": 1.875, "learning_rate": 9.765108324177875e-12, "loss": 0.4609, "step": 15829 }, { "epoch": 1.9832030941668641, "grad_norm": 1.9921875, "learning_rate": 7.174365608264566e-12, "loss": 0.5167, "step": 15830 }, { "epoch": 1.983329386691925, "grad_norm": 1.8203125, "learning_rate": 4.982198521519266e-12, "loss": 0.3986, "step": 15831 }, { "epoch": 1.9834556792169864, "grad_norm": 1.8828125, "learning_rate": 3.188607149429146e-12, "loss": 0.4332, "step": 15832 }, { "epoch": 1.9835819717420475, "grad_norm": 1.953125, "learning_rate": 1.79359156304848e-12, "loss": 0.5215, "step": 15833 }, { "epoch": 1.9837082642671087, "grad_norm": 2.046875, "learning_rate": 7.971518189986427e-13, "loss": 0.4814, "step": 15834 }, { "epoch": 1.98383455679217, "grad_norm": 1.953125, "learning_rate": 1.9928795724766247e-13, "loss": 0.4964, "step": 15835 }, { "epoch": 1.983960849317231, "grad_norm": 2.0625, "learning_rate": 0.0, "loss": 0.4943, "step": 15836 }, { "epoch": 1.983960849317231, "eval_loss": 0.8104427456855774, "eval_runtime": 4339.4628, "eval_samples_per_second": 11.485, "eval_steps_per_second": 3.829, "step": 15836 } ], "logging_steps": 1, "max_steps": 15836, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 7918, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.454932075318608e+18, "train_batch_size": 3, "trial_name": null, "trial_params": null }