{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 102000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.014705882352941176, "grad_norm": 3.0940372943878174, "learning_rate": 4.975490196078432e-05, "loss": 2.042, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.029411764705882353, "grad_norm": 3.2075328826904297, "learning_rate": 4.9509803921568634e-05, "loss": 1.5982, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 1000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.04411764705882353, "grad_norm": 4.371872425079346, "learning_rate": 4.9264705882352944e-05, "loss": 1.4943, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 1500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.058823529411764705, "grad_norm": 5.18107271194458, "learning_rate": 4.901960784313725e-05, "loss": 1.4003, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 2000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.07352941176470588, "grad_norm": 4.011467933654785, "learning_rate": 4.877450980392157e-05, "loss": 1.3578, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 2500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.08823529411764706, "grad_norm": 3.5927531719207764, "learning_rate": 4.8529411764705885e-05, "loss": 1.2995, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 3000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.10294117647058823, "grad_norm": 4.120026111602783, "learning_rate": 4.82843137254902e-05, "loss": 1.2598, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 3500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.11764705882352941, "grad_norm": 6.63548469543457, "learning_rate": 4.803921568627452e-05, "loss": 1.2075, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 4000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.1323529411764706, "grad_norm": 4.13482666015625, "learning_rate": 4.7794117647058826e-05, "loss": 1.2405, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 4500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.14705882352941177, "grad_norm": 5.699426174163818, "learning_rate": 4.7549019607843135e-05, "loss": 1.2242, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 5000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.16176470588235295, "grad_norm": 6.220912933349609, "learning_rate": 4.730392156862745e-05, "loss": 1.1701, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 5500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.17647058823529413, "grad_norm": 5.078765869140625, "learning_rate": 4.705882352941177e-05, "loss": 1.1904, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 6000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.19117647058823528, "grad_norm": 1.5997480154037476, "learning_rate": 4.681372549019608e-05, "loss": 1.1321, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 6500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.20588235294117646, "grad_norm": 3.2184040546417236, "learning_rate": 4.656862745098039e-05, "loss": 1.1452, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 7000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.22058823529411764, "grad_norm": 5.408567905426025, "learning_rate": 4.632352941176471e-05, "loss": 1.0764, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 7500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.23529411764705882, "grad_norm": 6.1005539894104, "learning_rate": 4.607843137254902e-05, "loss": 1.0756, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 8000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.25, "grad_norm": 10.336429595947266, "learning_rate": 4.5833333333333334e-05, "loss": 1.084, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 8500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.2647058823529412, "grad_norm": 5.947171211242676, "learning_rate": 4.558823529411765e-05, "loss": 1.095, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 9000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.27941176470588236, "grad_norm": 3.9784250259399414, "learning_rate": 4.5343137254901966e-05, "loss": 1.0716, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 9500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.29411764705882354, "grad_norm": 7.9583420753479, "learning_rate": 4.5098039215686275e-05, "loss": 1.0725, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 10000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.3088235294117647, "grad_norm": 5.341268062591553, "learning_rate": 4.485294117647059e-05, "loss": 1.0656, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 10500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.3235294117647059, "grad_norm": 8.68230152130127, "learning_rate": 4.460784313725491e-05, "loss": 0.9894, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 11000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.3382352941176471, "grad_norm": 3.516399621963501, "learning_rate": 4.4362745098039216e-05, "loss": 1.0527, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 11500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.35294117647058826, "grad_norm": 5.295083045959473, "learning_rate": 4.411764705882353e-05, "loss": 1.0073, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 12000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.36764705882352944, "grad_norm": 3.7748844623565674, "learning_rate": 4.387254901960784e-05, "loss": 1.0035, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 12500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.38235294117647056, "grad_norm": 7.610727310180664, "learning_rate": 4.362745098039216e-05, "loss": 0.9888, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 13000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.39705882352941174, "grad_norm": 5.661556243896484, "learning_rate": 4.3382352941176474e-05, "loss": 1.005, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 13500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.4117647058823529, "grad_norm": 7.081089496612549, "learning_rate": 4.313725490196079e-05, "loss": 0.9848, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 14000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.4264705882352941, "grad_norm": 3.7689192295074463, "learning_rate": 4.28921568627451e-05, "loss": 0.9745, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 14500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.4411764705882353, "grad_norm": 3.3003389835357666, "learning_rate": 4.2647058823529415e-05, "loss": 0.9684, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 15000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.45588235294117646, "grad_norm": 5.606069564819336, "learning_rate": 4.2401960784313724e-05, "loss": 0.9821, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 15500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.47058823529411764, "grad_norm": 5.626067638397217, "learning_rate": 4.215686274509804e-05, "loss": 0.9537, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 16000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.4852941176470588, "grad_norm": 5.848409175872803, "learning_rate": 4.1911764705882356e-05, "loss": 0.9675, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 16500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.5, "grad_norm": 5.057644844055176, "learning_rate": 4.166666666666667e-05, "loss": 0.9714, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 17000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.5147058823529411, "grad_norm": 3.4304769039154053, "learning_rate": 4.142156862745099e-05, "loss": 0.9857, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 17500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.5294117647058824, "grad_norm": 3.0925090312957764, "learning_rate": 4.11764705882353e-05, "loss": 0.9716, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 18000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.5441176470588235, "grad_norm": 7.13212776184082, "learning_rate": 4.0931372549019607e-05, "loss": 0.9471, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 18500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.5588235294117647, "grad_norm": 3.437065362930298, "learning_rate": 4.068627450980392e-05, "loss": 0.9125, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 19000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.5735294117647058, "grad_norm": 3.033743143081665, "learning_rate": 4.044117647058824e-05, "loss": 0.9608, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 19500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.5882352941176471, "grad_norm": 4.206902980804443, "learning_rate": 4.0196078431372555e-05, "loss": 0.9494, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 20000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.6029411764705882, "grad_norm": 4.862374305725098, "learning_rate": 3.9950980392156864e-05, "loss": 0.9636, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 20500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.6176470588235294, "grad_norm": 2.435167074203491, "learning_rate": 3.970588235294117e-05, "loss": 0.947, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 21000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.6323529411764706, "grad_norm": 3.3232932090759277, "learning_rate": 3.946078431372549e-05, "loss": 0.9104, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 21500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.6470588235294118, "grad_norm": 6.478214740753174, "learning_rate": 3.9215686274509805e-05, "loss": 0.9457, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 22000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.6617647058823529, "grad_norm": 4.218869209289551, "learning_rate": 3.897058823529412e-05, "loss": 0.9154, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 22500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.6764705882352942, "grad_norm": 3.7748098373413086, "learning_rate": 3.872549019607844e-05, "loss": 0.9, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 23000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.6911764705882353, "grad_norm": 4.164327621459961, "learning_rate": 3.8480392156862746e-05, "loss": 0.9025, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 23500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.7058823529411765, "grad_norm": 4.087977886199951, "learning_rate": 3.8235294117647055e-05, "loss": 0.9147, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 24000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.7205882352941176, "grad_norm": 7.114366054534912, "learning_rate": 3.799019607843137e-05, "loss": 0.8656, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 24500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.7352941176470589, "grad_norm": 6.7599101066589355, "learning_rate": 3.774509803921569e-05, "loss": 0.8887, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 25000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.75, "grad_norm": 5.847716808319092, "learning_rate": 3.7500000000000003e-05, "loss": 0.8807, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 25500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.7647058823529411, "grad_norm": 9.692086219787598, "learning_rate": 3.725490196078432e-05, "loss": 0.8615, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 26000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.7794117647058824, "grad_norm": 0.6128278970718384, "learning_rate": 3.700980392156863e-05, "loss": 0.8675, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 26500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.7941176470588235, "grad_norm": 8.303850173950195, "learning_rate": 3.6764705882352945e-05, "loss": 0.8809, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 27000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.8088235294117647, "grad_norm": 3.569289445877075, "learning_rate": 3.6519607843137254e-05, "loss": 0.8891, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 27500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.8235294117647058, "grad_norm": 0.8430338501930237, "learning_rate": 3.627450980392157e-05, "loss": 0.8613, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 28000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.8382352941176471, "grad_norm": 3.0527398586273193, "learning_rate": 3.6029411764705886e-05, "loss": 0.8622, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 28500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.8529411764705882, "grad_norm": 7.135374069213867, "learning_rate": 3.5784313725490195e-05, "loss": 0.8671, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 29000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.8676470588235294, "grad_norm": 3.676905393600464, "learning_rate": 3.553921568627451e-05, "loss": 0.8788, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 29500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.8823529411764706, "grad_norm": 7.306654453277588, "learning_rate": 3.529411764705883e-05, "loss": 0.8283, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 30000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.8970588235294118, "grad_norm": 6.978466510772705, "learning_rate": 3.5049019607843136e-05, "loss": 0.8755, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 30500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.9117647058823529, "grad_norm": 8.922977447509766, "learning_rate": 3.480392156862745e-05, "loss": 0.8398, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 31000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.9264705882352942, "grad_norm": 3.4424359798431396, "learning_rate": 3.455882352941177e-05, "loss": 0.8565, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 31500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.9411764705882353, "grad_norm": 4.229028701782227, "learning_rate": 3.431372549019608e-05, "loss": 0.851, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 32000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.9558823529411765, "grad_norm": 12.309649467468262, "learning_rate": 3.4068627450980394e-05, "loss": 0.8563, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 32500, "total_memory_available (GB)": 126.62 }, { "epoch": 0.9705882352941176, "grad_norm": 0.5380654335021973, "learning_rate": 3.382352941176471e-05, "loss": 0.8338, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 33000, "total_memory_available (GB)": 126.62 }, { "epoch": 0.9852941176470589, "grad_norm": 4.774703025817871, "learning_rate": 3.357843137254902e-05, "loss": 0.8425, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 33500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.0, "grad_norm": 5.663310527801514, "learning_rate": 3.3333333333333335e-05, "loss": 0.8711, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 34000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.0147058823529411, "grad_norm": 5.9944562911987305, "learning_rate": 3.308823529411765e-05, "loss": 0.8495, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 34500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.0294117647058822, "grad_norm": 4.68066930770874, "learning_rate": 3.284313725490196e-05, "loss": 0.7897, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 35000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.0441176470588236, "grad_norm": 1.811689853668213, "learning_rate": 3.2598039215686276e-05, "loss": 0.8056, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 35500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.0588235294117647, "grad_norm": 5.509186744689941, "learning_rate": 3.235294117647059e-05, "loss": 0.8267, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 36000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.0735294117647058, "grad_norm": 9.382353782653809, "learning_rate": 3.210784313725491e-05, "loss": 0.8298, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 36500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.088235294117647, "grad_norm": 5.186063766479492, "learning_rate": 3.186274509803922e-05, "loss": 0.8466, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 37000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.1029411764705883, "grad_norm": 6.467650413513184, "learning_rate": 3.161764705882353e-05, "loss": 0.8027, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 37500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.1176470588235294, "grad_norm": 4.107296943664551, "learning_rate": 3.137254901960784e-05, "loss": 0.7936, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 38000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.1323529411764706, "grad_norm": 2.7116880416870117, "learning_rate": 3.112745098039216e-05, "loss": 0.7658, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 38500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.1470588235294117, "grad_norm": 4.101169109344482, "learning_rate": 3.0882352941176475e-05, "loss": 0.8117, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 39000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.161764705882353, "grad_norm": 4.81936502456665, "learning_rate": 3.063725490196079e-05, "loss": 0.8001, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 39500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.1764705882352942, "grad_norm": 7.343831539154053, "learning_rate": 3.0392156862745097e-05, "loss": 0.7704, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 40000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.1911764705882353, "grad_norm": 8.532153129577637, "learning_rate": 3.0147058823529413e-05, "loss": 0.7917, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 40500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.2058823529411764, "grad_norm": 7.05020809173584, "learning_rate": 2.9901960784313725e-05, "loss": 0.808, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 41000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.2205882352941178, "grad_norm": 3.989778995513916, "learning_rate": 2.965686274509804e-05, "loss": 0.8121, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 41500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.2352941176470589, "grad_norm": 6.2639689445495605, "learning_rate": 2.9411764705882354e-05, "loss": 0.7748, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 42000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.25, "grad_norm": 3.3618924617767334, "learning_rate": 2.916666666666667e-05, "loss": 0.813, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 42500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.2647058823529411, "grad_norm": 4.954437255859375, "learning_rate": 2.8921568627450986e-05, "loss": 0.7693, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 43000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.2794117647058822, "grad_norm": 3.4438722133636475, "learning_rate": 2.8676470588235295e-05, "loss": 0.7994, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 43500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.2941176470588236, "grad_norm": 4.17124080657959, "learning_rate": 2.8431372549019608e-05, "loss": 0.7764, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 44000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.3088235294117647, "grad_norm": 8.348847389221191, "learning_rate": 2.8186274509803924e-05, "loss": 0.7478, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 44500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.3235294117647058, "grad_norm": 1.2507061958312988, "learning_rate": 2.7941176470588236e-05, "loss": 0.7974, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 45000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.3382352941176472, "grad_norm": 13.898884773254395, "learning_rate": 2.7696078431372552e-05, "loss": 0.7541, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 45500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.3529411764705883, "grad_norm": 7.0425872802734375, "learning_rate": 2.7450980392156865e-05, "loss": 0.7837, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 46000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.3676470588235294, "grad_norm": 5.521500587463379, "learning_rate": 2.7205882352941174e-05, "loss": 0.7847, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 46500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.3823529411764706, "grad_norm": 7.491474628448486, "learning_rate": 2.696078431372549e-05, "loss": 0.748, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 47000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.3970588235294117, "grad_norm": 10.513397216796875, "learning_rate": 2.6715686274509806e-05, "loss": 0.7801, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 47500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.4117647058823528, "grad_norm": 15.570920944213867, "learning_rate": 2.647058823529412e-05, "loss": 0.7697, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 48000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.4264705882352942, "grad_norm": 4.1163010597229, "learning_rate": 2.6225490196078435e-05, "loss": 0.749, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 48500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.4411764705882353, "grad_norm": 7.289860248565674, "learning_rate": 2.5980392156862747e-05, "loss": 0.7788, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 49000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.4558823529411764, "grad_norm": 5.316174507141113, "learning_rate": 2.5735294117647057e-05, "loss": 0.8029, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 49500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.4705882352941178, "grad_norm": 3.60591721534729, "learning_rate": 2.5490196078431373e-05, "loss": 0.7372, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 50000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.4852941176470589, "grad_norm": 5.769986152648926, "learning_rate": 2.5245098039215685e-05, "loss": 0.765, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 50500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.5, "grad_norm": 3.587907552719116, "learning_rate": 2.5e-05, "loss": 0.7608, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 51000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.5147058823529411, "grad_norm": 2.3503806591033936, "learning_rate": 2.4754901960784317e-05, "loss": 0.7619, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 51500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.5294117647058822, "grad_norm": 9.60110855102539, "learning_rate": 2.4509803921568626e-05, "loss": 0.7633, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 52000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.5441176470588234, "grad_norm": 9.7755708694458, "learning_rate": 2.4264705882352942e-05, "loss": 0.7282, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 52500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.5588235294117647, "grad_norm": 6.872705936431885, "learning_rate": 2.401960784313726e-05, "loss": 0.7501, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 53000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.5735294117647058, "grad_norm": 4.416250228881836, "learning_rate": 2.3774509803921568e-05, "loss": 0.7622, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 53500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.5882352941176472, "grad_norm": 3.751142978668213, "learning_rate": 2.3529411764705884e-05, "loss": 0.7469, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 54000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.6029411764705883, "grad_norm": 2.0974745750427246, "learning_rate": 2.3284313725490196e-05, "loss": 0.7557, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 54500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.6176470588235294, "grad_norm": 3.7702689170837402, "learning_rate": 2.303921568627451e-05, "loss": 0.7468, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 55000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.6323529411764706, "grad_norm": 5.6461968421936035, "learning_rate": 2.2794117647058825e-05, "loss": 0.7501, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 55500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.6470588235294117, "grad_norm": 2.914978504180908, "learning_rate": 2.2549019607843138e-05, "loss": 0.7126, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 56000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.6617647058823528, "grad_norm": 5.506552219390869, "learning_rate": 2.2303921568627454e-05, "loss": 0.7176, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 56500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.6764705882352942, "grad_norm": 3.4189107418060303, "learning_rate": 2.2058823529411766e-05, "loss": 0.756, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 57000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.6911764705882353, "grad_norm": 10.335149765014648, "learning_rate": 2.181372549019608e-05, "loss": 0.7443, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 57500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.7058823529411766, "grad_norm": 1.2579532861709595, "learning_rate": 2.1568627450980395e-05, "loss": 0.7317, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 58000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.7205882352941178, "grad_norm": 5.039107322692871, "learning_rate": 2.1323529411764707e-05, "loss": 0.7626, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 58500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.7352941176470589, "grad_norm": 7.849141597747803, "learning_rate": 2.107843137254902e-05, "loss": 0.7506, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 59000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.75, "grad_norm": 5.323431491851807, "learning_rate": 2.0833333333333336e-05, "loss": 0.7366, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 59500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.7647058823529411, "grad_norm": 4.651150703430176, "learning_rate": 2.058823529411765e-05, "loss": 0.7467, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 60000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.7794117647058822, "grad_norm": 2.9276676177978516, "learning_rate": 2.034313725490196e-05, "loss": 0.7328, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 60500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.7941176470588234, "grad_norm": 6.4653401374816895, "learning_rate": 2.0098039215686277e-05, "loss": 0.7213, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 61000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.8088235294117647, "grad_norm": 5.86958122253418, "learning_rate": 1.9852941176470586e-05, "loss": 0.726, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 61500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.8235294117647058, "grad_norm": 1.54404878616333, "learning_rate": 1.9607843137254903e-05, "loss": 0.7228, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 62000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.8382352941176472, "grad_norm": 3.6029295921325684, "learning_rate": 1.936274509803922e-05, "loss": 0.7194, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 62500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.8529411764705883, "grad_norm": 13.423847198486328, "learning_rate": 1.9117647058823528e-05, "loss": 0.745, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 63000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.8676470588235294, "grad_norm": 2.904106855392456, "learning_rate": 1.8872549019607844e-05, "loss": 0.7321, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 63500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.8823529411764706, "grad_norm": 2.838707447052002, "learning_rate": 1.862745098039216e-05, "loss": 0.7012, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 64000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.8970588235294117, "grad_norm": 3.0846309661865234, "learning_rate": 1.8382352941176472e-05, "loss": 0.6912, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 64500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.9117647058823528, "grad_norm": 5.948828220367432, "learning_rate": 1.8137254901960785e-05, "loss": 0.6854, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 65000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.9264705882352942, "grad_norm": 13.732516288757324, "learning_rate": 1.7892156862745098e-05, "loss": 0.6873, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 65500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.9411764705882353, "grad_norm": 10.169962882995605, "learning_rate": 1.7647058823529414e-05, "loss": 0.7158, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 66000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.9558823529411766, "grad_norm": 8.26551628112793, "learning_rate": 1.7401960784313726e-05, "loss": 0.7263, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 66500, "total_memory_available (GB)": 126.62 }, { "epoch": 1.9705882352941178, "grad_norm": 8.526754379272461, "learning_rate": 1.715686274509804e-05, "loss": 0.6879, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 67000, "total_memory_available (GB)": 126.62 }, { "epoch": 1.9852941176470589, "grad_norm": 11.833305358886719, "learning_rate": 1.6911764705882355e-05, "loss": 0.6696, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 67500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.0, "grad_norm": 3.8232524394989014, "learning_rate": 1.6666666666666667e-05, "loss": 0.6767, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 68000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.014705882352941, "grad_norm": 7.861486434936523, "learning_rate": 1.642156862745098e-05, "loss": 0.688, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 68500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.0294117647058822, "grad_norm": 5.00877046585083, "learning_rate": 1.6176470588235296e-05, "loss": 0.6502, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 69000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.0441176470588234, "grad_norm": 3.252054214477539, "learning_rate": 1.593137254901961e-05, "loss": 0.6735, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 69500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.0588235294117645, "grad_norm": 8.655946731567383, "learning_rate": 1.568627450980392e-05, "loss": 0.7076, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 70000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.073529411764706, "grad_norm": 5.065808296203613, "learning_rate": 1.5441176470588237e-05, "loss": 0.6808, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 70500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.088235294117647, "grad_norm": 7.614666938781738, "learning_rate": 1.5196078431372548e-05, "loss": 0.6565, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 71000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.1029411764705883, "grad_norm": 5.335823059082031, "learning_rate": 1.4950980392156863e-05, "loss": 0.6929, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 71500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.1176470588235294, "grad_norm": 8.399900436401367, "learning_rate": 1.4705882352941177e-05, "loss": 0.6575, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 72000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.1323529411764706, "grad_norm": 4.3644537925720215, "learning_rate": 1.4460784313725493e-05, "loss": 0.7093, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 72500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.1470588235294117, "grad_norm": 5.606255054473877, "learning_rate": 1.4215686274509804e-05, "loss": 0.6497, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 73000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.161764705882353, "grad_norm": 8.045038223266602, "learning_rate": 1.3970588235294118e-05, "loss": 0.7041, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 73500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.176470588235294, "grad_norm": 10.108804702758789, "learning_rate": 1.3725490196078432e-05, "loss": 0.6512, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 74000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.1911764705882355, "grad_norm": 4.9611992835998535, "learning_rate": 1.3480392156862745e-05, "loss": 0.6748, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 74500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.2058823529411766, "grad_norm": 2.3439674377441406, "learning_rate": 1.323529411764706e-05, "loss": 0.6671, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 75000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.2205882352941178, "grad_norm": 5.686610698699951, "learning_rate": 1.2990196078431374e-05, "loss": 0.6246, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 75500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.235294117647059, "grad_norm": 3.122039794921875, "learning_rate": 1.2745098039215686e-05, "loss": 0.7125, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 76000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.25, "grad_norm": 12.2803373336792, "learning_rate": 1.25e-05, "loss": 0.6599, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 76500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.264705882352941, "grad_norm": 6.284506797790527, "learning_rate": 1.2254901960784313e-05, "loss": 0.6759, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 77000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.2794117647058822, "grad_norm": 3.540226697921753, "learning_rate": 1.200980392156863e-05, "loss": 0.6832, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 77500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.2941176470588234, "grad_norm": 7.354804992675781, "learning_rate": 1.1764705882352942e-05, "loss": 0.6289, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 78000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.3088235294117645, "grad_norm": 4.479936122894287, "learning_rate": 1.1519607843137254e-05, "loss": 0.6235, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 78500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.323529411764706, "grad_norm": 5.0150299072265625, "learning_rate": 1.1274509803921569e-05, "loss": 0.6257, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 79000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.338235294117647, "grad_norm": 6.954928874969482, "learning_rate": 1.1029411764705883e-05, "loss": 0.6543, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 79500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.3529411764705883, "grad_norm": 1.8401598930358887, "learning_rate": 1.0784313725490197e-05, "loss": 0.6433, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 80000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.3676470588235294, "grad_norm": 0.5422260761260986, "learning_rate": 1.053921568627451e-05, "loss": 0.652, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 80500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.3823529411764706, "grad_norm": 6.412265300750732, "learning_rate": 1.0294117647058824e-05, "loss": 0.6637, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 81000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.3970588235294117, "grad_norm": 5.444230556488037, "learning_rate": 1.0049019607843139e-05, "loss": 0.6218, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 81500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.411764705882353, "grad_norm": 3.0248329639434814, "learning_rate": 9.803921568627451e-06, "loss": 0.6663, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 82000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.426470588235294, "grad_norm": 2.8557262420654297, "learning_rate": 9.558823529411764e-06, "loss": 0.6768, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 82500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.4411764705882355, "grad_norm": 3.229672908782959, "learning_rate": 9.31372549019608e-06, "loss": 0.6566, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 83000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.4558823529411766, "grad_norm": 6.890370845794678, "learning_rate": 9.068627450980392e-06, "loss": 0.6589, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 83500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.4705882352941178, "grad_norm": 7.899547100067139, "learning_rate": 8.823529411764707e-06, "loss": 0.6775, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 84000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.485294117647059, "grad_norm": 7.249077796936035, "learning_rate": 8.57843137254902e-06, "loss": 0.6572, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 84500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.5, "grad_norm": 5.247090816497803, "learning_rate": 8.333333333333334e-06, "loss": 0.6348, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 85000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.514705882352941, "grad_norm": 5.179773330688477, "learning_rate": 8.088235294117648e-06, "loss": 0.6224, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 85500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.5294117647058822, "grad_norm": 8.414857864379883, "learning_rate": 7.84313725490196e-06, "loss": 0.6136, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 86000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.5441176470588234, "grad_norm": 1.9255198240280151, "learning_rate": 7.598039215686274e-06, "loss": 0.6418, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 86500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.5588235294117645, "grad_norm": 4.827670574188232, "learning_rate": 7.3529411764705884e-06, "loss": 0.6562, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 87000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.5735294117647056, "grad_norm": 5.845033645629883, "learning_rate": 7.107843137254902e-06, "loss": 0.6471, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 87500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.588235294117647, "grad_norm": 6.777614593505859, "learning_rate": 6.862745098039216e-06, "loss": 0.6311, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 88000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.6029411764705883, "grad_norm": 11.79529094696045, "learning_rate": 6.61764705882353e-06, "loss": 0.6289, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 88500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.6176470588235294, "grad_norm": 8.824967384338379, "learning_rate": 6.372549019607843e-06, "loss": 0.619, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 89000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.6323529411764706, "grad_norm": 6.913949012756348, "learning_rate": 6.127450980392157e-06, "loss": 0.652, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 89500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.6470588235294117, "grad_norm": 3.8313751220703125, "learning_rate": 5.882352941176471e-06, "loss": 0.6701, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 90000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.661764705882353, "grad_norm": 8.215780258178711, "learning_rate": 5.637254901960784e-06, "loss": 0.6433, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 90500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.6764705882352944, "grad_norm": 6.524360179901123, "learning_rate": 5.392156862745099e-06, "loss": 0.6048, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 91000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.6911764705882355, "grad_norm": 15.196798324584961, "learning_rate": 5.147058823529412e-06, "loss": 0.6427, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 91500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.7058823529411766, "grad_norm": 1.6884765625, "learning_rate": 4.901960784313726e-06, "loss": 0.6236, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 92000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.7205882352941178, "grad_norm": 2.3077046871185303, "learning_rate": 4.65686274509804e-06, "loss": 0.6163, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 92500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.735294117647059, "grad_norm": 5.261138916015625, "learning_rate": 4.411764705882353e-06, "loss": 0.6361, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 93000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.75, "grad_norm": 6.308595657348633, "learning_rate": 4.166666666666667e-06, "loss": 0.6245, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 93500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.764705882352941, "grad_norm": 5.467650413513184, "learning_rate": 3.92156862745098e-06, "loss": 0.621, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 94000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.7794117647058822, "grad_norm": 5.5255632400512695, "learning_rate": 3.6764705882352942e-06, "loss": 0.6446, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 94500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.7941176470588234, "grad_norm": 9.804576873779297, "learning_rate": 3.431372549019608e-06, "loss": 0.6226, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 95000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.8088235294117645, "grad_norm": 7.767389297485352, "learning_rate": 3.1862745098039216e-06, "loss": 0.6147, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 95500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.8235294117647056, "grad_norm": 2.8358209133148193, "learning_rate": 2.9411764705882355e-06, "loss": 0.6416, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 96000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.838235294117647, "grad_norm": 7.5477190017700195, "learning_rate": 2.6960784313725493e-06, "loss": 0.6328, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 96500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.8529411764705883, "grad_norm": 13.355735778808594, "learning_rate": 2.450980392156863e-06, "loss": 0.6087, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 97000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.8676470588235294, "grad_norm": 5.399519443511963, "learning_rate": 2.2058823529411767e-06, "loss": 0.6063, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 97500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.8823529411764706, "grad_norm": 7.818796634674072, "learning_rate": 1.96078431372549e-06, "loss": 0.6211, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 98000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.8970588235294117, "grad_norm": 4.248461723327637, "learning_rate": 1.715686274509804e-06, "loss": 0.6054, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 98500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.911764705882353, "grad_norm": 6.80518102645874, "learning_rate": 1.4705882352941177e-06, "loss": 0.6261, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 99000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.9264705882352944, "grad_norm": 0.6795050501823425, "learning_rate": 1.2254901960784314e-06, "loss": 0.6062, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 99500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.9411764705882355, "grad_norm": 6.518540382385254, "learning_rate": 9.80392156862745e-07, "loss": 0.6304, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 100000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.9558823529411766, "grad_norm": 5.016809463500977, "learning_rate": 7.352941176470589e-07, "loss": 0.636, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 100500, "total_memory_available (GB)": 126.62 }, { "epoch": 2.9705882352941178, "grad_norm": 7.016627788543701, "learning_rate": 4.901960784313725e-07, "loss": 0.6178, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 101000, "total_memory_available (GB)": 126.62 }, { "epoch": 2.985294117647059, "grad_norm": 12.064628601074219, "learning_rate": 2.4509803921568627e-07, "loss": 0.6343, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 101500, "total_memory_available (GB)": 126.62 }, { "epoch": 3.0, "grad_norm": 3.9333810806274414, "learning_rate": 0.0, "loss": 0.5962, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 102000, "total_memory_available (GB)": 126.62 }, { "epoch": 3.0, "max_memory_allocated (GB)": 2.1, "memory_allocated (GB)": 1.46, "step": 102000, "total_flos": 6.324139790696448e+19, "total_memory_available (GB)": 126.62, "train_loss": 0.8088104343788297, "train_runtime": 4169.6612, "train_samples_per_second": 195.699, "train_steps_per_second": 24.462 } ], "logging_steps": 500, "max_steps": 102000, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.324139790696448e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }