|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 8969, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 14.375, |
|
"learning_rate": 0.001, |
|
"loss": 10.66, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.15625, |
|
"learning_rate": 0.001, |
|
"loss": 9.0861, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.89453125, |
|
"learning_rate": 0.001, |
|
"loss": 7.5929, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.80078125, |
|
"learning_rate": 0.001, |
|
"loss": 7.2385, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.890625, |
|
"learning_rate": 0.001, |
|
"loss": 7.1247, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.55078125, |
|
"learning_rate": 0.001, |
|
"loss": 6.9193, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 0.001, |
|
"loss": 6.8173, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.68359375, |
|
"learning_rate": 0.001, |
|
"loss": 6.7162, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.67578125, |
|
"learning_rate": 0.001, |
|
"loss": 6.6121, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.7265625, |
|
"learning_rate": 0.001, |
|
"loss": 6.4806, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.63671875, |
|
"learning_rate": 0.001, |
|
"loss": 6.4596, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.875, |
|
"learning_rate": 0.001, |
|
"loss": 6.3081, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.8125, |
|
"learning_rate": 0.001, |
|
"loss": 6.2485, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.765625, |
|
"learning_rate": 0.001, |
|
"loss": 6.1595, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.80859375, |
|
"learning_rate": 0.001, |
|
"loss": 6.113, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.89453125, |
|
"learning_rate": 0.001, |
|
"loss": 6.0143, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.203125, |
|
"learning_rate": 0.001, |
|
"loss": 5.9454, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 0.001, |
|
"loss": 5.8563, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.89453125, |
|
"learning_rate": 0.001, |
|
"loss": 5.8343, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.6015625, |
|
"learning_rate": 0.001, |
|
"loss": 5.8936, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.765625, |
|
"learning_rate": 0.001, |
|
"loss": 5.6957, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.5, |
|
"learning_rate": 0.001, |
|
"loss": 5.6288, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.73828125, |
|
"learning_rate": 0.001, |
|
"loss": 5.601, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 0.001, |
|
"loss": 5.6385, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 0.001, |
|
"loss": 5.5606, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.625, |
|
"learning_rate": 0.001, |
|
"loss": 5.53, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.62109375, |
|
"learning_rate": 0.001, |
|
"loss": 5.512, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.91015625, |
|
"learning_rate": 0.001, |
|
"loss": 5.3661, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5546875, |
|
"learning_rate": 0.001, |
|
"loss": 5.4498, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.54296875, |
|
"learning_rate": 0.001, |
|
"loss": 5.3858, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.59375, |
|
"learning_rate": 0.001, |
|
"loss": 5.3242, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.9140625, |
|
"learning_rate": 0.001, |
|
"loss": 5.3414, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.15625, |
|
"learning_rate": 0.001, |
|
"loss": 5.3306, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.79296875, |
|
"learning_rate": 0.001, |
|
"loss": 5.2121, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.578125, |
|
"learning_rate": 0.001, |
|
"loss": 5.1801, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.71484375, |
|
"learning_rate": 0.001, |
|
"loss": 5.1879, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.76953125, |
|
"learning_rate": 0.001, |
|
"loss": 5.0849, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.75, |
|
"learning_rate": 0.001, |
|
"loss": 5.1201, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.77734375, |
|
"learning_rate": 0.001, |
|
"loss": 5.0651, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.7109375, |
|
"learning_rate": 0.001, |
|
"loss": 5.0007, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.60546875, |
|
"learning_rate": 0.001, |
|
"loss": 5.0496, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.5390625, |
|
"learning_rate": 0.001, |
|
"loss": 4.9546, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.73828125, |
|
"learning_rate": 0.001, |
|
"loss": 4.9174, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.609375, |
|
"learning_rate": 0.001, |
|
"loss": 4.8822, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.6171875, |
|
"learning_rate": 0.001, |
|
"loss": 4.9179, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.55859375, |
|
"learning_rate": 0.001, |
|
"loss": 4.8214, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.625, |
|
"learning_rate": 0.001, |
|
"loss": 4.8761, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.55078125, |
|
"learning_rate": 0.001, |
|
"loss": 4.9586, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.6484375, |
|
"learning_rate": 0.001, |
|
"loss": 4.8608, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.0703125, |
|
"learning_rate": 0.001, |
|
"loss": 4.8165, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.71484375, |
|
"learning_rate": 0.001, |
|
"loss": 4.8616, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.671875, |
|
"learning_rate": 0.001, |
|
"loss": 4.7908, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 0.001, |
|
"loss": 4.7085, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.6484375, |
|
"learning_rate": 0.001, |
|
"loss": 4.6968, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.67578125, |
|
"learning_rate": 0.001, |
|
"loss": 4.648, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.62109375, |
|
"learning_rate": 0.001, |
|
"loss": 4.7703, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.71484375, |
|
"learning_rate": 0.001, |
|
"loss": 4.6552, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 0.001, |
|
"loss": 4.6484, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.546875, |
|
"learning_rate": 0.001, |
|
"loss": 4.6077, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.69140625, |
|
"learning_rate": 0.001, |
|
"loss": 4.5301, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 0.001, |
|
"loss": 4.5485, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.73046875, |
|
"learning_rate": 0.001, |
|
"loss": 4.6582, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 0.546875, |
|
"learning_rate": 0.001, |
|
"loss": 4.6396, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 0.001, |
|
"loss": 4.4431, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5859375, |
|
"learning_rate": 0.001, |
|
"loss": 4.4276, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.58203125, |
|
"learning_rate": 0.001, |
|
"loss": 4.5243, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 0.001, |
|
"loss": 4.4766, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.4765625, |
|
"learning_rate": 0.001, |
|
"loss": 4.5035, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5, |
|
"learning_rate": 0.001, |
|
"loss": 4.4449, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 0.001, |
|
"loss": 4.3884, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.58203125, |
|
"learning_rate": 0.001, |
|
"loss": 4.4832, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.546875, |
|
"learning_rate": 0.001, |
|
"loss": 4.3619, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 0.001, |
|
"loss": 4.4318, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.59375, |
|
"learning_rate": 0.001, |
|
"loss": 4.3091, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.609375, |
|
"learning_rate": 0.001, |
|
"loss": 4.3793, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.60546875, |
|
"learning_rate": 0.001, |
|
"loss": 4.3673, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.7734375, |
|
"learning_rate": 0.001, |
|
"loss": 4.2584, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5078125, |
|
"learning_rate": 0.001, |
|
"loss": 4.432, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.53515625, |
|
"learning_rate": 0.001, |
|
"loss": 4.2351, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 0.001, |
|
"loss": 4.3768, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 0.5, |
|
"learning_rate": 0.001, |
|
"loss": 4.2398, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.484375, |
|
"learning_rate": 0.001, |
|
"loss": 4.2591, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 0.001, |
|
"loss": 4.2083, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 0.001, |
|
"loss": 4.2354, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.640625, |
|
"learning_rate": 0.001, |
|
"loss": 4.2133, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.58203125, |
|
"learning_rate": 0.001, |
|
"loss": 4.2055, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.56640625, |
|
"learning_rate": 0.001, |
|
"loss": 4.1682, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.53515625, |
|
"learning_rate": 0.001, |
|
"loss": 4.2379, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.578125, |
|
"learning_rate": 0.001, |
|
"loss": 4.1974, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.49609375, |
|
"learning_rate": 0.001, |
|
"loss": 4.2177, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.59375, |
|
"learning_rate": 0.001, |
|
"loss": 4.1886, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.58203125, |
|
"learning_rate": 0.001, |
|
"loss": 4.1948, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.5390625, |
|
"learning_rate": 0.001, |
|
"loss": 4.2911, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 0.001, |
|
"loss": 4.2212, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.49609375, |
|
"learning_rate": 0.001, |
|
"loss": 4.1132, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.6796875, |
|
"learning_rate": 0.001, |
|
"loss": 4.1323, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.5546875, |
|
"learning_rate": 0.001, |
|
"loss": 4.0926, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.53125, |
|
"learning_rate": 0.001, |
|
"loss": 4.0398, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 0.484375, |
|
"learning_rate": 0.001, |
|
"loss": 4.1016, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.578125, |
|
"learning_rate": 0.001, |
|
"loss": 3.9908, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.5, |
|
"learning_rate": 0.001, |
|
"loss": 4.1144, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.59375, |
|
"learning_rate": 0.001, |
|
"loss": 4.0445, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 0.001, |
|
"loss": 4.0426, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 0.001, |
|
"loss": 4.0339, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.55859375, |
|
"learning_rate": 0.001, |
|
"loss": 4.0517, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 0.001, |
|
"loss": 3.9779, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.482421875, |
|
"learning_rate": 0.001, |
|
"loss": 3.9787, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.494140625, |
|
"learning_rate": 0.001, |
|
"loss": 3.9433, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.58984375, |
|
"learning_rate": 0.001, |
|
"loss": 3.9909, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.498046875, |
|
"learning_rate": 0.001, |
|
"loss": 3.9582, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 0.001, |
|
"loss": 3.996, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.546875, |
|
"learning_rate": 0.001, |
|
"loss": 4.0016, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.64453125, |
|
"learning_rate": 0.001, |
|
"loss": 3.9915, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.640625, |
|
"learning_rate": 0.001, |
|
"loss": 3.8457, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.5, |
|
"learning_rate": 0.001, |
|
"loss": 3.8556, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.578125, |
|
"learning_rate": 0.001, |
|
"loss": 3.9233, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 0.62109375, |
|
"learning_rate": 0.001, |
|
"loss": 3.9413, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 0.001, |
|
"loss": 3.968, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.486328125, |
|
"learning_rate": 0.001, |
|
"loss": 3.971, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4765625, |
|
"learning_rate": 0.001, |
|
"loss": 4.0015, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.60546875, |
|
"learning_rate": 0.001, |
|
"loss": 3.8761, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.48828125, |
|
"learning_rate": 0.001, |
|
"loss": 3.8149, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.5, |
|
"learning_rate": 0.001, |
|
"loss": 3.8305, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.498046875, |
|
"learning_rate": 0.001, |
|
"loss": 3.7626, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.46875, |
|
"learning_rate": 0.001, |
|
"loss": 3.8127, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 0.001, |
|
"loss": 3.8408, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4765625, |
|
"learning_rate": 0.001, |
|
"loss": 3.9199, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.60546875, |
|
"learning_rate": 0.001, |
|
"loss": 3.8861, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 0.001, |
|
"loss": 3.8037, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 0.001, |
|
"loss": 3.8155, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.5859375, |
|
"learning_rate": 0.001, |
|
"loss": 3.8044, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 0.001, |
|
"loss": 3.7604, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.5390625, |
|
"learning_rate": 0.001, |
|
"loss": 3.7374, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.54296875, |
|
"learning_rate": 0.001, |
|
"loss": 3.7128, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 0.001, |
|
"loss": 3.6821, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 0.001, |
|
"loss": 3.689, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.53125, |
|
"learning_rate": 0.001, |
|
"loss": 3.7706, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.53125, |
|
"learning_rate": 0.001, |
|
"loss": 3.7512, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.53515625, |
|
"learning_rate": 0.001, |
|
"loss": 3.687, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.57421875, |
|
"learning_rate": 0.001, |
|
"loss": 3.6497, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.55859375, |
|
"learning_rate": 0.001, |
|
"loss": 3.7325, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.59375, |
|
"learning_rate": 0.001, |
|
"loss": 3.6482, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.48046875, |
|
"learning_rate": 0.001, |
|
"loss": 3.7139, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.58984375, |
|
"learning_rate": 0.001, |
|
"loss": 3.6265, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.478515625, |
|
"learning_rate": 0.001, |
|
"loss": 3.6359, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 0.001, |
|
"loss": 3.689, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 0.001, |
|
"loss": 3.664, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 0.001, |
|
"loss": 3.5956, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 0.001, |
|
"loss": 3.6903, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.47265625, |
|
"learning_rate": 0.001, |
|
"loss": 3.7775, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 0.001, |
|
"loss": 3.604, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.5390625, |
|
"learning_rate": 0.001, |
|
"loss": 3.5746, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 0.001, |
|
"loss": 3.5907, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 0.001, |
|
"loss": 3.6306, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.498046875, |
|
"learning_rate": 0.001, |
|
"loss": 3.7143, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.478515625, |
|
"learning_rate": 0.001, |
|
"loss": 3.6161, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 0.001, |
|
"loss": 3.6404, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 0.001, |
|
"loss": 3.6052, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.55078125, |
|
"learning_rate": 0.001, |
|
"loss": 3.5776, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.484375, |
|
"learning_rate": 0.001, |
|
"loss": 3.56, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.484375, |
|
"learning_rate": 0.001, |
|
"loss": 3.5469, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 0.001, |
|
"loss": 3.637, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 0.001, |
|
"loss": 3.582, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 0.001, |
|
"loss": 3.6892, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.494140625, |
|
"learning_rate": 0.001, |
|
"loss": 3.5234, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.43359375, |
|
"learning_rate": 0.001, |
|
"loss": 3.5884, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 0.001, |
|
"loss": 3.541, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 0.001, |
|
"loss": 3.4975, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 0.001, |
|
"loss": 3.5908, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.4765625, |
|
"learning_rate": 0.001, |
|
"loss": 3.5859, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 0.001, |
|
"loss": 3.4903, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.49609375, |
|
"learning_rate": 0.001, |
|
"loss": 3.4214, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 0.001, |
|
"loss": 3.4849, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 0.001, |
|
"loss": 3.4287, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 0.001, |
|
"loss": 3.3918, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 0.001, |
|
"loss": 3.4713, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 0.001, |
|
"loss": 3.4109, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.5625, |
|
"learning_rate": 0.001, |
|
"loss": 3.4379, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 0.001, |
|
"loss": 3.4546, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.482421875, |
|
"learning_rate": 0.001, |
|
"loss": 3.3923, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.47265625, |
|
"learning_rate": 0.001, |
|
"loss": 3.4302, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.494140625, |
|
"learning_rate": 0.001, |
|
"loss": 3.4475, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 0.001, |
|
"loss": 3.3023, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 0.001, |
|
"loss": 3.3485, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.4375, |
|
"learning_rate": 0.001, |
|
"loss": 3.3677, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.52734375, |
|
"learning_rate": 0.001, |
|
"loss": 3.5076, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.53515625, |
|
"learning_rate": 0.001, |
|
"loss": 3.421, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 0.001, |
|
"loss": 3.4751, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 0.001, |
|
"loss": 3.2864, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 0.001, |
|
"loss": 3.3964, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 0.001, |
|
"loss": 3.367, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.45703125, |
|
"learning_rate": 0.001, |
|
"loss": 3.3819, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 0.001, |
|
"loss": 3.2415, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.48828125, |
|
"learning_rate": 0.001, |
|
"loss": 3.2919, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.47265625, |
|
"learning_rate": 0.001, |
|
"loss": 3.4433, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.470703125, |
|
"learning_rate": 0.001, |
|
"loss": 3.3476, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 0.001, |
|
"loss": 3.2323, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.5390625, |
|
"learning_rate": 0.001, |
|
"loss": 3.3249, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 0.001, |
|
"loss": 3.215, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 0.001, |
|
"loss": 3.2893, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.48828125, |
|
"learning_rate": 0.001, |
|
"loss": 3.3386, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 0.001, |
|
"loss": 3.2875, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 0.001, |
|
"loss": 3.3014, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 0.001, |
|
"loss": 3.176, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 0.001, |
|
"loss": 3.2795, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.494140625, |
|
"learning_rate": 0.001, |
|
"loss": 3.2852, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 0.5703125, |
|
"learning_rate": 0.001, |
|
"loss": 3.278, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.54296875, |
|
"learning_rate": 0.001, |
|
"loss": 3.2953, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.482421875, |
|
"learning_rate": 0.001, |
|
"loss": 3.217, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.48046875, |
|
"learning_rate": 0.001, |
|
"loss": 3.1406, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 0.001, |
|
"loss": 3.2346, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 0.001, |
|
"loss": 3.1984, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.49609375, |
|
"learning_rate": 0.001, |
|
"loss": 3.2343, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 0.001, |
|
"loss": 3.1503, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 0.001, |
|
"loss": 3.224, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.43359375, |
|
"learning_rate": 0.001, |
|
"loss": 3.1678, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 0.001, |
|
"loss": 3.1891, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 0.001, |
|
"loss": 3.197, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 0.001, |
|
"loss": 3.1608, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 0.001, |
|
"loss": 3.2191, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 0.001, |
|
"loss": 3.1926, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 0.001, |
|
"loss": 3.2302, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 0.001, |
|
"loss": 3.1557, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 0.001, |
|
"loss": 3.1696, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 0.001, |
|
"loss": 3.2021, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4609375, |
|
"learning_rate": 0.001, |
|
"loss": 3.1396, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 0.001, |
|
"loss": 3.126, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 0.001, |
|
"loss": 3.1153, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 0.001, |
|
"loss": 3.2244, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.44140625, |
|
"learning_rate": 0.001, |
|
"loss": 3.1862, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 0.001, |
|
"loss": 3.1397, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 0.001, |
|
"loss": 3.1137, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 0.001, |
|
"loss": 3.0842, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4765625, |
|
"learning_rate": 0.001, |
|
"loss": 3.1646, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 0.001, |
|
"loss": 3.1085, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 0.001, |
|
"loss": 3.1618, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.421875, |
|
"learning_rate": 0.001, |
|
"loss": 3.1494, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 0.001, |
|
"loss": 3.1187, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 0.001, |
|
"loss": 3.1668, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.47265625, |
|
"learning_rate": 0.001, |
|
"loss": 3.0082, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.5234375, |
|
"learning_rate": 0.001, |
|
"loss": 3.0856, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.43359375, |
|
"learning_rate": 0.001, |
|
"loss": 3.0244, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 0.001, |
|
"loss": 3.1119, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.423828125, |
|
"learning_rate": 0.001, |
|
"loss": 3.1893, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.46484375, |
|
"learning_rate": 0.001, |
|
"loss": 3.0317, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.498046875, |
|
"learning_rate": 0.001, |
|
"loss": 3.0754, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 0.001, |
|
"loss": 3.0712, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 0.001, |
|
"loss": 3.0459, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 0.001, |
|
"loss": 3.1248, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.515625, |
|
"learning_rate": 0.001, |
|
"loss": 3.0001, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 0.001, |
|
"loss": 3.0746, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 0.001, |
|
"loss": 3.0217, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.65234375, |
|
"learning_rate": 0.001, |
|
"loss": 3.0922, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.478515625, |
|
"learning_rate": 0.001, |
|
"loss": 3.0769, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 0.001, |
|
"loss": 3.0214, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 0.001, |
|
"loss": 3.0603, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 0.001, |
|
"loss": 3.0081, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.419921875, |
|
"learning_rate": 0.001, |
|
"loss": 3.1073, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 0.001, |
|
"loss": 3.0161, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 0.001, |
|
"loss": 3.0399, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 0.001, |
|
"loss": 3.0176, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 0.001, |
|
"loss": 3.0438, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.45703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.9605, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.490234375, |
|
"learning_rate": 0.001, |
|
"loss": 3.1799, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 0.001, |
|
"loss": 3.0367, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.48046875, |
|
"learning_rate": 0.001, |
|
"loss": 3.0772, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.9758, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.9411, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.419921875, |
|
"learning_rate": 0.001, |
|
"loss": 3.0461, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.466796875, |
|
"learning_rate": 0.001, |
|
"loss": 3.0639, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.51171875, |
|
"learning_rate": 0.001, |
|
"loss": 3.0514, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.396484375, |
|
"learning_rate": 0.001, |
|
"loss": 2.9288, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.9909, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 0.001, |
|
"loss": 3.0404, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.4296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.9636, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.44921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.9691, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 0.001, |
|
"loss": 3.0421, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 0.001, |
|
"loss": 3.0106, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.9206, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.9851, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.8973, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.9926, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 0.001, |
|
"loss": 2.9713, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.5, |
|
"learning_rate": 0.001, |
|
"loss": 2.9641, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.9165, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 0.001, |
|
"loss": 2.9043, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 0.001, |
|
"loss": 3.0078, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 0.001, |
|
"loss": 2.9917, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.51953125, |
|
"learning_rate": 0.001, |
|
"loss": 2.9517, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.9723, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.8912, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.997, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.419921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.8812, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.396484375, |
|
"learning_rate": 0.001, |
|
"loss": 2.9449, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 0.001, |
|
"loss": 2.8727, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.4296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.9621, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.8718, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.924, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.975, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.8352, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.4375, |
|
"learning_rate": 0.001, |
|
"loss": 2.8661, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.8946, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 0.001, |
|
"loss": 2.8299, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.8354, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.8615, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.858, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.43359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.9946, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.470703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.9449, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.447265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.8862, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.8575, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.8596, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.419921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.9418, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.8957, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 0.4296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.8291, |
|
"step": 1565 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.455078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.8985, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.9283, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.9447, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 0.001, |
|
"loss": 2.9154, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.470703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.9456, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.8641, |
|
"step": 1595 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.996, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.9102, |
|
"step": 1605 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.8548, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.423828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.8116, |
|
"step": 1615 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.9482, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.8992, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.8682, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.8866, |
|
"step": 1635 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.8674, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.8261, |
|
"step": 1645 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 0.001, |
|
"loss": 3.0015, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.859, |
|
"step": 1655 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.451171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.8403, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.8866, |
|
"step": 1665 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.8863, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.8138, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 0.001, |
|
"loss": 2.9055, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.494140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.8598, |
|
"step": 1685 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.8609, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.7287, |
|
"step": 1695 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.43359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.8634, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.419921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.8631, |
|
"step": 1705 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.43359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.8012, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.8285, |
|
"step": 1715 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.8212, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.8448, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.8296, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.8172, |
|
"step": 1735 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.8287, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.396484375, |
|
"learning_rate": 0.001, |
|
"loss": 2.8966, |
|
"step": 1745 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.7409, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.8212, |
|
"step": 1755 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.7865, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.423828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.8383, |
|
"step": 1765 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.8843, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.7954, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.7362, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.7698, |
|
"step": 1785 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.4921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.9273, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 0.001, |
|
"loss": 2.774, |
|
"step": 1795 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.842, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.443359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.7766, |
|
"step": 1805 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.7165, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.427734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.8396, |
|
"step": 1815 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.7133, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.7195, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.47265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.7769, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.8127, |
|
"step": 1835 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.7074, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.8092, |
|
"step": 1845 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.4375, |
|
"learning_rate": 0.001, |
|
"loss": 2.8355, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.7602, |
|
"step": 1855 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.7212, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.8304, |
|
"step": 1865 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.8164, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 0.001, |
|
"loss": 2.8169, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.7487, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.7734, |
|
"step": 1885 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.7757, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.8023, |
|
"step": 1895 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 0.001, |
|
"loss": 2.7523, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 0.001, |
|
"loss": 2.818, |
|
"step": 1905 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.7938, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.7914, |
|
"step": 1915 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.431640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.7482, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.474609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.7364, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.7383, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.7172, |
|
"step": 1935 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6953, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.6664, |
|
"step": 1945 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.6742, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.44140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.7493, |
|
"step": 1955 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.7087, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 0.001, |
|
"loss": 2.7275, |
|
"step": 1965 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.4296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.7522, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.7288, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.7241, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.7156, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.7864, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.46875, |
|
"learning_rate": 0.001, |
|
"loss": 2.7317, |
|
"step": 1995 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6977, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 0.001, |
|
"loss": 2.7876, |
|
"step": 2005 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.8518, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6466, |
|
"step": 2015 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.8142, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6646, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.7533, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.7672, |
|
"step": 2035 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6538, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.7903, |
|
"step": 2045 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6225, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6761, |
|
"step": 2055 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.7514, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.7695, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6763, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.7686, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6337, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 0.001, |
|
"loss": 2.7801, |
|
"step": 2085 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.7185, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 0.001, |
|
"loss": 2.7413, |
|
"step": 2095 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6984, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.7253, |
|
"step": 2105 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.7485, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.6853, |
|
"step": 2115 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.6931, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6173, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.624, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6629, |
|
"step": 2135 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.7097, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.7286, |
|
"step": 2145 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.7264, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.47265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6504, |
|
"step": 2155 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 0.001, |
|
"loss": 2.7058, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.7127, |
|
"step": 2165 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6349, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.375, |
|
"learning_rate": 0.001, |
|
"loss": 2.7502, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.7021, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.470703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6564, |
|
"step": 2185 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6715, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.7572, |
|
"step": 2195 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.6954, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.7125, |
|
"step": 2205 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6772, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.8311, |
|
"step": 2215 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6858, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.44140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6552, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.7589, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6547, |
|
"step": 2235 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6727, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.7218, |
|
"step": 2245 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.439453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6825, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.7252, |
|
"step": 2255 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.7026, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.7449, |
|
"step": 2265 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5876, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.6584, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6238, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.7556, |
|
"step": 2285 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.6804, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6864, |
|
"step": 2295 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5995, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.6537, |
|
"step": 2305 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.419921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.6645, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.7136, |
|
"step": 2315 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.6908, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.7102, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.50390625, |
|
"learning_rate": 0.001, |
|
"loss": 2.7089, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6295, |
|
"step": 2335 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6105, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.41796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.6332, |
|
"step": 2345 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6549, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.7258, |
|
"step": 2355 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6932, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5853, |
|
"step": 2365 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6075, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.7695, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.675, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6556, |
|
"step": 2385 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6259, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.627, |
|
"step": 2395 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.4453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6976, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.6732, |
|
"step": 2405 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.6387, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.613, |
|
"step": 2415 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6863, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6637, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5994, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6521, |
|
"step": 2435 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.423828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6928, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6497, |
|
"step": 2445 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.412109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6095, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.637, |
|
"step": 2455 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5598, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.419921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5683, |
|
"step": 2465 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 0.001, |
|
"loss": 2.7179, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5267, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 0.001, |
|
"loss": 2.643, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5632, |
|
"step": 2485 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.611, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.572, |
|
"step": 2495 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6059, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5368, |
|
"step": 2505 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.447265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6391, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6533, |
|
"step": 2515 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6177, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5711, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6115, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.674, |
|
"step": 2535 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6826, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5226, |
|
"step": 2545 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6192, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.6914, |
|
"step": 2555 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6802, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5906, |
|
"step": 2565 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5617, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5559, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5432, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6188, |
|
"step": 2585 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.375, |
|
"learning_rate": 0.001, |
|
"loss": 2.594, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6327, |
|
"step": 2595 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.396484375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6117, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6003, |
|
"step": 2605 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5405, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5711, |
|
"step": 2615 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6618, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 0.001, |
|
"loss": 2.63, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5909, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5804, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6332, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6762, |
|
"step": 2645 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5429, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.6548, |
|
"step": 2655 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5452, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6768, |
|
"step": 2665 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6133, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6124, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5989, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5802, |
|
"step": 2685 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5502, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.416015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5251, |
|
"step": 2695 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6167, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5672, |
|
"step": 2705 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5343, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5582, |
|
"step": 2715 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5223, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5679, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5854, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5077, |
|
"step": 2735 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5738, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5829, |
|
"step": 2745 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5914, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5735, |
|
"step": 2755 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.400390625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5015, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6339, |
|
"step": 2765 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.396484375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6208, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5872, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5484, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.6061, |
|
"step": 2785 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.7287, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4569, |
|
"step": 2795 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5641, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5877, |
|
"step": 2805 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.40234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5578, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5764, |
|
"step": 2815 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5233, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5845, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6143, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5839, |
|
"step": 2835 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5518, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5763, |
|
"step": 2845 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5789, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6161, |
|
"step": 2855 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4979, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.462890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5239, |
|
"step": 2865 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5278, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5839, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5451, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5832, |
|
"step": 2885 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6994, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5226, |
|
"step": 2895 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.568, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5504, |
|
"step": 2905 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5429, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5131, |
|
"step": 2915 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5571, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 0.001, |
|
"loss": 2.63, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5114, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.587, |
|
"step": 2935 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6734, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6092, |
|
"step": 2945 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4977, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5451, |
|
"step": 2955 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 0.001, |
|
"loss": 2.6318, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.489, |
|
"step": 2965 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4592, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5569, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4976, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5439, |
|
"step": 2985 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5567, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5471, |
|
"step": 2995 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5934, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.527, |
|
"step": 3005 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5382, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5225, |
|
"step": 3015 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.571, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5245, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5326, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5773, |
|
"step": 3035 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5171, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5531, |
|
"step": 3045 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5284, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5586, |
|
"step": 3055 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5146, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5981, |
|
"step": 3065 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5523, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5508, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5361, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.396484375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6033, |
|
"step": 3085 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6071, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5747, |
|
"step": 3095 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4875, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5199, |
|
"step": 3105 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5829, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4627, |
|
"step": 3115 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5592, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.6057, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.538, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5633, |
|
"step": 3135 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5373, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4773, |
|
"step": 3145 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.41015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5214, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.498, |
|
"step": 3155 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4634, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4578, |
|
"step": 3165 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5839, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.40625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5751, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4664, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4624, |
|
"step": 3185 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.42578125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5735, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5191, |
|
"step": 3195 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.408203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4841, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5304, |
|
"step": 3205 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4461, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5183, |
|
"step": 3215 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.481, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.6264, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.544, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5118, |
|
"step": 3235 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5537, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5551, |
|
"step": 3245 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4869, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5788, |
|
"step": 3255 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5389, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5014, |
|
"step": 3265 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5224, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4611, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5188, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5296, |
|
"step": 3285 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5364, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5207, |
|
"step": 3295 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5104, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 0.001, |
|
"loss": 2.513, |
|
"step": 3305 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.523, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4677, |
|
"step": 3315 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.528, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5352, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4736, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5234, |
|
"step": 3335 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4768, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5709, |
|
"step": 3345 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4996, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4553, |
|
"step": 3355 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5061, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4686, |
|
"step": 3365 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5281, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5412, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4708, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4491, |
|
"step": 3385 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4432, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5601, |
|
"step": 3395 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4834, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5377, |
|
"step": 3405 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4399, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4336, |
|
"step": 3415 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5348, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4751, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.39453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4556, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.426, |
|
"step": 3435 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4736, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4077, |
|
"step": 3445 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5495, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.380859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4385, |
|
"step": 3455 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5411, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4391, |
|
"step": 3465 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5512, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5375, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4875, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5244, |
|
"step": 3485 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5694, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5128, |
|
"step": 3495 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4621, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3882, |
|
"step": 3505 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4457, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.6129, |
|
"step": 3515 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4673, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4322, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4767, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5015, |
|
"step": 3535 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4496, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4416, |
|
"step": 3545 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4924, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.444, |
|
"step": 3555 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5554, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5222, |
|
"step": 3565 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4904, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.51, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.491, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5569, |
|
"step": 3585 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4702, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4147, |
|
"step": 3595 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5733, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4938, |
|
"step": 3605 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4076, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4868, |
|
"step": 3615 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4626, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5284, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4889, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4571, |
|
"step": 3635 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5282, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5391, |
|
"step": 3645 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5301, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4556, |
|
"step": 3655 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4623, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5289, |
|
"step": 3665 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4913, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4986, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5109, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5675, |
|
"step": 3685 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.404296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5055, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4677, |
|
"step": 3695 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5347, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.4140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4168, |
|
"step": 3705 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4156, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5117, |
|
"step": 3715 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5008, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5145, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4302, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5553, |
|
"step": 3735 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4049, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4319, |
|
"step": 3745 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5425, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5042, |
|
"step": 3755 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4445, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4256, |
|
"step": 3765 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4933, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4679, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4482, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4219, |
|
"step": 3785 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5463, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4565, |
|
"step": 3795 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4338, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4545, |
|
"step": 3805 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4431, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5105, |
|
"step": 3815 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4994, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4036, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3943, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4016, |
|
"step": 3835 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3733, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4746, |
|
"step": 3845 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4641, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4163, |
|
"step": 3855 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5641, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4459, |
|
"step": 3865 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.48, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3711, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4004, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3915, |
|
"step": 3885 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.3828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4762, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4994, |
|
"step": 3895 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4486, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4192, |
|
"step": 3905 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4359, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4987, |
|
"step": 3915 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.494, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4081, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4449, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4008, |
|
"step": 3935 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4596, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4376, |
|
"step": 3945 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4765, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4726, |
|
"step": 3955 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4462, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3776, |
|
"step": 3965 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3864, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4396, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5275, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5021, |
|
"step": 3985 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5053, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4067, |
|
"step": 3995 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3718, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.396, |
|
"step": 4005 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4531, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4581, |
|
"step": 4015 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4616, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4446, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3241, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5278, |
|
"step": 4035 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4402, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.375, |
|
"learning_rate": 0.001, |
|
"loss": 2.526, |
|
"step": 4045 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4471, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5027, |
|
"step": 4055 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4513, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4194, |
|
"step": 4065 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4691, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4777, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3957, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4746, |
|
"step": 4085 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3968, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4137, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5123, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4194, |
|
"step": 4105 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3577, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4128, |
|
"step": 4115 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4063, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.37890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4535, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3966, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4123, |
|
"step": 4135 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.45703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4634, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3939, |
|
"step": 4145 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3884, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4213, |
|
"step": 4155 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3715, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4166, |
|
"step": 4165 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4946, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4918, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.381, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3991, |
|
"step": 4185 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4139, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5107, |
|
"step": 4195 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.387, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.447, |
|
"step": 4205 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3473, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3907, |
|
"step": 4215 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3656, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.5121, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4212, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3811, |
|
"step": 4235 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.368, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4631, |
|
"step": 4245 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4267, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4453, |
|
"step": 4255 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3846, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3578, |
|
"step": 4265 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4811, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4718, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4631, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.428, |
|
"step": 4285 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4018, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3948, |
|
"step": 4295 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3939, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3406, |
|
"step": 4305 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.427, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4806, |
|
"step": 4315 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3565, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4658, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3383, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4433, |
|
"step": 4335 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4428, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3914, |
|
"step": 4345 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4283, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4238, |
|
"step": 4355 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3536, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3306, |
|
"step": 4365 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4716, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3698, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4516, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3544, |
|
"step": 4385 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4579, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4471, |
|
"step": 4395 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4688, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4272, |
|
"step": 4405 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3917, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4167, |
|
"step": 4415 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4342, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.5076, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4897, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4231, |
|
"step": 4435 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.358, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3515, |
|
"step": 4445 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4139, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5031, |
|
"step": 4455 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3742, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.467, |
|
"step": 4465 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3063, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2742, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3993, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3711, |
|
"step": 4485 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3366, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3711, |
|
"step": 4495 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4112, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.5269, |
|
"step": 4505 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4256, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4714, |
|
"step": 4515 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4565, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4071, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4281, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4384, |
|
"step": 4535 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4587, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4184, |
|
"step": 4545 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3867, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3673, |
|
"step": 4555 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4027, |
|
"step": 4565 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4282, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4324, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4159, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3382, |
|
"step": 4585 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3968, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3331, |
|
"step": 4595 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4229, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.37109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3908, |
|
"step": 4605 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4211, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4101, |
|
"step": 4615 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3875, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3807, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3716, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4305, |
|
"step": 4635 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4326, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4278, |
|
"step": 4645 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3428, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3823, |
|
"step": 4655 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4197, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4177, |
|
"step": 4665 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2897, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3867, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2943, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4413, |
|
"step": 4685 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3828, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.451, |
|
"step": 4695 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3077, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4278, |
|
"step": 4705 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.371, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.429, |
|
"step": 4715 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3383, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3408, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.388671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4862, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.5457, |
|
"step": 4735 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4219, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.277, |
|
"step": 4745 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3562, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4333, |
|
"step": 4755 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3555, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4844, |
|
"step": 4765 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.435546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4931, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3959, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3608, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3661, |
|
"step": 4785 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4261, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3362, |
|
"step": 4795 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4043, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3257, |
|
"step": 4805 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4065, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3205, |
|
"step": 4815 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4849, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.373046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4492, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3474, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3716, |
|
"step": 4835 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3561, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3908, |
|
"step": 4845 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4253, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3876, |
|
"step": 4855 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3442, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3322, |
|
"step": 4865 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3931, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4097, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3871, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.383, |
|
"step": 4885 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3096, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3468, |
|
"step": 4895 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.339, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3719, |
|
"step": 4905 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3645, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3295, |
|
"step": 4915 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4452, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3908, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3639, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3453, |
|
"step": 4935 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4583, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3326, |
|
"step": 4945 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3368, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3238, |
|
"step": 4955 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3857, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4269, |
|
"step": 4965 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4069, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4184, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3375, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3276, |
|
"step": 4985 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.387, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4152, |
|
"step": 4995 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4344, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3746, |
|
"step": 5005 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3692, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3626, |
|
"step": 5015 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4002, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4398, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2824, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3933, |
|
"step": 5035 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3513, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3421, |
|
"step": 5045 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3887, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3859, |
|
"step": 5055 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3026, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.326, |
|
"step": 5065 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3422, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3424, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.339, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3462, |
|
"step": 5085 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3583, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4476, |
|
"step": 5095 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3964, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3881, |
|
"step": 5105 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3989, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3513, |
|
"step": 5115 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4226, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.384765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.318, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.55078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3471, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3504, |
|
"step": 5135 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3703, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.356, |
|
"step": 5145 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3881, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3372, |
|
"step": 5155 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2871, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4192, |
|
"step": 5165 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3266, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3311, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3426, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3464, |
|
"step": 5185 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4051, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3669, |
|
"step": 5195 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4088, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3947, |
|
"step": 5205 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3573, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4141, |
|
"step": 5215 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3164, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3358, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.324, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4203, |
|
"step": 5235 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3093, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3323, |
|
"step": 5245 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3769, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3383, |
|
"step": 5255 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2656, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2729, |
|
"step": 5265 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3279, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4237, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3641, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3602, |
|
"step": 5285 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.385, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3023, |
|
"step": 5295 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3724, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3638, |
|
"step": 5305 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3663, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.365234375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4862, |
|
"step": 5315 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3922, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3677, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3606, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3088, |
|
"step": 5335 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4073, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3641, |
|
"step": 5345 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3953, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3133, |
|
"step": 5355 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3236, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3984, |
|
"step": 5365 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3896, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3215, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4614, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3136, |
|
"step": 5385 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3548, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3609, |
|
"step": 5395 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3043, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3837, |
|
"step": 5405 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4236, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3492, |
|
"step": 5415 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3594, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4392, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3757, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3445, |
|
"step": 5435 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4049, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4535, |
|
"step": 5445 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3116, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3645, |
|
"step": 5455 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3229, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3648, |
|
"step": 5465 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4006, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.357421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3397, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3543, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3028, |
|
"step": 5485 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3388, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3664, |
|
"step": 5495 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2125, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4234, |
|
"step": 5505 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3316, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.367, |
|
"step": 5515 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4614, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2681, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3821, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3998, |
|
"step": 5535 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3163, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3876, |
|
"step": 5545 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3965, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.382, |
|
"step": 5555 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2978, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3291, |
|
"step": 5565 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3339, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3565, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3237, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4315, |
|
"step": 5585 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.231, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3903, |
|
"step": 5595 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2526, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3466, |
|
"step": 5605 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3012, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4253, |
|
"step": 5615 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2762, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2793, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3468, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4084, |
|
"step": 5635 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.347, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3839, |
|
"step": 5645 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3001, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2755, |
|
"step": 5655 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2986, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3897, |
|
"step": 5665 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3523, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3523, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3354, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3529, |
|
"step": 5685 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.357, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.368, |
|
"step": 5695 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3112, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3848, |
|
"step": 5705 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3793, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3272, |
|
"step": 5715 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.4029, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3996, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.329, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3392, |
|
"step": 5735 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3056, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3413, |
|
"step": 5745 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3949, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.438, |
|
"step": 5755 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.4453, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3045, |
|
"step": 5765 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2906, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2219, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3613, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3199, |
|
"step": 5785 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3443, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2965, |
|
"step": 5795 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2862, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3733, |
|
"step": 5805 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2991, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3591, |
|
"step": 5815 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3919, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2326, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3035, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3228, |
|
"step": 5835 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3327, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.265, |
|
"step": 5845 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3723, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2181, |
|
"step": 5855 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.314, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2494, |
|
"step": 5865 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 0.38671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2576, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3208, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3072, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3374, |
|
"step": 5885 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3156, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3659, |
|
"step": 5895 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3847, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.29, |
|
"step": 5905 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.282, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3172, |
|
"step": 5915 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2918, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3004, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3571, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3381, |
|
"step": 5935 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2733, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3277, |
|
"step": 5945 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4188, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2578, |
|
"step": 5955 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 0.392578125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2164, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.361328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3124, |
|
"step": 5965 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2956, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2811, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3955, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3116, |
|
"step": 5985 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3184, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3773, |
|
"step": 5995 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3402, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3409, |
|
"step": 6005 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3015, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2731, |
|
"step": 6015 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.352, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3109, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3099, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2662, |
|
"step": 6035 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3723, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3657, |
|
"step": 6045 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3337, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2631, |
|
"step": 6055 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3135, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3438, |
|
"step": 6065 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2664, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3946, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2588, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2975, |
|
"step": 6085 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3101, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3642, |
|
"step": 6095 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3494, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2839, |
|
"step": 6105 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3428, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2826, |
|
"step": 6115 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3481, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3043, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2965, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2599, |
|
"step": 6135 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2636, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3423, |
|
"step": 6145 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2692, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2963, |
|
"step": 6155 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2212, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3136, |
|
"step": 6165 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3659, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2902, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3175, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3527, |
|
"step": 6185 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3352, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.458984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3264, |
|
"step": 6195 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3021, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3128, |
|
"step": 6205 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3982, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3052, |
|
"step": 6215 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.269, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.357, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3996, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3695, |
|
"step": 6235 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3206, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2325, |
|
"step": 6245 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.334, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2761, |
|
"step": 6255 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3011, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3958, |
|
"step": 6265 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3143, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3211, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2919, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3387, |
|
"step": 6285 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3049, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3557, |
|
"step": 6295 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3384, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3372, |
|
"step": 6305 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2197, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2533, |
|
"step": 6315 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2072, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2987, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2634, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3597, |
|
"step": 6335 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2872, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3196, |
|
"step": 6345 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2671, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2474, |
|
"step": 6355 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.334, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3749, |
|
"step": 6365 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2718, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3395, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3243, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3901, |
|
"step": 6385 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2454, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.287, |
|
"step": 6395 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.283, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3251, |
|
"step": 6405 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3293, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.353515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2905, |
|
"step": 6415 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2483, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3314, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3804, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3254, |
|
"step": 6435 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3487, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3102, |
|
"step": 6445 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2659, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3119, |
|
"step": 6455 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3058, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2881, |
|
"step": 6465 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2166, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3242, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3057, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2805, |
|
"step": 6485 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2795, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2733, |
|
"step": 6495 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2482, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2718, |
|
"step": 6505 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2996, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2118, |
|
"step": 6515 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.28515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3748, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3675, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2712, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2544, |
|
"step": 6535 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3075, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.299, |
|
"step": 6545 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.327, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3941, |
|
"step": 6555 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2842, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3262, |
|
"step": 6565 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3961, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3889, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3397, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2399, |
|
"step": 6585 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3182, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3264, |
|
"step": 6595 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3071, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.4024, |
|
"step": 6605 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2123, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2309, |
|
"step": 6615 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.1942, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2871, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3449, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3352, |
|
"step": 6635 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3952, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2695, |
|
"step": 6645 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3078, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3105, |
|
"step": 6655 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3317, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3116, |
|
"step": 6665 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2339, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3095, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.239, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.4118, |
|
"step": 6685 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2235, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.333984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3407, |
|
"step": 6695 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2263, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2005, |
|
"step": 6705 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3457, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.1686, |
|
"step": 6715 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2701, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2268, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.232, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2332, |
|
"step": 6735 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3156, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2292, |
|
"step": 6745 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.267, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3458, |
|
"step": 6755 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2835, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2314, |
|
"step": 6765 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2215, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3393, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2511, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2021, |
|
"step": 6785 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2558, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2729, |
|
"step": 6795 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2082, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2801, |
|
"step": 6805 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.312, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2518, |
|
"step": 6815 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2459, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.1997, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3033, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2142, |
|
"step": 6835 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3164, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2225, |
|
"step": 6845 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3232, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2207, |
|
"step": 6855 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3717, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2952, |
|
"step": 6865 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2302, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3487, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2465, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3268, |
|
"step": 6885 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2749, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3893, |
|
"step": 6895 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3373, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2797, |
|
"step": 6905 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3495, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.297, |
|
"step": 6915 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2722, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2851, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2873, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3203, |
|
"step": 6935 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3418, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2656, |
|
"step": 6945 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2917, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3157, |
|
"step": 6955 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3485, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3457, |
|
"step": 6965 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3021, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3554, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.214, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2834, |
|
"step": 6985 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2508, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3555, |
|
"step": 6995 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2949, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3022, |
|
"step": 7005 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2868, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3297, |
|
"step": 7015 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.257, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2424, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2686, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2936, |
|
"step": 7035 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3123, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2571, |
|
"step": 7045 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2647, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.304, |
|
"step": 7055 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2541, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.354, |
|
"step": 7065 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.336, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3124, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3281, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3212, |
|
"step": 7085 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.3671875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3689, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2572, |
|
"step": 7095 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.335, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2974, |
|
"step": 7105 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3526, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3439, |
|
"step": 7115 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3026, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3288, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2952, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2942, |
|
"step": 7135 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2384, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2436, |
|
"step": 7145 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3166, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2552, |
|
"step": 7155 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2395, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2334, |
|
"step": 7165 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3261, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2233, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3239, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.208, |
|
"step": 7185 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2938, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3055, |
|
"step": 7195 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2853, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2827, |
|
"step": 7205 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.390625, |
|
"learning_rate": 0.001, |
|
"loss": 2.234, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2621, |
|
"step": 7215 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2557, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.1625, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2853, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.1697, |
|
"step": 7235 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3439, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2568, |
|
"step": 7245 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2698, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3522, |
|
"step": 7255 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2786, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2758, |
|
"step": 7265 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2267, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2619, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.235, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.1946, |
|
"step": 7285 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3306, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.3515625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3197, |
|
"step": 7295 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3484, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2644, |
|
"step": 7305 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3429, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.248, |
|
"step": 7315 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2096, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.376953125, |
|
"learning_rate": 0.001, |
|
"loss": 2.1851, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2935, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3234, |
|
"step": 7335 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2555, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2168, |
|
"step": 7345 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3627, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2858, |
|
"step": 7355 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3002, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2817, |
|
"step": 7365 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2995, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2896, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2691, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3571, |
|
"step": 7385 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2753, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.1712, |
|
"step": 7395 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3137, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3335, |
|
"step": 7405 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.1866, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.219, |
|
"step": 7415 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3086, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2989, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2209, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2487, |
|
"step": 7435 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2269, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2807, |
|
"step": 7445 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3287, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2568, |
|
"step": 7455 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2662, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2783, |
|
"step": 7465 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.239, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2978, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3329, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.1576, |
|
"step": 7485 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2521, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2536, |
|
"step": 7495 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2902, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2477, |
|
"step": 7505 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2292, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2579, |
|
"step": 7515 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2531, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3611, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2527, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3296, |
|
"step": 7535 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2596, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2396, |
|
"step": 7545 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2573, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3043, |
|
"step": 7555 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2121, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3462, |
|
"step": 7565 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3078, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.316, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.292, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3084, |
|
"step": 7585 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.235, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2572, |
|
"step": 7595 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2612, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2995, |
|
"step": 7605 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2694, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2282, |
|
"step": 7615 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2096, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2345, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2535, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2774, |
|
"step": 7635 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2084, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2479, |
|
"step": 7645 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2153, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2901, |
|
"step": 7655 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2653, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2731, |
|
"step": 7665 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.1943, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.1499, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2693, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2261, |
|
"step": 7685 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.1544, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2456, |
|
"step": 7695 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2612, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2848, |
|
"step": 7705 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2257, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2658, |
|
"step": 7715 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2202, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2599, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.369140625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3019, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3055, |
|
"step": 7735 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.1914, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2936, |
|
"step": 7745 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2759, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2421, |
|
"step": 7755 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3079, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2364, |
|
"step": 7765 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2714, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2086, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2275, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3385, |
|
"step": 7785 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2574, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2743, |
|
"step": 7795 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2457, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3624, |
|
"step": 7805 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2603, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2317, |
|
"step": 7815 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2466, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3946, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2815, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2996, |
|
"step": 7835 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2776, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.184, |
|
"step": 7845 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2714, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2512, |
|
"step": 7855 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2536, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.1873, |
|
"step": 7865 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.337890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2324, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3213, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2541, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2901, |
|
"step": 7885 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3655, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.1354, |
|
"step": 7895 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2691, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.159, |
|
"step": 7905 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.1469, |
|
"step": 7910 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2344, |
|
"step": 7915 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2182, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3306, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.1549, |
|
"step": 7930 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3377, |
|
"step": 7935 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2853, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2024, |
|
"step": 7945 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2813, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3001, |
|
"step": 7955 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2503, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.1995, |
|
"step": 7965 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2804, |
|
"step": 7970 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2591, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2143, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2275, |
|
"step": 7985 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3919, |
|
"step": 7990 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.309, |
|
"step": 7995 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2363, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3152, |
|
"step": 8005 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2732, |
|
"step": 8010 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2561, |
|
"step": 8015 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2615, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2131, |
|
"step": 8025 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2894, |
|
"step": 8030 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3007, |
|
"step": 8035 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2376, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3124, |
|
"step": 8045 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.1713, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3618, |
|
"step": 8055 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3386, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2925, |
|
"step": 8065 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2123, |
|
"step": 8070 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2016, |
|
"step": 8075 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2361, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2677, |
|
"step": 8085 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2364, |
|
"step": 8090 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2845, |
|
"step": 8095 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2399, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2638, |
|
"step": 8105 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2932, |
|
"step": 8110 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2401, |
|
"step": 8115 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2824, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.35546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2077, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2046, |
|
"step": 8130 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.1893, |
|
"step": 8135 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2769, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.217, |
|
"step": 8145 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2389, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.266, |
|
"step": 8155 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2453, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.281, |
|
"step": 8165 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2076, |
|
"step": 8170 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.1804, |
|
"step": 8175 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2309, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2703, |
|
"step": 8185 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3598, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2038, |
|
"step": 8195 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2529, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2816, |
|
"step": 8205 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.238, |
|
"step": 8210 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2579, |
|
"step": 8215 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.1992, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.1818, |
|
"step": 8225 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.1506, |
|
"step": 8230 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2157, |
|
"step": 8235 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2456, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2597, |
|
"step": 8245 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2447, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.1631, |
|
"step": 8255 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2394, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2786, |
|
"step": 8265 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2592, |
|
"step": 8270 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2017, |
|
"step": 8275 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2654, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2704, |
|
"step": 8285 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.34375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3055, |
|
"step": 8290 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.1985, |
|
"step": 8295 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.259, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.276, |
|
"step": 8305 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.1859, |
|
"step": 8310 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2696, |
|
"step": 8315 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2577, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2533, |
|
"step": 8325 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3143, |
|
"step": 8330 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2516, |
|
"step": 8335 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2624, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.1764, |
|
"step": 8345 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2136, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2297, |
|
"step": 8355 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2867, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2896, |
|
"step": 8365 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2175, |
|
"step": 8370 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.19, |
|
"step": 8375 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.134, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2358, |
|
"step": 8385 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.1986, |
|
"step": 8390 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.203, |
|
"step": 8395 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2181, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2655, |
|
"step": 8405 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2862, |
|
"step": 8410 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3058, |
|
"step": 8415 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2694, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2549, |
|
"step": 8425 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.34765625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2976, |
|
"step": 8430 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2406, |
|
"step": 8435 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2485, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.1618, |
|
"step": 8445 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2759, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2618, |
|
"step": 8455 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.203, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.1972, |
|
"step": 8465 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.1704, |
|
"step": 8470 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2045, |
|
"step": 8475 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.188, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2592, |
|
"step": 8485 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2913, |
|
"step": 8490 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2592, |
|
"step": 8495 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2068, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2432, |
|
"step": 8505 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.1883, |
|
"step": 8510 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2438, |
|
"step": 8515 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.1702, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2469, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.3203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.1156, |
|
"step": 8530 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.1934, |
|
"step": 8535 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3777, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2576, |
|
"step": 8545 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2462, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2936, |
|
"step": 8555 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3116, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2689, |
|
"step": 8565 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.36328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.3067, |
|
"step": 8570 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3342, |
|
"step": 8575 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2079, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.33984375, |
|
"learning_rate": 0.001, |
|
"loss": 2.243, |
|
"step": 8585 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2, |
|
"step": 8590 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2437, |
|
"step": 8595 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.1937, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2336, |
|
"step": 8605 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.314453125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2758, |
|
"step": 8610 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.345703125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2071, |
|
"step": 8615 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.149, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.257, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.207, |
|
"step": 8630 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2858, |
|
"step": 8635 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.328125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2318, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.295, |
|
"step": 8645 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.1429, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.1635, |
|
"step": 8655 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.3268, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2371, |
|
"step": 8665 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2604, |
|
"step": 8670 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2545, |
|
"step": 8675 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.1592, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2776, |
|
"step": 8685 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.213, |
|
"step": 8690 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.287109375, |
|
"learning_rate": 0.001, |
|
"loss": 2.1203, |
|
"step": 8695 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3063, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2479, |
|
"step": 8705 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2708, |
|
"step": 8710 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.326171875, |
|
"learning_rate": 0.001, |
|
"loss": 2.308, |
|
"step": 8715 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2732, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2556, |
|
"step": 8725 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.3359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2082, |
|
"step": 8730 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2652, |
|
"step": 8735 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 0.296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3049, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2438, |
|
"step": 8745 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.1741, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2807, |
|
"step": 8755 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2431, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2556, |
|
"step": 8765 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2498, |
|
"step": 8770 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2788, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.29296875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2636, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.330078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2968, |
|
"step": 8785 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2984, |
|
"step": 8790 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2495, |
|
"step": 8795 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2131, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.33203125, |
|
"learning_rate": 0.001, |
|
"loss": 2.176, |
|
"step": 8805 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3043, |
|
"step": 8810 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2409, |
|
"step": 8815 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.1861, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2472, |
|
"step": 8825 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 0.306640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2569, |
|
"step": 8830 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.32421875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2432, |
|
"step": 8835 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2889, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.1974, |
|
"step": 8845 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.3096, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.1649, |
|
"step": 8855 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.298828125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2163, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.302734375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2611, |
|
"step": 8865 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.245, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.30078125, |
|
"learning_rate": 0.001, |
|
"loss": 2.1904, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.310546875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2698, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.3046875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2447, |
|
"step": 8885 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.349609375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2229, |
|
"step": 8890 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.1953, |
|
"step": 8895 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2675, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.322265625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3104, |
|
"step": 8905 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.31640625, |
|
"learning_rate": 0.001, |
|
"loss": 2.3058, |
|
"step": 8910 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.294921875, |
|
"learning_rate": 0.001, |
|
"loss": 2.1981, |
|
"step": 8915 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.1727, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.1917, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.291015625, |
|
"learning_rate": 0.001, |
|
"loss": 2.1705, |
|
"step": 8930 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.3125, |
|
"learning_rate": 0.001, |
|
"loss": 2.2193, |
|
"step": 8935 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2683, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.1673, |
|
"step": 8945 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.2890625, |
|
"learning_rate": 0.001, |
|
"loss": 2.2151, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.341796875, |
|
"learning_rate": 0.001, |
|
"loss": 2.2106, |
|
"step": 8955 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.318359375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2439, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.30859375, |
|
"learning_rate": 0.001, |
|
"loss": 2.2279, |
|
"step": 8965 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 2.2348995208740234, |
|
"eval_runtime": 2639.8137, |
|
"eval_samples_per_second": 6.016, |
|
"eval_steps_per_second": 0.752, |
|
"step": 8969 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 8969, |
|
"total_flos": 1.50079627163861e+17, |
|
"train_loss": 2.666482654486044, |
|
"train_runtime": 30048.5081, |
|
"train_samples_per_second": 4.775, |
|
"train_steps_per_second": 0.298 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 8969, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"total_flos": 1.50079627163861e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|