|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.7266610597140453,
  "global_step": 810,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 9.999999999999999e-06,
      "loss": 1.6216,
      "step": 10
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.301029995663981e-05,
      "loss": 1.285,
      "step": 20
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.4771212547196623e-05,
      "loss": 1.2164,
      "step": 30
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.6020599913279622e-05,
      "loss": 1.1761,
      "step": 40
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.6989700043360187e-05,
      "loss": 1.1658,
      "step": 50
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.7781512503836432e-05,
      "loss": 1.1914,
      "step": 60
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.8450980400142568e-05,
      "loss": 1.1324,
      "step": 70
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.9030899869919434e-05,
      "loss": 1.1571,
      "step": 80
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.9542425094393246e-05,
      "loss": 1.1633,
      "step": 90
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.9999999999999998e-05,
      "loss": 1.1991,
      "step": 100
    },
    {
      "epoch": 0.37,
      "learning_rate": 2e-05,
      "loss": 1.1653,
      "step": 110
    },
    {
      "epoch": 0.4,
      "learning_rate": 2e-05,
      "loss": 1.1272,
      "step": 120
    },
    {
      "epoch": 0.44,
      "learning_rate": 2e-05,
      "loss": 1.1054,
      "step": 130
    },
    {
      "epoch": 0.47,
      "learning_rate": 2e-05,
      "loss": 1.1359,
      "step": 140
    },
    {
      "epoch": 0.5,
      "learning_rate": 2e-05,
      "loss": 1.12,
      "step": 150
    },
    {
      "epoch": 0.54,
      "learning_rate": 2e-05,
      "loss": 1.0979,
      "step": 160
    },
    {
      "epoch": 0.57,
      "learning_rate": 2e-05,
      "loss": 1.1227,
      "step": 170
    },
    {
      "epoch": 0.61,
      "learning_rate": 2e-05,
      "loss": 1.1003,
      "step": 180
    },
    {
      "epoch": 0.64,
      "learning_rate": 2e-05,
      "loss": 1.0942,
      "step": 190
    },
    {
      "epoch": 0.67,
      "learning_rate": 2e-05,
      "loss": 1.1098,
      "step": 200
    },
    {
      "epoch": 0.71,
      "learning_rate": 2e-05,
      "loss": 1.0949,
      "step": 210
    },
    {
      "epoch": 0.74,
      "learning_rate": 2e-05,
      "loss": 1.056,
      "step": 220
    },
    {
      "epoch": 0.77,
      "learning_rate": 2e-05,
      "loss": 1.0822,
      "step": 230
    },
    {
      "epoch": 0.81,
      "learning_rate": 2e-05,
      "loss": 1.1038,
      "step": 240
    },
    {
      "epoch": 0.84,
      "learning_rate": 2e-05,
      "loss": 1.0813,
      "step": 250
    },
    {
      "epoch": 0.87,
      "learning_rate": 2e-05,
      "loss": 1.0721,
      "step": 260
    },
    {
      "epoch": 0.91,
      "learning_rate": 2e-05,
      "loss": 1.0604,
      "step": 270
    },
    {
      "epoch": 0.94,
      "learning_rate": 2e-05,
      "loss": 1.1054,
      "step": 280
    },
    {
      "epoch": 0.98,
      "learning_rate": 2e-05,
      "loss": 1.1074,
      "step": 290
    },
    {
      "epoch": 1.01,
      "learning_rate": 2e-05,
      "loss": 1.0971,
      "step": 300
    },
    {
      "epoch": 1.04,
      "learning_rate": 2e-05,
      "loss": 1.0838,
      "step": 310
    },
    {
      "epoch": 1.08,
      "learning_rate": 2e-05,
      "loss": 1.0419,
      "step": 320
    },
    {
      "epoch": 1.11,
      "learning_rate": 2e-05,
      "loss": 1.0528,
      "step": 330
    },
    {
      "epoch": 1.14,
      "learning_rate": 2e-05,
      "loss": 1.0475,
      "step": 340
    },
    {
      "epoch": 1.18,
      "learning_rate": 2e-05,
      "loss": 1.036,
      "step": 350
    },
    {
      "epoch": 1.21,
      "learning_rate": 2e-05,
      "loss": 1.0341,
      "step": 360
    },
    {
      "epoch": 1.25,
      "learning_rate": 2e-05,
      "loss": 1.0354,
      "step": 370
    },
    {
      "epoch": 1.28,
      "learning_rate": 2e-05,
      "loss": 1.0481,
      "step": 380
    },
    {
      "epoch": 1.31,
      "learning_rate": 2e-05,
      "loss": 1.0248,
      "step": 390
    },
    {
      "epoch": 1.35,
      "learning_rate": 2e-05,
      "loss": 1.0237,
      "step": 400
    },
    {
      "epoch": 1.38,
      "learning_rate": 2e-05,
      "loss": 1.052,
      "step": 410
    },
    {
      "epoch": 1.41,
      "learning_rate": 2e-05,
      "loss": 1.0383,
      "step": 420
    },
    {
      "epoch": 1.45,
      "learning_rate": 2e-05,
      "loss": 1.0423,
      "step": 430
    },
    {
      "epoch": 1.48,
      "learning_rate": 2e-05,
      "loss": 1.0181,
      "step": 440
    },
    {
      "epoch": 1.51,
      "learning_rate": 2e-05,
      "loss": 1.0059,
      "step": 450
    },
    {
      "epoch": 1.55,
      "learning_rate": 2e-05,
      "loss": 1.0392,
      "step": 460
    },
    {
      "epoch": 1.58,
      "learning_rate": 2e-05,
      "loss": 0.9981,
      "step": 470
    },
    {
      "epoch": 1.62,
      "learning_rate": 2e-05,
      "loss": 1.0486,
      "step": 480
    },
    {
      "epoch": 1.65,
      "learning_rate": 2e-05,
      "loss": 1.0198,
      "step": 490
    },
    {
      "epoch": 1.68,
      "learning_rate": 2e-05,
      "loss": 1.039,
      "step": 500
    },
    {
      "epoch": 1.72,
      "learning_rate": 2e-05,
      "loss": 1.0518,
      "step": 510
    },
    {
      "epoch": 1.75,
      "learning_rate": 2e-05,
      "loss": 1.0497,
      "step": 520
    },
    {
      "epoch": 1.78,
      "learning_rate": 2e-05,
      "loss": 1.0346,
      "step": 530
    },
    {
      "epoch": 1.82,
      "learning_rate": 2e-05,
      "loss": 1.046,
      "step": 540
    },
    {
      "epoch": 1.85,
      "learning_rate": 2e-05,
      "loss": 1.0148,
      "step": 550
    },
    {
      "epoch": 1.88,
      "learning_rate": 2e-05,
      "loss": 1.0166,
      "step": 560
    },
    {
      "epoch": 1.92,
      "learning_rate": 2e-05,
      "loss": 1.0207,
      "step": 570
    },
    {
      "epoch": 1.95,
      "learning_rate": 2e-05,
      "loss": 1.0118,
      "step": 580
    },
    {
      "epoch": 1.99,
      "learning_rate": 2e-05,
      "loss": 1.052,
      "step": 590
    },
    {
      "epoch": 2.02,
      "learning_rate": 2e-05,
      "loss": 1.0217,
      "step": 600
    },
    {
      "epoch": 2.05,
      "learning_rate": 2e-05,
      "loss": 1.0094,
      "step": 610
    },
    {
      "epoch": 2.09,
      "learning_rate": 2e-05,
      "loss": 1.0067,
      "step": 620
    },
    {
      "epoch": 2.12,
      "learning_rate": 2e-05,
      "loss": 1.0122,
      "step": 630
    },
    {
      "epoch": 2.15,
      "learning_rate": 2e-05,
      "loss": 0.9602,
      "step": 640
    },
    {
      "epoch": 2.19,
      "learning_rate": 2e-05,
      "loss": 0.9589,
      "step": 650
    },
    {
      "epoch": 2.22,
      "learning_rate": 2e-05,
      "loss": 0.9808,
      "step": 660
    },
    {
      "epoch": 2.26,
      "learning_rate": 2e-05,
      "loss": 1.0098,
      "step": 670
    },
    {
      "epoch": 2.29,
      "learning_rate": 2e-05,
      "loss": 1.0272,
      "step": 680
    },
    {
      "epoch": 2.32,
      "learning_rate": 2e-05,
      "loss": 0.996,
      "step": 690
    },
    {
      "epoch": 2.36,
      "learning_rate": 2e-05,
      "loss": 1.005,
      "step": 700
    },
    {
      "epoch": 2.39,
      "learning_rate": 2e-05,
      "loss": 1.0229,
      "step": 710
    },
    {
      "epoch": 2.42,
      "learning_rate": 2e-05,
      "loss": 1.0058,
      "step": 720
    },
    {
      "epoch": 2.46,
      "learning_rate": 2e-05,
      "loss": 1.0254,
      "step": 730
    },
    {
      "epoch": 2.49,
      "learning_rate": 2e-05,
      "loss": 1.0081,
      "step": 740
    },
    {
      "epoch": 2.52,
      "learning_rate": 2e-05,
      "loss": 1.0074,
      "step": 750
    },
    {
      "epoch": 2.56,
      "learning_rate": 2e-05,
      "loss": 1.0385,
      "step": 760
    },
    {
      "epoch": 2.59,
      "learning_rate": 2e-05,
      "loss": 1.0092,
      "step": 770
    },
    {
      "epoch": 2.63,
      "learning_rate": 2e-05,
      "loss": 0.9963,
      "step": 780
    },
    {
      "epoch": 2.66,
      "learning_rate": 2e-05,
      "loss": 1.0051,
      "step": 790
    },
    {
      "epoch": 2.69,
      "learning_rate": 2e-05,
      "loss": 0.9937,
      "step": 800
    },
    {
      "epoch": 2.73,
      "learning_rate": 2e-05,
      "loss": 1.0372,
      "step": 810
    }
  ],
  "max_steps": 891,
  "num_train_epochs": 3,
  "total_flos": 1.1487001401653985e+19,
  "trial_name": null,
  "trial_params": null
}