{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.6470588235294117, "global_step": 1120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 3.4948500216800934e-06, "loss": 0.6941, "step": 10 }, { "epoch": 0.03, "learning_rate": 5.730640178391189e-06, "loss": 0.5573, "step": 20 }, { "epoch": 0.04, "learning_rate": 6.90105620855803e-06, "loss": 0.5586, "step": 30 }, { "epoch": 0.06, "learning_rate": 7.657394585211274e-06, "loss": 0.5574, "step": 40 }, { "epoch": 0.07, "learning_rate": 8.217263382430936e-06, "loss": 0.5588, "step": 50 }, { "epoch": 0.09, "learning_rate": 8.661968799114844e-06, "loss": 0.5516, "step": 60 }, { "epoch": 0.1, "learning_rate": 9.030899869919434e-06, "loss": 0.5443, "step": 70 }, { "epoch": 0.12, "learning_rate": 9.346158598654881e-06, "loss": 0.5404, "step": 80 }, { "epoch": 0.13, "learning_rate": 9.621396430309407e-06, "loss": 0.5439, "step": 90 }, { "epoch": 0.15, "learning_rate": 9.865639267998493e-06, "loss": 0.5335, "step": 100 }, { "epoch": 0.16, "learning_rate": 1e-05, "loss": 0.5398, "step": 110 }, { "epoch": 0.18, "learning_rate": 1e-05, "loss": 0.5516, "step": 120 }, { "epoch": 0.19, "learning_rate": 1e-05, "loss": 0.5362, "step": 130 }, { "epoch": 0.21, "learning_rate": 1e-05, "loss": 0.5447, "step": 140 }, { "epoch": 0.22, "learning_rate": 1e-05, "loss": 0.5404, "step": 150 }, { "epoch": 0.24, "learning_rate": 1e-05, "loss": 0.5402, "step": 160 }, { "epoch": 0.25, "learning_rate": 1e-05, "loss": 0.53, "step": 170 }, { "epoch": 0.26, "learning_rate": 1e-05, "loss": 0.5221, "step": 180 }, { "epoch": 0.28, "learning_rate": 1e-05, "loss": 0.5306, "step": 190 }, { "epoch": 0.29, "learning_rate": 1e-05, "loss": 0.5335, "step": 200 }, { "epoch": 0.31, "learning_rate": 1e-05, "loss": 0.5428, "step": 210 }, { "epoch": 0.32, "learning_rate": 1e-05, "loss": 0.5282, "step": 220 }, { "epoch": 0.34, "learning_rate": 1e-05, "loss": 0.5374, "step": 230 }, { "epoch": 0.35, "learning_rate": 1e-05, "loss": 0.535, "step": 240 }, { "epoch": 0.37, "learning_rate": 1e-05, "loss": 0.5187, "step": 250 }, { "epoch": 0.38, "learning_rate": 1e-05, "loss": 0.525, "step": 260 }, { "epoch": 0.4, "learning_rate": 1e-05, "loss": 0.5341, "step": 270 }, { "epoch": 0.41, "learning_rate": 1e-05, "loss": 0.5326, "step": 280 }, { "epoch": 0.43, "learning_rate": 1e-05, "loss": 0.5321, "step": 290 }, { "epoch": 0.44, "learning_rate": 1e-05, "loss": 0.5209, "step": 300 }, { "epoch": 0.46, "learning_rate": 1e-05, "loss": 0.5253, "step": 310 }, { "epoch": 0.47, "learning_rate": 1e-05, "loss": 0.5175, "step": 320 }, { "epoch": 0.49, "learning_rate": 1e-05, "loss": 0.5299, "step": 330 }, { "epoch": 0.5, "learning_rate": 1e-05, "loss": 0.5225, "step": 340 }, { "epoch": 0.51, "learning_rate": 1e-05, "loss": 0.5239, "step": 350 }, { "epoch": 0.53, "learning_rate": 1e-05, "loss": 0.528, "step": 360 }, { "epoch": 0.54, "learning_rate": 1e-05, "loss": 0.5219, "step": 370 }, { "epoch": 0.56, "learning_rate": 1e-05, "loss": 0.5152, "step": 380 }, { "epoch": 0.57, "learning_rate": 1e-05, "loss": 0.5011, "step": 390 }, { "epoch": 0.59, "learning_rate": 1e-05, "loss": 0.5155, "step": 400 }, { "epoch": 0.6, "learning_rate": 1e-05, "loss": 0.5152, "step": 410 }, { "epoch": 0.62, "learning_rate": 1e-05, "loss": 0.5204, "step": 420 }, { "epoch": 0.63, "learning_rate": 1e-05, "loss": 0.5122, "step": 430 }, { "epoch": 0.65, "learning_rate": 1e-05, "loss": 0.5144, "step": 440 }, { "epoch": 0.66, "learning_rate": 1e-05, "loss": 0.5167, "step": 450 }, { "epoch": 0.68, "learning_rate": 1e-05, "loss": 0.5061, "step": 460 }, { "epoch": 0.69, "learning_rate": 1e-05, "loss": 0.5327, "step": 470 }, { "epoch": 0.71, "learning_rate": 1e-05, "loss": 0.5233, "step": 480 }, { "epoch": 0.72, "learning_rate": 1e-05, "loss": 0.5192, "step": 490 }, { "epoch": 0.74, "learning_rate": 1e-05, "loss": 0.5052, "step": 500 }, { "epoch": 0.75, "learning_rate": 1e-05, "loss": 0.5191, "step": 510 }, { "epoch": 0.76, "learning_rate": 1e-05, "loss": 0.5246, "step": 520 }, { "epoch": 0.78, "learning_rate": 1e-05, "loss": 0.5121, "step": 530 }, { "epoch": 0.79, "learning_rate": 1e-05, "loss": 0.5094, "step": 540 }, { "epoch": 0.81, "learning_rate": 1e-05, "loss": 0.5108, "step": 550 }, { "epoch": 0.82, "learning_rate": 1e-05, "loss": 0.5124, "step": 560 }, { "epoch": 0.84, "learning_rate": 1e-05, "loss": 0.5159, "step": 570 }, { "epoch": 0.85, "learning_rate": 1e-05, "loss": 0.5095, "step": 580 }, { "epoch": 0.87, "learning_rate": 1e-05, "loss": 0.5181, "step": 590 }, { "epoch": 0.88, "learning_rate": 1e-05, "loss": 0.517, "step": 600 }, { "epoch": 0.9, "learning_rate": 1e-05, "loss": 0.492, "step": 610 }, { "epoch": 0.91, "learning_rate": 1e-05, "loss": 0.5101, "step": 620 }, { "epoch": 0.93, "learning_rate": 1e-05, "loss": 0.5058, "step": 630 }, { "epoch": 0.94, "learning_rate": 1e-05, "loss": 0.5045, "step": 640 }, { "epoch": 0.96, "learning_rate": 1e-05, "loss": 0.5077, "step": 650 }, { "epoch": 0.97, "learning_rate": 1e-05, "loss": 0.51, "step": 660 }, { "epoch": 0.99, "learning_rate": 1e-05, "loss": 0.5128, "step": 670 }, { "epoch": 1.0, "learning_rate": 1e-05, "loss": 0.5118, "step": 680 }, { "epoch": 1.01, "learning_rate": 1e-05, "loss": 0.3543, "step": 690 }, { "epoch": 1.03, "learning_rate": 1e-05, "loss": 0.3402, "step": 700 }, { "epoch": 1.04, "learning_rate": 1e-05, "loss": 0.3482, "step": 710 }, { "epoch": 1.06, "learning_rate": 1e-05, "loss": 0.3442, "step": 720 }, { "epoch": 1.07, "learning_rate": 1e-05, "loss": 0.3329, "step": 730 }, { "epoch": 1.09, "learning_rate": 1e-05, "loss": 0.3335, "step": 740 }, { "epoch": 1.1, "learning_rate": 1e-05, "loss": 0.3448, "step": 750 }, { "epoch": 1.12, "learning_rate": 1e-05, "loss": 0.3451, "step": 760 }, { "epoch": 1.13, "learning_rate": 1e-05, "loss": 0.3354, "step": 770 }, { "epoch": 1.15, "learning_rate": 1e-05, "loss": 0.3412, "step": 780 }, { "epoch": 1.16, "learning_rate": 1e-05, "loss": 0.343, "step": 790 }, { "epoch": 1.18, "learning_rate": 1e-05, "loss": 0.3463, "step": 800 }, { "epoch": 1.19, "learning_rate": 1e-05, "loss": 0.3661, "step": 810 }, { "epoch": 1.21, "learning_rate": 1e-05, "loss": 0.3378, "step": 820 }, { "epoch": 1.22, "learning_rate": 1e-05, "loss": 0.3375, "step": 830 }, { "epoch": 1.24, "learning_rate": 1e-05, "loss": 0.3402, "step": 840 }, { "epoch": 1.25, "learning_rate": 1e-05, "loss": 0.3529, "step": 850 }, { "epoch": 1.26, "learning_rate": 1e-05, "loss": 0.3499, "step": 860 }, { "epoch": 1.28, "learning_rate": 1e-05, "loss": 0.3454, "step": 870 }, { "epoch": 1.29, "learning_rate": 1e-05, "loss": 0.3444, "step": 880 }, { "epoch": 1.31, "learning_rate": 1e-05, "loss": 0.3424, "step": 890 }, { "epoch": 1.32, "learning_rate": 1e-05, "loss": 0.3508, "step": 900 }, { "epoch": 1.34, "learning_rate": 1e-05, "loss": 0.3458, "step": 910 }, { "epoch": 1.35, "learning_rate": 1e-05, "loss": 0.3442, "step": 920 }, { "epoch": 1.37, "learning_rate": 1e-05, "loss": 0.3456, "step": 930 }, { "epoch": 1.38, "learning_rate": 1e-05, "loss": 0.3437, "step": 940 }, { "epoch": 1.4, "learning_rate": 1e-05, "loss": 0.3477, "step": 950 }, { "epoch": 1.41, "learning_rate": 1e-05, "loss": 0.3514, "step": 960 }, { "epoch": 1.43, "learning_rate": 1e-05, "loss": 0.3394, "step": 970 }, { "epoch": 1.44, "learning_rate": 1e-05, "loss": 0.3499, "step": 980 }, { "epoch": 1.46, "learning_rate": 1e-05, "loss": 0.3474, "step": 990 }, { "epoch": 1.47, "learning_rate": 1e-05, "loss": 0.3549, "step": 1000 }, { "epoch": 1.49, "learning_rate": 1e-05, "loss": 0.3483, "step": 1010 }, { "epoch": 1.5, "learning_rate": 1e-05, "loss": 0.3503, "step": 1020 }, { "epoch": 1.51, "learning_rate": 1e-05, "loss": 0.3493, "step": 1030 }, { "epoch": 1.53, "learning_rate": 1e-05, "loss": 0.349, "step": 1040 }, { "epoch": 1.54, "learning_rate": 1e-05, "loss": 0.3487, "step": 1050 }, { "epoch": 1.56, "learning_rate": 1e-05, "loss": 0.3472, "step": 1060 }, { "epoch": 1.57, "learning_rate": 1e-05, "loss": 0.349, "step": 1070 }, { "epoch": 1.59, "learning_rate": 1e-05, "loss": 0.356, "step": 1080 }, { "epoch": 1.6, "learning_rate": 1e-05, "loss": 0.3484, "step": 1090 }, { "epoch": 1.62, "learning_rate": 1e-05, "loss": 0.3471, "step": 1100 }, { "epoch": 1.63, "learning_rate": 1e-05, "loss": 0.3517, "step": 1110 }, { "epoch": 1.65, "learning_rate": 1e-05, "loss": 0.3445, "step": 1120 } ], "max_steps": 3400, "num_train_epochs": 5, "total_flos": 1456488892334080.0, "trial_name": null, "trial_params": null }