{ "best_metric": 1.8803279399871826, "best_model_checkpoint": "output/boris-grebenshikov/checkpoint-169", "epoch": 1.0, "global_step": 169, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 0.00013690389360668606, "loss": 2.5933, "step": 5 }, { "epoch": 0.06, "learning_rate": 0.00013601813066569938, "loss": 2.3572, "step": 10 }, { "epoch": 0.09, "learning_rate": 0.00013455035782630487, "loss": 2.2816, "step": 15 }, { "epoch": 0.12, "learning_rate": 0.00013251324613578177, "loss": 2.2503, "step": 20 }, { "epoch": 0.15, "learning_rate": 0.00012992438165230672, "loss": 2.0864, "step": 25 }, { "epoch": 0.18, "learning_rate": 0.00012680611362733682, "loss": 2.1159, "step": 30 }, { "epoch": 0.21, "learning_rate": 0.00012318536156811003, "loss": 2.0993, "step": 35 }, { "epoch": 0.24, "learning_rate": 0.00011909338284586077, "loss": 1.8802, "step": 40 }, { "epoch": 0.27, "learning_rate": 0.00011456550285595239, "loss": 2.0531, "step": 45 }, { "epoch": 0.3, "learning_rate": 0.00010964081005941026, "loss": 1.9281, "step": 50 }, { "epoch": 0.33, "learning_rate": 0.0001043618185385132, "loss": 2.1099, "step": 55 }, { "epoch": 0.36, "learning_rate": 9.877410097954656e-05, "loss": 2.0119, "step": 60 }, { "epoch": 0.38, "learning_rate": 9.292589525111794e-05, "loss": 1.9675, "step": 65 }, { "epoch": 0.41, "learning_rate": 8.686768797438203e-05, "loss": 1.9512, "step": 70 }, { "epoch": 0.44, "learning_rate": 8.065177868014528e-05, "loss": 2.047, "step": 75 }, { "epoch": 0.47, "learning_rate": 7.433182831541305e-05, "loss": 1.9436, "step": 80 }, { "epoch": 0.5, "learning_rate": 6.796239599704895e-05, "loss": 1.9677, "step": 85 }, { "epoch": 0.53, "learning_rate": 6.159846801167835e-05, "loss": 1.8582, "step": 90 }, { "epoch": 0.56, "learning_rate": 5.529498312790352e-05, "loss": 1.9952, "step": 95 }, { "epoch": 0.59, "learning_rate": 4.9106358318734425e-05, "loss": 1.9317, "step": 100 }, { "epoch": 0.62, "learning_rate": 4.3086018988597235e-05, "loss": 1.834, "step": 105 }, { "epoch": 0.65, "learning_rate": 3.728593776039493e-05, "loss": 1.8923, "step": 110 }, { "epoch": 0.68, "learning_rate": 3.1756185804197785e-05, "loss": 1.9544, "step": 115 }, { "epoch": 0.71, "learning_rate": 2.6544500580870892e-05, "loss": 1.921, "step": 120 }, { "epoch": 0.74, "learning_rate": 2.169587373223826e-05, "loss": 1.9539, "step": 125 }, { "epoch": 0.77, "learning_rate": 1.725216267546246e-05, "loss": 1.9665, "step": 130 }, { "epoch": 0.8, "learning_rate": 1.3251729254682012e-05, "loss": 2.0147, "step": 135 }, { "epoch": 0.83, "learning_rate": 9.729108569369736e-06, "loss": 1.8575, "step": 140 }, { "epoch": 0.86, "learning_rate": 6.7147108383636075e-06, "loss": 1.8372, "step": 145 }, { "epoch": 0.89, "learning_rate": 4.234558873329575e-06, "loss": 1.7437, "step": 150 }, { "epoch": 0.92, "learning_rate": 2.310063428006295e-06, "loss": 1.8886, "step": 155 }, { "epoch": 0.95, "learning_rate": 9.578383626055595e-07, "loss": 1.9129, "step": 160 }, { "epoch": 0.98, "learning_rate": 1.895572190242788e-07, "loss": 1.8767, "step": 165 }, { "epoch": 1.0, "eval_loss": 1.8803279399871826, "eval_runtime": 11.5804, "eval_samples_per_second": 22.279, "eval_steps_per_second": 2.85, "step": 169 } ], "max_steps": 169, "num_train_epochs": 1, "total_flos": 176110829568000.0, "trial_name": null, "trial_params": null }