{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.006290495061961376, "eval_steps": 10, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00012580990123922753, "eval_loss": 1.046350121498108, "eval_runtime": 460.9411, "eval_samples_per_second": 7.261, "eval_steps_per_second": 3.632, "step": 1 }, { "epoch": 0.0006290495061961376, "grad_norm": 3.190112829208374, "learning_rate": 5e-05, "loss": 0.9941, "step": 5 }, { "epoch": 0.0012580990123922753, "grad_norm": 0.6102684140205383, "learning_rate": 0.0001, "loss": 0.8414, "step": 10 }, { "epoch": 0.0012580990123922753, "eval_loss": 1.001396894454956, "eval_runtime": 474.1232, "eval_samples_per_second": 7.059, "eval_steps_per_second": 3.531, "step": 10 }, { "epoch": 0.0018871485185884128, "grad_norm": 0.7861143350601196, "learning_rate": 9.619397662556435e-05, "loss": 0.8466, "step": 15 }, { "epoch": 0.0025161980247845506, "grad_norm": 0.4665175974369049, "learning_rate": 8.535533905932738e-05, "loss": 0.7658, "step": 20 }, { "epoch": 0.0025161980247845506, "eval_loss": 0.8983107805252075, "eval_runtime": 473.2366, "eval_samples_per_second": 7.073, "eval_steps_per_second": 3.537, "step": 20 }, { "epoch": 0.003145247530980688, "grad_norm": 0.6117036938667297, "learning_rate": 6.91341716182545e-05, "loss": 0.8343, "step": 25 }, { "epoch": 0.0037742970371768257, "grad_norm": 0.7812676429748535, "learning_rate": 5e-05, "loss": 0.7194, "step": 30 }, { "epoch": 0.0037742970371768257, "eval_loss": 0.8442201018333435, "eval_runtime": 472.7127, "eval_samples_per_second": 7.08, "eval_steps_per_second": 3.541, "step": 30 }, { "epoch": 0.004403346543372964, "grad_norm": 0.5659365653991699, "learning_rate": 3.086582838174551e-05, "loss": 0.6455, "step": 35 }, { "epoch": 0.005032396049569101, "grad_norm": 0.5611511468887329, "learning_rate": 1.4644660940672627e-05, "loss": 0.7392, "step": 40 }, { "epoch": 0.005032396049569101, "eval_loss": 0.818490207195282, "eval_runtime": 472.2539, "eval_samples_per_second": 7.087, "eval_steps_per_second": 3.545, "step": 40 }, { "epoch": 0.005661445555765239, "grad_norm": 0.5884243249893188, "learning_rate": 3.8060233744356633e-06, "loss": 0.7173, "step": 45 }, { "epoch": 0.006290495061961376, "grad_norm": 0.6214433312416077, "learning_rate": 0.0, "loss": 0.8757, "step": 50 }, { "epoch": 0.006290495061961376, "eval_loss": 0.8147611021995544, "eval_runtime": 472.5027, "eval_samples_per_second": 7.084, "eval_steps_per_second": 3.543, "step": 50 } ], "logging_steps": 5, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 13, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3917970967298048e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }