{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.07727975270479134, "eval_steps": 10, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0015455950540958269, "eval_loss": 10.375998497009277, "eval_runtime": 1.5673, "eval_samples_per_second": 174.182, "eval_steps_per_second": 87.41, "step": 1 }, { "epoch": 0.0077279752704791345, "grad_norm": 0.04116315767168999, "learning_rate": 5e-05, "loss": 10.3764, "step": 5 }, { "epoch": 0.015455950540958269, "grad_norm": 0.03205065801739693, "learning_rate": 0.0001, "loss": 10.3734, "step": 10 }, { "epoch": 0.015455950540958269, "eval_loss": 10.375765800476074, "eval_runtime": 1.5404, "eval_samples_per_second": 177.229, "eval_steps_per_second": 88.939, "step": 10 }, { "epoch": 0.023183925811437404, "grad_norm": 0.037050023674964905, "learning_rate": 9.619397662556435e-05, "loss": 10.3713, "step": 15 }, { "epoch": 0.030911901081916538, "grad_norm": 0.037202052772045135, "learning_rate": 8.535533905932738e-05, "loss": 10.3748, "step": 20 }, { "epoch": 0.030911901081916538, "eval_loss": 10.37524700164795, "eval_runtime": 1.5713, "eval_samples_per_second": 173.737, "eval_steps_per_second": 87.187, "step": 20 }, { "epoch": 0.03863987635239567, "grad_norm": 0.03221876919269562, "learning_rate": 6.91341716182545e-05, "loss": 10.3727, "step": 25 }, { "epoch": 0.04636785162287481, "grad_norm": 0.03972822427749634, "learning_rate": 5e-05, "loss": 10.3745, "step": 30 }, { "epoch": 0.04636785162287481, "eval_loss": 10.374836921691895, "eval_runtime": 1.6013, "eval_samples_per_second": 170.488, "eval_steps_per_second": 85.556, "step": 30 }, { "epoch": 0.05409582689335394, "grad_norm": 0.030050573870539665, "learning_rate": 3.086582838174551e-05, "loss": 10.3735, "step": 35 }, { "epoch": 0.061823802163833076, "grad_norm": 0.03952740505337715, "learning_rate": 1.4644660940672627e-05, "loss": 10.3741, "step": 40 }, { "epoch": 0.061823802163833076, "eval_loss": 10.3746337890625, "eval_runtime": 1.5441, "eval_samples_per_second": 176.804, "eval_steps_per_second": 88.726, "step": 40 }, { "epoch": 0.0695517774343122, "grad_norm": 0.024366561323404312, "learning_rate": 3.8060233744356633e-06, "loss": 10.3754, "step": 45 }, { "epoch": 0.07727975270479134, "grad_norm": 0.05005223676562309, "learning_rate": 0.0, "loss": 10.3762, "step": 50 }, { "epoch": 0.07727975270479134, "eval_loss": 10.374598503112793, "eval_runtime": 1.6011, "eval_samples_per_second": 170.508, "eval_steps_per_second": 85.566, "step": 50 } ], "logging_steps": 5, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 13, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 704734494720.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }