{ "best_metric": null, "best_model_checkpoint": null, "epoch": 7.6923076923076925, "eval_steps": 5, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15384615384615385, "grad_norm": 1.2540255213989835, "learning_rate": 4e-05, "loss": 2.461, "step": 1 }, { "epoch": 0.7692307692307693, "grad_norm": 1.2283767946925122, "learning_rate": 0.0002, "loss": 2.3764, "step": 5 }, { "epoch": 0.7692307692307693, "eval_loss": 1.8175537586212158, "eval_runtime": 2.6465, "eval_samples_per_second": 7.557, "eval_steps_per_second": 1.134, "step": 5 }, { "epoch": 1.5384615384615383, "grad_norm": 0.5222933366890941, "learning_rate": 0.00019396926207859084, "loss": 1.6306, "step": 10 }, { "epoch": 1.5384615384615383, "eval_loss": 1.2812074422836304, "eval_runtime": 1.3813, "eval_samples_per_second": 14.48, "eval_steps_per_second": 2.172, "step": 10 }, { "epoch": 2.3076923076923075, "grad_norm": 0.40273382098746935, "learning_rate": 0.0001766044443118978, "loss": 1.2054, "step": 15 }, { "epoch": 2.3076923076923075, "eval_loss": 0.9901388883590698, "eval_runtime": 1.3581, "eval_samples_per_second": 14.726, "eval_steps_per_second": 2.209, "step": 15 }, { "epoch": 3.076923076923077, "grad_norm": 0.5929366580267508, "learning_rate": 0.00015000000000000001, "loss": 0.9679, "step": 20 }, { "epoch": 3.076923076923077, "eval_loss": 0.8994835019111633, "eval_runtime": 1.3574, "eval_samples_per_second": 14.734, "eval_steps_per_second": 2.21, "step": 20 }, { "epoch": 3.8461538461538463, "grad_norm": 0.34496621838110464, "learning_rate": 0.00011736481776669306, "loss": 0.8649, "step": 25 }, { "epoch": 3.8461538461538463, "eval_loss": 0.867924690246582, "eval_runtime": 1.355, "eval_samples_per_second": 14.76, "eval_steps_per_second": 2.214, "step": 25 }, { "epoch": 4.615384615384615, "grad_norm": 0.2439958306613565, "learning_rate": 8.263518223330697e-05, "loss": 0.8062, "step": 30 }, { "epoch": 4.615384615384615, "eval_loss": 0.8346365690231323, "eval_runtime": 1.3475, "eval_samples_per_second": 14.843, "eval_steps_per_second": 2.226, "step": 30 }, { "epoch": 5.384615384615385, "grad_norm": 0.4208553153399377, "learning_rate": 5.000000000000002e-05, "loss": 0.8035, "step": 35 }, { "epoch": 5.384615384615385, "eval_loss": 0.8252410888671875, "eval_runtime": 1.3524, "eval_samples_per_second": 14.789, "eval_steps_per_second": 2.218, "step": 35 }, { "epoch": 6.153846153846154, "grad_norm": 0.21047147413474968, "learning_rate": 2.339555568810221e-05, "loss": 0.7083, "step": 40 }, { "epoch": 6.153846153846154, "eval_loss": 0.8200315237045288, "eval_runtime": 1.3534, "eval_samples_per_second": 14.778, "eval_steps_per_second": 2.217, "step": 40 }, { "epoch": 6.923076923076923, "grad_norm": 0.25331336485020006, "learning_rate": 6.030737921409169e-06, "loss": 0.703, "step": 45 }, { "epoch": 6.923076923076923, "eval_loss": 0.8187538981437683, "eval_runtime": 1.348, "eval_samples_per_second": 14.837, "eval_steps_per_second": 2.226, "step": 45 }, { "epoch": 7.6923076923076925, "grad_norm": 0.2588396412267122, "learning_rate": 0.0, "loss": 0.6968, "step": 50 }, { "epoch": 7.6923076923076925, "eval_loss": 0.8178739547729492, "eval_runtime": 1.3503, "eval_samples_per_second": 14.811, "eval_steps_per_second": 2.222, "step": 50 }, { "epoch": 7.6923076923076925, "step": 50, "total_flos": 828551528448.0, "train_loss": 1.0779932928085327, "train_runtime": 175.8451, "train_samples_per_second": 4.549, "train_steps_per_second": 0.284 } ], "logging_steps": 5, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 828551528448.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }