{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.006846970215679562, "eval_steps": 8, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00022823234052265205, "eval_loss": 2.2855961322784424, "eval_runtime": 257.8182, "eval_samples_per_second": 7.156, "eval_steps_per_second": 3.58, "step": 1 }, { "epoch": 0.0006846970215679561, "grad_norm": 0.9269846081733704, "learning_rate": 6e-05, "loss": 2.2791, "step": 3 }, { "epoch": 0.0013693940431359123, "grad_norm": 0.9510827660560608, "learning_rate": 0.00012, "loss": 2.3717, "step": 6 }, { "epoch": 0.0018258587241812164, "eval_loss": 2.153048276901245, "eval_runtime": 258.3426, "eval_samples_per_second": 7.142, "eval_steps_per_second": 3.573, "step": 8 }, { "epoch": 0.0020540910647038686, "grad_norm": 0.6708703637123108, "learning_rate": 0.00018, "loss": 2.2041, "step": 9 }, { "epoch": 0.0027387880862718246, "grad_norm": 1.3607641458511353, "learning_rate": 0.00019510565162951537, "loss": 2.2227, "step": 12 }, { "epoch": 0.003423485107839781, "grad_norm": 0.737883448600769, "learning_rate": 0.00017071067811865476, "loss": 2.1457, "step": 15 }, { "epoch": 0.0036517174483624328, "eval_loss": 2.0498392581939697, "eval_runtime": 258.2323, "eval_samples_per_second": 7.145, "eval_steps_per_second": 3.574, "step": 16 }, { "epoch": 0.004108182129407737, "grad_norm": 0.6803624033927917, "learning_rate": 0.00013090169943749476, "loss": 2.0524, "step": 18 }, { "epoch": 0.004792879150975694, "grad_norm": 0.5455082058906555, "learning_rate": 8.435655349597689e-05, "loss": 2.0345, "step": 21 }, { "epoch": 0.005477576172543649, "grad_norm": 1.0996414422988892, "learning_rate": 4.12214747707527e-05, "loss": 2.0625, "step": 24 }, { "epoch": 0.005477576172543649, "eval_loss": 2.0255534648895264, "eval_runtime": 258.4482, "eval_samples_per_second": 7.139, "eval_steps_per_second": 3.571, "step": 24 }, { "epoch": 0.0061622731941116055, "grad_norm": 0.6625553965568542, "learning_rate": 1.0899347581163221e-05, "loss": 2.0498, "step": 27 }, { "epoch": 0.006846970215679562, "grad_norm": 0.5361489057540894, "learning_rate": 0.0, "loss": 2.124, "step": 30 } ], "logging_steps": 3, "max_steps": 30, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2148404268302336e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }