{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0030747943731262973, "eval_steps": 3, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00030747943731262973, "grad_norm": 4.839503765106201, "learning_rate": 2e-05, "loss": 7.5076, "step": 1 }, { "epoch": 0.00030747943731262973, "eval_loss": 8.116337776184082, "eval_runtime": 41.8717, "eval_samples_per_second": 32.719, "eval_steps_per_second": 16.359, "step": 1 }, { "epoch": 0.0006149588746252595, "grad_norm": 5.878857612609863, "learning_rate": 4e-05, "loss": 7.9647, "step": 2 }, { "epoch": 0.0009224383119378892, "grad_norm": 6.081296920776367, "learning_rate": 6e-05, "loss": 7.6554, "step": 3 }, { "epoch": 0.0009224383119378892, "eval_loss": 8.096881866455078, "eval_runtime": 40.4417, "eval_samples_per_second": 33.876, "eval_steps_per_second": 16.938, "step": 3 }, { "epoch": 0.001229917749250519, "grad_norm": 4.793824195861816, "learning_rate": 8e-05, "loss": 5.5741, "step": 4 }, { "epoch": 0.0015373971865631486, "grad_norm": 6.256138801574707, "learning_rate": 0.0001, "loss": 9.0043, "step": 5 }, { "epoch": 0.0018448766238757784, "grad_norm": 6.00553560256958, "learning_rate": 0.00012, "loss": 7.7881, "step": 6 }, { "epoch": 0.0018448766238757784, "eval_loss": 7.770421028137207, "eval_runtime": 40.5882, "eval_samples_per_second": 33.754, "eval_steps_per_second": 16.877, "step": 6 }, { "epoch": 0.002152356061188408, "grad_norm": 5.929523944854736, "learning_rate": 0.00014, "loss": 8.65, "step": 7 }, { "epoch": 0.002459835498501038, "grad_norm": 6.152801513671875, "learning_rate": 0.00016, "loss": 7.4528, "step": 8 }, { "epoch": 0.0027673149358136673, "grad_norm": 4.511847019195557, "learning_rate": 0.00018, "loss": 5.0966, "step": 9 }, { "epoch": 0.0027673149358136673, "eval_loss": 6.538444995880127, "eval_runtime": 40.6732, "eval_samples_per_second": 33.683, "eval_steps_per_second": 16.842, "step": 9 }, { "epoch": 0.0030747943731262973, "grad_norm": 7.016354084014893, "learning_rate": 0.0002, "loss": 6.6274, "step": 10 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 737027133800448.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }