havinash-ai's picture
Training in progress, step 10, checkpoint
fb79b62 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.005128205128205128,
"eval_steps": 3,
"global_step": 10,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0005128205128205128,
"grad_norm": 28.866498947143555,
"learning_rate": 2e-05,
"loss": 22.3345,
"step": 1
},
{
"epoch": 0.0005128205128205128,
"eval_loss": 4.872819423675537,
"eval_runtime": 12.6874,
"eval_samples_per_second": 64.788,
"eval_steps_per_second": 32.394,
"step": 1
},
{
"epoch": 0.0010256410256410256,
"grad_norm": 26.673473358154297,
"learning_rate": 4e-05,
"loss": 20.8146,
"step": 2
},
{
"epoch": 0.0015384615384615385,
"grad_norm": 24.699934005737305,
"learning_rate": 6e-05,
"loss": 19.8149,
"step": 3
},
{
"epoch": 0.0015384615384615385,
"eval_loss": 4.819361686706543,
"eval_runtime": 12.0615,
"eval_samples_per_second": 68.151,
"eval_steps_per_second": 34.075,
"step": 3
},
{
"epoch": 0.0020512820512820513,
"grad_norm": 19.96166229248047,
"learning_rate": 8e-05,
"loss": 19.845,
"step": 4
},
{
"epoch": 0.002564102564102564,
"grad_norm": 14.74158763885498,
"learning_rate": 0.0001,
"loss": 15.1953,
"step": 5
},
{
"epoch": 0.003076923076923077,
"grad_norm": 23.97884178161621,
"learning_rate": 0.00012,
"loss": 18.7039,
"step": 6
},
{
"epoch": 0.003076923076923077,
"eval_loss": 4.507215976715088,
"eval_runtime": 12.1994,
"eval_samples_per_second": 67.38,
"eval_steps_per_second": 33.69,
"step": 6
},
{
"epoch": 0.0035897435897435897,
"grad_norm": 28.07773780822754,
"learning_rate": 0.00014,
"loss": 20.8542,
"step": 7
},
{
"epoch": 0.0041025641025641026,
"grad_norm": 24.7640323638916,
"learning_rate": 0.00016,
"loss": 17.5828,
"step": 8
},
{
"epoch": 0.004615384615384616,
"grad_norm": 29.232324600219727,
"learning_rate": 0.00018,
"loss": 18.9759,
"step": 9
},
{
"epoch": 0.004615384615384616,
"eval_loss": 3.9376187324523926,
"eval_runtime": 12.1491,
"eval_samples_per_second": 67.659,
"eval_steps_per_second": 33.83,
"step": 9
},
{
"epoch": 0.005128205128205128,
"grad_norm": 43.22773361206055,
"learning_rate": 0.0002,
"loss": 18.0903,
"step": 10
}
],
"logging_steps": 1,
"max_steps": 10,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 3,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 298765642629120.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}