File size: 1,926 Bytes
229fdd5 ef3602d 229fdd5 ef3602d 229fdd5 ef3602d 98b40d0 229fdd5 98b40d0 ef3602d 229fdd5 ef3602d 98b40d0 229fdd5 98b40d0 ef3602d 229fdd5 ef3602d 98b40d0 229fdd5 98b40d0 ef3602d 229fdd5 98b40d0 229fdd5 98b40d0 ef3602d 229fdd5 ef3602d 98b40d0 229fdd5 98b40d0 ef3602d 229fdd5 ef3602d 98b40d0 229fdd5 98b40d0 ef3602d 229fdd5 ef3602d 98b40d0 229fdd5 ef3602d 229fdd5 98b40d0 229fdd5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 12,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5925925925925926,
"grad_norm": 16.085969924926758,
"learning_rate": 8.333333333333334e-05,
"loss": 8.8634,
"step": 2
},
{
"epoch": 1.0,
"grad_norm": 1.1762865781784058,
"learning_rate": 6.666666666666667e-05,
"loss": 5.6966,
"step": 4
},
{
"epoch": 1.5925925925925926,
"grad_norm": 2.515094518661499,
"learning_rate": 5e-05,
"loss": 7.5589,
"step": 6
},
{
"epoch": 2.0,
"grad_norm": 0.3776528835296631,
"learning_rate": 3.3333333333333335e-05,
"loss": 4.9983,
"step": 8
},
{
"epoch": 2.5925925925925926,
"grad_norm": 0.8414855599403381,
"learning_rate": 1.6666666666666667e-05,
"loss": 7.2323,
"step": 10
},
{
"epoch": 3.0,
"grad_norm": 0.46793699264526367,
"learning_rate": 0.0,
"loss": 4.9528,
"step": 12
},
{
"epoch": 3.0,
"step": 12,
"total_flos": 16597381341504.0,
"train_loss": 6.5503848393758135,
"train_runtime": 47.4701,
"train_samples_per_second": 4.55,
"train_steps_per_second": 0.253
}
],
"logging_steps": 2,
"max_steps": 12,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 16597381341504.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}
|