File size: 1,694 Bytes
e6b2e7e 9f0b263 e6b2e7e 9f0b263 e6b2e7e 9f0b263 e6b2e7e 9f0b263 e6b2e7e 9f0b263 e6b2e7e 9f0b263 e6b2e7e 9f0b263 e6b2e7e 9f0b263 e6b2e7e 9f0b263 e6b2e7e 9f0b263 e6b2e7e 9f0b263 e6b2e7e 9f0b263 e6b2e7e 9f0b263 e6b2e7e 9f0b263 e6b2e7e 9f0b263 e6b2e7e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 6.0,
"eval_steps": 500,
"global_step": 1650,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9090909090909091,
"grad_norm": 0.9069373607635498,
"learning_rate": 0.0002,
"loss": 0.8844,
"step": 250
},
{
"epoch": 1.8181818181818183,
"grad_norm": 0.823881208896637,
"learning_rate": 0.0002,
"loss": 0.4541,
"step": 500
},
{
"epoch": 2.7272727272727275,
"grad_norm": 4.174367904663086,
"learning_rate": 0.0002,
"loss": 0.2964,
"step": 750
},
{
"epoch": 3.6363636363636362,
"grad_norm": 0.7141203284263611,
"learning_rate": 0.0002,
"loss": 0.2295,
"step": 1000
},
{
"epoch": 4.545454545454545,
"grad_norm": 0.6890754699707031,
"learning_rate": 0.0002,
"loss": 0.1953,
"step": 1250
},
{
"epoch": 5.454545454545454,
"grad_norm": 0.9104921221733093,
"learning_rate": 0.0002,
"loss": 0.1755,
"step": 1500
}
],
"logging_steps": 250,
"max_steps": 1650,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.103034997322547e+16,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|