{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 1650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.5260074138641357,
      "learning_rate": 0.0002,
      "loss": 0.8045,
      "step": 250
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.3671823740005493,
      "learning_rate": 0.0002,
      "loss": 0.3861,
      "step": 500
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 0.49801939725875854,
      "learning_rate": 0.0002,
      "loss": 0.2388,
      "step": 750
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 0.25766927003860474,
      "learning_rate": 0.0002,
      "loss": 0.1858,
      "step": 1000
    },
    {
      "epoch": 4.545454545454545,
      "grad_norm": 0.5788165926933289,
      "learning_rate": 0.0002,
      "loss": 0.1568,
      "step": 1250
    },
    {
      "epoch": 5.454545454545454,
      "grad_norm": 0.3915470540523529,
      "learning_rate": 0.0002,
      "loss": 0.1405,
      "step": 1500
    }
  ],
  "logging_steps": 250,
  "max_steps": 1650,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.0351908666638336e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}