{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 1650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.8041964769363403,
      "learning_rate": 0.0002,
      "loss": 0.8636,
      "step": 250
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.6063739061355591,
      "learning_rate": 0.0002,
      "loss": 0.4697,
      "step": 500
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 0.9930739402770996,
      "learning_rate": 0.0002,
      "loss": 0.3111,
      "step": 750
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 0.8456557393074036,
      "learning_rate": 0.0002,
      "loss": 0.235,
      "step": 1000
    },
    {
      "epoch": 4.545454545454545,
      "grad_norm": 0.948847770690918,
      "learning_rate": 0.0002,
      "loss": 0.2006,
      "step": 1250
    },
    {
      "epoch": 5.454545454545454,
      "grad_norm": 0.4028228223323822,
      "learning_rate": 0.0002,
      "loss": 0.1806,
      "step": 1500
    }
  ],
  "logging_steps": 250,
  "max_steps": 1650,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.0944844579045376e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}