File size: 1,349 Bytes
75c4c46 c23fad8 75c4c46 c23fad8 75c4c46 64b8cf0 c23fad8 64b8cf0 75c4c46 c23fad8 64b8cf0 75c4c46 c23fad8 64b8cf0 75c4c46 c23fad8 64b8cf0 75c4c46 64b8cf0 c23fad8 64b8cf0 75c4c46 c23fad8 75c4c46 c23fad8 75c4c46 c23fad8 75c4c46 c23fad8 75c4c46 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.96,
"eval_steps": 500,
"global_step": 18,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 0.0001,
"loss": 2.272,
"step": 1
},
{
"epoch": 0.27,
"learning_rate": 0.00018314696123025454,
"loss": 1.7677,
"step": 5
},
{
"epoch": 0.53,
"learning_rate": 0.0001,
"loss": 0.5037,
"step": 10
},
{
"epoch": 0.8,
"learning_rate": 1.6853038769745467e-05,
"loss": 0.2847,
"step": 15
},
{
"epoch": 0.96,
"eval_loss": 0.26359862089157104,
"eval_runtime": 8.8309,
"eval_samples_per_second": 22.648,
"eval_steps_per_second": 2.831,
"step": 18
},
{
"epoch": 0.96,
"step": 18,
"total_flos": 20001168556032.0,
"train_loss": 0.7799174222681258,
"train_runtime": 192.3889,
"train_samples_per_second": 3.119,
"train_steps_per_second": 0.094
}
],
"logging_steps": 5,
"max_steps": 18,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 20001168556032.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}
|