|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.002584236159427492, |
|
"eval_steps": 13, |
|
"global_step": 13, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00019878739687903787, |
|
"grad_norm": 2.414543867111206, |
|
"learning_rate": 5e-06, |
|
"loss": 13.1525, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00019878739687903787, |
|
"eval_loss": 3.2820167541503906, |
|
"eval_runtime": 75.5342, |
|
"eval_samples_per_second": 28.054, |
|
"eval_steps_per_second": 14.033, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00039757479375807575, |
|
"grad_norm": 2.2050728797912598, |
|
"learning_rate": 1e-05, |
|
"loss": 13.7228, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0005963621906371136, |
|
"grad_norm": 2.5719025135040283, |
|
"learning_rate": 1.5e-05, |
|
"loss": 14.2385, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0007951495875161515, |
|
"grad_norm": 2.4067375659942627, |
|
"learning_rate": 2e-05, |
|
"loss": 12.0989, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0009939369843951894, |
|
"grad_norm": 2.902513027191162, |
|
"learning_rate": 2.5e-05, |
|
"loss": 13.3901, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0011927243812742273, |
|
"grad_norm": 2.1985535621643066, |
|
"learning_rate": 3e-05, |
|
"loss": 13.2702, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0013915117781532651, |
|
"grad_norm": 2.53352427482605, |
|
"learning_rate": 3.5e-05, |
|
"loss": 14.1685, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.001590299175032303, |
|
"grad_norm": 2.161381483078003, |
|
"learning_rate": 4e-05, |
|
"loss": 13.6176, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0017890865719113408, |
|
"grad_norm": 3.1022119522094727, |
|
"learning_rate": 4.5e-05, |
|
"loss": 15.6293, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.001987873968790379, |
|
"grad_norm": 2.5830698013305664, |
|
"learning_rate": 5e-05, |
|
"loss": 14.0974, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0021866613656694165, |
|
"grad_norm": 3.0659115314483643, |
|
"learning_rate": 4.99229333433282e-05, |
|
"loss": 13.9289, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0023854487625484546, |
|
"grad_norm": 3.4623639583587646, |
|
"learning_rate": 4.9692208514878444e-05, |
|
"loss": 13.4014, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.002584236159427492, |
|
"grad_norm": 2.969046115875244, |
|
"learning_rate": 4.9309248009941914e-05, |
|
"loss": 11.8233, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.002584236159427492, |
|
"eval_loss": 3.2339165210723877, |
|
"eval_runtime": 74.0873, |
|
"eval_samples_per_second": 28.601, |
|
"eval_steps_per_second": 14.307, |
|
"step": 13 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 13, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2158071405084672.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|