|
{ |
|
"best_metric": 0.3044010102748871, |
|
"best_model_checkpoint": "./beans_outputs/checkpoint-650", |
|
"epoch": 10.0, |
|
"eval_steps": 500, |
|
"global_step": 1300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 2.5466179847717285, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0208, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8045112781954887, |
|
"eval_loss": 0.8930914998054504, |
|
"eval_runtime": 0.9307, |
|
"eval_samples_per_second": 142.898, |
|
"eval_steps_per_second": 18.265, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"grad_norm": 1.4668034315109253, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8683, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8796992481203008, |
|
"eval_loss": 0.6855143308639526, |
|
"eval_runtime": 0.9382, |
|
"eval_samples_per_second": 141.765, |
|
"eval_steps_per_second": 18.12, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.3076923076923075, |
|
"grad_norm": 1.5885818004608154, |
|
"learning_rate": 5e-05, |
|
"loss": 0.7297, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_loss": 0.5008501410484314, |
|
"eval_runtime": 0.9428, |
|
"eval_samples_per_second": 141.075, |
|
"eval_steps_per_second": 18.032, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 3.076923076923077, |
|
"grad_norm": 2.4792442321777344, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6084, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.8461538461538463, |
|
"grad_norm": 1.0500737428665161, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4881, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9323308270676691, |
|
"eval_loss": 0.3685053288936615, |
|
"eval_runtime": 0.9444, |
|
"eval_samples_per_second": 140.829, |
|
"eval_steps_per_second": 18.001, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 4.615384615384615, |
|
"grad_norm": 0.9487807154655457, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3733, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_loss": 0.3044010102748871, |
|
"eval_runtime": 0.9975, |
|
"eval_samples_per_second": 133.327, |
|
"eval_steps_per_second": 17.042, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 5.384615384615385, |
|
"grad_norm": 2.811962604522705, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3229, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_loss": 0.3873802721500397, |
|
"eval_runtime": 0.9366, |
|
"eval_samples_per_second": 142.002, |
|
"eval_steps_per_second": 18.151, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 6.153846153846154, |
|
"grad_norm": 2.758183717727661, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4189, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.923076923076923, |
|
"grad_norm": 5.297445297241211, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5796, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5939849624060151, |
|
"eval_loss": 0.7966009974479675, |
|
"eval_runtime": 0.9577, |
|
"eval_samples_per_second": 138.872, |
|
"eval_steps_per_second": 17.75, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 7.6923076923076925, |
|
"grad_norm": 28.03814697265625, |
|
"learning_rate": 5e-05, |
|
"loss": 0.765, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5263157894736842, |
|
"eval_loss": 0.9421801567077637, |
|
"eval_runtime": 0.9477, |
|
"eval_samples_per_second": 140.335, |
|
"eval_steps_per_second": 17.938, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 8.461538461538462, |
|
"grad_norm": 5.901334285736084, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0732, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.3308270676691729, |
|
"eval_loss": 1.0903689861297607, |
|
"eval_runtime": 0.9358, |
|
"eval_samples_per_second": 142.119, |
|
"eval_steps_per_second": 18.166, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 9.23076923076923, |
|
"grad_norm": 1.3560993671417236, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0879, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 37.35905456542969, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1643, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.41353383458646614, |
|
"eval_loss": 1.0773515701293945, |
|
"eval_runtime": 0.9946, |
|
"eval_samples_per_second": 133.719, |
|
"eval_steps_per_second": 17.092, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 1300, |
|
"total_flos": 8.772706474360013e+17, |
|
"train_loss": 0.7308059575007512, |
|
"train_runtime": 175.3451, |
|
"train_samples_per_second": 58.969, |
|
"train_steps_per_second": 7.414 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 1300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.772706474360013e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|