{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.00595847686435155,
  "eval_steps": 5,
  "global_step": 20,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0002979238432175775,
      "grad_norm": 0.013859261758625507,
      "learning_rate": 1e-05,
      "loss": 11.9245,
      "step": 1
    },
    {
      "epoch": 0.0002979238432175775,
      "eval_loss": 11.930392265319824,
      "eval_runtime": 15.0362,
      "eval_samples_per_second": 376.027,
      "eval_steps_per_second": 188.013,
      "step": 1
    },
    {
      "epoch": 0.000595847686435155,
      "grad_norm": 0.013614109717309475,
      "learning_rate": 2e-05,
      "loss": 11.9318,
      "step": 2
    },
    {
      "epoch": 0.0008937715296527325,
      "grad_norm": 0.013719133101403713,
      "learning_rate": 3e-05,
      "loss": 11.9329,
      "step": 3
    },
    {
      "epoch": 0.00119169537287031,
      "grad_norm": 0.014367117546498775,
      "learning_rate": 4e-05,
      "loss": 11.9325,
      "step": 4
    },
    {
      "epoch": 0.0014896192160878876,
      "grad_norm": 0.013640515506267548,
      "learning_rate": 5e-05,
      "loss": 11.9285,
      "step": 5
    },
    {
      "epoch": 0.0014896192160878876,
      "eval_loss": 11.930379867553711,
      "eval_runtime": 7.6643,
      "eval_samples_per_second": 737.707,
      "eval_steps_per_second": 368.854,
      "step": 5
    },
    {
      "epoch": 0.001787543059305465,
      "grad_norm": 0.013276793994009495,
      "learning_rate": 6e-05,
      "loss": 11.9311,
      "step": 6
    },
    {
      "epoch": 0.0020854669025230426,
      "grad_norm": 0.014673787169158459,
      "learning_rate": 7e-05,
      "loss": 11.9264,
      "step": 7
    },
    {
      "epoch": 0.00238339074574062,
      "grad_norm": 0.014198667369782925,
      "learning_rate": 8e-05,
      "loss": 11.9318,
      "step": 8
    },
    {
      "epoch": 0.0026813145889581975,
      "grad_norm": 0.013602347113192081,
      "learning_rate": 9e-05,
      "loss": 11.9322,
      "step": 9
    },
    {
      "epoch": 0.002979238432175775,
      "grad_norm": 0.012240378186106682,
      "learning_rate": 0.0001,
      "loss": 11.933,
      "step": 10
    },
    {
      "epoch": 0.002979238432175775,
      "eval_loss": 11.930337905883789,
      "eval_runtime": 7.9261,
      "eval_samples_per_second": 713.34,
      "eval_steps_per_second": 356.67,
      "step": 10
    },
    {
      "epoch": 0.003277162275393353,
      "grad_norm": 0.014199422672390938,
      "learning_rate": 9.755282581475769e-05,
      "loss": 11.9283,
      "step": 11
    },
    {
      "epoch": 0.00357508611861093,
      "grad_norm": 0.01845669187605381,
      "learning_rate": 9.045084971874738e-05,
      "loss": 11.9266,
      "step": 12
    },
    {
      "epoch": 0.0038730099618285076,
      "grad_norm": 0.01380027923732996,
      "learning_rate": 7.938926261462366e-05,
      "loss": 11.9286,
      "step": 13
    },
    {
      "epoch": 0.004170933805046085,
      "grad_norm": 0.014826415106654167,
      "learning_rate": 6.545084971874738e-05,
      "loss": 11.9277,
      "step": 14
    },
    {
      "epoch": 0.004468857648263663,
      "grad_norm": 0.012456392869353294,
      "learning_rate": 5e-05,
      "loss": 11.934,
      "step": 15
    },
    {
      "epoch": 0.004468857648263663,
      "eval_loss": 11.930290222167969,
      "eval_runtime": 7.7915,
      "eval_samples_per_second": 725.664,
      "eval_steps_per_second": 362.832,
      "step": 15
    },
    {
      "epoch": 0.00476678149148124,
      "grad_norm": 0.018006734549999237,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 11.9274,
      "step": 16
    },
    {
      "epoch": 0.005064705334698817,
      "grad_norm": 0.013570823706686497,
      "learning_rate": 2.061073738537635e-05,
      "loss": 11.9304,
      "step": 17
    },
    {
      "epoch": 0.005362629177916395,
      "grad_norm": 0.012760688550770283,
      "learning_rate": 9.549150281252633e-06,
      "loss": 11.9299,
      "step": 18
    },
    {
      "epoch": 0.005660553021133973,
      "grad_norm": 0.014291306026279926,
      "learning_rate": 2.4471741852423237e-06,
      "loss": 11.9341,
      "step": 19
    },
    {
      "epoch": 0.00595847686435155,
      "grad_norm": 0.01459888182580471,
      "learning_rate": 0.0,
      "loss": 11.9277,
      "step": 20
    },
    {
      "epoch": 0.00595847686435155,
      "eval_loss": 11.930275917053223,
      "eval_runtime": 7.8619,
      "eval_samples_per_second": 719.166,
      "eval_steps_per_second": 359.583,
      "step": 20
    }
  ],
  "logging_steps": 1,
  "max_steps": 20,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 5,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9452912640.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}