|
{ |
|
"best_metric": 0.9734994769096375, |
|
"best_model_checkpoint": "/home/iais_marenpielka/Bouthaina/res_nw_irq_03/checkpoint-1057", |
|
"epoch": 6.0, |
|
"eval_steps": 500, |
|
"global_step": 6342, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 2.316096782684326, |
|
"learning_rate": 4.865067829457365e-05, |
|
"loss": 1.1923, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_bleu": 0.3758671129852906, |
|
"eval_loss": 0.9734994769096375, |
|
"eval_rouge1": 0.5620694784691431, |
|
"eval_rouge2": 0.3072634138927888, |
|
"eval_rougeL": 0.5602967261743523, |
|
"eval_runtime": 18.3819, |
|
"eval_samples_per_second": 114.95, |
|
"eval_steps_per_second": 14.416, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.9589498043060303, |
|
"learning_rate": 4.609011627906977e-05, |
|
"loss": 0.3956, |
|
"step": 2114 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_bleu": 0.41456228931689637, |
|
"eval_loss": 1.2529053688049316, |
|
"eval_rouge1": 0.6342121068042533, |
|
"eval_rouge2": 0.3777098530906886, |
|
"eval_rougeL": 0.6329704343270921, |
|
"eval_runtime": 27.6701, |
|
"eval_samples_per_second": 76.364, |
|
"eval_steps_per_second": 9.577, |
|
"step": 2114 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.9037576913833618, |
|
"learning_rate": 4.3529554263565894e-05, |
|
"loss": 0.2578, |
|
"step": 3171 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_bleu": 0.4177567847266788, |
|
"eval_loss": 1.1432485580444336, |
|
"eval_rouge1": 0.6356354703691816, |
|
"eval_rouge2": 0.38873743710479597, |
|
"eval_rougeL": 0.6340522473648312, |
|
"eval_runtime": 16.0819, |
|
"eval_samples_per_second": 131.39, |
|
"eval_steps_per_second": 16.478, |
|
"step": 3171 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.466954469680786, |
|
"learning_rate": 4.096899224806201e-05, |
|
"loss": 0.1771, |
|
"step": 4228 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_bleu": 0.420916242293927, |
|
"eval_loss": 1.3314274549484253, |
|
"eval_rouge1": 0.6564326494399506, |
|
"eval_rouge2": 0.4082972964966378, |
|
"eval_rougeL": 0.6545946336979762, |
|
"eval_runtime": 15.9444, |
|
"eval_samples_per_second": 132.523, |
|
"eval_steps_per_second": 16.62, |
|
"step": 4228 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.057302236557007, |
|
"learning_rate": 3.840843023255814e-05, |
|
"loss": 0.1337, |
|
"step": 5285 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_bleu": 0.42563539215296015, |
|
"eval_loss": 1.344425916671753, |
|
"eval_rouge1": 0.6508055039664861, |
|
"eval_rouge2": 0.40613669991922435, |
|
"eval_rougeL": 0.6494650833785085, |
|
"eval_runtime": 139.4783, |
|
"eval_samples_per_second": 15.149, |
|
"eval_steps_per_second": 1.9, |
|
"step": 5285 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.6741355657577515, |
|
"learning_rate": 3.5847868217054265e-05, |
|
"loss": 0.1118, |
|
"step": 6342 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_bleu": 0.42475546499314026, |
|
"eval_loss": 1.369273066520691, |
|
"eval_rouge1": 0.6534368445259939, |
|
"eval_rouge2": 0.40793487128864964, |
|
"eval_rougeL": 0.6523435258107617, |
|
"eval_runtime": 22.7821, |
|
"eval_samples_per_second": 92.748, |
|
"eval_steps_per_second": 11.632, |
|
"step": 6342 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 6342, |
|
"total_flos": 1.1774067405225984e+16, |
|
"train_loss": 0.37803844186042396, |
|
"train_runtime": 4074.3615, |
|
"train_samples_per_second": 41.489, |
|
"train_steps_per_second": 5.189 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 21140, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.1774067405225984e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|