|
{ |
|
"best_metric": 0.2145923674106598, |
|
"best_model_checkpoint": "./resultTraining5/checkpoint-126", |
|
"epoch": 10.5, |
|
"eval_steps": 18, |
|
"global_step": 126, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 728.242919921875, |
|
"learning_rate": 7.333333333333333e-06, |
|
"loss": 1.8847, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_Accuracy": 0.5388888888888889, |
|
"eval_Cross_Entropy_Loss": 1.7277253663296708, |
|
"eval_F1": 0.35018050541516244, |
|
"eval_Precision": 0.26944444444444443, |
|
"eval_Recall": 0.5, |
|
"eval_loss": 1.7277655601501465, |
|
"eval_runtime": 23.884, |
|
"eval_samples_per_second": 7.536, |
|
"eval_steps_per_second": 0.377, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 297.9496765136719, |
|
"learning_rate": 1.9333333333333333e-05, |
|
"loss": 1.4535, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_Accuracy": 0.5666666666666667, |
|
"eval_Cross_Entropy_Loss": 0.7339681547135115, |
|
"eval_F1": 0.5125, |
|
"eval_Precision": 0.567000567000567, |
|
"eval_Recall": 0.5440317972922618, |
|
"eval_loss": 0.734002947807312, |
|
"eval_runtime": 24.1886, |
|
"eval_samples_per_second": 7.442, |
|
"eval_steps_per_second": 0.372, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"grad_norm": 192.5962371826172, |
|
"learning_rate": 2.9998194895940213e-05, |
|
"loss": 0.6789, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"eval_Accuracy": 0.7666666666666667, |
|
"eval_Cross_Entropy_Loss": 0.5086390532967117, |
|
"eval_F1": 0.7664071190211346, |
|
"eval_Precision": 0.7671317446598345, |
|
"eval_Recall": 0.7687243820643399, |
|
"eval_loss": 0.5086456537246704, |
|
"eval_runtime": 22.8477, |
|
"eval_samples_per_second": 7.878, |
|
"eval_steps_per_second": 0.394, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 69.82402801513672, |
|
"learning_rate": 2.981984774215214e-05, |
|
"loss": 0.4986, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_Accuracy": 0.7944444444444444, |
|
"eval_Cross_Entropy_Loss": 0.40525681872645186, |
|
"eval_F1": 0.7941331025316065, |
|
"eval_Precision": 0.7944444444444445, |
|
"eval_Recall": 0.7962364923611973, |
|
"eval_loss": 0.4052683413028717, |
|
"eval_runtime": 23.7394, |
|
"eval_samples_per_second": 7.582, |
|
"eval_steps_per_second": 0.379, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"grad_norm": 14.14302921295166, |
|
"learning_rate": 2.935304910939002e-05, |
|
"loss": 0.3813, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"eval_Accuracy": 0.8888888888888888, |
|
"eval_Cross_Entropy_Loss": 0.29074438542399245, |
|
"eval_F1": 0.8872039102644442, |
|
"eval_Precision": 0.8933333333333333, |
|
"eval_Recall": 0.8847348155508632, |
|
"eval_loss": 0.29075339436531067, |
|
"eval_runtime": 24.5928, |
|
"eval_samples_per_second": 7.319, |
|
"eval_steps_per_second": 0.366, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 160.1807403564453, |
|
"learning_rate": 2.860688470206501e-05, |
|
"loss": 0.2684, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_Accuracy": 0.8944444444444445, |
|
"eval_Cross_Entropy_Loss": 0.24539544933520827, |
|
"eval_F1": 0.8920147769252629, |
|
"eval_Precision": 0.9058441558441559, |
|
"eval_Recall": 0.8881505403055521, |
|
"eval_loss": 0.2453812062740326, |
|
"eval_runtime": 23.6027, |
|
"eval_samples_per_second": 7.626, |
|
"eval_steps_per_second": 0.381, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"grad_norm": 59.56110382080078, |
|
"learning_rate": 2.7595877761616246e-05, |
|
"loss": 0.1486, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 10.5, |
|
"eval_Accuracy": 0.9166666666666666, |
|
"eval_Cross_Entropy_Loss": 0.21460228119083746, |
|
"eval_F1": 0.9163542860683416, |
|
"eval_Precision": 0.9157595249876299, |
|
"eval_Recall": 0.9174636691094273, |
|
"eval_loss": 0.2145923674106598, |
|
"eval_runtime": 23.8953, |
|
"eval_samples_per_second": 7.533, |
|
"eval_steps_per_second": 0.377, |
|
"step": 126 |
|
} |
|
], |
|
"logging_steps": 18, |
|
"max_steps": 450, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 38, |
|
"save_steps": 18, |
|
"stateful_callbacks": { |
|
"CustomEarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.827331953465856e+16, |
|
"train_batch_size": 20, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|