FModelT1 / trainer_state.json
AmiraMohammed's picture
Upload 11 files
c779fed verified
raw
history blame
5 kB
{
"best_metric": 0.2145923674106598,
"best_model_checkpoint": "./resultTraining5/checkpoint-126",
"epoch": 10.5,
"eval_steps": 18,
"global_step": 126,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.5,
"grad_norm": 728.242919921875,
"learning_rate": 7.333333333333333e-06,
"loss": 1.8847,
"step": 18
},
{
"epoch": 1.5,
"eval_Accuracy": 0.5388888888888889,
"eval_Cross_Entropy_Loss": 1.7277253663296708,
"eval_F1": 0.35018050541516244,
"eval_Precision": 0.26944444444444443,
"eval_Recall": 0.5,
"eval_loss": 1.7277655601501465,
"eval_runtime": 23.884,
"eval_samples_per_second": 7.536,
"eval_steps_per_second": 0.377,
"step": 18
},
{
"epoch": 3.0,
"grad_norm": 297.9496765136719,
"learning_rate": 1.9333333333333333e-05,
"loss": 1.4535,
"step": 36
},
{
"epoch": 3.0,
"eval_Accuracy": 0.5666666666666667,
"eval_Cross_Entropy_Loss": 0.7339681547135115,
"eval_F1": 0.5125,
"eval_Precision": 0.567000567000567,
"eval_Recall": 0.5440317972922618,
"eval_loss": 0.734002947807312,
"eval_runtime": 24.1886,
"eval_samples_per_second": 7.442,
"eval_steps_per_second": 0.372,
"step": 36
},
{
"epoch": 4.5,
"grad_norm": 192.5962371826172,
"learning_rate": 2.9998194895940213e-05,
"loss": 0.6789,
"step": 54
},
{
"epoch": 4.5,
"eval_Accuracy": 0.7666666666666667,
"eval_Cross_Entropy_Loss": 0.5086390532967117,
"eval_F1": 0.7664071190211346,
"eval_Precision": 0.7671317446598345,
"eval_Recall": 0.7687243820643399,
"eval_loss": 0.5086456537246704,
"eval_runtime": 22.8477,
"eval_samples_per_second": 7.878,
"eval_steps_per_second": 0.394,
"step": 54
},
{
"epoch": 6.0,
"grad_norm": 69.82402801513672,
"learning_rate": 2.981984774215214e-05,
"loss": 0.4986,
"step": 72
},
{
"epoch": 6.0,
"eval_Accuracy": 0.7944444444444444,
"eval_Cross_Entropy_Loss": 0.40525681872645186,
"eval_F1": 0.7941331025316065,
"eval_Precision": 0.7944444444444445,
"eval_Recall": 0.7962364923611973,
"eval_loss": 0.4052683413028717,
"eval_runtime": 23.7394,
"eval_samples_per_second": 7.582,
"eval_steps_per_second": 0.379,
"step": 72
},
{
"epoch": 7.5,
"grad_norm": 14.14302921295166,
"learning_rate": 2.935304910939002e-05,
"loss": 0.3813,
"step": 90
},
{
"epoch": 7.5,
"eval_Accuracy": 0.8888888888888888,
"eval_Cross_Entropy_Loss": 0.29074438542399245,
"eval_F1": 0.8872039102644442,
"eval_Precision": 0.8933333333333333,
"eval_Recall": 0.8847348155508632,
"eval_loss": 0.29075339436531067,
"eval_runtime": 24.5928,
"eval_samples_per_second": 7.319,
"eval_steps_per_second": 0.366,
"step": 90
},
{
"epoch": 9.0,
"grad_norm": 160.1807403564453,
"learning_rate": 2.860688470206501e-05,
"loss": 0.2684,
"step": 108
},
{
"epoch": 9.0,
"eval_Accuracy": 0.8944444444444445,
"eval_Cross_Entropy_Loss": 0.24539544933520827,
"eval_F1": 0.8920147769252629,
"eval_Precision": 0.9058441558441559,
"eval_Recall": 0.8881505403055521,
"eval_loss": 0.2453812062740326,
"eval_runtime": 23.6027,
"eval_samples_per_second": 7.626,
"eval_steps_per_second": 0.381,
"step": 108
},
{
"epoch": 10.5,
"grad_norm": 59.56110382080078,
"learning_rate": 2.7595877761616246e-05,
"loss": 0.1486,
"step": 126
},
{
"epoch": 10.5,
"eval_Accuracy": 0.9166666666666666,
"eval_Cross_Entropy_Loss": 0.21460228119083746,
"eval_F1": 0.9163542860683416,
"eval_Precision": 0.9157595249876299,
"eval_Recall": 0.9174636691094273,
"eval_loss": 0.2145923674106598,
"eval_runtime": 23.8953,
"eval_samples_per_second": 7.533,
"eval_steps_per_second": 0.377,
"step": 126
}
],
"logging_steps": 18,
"max_steps": 450,
"num_input_tokens_seen": 0,
"num_train_epochs": 38,
"save_steps": 18,
"stateful_callbacks": {
"CustomEarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 6.827331953465856e+16,
"train_batch_size": 20,
"trial_name": null,
"trial_params": null
}