{ "best_metric": 0.2145923674106598, "best_model_checkpoint": "./resultTraining5/checkpoint-126", "epoch": 10.5, "eval_steps": 18, "global_step": 126, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.5, "grad_norm": 728.242919921875, "learning_rate": 7.333333333333333e-06, "loss": 1.8847, "step": 18 }, { "epoch": 1.5, "eval_Accuracy": 0.5388888888888889, "eval_Cross_Entropy_Loss": 1.7277253663296708, "eval_F1": 0.35018050541516244, "eval_Precision": 0.26944444444444443, "eval_Recall": 0.5, "eval_loss": 1.7277655601501465, "eval_runtime": 23.884, "eval_samples_per_second": 7.536, "eval_steps_per_second": 0.377, "step": 18 }, { "epoch": 3.0, "grad_norm": 297.9496765136719, "learning_rate": 1.9333333333333333e-05, "loss": 1.4535, "step": 36 }, { "epoch": 3.0, "eval_Accuracy": 0.5666666666666667, "eval_Cross_Entropy_Loss": 0.7339681547135115, "eval_F1": 0.5125, "eval_Precision": 0.567000567000567, "eval_Recall": 0.5440317972922618, "eval_loss": 0.734002947807312, "eval_runtime": 24.1886, "eval_samples_per_second": 7.442, "eval_steps_per_second": 0.372, "step": 36 }, { "epoch": 4.5, "grad_norm": 192.5962371826172, "learning_rate": 2.9998194895940213e-05, "loss": 0.6789, "step": 54 }, { "epoch": 4.5, "eval_Accuracy": 0.7666666666666667, "eval_Cross_Entropy_Loss": 0.5086390532967117, "eval_F1": 0.7664071190211346, "eval_Precision": 0.7671317446598345, "eval_Recall": 0.7687243820643399, "eval_loss": 0.5086456537246704, "eval_runtime": 22.8477, "eval_samples_per_second": 7.878, "eval_steps_per_second": 0.394, "step": 54 }, { "epoch": 6.0, "grad_norm": 69.82402801513672, "learning_rate": 2.981984774215214e-05, "loss": 0.4986, "step": 72 }, { "epoch": 6.0, "eval_Accuracy": 0.7944444444444444, "eval_Cross_Entropy_Loss": 0.40525681872645186, "eval_F1": 0.7941331025316065, "eval_Precision": 0.7944444444444445, "eval_Recall": 0.7962364923611973, "eval_loss": 0.4052683413028717, "eval_runtime": 23.7394, "eval_samples_per_second": 7.582, "eval_steps_per_second": 0.379, "step": 72 }, { "epoch": 7.5, "grad_norm": 14.14302921295166, "learning_rate": 2.935304910939002e-05, "loss": 0.3813, "step": 90 }, { "epoch": 7.5, "eval_Accuracy": 0.8888888888888888, "eval_Cross_Entropy_Loss": 0.29074438542399245, "eval_F1": 0.8872039102644442, "eval_Precision": 0.8933333333333333, "eval_Recall": 0.8847348155508632, "eval_loss": 0.29075339436531067, "eval_runtime": 24.5928, "eval_samples_per_second": 7.319, "eval_steps_per_second": 0.366, "step": 90 }, { "epoch": 9.0, "grad_norm": 160.1807403564453, "learning_rate": 2.860688470206501e-05, "loss": 0.2684, "step": 108 }, { "epoch": 9.0, "eval_Accuracy": 0.8944444444444445, "eval_Cross_Entropy_Loss": 0.24539544933520827, "eval_F1": 0.8920147769252629, "eval_Precision": 0.9058441558441559, "eval_Recall": 0.8881505403055521, "eval_loss": 0.2453812062740326, "eval_runtime": 23.6027, "eval_samples_per_second": 7.626, "eval_steps_per_second": 0.381, "step": 108 }, { "epoch": 10.5, "grad_norm": 59.56110382080078, "learning_rate": 2.7595877761616246e-05, "loss": 0.1486, "step": 126 }, { "epoch": 10.5, "eval_Accuracy": 0.9166666666666666, "eval_Cross_Entropy_Loss": 0.21460228119083746, "eval_F1": 0.9163542860683416, "eval_Precision": 0.9157595249876299, "eval_Recall": 0.9174636691094273, "eval_loss": 0.2145923674106598, "eval_runtime": 23.8953, "eval_samples_per_second": 7.533, "eval_steps_per_second": 0.377, "step": 126 } ], "logging_steps": 18, "max_steps": 450, "num_input_tokens_seen": 0, "num_train_epochs": 38, "save_steps": 18, "stateful_callbacks": { "CustomEarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 6.827331953465856e+16, "train_batch_size": 20, "trial_name": null, "trial_params": null }