|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.0, |
|
"eval_steps": 500, |
|
"global_step": 1932, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"step": 276, |
|
"train_eval_accuracy": 0.6987528344671202, |
|
"train_eval_f1": 0.6755183421371805, |
|
"train_eval_loss": 1.2385817766189575, |
|
"train_eval_precision": 0.7371331169153404, |
|
"train_eval_recall": 0.700062751383177, |
|
"train_loss": 1.2385817766189575, |
|
"train_runtime": 259.0959, |
|
"train_samples_per_second": 34.041, |
|
"train_steps_per_second": 1.065 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6833333333333333, |
|
"eval_f1": 0.6547321354719501, |
|
"eval_loss": 1.2707617282867432, |
|
"eval_precision": 0.7152277887972227, |
|
"eval_recall": 0.6807604737331212, |
|
"eval_runtime": 110.9236, |
|
"eval_samples_per_second": 34.078, |
|
"eval_steps_per_second": 1.073, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 552, |
|
"train_eval_accuracy": 0.9099773242630386, |
|
"train_eval_f1": 0.9092824086512392, |
|
"train_eval_loss": 0.39961349964141846, |
|
"train_eval_precision": 0.9126506135478565, |
|
"train_eval_recall": 0.909610330961518, |
|
"train_loss": 0.39961352944374084, |
|
"train_runtime": 259.0099, |
|
"train_samples_per_second": 34.053, |
|
"train_steps_per_second": 1.066 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8843915343915344, |
|
"eval_f1": 0.8845088728896672, |
|
"eval_loss": 0.49980613589286804, |
|
"eval_precision": 0.8892030673846649, |
|
"eval_recall": 0.8861504544427677, |
|
"eval_runtime": 111.0942, |
|
"eval_samples_per_second": 34.025, |
|
"eval_steps_per_second": 1.071, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 828, |
|
"train_eval_accuracy": 0.9325396825396826, |
|
"train_eval_f1": 0.9319133039039142, |
|
"train_eval_loss": 0.27389460802078247, |
|
"train_eval_precision": 0.934170610859209, |
|
"train_eval_recall": 0.9324433400664413, |
|
"train_loss": 0.27389463782310486, |
|
"train_runtime": 258.8663, |
|
"train_samples_per_second": 34.072, |
|
"train_steps_per_second": 1.066 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8981481481481481, |
|
"eval_f1": 0.8983790808910544, |
|
"eval_loss": 0.4277746081352234, |
|
"eval_precision": 0.9018452269248401, |
|
"eval_recall": 0.8993916560895983, |
|
"eval_runtime": 111.0843, |
|
"eval_samples_per_second": 34.028, |
|
"eval_steps_per_second": 1.071, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 1104, |
|
"train_eval_accuracy": 0.9470521541950113, |
|
"train_eval_f1": 0.9467599019967656, |
|
"train_eval_loss": 0.20678994059562683, |
|
"train_eval_precision": 0.9485244133482296, |
|
"train_eval_recall": 0.9470018597626247, |
|
"train_loss": 0.20678995549678802, |
|
"train_runtime": 258.8456, |
|
"train_samples_per_second": 34.074, |
|
"train_steps_per_second": 1.066 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8973544973544973, |
|
"eval_f1": 0.8979176810370734, |
|
"eval_loss": 0.39539220929145813, |
|
"eval_precision": 0.9018417578163312, |
|
"eval_recall": 0.8984604158679782, |
|
"eval_runtime": 111.0135, |
|
"eval_samples_per_second": 34.05, |
|
"eval_steps_per_second": 1.072, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 1380, |
|
"train_eval_accuracy": 0.9561224489795919, |
|
"train_eval_f1": 0.9558613640362872, |
|
"train_eval_loss": 0.16801320016384125, |
|
"train_eval_precision": 0.956385049537581, |
|
"train_eval_recall": 0.9560588471737465, |
|
"train_loss": 0.16801321506500244, |
|
"train_runtime": 258.896, |
|
"train_samples_per_second": 34.068, |
|
"train_steps_per_second": 1.066 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9034391534391535, |
|
"eval_f1": 0.9040125697548127, |
|
"eval_loss": 0.4033404588699341, |
|
"eval_precision": 0.9052728311936364, |
|
"eval_recall": 0.9046585790514887, |
|
"eval_runtime": 110.9029, |
|
"eval_samples_per_second": 34.084, |
|
"eval_steps_per_second": 1.073, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 1656, |
|
"train_eval_accuracy": 0.9645124716553288, |
|
"train_eval_f1": 0.9643703596173855, |
|
"train_eval_loss": 0.13308991491794586, |
|
"train_eval_precision": 0.964791623196822, |
|
"train_eval_recall": 0.9644331346757402, |
|
"train_loss": 0.13308990001678467, |
|
"train_runtime": 258.612, |
|
"train_samples_per_second": 34.105, |
|
"train_steps_per_second": 1.067 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.903968253968254, |
|
"eval_f1": 0.9045126462342287, |
|
"eval_loss": 0.4025459587574005, |
|
"eval_precision": 0.9070552370106681, |
|
"eval_recall": 0.9050900149375409, |
|
"eval_runtime": 110.9248, |
|
"eval_samples_per_second": 34.077, |
|
"eval_steps_per_second": 1.073, |
|
"step": 1656 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 1932, |
|
"train_eval_accuracy": 0.9687074829931973, |
|
"train_eval_f1": 0.9684965457139353, |
|
"train_eval_loss": 0.11909189075231552, |
|
"train_eval_precision": 0.9696149844869657, |
|
"train_eval_recall": 0.9685670790376533, |
|
"train_loss": 0.11909190565347672, |
|
"train_runtime": 258.6816, |
|
"train_samples_per_second": 34.096, |
|
"train_steps_per_second": 1.067 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8957671957671958, |
|
"eval_f1": 0.8957969434045191, |
|
"eval_loss": 0.4326554834842682, |
|
"eval_precision": 0.8981578919986064, |
|
"eval_recall": 0.8970552202896507, |
|
"eval_runtime": 111.0014, |
|
"eval_samples_per_second": 34.054, |
|
"eval_steps_per_second": 1.072, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 1932, |
|
"total_flos": 1.62503106619392e+16, |
|
"train_loss": 0.6677281476449275, |
|
"train_runtime": 8398.7851, |
|
"train_samples_per_second": 21.003, |
|
"train_steps_per_second": 0.657 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8957671957671958, |
|
"eval_f1": 0.8957969434045191, |
|
"eval_loss": 0.4326554834842682, |
|
"eval_precision": 0.8981578919986064, |
|
"eval_recall": 0.8970552202896507, |
|
"eval_runtime": 111.1885, |
|
"eval_samples_per_second": 33.996, |
|
"eval_steps_per_second": 1.07, |
|
"step": 1932 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 1932, |
|
"train_en_eval_accuracy": 0.9687074829931973, |
|
"train_en_eval_f1": 0.9684965457139353, |
|
"train_en_eval_loss": 0.11909189075231552, |
|
"train_en_eval_precision": 0.9696149844869657, |
|
"train_en_eval_recall": 0.9685670790376533, |
|
"train_en_loss": 0.11909190565347672, |
|
"train_en_runtime": 258.9019, |
|
"train_en_samples_per_second": 34.067, |
|
"train_en_steps_per_second": 1.066 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 1932, |
|
"test_en_eval_accuracy": 0.8957671957671958, |
|
"test_en_eval_f1": 0.8957969434045191, |
|
"test_en_eval_loss": 0.43265554308891296, |
|
"test_en_eval_precision": 0.8981578919986064, |
|
"test_en_eval_recall": 0.8970552202896507, |
|
"test_en_loss": 0.4326554834842682, |
|
"test_en_runtime": 110.7536, |
|
"test_en_samples_per_second": 34.13, |
|
"test_en_steps_per_second": 1.074 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 5520, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.62503106619392e+16, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|