|
{ |
|
"best_metric": 1.569107174873352, |
|
"best_model_checkpoint": "chinese_roberta_L-2_H-128-finetuned-MC-hyper/checkpoint-1250", |
|
"epoch": 5.0, |
|
"global_step": 1250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.92e-05, |
|
"loss": 1.6109, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.8400000000000004e-05, |
|
"loss": 1.6093, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.76e-05, |
|
"loss": 1.6059, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.6800000000000006e-05, |
|
"loss": 1.6083, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 1.6063, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.52e-05, |
|
"loss": 1.606, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.44e-05, |
|
"loss": 1.6085, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.36e-05, |
|
"loss": 1.609, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.2800000000000004e-05, |
|
"loss": 1.6032, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.2e-05, |
|
"loss": 1.609, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.12e-05, |
|
"loss": 1.6022, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.0400000000000006e-05, |
|
"loss": 1.5993, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.27799999713897705, |
|
"eval_loss": 1.596663236618042, |
|
"eval_runtime": 0.7533, |
|
"eval_samples_per_second": 663.789, |
|
"eval_steps_per_second": 42.483, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.960000000000001e-05, |
|
"loss": 1.6047, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.88e-05, |
|
"loss": 1.6005, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 3.8e-05, |
|
"loss": 1.5939, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 3.72e-05, |
|
"loss": 1.5916, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 3.6400000000000004e-05, |
|
"loss": 1.5908, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.56e-05, |
|
"loss": 1.59, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.48e-05, |
|
"loss": 1.5996, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.4000000000000007e-05, |
|
"loss": 1.586, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.32e-05, |
|
"loss": 1.5712, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.24e-05, |
|
"loss": 1.5726, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.16e-05, |
|
"loss": 1.5727, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.08e-05, |
|
"loss": 1.5822, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3e-05, |
|
"loss": 1.5731, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.3019999861717224, |
|
"eval_loss": 1.5877653360366821, |
|
"eval_runtime": 0.647, |
|
"eval_samples_per_second": 772.82, |
|
"eval_steps_per_second": 49.46, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 2.9199999999999998e-05, |
|
"loss": 1.5914, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.84e-05, |
|
"loss": 1.5627, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 2.7600000000000003e-05, |
|
"loss": 1.5672, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.6800000000000004e-05, |
|
"loss": 1.564, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.6000000000000002e-05, |
|
"loss": 1.577, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 2.5200000000000003e-05, |
|
"loss": 1.5712, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.44e-05, |
|
"loss": 1.5725, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.36e-05, |
|
"loss": 1.5532, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.2800000000000002e-05, |
|
"loss": 1.5703, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 1.5803, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.12e-05, |
|
"loss": 1.5728, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.04e-05, |
|
"loss": 1.5434, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.29600000381469727, |
|
"eval_loss": 1.5791035890579224, |
|
"eval_runtime": 0.7356, |
|
"eval_samples_per_second": 679.717, |
|
"eval_steps_per_second": 43.502, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.9600000000000002e-05, |
|
"loss": 1.5569, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.88e-05, |
|
"loss": 1.5382, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.8e-05, |
|
"loss": 1.5478, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.7199999999999998e-05, |
|
"loss": 1.5665, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.6400000000000002e-05, |
|
"loss": 1.5494, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 1.56e-05, |
|
"loss": 1.5489, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 1.48e-05, |
|
"loss": 1.5483, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 1.5388, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 1.32e-05, |
|
"loss": 1.5563, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 1.24e-05, |
|
"loss": 1.5616, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 1.16e-05, |
|
"loss": 1.5512, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 1.08e-05, |
|
"loss": 1.5288, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1e-05, |
|
"loss": 1.5473, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.2939999997615814, |
|
"eval_loss": 1.5701889991760254, |
|
"eval_runtime": 0.7589, |
|
"eval_samples_per_second": 658.833, |
|
"eval_steps_per_second": 42.165, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 9.2e-06, |
|
"loss": 1.5642, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 1.5334, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 7.6e-06, |
|
"loss": 1.526, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 6.800000000000001e-06, |
|
"loss": 1.5361, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 6e-06, |
|
"loss": 1.573, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 5.2e-06, |
|
"loss": 1.5543, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 4.4e-06, |
|
"loss": 1.5193, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 4.64, |
|
"learning_rate": 3.6e-06, |
|
"loss": 1.5401, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"loss": 1.5373, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 1.5279, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 1.5503, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 1.5422, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.2919999957084656, |
|
"eval_loss": 1.569107174873352, |
|
"eval_runtime": 0.7454, |
|
"eval_samples_per_second": 670.819, |
|
"eval_steps_per_second": 42.932, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 1250, |
|
"total_flos": 10492735676640.0, |
|
"train_loss": 1.5705763679504394, |
|
"train_runtime": 78.9966, |
|
"train_samples_per_second": 253.176, |
|
"train_steps_per_second": 15.823 |
|
} |
|
], |
|
"max_steps": 1250, |
|
"num_train_epochs": 5, |
|
"total_flos": 10492735676640.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|