{
  "best_metric": 0.5314118934504415,
  "best_model_checkpoint": "./results/checkpoint-15120",
  "epoch": 18.0,
  "eval_steps": 500,
  "global_step": 15120,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.6,
      "learning_rate": 2.9761904761904762e-05,
      "loss": 7.0613,
      "step": 500
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.2605280876159668,
      "eval_rouge1": 0.5053086747478719,
      "eval_rouge2": 0.24615333793097546,
      "eval_rougeL": 0.4512970323707136,
      "eval_rougeLsum": 0.4656617846478366,
      "eval_runtime": 772.9001,
      "eval_samples_per_second": 1.938,
      "eval_steps_per_second": 0.122,
      "step": 840
    },
    {
      "epoch": 1.19,
      "learning_rate": 4.9498746867167923e-05,
      "loss": 1.4331,
      "step": 1000
    },
    {
      "epoch": 1.79,
      "learning_rate": 4.793233082706767e-05,
      "loss": 1.3665,
      "step": 1500
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.2270965576171875,
      "eval_rouge1": 0.5124476660885933,
      "eval_rouge2": 0.256176861062982,
      "eval_rougeL": 0.4600482066009496,
      "eval_rougeLsum": 0.4746519151030747,
      "eval_runtime": 1039.1433,
      "eval_samples_per_second": 1.442,
      "eval_steps_per_second": 0.09,
      "step": 1680
    },
    {
      "epoch": 2.38,
      "learning_rate": 4.6365914786967416e-05,
      "loss": 1.3381,
      "step": 2000
    },
    {
      "epoch": 2.98,
      "learning_rate": 4.4799498746867166e-05,
      "loss": 1.3009,
      "step": 2500
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.2114347219467163,
      "eval_rouge1": 0.5179075668199837,
      "eval_rouge2": 0.2612977945518593,
      "eval_rougeL": 0.46524322885156827,
      "eval_rougeLsum": 0.4802301912620997,
      "eval_runtime": 1018.7441,
      "eval_samples_per_second": 1.47,
      "eval_steps_per_second": 0.092,
      "step": 2520
    },
    {
      "epoch": 3.57,
      "learning_rate": 4.323308270676692e-05,
      "loss": 1.2594,
      "step": 3000
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.203908085823059,
      "eval_rouge1": 0.5229139335151443,
      "eval_rouge2": 0.26558551302892763,
      "eval_rougeL": 0.46774305498128443,
      "eval_rougeLsum": 0.483920816134904,
      "eval_runtime": 926.8973,
      "eval_samples_per_second": 1.616,
      "eval_steps_per_second": 0.101,
      "step": 3360
    },
    {
      "epoch": 4.17,
      "learning_rate": 4.166666666666667e-05,
      "loss": 1.247,
      "step": 3500
    },
    {
      "epoch": 4.76,
      "learning_rate": 4.0100250626566415e-05,
      "loss": 1.227,
      "step": 4000
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.1987696886062622,
      "eval_rouge1": 0.5247159372903829,
      "eval_rouge2": 0.2668774790757894,
      "eval_rougeL": 0.4698131046976711,
      "eval_rougeLsum": 0.4859852789981728,
      "eval_runtime": 1138.215,
      "eval_samples_per_second": 1.316,
      "eval_steps_per_second": 0.083,
      "step": 4200
    },
    {
      "epoch": 5.36,
      "learning_rate": 3.8533834586466165e-05,
      "loss": 1.1951,
      "step": 4500
    },
    {
      "epoch": 5.95,
      "learning_rate": 3.6967418546365914e-05,
      "loss": 1.1918,
      "step": 5000
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.1965585947036743,
      "eval_rouge1": 0.5238465835630357,
      "eval_rouge2": 0.2697694029030245,
      "eval_rougeL": 0.4700972375246304,
      "eval_rougeLsum": 0.48604440949884575,
      "eval_runtime": 1144.1344,
      "eval_samples_per_second": 1.309,
      "eval_steps_per_second": 0.082,
      "step": 5040
    },
    {
      "epoch": 6.55,
      "learning_rate": 3.540100250626567e-05,
      "loss": 1.1696,
      "step": 5500
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.193439245223999,
      "eval_rouge1": 0.5261420787321425,
      "eval_rouge2": 0.2701869159088741,
      "eval_rougeL": 0.472288301825879,
      "eval_rougeLsum": 0.48796847398006715,
      "eval_runtime": 913.4514,
      "eval_samples_per_second": 1.64,
      "eval_steps_per_second": 0.103,
      "step": 5880
    },
    {
      "epoch": 7.14,
      "learning_rate": 3.3834586466165414e-05,
      "loss": 1.1472,
      "step": 6000
    },
    {
      "epoch": 7.74,
      "learning_rate": 3.2268170426065164e-05,
      "loss": 1.1285,
      "step": 6500
    },
    {
      "epoch": 8.0,
      "eval_loss": 1.1932892799377441,
      "eval_rouge1": 0.5236558096283529,
      "eval_rouge2": 0.26931250040124155,
      "eval_rougeL": 0.4700827158260693,
      "eval_rougeLsum": 0.48489767885463625,
      "eval_runtime": 1155.7348,
      "eval_samples_per_second": 1.296,
      "eval_steps_per_second": 0.081,
      "step": 6720
    },
    {
      "epoch": 8.33,
      "learning_rate": 3.0701754385964913e-05,
      "loss": 1.1234,
      "step": 7000
    },
    {
      "epoch": 8.93,
      "learning_rate": 2.9135338345864667e-05,
      "loss": 1.1153,
      "step": 7500
    },
    {
      "epoch": 9.0,
      "eval_loss": 1.1960569620132446,
      "eval_rouge1": 0.5263030048004651,
      "eval_rouge2": 0.27081863504950743,
      "eval_rougeL": 0.4724221317085258,
      "eval_rougeLsum": 0.48803341513882764,
      "eval_runtime": 983.851,
      "eval_samples_per_second": 1.523,
      "eval_steps_per_second": 0.096,
      "step": 7560
    },
    {
      "epoch": 9.52,
      "learning_rate": 2.756892230576441e-05,
      "loss": 1.0927,
      "step": 8000
    },
    {
      "epoch": 10.0,
      "eval_loss": 1.1961216926574707,
      "eval_rouge1": 0.5253667627324876,
      "eval_rouge2": 0.2690944896569006,
      "eval_rougeL": 0.4720451976404024,
      "eval_rougeLsum": 0.48738350139979847,
      "eval_runtime": 932.6258,
      "eval_samples_per_second": 1.606,
      "eval_steps_per_second": 0.101,
      "step": 8400
    },
    {
      "epoch": 10.12,
      "learning_rate": 2.6002506265664163e-05,
      "loss": 1.0933,
      "step": 8500
    },
    {
      "epoch": 10.71,
      "learning_rate": 2.443609022556391e-05,
      "loss": 1.0661,
      "step": 9000
    },
    {
      "epoch": 11.0,
      "eval_loss": 1.2010161876678467,
      "eval_rouge1": 0.5234197228179973,
      "eval_rouge2": 0.2683998445240433,
      "eval_rougeL": 0.4697712822647654,
      "eval_rougeLsum": 0.48542105989790263,
      "eval_runtime": 1250.3048,
      "eval_samples_per_second": 1.198,
      "eval_steps_per_second": 0.075,
      "step": 9240
    },
    {
      "epoch": 11.31,
      "learning_rate": 2.2869674185463662e-05,
      "loss": 1.0663,
      "step": 9500
    },
    {
      "epoch": 11.9,
      "learning_rate": 2.130325814536341e-05,
      "loss": 1.0634,
      "step": 10000
    },
    {
      "epoch": 12.0,
      "eval_loss": 1.2002513408660889,
      "eval_rouge1": 0.5259472773406924,
      "eval_rouge2": 0.2723400096809272,
      "eval_rougeL": 0.47293541812561896,
      "eval_rougeLsum": 0.4885362784921822,
      "eval_runtime": 1027.3406,
      "eval_samples_per_second": 1.458,
      "eval_steps_per_second": 0.091,
      "step": 10080
    },
    {
      "epoch": 12.5,
      "learning_rate": 1.9736842105263158e-05,
      "loss": 1.046,
      "step": 10500
    },
    {
      "epoch": 13.0,
      "eval_loss": 1.2019047737121582,
      "eval_rouge1": 0.527682777778989,
      "eval_rouge2": 0.27261485269714236,
      "eval_rougeL": 0.4747461246549739,
      "eval_rougeLsum": 0.49067235332780945,
      "eval_runtime": 1023.6212,
      "eval_samples_per_second": 1.463,
      "eval_steps_per_second": 0.092,
      "step": 10920
    },
    {
      "epoch": 13.1,
      "learning_rate": 1.8170426065162908e-05,
      "loss": 1.0454,
      "step": 11000
    },
    {
      "epoch": 13.69,
      "learning_rate": 1.6604010025062658e-05,
      "loss": 1.0273,
      "step": 11500
    },
    {
      "epoch": 14.0,
      "eval_loss": 1.2045047283172607,
      "eval_rouge1": 0.5309321460587708,
      "eval_rouge2": 0.27493073812627356,
      "eval_rougeL": 0.4776371159725116,
      "eval_rougeLsum": 0.49401653255083877,
      "eval_runtime": 933.4164,
      "eval_samples_per_second": 1.605,
      "eval_steps_per_second": 0.101,
      "step": 11760
    },
    {
      "epoch": 14.29,
      "learning_rate": 1.5037593984962406e-05,
      "loss": 1.0257,
      "step": 12000
    },
    {
      "epoch": 14.88,
      "learning_rate": 1.3471177944862157e-05,
      "loss": 1.0218,
      "step": 12500
    },
    {
      "epoch": 15.0,
      "eval_loss": 1.207719326019287,
      "eval_rouge1": 0.5295059932432493,
      "eval_rouge2": 0.2727809059549531,
      "eval_rougeL": 0.4770545143695025,
      "eval_rougeLsum": 0.49245995561354516,
      "eval_runtime": 874.8747,
      "eval_samples_per_second": 1.712,
      "eval_steps_per_second": 0.107,
      "step": 12600
    },
    {
      "epoch": 15.48,
      "learning_rate": 1.1904761904761905e-05,
      "loss": 1.0208,
      "step": 13000
    },
    {
      "epoch": 16.0,
      "eval_loss": 1.2094707489013672,
      "eval_rouge1": 0.5303496124431821,
      "eval_rouge2": 0.27278313644220603,
      "eval_rougeL": 0.47754216166274177,
      "eval_rougeLsum": 0.49284317669377947,
      "eval_runtime": 939.0978,
      "eval_samples_per_second": 1.595,
      "eval_steps_per_second": 0.1,
      "step": 13440
    },
    {
      "epoch": 16.07,
      "learning_rate": 1.0338345864661655e-05,
      "loss": 1.0093,
      "step": 13500
    },
    {
      "epoch": 16.67,
      "learning_rate": 8.771929824561403e-06,
      "loss": 1.003,
      "step": 14000
    },
    {
      "epoch": 17.0,
      "eval_loss": 1.2109801769256592,
      "eval_rouge1": 0.5301463905288382,
      "eval_rouge2": 0.2726100969561255,
      "eval_rougeL": 0.47718245520328006,
      "eval_rougeLsum": 0.49294754454933043,
      "eval_runtime": 829.8724,
      "eval_samples_per_second": 1.805,
      "eval_steps_per_second": 0.113,
      "step": 14280
    },
    {
      "epoch": 17.26,
      "learning_rate": 7.205513784461153e-06,
      "loss": 1.0002,
      "step": 14500
    },
    {
      "epoch": 17.86,
      "learning_rate": 5.639097744360902e-06,
      "loss": 1.003,
      "step": 15000
    },
    {
      "epoch": 18.0,
      "eval_loss": 1.209855556488037,
      "eval_rouge1": 0.5314118934504415,
      "eval_rouge2": 0.27293095312101445,
      "eval_rougeL": 0.47805026323896327,
      "eval_rougeLsum": 0.49408516728917673,
      "eval_runtime": 882.3325,
      "eval_samples_per_second": 1.698,
      "eval_steps_per_second": 0.107,
      "step": 15120
    }
  ],
  "logging_steps": 500,
  "max_steps": 16800,
  "num_train_epochs": 20,
  "save_steps": 500,
  "total_flos": 3.682281994080768e+16,
  "trial_name": null,
  "trial_params": null
}
|