abhiman23897's picture
Training in progress, epoch 1
49a4812
{
"best_metric": 0.5314118934504415,
"best_model_checkpoint": "./results/checkpoint-15120",
"epoch": 18.0,
"eval_steps": 500,
"global_step": 15120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.6,
"learning_rate": 2.9761904761904762e-05,
"loss": 7.0613,
"step": 500
},
{
"epoch": 1.0,
"eval_loss": 1.2605280876159668,
"eval_rouge1": 0.5053086747478719,
"eval_rouge2": 0.24615333793097546,
"eval_rougeL": 0.4512970323707136,
"eval_rougeLsum": 0.4656617846478366,
"eval_runtime": 772.9001,
"eval_samples_per_second": 1.938,
"eval_steps_per_second": 0.122,
"step": 840
},
{
"epoch": 1.19,
"learning_rate": 4.9498746867167923e-05,
"loss": 1.4331,
"step": 1000
},
{
"epoch": 1.79,
"learning_rate": 4.793233082706767e-05,
"loss": 1.3665,
"step": 1500
},
{
"epoch": 2.0,
"eval_loss": 1.2270965576171875,
"eval_rouge1": 0.5124476660885933,
"eval_rouge2": 0.256176861062982,
"eval_rougeL": 0.4600482066009496,
"eval_rougeLsum": 0.4746519151030747,
"eval_runtime": 1039.1433,
"eval_samples_per_second": 1.442,
"eval_steps_per_second": 0.09,
"step": 1680
},
{
"epoch": 2.38,
"learning_rate": 4.6365914786967416e-05,
"loss": 1.3381,
"step": 2000
},
{
"epoch": 2.98,
"learning_rate": 4.4799498746867166e-05,
"loss": 1.3009,
"step": 2500
},
{
"epoch": 3.0,
"eval_loss": 1.2114347219467163,
"eval_rouge1": 0.5179075668199837,
"eval_rouge2": 0.2612977945518593,
"eval_rougeL": 0.46524322885156827,
"eval_rougeLsum": 0.4802301912620997,
"eval_runtime": 1018.7441,
"eval_samples_per_second": 1.47,
"eval_steps_per_second": 0.092,
"step": 2520
},
{
"epoch": 3.57,
"learning_rate": 4.323308270676692e-05,
"loss": 1.2594,
"step": 3000
},
{
"epoch": 4.0,
"eval_loss": 1.203908085823059,
"eval_rouge1": 0.5229139335151443,
"eval_rouge2": 0.26558551302892763,
"eval_rougeL": 0.46774305498128443,
"eval_rougeLsum": 0.483920816134904,
"eval_runtime": 926.8973,
"eval_samples_per_second": 1.616,
"eval_steps_per_second": 0.101,
"step": 3360
},
{
"epoch": 4.17,
"learning_rate": 4.166666666666667e-05,
"loss": 1.247,
"step": 3500
},
{
"epoch": 4.76,
"learning_rate": 4.0100250626566415e-05,
"loss": 1.227,
"step": 4000
},
{
"epoch": 5.0,
"eval_loss": 1.1987696886062622,
"eval_rouge1": 0.5247159372903829,
"eval_rouge2": 0.2668774790757894,
"eval_rougeL": 0.4698131046976711,
"eval_rougeLsum": 0.4859852789981728,
"eval_runtime": 1138.215,
"eval_samples_per_second": 1.316,
"eval_steps_per_second": 0.083,
"step": 4200
},
{
"epoch": 5.36,
"learning_rate": 3.8533834586466165e-05,
"loss": 1.1951,
"step": 4500
},
{
"epoch": 5.95,
"learning_rate": 3.6967418546365914e-05,
"loss": 1.1918,
"step": 5000
},
{
"epoch": 6.0,
"eval_loss": 1.1965585947036743,
"eval_rouge1": 0.5238465835630357,
"eval_rouge2": 0.2697694029030245,
"eval_rougeL": 0.4700972375246304,
"eval_rougeLsum": 0.48604440949884575,
"eval_runtime": 1144.1344,
"eval_samples_per_second": 1.309,
"eval_steps_per_second": 0.082,
"step": 5040
},
{
"epoch": 6.55,
"learning_rate": 3.540100250626567e-05,
"loss": 1.1696,
"step": 5500
},
{
"epoch": 7.0,
"eval_loss": 1.193439245223999,
"eval_rouge1": 0.5261420787321425,
"eval_rouge2": 0.2701869159088741,
"eval_rougeL": 0.472288301825879,
"eval_rougeLsum": 0.48796847398006715,
"eval_runtime": 913.4514,
"eval_samples_per_second": 1.64,
"eval_steps_per_second": 0.103,
"step": 5880
},
{
"epoch": 7.14,
"learning_rate": 3.3834586466165414e-05,
"loss": 1.1472,
"step": 6000
},
{
"epoch": 7.74,
"learning_rate": 3.2268170426065164e-05,
"loss": 1.1285,
"step": 6500
},
{
"epoch": 8.0,
"eval_loss": 1.1932892799377441,
"eval_rouge1": 0.5236558096283529,
"eval_rouge2": 0.26931250040124155,
"eval_rougeL": 0.4700827158260693,
"eval_rougeLsum": 0.48489767885463625,
"eval_runtime": 1155.7348,
"eval_samples_per_second": 1.296,
"eval_steps_per_second": 0.081,
"step": 6720
},
{
"epoch": 8.33,
"learning_rate": 3.0701754385964913e-05,
"loss": 1.1234,
"step": 7000
},
{
"epoch": 8.93,
"learning_rate": 2.9135338345864667e-05,
"loss": 1.1153,
"step": 7500
},
{
"epoch": 9.0,
"eval_loss": 1.1960569620132446,
"eval_rouge1": 0.5263030048004651,
"eval_rouge2": 0.27081863504950743,
"eval_rougeL": 0.4724221317085258,
"eval_rougeLsum": 0.48803341513882764,
"eval_runtime": 983.851,
"eval_samples_per_second": 1.523,
"eval_steps_per_second": 0.096,
"step": 7560
},
{
"epoch": 9.52,
"learning_rate": 2.756892230576441e-05,
"loss": 1.0927,
"step": 8000
},
{
"epoch": 10.0,
"eval_loss": 1.1961216926574707,
"eval_rouge1": 0.5253667627324876,
"eval_rouge2": 0.2690944896569006,
"eval_rougeL": 0.4720451976404024,
"eval_rougeLsum": 0.48738350139979847,
"eval_runtime": 932.6258,
"eval_samples_per_second": 1.606,
"eval_steps_per_second": 0.101,
"step": 8400
},
{
"epoch": 10.12,
"learning_rate": 2.6002506265664163e-05,
"loss": 1.0933,
"step": 8500
},
{
"epoch": 10.71,
"learning_rate": 2.443609022556391e-05,
"loss": 1.0661,
"step": 9000
},
{
"epoch": 11.0,
"eval_loss": 1.2010161876678467,
"eval_rouge1": 0.5234197228179973,
"eval_rouge2": 0.2683998445240433,
"eval_rougeL": 0.4697712822647654,
"eval_rougeLsum": 0.48542105989790263,
"eval_runtime": 1250.3048,
"eval_samples_per_second": 1.198,
"eval_steps_per_second": 0.075,
"step": 9240
},
{
"epoch": 11.31,
"learning_rate": 2.2869674185463662e-05,
"loss": 1.0663,
"step": 9500
},
{
"epoch": 11.9,
"learning_rate": 2.130325814536341e-05,
"loss": 1.0634,
"step": 10000
},
{
"epoch": 12.0,
"eval_loss": 1.2002513408660889,
"eval_rouge1": 0.5259472773406924,
"eval_rouge2": 0.2723400096809272,
"eval_rougeL": 0.47293541812561896,
"eval_rougeLsum": 0.4885362784921822,
"eval_runtime": 1027.3406,
"eval_samples_per_second": 1.458,
"eval_steps_per_second": 0.091,
"step": 10080
},
{
"epoch": 12.5,
"learning_rate": 1.9736842105263158e-05,
"loss": 1.046,
"step": 10500
},
{
"epoch": 13.0,
"eval_loss": 1.2019047737121582,
"eval_rouge1": 0.527682777778989,
"eval_rouge2": 0.27261485269714236,
"eval_rougeL": 0.4747461246549739,
"eval_rougeLsum": 0.49067235332780945,
"eval_runtime": 1023.6212,
"eval_samples_per_second": 1.463,
"eval_steps_per_second": 0.092,
"step": 10920
},
{
"epoch": 13.1,
"learning_rate": 1.8170426065162908e-05,
"loss": 1.0454,
"step": 11000
},
{
"epoch": 13.69,
"learning_rate": 1.6604010025062658e-05,
"loss": 1.0273,
"step": 11500
},
{
"epoch": 14.0,
"eval_loss": 1.2045047283172607,
"eval_rouge1": 0.5309321460587708,
"eval_rouge2": 0.27493073812627356,
"eval_rougeL": 0.4776371159725116,
"eval_rougeLsum": 0.49401653255083877,
"eval_runtime": 933.4164,
"eval_samples_per_second": 1.605,
"eval_steps_per_second": 0.101,
"step": 11760
},
{
"epoch": 14.29,
"learning_rate": 1.5037593984962406e-05,
"loss": 1.0257,
"step": 12000
},
{
"epoch": 14.88,
"learning_rate": 1.3471177944862157e-05,
"loss": 1.0218,
"step": 12500
},
{
"epoch": 15.0,
"eval_loss": 1.207719326019287,
"eval_rouge1": 0.5295059932432493,
"eval_rouge2": 0.2727809059549531,
"eval_rougeL": 0.4770545143695025,
"eval_rougeLsum": 0.49245995561354516,
"eval_runtime": 874.8747,
"eval_samples_per_second": 1.712,
"eval_steps_per_second": 0.107,
"step": 12600
},
{
"epoch": 15.48,
"learning_rate": 1.1904761904761905e-05,
"loss": 1.0208,
"step": 13000
},
{
"epoch": 16.0,
"eval_loss": 1.2094707489013672,
"eval_rouge1": 0.5303496124431821,
"eval_rouge2": 0.27278313644220603,
"eval_rougeL": 0.47754216166274177,
"eval_rougeLsum": 0.49284317669377947,
"eval_runtime": 939.0978,
"eval_samples_per_second": 1.595,
"eval_steps_per_second": 0.1,
"step": 13440
},
{
"epoch": 16.07,
"learning_rate": 1.0338345864661655e-05,
"loss": 1.0093,
"step": 13500
},
{
"epoch": 16.67,
"learning_rate": 8.771929824561403e-06,
"loss": 1.003,
"step": 14000
},
{
"epoch": 17.0,
"eval_loss": 1.2109801769256592,
"eval_rouge1": 0.5301463905288382,
"eval_rouge2": 0.2726100969561255,
"eval_rougeL": 0.47718245520328006,
"eval_rougeLsum": 0.49294754454933043,
"eval_runtime": 829.8724,
"eval_samples_per_second": 1.805,
"eval_steps_per_second": 0.113,
"step": 14280
},
{
"epoch": 17.26,
"learning_rate": 7.205513784461153e-06,
"loss": 1.0002,
"step": 14500
},
{
"epoch": 17.86,
"learning_rate": 5.639097744360902e-06,
"loss": 1.003,
"step": 15000
},
{
"epoch": 18.0,
"eval_loss": 1.209855556488037,
"eval_rouge1": 0.5314118934504415,
"eval_rouge2": 0.27293095312101445,
"eval_rougeL": 0.47805026323896327,
"eval_rougeLsum": 0.49408516728917673,
"eval_runtime": 882.3325,
"eval_samples_per_second": 1.698,
"eval_steps_per_second": 0.107,
"step": 15120
}
],
"logging_steps": 500,
"max_steps": 16800,
"num_train_epochs": 20,
"save_steps": 500,
"total_flos": 3.682281994080768e+16,
"trial_name": null,
"trial_params": null
}