cls-comment-phobert-base-v2-v2.3 / trainer_state.json
tiennguyenbnbk's picture
End of training
53efd81 verified
{
"best_metric": 0.8943646614708781,
"best_model_checkpoint": "cls_comment-phobert-base-v2-v2.3/checkpoint-1600",
"epoch": 21.875,
"eval_steps": 100,
"global_step": 2100,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.04,
"grad_norm": 2.3460028171539307,
"learning_rate": 2.5e-06,
"loss": 1.1884,
"step": 100
},
{
"epoch": 1.04,
"eval_accuracy": 0.7277179236043095,
"eval_f1_score": 0.43708201591573076,
"eval_loss": 1.0905314683914185,
"eval_precision": 0.5016943776884798,
"eval_recall": 0.42288884208445504,
"eval_runtime": 4.9336,
"eval_samples_per_second": 620.848,
"eval_steps_per_second": 9.729,
"step": 100
},
{
"epoch": 2.08,
"grad_norm": 9.137602806091309,
"learning_rate": 5e-06,
"loss": 1.0502,
"step": 200
},
{
"epoch": 2.08,
"eval_accuracy": 0.7956252040483186,
"eval_f1_score": 0.5245336258309143,
"eval_loss": 0.9278329014778137,
"eval_precision": 0.5244123326675032,
"eval_recall": 0.5273852871687749,
"eval_runtime": 4.9622,
"eval_samples_per_second": 617.266,
"eval_steps_per_second": 9.673,
"step": 200
},
{
"epoch": 3.12,
"grad_norm": 8.277727127075195,
"learning_rate": 7.500000000000001e-06,
"loss": 0.9031,
"step": 300
},
{
"epoch": 3.12,
"eval_accuracy": 0.8445968005223637,
"eval_f1_score": 0.5653211085059351,
"eval_loss": 0.7793412208557129,
"eval_precision": 0.7067057672713553,
"eval_recall": 0.589697230912186,
"eval_runtime": 5.0159,
"eval_samples_per_second": 610.664,
"eval_steps_per_second": 9.57,
"step": 300
},
{
"epoch": 4.17,
"grad_norm": 3.594104290008545,
"learning_rate": 1e-05,
"loss": 0.7772,
"step": 400
},
{
"epoch": 4.17,
"eval_accuracy": 0.8697355533790402,
"eval_f1_score": 0.6917446533625714,
"eval_loss": 0.7089904546737671,
"eval_precision": 0.7268852070004593,
"eval_recall": 0.6804440791993415,
"eval_runtime": 4.9399,
"eval_samples_per_second": 620.058,
"eval_steps_per_second": 9.717,
"step": 400
},
{
"epoch": 5.21,
"grad_norm": 4.503917694091797,
"learning_rate": 9.722222222222223e-06,
"loss": 0.6741,
"step": 500
},
{
"epoch": 5.21,
"eval_accuracy": 0.8958537381651975,
"eval_f1_score": 0.8078466525990172,
"eval_loss": 0.648346483707428,
"eval_precision": 0.8797678600731677,
"eval_recall": 0.7817903123125977,
"eval_runtime": 4.9504,
"eval_samples_per_second": 618.741,
"eval_steps_per_second": 9.696,
"step": 500
},
{
"epoch": 6.25,
"grad_norm": 4.407541275024414,
"learning_rate": 9.444444444444445e-06,
"loss": 0.6156,
"step": 600
},
{
"epoch": 6.25,
"eval_accuracy": 0.9079333986287953,
"eval_f1_score": 0.8601153974079928,
"eval_loss": 0.6268225908279419,
"eval_precision": 0.8933059224979765,
"eval_recall": 0.8344643413921417,
"eval_runtime": 4.9864,
"eval_samples_per_second": 614.266,
"eval_steps_per_second": 9.626,
"step": 600
},
{
"epoch": 7.29,
"grad_norm": 5.013394355773926,
"learning_rate": 9.166666666666666e-06,
"loss": 0.5745,
"step": 700
},
{
"epoch": 7.29,
"eval_accuracy": 0.9160953313744695,
"eval_f1_score": 0.8738968846084525,
"eval_loss": 0.6176044940948486,
"eval_precision": 0.8880590844686177,
"eval_recall": 0.8609982161968145,
"eval_runtime": 5.0009,
"eval_samples_per_second": 612.484,
"eval_steps_per_second": 9.598,
"step": 700
},
{
"epoch": 8.33,
"grad_norm": 4.954068183898926,
"learning_rate": 8.888888888888888e-06,
"loss": 0.5466,
"step": 800
},
{
"epoch": 8.33,
"eval_accuracy": 0.9180541952334312,
"eval_f1_score": 0.8730278944900508,
"eval_loss": 0.613418698310852,
"eval_precision": 0.8951285463624247,
"eval_recall": 0.8555146035099561,
"eval_runtime": 4.9947,
"eval_samples_per_second": 613.247,
"eval_steps_per_second": 9.61,
"step": 800
},
{
"epoch": 9.38,
"grad_norm": 6.174578666687012,
"learning_rate": 8.611111111111112e-06,
"loss": 0.5254,
"step": 900
},
{
"epoch": 9.38,
"eval_accuracy": 0.9183806725432583,
"eval_f1_score": 0.8794225212758101,
"eval_loss": 0.6195659637451172,
"eval_precision": 0.898010742334885,
"eval_recall": 0.8634351648847699,
"eval_runtime": 4.9765,
"eval_samples_per_second": 615.487,
"eval_steps_per_second": 9.645,
"step": 900
},
{
"epoch": 10.42,
"grad_norm": 5.367175102233887,
"learning_rate": 8.333333333333334e-06,
"loss": 0.5059,
"step": 1000
},
{
"epoch": 10.42,
"eval_accuracy": 0.9219719229513549,
"eval_f1_score": 0.8799283123746076,
"eval_loss": 0.6174536347389221,
"eval_precision": 0.8925411600987329,
"eval_recall": 0.8702271946202808,
"eval_runtime": 4.9764,
"eval_samples_per_second": 615.501,
"eval_steps_per_second": 9.645,
"step": 1000
},
{
"epoch": 11.46,
"grad_norm": 6.586447238922119,
"learning_rate": 8.055555555555557e-06,
"loss": 0.4971,
"step": 1100
},
{
"epoch": 11.46,
"eval_accuracy": 0.9226248775710089,
"eval_f1_score": 0.8770727386787929,
"eval_loss": 0.6110312342643738,
"eval_precision": 0.89290929483548,
"eval_recall": 0.8638757728594787,
"eval_runtime": 4.9931,
"eval_samples_per_second": 613.443,
"eval_steps_per_second": 9.613,
"step": 1100
},
{
"epoch": 12.5,
"grad_norm": 3.8108513355255127,
"learning_rate": 7.77777777777778e-06,
"loss": 0.4872,
"step": 1200
},
{
"epoch": 12.5,
"eval_accuracy": 0.9229513548808358,
"eval_f1_score": 0.8833538508725888,
"eval_loss": 0.6191003918647766,
"eval_precision": 0.8914104564156814,
"eval_recall": 0.8774149435732262,
"eval_runtime": 4.9594,
"eval_samples_per_second": 617.612,
"eval_steps_per_second": 9.679,
"step": 1200
},
{
"epoch": 13.54,
"grad_norm": 2.2206079959869385,
"learning_rate": 7.500000000000001e-06,
"loss": 0.4756,
"step": 1300
},
{
"epoch": 13.54,
"eval_accuracy": 0.9242572641201436,
"eval_f1_score": 0.892919420274636,
"eval_loss": 0.6239792704582214,
"eval_precision": 0.8938258370033757,
"eval_recall": 0.8931296696096861,
"eval_runtime": 4.9692,
"eval_samples_per_second": 616.403,
"eval_steps_per_second": 9.66,
"step": 1300
},
{
"epoch": 14.58,
"grad_norm": 3.2590479850769043,
"learning_rate": 7.222222222222223e-06,
"loss": 0.4737,
"step": 1400
},
{
"epoch": 14.58,
"eval_accuracy": 0.92033953640222,
"eval_f1_score": 0.8793750306978456,
"eval_loss": 0.6246046423912048,
"eval_precision": 0.9038460171417751,
"eval_recall": 0.8582419368732399,
"eval_runtime": 5.0236,
"eval_samples_per_second": 609.724,
"eval_steps_per_second": 9.555,
"step": 1400
},
{
"epoch": 15.62,
"grad_norm": 4.4676127433776855,
"learning_rate": 6.944444444444445e-06,
"loss": 0.4626,
"step": 1500
},
{
"epoch": 15.62,
"eval_accuracy": 0.9249102187397976,
"eval_f1_score": 0.8889732013227639,
"eval_loss": 0.6267126798629761,
"eval_precision": 0.8955131575427445,
"eval_recall": 0.8842202196754521,
"eval_runtime": 5.0075,
"eval_samples_per_second": 611.678,
"eval_steps_per_second": 9.586,
"step": 1500
},
{
"epoch": 16.67,
"grad_norm": 1.5825713872909546,
"learning_rate": 6.666666666666667e-06,
"loss": 0.4641,
"step": 1600
},
{
"epoch": 16.67,
"eval_accuracy": 0.9271955599085864,
"eval_f1_score": 0.8943646614708781,
"eval_loss": 0.6228047013282776,
"eval_precision": 0.9063376648453086,
"eval_recall": 0.8842502417202245,
"eval_runtime": 4.9845,
"eval_samples_per_second": 614.503,
"eval_steps_per_second": 9.63,
"step": 1600
},
{
"epoch": 17.71,
"grad_norm": 3.3820607662200928,
"learning_rate": 6.3888888888888885e-06,
"loss": 0.4562,
"step": 1700
},
{
"epoch": 17.71,
"eval_accuracy": 0.9278485145282402,
"eval_f1_score": 0.892329553955837,
"eval_loss": 0.6255541443824768,
"eval_precision": 0.9064974712840742,
"eval_recall": 0.8800313461131938,
"eval_runtime": 5.0301,
"eval_samples_per_second": 608.937,
"eval_steps_per_second": 9.543,
"step": 1700
},
{
"epoch": 18.75,
"grad_norm": 0.6632604598999023,
"learning_rate": 6.111111111111112e-06,
"loss": 0.4522,
"step": 1800
},
{
"epoch": 18.75,
"eval_accuracy": 0.9285014691478942,
"eval_f1_score": 0.8912735294131965,
"eval_loss": 0.6203188300132751,
"eval_precision": 0.8992701751319325,
"eval_recall": 0.8844935194666589,
"eval_runtime": 5.0719,
"eval_samples_per_second": 603.913,
"eval_steps_per_second": 9.464,
"step": 1800
},
{
"epoch": 19.79,
"grad_norm": 1.0484055280685425,
"learning_rate": 5.833333333333334e-06,
"loss": 0.4476,
"step": 1900
},
{
"epoch": 19.79,
"eval_accuracy": 0.9262161279791055,
"eval_f1_score": 0.8848887104133892,
"eval_loss": 0.6258328557014465,
"eval_precision": 0.9033804413868713,
"eval_recall": 0.8691664648450299,
"eval_runtime": 5.0588,
"eval_samples_per_second": 605.485,
"eval_steps_per_second": 9.488,
"step": 1900
},
{
"epoch": 20.83,
"grad_norm": 4.217446804046631,
"learning_rate": 5.555555555555557e-06,
"loss": 0.4474,
"step": 2000
},
{
"epoch": 20.83,
"eval_accuracy": 0.9271955599085864,
"eval_f1_score": 0.8931632926835013,
"eval_loss": 0.6333132386207581,
"eval_precision": 0.9133975869931371,
"eval_recall": 0.8751018475655369,
"eval_runtime": 4.9412,
"eval_samples_per_second": 619.889,
"eval_steps_per_second": 9.714,
"step": 2000
},
{
"epoch": 21.88,
"grad_norm": 2.5882654190063477,
"learning_rate": 5.2777777777777785e-06,
"loss": 0.4421,
"step": 2100
},
{
"epoch": 21.88,
"eval_accuracy": 0.9291544237675482,
"eval_f1_score": 0.8929419711474625,
"eval_loss": 0.6339655518531799,
"eval_precision": 0.8975107075254134,
"eval_recall": 0.8896075654961032,
"eval_runtime": 4.9672,
"eval_samples_per_second": 616.645,
"eval_steps_per_second": 9.663,
"step": 2100
},
{
"epoch": 21.88,
"step": 2100,
"total_flos": 4711306254898320.0,
"train_loss": 0.5936482783726283,
"train_runtime": 1811.9756,
"train_samples_per_second": 282.565,
"train_steps_per_second": 2.208
}
],
"logging_steps": 100,
"max_steps": 4000,
"num_input_tokens_seen": 0,
"num_train_epochs": 42,
"save_steps": 100,
"total_flos": 4711306254898320.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}