{
  "best_metric": 0.8943646614708781,
  "best_model_checkpoint": "cls_comment-phobert-base-v2-v2.3/checkpoint-1600",
  "epoch": 21.875,
  "eval_steps": 100,
  "global_step": 2100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.04,
      "grad_norm": 2.3460028171539307,
      "learning_rate": 2.5e-06,
      "loss": 1.1884,
      "step": 100
    },
    {
      "epoch": 1.04,
      "eval_accuracy": 0.7277179236043095,
      "eval_f1_score": 0.43708201591573076,
      "eval_loss": 1.0905314683914185,
      "eval_precision": 0.5016943776884798,
      "eval_recall": 0.42288884208445504,
      "eval_runtime": 4.9336,
      "eval_samples_per_second": 620.848,
      "eval_steps_per_second": 9.729,
      "step": 100
    },
    {
      "epoch": 2.08,
      "grad_norm": 9.137602806091309,
      "learning_rate": 5e-06,
      "loss": 1.0502,
      "step": 200
    },
    {
      "epoch": 2.08,
      "eval_accuracy": 0.7956252040483186,
      "eval_f1_score": 0.5245336258309143,
      "eval_loss": 0.9278329014778137,
      "eval_precision": 0.5244123326675032,
      "eval_recall": 0.5273852871687749,
      "eval_runtime": 4.9622,
      "eval_samples_per_second": 617.266,
      "eval_steps_per_second": 9.673,
      "step": 200
    },
    {
      "epoch": 3.12,
      "grad_norm": 8.277727127075195,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.9031,
      "step": 300
    },
    {
      "epoch": 3.12,
      "eval_accuracy": 0.8445968005223637,
      "eval_f1_score": 0.5653211085059351,
      "eval_loss": 0.7793412208557129,
      "eval_precision": 0.7067057672713553,
      "eval_recall": 0.589697230912186,
      "eval_runtime": 5.0159,
      "eval_samples_per_second": 610.664,
      "eval_steps_per_second": 9.57,
      "step": 300
    },
    {
      "epoch": 4.17,
      "grad_norm": 3.594104290008545,
      "learning_rate": 1e-05,
      "loss": 0.7772,
      "step": 400
    },
    {
      "epoch": 4.17,
      "eval_accuracy": 0.8697355533790402,
      "eval_f1_score": 0.6917446533625714,
      "eval_loss": 0.7089904546737671,
      "eval_precision": 0.7268852070004593,
      "eval_recall": 0.6804440791993415,
      "eval_runtime": 4.9399,
      "eval_samples_per_second": 620.058,
      "eval_steps_per_second": 9.717,
      "step": 400
    },
    {
      "epoch": 5.21,
      "grad_norm": 4.503917694091797,
      "learning_rate": 9.722222222222223e-06,
      "loss": 0.6741,
      "step": 500
    },
    {
      "epoch": 5.21,
      "eval_accuracy": 0.8958537381651975,
      "eval_f1_score": 0.8078466525990172,
      "eval_loss": 0.648346483707428,
      "eval_precision": 0.8797678600731677,
      "eval_recall": 0.7817903123125977,
      "eval_runtime": 4.9504,
      "eval_samples_per_second": 618.741,
      "eval_steps_per_second": 9.696,
      "step": 500
    },
    {
      "epoch": 6.25,
      "grad_norm": 4.407541275024414,
      "learning_rate": 9.444444444444445e-06,
      "loss": 0.6156,
      "step": 600
    },
    {
      "epoch": 6.25,
      "eval_accuracy": 0.9079333986287953,
      "eval_f1_score": 0.8601153974079928,
      "eval_loss": 0.6268225908279419,
      "eval_precision": 0.8933059224979765,
      "eval_recall": 0.8344643413921417,
      "eval_runtime": 4.9864,
      "eval_samples_per_second": 614.266,
      "eval_steps_per_second": 9.626,
      "step": 600
    },
    {
      "epoch": 7.29,
      "grad_norm": 5.013394355773926,
      "learning_rate": 9.166666666666666e-06,
      "loss": 0.5745,
      "step": 700
    },
    {
      "epoch": 7.29,
      "eval_accuracy": 0.9160953313744695,
      "eval_f1_score": 0.8738968846084525,
      "eval_loss": 0.6176044940948486,
      "eval_precision": 0.8880590844686177,
      "eval_recall": 0.8609982161968145,
      "eval_runtime": 5.0009,
      "eval_samples_per_second": 612.484,
      "eval_steps_per_second": 9.598,
      "step": 700
    },
    {
      "epoch": 8.33,
      "grad_norm": 4.954068183898926,
      "learning_rate": 8.888888888888888e-06,
      "loss": 0.5466,
      "step": 800
    },
    {
      "epoch": 8.33,
      "eval_accuracy": 0.9180541952334312,
      "eval_f1_score": 0.8730278944900508,
      "eval_loss": 0.613418698310852,
      "eval_precision": 0.8951285463624247,
      "eval_recall": 0.8555146035099561,
      "eval_runtime": 4.9947,
      "eval_samples_per_second": 613.247,
      "eval_steps_per_second": 9.61,
      "step": 800
    },
    {
      "epoch": 9.38,
      "grad_norm": 6.174578666687012,
      "learning_rate": 8.611111111111112e-06,
      "loss": 0.5254,
      "step": 900
    },
    {
      "epoch": 9.38,
      "eval_accuracy": 0.9183806725432583,
      "eval_f1_score": 0.8794225212758101,
      "eval_loss": 0.6195659637451172,
      "eval_precision": 0.898010742334885,
      "eval_recall": 0.8634351648847699,
      "eval_runtime": 4.9765,
      "eval_samples_per_second": 615.487,
      "eval_steps_per_second": 9.645,
      "step": 900
    },
    {
      "epoch": 10.42,
      "grad_norm": 5.367175102233887,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.5059,
      "step": 1000
    },
    {
      "epoch": 10.42,
      "eval_accuracy": 0.9219719229513549,
      "eval_f1_score": 0.8799283123746076,
      "eval_loss": 0.6174536347389221,
      "eval_precision": 0.8925411600987329,
      "eval_recall": 0.8702271946202808,
      "eval_runtime": 4.9764,
      "eval_samples_per_second": 615.501,
      "eval_steps_per_second": 9.645,
      "step": 1000
    },
    {
      "epoch": 11.46,
      "grad_norm": 6.586447238922119,
      "learning_rate": 8.055555555555557e-06,
      "loss": 0.4971,
      "step": 1100
    },
    {
      "epoch": 11.46,
      "eval_accuracy": 0.9226248775710089,
      "eval_f1_score": 0.8770727386787929,
      "eval_loss": 0.6110312342643738,
      "eval_precision": 0.89290929483548,
      "eval_recall": 0.8638757728594787,
      "eval_runtime": 4.9931,
      "eval_samples_per_second": 613.443,
      "eval_steps_per_second": 9.613,
      "step": 1100
    },
    {
      "epoch": 12.5,
      "grad_norm": 3.8108513355255127,
      "learning_rate": 7.77777777777778e-06,
      "loss": 0.4872,
      "step": 1200
    },
    {
      "epoch": 12.5,
      "eval_accuracy": 0.9229513548808358,
      "eval_f1_score": 0.8833538508725888,
      "eval_loss": 0.6191003918647766,
      "eval_precision": 0.8914104564156814,
      "eval_recall": 0.8774149435732262,
      "eval_runtime": 4.9594,
      "eval_samples_per_second": 617.612,
      "eval_steps_per_second": 9.679,
      "step": 1200
    },
    {
      "epoch": 13.54,
      "grad_norm": 2.2206079959869385,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.4756,
      "step": 1300
    },
    {
      "epoch": 13.54,
      "eval_accuracy": 0.9242572641201436,
      "eval_f1_score": 0.892919420274636,
      "eval_loss": 0.6239792704582214,
      "eval_precision": 0.8938258370033757,
      "eval_recall": 0.8931296696096861,
      "eval_runtime": 4.9692,
      "eval_samples_per_second": 616.403,
      "eval_steps_per_second": 9.66,
      "step": 1300
    },
    {
      "epoch": 14.58,
      "grad_norm": 3.2590479850769043,
      "learning_rate": 7.222222222222223e-06,
      "loss": 0.4737,
      "step": 1400
    },
    {
      "epoch": 14.58,
      "eval_accuracy": 0.92033953640222,
      "eval_f1_score": 0.8793750306978456,
      "eval_loss": 0.6246046423912048,
      "eval_precision": 0.9038460171417751,
      "eval_recall": 0.8582419368732399,
      "eval_runtime": 5.0236,
      "eval_samples_per_second": 609.724,
      "eval_steps_per_second": 9.555,
      "step": 1400
    },
    {
      "epoch": 15.62,
      "grad_norm": 4.4676127433776855,
      "learning_rate": 6.944444444444445e-06,
      "loss": 0.4626,
      "step": 1500
    },
    {
      "epoch": 15.62,
      "eval_accuracy": 0.9249102187397976,
      "eval_f1_score": 0.8889732013227639,
      "eval_loss": 0.6267126798629761,
      "eval_precision": 0.8955131575427445,
      "eval_recall": 0.8842202196754521,
      "eval_runtime": 5.0075,
      "eval_samples_per_second": 611.678,
      "eval_steps_per_second": 9.586,
      "step": 1500
    },
    {
      "epoch": 16.67,
      "grad_norm": 1.5825713872909546,
      "learning_rate": 6.666666666666667e-06,
      "loss": 0.4641,
      "step": 1600
    },
    {
      "epoch": 16.67,
      "eval_accuracy": 0.9271955599085864,
      "eval_f1_score": 0.8943646614708781,
      "eval_loss": 0.6228047013282776,
      "eval_precision": 0.9063376648453086,
      "eval_recall": 0.8842502417202245,
      "eval_runtime": 4.9845,
      "eval_samples_per_second": 614.503,
      "eval_steps_per_second": 9.63,
      "step": 1600
    },
    {
      "epoch": 17.71,
      "grad_norm": 3.3820607662200928,
      "learning_rate": 6.3888888888888885e-06,
      "loss": 0.4562,
      "step": 1700
    },
    {
      "epoch": 17.71,
      "eval_accuracy": 0.9278485145282402,
      "eval_f1_score": 0.892329553955837,
      "eval_loss": 0.6255541443824768,
      "eval_precision": 0.9064974712840742,
      "eval_recall": 0.8800313461131938,
      "eval_runtime": 5.0301,
      "eval_samples_per_second": 608.937,
      "eval_steps_per_second": 9.543,
      "step": 1700
    },
    {
      "epoch": 18.75,
      "grad_norm": 0.6632604598999023,
      "learning_rate": 6.111111111111112e-06,
      "loss": 0.4522,
      "step": 1800
    },
    {
      "epoch": 18.75,
      "eval_accuracy": 0.9285014691478942,
      "eval_f1_score": 0.8912735294131965,
      "eval_loss": 0.6203188300132751,
      "eval_precision": 0.8992701751319325,
      "eval_recall": 0.8844935194666589,
      "eval_runtime": 5.0719,
      "eval_samples_per_second": 603.913,
      "eval_steps_per_second": 9.464,
      "step": 1800
    },
    {
      "epoch": 19.79,
      "grad_norm": 1.0484055280685425,
      "learning_rate": 5.833333333333334e-06,
      "loss": 0.4476,
      "step": 1900
    },
    {
      "epoch": 19.79,
      "eval_accuracy": 0.9262161279791055,
      "eval_f1_score": 0.8848887104133892,
      "eval_loss": 0.6258328557014465,
      "eval_precision": 0.9033804413868713,
      "eval_recall": 0.8691664648450299,
      "eval_runtime": 5.0588,
      "eval_samples_per_second": 605.485,
      "eval_steps_per_second": 9.488,
      "step": 1900
    },
    {
      "epoch": 20.83,
      "grad_norm": 4.217446804046631,
      "learning_rate": 5.555555555555557e-06,
      "loss": 0.4474,
      "step": 2000
    },
    {
      "epoch": 20.83,
      "eval_accuracy": 0.9271955599085864,
      "eval_f1_score": 0.8931632926835013,
      "eval_loss": 0.6333132386207581,
      "eval_precision": 0.9133975869931371,
      "eval_recall": 0.8751018475655369,
      "eval_runtime": 4.9412,
      "eval_samples_per_second": 619.889,
      "eval_steps_per_second": 9.714,
      "step": 2000
    },
    {
      "epoch": 21.88,
      "grad_norm": 2.5882654190063477,
      "learning_rate": 5.2777777777777785e-06,
      "loss": 0.4421,
      "step": 2100
    },
    {
      "epoch": 21.88,
      "eval_accuracy": 0.9291544237675482,
      "eval_f1_score": 0.8929419711474625,
      "eval_loss": 0.6339655518531799,
      "eval_precision": 0.8975107075254134,
      "eval_recall": 0.8896075654961032,
      "eval_runtime": 4.9672,
      "eval_samples_per_second": 616.645,
      "eval_steps_per_second": 9.663,
      "step": 2100
    },
    {
      "epoch": 21.88,
      "step": 2100,
      "total_flos": 4711306254898320.0,
      "train_loss": 0.5936482783726283,
      "train_runtime": 1811.9756,
      "train_samples_per_second": 282.565,
      "train_steps_per_second": 2.208
    }
  ],
  "logging_steps": 100,
  "max_steps": 4000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 42,
  "save_steps": 100,
  "total_flos": 4711306254898320.0,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}