{ "best_metric": 0.8943646614708781, "best_model_checkpoint": "cls_comment-phobert-base-v2-v2.3/checkpoint-1600", "epoch": 21.875, "eval_steps": 100, "global_step": 2100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.04, "grad_norm": 2.3460028171539307, "learning_rate": 2.5e-06, "loss": 1.1884, "step": 100 }, { "epoch": 1.04, "eval_accuracy": 0.7277179236043095, "eval_f1_score": 0.43708201591573076, "eval_loss": 1.0905314683914185, "eval_precision": 0.5016943776884798, "eval_recall": 0.42288884208445504, "eval_runtime": 4.9336, "eval_samples_per_second": 620.848, "eval_steps_per_second": 9.729, "step": 100 }, { "epoch": 2.08, "grad_norm": 9.137602806091309, "learning_rate": 5e-06, "loss": 1.0502, "step": 200 }, { "epoch": 2.08, "eval_accuracy": 0.7956252040483186, "eval_f1_score": 0.5245336258309143, "eval_loss": 0.9278329014778137, "eval_precision": 0.5244123326675032, "eval_recall": 0.5273852871687749, "eval_runtime": 4.9622, "eval_samples_per_second": 617.266, "eval_steps_per_second": 9.673, "step": 200 }, { "epoch": 3.12, "grad_norm": 8.277727127075195, "learning_rate": 7.500000000000001e-06, "loss": 0.9031, "step": 300 }, { "epoch": 3.12, "eval_accuracy": 0.8445968005223637, "eval_f1_score": 0.5653211085059351, "eval_loss": 0.7793412208557129, "eval_precision": 0.7067057672713553, "eval_recall": 0.589697230912186, "eval_runtime": 5.0159, "eval_samples_per_second": 610.664, "eval_steps_per_second": 9.57, "step": 300 }, { "epoch": 4.17, "grad_norm": 3.594104290008545, "learning_rate": 1e-05, "loss": 0.7772, "step": 400 }, { "epoch": 4.17, "eval_accuracy": 0.8697355533790402, "eval_f1_score": 0.6917446533625714, "eval_loss": 0.7089904546737671, "eval_precision": 0.7268852070004593, "eval_recall": 0.6804440791993415, "eval_runtime": 4.9399, "eval_samples_per_second": 620.058, "eval_steps_per_second": 9.717, "step": 400 }, { "epoch": 5.21, "grad_norm": 4.503917694091797, "learning_rate": 9.722222222222223e-06, "loss": 0.6741, "step": 500 }, { "epoch": 5.21, "eval_accuracy": 0.8958537381651975, "eval_f1_score": 0.8078466525990172, "eval_loss": 0.648346483707428, "eval_precision": 0.8797678600731677, "eval_recall": 0.7817903123125977, "eval_runtime": 4.9504, "eval_samples_per_second": 618.741, "eval_steps_per_second": 9.696, "step": 500 }, { "epoch": 6.25, "grad_norm": 4.407541275024414, "learning_rate": 9.444444444444445e-06, "loss": 0.6156, "step": 600 }, { "epoch": 6.25, "eval_accuracy": 0.9079333986287953, "eval_f1_score": 0.8601153974079928, "eval_loss": 0.6268225908279419, "eval_precision": 0.8933059224979765, "eval_recall": 0.8344643413921417, "eval_runtime": 4.9864, "eval_samples_per_second": 614.266, "eval_steps_per_second": 9.626, "step": 600 }, { "epoch": 7.29, "grad_norm": 5.013394355773926, "learning_rate": 9.166666666666666e-06, "loss": 0.5745, "step": 700 }, { "epoch": 7.29, "eval_accuracy": 0.9160953313744695, "eval_f1_score": 0.8738968846084525, "eval_loss": 0.6176044940948486, "eval_precision": 0.8880590844686177, "eval_recall": 0.8609982161968145, "eval_runtime": 5.0009, "eval_samples_per_second": 612.484, "eval_steps_per_second": 9.598, "step": 700 }, { "epoch": 8.33, "grad_norm": 4.954068183898926, "learning_rate": 8.888888888888888e-06, "loss": 0.5466, "step": 800 }, { "epoch": 8.33, "eval_accuracy": 0.9180541952334312, "eval_f1_score": 0.8730278944900508, "eval_loss": 0.613418698310852, "eval_precision": 0.8951285463624247, "eval_recall": 0.8555146035099561, "eval_runtime": 4.9947, "eval_samples_per_second": 613.247, "eval_steps_per_second": 9.61, "step": 800 }, { "epoch": 9.38, "grad_norm": 6.174578666687012, "learning_rate": 8.611111111111112e-06, "loss": 0.5254, "step": 900 }, { "epoch": 9.38, "eval_accuracy": 0.9183806725432583, "eval_f1_score": 0.8794225212758101, "eval_loss": 0.6195659637451172, "eval_precision": 0.898010742334885, "eval_recall": 0.8634351648847699, "eval_runtime": 4.9765, "eval_samples_per_second": 615.487, "eval_steps_per_second": 9.645, "step": 900 }, { "epoch": 10.42, "grad_norm": 5.367175102233887, "learning_rate": 8.333333333333334e-06, "loss": 0.5059, "step": 1000 }, { "epoch": 10.42, "eval_accuracy": 0.9219719229513549, "eval_f1_score": 0.8799283123746076, "eval_loss": 0.6174536347389221, "eval_precision": 0.8925411600987329, "eval_recall": 0.8702271946202808, "eval_runtime": 4.9764, "eval_samples_per_second": 615.501, "eval_steps_per_second": 9.645, "step": 1000 }, { "epoch": 11.46, "grad_norm": 6.586447238922119, "learning_rate": 8.055555555555557e-06, "loss": 0.4971, "step": 1100 }, { "epoch": 11.46, "eval_accuracy": 0.9226248775710089, "eval_f1_score": 0.8770727386787929, "eval_loss": 0.6110312342643738, "eval_precision": 0.89290929483548, "eval_recall": 0.8638757728594787, "eval_runtime": 4.9931, "eval_samples_per_second": 613.443, "eval_steps_per_second": 9.613, "step": 1100 }, { "epoch": 12.5, "grad_norm": 3.8108513355255127, "learning_rate": 7.77777777777778e-06, "loss": 0.4872, "step": 1200 }, { "epoch": 12.5, "eval_accuracy": 0.9229513548808358, "eval_f1_score": 0.8833538508725888, "eval_loss": 0.6191003918647766, "eval_precision": 0.8914104564156814, "eval_recall": 0.8774149435732262, "eval_runtime": 4.9594, "eval_samples_per_second": 617.612, "eval_steps_per_second": 9.679, "step": 1200 }, { "epoch": 13.54, "grad_norm": 2.2206079959869385, "learning_rate": 7.500000000000001e-06, "loss": 0.4756, "step": 1300 }, { "epoch": 13.54, "eval_accuracy": 0.9242572641201436, "eval_f1_score": 0.892919420274636, "eval_loss": 0.6239792704582214, "eval_precision": 0.8938258370033757, "eval_recall": 0.8931296696096861, "eval_runtime": 4.9692, "eval_samples_per_second": 616.403, "eval_steps_per_second": 9.66, "step": 1300 }, { "epoch": 14.58, "grad_norm": 3.2590479850769043, "learning_rate": 7.222222222222223e-06, "loss": 0.4737, "step": 1400 }, { "epoch": 14.58, "eval_accuracy": 0.92033953640222, "eval_f1_score": 0.8793750306978456, "eval_loss": 0.6246046423912048, "eval_precision": 0.9038460171417751, "eval_recall": 0.8582419368732399, "eval_runtime": 5.0236, "eval_samples_per_second": 609.724, "eval_steps_per_second": 9.555, "step": 1400 }, { "epoch": 15.62, "grad_norm": 4.4676127433776855, "learning_rate": 6.944444444444445e-06, "loss": 0.4626, "step": 1500 }, { "epoch": 15.62, "eval_accuracy": 0.9249102187397976, "eval_f1_score": 0.8889732013227639, "eval_loss": 0.6267126798629761, "eval_precision": 0.8955131575427445, "eval_recall": 0.8842202196754521, "eval_runtime": 5.0075, "eval_samples_per_second": 611.678, "eval_steps_per_second": 9.586, "step": 1500 }, { "epoch": 16.67, "grad_norm": 1.5825713872909546, "learning_rate": 6.666666666666667e-06, "loss": 0.4641, "step": 1600 }, { "epoch": 16.67, "eval_accuracy": 0.9271955599085864, "eval_f1_score": 0.8943646614708781, "eval_loss": 0.6228047013282776, "eval_precision": 0.9063376648453086, "eval_recall": 0.8842502417202245, "eval_runtime": 4.9845, "eval_samples_per_second": 614.503, "eval_steps_per_second": 9.63, "step": 1600 }, { "epoch": 17.71, "grad_norm": 3.3820607662200928, "learning_rate": 6.3888888888888885e-06, "loss": 0.4562, "step": 1700 }, { "epoch": 17.71, "eval_accuracy": 0.9278485145282402, "eval_f1_score": 0.892329553955837, "eval_loss": 0.6255541443824768, "eval_precision": 0.9064974712840742, "eval_recall": 0.8800313461131938, "eval_runtime": 5.0301, "eval_samples_per_second": 608.937, "eval_steps_per_second": 9.543, "step": 1700 }, { "epoch": 18.75, "grad_norm": 0.6632604598999023, "learning_rate": 6.111111111111112e-06, "loss": 0.4522, "step": 1800 }, { "epoch": 18.75, "eval_accuracy": 0.9285014691478942, "eval_f1_score": 0.8912735294131965, "eval_loss": 0.6203188300132751, "eval_precision": 0.8992701751319325, "eval_recall": 0.8844935194666589, "eval_runtime": 5.0719, "eval_samples_per_second": 603.913, "eval_steps_per_second": 9.464, "step": 1800 }, { "epoch": 19.79, "grad_norm": 1.0484055280685425, "learning_rate": 5.833333333333334e-06, "loss": 0.4476, "step": 1900 }, { "epoch": 19.79, "eval_accuracy": 0.9262161279791055, "eval_f1_score": 0.8848887104133892, "eval_loss": 0.6258328557014465, "eval_precision": 0.9033804413868713, "eval_recall": 0.8691664648450299, "eval_runtime": 5.0588, "eval_samples_per_second": 605.485, "eval_steps_per_second": 9.488, "step": 1900 }, { "epoch": 20.83, "grad_norm": 4.217446804046631, "learning_rate": 5.555555555555557e-06, "loss": 0.4474, "step": 2000 }, { "epoch": 20.83, "eval_accuracy": 0.9271955599085864, "eval_f1_score": 0.8931632926835013, "eval_loss": 0.6333132386207581, "eval_precision": 0.9133975869931371, "eval_recall": 0.8751018475655369, "eval_runtime": 4.9412, "eval_samples_per_second": 619.889, "eval_steps_per_second": 9.714, "step": 2000 }, { "epoch": 21.88, "grad_norm": 2.5882654190063477, "learning_rate": 5.2777777777777785e-06, "loss": 0.4421, "step": 2100 }, { "epoch": 21.88, "eval_accuracy": 0.9291544237675482, "eval_f1_score": 0.8929419711474625, "eval_loss": 0.6339655518531799, "eval_precision": 0.8975107075254134, "eval_recall": 0.8896075654961032, "eval_runtime": 4.9672, "eval_samples_per_second": 616.645, "eval_steps_per_second": 9.663, "step": 2100 }, { "epoch": 21.88, "step": 2100, "total_flos": 4711306254898320.0, "train_loss": 0.5936482783726283, "train_runtime": 1811.9756, "train_samples_per_second": 282.565, "train_steps_per_second": 2.208 } ], "logging_steps": 100, "max_steps": 4000, "num_input_tokens_seen": 0, "num_train_epochs": 42, "save_steps": 100, "total_flos": 4711306254898320.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }