{ "best_metric": 0.9288338932978429, "best_model_checkpoint": "cls_comment-phobert-base-v2-v3.2/checkpoint-3200", "epoch": 34.78260869565217, "eval_steps": 100, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.8695652173913043, "grad_norm": 1.351387858390808, "learning_rate": 2.5e-06, "loss": 1.8639, "step": 100 }, { "epoch": 0.8695652173913043, "eval_accuracy": 0.40038053818972547, "eval_f1_score": 0.08351588662990973, "eval_loss": 1.7088394165039062, "eval_precision": 0.17952180872348938, "eval_recall": 0.1438423645320197, "eval_runtime": 6.7556, "eval_samples_per_second": 544.589, "eval_steps_per_second": 8.586, "step": 100 }, { "epoch": 1.7391304347826086, "grad_norm": 5.332070827484131, "learning_rate": 5e-06, "loss": 1.5668, "step": 200 }, { "epoch": 1.7391304347826086, "eval_accuracy": 0.580048926338679, "eval_f1_score": 0.21719867493792772, "eval_loss": 1.3287572860717773, "eval_precision": 0.26741462611831024, "eval_recall": 0.2574687919678381, "eval_runtime": 6.8289, "eval_samples_per_second": 538.739, "eval_steps_per_second": 8.493, "step": 200 }, { "epoch": 2.608695652173913, "grad_norm": 4.961886882781982, "learning_rate": 7.500000000000001e-06, "loss": 1.2197, "step": 300 }, { "epoch": 2.608695652173913, "eval_accuracy": 0.7667844522968198, "eval_f1_score": 0.5365670142781532, "eval_loss": 0.974587619304657, "eval_precision": 0.581964201034555, "eval_recall": 0.5148284343114082, "eval_runtime": 6.7013, "eval_samples_per_second": 548.998, "eval_steps_per_second": 8.655, "step": 300 }, { "epoch": 3.4782608695652173, "grad_norm": 4.706761837005615, "learning_rate": 1e-05, "loss": 0.9384, "step": 400 }, { "epoch": 3.4782608695652173, "eval_accuracy": 0.8390867083446589, "eval_f1_score": 0.6137538550736412, "eval_loss": 0.7674435973167419, "eval_precision": 0.6052609240082055, "eval_recall": 0.6267002705049249, "eval_runtime": 6.7131, "eval_samples_per_second": 548.036, "eval_steps_per_second": 8.64, "step": 400 }, { "epoch": 4.3478260869565215, "grad_norm": 4.063348293304443, "learning_rate": 9.722222222222223e-06, "loss": 0.7551, "step": 500 }, { "epoch": 4.3478260869565215, "eval_accuracy": 0.8526773579777114, "eval_f1_score": 0.6283738392523406, "eval_loss": 0.6780158877372742, "eval_precision": 0.6146556832242253, "eval_recall": 0.6453587005137035, "eval_runtime": 6.7179, "eval_samples_per_second": 547.644, "eval_steps_per_second": 8.634, "step": 500 }, { "epoch": 5.217391304347826, "grad_norm": 2.9883711338043213, "learning_rate": 9.444444444444445e-06, "loss": 0.6636, "step": 600 }, { "epoch": 5.217391304347826, "eval_accuracy": 0.8684425115520522, "eval_f1_score": 0.6832828988792853, "eval_loss": 0.6152337193489075, "eval_precision": 0.7626363529752824, "eval_recall": 0.678494572728881, "eval_runtime": 6.6923, "eval_samples_per_second": 549.738, "eval_steps_per_second": 8.667, "step": 600 }, { "epoch": 6.086956521739131, "grad_norm": 4.749546051025391, "learning_rate": 9.166666666666666e-06, "loss": 0.5767, "step": 700 }, { "epoch": 6.086956521739131, "eval_accuracy": 0.8929056808915466, "eval_f1_score": 0.788416346435481, "eval_loss": 0.5486906170845032, "eval_precision": 0.8967939035531012, "eval_recall": 0.7698370330953708, "eval_runtime": 6.716, "eval_samples_per_second": 547.799, "eval_steps_per_second": 8.636, "step": 700 }, { "epoch": 6.956521739130435, "grad_norm": 3.5176820755004883, "learning_rate": 8.888888888888888e-06, "loss": 0.5059, "step": 800 }, { "epoch": 6.956521739130435, "eval_accuracy": 0.8986137537374287, "eval_f1_score": 0.866530151951532, "eval_loss": 0.526166558265686, "eval_precision": 0.8880275046999001, "eval_recall": 0.8533816092377208, "eval_runtime": 6.7032, "eval_samples_per_second": 548.839, "eval_steps_per_second": 8.653, "step": 800 }, { "epoch": 7.826086956521739, "grad_norm": 5.969176292419434, "learning_rate": 8.611111111111112e-06, "loss": 0.4512, "step": 900 }, { "epoch": 7.826086956521739, "eval_accuracy": 0.9195433541723295, "eval_f1_score": 0.9002421953779064, "eval_loss": 0.48821330070495605, "eval_precision": 0.8927762999261393, "eval_recall": 0.9082191267746272, "eval_runtime": 6.6813, "eval_samples_per_second": 550.644, "eval_steps_per_second": 8.681, "step": 900 }, { "epoch": 8.695652173913043, "grad_norm": 4.83783483505249, "learning_rate": 8.333333333333334e-06, "loss": 0.4098, "step": 1000 }, { "epoch": 8.695652173913043, "eval_accuracy": 0.9211742321282957, "eval_f1_score": 0.9111062555604621, "eval_loss": 0.4828358292579651, "eval_precision": 0.9182989102010178, "eval_recall": 0.9060960014608563, "eval_runtime": 6.6852, "eval_samples_per_second": 550.323, "eval_steps_per_second": 8.676, "step": 1000 }, { "epoch": 9.565217391304348, "grad_norm": 4.399734973907471, "learning_rate": 8.055555555555557e-06, "loss": 0.3916, "step": 1100 }, { "epoch": 9.565217391304348, "eval_accuracy": 0.927969556944822, "eval_f1_score": 0.9192607581713574, "eval_loss": 0.46853822469711304, "eval_precision": 0.9254336746301863, "eval_recall": 0.9140372115171418, "eval_runtime": 6.7479, "eval_samples_per_second": 545.206, "eval_steps_per_second": 8.595, "step": 1100 }, { "epoch": 10.434782608695652, "grad_norm": 5.233398914337158, "learning_rate": 7.77777777777778e-06, "loss": 0.373, "step": 1200 }, { "epoch": 10.434782608695652, "eval_accuracy": 0.9238923620549062, "eval_f1_score": 0.9145336176845754, "eval_loss": 0.4755556881427765, "eval_precision": 0.9100056761034006, "eval_recall": 0.9210482679945721, "eval_runtime": 6.7342, "eval_samples_per_second": 546.317, "eval_steps_per_second": 8.613, "step": 1200 }, { "epoch": 11.304347826086957, "grad_norm": 5.472758769989014, "learning_rate": 7.500000000000001e-06, "loss": 0.3592, "step": 1300 }, { "epoch": 11.304347826086957, "eval_accuracy": 0.9317749388420766, "eval_f1_score": 0.9229649364321404, "eval_loss": 0.45966240763664246, "eval_precision": 0.9263012712773211, "eval_recall": 0.9203258950418504, "eval_runtime": 6.7847, "eval_samples_per_second": 542.251, "eval_steps_per_second": 8.549, "step": 1300 }, { "epoch": 12.173913043478262, "grad_norm": 5.35235595703125, "learning_rate": 7.222222222222223e-06, "loss": 0.3377, "step": 1400 }, { "epoch": 12.173913043478262, "eval_accuracy": 0.9304158738787714, "eval_f1_score": 0.9181369027515391, "eval_loss": 0.4691704213619232, "eval_precision": 0.9174925655697546, "eval_recall": 0.9197889600383383, "eval_runtime": 6.7123, "eval_samples_per_second": 548.096, "eval_steps_per_second": 8.641, "step": 1400 }, { "epoch": 13.043478260869565, "grad_norm": 4.6181230545043945, "learning_rate": 6.944444444444445e-06, "loss": 0.3299, "step": 1500 }, { "epoch": 13.043478260869565, "eval_accuracy": 0.9328621908127208, "eval_f1_score": 0.9244416019330937, "eval_loss": 0.46716630458831787, "eval_precision": 0.9291821693223221, "eval_recall": 0.9215636094215414, "eval_runtime": 6.6884, "eval_samples_per_second": 550.057, "eval_steps_per_second": 8.672, "step": 1500 }, { "epoch": 13.91304347826087, "grad_norm": 8.43385124206543, "learning_rate": 6.666666666666667e-06, "loss": 0.3198, "step": 1600 }, { "epoch": 13.91304347826087, "eval_accuracy": 0.9331340038053819, "eval_f1_score": 0.9241282766973115, "eval_loss": 0.4618851840496063, "eval_precision": 0.926426704429738, "eval_recall": 0.9225031003334492, "eval_runtime": 6.6952, "eval_samples_per_second": 549.5, "eval_steps_per_second": 8.663, "step": 1600 }, { "epoch": 14.782608695652174, "grad_norm": 8.825912475585938, "learning_rate": 6.3888888888888885e-06, "loss": 0.3121, "step": 1700 }, { "epoch": 14.782608695652174, "eval_accuracy": 0.9331340038053819, "eval_f1_score": 0.9243317785632609, "eval_loss": 0.46724241971969604, "eval_precision": 0.9249039681497474, "eval_recall": 0.9245288905066229, "eval_runtime": 6.8264, "eval_samples_per_second": 538.934, "eval_steps_per_second": 8.496, "step": 1700 }, { "epoch": 15.652173913043478, "grad_norm": 4.476284027099609, "learning_rate": 6.111111111111112e-06, "loss": 0.3053, "step": 1800 }, { "epoch": 15.652173913043478, "eval_accuracy": 0.9344930687686871, "eval_f1_score": 0.9216180971737599, "eval_loss": 0.46642911434173584, "eval_precision": 0.9166826151640262, "eval_recall": 0.9271868922165902, "eval_runtime": 6.7491, "eval_samples_per_second": 545.113, "eval_steps_per_second": 8.594, "step": 1800 }, { "epoch": 16.52173913043478, "grad_norm": 1.7301744222640991, "learning_rate": 5.833333333333334e-06, "loss": 0.3058, "step": 1900 }, { "epoch": 16.52173913043478, "eval_accuracy": 0.9331340038053819, "eval_f1_score": 0.9228827765631413, "eval_loss": 0.46549805998802185, "eval_precision": 0.9240164131101741, "eval_recall": 0.9221056672944972, "eval_runtime": 6.7346, "eval_samples_per_second": 546.286, "eval_steps_per_second": 8.612, "step": 1900 }, { "epoch": 17.391304347826086, "grad_norm": 7.452052116394043, "learning_rate": 5.555555555555557e-06, "loss": 0.2976, "step": 2000 }, { "epoch": 17.391304347826086, "eval_accuracy": 0.9355803207393314, "eval_f1_score": 0.9258585265558317, "eval_loss": 0.4619200825691223, "eval_precision": 0.9298877666967595, "eval_recall": 0.9220516121183885, "eval_runtime": 6.6864, "eval_samples_per_second": 550.217, "eval_steps_per_second": 8.674, "step": 2000 }, { "epoch": 18.26086956521739, "grad_norm": 2.030193567276001, "learning_rate": 5.2777777777777785e-06, "loss": 0.2975, "step": 2100 }, { "epoch": 18.26086956521739, "eval_accuracy": 0.9342212557760261, "eval_f1_score": 0.9254667743916674, "eval_loss": 0.4662647545337677, "eval_precision": 0.9267335070457062, "eval_recall": 0.9247905559616514, "eval_runtime": 6.6885, "eval_samples_per_second": 550.05, "eval_steps_per_second": 8.672, "step": 2100 }, { "epoch": 19.130434782608695, "grad_norm": 5.05163049697876, "learning_rate": 5e-06, "loss": 0.2872, "step": 2200 }, { "epoch": 19.130434782608695, "eval_accuracy": 0.9344930687686871, "eval_f1_score": 0.923695179464076, "eval_loss": 0.47371503710746765, "eval_precision": 0.928532382805131, "eval_recall": 0.9194008334880648, "eval_runtime": 6.6854, "eval_samples_per_second": 550.301, "eval_steps_per_second": 8.676, "step": 2200 }, { "epoch": 20.0, "grad_norm": 6.272797584533691, "learning_rate": 4.722222222222222e-06, "loss": 0.2879, "step": 2300 }, { "epoch": 20.0, "eval_accuracy": 0.9317749388420766, "eval_f1_score": 0.9201329128675652, "eval_loss": 0.47988325357437134, "eval_precision": 0.9115622601790818, "eval_recall": 0.9295115231425782, "eval_runtime": 6.7353, "eval_samples_per_second": 546.224, "eval_steps_per_second": 8.611, "step": 2300 }, { "epoch": 20.869565217391305, "grad_norm": 5.919195175170898, "learning_rate": 4.444444444444444e-06, "loss": 0.2848, "step": 2400 }, { "epoch": 20.869565217391305, "eval_accuracy": 0.9325903778200598, "eval_f1_score": 0.9194393410487441, "eval_loss": 0.48427507281303406, "eval_precision": 0.9091962837604621, "eval_recall": 0.9309372715371734, "eval_runtime": 6.6852, "eval_samples_per_second": 550.322, "eval_steps_per_second": 8.676, "step": 2400 }, { "epoch": 21.73913043478261, "grad_norm": 3.268333673477173, "learning_rate": 4.166666666666667e-06, "loss": 0.2808, "step": 2500 }, { "epoch": 21.73913043478261, "eval_accuracy": 0.9325903778200598, "eval_f1_score": 0.9242720840575954, "eval_loss": 0.48389649391174316, "eval_precision": 0.925898105451882, "eval_recall": 0.9236990043362497, "eval_runtime": 6.6806, "eval_samples_per_second": 550.7, "eval_steps_per_second": 8.682, "step": 2500 }, { "epoch": 22.608695652173914, "grad_norm": 3.7583141326904297, "learning_rate": 3.88888888888889e-06, "loss": 0.2798, "step": 2600 }, { "epoch": 22.608695652173914, "eval_accuracy": 0.9342212557760261, "eval_f1_score": 0.9240298173904554, "eval_loss": 0.4839774966239929, "eval_precision": 0.9196521435699385, "eval_recall": 0.9288845586975052, "eval_runtime": 6.7723, "eval_samples_per_second": 543.245, "eval_steps_per_second": 8.564, "step": 2600 }, { "epoch": 23.47826086956522, "grad_norm": 5.6299052238464355, "learning_rate": 3.6111111111111115e-06, "loss": 0.2797, "step": 2700 }, { "epoch": 23.47826086956522, "eval_accuracy": 0.9334058167980429, "eval_f1_score": 0.922297295666424, "eval_loss": 0.4770027697086334, "eval_precision": 0.9202906698861152, "eval_recall": 0.9245720434908796, "eval_runtime": 6.7079, "eval_samples_per_second": 548.456, "eval_steps_per_second": 8.646, "step": 2700 }, { "epoch": 24.347826086956523, "grad_norm": 11.368356704711914, "learning_rate": 3.3333333333333333e-06, "loss": 0.2754, "step": 2800 }, { "epoch": 24.347826086956523, "eval_accuracy": 0.9317749388420766, "eval_f1_score": 0.9225259593995536, "eval_loss": 0.4862979054450989, "eval_precision": 0.9212011606593585, "eval_recall": 0.9252089494748691, "eval_runtime": 6.8104, "eval_samples_per_second": 540.2, "eval_steps_per_second": 8.516, "step": 2800 }, { "epoch": 25.217391304347824, "grad_norm": 0.14479239284992218, "learning_rate": 3.055555555555556e-06, "loss": 0.2752, "step": 2900 }, { "epoch": 25.217391304347824, "eval_accuracy": 0.9325903778200598, "eval_f1_score": 0.9242984205440742, "eval_loss": 0.48786690831184387, "eval_precision": 0.9237836539478133, "eval_recall": 0.9258615724988001, "eval_runtime": 6.7704, "eval_samples_per_second": 543.395, "eval_steps_per_second": 8.567, "step": 2900 }, { "epoch": 26.08695652173913, "grad_norm": 7.544506072998047, "learning_rate": 2.7777777777777783e-06, "loss": 0.2718, "step": 3000 }, { "epoch": 26.08695652173913, "eval_accuracy": 0.9361239467246535, "eval_f1_score": 0.9270177056615392, "eval_loss": 0.47883340716362, "eval_precision": 0.9300585908606723, "eval_recall": 0.924359200098848, "eval_runtime": 6.7784, "eval_samples_per_second": 542.751, "eval_steps_per_second": 8.557, "step": 3000 }, { "epoch": 26.956521739130434, "grad_norm": 1.55753493309021, "learning_rate": 2.5e-06, "loss": 0.2712, "step": 3100 }, { "epoch": 26.956521739130434, "eval_accuracy": 0.9355803207393314, "eval_f1_score": 0.9252996225270083, "eval_loss": 0.47663480043411255, "eval_precision": 0.9273088499395005, "eval_recall": 0.9236556967474857, "eval_runtime": 6.7007, "eval_samples_per_second": 549.049, "eval_steps_per_second": 8.656, "step": 3100 }, { "epoch": 27.82608695652174, "grad_norm": 1.633718729019165, "learning_rate": 2.222222222222222e-06, "loss": 0.2714, "step": 3200 }, { "epoch": 27.82608695652174, "eval_accuracy": 0.9382984506659419, "eval_f1_score": 0.9288338932978429, "eval_loss": 0.47798144817352295, "eval_precision": 0.9284697321489548, "eval_recall": 0.929401062619975, "eval_runtime": 6.6895, "eval_samples_per_second": 549.965, "eval_steps_per_second": 8.67, "step": 3200 }, { "epoch": 28.695652173913043, "grad_norm": 0.5183067917823792, "learning_rate": 1.944444444444445e-06, "loss": 0.2697, "step": 3300 }, { "epoch": 28.695652173913043, "eval_accuracy": 0.9366675727099756, "eval_f1_score": 0.9263019689053353, "eval_loss": 0.485741525888443, "eval_precision": 0.9242977411952608, "eval_recall": 0.9286175955534596, "eval_runtime": 6.7616, "eval_samples_per_second": 544.104, "eval_steps_per_second": 8.578, "step": 3300 }, { "epoch": 29.565217391304348, "grad_norm": 4.411154747009277, "learning_rate": 1.6666666666666667e-06, "loss": 0.2674, "step": 3400 }, { "epoch": 29.565217391304348, "eval_accuracy": 0.9347648817613482, "eval_f1_score": 0.9235274952764979, "eval_loss": 0.48756158351898193, "eval_precision": 0.9173551034787693, "eval_recall": 0.9303550035125531, "eval_runtime": 6.831, "eval_samples_per_second": 538.573, "eval_steps_per_second": 8.491, "step": 3400 }, { "epoch": 30.434782608695652, "grad_norm": 9.261029243469238, "learning_rate": 1.3888888888888892e-06, "loss": 0.2681, "step": 3500 }, { "epoch": 30.434782608695652, "eval_accuracy": 0.9361239467246535, "eval_f1_score": 0.9262356484286315, "eval_loss": 0.486868292093277, "eval_precision": 0.9184420290722909, "eval_recall": 0.934775981488375, "eval_runtime": 6.6731, "eval_samples_per_second": 551.315, "eval_steps_per_second": 8.692, "step": 3500 }, { "epoch": 31.304347826086957, "grad_norm": 0.14830726385116577, "learning_rate": 1.111111111111111e-06, "loss": 0.2685, "step": 3600 }, { "epoch": 31.304347826086957, "eval_accuracy": 0.933949442783365, "eval_f1_score": 0.9240517281456259, "eval_loss": 0.4930637776851654, "eval_precision": 0.9212029440977482, "eval_recall": 0.9279123078001273, "eval_runtime": 6.6837, "eval_samples_per_second": 550.442, "eval_steps_per_second": 8.678, "step": 3600 }, { "epoch": 32.17391304347826, "grad_norm": 2.589641571044922, "learning_rate": 8.333333333333333e-07, "loss": 0.2665, "step": 3700 }, { "epoch": 32.17391304347826, "eval_accuracy": 0.933949442783365, "eval_f1_score": 0.9234423513703757, "eval_loss": 0.48508113622665405, "eval_precision": 0.9211049540814888, "eval_recall": 0.9261949377668691, "eval_runtime": 6.7146, "eval_samples_per_second": 547.914, "eval_steps_per_second": 8.638, "step": 3700 }, { "epoch": 33.04347826086956, "grad_norm": 0.28960728645324707, "learning_rate": 5.555555555555555e-07, "loss": 0.2703, "step": 3800 }, { "epoch": 33.04347826086956, "eval_accuracy": 0.9366675727099756, "eval_f1_score": 0.9263225046285886, "eval_loss": 0.4864026606082916, "eval_precision": 0.9226325281435318, "eval_recall": 0.9303985110853679, "eval_runtime": 6.7275, "eval_samples_per_second": 546.86, "eval_steps_per_second": 8.621, "step": 3800 }, { "epoch": 33.91304347826087, "grad_norm": 0.5719828009605408, "learning_rate": 2.7777777777777776e-07, "loss": 0.2661, "step": 3900 }, { "epoch": 33.91304347826087, "eval_accuracy": 0.9363957597173145, "eval_f1_score": 0.9271367551931604, "eval_loss": 0.484861820936203, "eval_precision": 0.9227424858610155, "eval_recall": 0.9318596341609905, "eval_runtime": 6.7272, "eval_samples_per_second": 546.887, "eval_steps_per_second": 8.622, "step": 3900 }, { "epoch": 34.78260869565217, "grad_norm": 4.2173051834106445, "learning_rate": 0.0, "loss": 0.2695, "step": 4000 }, { "epoch": 34.78260869565217, "eval_accuracy": 0.9361239467246535, "eval_f1_score": 0.9269315744830021, "eval_loss": 0.48626089096069336, "eval_precision": 0.9223039042139164, "eval_recall": 0.9319526344370851, "eval_runtime": 6.7602, "eval_samples_per_second": 544.211, "eval_steps_per_second": 8.58, "step": 4000 }, { "epoch": 34.78260869565217, "step": 4000, "total_flos": 8772403331841000.0, "train_loss": 0.4447998676300049, "train_runtime": 3679.271, "train_samples_per_second": 139.158, "train_steps_per_second": 1.087 } ], "logging_steps": 100, "max_steps": 4000, "num_input_tokens_seen": 0, "num_train_epochs": 35, "save_steps": 100, "total_flos": 8772403331841000.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }