{ "best_metric": 0.7776904948939514, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-masakhaner-kin/checkpoint-6000", "epoch": 104.4776119402985, "global_step": 7000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.99, "eval_accuracy_score": 0.9532111458798986, "eval_f1": 0.7152000000000002, "eval_loss": 0.1510487049818039, "eval_precision": 0.7072784810126582, "eval_recall": 0.7233009708737864, "eval_runtime": 4.2168, "eval_samples_per_second": 71.618, "eval_steps_per_second": 9.011, "step": 200 }, { "epoch": 5.97, "eval_accuracy_score": 0.9599165549098495, "eval_f1": 0.7722308892355694, "eval_loss": 0.14730291068553925, "eval_precision": 0.7454819277108434, "eval_recall": 0.8009708737864077, "eval_runtime": 4.2242, "eval_samples_per_second": 71.493, "eval_steps_per_second": 8.996, "step": 400 }, { "epoch": 7.46, "learning_rate": 4.865771812080537e-05, "loss": 0.1459, "step": 500 }, { "epoch": 8.96, "eval_accuracy_score": 0.9578304276560871, "eval_f1": 0.7629513343799057, "eval_loss": 0.20204676687717438, "eval_precision": 0.7408536585365854, "eval_recall": 0.7864077669902912, "eval_runtime": 4.2219, "eval_samples_per_second": 71.532, "eval_steps_per_second": 9.001, "step": 600 }, { "epoch": 11.94, "eval_accuracy_score": 0.950081954999255, "eval_f1": 0.730829420970266, "eval_loss": 0.2434845268726349, "eval_precision": 0.7075757575757575, "eval_recall": 0.7556634304207119, "eval_runtime": 4.2229, "eval_samples_per_second": 71.516, "eval_steps_per_second": 8.999, "step": 800 }, { "epoch": 14.93, "learning_rate": 4.697986577181208e-05, "loss": 0.0052, "step": 1000 }, { "epoch": 14.93, "eval_accuracy_score": 0.955893309491879, "eval_f1": 0.7670136108887109, "eval_loss": 0.25165775418281555, "eval_precision": 0.7591125198098256, "eval_recall": 0.7750809061488673, "eval_runtime": 4.2133, "eval_samples_per_second": 71.679, "eval_steps_per_second": 9.019, "step": 1000 }, { "epoch": 17.91, "eval_accuracy_score": 0.9544032185963344, "eval_f1": 0.7628865979381443, "eval_loss": 0.2373453825712204, "eval_precision": 0.7480559875583204, "eval_recall": 0.7783171521035599, "eval_runtime": 4.2221, "eval_samples_per_second": 71.528, "eval_steps_per_second": 9.0, "step": 1200 }, { "epoch": 20.9, "eval_accuracy_score": 0.9527641186112353, "eval_f1": 0.7570532915360502, "eval_loss": 0.2740270793437958, "eval_precision": 0.7340425531914894, "eval_recall": 0.7815533980582524, "eval_runtime": 4.2179, "eval_samples_per_second": 71.599, "eval_steps_per_second": 9.009, "step": 1400 }, { "epoch": 22.39, "learning_rate": 4.530201342281879e-05, "loss": 0.0028, "step": 1500 }, { "epoch": 23.88, "eval_accuracy_score": 0.9506779913574728, "eval_f1": 0.7511520737327189, "eval_loss": 0.29271605610847473, "eval_precision": 0.7149122807017544, "eval_recall": 0.7912621359223301, "eval_runtime": 4.2167, "eval_samples_per_second": 71.619, "eval_steps_per_second": 9.012, "step": 1600 }, { "epoch": 26.87, "eval_accuracy_score": 0.953956191327671, "eval_f1": 0.7715654952076678, "eval_loss": 0.27202117443084717, "eval_precision": 0.7618296529968455, "eval_recall": 0.7815533980582524, "eval_runtime": 4.2162, "eval_samples_per_second": 71.629, "eval_steps_per_second": 9.013, "step": 1800 }, { "epoch": 29.85, "learning_rate": 4.36241610738255e-05, "loss": 0.0031, "step": 2000 }, { "epoch": 29.85, "eval_accuracy_score": 0.9545522276858889, "eval_f1": 0.769352290679305, "eval_loss": 0.3008579909801483, "eval_precision": 0.7515432098765432, "eval_recall": 0.7880258899676376, "eval_runtime": 4.2137, "eval_samples_per_second": 71.671, "eval_steps_per_second": 9.018, "step": 2000 }, { "epoch": 32.84, "eval_accuracy_score": 0.9544032185963344, "eval_f1": 0.7775100401606425, "eval_loss": 0.3004043698310852, "eval_precision": 0.7719298245614035, "eval_recall": 0.7831715210355987, "eval_runtime": 4.2226, "eval_samples_per_second": 71.52, "eval_steps_per_second": 8.999, "step": 2200 }, { "epoch": 35.82, "eval_accuracy_score": 0.9523170913425719, "eval_f1": 0.7588932806324111, "eval_loss": 0.3016415238380432, "eval_precision": 0.7418856259659969, "eval_recall": 0.7766990291262136, "eval_runtime": 4.2238, "eval_samples_per_second": 71.5, "eval_steps_per_second": 8.997, "step": 2400 }, { "epoch": 37.31, "learning_rate": 4.194630872483222e-05, "loss": 0.0022, "step": 2500 }, { "epoch": 38.81, "eval_accuracy_score": 0.9551482640441067, "eval_f1": 0.7769897557131599, "eval_loss": 0.2800486385822296, "eval_precision": 0.7572964669738863, "eval_recall": 0.7977346278317152, "eval_runtime": 4.2149, "eval_samples_per_second": 71.651, "eval_steps_per_second": 9.016, "step": 2600 }, { "epoch": 41.79, "eval_accuracy_score": 0.9502309640888095, "eval_f1": 0.7396403440187647, "eval_loss": 0.29597747325897217, "eval_precision": 0.7155824508320726, "eval_recall": 0.7653721682847896, "eval_runtime": 4.2274, "eval_samples_per_second": 71.439, "eval_steps_per_second": 8.989, "step": 2800 }, { "epoch": 44.78, "learning_rate": 4.026845637583892e-05, "loss": 0.0021, "step": 3000 }, { "epoch": 44.78, "eval_accuracy_score": 0.953658173148562, "eval_f1": 0.769108280254777, "eval_loss": 0.3028393089771271, "eval_precision": 0.7570532915360502, "eval_recall": 0.7815533980582524, "eval_runtime": 4.2104, "eval_samples_per_second": 71.727, "eval_steps_per_second": 9.025, "step": 3000 }, { "epoch": 47.76, "eval_accuracy_score": 0.9566383549396513, "eval_f1": 0.7822706065318817, "eval_loss": 0.2665591835975647, "eval_precision": 0.7529940119760479, "eval_recall": 0.813915857605178, "eval_runtime": 4.2247, "eval_samples_per_second": 71.485, "eval_steps_per_second": 8.995, "step": 3200 }, { "epoch": 50.75, "eval_accuracy_score": 0.9566383549396513, "eval_f1": 0.7772435897435898, "eval_loss": 0.26991933584213257, "eval_precision": 0.7698412698412699, "eval_recall": 0.7847896440129449, "eval_runtime": 4.2153, "eval_samples_per_second": 71.643, "eval_steps_per_second": 9.015, "step": 3400 }, { "epoch": 52.24, "learning_rate": 3.859060402684564e-05, "loss": 0.0022, "step": 3500 }, { "epoch": 53.73, "eval_accuracy_score": 0.9572343912978691, "eval_f1": 0.7881694644284571, "eval_loss": 0.2740214169025421, "eval_precision": 0.7788309636650869, "eval_recall": 0.7977346278317152, "eval_runtime": 4.2125, "eval_samples_per_second": 71.691, "eval_steps_per_second": 9.021, "step": 3600 }, { "epoch": 56.72, "eval_accuracy_score": 0.9563403367605424, "eval_f1": 0.7778643803585348, "eval_loss": 0.2816332280635834, "eval_precision": 0.750375939849624, "eval_recall": 0.8074433656957929, "eval_runtime": 4.2121, "eval_samples_per_second": 71.699, "eval_steps_per_second": 9.022, "step": 3800 }, { "epoch": 59.7, "learning_rate": 3.6912751677852356e-05, "loss": 0.0014, "step": 4000 }, { "epoch": 59.7, "eval_accuracy_score": 0.9518700640739085, "eval_f1": 0.7486122125297383, "eval_loss": 0.3085213601589203, "eval_precision": 0.7340590979782271, "eval_recall": 0.7637540453074434, "eval_runtime": 4.2023, "eval_samples_per_second": 71.866, "eval_steps_per_second": 9.043, "step": 4000 }, { "epoch": 62.69, "eval_accuracy_score": 0.9554462822232156, "eval_f1": 0.7712, "eval_loss": 0.27103257179260254, "eval_precision": 0.7626582278481012, "eval_recall": 0.7799352750809061, "eval_runtime": 4.2088, "eval_samples_per_second": 71.755, "eval_steps_per_second": 9.029, "step": 4200 }, { "epoch": 65.67, "eval_accuracy_score": 0.9573834003874236, "eval_f1": 0.7856573705179283, "eval_loss": 0.2769891619682312, "eval_precision": 0.7739403453689168, "eval_recall": 0.7977346278317152, "eval_runtime": 4.2117, "eval_samples_per_second": 71.705, "eval_steps_per_second": 9.022, "step": 4400 }, { "epoch": 67.16, "learning_rate": 3.523489932885906e-05, "loss": 0.0017, "step": 4500 }, { "epoch": 68.66, "eval_accuracy_score": 0.952019073163463, "eval_f1": 0.765079365079365, "eval_loss": 0.3148031532764435, "eval_precision": 0.7507788161993769, "eval_recall": 0.7799352750809061, "eval_runtime": 4.2067, "eval_samples_per_second": 71.79, "eval_steps_per_second": 9.033, "step": 4600 }, { "epoch": 71.64, "eval_accuracy_score": 0.955893309491879, "eval_f1": 0.7903351519875291, "eval_loss": 0.3128798007965088, "eval_precision": 0.762406015037594, "eval_recall": 0.8203883495145631, "eval_runtime": 4.203, "eval_samples_per_second": 71.854, "eval_steps_per_second": 9.041, "step": 4800 }, { "epoch": 74.63, "learning_rate": 3.3557046979865775e-05, "loss": 0.0011, "step": 5000 }, { "epoch": 74.63, "eval_accuracy_score": 0.9549992549545522, "eval_f1": 0.7678855325914149, "eval_loss": 0.3046160936355591, "eval_precision": 0.7546875, "eval_recall": 0.7815533980582524, "eval_runtime": 4.2139, "eval_samples_per_second": 71.668, "eval_steps_per_second": 9.018, "step": 5000 }, { "epoch": 77.61, "eval_accuracy_score": 0.9526151095216808, "eval_f1": 0.749407114624506, "eval_loss": 0.3481159210205078, "eval_precision": 0.732612055641422, "eval_recall": 0.7669902912621359, "eval_runtime": 4.2066, "eval_samples_per_second": 71.791, "eval_steps_per_second": 9.033, "step": 5200 }, { "epoch": 80.6, "eval_accuracy_score": 0.9479958277454925, "eval_f1": 0.7124999999999999, "eval_loss": 0.3350728154182434, "eval_precision": 0.6888217522658611, "eval_recall": 0.7378640776699029, "eval_runtime": 4.207, "eval_samples_per_second": 71.785, "eval_steps_per_second": 9.033, "step": 5400 }, { "epoch": 82.09, "learning_rate": 3.1879194630872485e-05, "loss": 0.0012, "step": 5500 }, { "epoch": 83.58, "eval_accuracy_score": 0.9545522276858889, "eval_f1": 0.7642799678197908, "eval_loss": 0.32919129729270935, "eval_precision": 0.76, "eval_recall": 0.7686084142394822, "eval_runtime": 4.2057, "eval_samples_per_second": 71.807, "eval_steps_per_second": 9.035, "step": 5600 }, { "epoch": 86.57, "eval_accuracy_score": 0.9547012367754433, "eval_f1": 0.779552715654952, "eval_loss": 0.3345593214035034, "eval_precision": 0.7697160883280757, "eval_recall": 0.7896440129449838, "eval_runtime": 4.208, "eval_samples_per_second": 71.767, "eval_steps_per_second": 9.03, "step": 5800 }, { "epoch": 89.55, "learning_rate": 3.02013422818792e-05, "loss": 0.0008, "step": 6000 }, { "epoch": 89.55, "eval_accuracy_score": 0.9542542095067799, "eval_f1": 0.7776904948939514, "eval_loss": 0.3138931095600128, "eval_precision": 0.7557251908396947, "eval_recall": 0.8009708737864077, "eval_runtime": 4.2007, "eval_samples_per_second": 71.892, "eval_steps_per_second": 9.046, "step": 6000 }, { "epoch": 92.54, "eval_accuracy_score": 0.9493369095514826, "eval_f1": 0.7448818897637796, "eval_loss": 0.30180272459983826, "eval_precision": 0.7254601226993865, "eval_recall": 0.7653721682847896, "eval_runtime": 4.2104, "eval_samples_per_second": 71.727, "eval_steps_per_second": 9.025, "step": 6200 }, { "epoch": 95.52, "eval_accuracy_score": 0.9499329459097005, "eval_f1": 0.7467482785003826, "eval_loss": 0.298985093832016, "eval_precision": 0.7082728592162555, "eval_recall": 0.7896440129449838, "eval_runtime": 4.2098, "eval_samples_per_second": 71.738, "eval_steps_per_second": 9.027, "step": 6400 }, { "epoch": 97.01, "learning_rate": 2.8523489932885905e-05, "loss": 0.0013, "step": 6500 }, { "epoch": 98.51, "eval_accuracy_score": 0.9485918641037103, "eval_f1": 0.7388932190179269, "eval_loss": 0.35180896520614624, "eval_precision": 0.7127819548872181, "eval_recall": 0.7669902912621359, "eval_runtime": 4.206, "eval_samples_per_second": 71.802, "eval_steps_per_second": 9.035, "step": 6600 }, { "epoch": 101.49, "eval_accuracy_score": 0.9490388913723737, "eval_f1": 0.7475409836065574, "eval_loss": 0.3536173403263092, "eval_precision": 0.7574750830564784, "eval_recall": 0.7378640776699029, "eval_runtime": 4.2125, "eval_samples_per_second": 71.692, "eval_steps_per_second": 9.021, "step": 6800 }, { "epoch": 104.48, "learning_rate": 2.6845637583892618e-05, "loss": 0.0008, "step": 7000 }, { "epoch": 104.48, "eval_accuracy_score": 0.9476978095663835, "eval_f1": 0.7232704402515724, "eval_loss": 0.3082831799983978, "eval_precision": 0.7033639143730887, "eval_recall": 0.7443365695792881, "eval_runtime": 4.205, "eval_samples_per_second": 71.819, "eval_steps_per_second": 9.037, "step": 7000 }, { "epoch": 104.48, "step": 7000, "total_flos": 2.888661669588173e+16, "train_loss": 0.01226256138086319, "train_runtime": 7779.0968, "train_samples_per_second": 61.704, "train_steps_per_second": 1.928 } ], "max_steps": 15000, "num_train_epochs": 224, "total_flos": 2.888661669588173e+16, "trial_name": null, "trial_params": null }