|
{ |
|
"best_metric": 0.7776904948939514, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-masakhaner-kin/checkpoint-6000", |
|
"epoch": 104.4776119402985, |
|
"global_step": 7000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy_score": 0.9532111458798986, |
|
"eval_f1": 0.7152000000000002, |
|
"eval_loss": 0.1510487049818039, |
|
"eval_precision": 0.7072784810126582, |
|
"eval_recall": 0.7233009708737864, |
|
"eval_runtime": 4.2168, |
|
"eval_samples_per_second": 71.618, |
|
"eval_steps_per_second": 9.011, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"eval_accuracy_score": 0.9599165549098495, |
|
"eval_f1": 0.7722308892355694, |
|
"eval_loss": 0.14730291068553925, |
|
"eval_precision": 0.7454819277108434, |
|
"eval_recall": 0.8009708737864077, |
|
"eval_runtime": 4.2242, |
|
"eval_samples_per_second": 71.493, |
|
"eval_steps_per_second": 8.996, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 4.865771812080537e-05, |
|
"loss": 0.1459, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"eval_accuracy_score": 0.9578304276560871, |
|
"eval_f1": 0.7629513343799057, |
|
"eval_loss": 0.20204676687717438, |
|
"eval_precision": 0.7408536585365854, |
|
"eval_recall": 0.7864077669902912, |
|
"eval_runtime": 4.2219, |
|
"eval_samples_per_second": 71.532, |
|
"eval_steps_per_second": 9.001, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 11.94, |
|
"eval_accuracy_score": 0.950081954999255, |
|
"eval_f1": 0.730829420970266, |
|
"eval_loss": 0.2434845268726349, |
|
"eval_precision": 0.7075757575757575, |
|
"eval_recall": 0.7556634304207119, |
|
"eval_runtime": 4.2229, |
|
"eval_samples_per_second": 71.516, |
|
"eval_steps_per_second": 8.999, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 4.697986577181208e-05, |
|
"loss": 0.0052, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"eval_accuracy_score": 0.955893309491879, |
|
"eval_f1": 0.7670136108887109, |
|
"eval_loss": 0.25165775418281555, |
|
"eval_precision": 0.7591125198098256, |
|
"eval_recall": 0.7750809061488673, |
|
"eval_runtime": 4.2133, |
|
"eval_samples_per_second": 71.679, |
|
"eval_steps_per_second": 9.019, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 17.91, |
|
"eval_accuracy_score": 0.9544032185963344, |
|
"eval_f1": 0.7628865979381443, |
|
"eval_loss": 0.2373453825712204, |
|
"eval_precision": 0.7480559875583204, |
|
"eval_recall": 0.7783171521035599, |
|
"eval_runtime": 4.2221, |
|
"eval_samples_per_second": 71.528, |
|
"eval_steps_per_second": 9.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"eval_accuracy_score": 0.9527641186112353, |
|
"eval_f1": 0.7570532915360502, |
|
"eval_loss": 0.2740270793437958, |
|
"eval_precision": 0.7340425531914894, |
|
"eval_recall": 0.7815533980582524, |
|
"eval_runtime": 4.2179, |
|
"eval_samples_per_second": 71.599, |
|
"eval_steps_per_second": 9.009, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 22.39, |
|
"learning_rate": 4.530201342281879e-05, |
|
"loss": 0.0028, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 23.88, |
|
"eval_accuracy_score": 0.9506779913574728, |
|
"eval_f1": 0.7511520737327189, |
|
"eval_loss": 0.29271605610847473, |
|
"eval_precision": 0.7149122807017544, |
|
"eval_recall": 0.7912621359223301, |
|
"eval_runtime": 4.2167, |
|
"eval_samples_per_second": 71.619, |
|
"eval_steps_per_second": 9.012, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 26.87, |
|
"eval_accuracy_score": 0.953956191327671, |
|
"eval_f1": 0.7715654952076678, |
|
"eval_loss": 0.27202117443084717, |
|
"eval_precision": 0.7618296529968455, |
|
"eval_recall": 0.7815533980582524, |
|
"eval_runtime": 4.2162, |
|
"eval_samples_per_second": 71.629, |
|
"eval_steps_per_second": 9.013, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 29.85, |
|
"learning_rate": 4.36241610738255e-05, |
|
"loss": 0.0031, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 29.85, |
|
"eval_accuracy_score": 0.9545522276858889, |
|
"eval_f1": 0.769352290679305, |
|
"eval_loss": 0.3008579909801483, |
|
"eval_precision": 0.7515432098765432, |
|
"eval_recall": 0.7880258899676376, |
|
"eval_runtime": 4.2137, |
|
"eval_samples_per_second": 71.671, |
|
"eval_steps_per_second": 9.018, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 32.84, |
|
"eval_accuracy_score": 0.9544032185963344, |
|
"eval_f1": 0.7775100401606425, |
|
"eval_loss": 0.3004043698310852, |
|
"eval_precision": 0.7719298245614035, |
|
"eval_recall": 0.7831715210355987, |
|
"eval_runtime": 4.2226, |
|
"eval_samples_per_second": 71.52, |
|
"eval_steps_per_second": 8.999, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 35.82, |
|
"eval_accuracy_score": 0.9523170913425719, |
|
"eval_f1": 0.7588932806324111, |
|
"eval_loss": 0.3016415238380432, |
|
"eval_precision": 0.7418856259659969, |
|
"eval_recall": 0.7766990291262136, |
|
"eval_runtime": 4.2238, |
|
"eval_samples_per_second": 71.5, |
|
"eval_steps_per_second": 8.997, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 37.31, |
|
"learning_rate": 4.194630872483222e-05, |
|
"loss": 0.0022, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 38.81, |
|
"eval_accuracy_score": 0.9551482640441067, |
|
"eval_f1": 0.7769897557131599, |
|
"eval_loss": 0.2800486385822296, |
|
"eval_precision": 0.7572964669738863, |
|
"eval_recall": 0.7977346278317152, |
|
"eval_runtime": 4.2149, |
|
"eval_samples_per_second": 71.651, |
|
"eval_steps_per_second": 9.016, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 41.79, |
|
"eval_accuracy_score": 0.9502309640888095, |
|
"eval_f1": 0.7396403440187647, |
|
"eval_loss": 0.29597747325897217, |
|
"eval_precision": 0.7155824508320726, |
|
"eval_recall": 0.7653721682847896, |
|
"eval_runtime": 4.2274, |
|
"eval_samples_per_second": 71.439, |
|
"eval_steps_per_second": 8.989, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 44.78, |
|
"learning_rate": 4.026845637583892e-05, |
|
"loss": 0.0021, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 44.78, |
|
"eval_accuracy_score": 0.953658173148562, |
|
"eval_f1": 0.769108280254777, |
|
"eval_loss": 0.3028393089771271, |
|
"eval_precision": 0.7570532915360502, |
|
"eval_recall": 0.7815533980582524, |
|
"eval_runtime": 4.2104, |
|
"eval_samples_per_second": 71.727, |
|
"eval_steps_per_second": 9.025, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 47.76, |
|
"eval_accuracy_score": 0.9566383549396513, |
|
"eval_f1": 0.7822706065318817, |
|
"eval_loss": 0.2665591835975647, |
|
"eval_precision": 0.7529940119760479, |
|
"eval_recall": 0.813915857605178, |
|
"eval_runtime": 4.2247, |
|
"eval_samples_per_second": 71.485, |
|
"eval_steps_per_second": 8.995, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 50.75, |
|
"eval_accuracy_score": 0.9566383549396513, |
|
"eval_f1": 0.7772435897435898, |
|
"eval_loss": 0.26991933584213257, |
|
"eval_precision": 0.7698412698412699, |
|
"eval_recall": 0.7847896440129449, |
|
"eval_runtime": 4.2153, |
|
"eval_samples_per_second": 71.643, |
|
"eval_steps_per_second": 9.015, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 52.24, |
|
"learning_rate": 3.859060402684564e-05, |
|
"loss": 0.0022, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 53.73, |
|
"eval_accuracy_score": 0.9572343912978691, |
|
"eval_f1": 0.7881694644284571, |
|
"eval_loss": 0.2740214169025421, |
|
"eval_precision": 0.7788309636650869, |
|
"eval_recall": 0.7977346278317152, |
|
"eval_runtime": 4.2125, |
|
"eval_samples_per_second": 71.691, |
|
"eval_steps_per_second": 9.021, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 56.72, |
|
"eval_accuracy_score": 0.9563403367605424, |
|
"eval_f1": 0.7778643803585348, |
|
"eval_loss": 0.2816332280635834, |
|
"eval_precision": 0.750375939849624, |
|
"eval_recall": 0.8074433656957929, |
|
"eval_runtime": 4.2121, |
|
"eval_samples_per_second": 71.699, |
|
"eval_steps_per_second": 9.022, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 59.7, |
|
"learning_rate": 3.6912751677852356e-05, |
|
"loss": 0.0014, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 59.7, |
|
"eval_accuracy_score": 0.9518700640739085, |
|
"eval_f1": 0.7486122125297383, |
|
"eval_loss": 0.3085213601589203, |
|
"eval_precision": 0.7340590979782271, |
|
"eval_recall": 0.7637540453074434, |
|
"eval_runtime": 4.2023, |
|
"eval_samples_per_second": 71.866, |
|
"eval_steps_per_second": 9.043, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 62.69, |
|
"eval_accuracy_score": 0.9554462822232156, |
|
"eval_f1": 0.7712, |
|
"eval_loss": 0.27103257179260254, |
|
"eval_precision": 0.7626582278481012, |
|
"eval_recall": 0.7799352750809061, |
|
"eval_runtime": 4.2088, |
|
"eval_samples_per_second": 71.755, |
|
"eval_steps_per_second": 9.029, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 65.67, |
|
"eval_accuracy_score": 0.9573834003874236, |
|
"eval_f1": 0.7856573705179283, |
|
"eval_loss": 0.2769891619682312, |
|
"eval_precision": 0.7739403453689168, |
|
"eval_recall": 0.7977346278317152, |
|
"eval_runtime": 4.2117, |
|
"eval_samples_per_second": 71.705, |
|
"eval_steps_per_second": 9.022, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 67.16, |
|
"learning_rate": 3.523489932885906e-05, |
|
"loss": 0.0017, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 68.66, |
|
"eval_accuracy_score": 0.952019073163463, |
|
"eval_f1": 0.765079365079365, |
|
"eval_loss": 0.3148031532764435, |
|
"eval_precision": 0.7507788161993769, |
|
"eval_recall": 0.7799352750809061, |
|
"eval_runtime": 4.2067, |
|
"eval_samples_per_second": 71.79, |
|
"eval_steps_per_second": 9.033, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 71.64, |
|
"eval_accuracy_score": 0.955893309491879, |
|
"eval_f1": 0.7903351519875291, |
|
"eval_loss": 0.3128798007965088, |
|
"eval_precision": 0.762406015037594, |
|
"eval_recall": 0.8203883495145631, |
|
"eval_runtime": 4.203, |
|
"eval_samples_per_second": 71.854, |
|
"eval_steps_per_second": 9.041, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 74.63, |
|
"learning_rate": 3.3557046979865775e-05, |
|
"loss": 0.0011, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 74.63, |
|
"eval_accuracy_score": 0.9549992549545522, |
|
"eval_f1": 0.7678855325914149, |
|
"eval_loss": 0.3046160936355591, |
|
"eval_precision": 0.7546875, |
|
"eval_recall": 0.7815533980582524, |
|
"eval_runtime": 4.2139, |
|
"eval_samples_per_second": 71.668, |
|
"eval_steps_per_second": 9.018, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 77.61, |
|
"eval_accuracy_score": 0.9526151095216808, |
|
"eval_f1": 0.749407114624506, |
|
"eval_loss": 0.3481159210205078, |
|
"eval_precision": 0.732612055641422, |
|
"eval_recall": 0.7669902912621359, |
|
"eval_runtime": 4.2066, |
|
"eval_samples_per_second": 71.791, |
|
"eval_steps_per_second": 9.033, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 80.6, |
|
"eval_accuracy_score": 0.9479958277454925, |
|
"eval_f1": 0.7124999999999999, |
|
"eval_loss": 0.3350728154182434, |
|
"eval_precision": 0.6888217522658611, |
|
"eval_recall": 0.7378640776699029, |
|
"eval_runtime": 4.207, |
|
"eval_samples_per_second": 71.785, |
|
"eval_steps_per_second": 9.033, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 82.09, |
|
"learning_rate": 3.1879194630872485e-05, |
|
"loss": 0.0012, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 83.58, |
|
"eval_accuracy_score": 0.9545522276858889, |
|
"eval_f1": 0.7642799678197908, |
|
"eval_loss": 0.32919129729270935, |
|
"eval_precision": 0.76, |
|
"eval_recall": 0.7686084142394822, |
|
"eval_runtime": 4.2057, |
|
"eval_samples_per_second": 71.807, |
|
"eval_steps_per_second": 9.035, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 86.57, |
|
"eval_accuracy_score": 0.9547012367754433, |
|
"eval_f1": 0.779552715654952, |
|
"eval_loss": 0.3345593214035034, |
|
"eval_precision": 0.7697160883280757, |
|
"eval_recall": 0.7896440129449838, |
|
"eval_runtime": 4.208, |
|
"eval_samples_per_second": 71.767, |
|
"eval_steps_per_second": 9.03, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 89.55, |
|
"learning_rate": 3.02013422818792e-05, |
|
"loss": 0.0008, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 89.55, |
|
"eval_accuracy_score": 0.9542542095067799, |
|
"eval_f1": 0.7776904948939514, |
|
"eval_loss": 0.3138931095600128, |
|
"eval_precision": 0.7557251908396947, |
|
"eval_recall": 0.8009708737864077, |
|
"eval_runtime": 4.2007, |
|
"eval_samples_per_second": 71.892, |
|
"eval_steps_per_second": 9.046, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 92.54, |
|
"eval_accuracy_score": 0.9493369095514826, |
|
"eval_f1": 0.7448818897637796, |
|
"eval_loss": 0.30180272459983826, |
|
"eval_precision": 0.7254601226993865, |
|
"eval_recall": 0.7653721682847896, |
|
"eval_runtime": 4.2104, |
|
"eval_samples_per_second": 71.727, |
|
"eval_steps_per_second": 9.025, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 95.52, |
|
"eval_accuracy_score": 0.9499329459097005, |
|
"eval_f1": 0.7467482785003826, |
|
"eval_loss": 0.298985093832016, |
|
"eval_precision": 0.7082728592162555, |
|
"eval_recall": 0.7896440129449838, |
|
"eval_runtime": 4.2098, |
|
"eval_samples_per_second": 71.738, |
|
"eval_steps_per_second": 9.027, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 97.01, |
|
"learning_rate": 2.8523489932885905e-05, |
|
"loss": 0.0013, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 98.51, |
|
"eval_accuracy_score": 0.9485918641037103, |
|
"eval_f1": 0.7388932190179269, |
|
"eval_loss": 0.35180896520614624, |
|
"eval_precision": 0.7127819548872181, |
|
"eval_recall": 0.7669902912621359, |
|
"eval_runtime": 4.206, |
|
"eval_samples_per_second": 71.802, |
|
"eval_steps_per_second": 9.035, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 101.49, |
|
"eval_accuracy_score": 0.9490388913723737, |
|
"eval_f1": 0.7475409836065574, |
|
"eval_loss": 0.3536173403263092, |
|
"eval_precision": 0.7574750830564784, |
|
"eval_recall": 0.7378640776699029, |
|
"eval_runtime": 4.2125, |
|
"eval_samples_per_second": 71.692, |
|
"eval_steps_per_second": 9.021, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 104.48, |
|
"learning_rate": 2.6845637583892618e-05, |
|
"loss": 0.0008, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 104.48, |
|
"eval_accuracy_score": 0.9476978095663835, |
|
"eval_f1": 0.7232704402515724, |
|
"eval_loss": 0.3082831799983978, |
|
"eval_precision": 0.7033639143730887, |
|
"eval_recall": 0.7443365695792881, |
|
"eval_runtime": 4.205, |
|
"eval_samples_per_second": 71.819, |
|
"eval_steps_per_second": 9.037, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 104.48, |
|
"step": 7000, |
|
"total_flos": 2.888661669588173e+16, |
|
"train_loss": 0.01226256138086319, |
|
"train_runtime": 7779.0968, |
|
"train_samples_per_second": 61.704, |
|
"train_steps_per_second": 1.928 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 224, |
|
"total_flos": 2.888661669588173e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|