cares-bert-base / trainer_state.json
chizhik
updated model weights
c0e3ff5
raw
history blame
11.2 kB
{
"best_metric": 0.58822980593355,
"best_model_checkpoint": "./CARES/checkpoints/bert-ba/checkpoint-2961",
"epoch": 21.0,
"global_step": 2961,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_loss": 0.22213496267795563,
"eval_macro_f1": 0.08913506250646834,
"eval_macro_precision": 0.1601542788476893,
"eval_macro_recall": 0.07453283930073998,
"eval_micro_f1": 0.3941888619854721,
"eval_micro_precision": 0.7650375939849624,
"eval_micro_recall": 0.2654924983692107,
"eval_runtime": 3.0004,
"eval_samples_per_second": 321.956,
"eval_steps_per_second": 20.331,
"step": 141
},
{
"epoch": 2.0,
"eval_loss": 0.18322525918483734,
"eval_macro_f1": 0.20542764030139582,
"eval_macro_precision": 0.31153358140506404,
"eval_macro_recall": 0.1749941105591186,
"eval_micro_f1": 0.6042759177087536,
"eval_micro_precision": 0.7917547568710359,
"eval_micro_recall": 0.4885844748858447,
"eval_runtime": 3.0066,
"eval_samples_per_second": 321.295,
"eval_steps_per_second": 20.289,
"step": 282
},
{
"epoch": 3.0,
"eval_loss": 0.168878972530365,
"eval_macro_f1": 0.29169004336353355,
"eval_macro_precision": 0.39228947650947077,
"eval_macro_recall": 0.2624222179377449,
"eval_micro_f1": 0.6591789310611929,
"eval_micro_precision": 0.8112488083889419,
"eval_micro_recall": 0.5551206784083497,
"eval_runtime": 3.0155,
"eval_samples_per_second": 320.35,
"eval_steps_per_second": 20.229,
"step": 423
},
{
"epoch": 3.55,
"learning_rate": 2.8936170212765956e-05,
"loss": 0.1959,
"step": 500
},
{
"epoch": 4.0,
"eval_loss": 0.1501942127943039,
"eval_macro_f1": 0.35589775613243146,
"eval_macro_precision": 0.4667524057986162,
"eval_macro_recall": 0.3166674518971492,
"eval_micro_f1": 0.7155012948575658,
"eval_micro_precision": 0.8264957264957264,
"eval_micro_recall": 0.6307893020221788,
"eval_runtime": 3.0213,
"eval_samples_per_second": 319.733,
"eval_steps_per_second": 20.19,
"step": 564
},
{
"epoch": 5.0,
"eval_loss": 0.14672552049160004,
"eval_macro_f1": 0.47974247836953543,
"eval_macro_precision": 0.6466753641860046,
"eval_macro_recall": 0.42420722655395326,
"eval_micro_f1": 0.7180585296216989,
"eval_micro_precision": 0.7927501970055162,
"eval_micro_recall": 0.6562296151337247,
"eval_runtime": 3.0023,
"eval_samples_per_second": 321.752,
"eval_steps_per_second": 20.318,
"step": 705
},
{
"epoch": 6.0,
"eval_loss": 0.14830899238586426,
"eval_macro_f1": 0.4652554310103303,
"eval_macro_precision": 0.6251017145329125,
"eval_macro_recall": 0.4150412963066584,
"eval_micro_f1": 0.7256140350877194,
"eval_micro_precision": 0.7851176917236142,
"eval_micro_recall": 0.6744944553163731,
"eval_runtime": 2.9724,
"eval_samples_per_second": 324.991,
"eval_steps_per_second": 20.522,
"step": 846
},
{
"epoch": 7.0,
"eval_loss": 0.1467733532190323,
"eval_macro_f1": 0.5097890383483837,
"eval_macro_precision": 0.6555313836604214,
"eval_macro_recall": 0.4437744774893355,
"eval_micro_f1": 0.7510431154381085,
"eval_micro_precision": 0.8041697691734921,
"eval_micro_recall": 0.7045009784735812,
"eval_runtime": 2.9801,
"eval_samples_per_second": 324.15,
"eval_steps_per_second": 20.469,
"step": 987
},
{
"epoch": 7.09,
"learning_rate": 2.7872340425531914e-05,
"loss": 0.0904,
"step": 1000
},
{
"epoch": 8.0,
"eval_loss": 0.14951790869235992,
"eval_macro_f1": 0.5411089496886734,
"eval_macro_precision": 0.6577005724429985,
"eval_macro_recall": 0.47852188216570773,
"eval_micro_f1": 0.7489539748953974,
"eval_micro_precision": 0.8044943820224719,
"eval_micro_recall": 0.700587084148728,
"eval_runtime": 2.9774,
"eval_samples_per_second": 324.441,
"eval_steps_per_second": 20.487,
"step": 1128
},
{
"epoch": 9.0,
"eval_loss": 0.1488533467054367,
"eval_macro_f1": 0.5590641407878995,
"eval_macro_precision": 0.6102853068447548,
"eval_macro_recall": 0.5269868274318028,
"eval_micro_f1": 0.755420054200542,
"eval_micro_precision": 0.7857646229739254,
"eval_micro_recall": 0.7273320287018917,
"eval_runtime": 2.9723,
"eval_samples_per_second": 324.996,
"eval_steps_per_second": 20.523,
"step": 1269
},
{
"epoch": 10.0,
"eval_loss": 0.15144900977611542,
"eval_macro_f1": 0.5597430867412742,
"eval_macro_precision": 0.6112400331236583,
"eval_macro_recall": 0.5287805393050224,
"eval_micro_f1": 0.7608550434201737,
"eval_micro_precision": 0.7796030116358659,
"eval_micro_recall": 0.7429876060013046,
"eval_runtime": 2.9735,
"eval_samples_per_second": 324.869,
"eval_steps_per_second": 20.515,
"step": 1410
},
{
"epoch": 10.64,
"learning_rate": 2.6808510638297873e-05,
"loss": 0.0424,
"step": 1500
},
{
"epoch": 11.0,
"eval_loss": 0.15306253731250763,
"eval_macro_f1": 0.5476291897617486,
"eval_macro_precision": 0.6042943362522458,
"eval_macro_recall": 0.5150958586698227,
"eval_micro_f1": 0.7621293800539084,
"eval_micro_precision": 0.7881533101045296,
"eval_micro_recall": 0.7377690802348337,
"eval_runtime": 2.9994,
"eval_samples_per_second": 322.068,
"eval_steps_per_second": 20.338,
"step": 1551
},
{
"epoch": 12.0,
"eval_loss": 0.15226905047893524,
"eval_macro_f1": 0.5645136859707334,
"eval_macro_precision": 0.6250404048441249,
"eval_macro_recall": 0.5290350408209084,
"eval_micro_f1": 0.7750586657727119,
"eval_micro_precision": 0.7972413793103448,
"eval_micro_recall": 0.7540769732550554,
"eval_runtime": 2.9853,
"eval_samples_per_second": 323.581,
"eval_steps_per_second": 20.433,
"step": 1692
},
{
"epoch": 13.0,
"eval_loss": 0.15537378191947937,
"eval_macro_f1": 0.5801357636140765,
"eval_macro_precision": 0.6557226559864182,
"eval_macro_recall": 0.5300773127347133,
"eval_micro_f1": 0.7715736040609137,
"eval_micro_precision": 0.8016877637130801,
"eval_micro_recall": 0.7436399217221135,
"eval_runtime": 4.9377,
"eval_samples_per_second": 195.637,
"eval_steps_per_second": 12.354,
"step": 1833
},
{
"epoch": 14.0,
"eval_loss": 0.15421901643276215,
"eval_macro_f1": 0.5773329417808055,
"eval_macro_precision": 0.6485175319800287,
"eval_macro_recall": 0.5350306451399538,
"eval_micro_f1": 0.77552400270453,
"eval_micro_precision": 0.8049122807017544,
"eval_micro_recall": 0.7482061317677756,
"eval_runtime": 3.0097,
"eval_samples_per_second": 320.96,
"eval_steps_per_second": 20.268,
"step": 1974
},
{
"epoch": 14.18,
"learning_rate": 2.574468085106383e-05,
"loss": 0.0219,
"step": 2000
},
{
"epoch": 15.0,
"eval_loss": 0.15735264122486115,
"eval_macro_f1": 0.5661412601117408,
"eval_macro_precision": 0.5937031922346153,
"eval_macro_recall": 0.5467428221378843,
"eval_micro_f1": 0.7738255033557048,
"eval_micro_precision": 0.796821008984105,
"eval_micro_recall": 0.7521200260926288,
"eval_runtime": 2.9854,
"eval_samples_per_second": 323.576,
"eval_steps_per_second": 20.433,
"step": 2115
},
{
"epoch": 16.0,
"eval_loss": 0.1617126762866974,
"eval_macro_f1": 0.5686269653829785,
"eval_macro_precision": 0.6170850785135864,
"eval_macro_recall": 0.5368268361090598,
"eval_micro_f1": 0.779524924723988,
"eval_micro_precision": 0.8001373626373627,
"eval_micro_recall": 0.7599478147423353,
"eval_runtime": 2.9891,
"eval_samples_per_second": 323.178,
"eval_steps_per_second": 20.408,
"step": 2256
},
{
"epoch": 17.0,
"eval_loss": 0.16439199447631836,
"eval_macro_f1": 0.5741609962540627,
"eval_macro_precision": 0.6203354824423237,
"eval_macro_recall": 0.5508776094197674,
"eval_micro_f1": 0.7765251989389921,
"eval_micro_precision": 0.7896156439649359,
"eval_micro_recall": 0.7638617090671885,
"eval_runtime": 2.979,
"eval_samples_per_second": 324.271,
"eval_steps_per_second": 20.477,
"step": 2397
},
{
"epoch": 17.73,
"learning_rate": 2.4680851063829786e-05,
"loss": 0.013,
"step": 2500
},
{
"epoch": 18.0,
"eval_loss": 0.16467925906181335,
"eval_macro_f1": 0.5828461745953328,
"eval_macro_precision": 0.6374781102044266,
"eval_macro_recall": 0.5491770837420844,
"eval_micro_f1": 0.7797519275896748,
"eval_micro_precision": 0.8020689655172414,
"eval_micro_recall": 0.7586431833007176,
"eval_runtime": 2.9765,
"eval_samples_per_second": 324.537,
"eval_steps_per_second": 20.494,
"step": 2538
},
{
"epoch": 19.0,
"eval_loss": 0.17651152610778809,
"eval_macro_f1": 0.578653206536447,
"eval_macro_precision": 0.5940445993831301,
"eval_macro_recall": 0.5738532439728641,
"eval_micro_f1": 0.7715868361029651,
"eval_micro_precision": 0.7708333333333334,
"eval_micro_recall": 0.7723418134377038,
"eval_runtime": 2.9833,
"eval_samples_per_second": 323.805,
"eval_steps_per_second": 20.447,
"step": 2679
},
{
"epoch": 20.0,
"eval_loss": 0.17114990949630737,
"eval_macro_f1": 0.5779704862006727,
"eval_macro_precision": 0.6285916131953179,
"eval_macro_recall": 0.5455200342016094,
"eval_micro_f1": 0.7783711615487315,
"eval_micro_precision": 0.7969924812030075,
"eval_micro_recall": 0.7606001304631441,
"eval_runtime": 2.9774,
"eval_samples_per_second": 324.449,
"eval_steps_per_second": 20.488,
"step": 2820
},
{
"epoch": 21.0,
"eval_loss": 0.1727105975151062,
"eval_macro_f1": 0.58822980593355,
"eval_macro_precision": 0.6578174885933932,
"eval_macro_recall": 0.5467783967982416,
"eval_micro_f1": 0.782282793867121,
"eval_micro_precision": 0.818830242510699,
"eval_micro_recall": 0.7488584474885844,
"eval_runtime": 2.9806,
"eval_samples_per_second": 324.101,
"eval_steps_per_second": 20.466,
"step": 2961
}
],
"max_steps": 14100,
"num_train_epochs": 100,
"total_flos": 1.2360197339395008e+16,
"trial_name": null,
"trial_params": null
}