{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 1,
  "global_step": 70,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14285714285714285,
      "grad_norm": 3.998234510421753,
      "learning_rate": 2.5e-05,
      "loss": 0.7117,
      "step": 1
    },
    {
      "epoch": 0.14285714285714285,
      "eval_loss": 0.6798907518386841,
      "eval_matthews_correlation": 0.02588070098335507,
      "eval_runtime": 2.5395,
      "eval_samples_per_second": 87.024,
      "eval_steps_per_second": 1.575,
      "step": 1
    },
    {
      "epoch": 0.2857142857142857,
      "grad_norm": 7.397937297821045,
      "learning_rate": 5e-05,
      "loss": 0.7295,
      "step": 2
    },
    {
      "epoch": 0.2857142857142857,
      "eval_loss": 0.6786512732505798,
      "eval_matthews_correlation": 0.06415201276791879,
      "eval_runtime": 2.5854,
      "eval_samples_per_second": 85.481,
      "eval_steps_per_second": 1.547,
      "step": 2
    },
    {
      "epoch": 0.42857142857142855,
      "grad_norm": 2.171586751937866,
      "learning_rate": 4.9264705882352944e-05,
      "loss": 0.6526,
      "step": 3
    },
    {
      "epoch": 0.42857142857142855,
      "eval_loss": 0.6761562824249268,
      "eval_matthews_correlation": 0.06415201276791879,
      "eval_runtime": 2.5858,
      "eval_samples_per_second": 85.465,
      "eval_steps_per_second": 1.547,
      "step": 3
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 1.377044677734375,
      "learning_rate": 4.8529411764705885e-05,
      "loss": 0.7017,
      "step": 4
    },
    {
      "epoch": 0.5714285714285714,
      "eval_loss": 0.6824426651000977,
      "eval_matthews_correlation": 0.03840151481124831,
      "eval_runtime": 2.5843,
      "eval_samples_per_second": 85.516,
      "eval_steps_per_second": 1.548,
      "step": 4
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 1.5057487487792969,
      "learning_rate": 4.7794117647058826e-05,
      "loss": 0.7353,
      "step": 5
    },
    {
      "epoch": 0.7142857142857143,
      "eval_loss": 0.6836904287338257,
      "eval_matthews_correlation": 0.03840151481124831,
      "eval_runtime": 2.5832,
      "eval_samples_per_second": 85.554,
      "eval_steps_per_second": 1.548,
      "step": 5
    },
    {
      "epoch": 0.8571428571428571,
      "grad_norm": 2.314178228378296,
      "learning_rate": 4.705882352941177e-05,
      "loss": 0.7249,
      "step": 6
    },
    {
      "epoch": 0.8571428571428571,
      "eval_loss": 0.6898430585861206,
      "eval_matthews_correlation": 0.07515566518214702,
      "eval_runtime": 2.748,
      "eval_samples_per_second": 80.422,
      "eval_steps_per_second": 1.456,
      "step": 6
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.5473382472991943,
      "learning_rate": 4.632352941176471e-05,
      "loss": 0.7272,
      "step": 7
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.6884390115737915,
      "eval_matthews_correlation": 0.06254519425349994,
      "eval_runtime": 2.5815,
      "eval_samples_per_second": 85.608,
      "eval_steps_per_second": 1.549,
      "step": 7
    },
    {
      "epoch": 1.1428571428571428,
      "grad_norm": 1.2883397340774536,
      "learning_rate": 4.558823529411765e-05,
      "loss": 0.6804,
      "step": 8
    },
    {
      "epoch": 1.1428571428571428,
      "eval_loss": 0.6875110268592834,
      "eval_matthews_correlation": 0.08901797683850127,
      "eval_runtime": 2.5789,
      "eval_samples_per_second": 85.695,
      "eval_steps_per_second": 1.551,
      "step": 8
    },
    {
      "epoch": 1.2857142857142856,
      "grad_norm": 9.519600868225098,
      "learning_rate": 4.485294117647059e-05,
      "loss": 0.7318,
      "step": 9
    },
    {
      "epoch": 1.2857142857142856,
      "eval_loss": 0.6818560361862183,
      "eval_matthews_correlation": 0.1265779624120371,
      "eval_runtime": 2.5834,
      "eval_samples_per_second": 85.546,
      "eval_steps_per_second": 1.548,
      "step": 9
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 12.696111679077148,
      "learning_rate": 4.411764705882353e-05,
      "loss": 0.714,
      "step": 10
    },
    {
      "epoch": 1.4285714285714286,
      "eval_loss": 0.6748389601707458,
      "eval_matthews_correlation": -0.10535441464971222,
      "eval_runtime": 2.5799,
      "eval_samples_per_second": 85.662,
      "eval_steps_per_second": 1.55,
      "step": 10
    },
    {
      "epoch": 1.5714285714285714,
      "grad_norm": 3.595411539077759,
      "learning_rate": 4.3382352941176474e-05,
      "loss": 0.7136,
      "step": 11
    },
    {
      "epoch": 1.5714285714285714,
      "eval_loss": 0.6693562865257263,
      "eval_matthews_correlation": -0.07344997524675997,
      "eval_runtime": 2.582,
      "eval_samples_per_second": 85.593,
      "eval_steps_per_second": 1.549,
      "step": 11
    },
    {
      "epoch": 1.7142857142857144,
      "grad_norm": 3.0389199256896973,
      "learning_rate": 4.2647058823529415e-05,
      "loss": 0.7148,
      "step": 12
    },
    {
      "epoch": 1.7142857142857144,
      "eval_loss": 0.6688193678855896,
      "eval_matthews_correlation": -0.05644705286285884,
      "eval_runtime": 2.5824,
      "eval_samples_per_second": 85.581,
      "eval_steps_per_second": 1.549,
      "step": 12
    },
    {
      "epoch": 1.8571428571428572,
      "grad_norm": 3.855015754699707,
      "learning_rate": 4.1911764705882356e-05,
      "loss": 0.7435,
      "step": 13
    },
    {
      "epoch": 1.8571428571428572,
      "eval_loss": 0.6662454009056091,
      "eval_matthews_correlation": -0.05644705286285884,
      "eval_runtime": 2.5839,
      "eval_samples_per_second": 85.531,
      "eval_steps_per_second": 1.548,
      "step": 13
    },
    {
      "epoch": 2.0,
      "grad_norm": 2.5934810638427734,
      "learning_rate": 4.11764705882353e-05,
      "loss": 0.7235,
      "step": 14
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.6664111018180847,
      "eval_matthews_correlation": -0.08839144444490248,
      "eval_runtime": 2.5848,
      "eval_samples_per_second": 85.5,
      "eval_steps_per_second": 1.548,
      "step": 14
    },
    {
      "epoch": 2.142857142857143,
      "grad_norm": 4.137197971343994,
      "learning_rate": 4.044117647058824e-05,
      "loss": 0.735,
      "step": 15
    },
    {
      "epoch": 2.142857142857143,
      "eval_loss": 0.6680322885513306,
      "eval_matthews_correlation": -0.016145842093015665,
      "eval_runtime": 2.5832,
      "eval_samples_per_second": 85.554,
      "eval_steps_per_second": 1.548,
      "step": 15
    },
    {
      "epoch": 2.2857142857142856,
      "grad_norm": 1.7002750635147095,
      "learning_rate": 3.970588235294117e-05,
      "loss": 0.7067,
      "step": 16
    },
    {
      "epoch": 2.2857142857142856,
      "eval_loss": 0.6730608940124512,
      "eval_matthews_correlation": -0.01955413854015369,
      "eval_runtime": 2.5331,
      "eval_samples_per_second": 87.246,
      "eval_steps_per_second": 1.579,
      "step": 16
    },
    {
      "epoch": 2.4285714285714284,
      "grad_norm": 5.263991355895996,
      "learning_rate": 3.897058823529412e-05,
      "loss": 0.677,
      "step": 17
    },
    {
      "epoch": 2.4285714285714284,
      "eval_loss": 0.6752498745918274,
      "eval_matthews_correlation": -0.003660370980642071,
      "eval_runtime": 2.5815,
      "eval_samples_per_second": 85.609,
      "eval_steps_per_second": 1.549,
      "step": 17
    },
    {
      "epoch": 2.571428571428571,
      "grad_norm": 3.7233338356018066,
      "learning_rate": 3.8235294117647055e-05,
      "loss": 0.7148,
      "step": 18
    },
    {
      "epoch": 2.571428571428571,
      "eval_loss": 0.6821073889732361,
      "eval_matthews_correlation": 0.03233650264505481,
      "eval_runtime": 2.5867,
      "eval_samples_per_second": 85.438,
      "eval_steps_per_second": 1.546,
      "step": 18
    },
    {
      "epoch": 2.7142857142857144,
      "grad_norm": 2.5118980407714844,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.6816,
      "step": 19
    },
    {
      "epoch": 2.7142857142857144,
      "eval_loss": 0.6831817030906677,
      "eval_matthews_correlation": 0.03233650264505481,
      "eval_runtime": 2.5301,
      "eval_samples_per_second": 87.348,
      "eval_steps_per_second": 1.581,
      "step": 19
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 2.3825275897979736,
      "learning_rate": 3.6764705882352945e-05,
      "loss": 0.7313,
      "step": 20
    },
    {
      "epoch": 2.857142857142857,
      "eval_loss": 0.6824917793273926,
      "eval_matthews_correlation": 0.03233650264505481,
      "eval_runtime": 2.583,
      "eval_samples_per_second": 85.558,
      "eval_steps_per_second": 1.549,
      "step": 20
    },
    {
      "epoch": 3.0,
      "grad_norm": 3.6262497901916504,
      "learning_rate": 3.6029411764705886e-05,
      "loss": 0.7177,
      "step": 21
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.6839202046394348,
      "eval_matthews_correlation": 0.02099447073916433,
      "eval_runtime": 2.5829,
      "eval_samples_per_second": 85.562,
      "eval_steps_per_second": 1.549,
      "step": 21
    },
    {
      "epoch": 3.142857142857143,
      "grad_norm": 5.570082187652588,
      "learning_rate": 3.529411764705883e-05,
      "loss": 0.7039,
      "step": 22
    },
    {
      "epoch": 3.142857142857143,
      "eval_loss": 0.6828458905220032,
      "eval_matthews_correlation": 0.03233650264505481,
      "eval_runtime": 2.5864,
      "eval_samples_per_second": 85.448,
      "eval_steps_per_second": 1.547,
      "step": 22
    },
    {
      "epoch": 3.2857142857142856,
      "grad_norm": 2.2818679809570312,
      "learning_rate": 3.455882352941177e-05,
      "loss": 0.6952,
      "step": 23
    },
    {
      "epoch": 3.2857142857142856,
      "eval_loss": 0.6800376772880554,
      "eval_matthews_correlation": 0.03233650264505481,
      "eval_runtime": 2.5831,
      "eval_samples_per_second": 85.556,
      "eval_steps_per_second": 1.549,
      "step": 23
    },
    {
      "epoch": 3.4285714285714284,
      "grad_norm": 4.441514015197754,
      "learning_rate": 3.382352941176471e-05,
      "loss": 0.6898,
      "step": 24
    },
    {
      "epoch": 3.4285714285714284,
      "eval_loss": 0.6810070872306824,
      "eval_matthews_correlation": 0.03233650264505481,
      "eval_runtime": 2.5837,
      "eval_samples_per_second": 85.538,
      "eval_steps_per_second": 1.548,
      "step": 24
    },
    {
      "epoch": 3.571428571428571,
      "grad_norm": 2.3354990482330322,
      "learning_rate": 3.308823529411765e-05,
      "loss": 0.6589,
      "step": 25
    },
    {
      "epoch": 3.571428571428571,
      "eval_loss": 0.6801570057868958,
      "eval_matthews_correlation": 0.014648552723664804,
      "eval_runtime": 2.5834,
      "eval_samples_per_second": 85.547,
      "eval_steps_per_second": 1.548,
      "step": 25
    },
    {
      "epoch": 3.7142857142857144,
      "grad_norm": 1.5001767873764038,
      "learning_rate": 3.235294117647059e-05,
      "loss": 0.6952,
      "step": 26
    },
    {
      "epoch": 3.7142857142857144,
      "eval_loss": 0.6867631673812866,
      "eval_matthews_correlation": 0.03233650264505481,
      "eval_runtime": 2.5804,
      "eval_samples_per_second": 85.647,
      "eval_steps_per_second": 1.55,
      "step": 26
    },
    {
      "epoch": 3.857142857142857,
      "grad_norm": 8.003545761108398,
      "learning_rate": 3.161764705882353e-05,
      "loss": 0.6509,
      "step": 27
    },
    {
      "epoch": 3.857142857142857,
      "eval_loss": 0.6861798763275146,
      "eval_matthews_correlation": 0.02099447073916433,
      "eval_runtime": 2.5355,
      "eval_samples_per_second": 87.161,
      "eval_steps_per_second": 1.578,
      "step": 27
    },
    {
      "epoch": 4.0,
      "grad_norm": 2.25142502784729,
      "learning_rate": 3.0882352941176475e-05,
      "loss": 0.6892,
      "step": 28
    },
    {
      "epoch": 4.0,
      "eval_loss": 0.6863400340080261,
      "eval_matthews_correlation": 0.02099447073916433,
      "eval_runtime": 2.5286,
      "eval_samples_per_second": 87.399,
      "eval_steps_per_second": 1.582,
      "step": 28
    },
    {
      "epoch": 4.142857142857143,
      "grad_norm": 8.721115112304688,
      "learning_rate": 3.0147058823529413e-05,
      "loss": 0.6586,
      "step": 29
    },
    {
      "epoch": 4.142857142857143,
      "eval_loss": 0.6862174272537231,
      "eval_matthews_correlation": 0.02099447073916433,
      "eval_runtime": 2.5809,
      "eval_samples_per_second": 85.63,
      "eval_steps_per_second": 1.55,
      "step": 29
    },
    {
      "epoch": 4.285714285714286,
      "grad_norm": 4.0326619148254395,
      "learning_rate": 2.9411764705882354e-05,
      "loss": 0.7479,
      "step": 30
    },
    {
      "epoch": 4.285714285714286,
      "eval_loss": 0.6849657893180847,
      "eval_matthews_correlation": 0.02099447073916433,
      "eval_runtime": 2.5316,
      "eval_samples_per_second": 87.298,
      "eval_steps_per_second": 1.58,
      "step": 30
    },
    {
      "epoch": 4.428571428571429,
      "grad_norm": 3.4629993438720703,
      "learning_rate": 2.8676470588235295e-05,
      "loss": 0.7311,
      "step": 31
    },
    {
      "epoch": 4.428571428571429,
      "eval_loss": 0.6831032633781433,
      "eval_matthews_correlation": 0.03233650264505481,
      "eval_runtime": 2.5314,
      "eval_samples_per_second": 87.305,
      "eval_steps_per_second": 1.58,
      "step": 31
    },
    {
      "epoch": 4.571428571428571,
      "grad_norm": 1.6142446994781494,
      "learning_rate": 2.7941176470588236e-05,
      "loss": 0.6869,
      "step": 32
    },
    {
      "epoch": 4.571428571428571,
      "eval_loss": 0.6772472262382507,
      "eval_matthews_correlation": -0.003660370980642071,
      "eval_runtime": 2.5782,
      "eval_samples_per_second": 85.72,
      "eval_steps_per_second": 1.551,
      "step": 32
    },
    {
      "epoch": 4.714285714285714,
      "grad_norm": 2.363823890686035,
      "learning_rate": 2.7205882352941174e-05,
      "loss": 0.6625,
      "step": 33
    },
    {
      "epoch": 4.714285714285714,
      "eval_loss": 0.674756646156311,
      "eval_matthews_correlation": 0.007873691885759546,
      "eval_runtime": 2.5793,
      "eval_samples_per_second": 85.683,
      "eval_steps_per_second": 1.551,
      "step": 33
    },
    {
      "epoch": 4.857142857142857,
      "grad_norm": 1.2323307991027832,
      "learning_rate": 2.647058823529412e-05,
      "loss": 0.6749,
      "step": 34
    },
    {
      "epoch": 4.857142857142857,
      "eval_loss": 0.6726891994476318,
      "eval_matthews_correlation": 0.019864667834482774,
      "eval_runtime": 2.5302,
      "eval_samples_per_second": 87.346,
      "eval_steps_per_second": 1.581,
      "step": 34
    },
    {
      "epoch": 5.0,
      "grad_norm": 5.784065246582031,
      "learning_rate": 2.5735294117647057e-05,
      "loss": 0.6787,
      "step": 35
    },
    {
      "epoch": 5.0,
      "eval_loss": 0.6702554821968079,
      "eval_matthews_correlation": 0.007873691885759546,
      "eval_runtime": 2.5301,
      "eval_samples_per_second": 87.347,
      "eval_steps_per_second": 1.581,
      "step": 35
    },
    {
      "epoch": 5.142857142857143,
      "grad_norm": 2.1666295528411865,
      "learning_rate": 2.5e-05,
      "loss": 0.6622,
      "step": 36
    },
    {
      "epoch": 5.142857142857143,
      "eval_loss": 0.6678881049156189,
      "eval_matthews_correlation": 0.019864667834482774,
      "eval_runtime": 2.5786,
      "eval_samples_per_second": 85.706,
      "eval_steps_per_second": 1.551,
      "step": 36
    },
    {
      "epoch": 5.285714285714286,
      "grad_norm": 1.818786382675171,
      "learning_rate": 2.4264705882352942e-05,
      "loss": 0.7103,
      "step": 37
    },
    {
      "epoch": 5.285714285714286,
      "eval_loss": 0.6679052114486694,
      "eval_matthews_correlation": 0.019864667834482774,
      "eval_runtime": 2.5803,
      "eval_samples_per_second": 85.649,
      "eval_steps_per_second": 1.55,
      "step": 37
    },
    {
      "epoch": 5.428571428571429,
      "grad_norm": 4.267655849456787,
      "learning_rate": 2.3529411764705884e-05,
      "loss": 0.7146,
      "step": 38
    },
    {
      "epoch": 5.428571428571429,
      "eval_loss": 0.6662653088569641,
      "eval_matthews_correlation": 0.007873691885759546,
      "eval_runtime": 2.5787,
      "eval_samples_per_second": 85.702,
      "eval_steps_per_second": 1.551,
      "step": 38
    },
    {
      "epoch": 5.571428571428571,
      "grad_norm": 1.2934308052062988,
      "learning_rate": 2.2794117647058825e-05,
      "loss": 0.7013,
      "step": 39
    },
    {
      "epoch": 5.571428571428571,
      "eval_loss": 0.667534589767456,
      "eval_matthews_correlation": 0.03236368357125948,
      "eval_runtime": 2.577,
      "eval_samples_per_second": 85.759,
      "eval_steps_per_second": 1.552,
      "step": 39
    },
    {
      "epoch": 5.714285714285714,
      "grad_norm": 5.849837779998779,
      "learning_rate": 2.2058823529411766e-05,
      "loss": 0.6958,
      "step": 40
    },
    {
      "epoch": 5.714285714285714,
      "eval_loss": 0.6637371778488159,
      "eval_matthews_correlation": 0.019864667834482774,
      "eval_runtime": 2.5784,
      "eval_samples_per_second": 85.711,
      "eval_steps_per_second": 1.551,
      "step": 40
    },
    {
      "epoch": 5.857142857142857,
      "grad_norm": 1.7092323303222656,
      "learning_rate": 2.1323529411764707e-05,
      "loss": 0.6783,
      "step": 41
    },
    {
      "epoch": 5.857142857142857,
      "eval_loss": 0.6648374795913696,
      "eval_matthews_correlation": 0.03236368357125948,
      "eval_runtime": 2.58,
      "eval_samples_per_second": 85.659,
      "eval_steps_per_second": 1.55,
      "step": 41
    },
    {
      "epoch": 6.0,
      "grad_norm": 5.318603038787842,
      "learning_rate": 2.058823529411765e-05,
      "loss": 0.6721,
      "step": 42
    },
    {
      "epoch": 6.0,
      "eval_loss": 0.6657968759536743,
      "eval_matthews_correlation": 0.05100006919207634,
      "eval_runtime": 2.5284,
      "eval_samples_per_second": 87.405,
      "eval_steps_per_second": 1.582,
      "step": 42
    },
    {
      "epoch": 6.142857142857143,
      "grad_norm": 2.208892345428467,
      "learning_rate": 1.9852941176470586e-05,
      "loss": 0.7382,
      "step": 43
    },
    {
      "epoch": 6.142857142857143,
      "eval_loss": 0.6654295921325684,
      "eval_matthews_correlation": 0.03840151481124831,
      "eval_runtime": 2.5782,
      "eval_samples_per_second": 85.719,
      "eval_steps_per_second": 1.551,
      "step": 43
    },
    {
      "epoch": 6.285714285714286,
      "grad_norm": 1.805138349533081,
      "learning_rate": 1.9117647058823528e-05,
      "loss": 0.7029,
      "step": 44
    },
    {
      "epoch": 6.285714285714286,
      "eval_loss": 0.6661415696144104,
      "eval_matthews_correlation": 0.03840151481124831,
      "eval_runtime": 2.5785,
      "eval_samples_per_second": 85.708,
      "eval_steps_per_second": 1.551,
      "step": 44
    },
    {
      "epoch": 6.428571428571429,
      "grad_norm": 1.8971737623214722,
      "learning_rate": 1.8382352941176472e-05,
      "loss": 0.712,
      "step": 45
    },
    {
      "epoch": 6.428571428571429,
      "eval_loss": 0.6690033078193665,
      "eval_matthews_correlation": 0.03840151481124831,
      "eval_runtime": 2.5783,
      "eval_samples_per_second": 85.714,
      "eval_steps_per_second": 1.551,
      "step": 45
    },
    {
      "epoch": 6.571428571428571,
      "grad_norm": 7.983123779296875,
      "learning_rate": 1.7647058823529414e-05,
      "loss": 0.607,
      "step": 46
    },
    {
      "epoch": 6.571428571428571,
      "eval_loss": 0.6701312065124512,
      "eval_matthews_correlation": 0.03840151481124831,
      "eval_runtime": 2.5814,
      "eval_samples_per_second": 85.613,
      "eval_steps_per_second": 1.55,
      "step": 46
    },
    {
      "epoch": 6.714285714285714,
      "grad_norm": 1.1388919353485107,
      "learning_rate": 1.6911764705882355e-05,
      "loss": 0.662,
      "step": 47
    },
    {
      "epoch": 6.714285714285714,
      "eval_loss": 0.6674517393112183,
      "eval_matthews_correlation": 0.019864667834482774,
      "eval_runtime": 2.5834,
      "eval_samples_per_second": 85.547,
      "eval_steps_per_second": 1.548,
      "step": 47
    },
    {
      "epoch": 6.857142857142857,
      "grad_norm": 1.0342700481414795,
      "learning_rate": 1.6176470588235296e-05,
      "loss": 0.6474,
      "step": 48
    },
    {
      "epoch": 6.857142857142857,
      "eval_loss": 0.667188823223114,
      "eval_matthews_correlation": 0.03840151481124831,
      "eval_runtime": 2.58,
      "eval_samples_per_second": 85.659,
      "eval_steps_per_second": 1.55,
      "step": 48
    },
    {
      "epoch": 7.0,
      "grad_norm": 2.212453603744507,
      "learning_rate": 1.5441176470588237e-05,
      "loss": 0.6797,
      "step": 49
    },
    {
      "epoch": 7.0,
      "eval_loss": 0.665696918964386,
      "eval_matthews_correlation": 0.019864667834482774,
      "eval_runtime": 2.5784,
      "eval_samples_per_second": 85.712,
      "eval_steps_per_second": 1.551,
      "step": 49
    },
    {
      "epoch": 7.142857142857143,
      "grad_norm": 1.651175618171692,
      "learning_rate": 1.4705882352941177e-05,
      "loss": 0.682,
      "step": 50
    },
    {
      "epoch": 7.142857142857143,
      "eval_loss": 0.6662675142288208,
      "eval_matthews_correlation": 0.007873691885759546,
      "eval_runtime": 2.5819,
      "eval_samples_per_second": 85.597,
      "eval_steps_per_second": 1.549,
      "step": 50
    },
    {
      "epoch": 7.285714285714286,
      "grad_norm": 1.319028377532959,
      "learning_rate": 1.3970588235294118e-05,
      "loss": 0.7054,
      "step": 51
    },
    {
      "epoch": 7.285714285714286,
      "eval_loss": 0.665385365486145,
      "eval_matthews_correlation": 0.05626125766542442,
      "eval_runtime": 2.5824,
      "eval_samples_per_second": 85.58,
      "eval_steps_per_second": 1.549,
      "step": 51
    },
    {
      "epoch": 7.428571428571429,
      "grad_norm": 1.2058743238449097,
      "learning_rate": 1.323529411764706e-05,
      "loss": 0.6679,
      "step": 52
    },
    {
      "epoch": 7.428571428571429,
      "eval_loss": 0.6671490669250488,
      "eval_matthews_correlation": -0.003660370980642071,
      "eval_runtime": 2.5799,
      "eval_samples_per_second": 85.663,
      "eval_steps_per_second": 1.55,
      "step": 52
    },
    {
      "epoch": 7.571428571428571,
      "grad_norm": 2.41142201423645,
      "learning_rate": 1.25e-05,
      "loss": 0.6798,
      "step": 53
    },
    {
      "epoch": 7.571428571428571,
      "eval_loss": 0.6681118011474609,
      "eval_matthews_correlation": 0.04945936664931965,
      "eval_runtime": 2.5821,
      "eval_samples_per_second": 85.589,
      "eval_steps_per_second": 1.549,
      "step": 53
    },
    {
      "epoch": 7.714285714285714,
      "grad_norm": 1.5525087118148804,
      "learning_rate": 1.1764705882352942e-05,
      "loss": 0.7081,
      "step": 54
    },
    {
      "epoch": 7.714285714285714,
      "eval_loss": 0.6653820872306824,
      "eval_matthews_correlation": 0.02099447073916433,
      "eval_runtime": 2.5792,
      "eval_samples_per_second": 85.687,
      "eval_steps_per_second": 1.551,
      "step": 54
    },
    {
      "epoch": 7.857142857142857,
      "grad_norm": 5.988497257232666,
      "learning_rate": 1.1029411764705883e-05,
      "loss": 0.6459,
      "step": 55
    },
    {
      "epoch": 7.857142857142857,
      "eval_loss": 0.665358304977417,
      "eval_matthews_correlation": 0.07350910094057247,
      "eval_runtime": 2.5843,
      "eval_samples_per_second": 85.516,
      "eval_steps_per_second": 1.548,
      "step": 55
    },
    {
      "epoch": 8.0,
      "grad_norm": 7.899259090423584,
      "learning_rate": 1.0294117647058824e-05,
      "loss": 0.6267,
      "step": 56
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.6640177369117737,
      "eval_matthews_correlation": 0.07792426255947497,
      "eval_runtime": 2.585,
      "eval_samples_per_second": 85.493,
      "eval_steps_per_second": 1.547,
      "step": 56
    },
    {
      "epoch": 8.142857142857142,
      "grad_norm": 7.863129615783691,
      "learning_rate": 9.558823529411764e-06,
      "loss": 0.6544,
      "step": 57
    },
    {
      "epoch": 8.142857142857142,
      "eval_loss": 0.6616404056549072,
      "eval_matthews_correlation": 0.06892933773673708,
      "eval_runtime": 2.5328,
      "eval_samples_per_second": 87.255,
      "eval_steps_per_second": 1.579,
      "step": 57
    },
    {
      "epoch": 8.285714285714286,
      "grad_norm": 4.004273891448975,
      "learning_rate": 8.823529411764707e-06,
      "loss": 0.608,
      "step": 58
    },
    {
      "epoch": 8.285714285714286,
      "eval_loss": 0.6617823839187622,
      "eval_matthews_correlation": 0.06415201276791879,
      "eval_runtime": 2.5837,
      "eval_samples_per_second": 85.537,
      "eval_steps_per_second": 1.548,
      "step": 58
    },
    {
      "epoch": 8.428571428571429,
      "grad_norm": 3.1885335445404053,
      "learning_rate": 8.088235294117648e-06,
      "loss": 0.698,
      "step": 59
    },
    {
      "epoch": 8.428571428571429,
      "eval_loss": 0.6621480584144592,
      "eval_matthews_correlation": 0.08213547054966991,
      "eval_runtime": 2.5804,
      "eval_samples_per_second": 85.645,
      "eval_steps_per_second": 1.55,
      "step": 59
    },
    {
      "epoch": 8.571428571428571,
      "grad_norm": 1.903214454650879,
      "learning_rate": 7.3529411764705884e-06,
      "loss": 0.6541,
      "step": 60
    },
    {
      "epoch": 8.571428571428571,
      "eval_loss": 0.6629019975662231,
      "eval_matthews_correlation": 0.06892933773673708,
      "eval_runtime": 2.5325,
      "eval_samples_per_second": 87.266,
      "eval_steps_per_second": 1.579,
      "step": 60
    },
    {
      "epoch": 8.714285714285714,
      "grad_norm": 2.6434895992279053,
      "learning_rate": 6.61764705882353e-06,
      "loss": 0.6375,
      "step": 61
    },
    {
      "epoch": 8.714285714285714,
      "eval_loss": 0.6642066836357117,
      "eval_matthews_correlation": 0.03840151481124831,
      "eval_runtime": 2.5848,
      "eval_samples_per_second": 85.5,
      "eval_steps_per_second": 1.548,
      "step": 61
    },
    {
      "epoch": 8.857142857142858,
      "grad_norm": 1.5652414560317993,
      "learning_rate": 5.882352941176471e-06,
      "loss": 0.6667,
      "step": 62
    },
    {
      "epoch": 8.857142857142858,
      "eval_loss": 0.6663409471511841,
      "eval_matthews_correlation": 0.04407882683211864,
      "eval_runtime": 2.5811,
      "eval_samples_per_second": 85.622,
      "eval_steps_per_second": 1.55,
      "step": 62
    },
    {
      "epoch": 9.0,
      "grad_norm": 7.361441135406494,
      "learning_rate": 5.147058823529412e-06,
      "loss": 0.6357,
      "step": 63
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.6640652418136597,
      "eval_matthews_correlation": 0.04945936664931965,
      "eval_runtime": 2.5855,
      "eval_samples_per_second": 85.475,
      "eval_steps_per_second": 1.547,
      "step": 63
    },
    {
      "epoch": 9.142857142857142,
      "grad_norm": 2.824963331222534,
      "learning_rate": 4.411764705882353e-06,
      "loss": 0.6433,
      "step": 64
    },
    {
      "epoch": 9.142857142857142,
      "eval_loss": 0.6646701097488403,
      "eval_matthews_correlation": 0.04945936664931965,
      "eval_runtime": 2.5911,
      "eval_samples_per_second": 85.293,
      "eval_steps_per_second": 1.544,
      "step": 64
    },
    {
      "epoch": 9.285714285714286,
      "grad_norm": 7.149904251098633,
      "learning_rate": 3.6764705882352942e-06,
      "loss": 0.6288,
      "step": 65
    },
    {
      "epoch": 9.285714285714286,
      "eval_loss": 0.6667016744613647,
      "eval_matthews_correlation": 0.04945936664931965,
      "eval_runtime": 2.5847,
      "eval_samples_per_second": 85.502,
      "eval_steps_per_second": 1.548,
      "step": 65
    },
    {
      "epoch": 9.428571428571429,
      "grad_norm": 1.3270394802093506,
      "learning_rate": 2.9411764705882355e-06,
      "loss": 0.678,
      "step": 66
    },
    {
      "epoch": 9.428571428571429,
      "eval_loss": 0.6667911410331726,
      "eval_matthews_correlation": 0.04945936664931965,
      "eval_runtime": 2.5833,
      "eval_samples_per_second": 85.551,
      "eval_steps_per_second": 1.548,
      "step": 66
    },
    {
      "epoch": 9.571428571428571,
      "grad_norm": 7.60737419128418,
      "learning_rate": 2.2058823529411767e-06,
      "loss": 0.6888,
      "step": 67
    },
    {
      "epoch": 9.571428571428571,
      "eval_loss": 0.6648308038711548,
      "eval_matthews_correlation": 0.06606632151098657,
      "eval_runtime": 2.5895,
      "eval_samples_per_second": 85.343,
      "eval_steps_per_second": 1.545,
      "step": 67
    },
    {
      "epoch": 9.714285714285714,
      "grad_norm": 6.667550563812256,
      "learning_rate": 1.4705882352941177e-06,
      "loss": 0.6281,
      "step": 68
    },
    {
      "epoch": 9.714285714285714,
      "eval_loss": 0.6676240563392639,
      "eval_matthews_correlation": 0.04945936664931965,
      "eval_runtime": 2.5817,
      "eval_samples_per_second": 85.603,
      "eval_steps_per_second": 1.549,
      "step": 68
    },
    {
      "epoch": 9.857142857142858,
      "grad_norm": 2.7164316177368164,
      "learning_rate": 7.352941176470589e-07,
      "loss": 0.6405,
      "step": 69
    },
    {
      "epoch": 9.857142857142858,
      "eval_loss": 0.6660658717155457,
      "eval_matthews_correlation": 0.06606632151098657,
      "eval_runtime": 2.5799,
      "eval_samples_per_second": 85.663,
      "eval_steps_per_second": 1.55,
      "step": 69
    },
    {
      "epoch": 10.0,
      "grad_norm": 1.6632885932922363,
      "learning_rate": 0.0,
      "loss": 0.6593,
      "step": 70
    },
    {
      "epoch": 10.0,
      "eval_loss": 0.6662001013755798,
      "eval_matthews_correlation": 0.06606632151098657,
      "eval_runtime": 2.5806,
      "eval_samples_per_second": 85.639,
      "eval_steps_per_second": 1.55,
      "step": 70
    },
    {
      "epoch": 10.0,
      "step": 70,
      "total_flos": 3.203834021989581e+16,
      "train_loss": 0.6859068019049508,
      "train_runtime": 582.463,
      "train_samples_per_second": 15.16,
      "train_steps_per_second": 0.12
    }
  ],
  "logging_steps": 1,
  "max_steps": 70,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 3.203834021989581e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}