adapters-opt-gptq-QLORA-super_glue-cb/trainer_state-opt-gptq-QLORA-super_glue-cb-sequence_classification.json
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 1,
  "global_step": 40,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.25,
      "grad_norm": 32.508975982666016,
      "learning_rate": 2.5e-05,
      "loss": 1.9242,
      "step": 1
    },
    {
      "epoch": 0.25,
      "eval_accuracy": 0.4838709677419355,
      "eval_f1": 0.25513196480938416,
      "eval_loss": 1.845262050628662,
      "eval_runtime": 1.664,
      "eval_samples_per_second": 37.261,
      "eval_steps_per_second": 1.202,
      "step": 1
    },
    {
      "epoch": 0.5,
      "grad_norm": 31.633163452148438,
      "learning_rate": 5e-05,
      "loss": 1.8427,
      "step": 2
    },
    {
      "epoch": 0.5,
      "eval_accuracy": 0.4838709677419355,
      "eval_f1": 0.25513196480938416,
      "eval_loss": 1.8311964273452759,
      "eval_runtime": 1.6638,
      "eval_samples_per_second": 37.264,
      "eval_steps_per_second": 1.202,
      "step": 2
    },
    {
      "epoch": 0.75,
      "grad_norm": 32.75844192504883,
      "learning_rate": 4.868421052631579e-05,
      "loss": 1.811,
      "step": 3
    },
    {
      "epoch": 0.75,
      "eval_accuracy": 0.4838709677419355,
      "eval_f1": 0.25513196480938416,
      "eval_loss": 1.7797694206237793,
      "eval_runtime": 1.6596,
      "eval_samples_per_second": 37.359,
      "eval_steps_per_second": 1.205,
      "step": 3
    },
    {
      "epoch": 1.0,
      "grad_norm": 33.511817932128906,
      "learning_rate": 4.736842105263158e-05,
      "loss": 1.9235,
      "step": 4
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.4838709677419355,
      "eval_f1": 0.25513196480938416,
      "eval_loss": 1.7310200929641724,
      "eval_runtime": 1.6611,
      "eval_samples_per_second": 37.325,
      "eval_steps_per_second": 1.204,
      "step": 4
    },
    {
      "epoch": 1.25,
      "grad_norm": 30.517168045043945,
      "learning_rate": 4.605263157894737e-05,
      "loss": 1.6903,
      "step": 5
    },
    {
      "epoch": 1.25,
      "eval_accuracy": 0.4838709677419355,
      "eval_f1": 0.25513196480938416,
      "eval_loss": 1.6759703159332275,
      "eval_runtime": 1.6597,
      "eval_samples_per_second": 37.357,
      "eval_steps_per_second": 1.205,
      "step": 5
    },
    {
      "epoch": 1.5,
      "grad_norm": 29.271242141723633,
      "learning_rate": 4.473684210526316e-05,
      "loss": 1.6032,
      "step": 6
    },
    {
      "epoch": 1.5,
      "eval_accuracy": 0.4838709677419355,
      "eval_f1": 0.25513196480938416,
      "eval_loss": 1.620038390159607,
      "eval_runtime": 1.6099,
      "eval_samples_per_second": 38.511,
      "eval_steps_per_second": 1.242,
      "step": 6
    },
    {
      "epoch": 1.75,
      "grad_norm": 31.15843963623047,
      "learning_rate": 4.342105263157895e-05,
      "loss": 1.7522,
      "step": 7
    },
    {
      "epoch": 1.75,
      "eval_accuracy": 0.4838709677419355,
      "eval_f1": 0.25513196480938416,
      "eval_loss": 1.5599325895309448,
      "eval_runtime": 1.6096,
      "eval_samples_per_second": 38.518,
      "eval_steps_per_second": 1.243,
      "step": 7
    },
    {
      "epoch": 2.0,
      "grad_norm": 32.795101165771484,
      "learning_rate": 4.210526315789474e-05,
      "loss": 1.665,
      "step": 8
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.4838709677419355,
      "eval_f1": 0.25513196480938416,
      "eval_loss": 1.5007245540618896,
      "eval_runtime": 1.6593,
      "eval_samples_per_second": 37.365,
      "eval_steps_per_second": 1.205,
      "step": 8
    },
    {
      "epoch": 2.25,
      "grad_norm": 24.919414520263672,
      "learning_rate": 4.078947368421053e-05,
      "loss": 1.4334,
      "step": 9
    },
    {
      "epoch": 2.25,
      "eval_accuracy": 0.4838709677419355,
      "eval_f1": 0.25513196480938416,
      "eval_loss": 1.4376260042190552,
      "eval_runtime": 1.6592,
      "eval_samples_per_second": 37.367,
      "eval_steps_per_second": 1.205,
      "step": 9
    },
    {
      "epoch": 2.5,
      "grad_norm": 26.314369201660156,
      "learning_rate": 3.9473684210526316e-05,
      "loss": 1.4499,
      "step": 10
    },
    {
      "epoch": 2.5,
      "eval_accuracy": 0.4838709677419355,
      "eval_f1": 0.25513196480938416,
      "eval_loss": 1.3746062517166138,
      "eval_runtime": 1.659,
      "eval_samples_per_second": 37.372,
      "eval_steps_per_second": 1.206,
      "step": 10
    },
    {
      "epoch": 2.75,
      "grad_norm": 29.581968307495117,
      "learning_rate": 3.815789473684211e-05,
      "loss": 1.4751,
      "step": 11
    },
    {
      "epoch": 2.75,
      "eval_accuracy": 0.4838709677419355,
      "eval_f1": 0.25513196480938416,
      "eval_loss": 1.3099955320358276,
      "eval_runtime": 1.6591,
      "eval_samples_per_second": 37.37,
      "eval_steps_per_second": 1.205,
      "step": 11
    },
    {
      "epoch": 3.0,
      "grad_norm": 23.594392776489258,
      "learning_rate": 3.6842105263157895e-05,
      "loss": 1.2184,
      "step": 12
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.4838709677419355,
      "eval_f1": 0.25513196480938416,
      "eval_loss": 1.2490864992141724,
      "eval_runtime": 1.6592,
      "eval_samples_per_second": 37.368,
      "eval_steps_per_second": 1.205,
      "step": 12
    },
    {
      "epoch": 3.25,
      "grad_norm": 21.464380264282227,
      "learning_rate": 3.5526315789473684e-05,
      "loss": 1.1918,
      "step": 13
    },
    {
      "epoch": 3.25,
      "eval_accuracy": 0.4838709677419355,
      "eval_f1": 0.25513196480938416,
      "eval_loss": 1.1868385076522827,
      "eval_runtime": 1.6597,
      "eval_samples_per_second": 37.355,
      "eval_steps_per_second": 1.205,
      "step": 13
    },
    {
      "epoch": 3.5,
      "grad_norm": 23.22657585144043,
      "learning_rate": 3.421052631578947e-05,
      "loss": 1.2961,
      "step": 14
    },
    {
      "epoch": 3.5,
      "eval_accuracy": 0.4838709677419355,
      "eval_f1": 0.25513196480938416,
      "eval_loss": 1.1285125017166138,
      "eval_runtime": 1.6596,
      "eval_samples_per_second": 37.359,
      "eval_steps_per_second": 1.205,
      "step": 14
    },
    {
      "epoch": 3.75,
      "grad_norm": 20.130626678466797,
      "learning_rate": 3.289473684210527e-05,
      "loss": 1.058,
      "step": 15
    },
    {
      "epoch": 3.75,
      "eval_accuracy": 0.4838709677419355,
      "eval_f1": 0.25513196480938416,
      "eval_loss": 1.0760616064071655,
      "eval_runtime": 1.6629,
      "eval_samples_per_second": 37.284,
      "eval_steps_per_second": 1.203,
      "step": 15
    },
    {
      "epoch": 4.0,
      "grad_norm": 19.751522064208984,
      "learning_rate": 3.157894736842105e-05,
      "loss": 1.0431,
      "step": 16
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.4838709677419355,
      "eval_f1": 0.25513196480938416,
      "eval_loss": 1.027753233909607,
      "eval_runtime": 1.6593,
      "eval_samples_per_second": 37.365,
      "eval_steps_per_second": 1.205,
      "step": 16
    },
    {
      "epoch": 4.25,
      "grad_norm": 21.63219451904297,
      "learning_rate": 3.0263157894736844e-05,
      "loss": 1.1025,
      "step": 17
    },
    {
      "epoch": 4.25,
      "eval_accuracy": 0.4838709677419355,
      "eval_f1": 0.25513196480938416,
      "eval_loss": 0.9908801913261414,
      "eval_runtime": 1.6593,
      "eval_samples_per_second": 37.364,
      "eval_steps_per_second": 1.205,
      "step": 17
    },
    {
      "epoch": 4.5,
      "grad_norm": 10.896172523498535,
      "learning_rate": 2.8947368421052634e-05,
      "loss": 1.027,
      "step": 18
    },
    {
      "epoch": 4.5,
      "eval_accuracy": 0.4838709677419355,
      "eval_f1": 0.25513196480938416,
      "eval_loss": 0.9616305232048035,
      "eval_runtime": 1.6609,
      "eval_samples_per_second": 37.328,
      "eval_steps_per_second": 1.204,
      "step": 18
    },
    {
      "epoch": 4.75,
      "grad_norm": 10.560508728027344,
      "learning_rate": 2.7631578947368426e-05,
      "loss": 0.9494,
      "step": 19
    },
    {
      "epoch": 4.75,
      "eval_accuracy": 0.4838709677419355,
      "eval_f1": 0.26990838618745594,
      "eval_loss": 0.9417527914047241,
      "eval_runtime": 1.6591,
      "eval_samples_per_second": 37.369,
      "eval_steps_per_second": 1.205,
      "step": 19
    },
    {
      "epoch": 5.0,
      "grad_norm": 7.201696395874023,
      "learning_rate": 2.6315789473684212e-05,
      "loss": 0.8845,
      "step": 20
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.532258064516129,
      "eval_f1": 0.32762030323005936,
      "eval_loss": 0.9292937517166138,
      "eval_runtime": 1.6591,
      "eval_samples_per_second": 37.371,
      "eval_steps_per_second": 1.206,
      "step": 20
    },
    {
      "epoch": 5.25,
      "grad_norm": 3.4469192028045654,
      "learning_rate": 2.5e-05,
      "loss": 1.0096,
      "step": 21
    },
    {
      "epoch": 5.25,
      "eval_accuracy": 0.5967741935483871,
      "eval_f1": 0.4066193853427896,
      "eval_loss": 0.9209141731262207,
      "eval_runtime": 1.6594,
      "eval_samples_per_second": 37.363,
      "eval_steps_per_second": 1.205,
      "step": 21
    },
    {
      "epoch": 5.5,
      "grad_norm": 5.117708683013916,
      "learning_rate": 2.368421052631579e-05,
      "loss": 0.8488,
      "step": 22
    },
    {
      "epoch": 5.5,
      "eval_accuracy": 0.6290322580645161,
      "eval_f1": 0.434640522875817,
      "eval_loss": 0.9170079231262207,
      "eval_runtime": 1.6597,
      "eval_samples_per_second": 37.356,
      "eval_steps_per_second": 1.205,
      "step": 22
    },
    {
      "epoch": 5.75,
      "grad_norm": 4.267374515533447,
      "learning_rate": 2.236842105263158e-05,
      "loss": 0.847,
      "step": 23
    },
    {
      "epoch": 5.75,
      "eval_accuracy": 0.6129032258064516,
      "eval_f1": 0.41979655712050085,
      "eval_loss": 0.9126449227333069,
      "eval_runtime": 1.6593,
      "eval_samples_per_second": 37.364,
      "eval_steps_per_second": 1.205,
      "step": 23
    },
    {
      "epoch": 6.0,
      "grad_norm": 4.23488712310791,
      "learning_rate": 2.105263157894737e-05,
      "loss": 0.8127,
      "step": 24
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.5483870967741935,
      "eval_f1": 0.3648484848484848,
      "eval_loss": 0.9107705354690552,
      "eval_runtime": 1.6606,
      "eval_samples_per_second": 37.337,
      "eval_steps_per_second": 1.204,
      "step": 24
    },
    {
      "epoch": 6.25,
      "grad_norm": 6.90617561340332,
      "learning_rate": 1.9736842105263158e-05,
      "loss": 0.8793,
      "step": 25
    },
    {
      "epoch": 6.25,
      "eval_accuracy": 0.5161290322580645,
      "eval_f1": 0.34080808080808084,
      "eval_loss": 0.9092269539833069,
      "eval_runtime": 1.6621,
      "eval_samples_per_second": 37.302,
      "eval_steps_per_second": 1.203,
      "step": 25
    },
    {
      "epoch": 6.5,
      "grad_norm": 3.975435733795166,
      "learning_rate": 1.8421052631578947e-05,
      "loss": 0.8219,
      "step": 26
    },
    {
      "epoch": 6.5,
      "eval_accuracy": 0.532258064516129,
      "eval_f1": 0.35006119951040393,
      "eval_loss": 0.9109280705451965,
      "eval_runtime": 1.6614,
      "eval_samples_per_second": 37.318,
      "eval_steps_per_second": 1.204,
      "step": 26
    },
    {
      "epoch": 6.75,
      "grad_norm": 4.9610748291015625,
      "learning_rate": 1.7105263157894737e-05,
      "loss": 0.8406,
      "step": 27
    },
    {
      "epoch": 6.75,
      "eval_accuracy": 0.5161290322580645,
      "eval_f1": 0.34080808080808084,
      "eval_loss": 0.9092269539833069,
      "eval_runtime": 1.6609,
      "eval_samples_per_second": 37.33,
      "eval_steps_per_second": 1.204,
      "step": 27
    },
    {
      "epoch": 7.0,
      "grad_norm": 2.729252815246582,
      "learning_rate": 1.5789473684210526e-05,
      "loss": 0.9318,
      "step": 28
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.532258064516129,
      "eval_f1": 0.3553553553553554,
      "eval_loss": 0.9079668521881104,
      "eval_runtime": 1.6613,
      "eval_samples_per_second": 37.321,
      "eval_steps_per_second": 1.204,
      "step": 28
    },
    {
      "epoch": 7.25,
      "grad_norm": 5.042109489440918,
      "learning_rate": 1.4473684210526317e-05,
      "loss": 0.9478,
      "step": 29
    },
    {
      "epoch": 7.25,
      "eval_accuracy": 0.532258064516129,
      "eval_f1": 0.3553553553553554,
      "eval_loss": 0.9087701439857483,
      "eval_runtime": 1.661,
      "eval_samples_per_second": 37.327,
      "eval_steps_per_second": 1.204,
      "step": 29
    },
    {
      "epoch": 7.5,
      "grad_norm": 4.453703880310059,
      "learning_rate": 1.3157894736842106e-05,
      "loss": 0.8433,
      "step": 30
    },
    {
      "epoch": 7.5,
      "eval_accuracy": 0.5483870967741935,
      "eval_f1": 0.36946595195552906,
      "eval_loss": 0.9084787964820862,
      "eval_runtime": 1.6611,
      "eval_samples_per_second": 37.325,
      "eval_steps_per_second": 1.204,
      "step": 30
    },
    {
      "epoch": 7.75,
      "grad_norm": 3.1456220149993896,
      "learning_rate": 1.1842105263157895e-05,
      "loss": 0.794,
      "step": 31
    },
    {
      "epoch": 7.75,
      "eval_accuracy": 0.5483870967741935,
      "eval_f1": 0.36946595195552906,
      "eval_loss": 0.9089512825012207,
      "eval_runtime": 1.6607,
      "eval_samples_per_second": 37.334,
      "eval_steps_per_second": 1.204,
      "step": 31
    },
    {
      "epoch": 8.0,
      "grad_norm": 12.549348831176758,
      "learning_rate": 1.0526315789473684e-05,
      "loss": 0.9746,
      "step": 32
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.5483870967741935,
      "eval_f1": 0.36946595195552906,
      "eval_loss": 0.9065335392951965,
      "eval_runtime": 1.6609,
      "eval_samples_per_second": 37.329,
      "eval_steps_per_second": 1.204,
      "step": 32
    },
    {
      "epoch": 8.25,
      "grad_norm": 4.388298511505127,
      "learning_rate": 9.210526315789474e-06,
      "loss": 0.9246,
      "step": 33
    },
    {
      "epoch": 8.25,
      "eval_accuracy": 0.5483870967741935,
      "eval_f1": 0.36946595195552906,
      "eval_loss": 0.9052813053131104,
      "eval_runtime": 1.661,
      "eval_samples_per_second": 37.326,
      "eval_steps_per_second": 1.204,
      "step": 33
    },
    {
      "epoch": 8.5,
      "grad_norm": 2.7072877883911133,
      "learning_rate": 7.894736842105263e-06,
      "loss": 0.7426,
      "step": 34
    },
    {
      "epoch": 8.5,
      "eval_accuracy": 0.5645161290322581,
      "eval_f1": 0.3831757289204098,
      "eval_loss": 0.9058404564857483,
      "eval_runtime": 1.661,
      "eval_samples_per_second": 37.328,
      "eval_steps_per_second": 1.204,
      "step": 34
    },
    {
      "epoch": 8.75,
      "grad_norm": 12.431721687316895,
      "learning_rate": 6.578947368421053e-06,
      "loss": 0.8438,
      "step": 35
    },
    {
      "epoch": 8.75,
      "eval_accuracy": 0.5483870967741935,
      "eval_f1": 0.3732394366197183,
      "eval_loss": 0.9043520092964172,
      "eval_runtime": 1.6608,
      "eval_samples_per_second": 37.332,
      "eval_steps_per_second": 1.204,
      "step": 35
    },
    {
      "epoch": 9.0,
      "grad_norm": 4.142194747924805,
      "learning_rate": 5.263157894736842e-06,
      "loss": 0.8591,
      "step": 36
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.5483870967741935,
      "eval_f1": 0.3732394366197183,
      "eval_loss": 0.9042260050773621,
      "eval_runtime": 1.6606,
      "eval_samples_per_second": 37.336,
      "eval_steps_per_second": 1.204,
      "step": 36
    },
    {
      "epoch": 9.25,
      "grad_norm": 2.2469961643218994,
      "learning_rate": 3.9473684210526315e-06,
      "loss": 0.8005,
      "step": 37
    },
    {
      "epoch": 9.25,
      "eval_accuracy": 0.5483870967741935,
      "eval_f1": 0.3732394366197183,
      "eval_loss": 0.9043677449226379,
      "eval_runtime": 1.6601,
      "eval_samples_per_second": 37.347,
      "eval_steps_per_second": 1.205,
      "step": 37
    },
    {
      "epoch": 9.5,
      "grad_norm": 2.831308126449585,
      "learning_rate": 2.631578947368421e-06,
      "loss": 0.8354,
      "step": 38
    },
    {
      "epoch": 9.5,
      "eval_accuracy": 0.5483870967741935,
      "eval_f1": 0.3732394366197183,
      "eval_loss": 0.9049898982048035,
      "eval_runtime": 1.6604,
      "eval_samples_per_second": 37.341,
      "eval_steps_per_second": 1.205,
      "step": 38
    },
    {
      "epoch": 9.75,
      "grad_norm": 4.90074348449707,
      "learning_rate": 1.3157894736842106e-06,
      "loss": 0.8728,
      "step": 39
    },
    {
      "epoch": 9.75,
      "eval_accuracy": 0.5483870967741935,
      "eval_f1": 0.3732394366197183,
      "eval_loss": 0.9037298560142517,
      "eval_runtime": 1.6599,
      "eval_samples_per_second": 37.352,
      "eval_steps_per_second": 1.205,
      "step": 39
    },
    {
      "epoch": 10.0,
      "grad_norm": 2.8960094451904297,
      "learning_rate": 0.0,
      "loss": 0.8464,
      "step": 40
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.5483870967741935,
      "eval_f1": 0.3732394366197183,
      "eval_loss": 0.9036431908607483,
      "eval_runtime": 1.6603,
      "eval_samples_per_second": 37.343,
      "eval_steps_per_second": 1.205,
      "step": 40
    },
    {
      "epoch": 10.0,
      "step": 40,
      "total_flos": 50446463926272.0,
      "train_loss": 1.1404520988464355,
      "train_runtime": 238.9505,
      "train_samples_per_second": 10.211,
      "train_steps_per_second": 0.167
    }
  ],
  "logging_steps": 1,
  "max_steps": 40,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 50446463926272.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
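
For reference, a minimal Python sketch (standard library only) that loads a trainer state file like the one above and prints the evaluation curve per step. The local file name used here is an assumption taken from the path at the top of this page; adjust it to wherever the JSON is saved. This is an illustrative reading of the log, not part of the training run itself.

import json

# Assumed local path to the trainer state shown above.
STATE_PATH = "trainer_state-opt-gptq-QLORA-super_glue-cb-sequence_classification.json"

with open(STATE_PATH) as f:
    state = json.load(f)

# log_history interleaves training logs (keyed by "loss") and eval logs (keyed by "eval_loss").
eval_logs = [entry for entry in state["log_history"] if "eval_loss" in entry]

print(f"global_step={state['global_step']}  epochs={state['epoch']}")
for entry in eval_logs:
    print(
        f"step {entry['step']:>2}  epoch {entry['epoch']:>5}  "
        f"eval_loss {entry['eval_loss']:.4f}  "
        f"accuracy {entry['eval_accuracy']:.4f}  f1 {entry['eval_f1']:.4f}"
    )

# Best eval accuracy over the run; "best_metric" is null in the file,
# presumably because metric_for_best_model was not configured for this run.
best = max(eval_logs, key=lambda e: e["eval_accuracy"])
print(f"best eval_accuracy {best['eval_accuracy']:.4f} at step {best['step']}")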