adapters-opt-bf16-QLORA-super_glue-wic/trainer_state-opt-fp16-QLORA-super_glue-wic-sequence_classification.json
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.6,
  "eval_steps": 1,
  "global_step": 60,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16,
      "grad_norm": 10.0,
      "learning_rate": 2.5e-05,
      "loss": 0.7248,
      "step": 1
    },
    {
      "epoch": 0.16,
      "eval_accuracy": 0.504,
      "eval_loss": 0.7169140577316284,
      "eval_runtime": 1.7694,
      "eval_samples_per_second": 141.288,
      "eval_steps_per_second": 3.956,
      "step": 1
    },
    {
      "epoch": 0.32,
      "grad_norm": 11.3125,
      "learning_rate": 5e-05,
      "loss": 0.7141,
      "step": 2
    },
    {
      "epoch": 0.32,
      "eval_accuracy": 0.512,
      "eval_loss": 0.713210940361023,
      "eval_runtime": 1.908,
      "eval_samples_per_second": 131.027,
      "eval_steps_per_second": 3.669,
      "step": 2
    },
    {
      "epoch": 0.48,
      "grad_norm": 8.5,
      "learning_rate": 4.913793103448276e-05,
      "loss": 0.72,
      "step": 3
    },
    {
      "epoch": 0.48,
      "eval_accuracy": 0.516,
      "eval_loss": 0.7183730602264404,
      "eval_runtime": 1.9102,
      "eval_samples_per_second": 130.879,
      "eval_steps_per_second": 3.665,
      "step": 3
    },
    {
      "epoch": 0.64,
      "grad_norm": 9.1875,
      "learning_rate": 4.827586206896552e-05,
      "loss": 0.7007,
      "step": 4
    },
    {
      "epoch": 0.64,
      "eval_accuracy": 0.516,
      "eval_loss": 0.7356113195419312,
      "eval_runtime": 1.908,
      "eval_samples_per_second": 131.029,
      "eval_steps_per_second": 3.669,
      "step": 4
    },
    {
      "epoch": 0.8,
      "grad_norm": 9.4375,
      "learning_rate": 4.741379310344828e-05,
      "loss": 0.8173,
      "step": 5
    },
    {
      "epoch": 0.8,
      "eval_accuracy": 0.516,
      "eval_loss": 0.7247294783592224,
      "eval_runtime": 1.9079,
      "eval_samples_per_second": 131.036,
      "eval_steps_per_second": 3.669,
      "step": 5
    },
    {
      "epoch": 0.96,
      "grad_norm": 10.6875,
      "learning_rate": 4.655172413793104e-05,
      "loss": 0.716,
      "step": 6
    },
    {
      "epoch": 0.96,
      "eval_accuracy": 0.52,
      "eval_loss": 0.7145995497703552,
      "eval_runtime": 1.906,
      "eval_samples_per_second": 131.162,
      "eval_steps_per_second": 3.673,
      "step": 6
    },
    {
      "epoch": 1.12,
      "grad_norm": 3.15625,
      "learning_rate": 4.5689655172413794e-05,
      "loss": 0.7381,
      "step": 7
    },
    {
      "epoch": 1.12,
      "eval_accuracy": 0.52,
      "eval_loss": 0.7026171684265137,
      "eval_runtime": 1.9082,
      "eval_samples_per_second": 131.015,
      "eval_steps_per_second": 3.668,
      "step": 7
    },
    {
      "epoch": 1.28,
      "grad_norm": 3.0625,
      "learning_rate": 4.482758620689655e-05,
      "loss": 0.6973,
      "step": 8
    },
    {
      "epoch": 1.28,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6966249942779541,
      "eval_runtime": 1.9071,
      "eval_samples_per_second": 131.086,
      "eval_steps_per_second": 3.67,
      "step": 8
    },
    {
      "epoch": 1.44,
      "grad_norm": 1.2734375,
      "learning_rate": 4.396551724137931e-05,
      "loss": 0.6871,
      "step": 9
    },
    {
      "epoch": 1.44,
      "eval_accuracy": 0.516,
      "eval_loss": 0.6961289048194885,
      "eval_runtime": 1.9103,
      "eval_samples_per_second": 130.872,
      "eval_steps_per_second": 3.664,
      "step": 9
    },
    {
      "epoch": 1.6,
      "grad_norm": 4.21875,
      "learning_rate": 4.3103448275862066e-05,
      "loss": 0.6943,
      "step": 10
    },
    {
      "epoch": 1.6,
      "eval_accuracy": 0.508,
      "eval_loss": 0.6979374885559082,
      "eval_runtime": 1.9058,
      "eval_samples_per_second": 131.178,
      "eval_steps_per_second": 3.673,
      "step": 10
    },
    {
      "epoch": 1.76,
      "grad_norm": 1.8671875,
      "learning_rate": 4.224137931034483e-05,
      "loss": 0.7017,
      "step": 11
    },
    {
      "epoch": 1.76,
      "eval_accuracy": 0.508,
      "eval_loss": 0.6969297528266907,
      "eval_runtime": 1.9056,
      "eval_samples_per_second": 131.193,
      "eval_steps_per_second": 3.673,
      "step": 11
    },
    {
      "epoch": 1.92,
      "grad_norm": 6.28125,
      "learning_rate": 4.1379310344827587e-05,
      "loss": 0.6898,
      "step": 12
    },
    {
      "epoch": 1.92,
      "eval_accuracy": 0.512,
      "eval_loss": 0.6961719393730164,
      "eval_runtime": 1.9059,
      "eval_samples_per_second": 131.172,
      "eval_steps_per_second": 3.673,
      "step": 12
    },
    {
      "epoch": 2.08,
      "grad_norm": 2.75,
      "learning_rate": 4.0517241379310344e-05,
      "loss": 0.6945,
      "step": 13
    },
    {
      "epoch": 2.08,
      "eval_accuracy": 0.508,
      "eval_loss": 0.6973124742507935,
      "eval_runtime": 1.9053,
      "eval_samples_per_second": 131.212,
      "eval_steps_per_second": 3.674,
      "step": 13
    },
    {
      "epoch": 2.24,
      "grad_norm": 1.4765625,
      "learning_rate": 3.965517241379311e-05,
      "loss": 0.713,
      "step": 14
    },
    {
      "epoch": 2.24,
      "eval_accuracy": 0.512,
      "eval_loss": 0.6960234642028809,
      "eval_runtime": 1.9114,
      "eval_samples_per_second": 130.792,
      "eval_steps_per_second": 3.662,
      "step": 14
    },
    {
      "epoch": 2.4,
      "grad_norm": 1.1171875,
      "learning_rate": 3.8793103448275865e-05,
      "loss": 0.682,
      "step": 15
    },
    {
      "epoch": 2.4,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6956601738929749,
      "eval_runtime": 1.9077,
      "eval_samples_per_second": 131.049,
      "eval_steps_per_second": 3.669,
      "step": 15
    },
    {
      "epoch": 2.56,
      "grad_norm": 1.2734375,
      "learning_rate": 3.793103448275862e-05,
      "loss": 0.68,
      "step": 16
    },
    {
      "epoch": 2.56,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6956210732460022,
      "eval_runtime": 1.9087,
      "eval_samples_per_second": 130.982,
      "eval_steps_per_second": 3.668,
      "step": 16
    },
    {
      "epoch": 2.7199999999999998,
      "grad_norm": 4.5625,
      "learning_rate": 3.7068965517241385e-05,
      "loss": 0.6793,
      "step": 17
    },
    {
      "epoch": 2.7199999999999998,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6956055164337158,
      "eval_runtime": 1.9077,
      "eval_samples_per_second": 131.05,
      "eval_steps_per_second": 3.669,
      "step": 17
    },
    {
      "epoch": 2.88,
      "grad_norm": 2.15625,
      "learning_rate": 3.620689655172414e-05,
      "loss": 0.6879,
      "step": 18
    },
    {
      "epoch": 2.88,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6959649324417114,
      "eval_runtime": 1.9058,
      "eval_samples_per_second": 131.178,
      "eval_steps_per_second": 3.673,
      "step": 18
    },
    {
      "epoch": 3.04,
      "grad_norm": 2.796875,
      "learning_rate": 3.53448275862069e-05,
      "loss": 0.6963,
      "step": 19
    },
    {
      "epoch": 3.04,
      "eval_accuracy": 0.52,
      "eval_loss": 0.697027325630188,
      "eval_runtime": 1.8566,
      "eval_samples_per_second": 134.654,
      "eval_steps_per_second": 3.77,
      "step": 19
    },
    {
      "epoch": 3.2,
      "grad_norm": 5.3125,
      "learning_rate": 3.4482758620689657e-05,
      "loss": 0.6925,
      "step": 20
    },
    {
      "epoch": 3.2,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6985077857971191,
      "eval_runtime": 1.9069,
      "eval_samples_per_second": 131.1,
      "eval_steps_per_second": 3.671,
      "step": 20
    },
    {
      "epoch": 3.36,
      "grad_norm": 2.984375,
      "learning_rate": 3.3620689655172414e-05,
      "loss": 0.6911,
      "step": 21
    },
    {
      "epoch": 3.36,
      "eval_accuracy": 0.52,
      "eval_loss": 0.699636697769165,
      "eval_runtime": 1.859,
      "eval_samples_per_second": 134.479,
      "eval_steps_per_second": 3.765,
      "step": 21
    },
    {
      "epoch": 3.52,
      "grad_norm": 3.59375,
      "learning_rate": 3.275862068965517e-05,
      "loss": 0.6882,
      "step": 22
    },
    {
      "epoch": 3.52,
      "eval_accuracy": 0.52,
      "eval_loss": 0.7012538909912109,
      "eval_runtime": 1.9094,
      "eval_samples_per_second": 130.928,
      "eval_steps_per_second": 3.666,
      "step": 22
    },
    {
      "epoch": 3.68,
      "grad_norm": 3.875,
      "learning_rate": 3.1896551724137935e-05,
      "loss": 0.7016,
      "step": 23
    },
    {
      "epoch": 3.68,
      "eval_accuracy": 0.52,
      "eval_loss": 0.7007499933242798,
      "eval_runtime": 1.9071,
      "eval_samples_per_second": 131.091,
      "eval_steps_per_second": 3.671,
      "step": 23
    },
    {
      "epoch": 3.84,
      "grad_norm": 5.46875,
      "learning_rate": 3.103448275862069e-05,
      "loss": 0.6972,
      "step": 24
    },
    {
      "epoch": 3.84,
      "eval_accuracy": 0.52,
      "eval_loss": 0.7000390887260437,
      "eval_runtime": 1.9058,
      "eval_samples_per_second": 131.18,
      "eval_steps_per_second": 3.673,
      "step": 24
    },
    {
      "epoch": 4.0,
      "grad_norm": 3.125,
      "learning_rate": 3.017241379310345e-05,
      "loss": 0.7288,
      "step": 25
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.52,
      "eval_loss": 0.697628915309906,
      "eval_runtime": 1.9065,
      "eval_samples_per_second": 131.13,
      "eval_steps_per_second": 3.672,
      "step": 25
    },
    {
      "epoch": 4.16,
      "grad_norm": 1.9765625,
      "learning_rate": 2.9310344827586206e-05,
      "loss": 0.7239,
      "step": 26
    },
    {
      "epoch": 4.16,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6958945393562317,
      "eval_runtime": 1.9088,
      "eval_samples_per_second": 130.974,
      "eval_steps_per_second": 3.667,
      "step": 26
    },
    {
      "epoch": 4.32,
      "grad_norm": 7.125,
      "learning_rate": 2.844827586206897e-05,
      "loss": 0.6701,
      "step": 27
    },
    {
      "epoch": 4.32,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6942812204360962,
      "eval_runtime": 1.9094,
      "eval_samples_per_second": 130.934,
      "eval_steps_per_second": 3.666,
      "step": 27
    },
    {
      "epoch": 4.48,
      "grad_norm": 1.484375,
      "learning_rate": 2.7586206896551727e-05,
      "loss": 0.7093,
      "step": 28
    },
    {
      "epoch": 4.48,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6918594241142273,
      "eval_runtime": 1.9103,
      "eval_samples_per_second": 130.869,
      "eval_steps_per_second": 3.664,
      "step": 28
    },
    {
      "epoch": 4.64,
      "grad_norm": 5.0,
      "learning_rate": 2.672413793103448e-05,
      "loss": 0.6803,
      "step": 29
    },
    {
      "epoch": 4.64,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6925703287124634,
      "eval_runtime": 1.9097,
      "eval_samples_per_second": 130.908,
      "eval_steps_per_second": 3.665,
      "step": 29
    },
    {
      "epoch": 4.8,
      "grad_norm": 0.91015625,
      "learning_rate": 2.5862068965517244e-05,
      "loss": 0.6953,
      "step": 30
    },
    {
      "epoch": 4.8,
      "eval_accuracy": 0.516,
      "eval_loss": 0.6929374933242798,
      "eval_runtime": 1.9088,
      "eval_samples_per_second": 130.971,
      "eval_steps_per_second": 3.667,
      "step": 30
    },
    {
      "epoch": 4.96,
      "grad_norm": 4.1875,
      "learning_rate": 2.5e-05,
      "loss": 0.6946,
      "step": 31
    },
    {
      "epoch": 4.96,
      "eval_accuracy": 0.508,
      "eval_loss": 0.6934375166893005,
      "eval_runtime": 1.908,
      "eval_samples_per_second": 131.026,
      "eval_steps_per_second": 3.669,
      "step": 31
    },
    {
      "epoch": 5.12,
      "grad_norm": 1.40625,
      "learning_rate": 2.413793103448276e-05,
      "loss": 0.7016,
      "step": 32
    },
    {
      "epoch": 5.12,
      "eval_accuracy": 0.504,
      "eval_loss": 0.6947265863418579,
      "eval_runtime": 1.9084,
      "eval_samples_per_second": 131.001,
      "eval_steps_per_second": 3.668,
      "step": 32
    },
    {
      "epoch": 5.28,
      "grad_norm": 3.03125,
      "learning_rate": 2.327586206896552e-05,
      "loss": 0.6953,
      "step": 33
    },
    {
      "epoch": 5.28,
      "eval_accuracy": 0.504,
      "eval_loss": 0.6949218511581421,
      "eval_runtime": 1.9078,
      "eval_samples_per_second": 131.039,
      "eval_steps_per_second": 3.669,
      "step": 33
    },
    {
      "epoch": 5.44,
      "grad_norm": 1.4609375,
      "learning_rate": 2.2413793103448276e-05,
      "loss": 0.6936,
      "step": 34
    },
    {
      "epoch": 5.44,
      "eval_accuracy": 0.504,
      "eval_loss": 0.6933984160423279,
      "eval_runtime": 1.9062,
      "eval_samples_per_second": 131.154,
      "eval_steps_per_second": 3.672,
      "step": 34
    },
    {
      "epoch": 5.6,
      "grad_norm": 4.21875,
      "learning_rate": 2.1551724137931033e-05,
      "loss": 0.6759,
      "step": 35
    },
    {
      "epoch": 5.6,
      "eval_accuracy": 0.512,
      "eval_loss": 0.6927656531333923,
      "eval_runtime": 1.9079,
      "eval_samples_per_second": 131.032,
      "eval_steps_per_second": 3.669,
      "step": 35
    },
    {
      "epoch": 5.76,
      "grad_norm": 3.890625,
      "learning_rate": 2.0689655172413793e-05,
      "loss": 0.6911,
      "step": 36
    },
    {
      "epoch": 5.76,
      "eval_accuracy": 0.516,
      "eval_loss": 0.691476583480835,
      "eval_runtime": 1.9084,
      "eval_samples_per_second": 131.001,
      "eval_steps_per_second": 3.668,
      "step": 36
    },
    {
      "epoch": 5.92,
      "grad_norm": 3.921875,
      "learning_rate": 1.9827586206896554e-05,
      "loss": 0.7045,
      "step": 37
    },
    {
      "epoch": 5.92,
      "eval_accuracy": 0.516,
      "eval_loss": 0.6917109489440918,
      "eval_runtime": 1.9104,
      "eval_samples_per_second": 130.863,
      "eval_steps_per_second": 3.664,
      "step": 37
    },
    {
      "epoch": 6.08,
      "grad_norm": 6.53125,
      "learning_rate": 1.896551724137931e-05,
      "loss": 0.6951,
      "step": 38
    },
    {
      "epoch": 6.08,
      "eval_accuracy": 0.516,
      "eval_loss": 0.6929374933242798,
      "eval_runtime": 1.9058,
      "eval_samples_per_second": 131.178,
      "eval_steps_per_second": 3.673,
      "step": 38
    },
    {
      "epoch": 6.24,
      "grad_norm": 3.671875,
      "learning_rate": 1.810344827586207e-05,
      "loss": 0.6766,
      "step": 39
    },
    {
      "epoch": 6.24,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6927499771118164,
      "eval_runtime": 1.908,
      "eval_samples_per_second": 131.03,
      "eval_steps_per_second": 3.669,
      "step": 39
    },
    {
      "epoch": 6.4,
      "grad_norm": 2.421875,
      "learning_rate": 1.7241379310344828e-05,
      "loss": 0.6964,
      "step": 40
    },
    {
      "epoch": 6.4,
      "eval_accuracy": 0.52,
      "eval_loss": 0.693472683429718,
      "eval_runtime": 1.9083,
      "eval_samples_per_second": 131.006,
      "eval_steps_per_second": 3.668,
      "step": 40
    },
    {
      "epoch": 6.5600000000000005,
      "grad_norm": 0.9765625,
      "learning_rate": 1.6379310344827585e-05,
      "loss": 0.6899,
      "step": 41
    },
    {
      "epoch": 6.5600000000000005,
      "eval_accuracy": 0.52,
      "eval_loss": 0.69287109375,
      "eval_runtime": 1.9071,
      "eval_samples_per_second": 131.089,
      "eval_steps_per_second": 3.67,
      "step": 41
    },
    {
      "epoch": 6.72,
      "grad_norm": 3.21875,
      "learning_rate": 1.5517241379310346e-05,
      "loss": 0.6783,
      "step": 42
    },
    {
      "epoch": 6.72,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6933398246765137,
      "eval_runtime": 1.9073,
      "eval_samples_per_second": 131.076,
      "eval_steps_per_second": 3.67,
      "step": 42
    },
    {
      "epoch": 6.88,
      "grad_norm": 0.87890625,
      "learning_rate": 1.4655172413793103e-05,
      "loss": 0.6921,
      "step": 43
    },
    {
      "epoch": 6.88,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6933085918426514,
      "eval_runtime": 1.9103,
      "eval_samples_per_second": 130.87,
      "eval_steps_per_second": 3.664,
      "step": 43
    },
    {
      "epoch": 7.04,
      "grad_norm": 2.609375,
      "learning_rate": 1.3793103448275863e-05,
      "loss": 0.684,
      "step": 44
    },
    {
      "epoch": 7.04,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6930469274520874,
      "eval_runtime": 1.9088,
      "eval_samples_per_second": 130.974,
      "eval_steps_per_second": 3.667,
      "step": 44
    },
    {
      "epoch": 7.2,
      "grad_norm": 6.3125,
      "learning_rate": 1.2931034482758622e-05,
      "loss": 0.7027,
      "step": 45
    },
    {
      "epoch": 7.2,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6939062476158142,
      "eval_runtime": 1.909,
      "eval_samples_per_second": 130.958,
      "eval_steps_per_second": 3.667,
      "step": 45
    },
    {
      "epoch": 7.36,
      "grad_norm": 3.78125,
      "learning_rate": 1.206896551724138e-05,
      "loss": 0.6893,
      "step": 46
    },
    {
      "epoch": 7.36,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6937148571014404,
      "eval_runtime": 1.9058,
      "eval_samples_per_second": 131.178,
      "eval_steps_per_second": 3.673,
      "step": 46
    },
    {
      "epoch": 7.52,
      "grad_norm": 3.4375,
      "learning_rate": 1.1206896551724138e-05,
      "loss": 0.6754,
      "step": 47
    },
    {
      "epoch": 7.52,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6943007707595825,
      "eval_runtime": 1.8591,
      "eval_samples_per_second": 134.472,
      "eval_steps_per_second": 3.765,
      "step": 47
    },
    {
      "epoch": 7.68,
      "grad_norm": 1.7109375,
      "learning_rate": 1.0344827586206897e-05,
      "loss": 0.7103,
      "step": 48
    },
    {
      "epoch": 7.68,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6930898427963257,
      "eval_runtime": 1.9082,
      "eval_samples_per_second": 131.014,
      "eval_steps_per_second": 3.668,
      "step": 48
    },
    {
      "epoch": 7.84,
      "grad_norm": 6.0625,
      "learning_rate": 9.482758620689655e-06,
      "loss": 0.6965,
      "step": 49
    },
    {
      "epoch": 7.84,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6940312385559082,
      "eval_runtime": 1.9059,
      "eval_samples_per_second": 131.173,
      "eval_steps_per_second": 3.673,
      "step": 49
    },
    {
      "epoch": 8.0,
      "grad_norm": 5.46875,
      "learning_rate": 8.620689655172414e-06,
      "loss": 0.682,
      "step": 50
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6938086152076721,
      "eval_runtime": 1.9046,
      "eval_samples_per_second": 131.261,
      "eval_steps_per_second": 3.675,
      "step": 50
    },
    {
      "epoch": 8.16,
      "grad_norm": 2.15625,
      "learning_rate": 7.758620689655173e-06,
      "loss": 0.6914,
      "step": 51
    },
    {
      "epoch": 8.16,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6924687623977661,
      "eval_runtime": 1.9055,
      "eval_samples_per_second": 131.197,
      "eval_steps_per_second": 3.674,
      "step": 51
    },
    {
      "epoch": 8.32,
      "grad_norm": 4.09375,
      "learning_rate": 6.896551724137932e-06,
      "loss": 0.6963,
      "step": 52
    },
    {
      "epoch": 8.32,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6934570074081421,
      "eval_runtime": 1.9059,
      "eval_samples_per_second": 131.17,
      "eval_steps_per_second": 3.673,
      "step": 52
    },
    {
      "epoch": 8.48,
      "grad_norm": 1.2265625,
      "learning_rate": 6.03448275862069e-06,
      "loss": 0.6962,
      "step": 53
    },
    {
      "epoch": 8.48,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6923359632492065,
      "eval_runtime": 1.9059,
      "eval_samples_per_second": 131.173,
      "eval_steps_per_second": 3.673,
      "step": 53
    },
    {
      "epoch": 8.64,
      "grad_norm": 3.578125,
      "learning_rate": 5.172413793103448e-06,
      "loss": 0.6692,
      "step": 54
    },
    {
      "epoch": 8.64,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6927656531333923,
      "eval_runtime": 1.906,
      "eval_samples_per_second": 131.168,
      "eval_steps_per_second": 3.673,
      "step": 54
    },
    {
      "epoch": 8.8,
      "grad_norm": 1.7421875,
      "learning_rate": 4.310344827586207e-06,
      "loss": 0.6991,
      "step": 55
    },
    {
      "epoch": 8.8,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6923280954360962,
      "eval_runtime": 1.9063,
      "eval_samples_per_second": 131.146,
      "eval_steps_per_second": 3.672,
      "step": 55
    },
    {
      "epoch": 8.96,
      "grad_norm": 1.1796875,
      "learning_rate": 3.448275862068966e-06,
      "loss": 0.6808,
      "step": 56
    },
    {
      "epoch": 8.96,
      "eval_accuracy": 0.52,
      "eval_loss": 0.692800760269165,
      "eval_runtime": 1.9066,
      "eval_samples_per_second": 131.124,
      "eval_steps_per_second": 3.671,
      "step": 56
    },
    {
      "epoch": 9.12,
      "grad_norm": 9.25,
      "learning_rate": 2.586206896551724e-06,
      "loss": 0.7043,
      "step": 57
    },
    {
      "epoch": 9.12,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6924726366996765,
      "eval_runtime": 1.9066,
      "eval_samples_per_second": 131.123,
      "eval_steps_per_second": 3.671,
      "step": 57
    },
    {
      "epoch": 9.28,
      "grad_norm": 1.15625,
      "learning_rate": 1.724137931034483e-06,
      "loss": 0.6786,
      "step": 58
    },
    {
      "epoch": 9.28,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6920429468154907,
      "eval_runtime": 1.9069,
      "eval_samples_per_second": 131.104,
      "eval_steps_per_second": 3.671,
      "step": 58
    },
    {
      "epoch": 9.44,
      "grad_norm": 6.125,
      "learning_rate": 8.620689655172415e-07,
      "loss": 0.6852,
      "step": 59
    },
    {
      "epoch": 9.44,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6916835904121399,
      "eval_runtime": 1.9088,
      "eval_samples_per_second": 130.97,
      "eval_steps_per_second": 3.667,
      "step": 59
    },
    {
      "epoch": 9.6,
      "grad_norm": 1.3828125,
      "learning_rate": 0.0,
      "loss": 0.6888,
      "step": 60
    },
    {
      "epoch": 9.6,
      "eval_accuracy": 0.52,
      "eval_loss": 0.6920117139816284,
      "eval_runtime": 1.9079,
      "eval_samples_per_second": 131.035,
      "eval_steps_per_second": 3.669,
      "step": 60
    },
    {
      "epoch": 9.6,
      "step": 60,
      "total_flos": 2.337091978736435e+16,
      "train_loss": 0.6971277634302775,
      "train_runtime": 291.0168,
      "train_samples_per_second": 34.362,
      "train_steps_per_second": 0.206
    }
  ],
  "logging_steps": 1,
  "max_steps": 60,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.337091978736435e+16,
  "train_batch_size": 10,
  "trial_name": null,
  "trial_params": null
}