{
  "best_metric": 0.34307238459587097,
  "best_model_checkpoint": "autotrain-ht4es-gbvmt/checkpoint-3000",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.025,
      "grad_norm": 7.62260103225708,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.7053,
      "step": 25
    },
    {
      "epoch": 0.05,
      "grad_norm": 9.429513931274414,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.7098,
      "step": 50
    },
    {
      "epoch": 0.075,
      "grad_norm": 5.347685813903809,
      "learning_rate": 1.25e-05,
      "loss": 0.597,
      "step": 75
    },
    {
      "epoch": 0.1,
      "grad_norm": 5.363152503967285,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.6223,
      "step": 100
    },
    {
      "epoch": 0.125,
      "grad_norm": 9.079790115356445,
      "learning_rate": 2.0833333333333336e-05,
      "loss": 0.6182,
      "step": 125
    },
    {
      "epoch": 0.15,
      "grad_norm": 6.9367547035217285,
      "learning_rate": 2.5e-05,
      "loss": 0.6298,
      "step": 150
    },
    {
      "epoch": 0.175,
      "grad_norm": 4.488036632537842,
      "learning_rate": 2.916666666666667e-05,
      "loss": 0.582,
      "step": 175
    },
    {
      "epoch": 0.2,
      "grad_norm": 6.229537487030029,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.5998,
      "step": 200
    },
    {
      "epoch": 0.225,
      "grad_norm": 14.98177433013916,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.6034,
      "step": 225
    },
    {
      "epoch": 0.25,
      "grad_norm": 8.947093963623047,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.5486,
      "step": 250
    },
    {
      "epoch": 0.275,
      "grad_norm": 5.548841953277588,
      "learning_rate": 4.5833333333333334e-05,
      "loss": 0.5592,
      "step": 275
    },
    {
      "epoch": 0.3,
      "grad_norm": 6.519041538238525,
      "learning_rate": 5e-05,
      "loss": 0.4745,
      "step": 300
    },
    {
      "epoch": 0.325,
      "grad_norm": 6.388887405395508,
      "learning_rate": 4.9537037037037035e-05,
      "loss": 0.6121,
      "step": 325
    },
    {
      "epoch": 0.35,
      "grad_norm": 4.3396315574646,
      "learning_rate": 4.9074074074074075e-05,
      "loss": 0.5122,
      "step": 350
    },
    {
      "epoch": 0.375,
      "grad_norm": 12.209185600280762,
      "learning_rate": 4.8611111111111115e-05,
      "loss": 0.5585,
      "step": 375
    },
    {
      "epoch": 0.4,
      "grad_norm": 6.66325044631958,
      "learning_rate": 4.814814814814815e-05,
      "loss": 0.6133,
      "step": 400
    },
    {
      "epoch": 0.425,
      "grad_norm": 7.374906063079834,
      "learning_rate": 4.768518518518519e-05,
      "loss": 0.5779,
      "step": 425
    },
    {
      "epoch": 0.45,
      "grad_norm": 6.596988677978516,
      "learning_rate": 4.722222222222222e-05,
      "loss": 0.58,
      "step": 450
    },
    {
      "epoch": 0.475,
      "grad_norm": 8.377278327941895,
      "learning_rate": 4.675925925925926e-05,
      "loss": 0.4981,
      "step": 475
    },
    {
      "epoch": 0.5,
      "grad_norm": 7.247930526733398,
      "learning_rate": 4.62962962962963e-05,
      "loss": 0.6069,
      "step": 500
    },
    {
      "epoch": 0.525,
      "grad_norm": 6.867646217346191,
      "learning_rate": 4.5833333333333334e-05,
      "loss": 0.6142,
      "step": 525
    },
    {
      "epoch": 0.55,
      "grad_norm": 5.440280437469482,
      "learning_rate": 4.5370370370370374e-05,
      "loss": 0.5109,
      "step": 550
    },
    {
      "epoch": 0.575,
      "grad_norm": 8.325813293457031,
      "learning_rate": 4.490740740740741e-05,
      "loss": 0.5326,
      "step": 575
    },
    {
      "epoch": 0.6,
      "grad_norm": 8.603256225585938,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 0.5622,
      "step": 600
    },
    {
      "epoch": 0.625,
      "grad_norm": 10.510509490966797,
      "learning_rate": 4.3981481481481486e-05,
      "loss": 0.675,
      "step": 625
    },
    {
      "epoch": 0.65,
      "grad_norm": 7.54085636138916,
      "learning_rate": 4.351851851851852e-05,
      "loss": 0.5269,
      "step": 650
    },
    {
      "epoch": 0.675,
      "grad_norm": 25.22871971130371,
      "learning_rate": 4.305555555555556e-05,
      "loss": 0.5974,
      "step": 675
    },
    {
      "epoch": 0.7,
      "grad_norm": 7.171282768249512,
      "learning_rate": 4.259259259259259e-05,
      "loss": 0.5236,
      "step": 700
    },
    {
      "epoch": 0.725,
      "grad_norm": 5.170557022094727,
      "learning_rate": 4.212962962962963e-05,
      "loss": 0.4763,
      "step": 725
    },
    {
      "epoch": 0.75,
      "grad_norm": 4.286471843719482,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.5664,
      "step": 750
    },
    {
      "epoch": 0.775,
      "grad_norm": 7.5836286544799805,
      "learning_rate": 4.1203703703703705e-05,
      "loss": 0.5413,
      "step": 775
    },
    {
      "epoch": 0.8,
      "grad_norm": 7.180052757263184,
      "learning_rate": 4.074074074074074e-05,
      "loss": 0.5323,
      "step": 800
    },
    {
      "epoch": 0.825,
      "grad_norm": 8.674583435058594,
      "learning_rate": 4.027777777777778e-05,
      "loss": 0.5381,
      "step": 825
    },
    {
      "epoch": 0.85,
      "grad_norm": 5.117196559906006,
      "learning_rate": 3.981481481481482e-05,
      "loss": 0.5539,
      "step": 850
    },
    {
      "epoch": 0.875,
      "grad_norm": 6.353272914886475,
      "learning_rate": 3.935185185185186e-05,
      "loss": 0.4588,
      "step": 875
    },
    {
      "epoch": 0.9,
      "grad_norm": 5.968225002288818,
      "learning_rate": 3.888888888888889e-05,
      "loss": 0.5228,
      "step": 900
    },
    {
      "epoch": 0.925,
      "grad_norm": 17.49488639831543,
      "learning_rate": 3.8425925925925924e-05,
      "loss": 0.4842,
      "step": 925
    },
    {
      "epoch": 0.95,
      "grad_norm": 5.153933048248291,
      "learning_rate": 3.7962962962962964e-05,
      "loss": 0.4993,
      "step": 950
    },
    {
      "epoch": 0.975,
      "grad_norm": 7.89682149887085,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.5157,
      "step": 975
    },
    {
      "epoch": 1.0,
      "grad_norm": 6.91286039352417,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.5277,
      "step": 1000
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.787,
      "eval_auc": 0.8603820000000001,
      "eval_f1": 0.7905604719764012,
      "eval_loss": 0.4363800287246704,
      "eval_precision": 0.7775628626692457,
      "eval_recall": 0.804,
      "eval_runtime": 663.7473,
      "eval_samples_per_second": 3.013,
      "eval_steps_per_second": 0.188,
      "step": 1000
    },
    {
      "epoch": 1.025,
      "grad_norm": 24.21603775024414,
      "learning_rate": 3.6574074074074076e-05,
      "loss": 0.487,
      "step": 1025
    },
    {
      "epoch": 1.05,
      "grad_norm": 11.67587947845459,
      "learning_rate": 3.611111111111111e-05,
      "loss": 0.4983,
      "step": 1050
    },
    {
      "epoch": 1.075,
      "grad_norm": 9.920230865478516,
      "learning_rate": 3.564814814814815e-05,
      "loss": 0.5016,
      "step": 1075
    },
    {
      "epoch": 1.1,
      "grad_norm": 6.970316410064697,
      "learning_rate": 3.518518518518519e-05,
      "loss": 0.4673,
      "step": 1100
    },
    {
      "epoch": 1.125,
      "grad_norm": 7.130370616912842,
      "learning_rate": 3.472222222222222e-05,
      "loss": 0.3898,
      "step": 1125
    },
    {
      "epoch": 1.15,
      "grad_norm": 9.946027755737305,
      "learning_rate": 3.425925925925926e-05,
      "loss": 0.4404,
      "step": 1150
    },
    {
      "epoch": 1.175,
      "grad_norm": 1.6654151678085327,
      "learning_rate": 3.3796296296296295e-05,
      "loss": 0.4961,
      "step": 1175
    },
    {
      "epoch": 1.2,
      "grad_norm": 5.618824481964111,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.4828,
      "step": 1200
    },
    {
      "epoch": 1.225,
      "grad_norm": 7.5699334144592285,
      "learning_rate": 3.2870370370370375e-05,
      "loss": 0.4093,
      "step": 1225
    },
    {
      "epoch": 1.25,
      "grad_norm": 7.351467609405518,
      "learning_rate": 3.240740740740741e-05,
      "loss": 0.4939,
      "step": 1250
    },
    {
      "epoch": 1.275,
      "grad_norm": 6.4059648513793945,
      "learning_rate": 3.194444444444444e-05,
      "loss": 0.3547,
      "step": 1275
    },
    {
      "epoch": 1.3,
      "grad_norm": 4.268542289733887,
      "learning_rate": 3.148148148148148e-05,
      "loss": 0.3945,
      "step": 1300
    },
    {
      "epoch": 1.325,
      "grad_norm": 4.965268611907959,
      "learning_rate": 3.101851851851852e-05,
      "loss": 0.3791,
      "step": 1325
    },
    {
      "epoch": 1.35,
      "grad_norm": 9.438615798950195,
      "learning_rate": 3.055555555555556e-05,
      "loss": 0.3727,
      "step": 1350
    },
    {
      "epoch": 1.375,
      "grad_norm": 6.795106410980225,
      "learning_rate": 3.0092592592592593e-05,
      "loss": 0.4429,
      "step": 1375
    },
    {
      "epoch": 1.4,
      "grad_norm": 6.984402656555176,
      "learning_rate": 2.962962962962963e-05,
      "loss": 0.4231,
      "step": 1400
    },
    {
      "epoch": 1.425,
      "grad_norm": 4.931349277496338,
      "learning_rate": 2.916666666666667e-05,
      "loss": 0.4342,
      "step": 1425
    },
    {
      "epoch": 1.45,
      "grad_norm": 5.537110805511475,
      "learning_rate": 2.8703703703703706e-05,
      "loss": 0.4478,
      "step": 1450
    },
    {
      "epoch": 1.475,
      "grad_norm": 4.687628269195557,
      "learning_rate": 2.824074074074074e-05,
      "loss": 0.3871,
      "step": 1475
    },
    {
      "epoch": 1.5,
      "grad_norm": 5.930976390838623,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.4005,
      "step": 1500
    },
    {
      "epoch": 1.525,
      "grad_norm": 9.062422752380371,
      "learning_rate": 2.7314814814814816e-05,
      "loss": 0.4503,
      "step": 1525
    },
    {
      "epoch": 1.55,
      "grad_norm": 5.416477203369141,
      "learning_rate": 2.6851851851851855e-05,
      "loss": 0.4317,
      "step": 1550
    },
    {
      "epoch": 1.575,
      "grad_norm": 14.157992362976074,
      "learning_rate": 2.6388888888888892e-05,
      "loss": 0.382,
      "step": 1575
    },
    {
      "epoch": 1.6,
      "grad_norm": 4.8775177001953125,
      "learning_rate": 2.5925925925925925e-05,
      "loss": 0.4017,
      "step": 1600
    },
    {
      "epoch": 1.625,
      "grad_norm": 5.229184150695801,
      "learning_rate": 2.5462962962962965e-05,
      "loss": 0.5269,
      "step": 1625
    },
    {
      "epoch": 1.65,
      "grad_norm": 10.698781967163086,
      "learning_rate": 2.5e-05,
      "loss": 0.4021,
      "step": 1650
    },
    {
      "epoch": 1.675,
      "grad_norm": 4.068216800689697,
      "learning_rate": 2.4537037037037038e-05,
      "loss": 0.3971,
      "step": 1675
    },
    {
      "epoch": 1.7,
      "grad_norm": 9.067376136779785,
      "learning_rate": 2.4074074074074074e-05,
      "loss": 0.434,
      "step": 1700
    },
    {
      "epoch": 1.725,
      "grad_norm": 5.414083957672119,
      "learning_rate": 2.361111111111111e-05,
      "loss": 0.4614,
      "step": 1725
    },
    {
      "epoch": 1.75,
      "grad_norm": 9.188199043273926,
      "learning_rate": 2.314814814814815e-05,
      "loss": 0.4149,
      "step": 1750
    },
    {
      "epoch": 1.775,
      "grad_norm": 6.829360008239746,
      "learning_rate": 2.2685185185185187e-05,
      "loss": 0.4274,
      "step": 1775
    },
    {
      "epoch": 1.8,
      "grad_norm": 5.337634563446045,
      "learning_rate": 2.2222222222222223e-05,
      "loss": 0.3448,
      "step": 1800
    },
    {
      "epoch": 1.825,
      "grad_norm": 7.2519850730896,
      "learning_rate": 2.175925925925926e-05,
      "loss": 0.4263,
      "step": 1825
    },
    {
      "epoch": 1.85,
      "grad_norm": 5.725991725921631,
      "learning_rate": 2.1296296296296296e-05,
      "loss": 0.4307,
      "step": 1850
    },
    {
      "epoch": 1.875,
      "grad_norm": 5.500362396240234,
      "learning_rate": 2.0833333333333336e-05,
      "loss": 0.3874,
      "step": 1875
    },
    {
      "epoch": 1.9,
      "grad_norm": 7.834714412689209,
      "learning_rate": 2.037037037037037e-05,
      "loss": 0.3281,
      "step": 1900
    },
    {
      "epoch": 1.925,
      "grad_norm": 10.548686981201172,
      "learning_rate": 1.990740740740741e-05,
      "loss": 0.4047,
      "step": 1925
    },
    {
      "epoch": 1.95,
      "grad_norm": 8.157145500183105,
      "learning_rate": 1.9444444444444445e-05,
      "loss": 0.4335,
      "step": 1950
    },
    {
      "epoch": 1.975,
      "grad_norm": 9.798107147216797,
      "learning_rate": 1.8981481481481482e-05,
      "loss": 0.41,
      "step": 1975
    },
    {
      "epoch": 2.0,
      "grad_norm": 10.720842361450195,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.4131,
      "step": 2000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.826,
      "eval_auc": 0.9061560000000001,
      "eval_f1": 0.84051329055912,
      "eval_loss": 0.3920663595199585,
      "eval_precision": 0.7758037225042301,
      "eval_recall": 0.917,
      "eval_runtime": 518.777,
      "eval_samples_per_second": 3.855,
      "eval_steps_per_second": 0.241,
      "step": 2000
    },
    {
      "epoch": 2.025,
      "grad_norm": 2.1612956523895264,
      "learning_rate": 1.8055555555555555e-05,
      "loss": 0.2708,
      "step": 2025
    },
    {
      "epoch": 2.05,
      "grad_norm": 16.25421714782715,
      "learning_rate": 1.7592592592592595e-05,
      "loss": 0.4438,
      "step": 2050
    },
    {
      "epoch": 2.075,
      "grad_norm": 22.16561508178711,
      "learning_rate": 1.712962962962963e-05,
      "loss": 0.2589,
      "step": 2075
    },
    {
      "epoch": 2.1,
      "grad_norm": 13.343395233154297,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.3357,
      "step": 2100
    },
    {
      "epoch": 2.125,
      "grad_norm": 8.876195907592773,
      "learning_rate": 1.6203703703703704e-05,
      "loss": 0.3356,
      "step": 2125
    },
    {
      "epoch": 2.15,
      "grad_norm": 7.499932765960693,
      "learning_rate": 1.574074074074074e-05,
      "loss": 0.3052,
      "step": 2150
    },
    {
      "epoch": 2.175,
      "grad_norm": 3.757966995239258,
      "learning_rate": 1.527777777777778e-05,
      "loss": 0.2836,
      "step": 2175
    },
    {
      "epoch": 2.2,
      "grad_norm": 10.760390281677246,
      "learning_rate": 1.4814814814814815e-05,
      "loss": 0.2933,
      "step": 2200
    },
    {
      "epoch": 2.225,
      "grad_norm": 8.144658088684082,
      "learning_rate": 1.4351851851851853e-05,
      "loss": 0.3104,
      "step": 2225
    },
    {
      "epoch": 2.25,
      "grad_norm": 7.362667083740234,
      "learning_rate": 1.388888888888889e-05,
      "loss": 0.2983,
      "step": 2250
    },
    {
      "epoch": 2.275,
      "grad_norm": 4.645088195800781,
      "learning_rate": 1.3425925925925928e-05,
      "loss": 0.3655,
      "step": 2275
    },
    {
      "epoch": 2.3,
      "grad_norm": 1.1926023960113525,
      "learning_rate": 1.2962962962962962e-05,
      "loss": 0.2441,
      "step": 2300
    },
    {
      "epoch": 2.325,
      "grad_norm": 13.049036979675293,
      "learning_rate": 1.25e-05,
      "loss": 0.215,
      "step": 2325
    },
    {
      "epoch": 2.35,
      "grad_norm": 9.891176223754883,
      "learning_rate": 1.2037037037037037e-05,
      "loss": 0.3903,
      "step": 2350
    },
    {
      "epoch": 2.375,
      "grad_norm": 5.911032199859619,
      "learning_rate": 1.1574074074074075e-05,
      "loss": 0.2469,
      "step": 2375
    },
    {
      "epoch": 2.4,
      "grad_norm": 10.652032852172852,
      "learning_rate": 1.1111111111111112e-05,
      "loss": 0.3542,
      "step": 2400
    },
    {
      "epoch": 2.425,
      "grad_norm": 5.722354888916016,
      "learning_rate": 1.0648148148148148e-05,
      "loss": 0.2719,
      "step": 2425
    },
    {
      "epoch": 2.45,
      "grad_norm": 7.493654727935791,
      "learning_rate": 1.0185185185185185e-05,
      "loss": 0.3102,
      "step": 2450
    },
    {
      "epoch": 2.475,
      "grad_norm": 7.553443908691406,
      "learning_rate": 9.722222222222223e-06,
      "loss": 0.2539,
      "step": 2475
    },
    {
      "epoch": 2.5,
      "grad_norm": 3.0468859672546387,
      "learning_rate": 9.259259259259259e-06,
      "loss": 0.3312,
      "step": 2500
    },
    {
      "epoch": 2.525,
      "grad_norm": 9.9094820022583,
      "learning_rate": 8.796296296296297e-06,
      "loss": 0.3174,
      "step": 2525
    },
    {
      "epoch": 2.55,
      "grad_norm": 3.5765790939331055,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.3598,
      "step": 2550
    },
    {
      "epoch": 2.575,
      "grad_norm": 6.845639228820801,
      "learning_rate": 7.87037037037037e-06,
      "loss": 0.2898,
      "step": 2575
    },
    {
      "epoch": 2.6,
      "grad_norm": 6.846150875091553,
      "learning_rate": 7.4074074074074075e-06,
      "loss": 0.2469,
      "step": 2600
    },
    {
      "epoch": 2.625,
      "grad_norm": 10.330842018127441,
      "learning_rate": 6.944444444444445e-06,
      "loss": 0.4315,
      "step": 2625
    },
    {
      "epoch": 2.65,
      "grad_norm": 6.001290321350098,
      "learning_rate": 6.481481481481481e-06,
      "loss": 0.4615,
      "step": 2650
    },
    {
      "epoch": 2.675,
      "grad_norm": 4.979047775268555,
      "learning_rate": 6.0185185185185185e-06,
      "loss": 0.358,
      "step": 2675
    },
    {
      "epoch": 2.7,
      "grad_norm": 5.183391094207764,
      "learning_rate": 5.555555555555556e-06,
      "loss": 0.3412,
      "step": 2700
    },
    {
      "epoch": 2.725,
      "grad_norm": 7.089044094085693,
      "learning_rate": 5.092592592592592e-06,
      "loss": 0.297,
      "step": 2725
    },
    {
      "epoch": 2.75,
      "grad_norm": 11.442684173583984,
      "learning_rate": 4.6296296296296296e-06,
      "loss": 0.2933,
      "step": 2750
    },
    {
      "epoch": 2.775,
      "grad_norm": 4.924402713775635,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.3735,
      "step": 2775
    },
    {
      "epoch": 2.8,
      "grad_norm": 12.057656288146973,
      "learning_rate": 3.7037037037037037e-06,
      "loss": 0.2839,
      "step": 2800
    },
    {
      "epoch": 2.825,
      "grad_norm": 9.038887977600098,
      "learning_rate": 3.2407407407407406e-06,
      "loss": 0.2204,
      "step": 2825
    },
    {
      "epoch": 2.85,
      "grad_norm": 3.07694673538208,
      "learning_rate": 2.777777777777778e-06,
      "loss": 0.33,
      "step": 2850
    },
    {
      "epoch": 2.875,
      "grad_norm": 1.35588800907135,
      "learning_rate": 2.3148148148148148e-06,
      "loss": 0.2462,
      "step": 2875
    },
    {
      "epoch": 2.9,
      "grad_norm": 10.075614929199219,
      "learning_rate": 1.8518518518518519e-06,
      "loss": 0.2932,
      "step": 2900
    },
    {
      "epoch": 2.925,
      "grad_norm": 5.351684093475342,
      "learning_rate": 1.388888888888889e-06,
      "loss": 0.2768,
      "step": 2925
    },
    {
      "epoch": 2.95,
      "grad_norm": 11.825695037841797,
      "learning_rate": 9.259259259259259e-07,
      "loss": 0.2972,
      "step": 2950
    },
    {
      "epoch": 2.975,
      "grad_norm": 14.367501258850098,
      "learning_rate": 4.6296296296296297e-07,
      "loss": 0.2454,
      "step": 2975
    },
    {
      "epoch": 3.0,
      "grad_norm": 5.5341925621032715,
      "learning_rate": 0.0,
      "loss": 0.2264,
      "step": 3000
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.8655,
      "eval_auc": 0.9263070000000001,
      "eval_f1": 0.8702363724071394,
      "eval_loss": 0.34307238459587097,
      "eval_precision": 0.8406337371854613,
      "eval_recall": 0.902,
      "eval_runtime": 560.0101,
      "eval_samples_per_second": 3.571,
      "eval_steps_per_second": 0.223,
      "step": 3000
    }
  ],
  "logging_steps": 25,
  "max_steps": 3000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.859807750750208e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}