|
{
  "best_metric": 0.9390347003936768,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 1.0049751243781095,
  "eval_steps": 100,
  "global_step": 101,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.009950248756218905,
      "grad_norm": 2.370561361312866,
      "learning_rate": 5e-06,
      "loss": 1.0474,
      "step": 1
    },
    {
      "epoch": 0.009950248756218905,
      "eval_loss": 1.0728834867477417,
      "eval_runtime": 7.9605,
      "eval_samples_per_second": 21.355,
      "eval_steps_per_second": 5.402,
      "step": 1
    },
    {
      "epoch": 0.01990049751243781,
      "grad_norm": 2.4022700786590576,
      "learning_rate": 1e-05,
      "loss": 1.0829,
      "step": 2
    },
    {
      "epoch": 0.029850746268656716,
      "grad_norm": 3.453360080718994,
      "learning_rate": 1.5e-05,
      "loss": 1.0655,
      "step": 3
    },
    {
      "epoch": 0.03980099502487562,
      "grad_norm": 2.4925131797790527,
      "learning_rate": 2e-05,
      "loss": 1.0848,
      "step": 4
    },
    {
      "epoch": 0.04975124378109453,
      "grad_norm": 2.5615079402923584,
      "learning_rate": 2.5e-05,
      "loss": 1.0388,
      "step": 5
    },
    {
      "epoch": 0.05970149253731343,
      "grad_norm": 2.178732395172119,
      "learning_rate": 3e-05,
      "loss": 1.0828,
      "step": 6
    },
    {
      "epoch": 0.06965174129353234,
      "grad_norm": 2.5980308055877686,
      "learning_rate": 3.5e-05,
      "loss": 1.075,
      "step": 7
    },
    {
      "epoch": 0.07960199004975124,
      "grad_norm": 2.408590078353882,
      "learning_rate": 4e-05,
      "loss": 1.076,
      "step": 8
    },
    {
      "epoch": 0.08955223880597014,
      "grad_norm": 2.9705393314361572,
      "learning_rate": 4.5e-05,
      "loss": 1.0433,
      "step": 9
    },
    {
      "epoch": 0.09950248756218906,
      "grad_norm": 2.3930296897888184,
      "learning_rate": 5e-05,
      "loss": 1.0522,
      "step": 10
    },
    {
      "epoch": 0.10945273631840796,
      "grad_norm": 2.078711748123169,
      "learning_rate": 5.500000000000001e-05,
      "loss": 1.0705,
      "step": 11
    },
    {
      "epoch": 0.11940298507462686,
      "grad_norm": 2.475327253341675,
      "learning_rate": 6e-05,
      "loss": 1.0754,
      "step": 12
    },
    {
      "epoch": 0.12935323383084577,
      "grad_norm": 2.961998701095581,
      "learning_rate": 6.500000000000001e-05,
      "loss": 1.0453,
      "step": 13
    },
    {
      "epoch": 0.13930348258706468,
      "grad_norm": 4.242774963378906,
      "learning_rate": 7e-05,
      "loss": 1.0418,
      "step": 14
    },
    {
      "epoch": 0.14925373134328357,
      "grad_norm": 2.8345038890838623,
      "learning_rate": 7.500000000000001e-05,
      "loss": 1.0769,
      "step": 15
    },
    {
      "epoch": 0.15920398009950248,
      "grad_norm": 2.97365403175354,
      "learning_rate": 8e-05,
      "loss": 1.0672,
      "step": 16
    },
    {
      "epoch": 0.1691542288557214,
      "grad_norm": 3.3158862590789795,
      "learning_rate": 8.5e-05,
      "loss": 1.0577,
      "step": 17
    },
    {
      "epoch": 0.1791044776119403,
      "grad_norm": 3.4335522651672363,
      "learning_rate": 9e-05,
      "loss": 1.0795,
      "step": 18
    },
    {
      "epoch": 0.1890547263681592,
      "grad_norm": 2.685619592666626,
      "learning_rate": 9.5e-05,
      "loss": 1.0594,
      "step": 19
    },
    {
      "epoch": 0.19900497512437812,
      "grad_norm": 2.6879546642303467,
      "learning_rate": 0.0001,
      "loss": 1.102,
      "step": 20
    },
    {
      "epoch": 0.208955223880597,
      "grad_norm": 3.9734950065612793,
      "learning_rate": 9.996239762521151e-05,
      "loss": 1.0358,
      "step": 21
    },
    {
      "epoch": 0.21890547263681592,
      "grad_norm": 3.007880687713623,
      "learning_rate": 9.98496470583896e-05,
      "loss": 1.0693,
      "step": 22
    },
    {
      "epoch": 0.22885572139303484,
      "grad_norm": 4.0449042320251465,
      "learning_rate": 9.966191788709716e-05,
      "loss": 1.0339,
      "step": 23
    },
    {
      "epoch": 0.23880597014925373,
      "grad_norm": 2.7142257690429688,
      "learning_rate": 9.939949247384046e-05,
      "loss": 1.0377,
      "step": 24
    },
    {
      "epoch": 0.24875621890547264,
      "grad_norm": 2.980034589767456,
      "learning_rate": 9.906276553136923e-05,
      "loss": 1.0465,
      "step": 25
    },
    {
      "epoch": 0.25870646766169153,
      "grad_norm": 3.321119546890259,
      "learning_rate": 9.865224352899119e-05,
      "loss": 0.9891,
      "step": 26
    },
    {
      "epoch": 0.26865671641791045,
      "grad_norm": 2.879805564880371,
      "learning_rate": 9.816854393079403e-05,
      "loss": 1.0101,
      "step": 27
    },
    {
      "epoch": 0.27860696517412936,
      "grad_norm": 2.740520715713501,
      "learning_rate": 9.761239426692077e-05,
      "loss": 1.0325,
      "step": 28
    },
    {
      "epoch": 0.2885572139303483,
      "grad_norm": 2.686652898788452,
      "learning_rate": 9.698463103929542e-05,
      "loss": 0.9961,
      "step": 29
    },
    {
      "epoch": 0.29850746268656714,
      "grad_norm": 2.9260804653167725,
      "learning_rate": 9.628619846344454e-05,
      "loss": 0.9861,
      "step": 30
    },
    {
      "epoch": 0.30845771144278605,
      "grad_norm": 3.1635563373565674,
      "learning_rate": 9.551814704830734e-05,
      "loss": 1.0263,
      "step": 31
    },
    {
      "epoch": 0.31840796019900497,
      "grad_norm": 3.2495548725128174,
      "learning_rate": 9.468163201617062e-05,
      "loss": 0.9937,
      "step": 32
    },
    {
      "epoch": 0.3283582089552239,
      "grad_norm": 2.594937801361084,
      "learning_rate": 9.377791156510455e-05,
      "loss": 1.0044,
      "step": 33
    },
    {
      "epoch": 0.3383084577114428,
      "grad_norm": 2.953624963760376,
      "learning_rate": 9.280834497651334e-05,
      "loss": 1.0048,
      "step": 34
    },
    {
      "epoch": 0.3482587064676617,
      "grad_norm": 2.600581169128418,
      "learning_rate": 9.177439057064683e-05,
      "loss": 0.984,
      "step": 35
    },
    {
      "epoch": 0.3582089552238806,
      "grad_norm": 2.8330259323120117,
      "learning_rate": 9.067760351314838e-05,
      "loss": 1.0075,
      "step": 36
    },
    {
      "epoch": 0.3681592039800995,
      "grad_norm": 2.660095691680908,
      "learning_rate": 8.951963347593797e-05,
      "loss": 1.0064,
      "step": 37
    },
    {
      "epoch": 0.3781094527363184,
      "grad_norm": 2.9839465618133545,
      "learning_rate": 8.83022221559489e-05,
      "loss": 0.9916,
      "step": 38
    },
    {
      "epoch": 0.3880597014925373,
      "grad_norm": 2.646791934967041,
      "learning_rate": 8.702720065545024e-05,
      "loss": 0.991,
      "step": 39
    },
    {
      "epoch": 0.39800995024875624,
      "grad_norm": 2.615619659423828,
      "learning_rate": 8.569648672789497e-05,
      "loss": 1.0294,
      "step": 40
    },
    {
      "epoch": 0.4079601990049751,
      "grad_norm": 2.4245409965515137,
      "learning_rate": 8.43120818934367e-05,
      "loss": 0.981,
      "step": 41
    },
    {
      "epoch": 0.417910447761194,
      "grad_norm": 2.807609796524048,
      "learning_rate": 8.28760684284532e-05,
      "loss": 0.9982,
      "step": 42
    },
    {
      "epoch": 0.42786069651741293,
      "grad_norm": 2.5878968238830566,
      "learning_rate": 8.139060623360493e-05,
      "loss": 0.994,
      "step": 43
    },
    {
      "epoch": 0.43781094527363185,
      "grad_norm": 2.3299400806427,
      "learning_rate": 7.985792958513931e-05,
      "loss": 1.0052,
      "step": 44
    },
    {
      "epoch": 0.44776119402985076,
      "grad_norm": 2.4846181869506836,
      "learning_rate": 7.828034377432693e-05,
      "loss": 0.9972,
      "step": 45
    },
    {
      "epoch": 0.4577114427860697,
      "grad_norm": 2.498612642288208,
      "learning_rate": 7.666022164008457e-05,
      "loss": 1.0202,
      "step": 46
    },
    {
      "epoch": 0.46766169154228854,
      "grad_norm": 2.713153123855591,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.9935,
      "step": 47
    },
    {
      "epoch": 0.47761194029850745,
      "grad_norm": 2.8727245330810547,
      "learning_rate": 7.330217598512695e-05,
      "loss": 0.9894,
      "step": 48
    },
    {
      "epoch": 0.48756218905472637,
      "grad_norm": 3.1435141563415527,
      "learning_rate": 7.156930328406268e-05,
      "loss": 0.9975,
      "step": 49
    },
    {
      "epoch": 0.4975124378109453,
      "grad_norm": 2.586751699447632,
      "learning_rate": 6.980398830195785e-05,
      "loss": 1.0167,
      "step": 50
    },
    {
      "epoch": 0.5074626865671642,
      "grad_norm": 2.858091354370117,
      "learning_rate": 6.800888624023553e-05,
      "loss": 0.9704,
      "step": 51
    },
    {
      "epoch": 0.5174129353233831,
      "grad_norm": 2.097409963607788,
      "learning_rate": 6.618669710291606e-05,
      "loss": 1.0067,
      "step": 52
    },
    {
      "epoch": 0.527363184079602,
      "grad_norm": 2.168245315551758,
      "learning_rate": 6.434016163555452e-05,
      "loss": 0.9568,
      "step": 53
    },
    {
      "epoch": 0.5373134328358209,
      "grad_norm": 2.109443187713623,
      "learning_rate": 6.247205720289907e-05,
      "loss": 0.9757,
      "step": 54
    },
    {
      "epoch": 0.5472636815920398,
      "grad_norm": 2.3406951427459717,
      "learning_rate": 6.058519361147055e-05,
      "loss": 0.9724,
      "step": 55
    },
    {
      "epoch": 0.5572139303482587,
      "grad_norm": 2.2367491722106934,
      "learning_rate": 5.868240888334653e-05,
      "loss": 0.9787,
      "step": 56
    },
    {
      "epoch": 0.5671641791044776,
      "grad_norm": 2.1488230228424072,
      "learning_rate": 5.6766564987506566e-05,
      "loss": 0.9817,
      "step": 57
    },
    {
      "epoch": 0.5771144278606966,
      "grad_norm": 2.1089084148406982,
      "learning_rate": 5.484054353515896e-05,
      "loss": 0.9883,
      "step": 58
    },
    {
      "epoch": 0.5870646766169154,
      "grad_norm": 2.262054681777954,
      "learning_rate": 5.290724144552379e-05,
      "loss": 0.9448,
      "step": 59
    },
    {
      "epoch": 0.5970149253731343,
      "grad_norm": 2.093430519104004,
      "learning_rate": 5.096956658859122e-05,
      "loss": 0.9696,
      "step": 60
    },
    {
      "epoch": 0.6069651741293532,
      "grad_norm": 3.0342745780944824,
      "learning_rate": 4.903043341140879e-05,
      "loss": 0.9654,
      "step": 61
    },
    {
      "epoch": 0.6169154228855721,
      "grad_norm": 3.3050591945648193,
      "learning_rate": 4.709275855447621e-05,
      "loss": 1.0399,
      "step": 62
    },
    {
      "epoch": 0.6268656716417911,
      "grad_norm": 2.6303319931030273,
      "learning_rate": 4.515945646484105e-05,
      "loss": 0.9839,
      "step": 63
    },
    {
      "epoch": 0.6368159203980099,
      "grad_norm": 2.3729300498962402,
      "learning_rate": 4.323343501249346e-05,
      "loss": 0.9815,
      "step": 64
    },
    {
      "epoch": 0.6467661691542289,
      "grad_norm": 2.4030983448028564,
      "learning_rate": 4.131759111665349e-05,
      "loss": 0.9548,
      "step": 65
    },
    {
      "epoch": 0.6567164179104478,
      "grad_norm": 2.4516849517822266,
      "learning_rate": 3.941480638852948e-05,
      "loss": 0.9653,
      "step": 66
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 2.461177110671997,
      "learning_rate": 3.752794279710094e-05,
      "loss": 1.0094,
      "step": 67
    },
    {
      "epoch": 0.6766169154228856,
      "grad_norm": 2.4054040908813477,
      "learning_rate": 3.5659838364445505e-05,
      "loss": 0.9782,
      "step": 68
    },
    {
      "epoch": 0.6865671641791045,
      "grad_norm": 2.4171035289764404,
      "learning_rate": 3.381330289708396e-05,
      "loss": 0.9738,
      "step": 69
    },
    {
      "epoch": 0.6965174129353234,
      "grad_norm": 2.410240650177002,
      "learning_rate": 3.199111375976449e-05,
      "loss": 0.9673,
      "step": 70
    },
    {
      "epoch": 0.7064676616915423,
      "grad_norm": 2.5898170471191406,
      "learning_rate": 3.019601169804216e-05,
      "loss": 1.0031,
      "step": 71
    },
    {
      "epoch": 0.7164179104477612,
      "grad_norm": 2.106549024581909,
      "learning_rate": 2.8430696715937337e-05,
      "loss": 0.9615,
      "step": 72
    },
    {
      "epoch": 0.7263681592039801,
      "grad_norm": 3.1593167781829834,
      "learning_rate": 2.6697824014873075e-05,
      "loss": 0.9408,
      "step": 73
    },
    {
      "epoch": 0.736318407960199,
      "grad_norm": 2.201892852783203,
      "learning_rate": 2.500000000000001e-05,
      "loss": 1.0046,
      "step": 74
    },
    {
      "epoch": 0.746268656716418,
      "grad_norm": 2.6017942428588867,
      "learning_rate": 2.333977835991545e-05,
      "loss": 1.0041,
      "step": 75
    },
    {
      "epoch": 0.7562189054726368,
      "grad_norm": 2.420989513397217,
      "learning_rate": 2.171965622567308e-05,
      "loss": 0.956,
      "step": 76
    },
    {
      "epoch": 0.7661691542288557,
      "grad_norm": 2.340715169906616,
      "learning_rate": 2.0142070414860704e-05,
      "loss": 0.9744,
      "step": 77
    },
    {
      "epoch": 0.7761194029850746,
      "grad_norm": 2.212714910507202,
      "learning_rate": 1.8609393766395085e-05,
      "loss": 0.9479,
      "step": 78
    },
    {
      "epoch": 0.7860696517412935,
      "grad_norm": 2.3441693782806396,
      "learning_rate": 1.7123931571546827e-05,
      "loss": 0.9634,
      "step": 79
    },
    {
      "epoch": 0.7960199004975125,
      "grad_norm": 2.3936691284179688,
      "learning_rate": 1.5687918106563326e-05,
      "loss": 0.9946,
      "step": 80
    },
    {
      "epoch": 0.8059701492537313,
      "grad_norm": 2.836846351623535,
      "learning_rate": 1.4303513272105057e-05,
      "loss": 0.9847,
      "step": 81
    },
    {
      "epoch": 0.8159203980099502,
      "grad_norm": 2.2729079723358154,
      "learning_rate": 1.297279934454978e-05,
      "loss": 0.95,
      "step": 82
    },
    {
      "epoch": 0.8258706467661692,
      "grad_norm": 3.8941750526428223,
      "learning_rate": 1.1697777844051105e-05,
      "loss": 0.9585,
      "step": 83
    },
    {
      "epoch": 0.835820895522388,
      "grad_norm": 3.08699631690979,
      "learning_rate": 1.0480366524062042e-05,
      "loss": 0.9774,
      "step": 84
    },
    {
      "epoch": 0.845771144278607,
      "grad_norm": 2.5296778678894043,
      "learning_rate": 9.322396486851626e-06,
      "loss": 0.9695,
      "step": 85
    },
    {
      "epoch": 0.8557213930348259,
      "grad_norm": 2.3693594932556152,
      "learning_rate": 8.225609429353187e-06,
      "loss": 0.9583,
      "step": 86
    },
    {
      "epoch": 0.8656716417910447,
      "grad_norm": 2.286224126815796,
      "learning_rate": 7.191655023486682e-06,
      "loss": 1.0009,
      "step": 87
    },
    {
      "epoch": 0.8756218905472637,
      "grad_norm": 2.6686084270477295,
      "learning_rate": 6.222088434895462e-06,
      "loss": 0.9626,
      "step": 88
    },
    {
      "epoch": 0.8855721393034826,
      "grad_norm": 2.4388320446014404,
      "learning_rate": 5.318367983829392e-06,
      "loss": 0.9736,
      "step": 89
    },
    {
      "epoch": 0.8955223880597015,
      "grad_norm": 2.7601969242095947,
      "learning_rate": 4.4818529516926726e-06,
      "loss": 0.977,
      "step": 90
    },
    {
      "epoch": 0.9054726368159204,
      "grad_norm": 2.3179821968078613,
      "learning_rate": 3.7138015365554833e-06,
      "loss": 0.964,
      "step": 91
    },
    {
      "epoch": 0.9154228855721394,
      "grad_norm": 2.7723841667175293,
      "learning_rate": 3.0153689607045845e-06,
      "loss": 0.9642,
      "step": 92
    },
    {
      "epoch": 0.9253731343283582,
      "grad_norm": 2.443444013595581,
      "learning_rate": 2.3876057330792346e-06,
      "loss": 0.9535,
      "step": 93
    },
    {
      "epoch": 0.9353233830845771,
      "grad_norm": 2.286829948425293,
      "learning_rate": 1.8314560692059835e-06,
      "loss": 0.9691,
      "step": 94
    },
    {
      "epoch": 0.945273631840796,
      "grad_norm": 3.324568271636963,
      "learning_rate": 1.3477564710088098e-06,
      "loss": 0.9715,
      "step": 95
    },
    {
      "epoch": 0.9552238805970149,
      "grad_norm": 2.308687210083008,
      "learning_rate": 9.372344686307655e-07,
      "loss": 0.9482,
      "step": 96
    },
    {
      "epoch": 0.9651741293532339,
      "grad_norm": 2.547358274459839,
      "learning_rate": 6.005075261595494e-07,
      "loss": 0.9975,
      "step": 97
    },
    {
      "epoch": 0.9751243781094527,
      "grad_norm": 2.5918407440185547,
      "learning_rate": 3.380821129028489e-07,
      "loss": 1.0035,
      "step": 98
    },
    {
      "epoch": 0.9850746268656716,
      "grad_norm": 2.4646270275115967,
      "learning_rate": 1.503529416103988e-07,
      "loss": 0.9942,
      "step": 99
    },
    {
      "epoch": 0.9950248756218906,
      "grad_norm": 2.581437826156616,
      "learning_rate": 3.760237478849793e-08,
      "loss": 1.0177,
      "step": 100
    },
    {
      "epoch": 0.9950248756218906,
      "eval_loss": 0.9390347003936768,
      "eval_runtime": 8.0407,
      "eval_samples_per_second": 21.143,
      "eval_steps_per_second": 5.348,
      "step": 100
    },
    {
      "epoch": 1.0049751243781095,
      "grad_norm": 4.282508373260498,
      "learning_rate": 0.0,
      "loss": 1.4958,
      "step": 101
    }
  ],
  "logging_steps": 1,
  "max_steps": 101,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 100,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.44348402876416e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|