{
  "best_metric": 11.050453186035156,
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
  "epoch": 0.19300361881785283,
  "eval_steps": 25,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0038600723763570566,
      "grad_norm": 4.272080898284912,
      "learning_rate": 5e-05,
      "loss": 11.0898,
      "step": 1
    },
    {
      "epoch": 0.0038600723763570566,
      "eval_loss": 11.0899019241333,
      "eval_runtime": 1.8216,
      "eval_samples_per_second": 957.975,
      "eval_steps_per_second": 120.227,
      "step": 1
    },
    {
      "epoch": 0.007720144752714113,
      "grad_norm": 4.217146873474121,
      "learning_rate": 0.0001,
      "loss": 11.0862,
      "step": 2
    },
    {
      "epoch": 0.01158021712907117,
      "grad_norm": 4.589000225067139,
      "learning_rate": 9.989294616193017e-05,
      "loss": 11.0849,
      "step": 3
    },
    {
      "epoch": 0.015440289505428226,
      "grad_norm": 4.838542461395264,
      "learning_rate": 9.957224306869053e-05,
      "loss": 11.0859,
      "step": 4
    },
    {
      "epoch": 0.019300361881785282,
      "grad_norm": 5.116698265075684,
      "learning_rate": 9.903926402016153e-05,
      "loss": 11.0849,
      "step": 5
    },
    {
      "epoch": 0.02316043425814234,
      "grad_norm": 4.157015323638916,
      "learning_rate": 9.829629131445342e-05,
      "loss": 11.0858,
      "step": 6
    },
    {
      "epoch": 0.027020506634499397,
      "grad_norm": 4.388658046722412,
      "learning_rate": 9.73465064747553e-05,
      "loss": 11.0843,
      "step": 7
    },
    {
      "epoch": 0.030880579010856453,
      "grad_norm": 4.618198871612549,
      "learning_rate": 9.619397662556435e-05,
      "loss": 11.0835,
      "step": 8
    },
    {
      "epoch": 0.03474065138721351,
      "grad_norm": 4.6166276931762695,
      "learning_rate": 9.484363707663442e-05,
      "loss": 11.0822,
      "step": 9
    },
    {
      "epoch": 0.038600723763570564,
      "grad_norm": 4.856595993041992,
      "learning_rate": 9.330127018922194e-05,
      "loss": 11.074,
      "step": 10
    },
    {
      "epoch": 0.04246079613992763,
      "grad_norm": 4.96979284286499,
      "learning_rate": 9.157348061512727e-05,
      "loss": 11.0747,
      "step": 11
    },
    {
      "epoch": 0.04632086851628468,
      "grad_norm": 5.501377582550049,
      "learning_rate": 8.966766701456177e-05,
      "loss": 11.0702,
      "step": 12
    },
    {
      "epoch": 0.05018094089264174,
      "grad_norm": 4.807370662689209,
      "learning_rate": 8.759199037394887e-05,
      "loss": 11.0664,
      "step": 13
    },
    {
      "epoch": 0.054041013268998794,
      "grad_norm": 4.258703231811523,
      "learning_rate": 8.535533905932738e-05,
      "loss": 11.0712,
      "step": 14
    },
    {
      "epoch": 0.05790108564535585,
      "grad_norm": 4.211122989654541,
      "learning_rate": 8.296729075500344e-05,
      "loss": 11.0717,
      "step": 15
    },
    {
      "epoch": 0.061761158021712906,
      "grad_norm": 4.176085948944092,
      "learning_rate": 8.043807145043604e-05,
      "loss": 11.0681,
      "step": 16
    },
    {
      "epoch": 0.06562123039806997,
      "grad_norm": 4.601294994354248,
      "learning_rate": 7.777851165098012e-05,
      "loss": 11.0634,
      "step": 17
    },
    {
      "epoch": 0.06948130277442702,
      "grad_norm": 4.531471252441406,
      "learning_rate": 7.500000000000001e-05,
      "loss": 11.0651,
      "step": 18
    },
    {
      "epoch": 0.07334137515078408,
      "grad_norm": 3.695352554321289,
      "learning_rate": 7.211443451095007e-05,
      "loss": 11.0707,
      "step": 19
    },
    {
      "epoch": 0.07720144752714113,
      "grad_norm": 3.846127510070801,
      "learning_rate": 6.91341716182545e-05,
      "loss": 11.0624,
      "step": 20
    },
    {
      "epoch": 0.08106151990349819,
      "grad_norm": 3.707059621810913,
      "learning_rate": 6.607197326515808e-05,
      "loss": 11.0589,
      "step": 21
    },
    {
      "epoch": 0.08492159227985525,
      "grad_norm": 4.043831825256348,
      "learning_rate": 6.294095225512603e-05,
      "loss": 11.058,
      "step": 22
    },
    {
      "epoch": 0.0887816646562123,
      "grad_norm": 4.06102180480957,
      "learning_rate": 5.9754516100806423e-05,
      "loss": 11.0572,
      "step": 23
    },
    {
      "epoch": 0.09264173703256937,
      "grad_norm": 4.315989971160889,
      "learning_rate": 5.6526309611002594e-05,
      "loss": 11.053,
      "step": 24
    },
    {
      "epoch": 0.09650180940892641,
      "grad_norm": 4.479373931884766,
      "learning_rate": 5.327015646150716e-05,
      "loss": 11.0506,
      "step": 25
    },
    {
      "epoch": 0.09650180940892641,
      "eval_loss": 11.057889938354492,
      "eval_runtime": 1.8106,
      "eval_samples_per_second": 963.753,
      "eval_steps_per_second": 120.952,
      "step": 25
    },
    {
      "epoch": 0.10036188178528348,
      "grad_norm": 3.5759146213531494,
      "learning_rate": 5e-05,
      "loss": 11.0631,
      "step": 26
    },
    {
      "epoch": 0.10422195416164053,
      "grad_norm": 3.4713640213012695,
      "learning_rate": 4.6729843538492847e-05,
      "loss": 11.0568,
      "step": 27
    },
    {
      "epoch": 0.10808202653799759,
      "grad_norm": 3.517700672149658,
      "learning_rate": 4.347369038899744e-05,
      "loss": 11.0592,
      "step": 28
    },
    {
      "epoch": 0.11194209891435464,
      "grad_norm": 3.803478479385376,
      "learning_rate": 4.0245483899193595e-05,
      "loss": 11.0491,
      "step": 29
    },
    {
      "epoch": 0.1158021712907117,
      "grad_norm": 3.692575216293335,
      "learning_rate": 3.705904774487396e-05,
      "loss": 11.0552,
      "step": 30
    },
    {
      "epoch": 0.11966224366706876,
      "grad_norm": 3.297511577606201,
      "learning_rate": 3.392802673484193e-05,
      "loss": 11.0584,
      "step": 31
    },
    {
      "epoch": 0.12352231604342581,
      "grad_norm": 3.4056272506713867,
      "learning_rate": 3.086582838174551e-05,
      "loss": 11.055,
      "step": 32
    },
    {
      "epoch": 0.12738238841978286,
      "grad_norm": 3.3810365200042725,
      "learning_rate": 2.7885565489049946e-05,
      "loss": 11.0535,
      "step": 33
    },
    {
      "epoch": 0.13124246079613994,
      "grad_norm": 3.5185937881469727,
      "learning_rate": 2.500000000000001e-05,
      "loss": 11.0526,
      "step": 34
    },
    {
      "epoch": 0.13510253317249699,
      "grad_norm": 3.627316951751709,
      "learning_rate": 2.2221488349019903e-05,
      "loss": 11.0508,
      "step": 35
    },
    {
      "epoch": 0.13896260554885403,
      "grad_norm": 3.6404926776885986,
      "learning_rate": 1.9561928549563968e-05,
      "loss": 11.0464,
      "step": 36
    },
    {
      "epoch": 0.1428226779252111,
      "grad_norm": 3.660294532775879,
      "learning_rate": 1.703270924499656e-05,
      "loss": 11.04,
      "step": 37
    },
    {
      "epoch": 0.14668275030156816,
      "grad_norm": 3.650830030441284,
      "learning_rate": 1.4644660940672627e-05,
      "loss": 11.0468,
      "step": 38
    },
    {
      "epoch": 0.1505428226779252,
      "grad_norm": 3.3983261585235596,
      "learning_rate": 1.2408009626051137e-05,
      "loss": 11.0528,
      "step": 39
    },
    {
      "epoch": 0.15440289505428226,
      "grad_norm": 3.4742696285247803,
      "learning_rate": 1.0332332985438248e-05,
      "loss": 11.0479,
      "step": 40
    },
    {
      "epoch": 0.15826296743063933,
      "grad_norm": 3.557180166244507,
      "learning_rate": 8.426519384872733e-06,
      "loss": 11.0481,
      "step": 41
    },
    {
      "epoch": 0.16212303980699638,
      "grad_norm": 3.4408743381500244,
      "learning_rate": 6.698729810778065e-06,
      "loss": 11.0475,
      "step": 42
    },
    {
      "epoch": 0.16598311218335343,
      "grad_norm": 3.4046168327331543,
      "learning_rate": 5.156362923365588e-06,
      "loss": 11.0487,
      "step": 43
    },
    {
      "epoch": 0.1698431845597105,
      "grad_norm": 2.9810149669647217,
      "learning_rate": 3.8060233744356633e-06,
      "loss": 11.0557,
      "step": 44
    },
    {
      "epoch": 0.17370325693606756,
      "grad_norm": 3.1998450756073,
      "learning_rate": 2.653493525244721e-06,
      "loss": 11.0516,
      "step": 45
    },
    {
      "epoch": 0.1775633293124246,
      "grad_norm": 3.363704204559326,
      "learning_rate": 1.70370868554659e-06,
      "loss": 11.0471,
      "step": 46
    },
    {
      "epoch": 0.18142340168878165,
      "grad_norm": 3.268141746520996,
      "learning_rate": 9.607359798384785e-07,
      "loss": 11.0491,
      "step": 47
    },
    {
      "epoch": 0.18528347406513873,
      "grad_norm": 3.6082420349121094,
      "learning_rate": 4.277569313094809e-07,
      "loss": 11.0462,
      "step": 48
    },
    {
      "epoch": 0.18914354644149578,
      "grad_norm": 3.4948015213012695,
      "learning_rate": 1.0705383806982606e-07,
      "loss": 11.0508,
      "step": 49
    },
    {
      "epoch": 0.19300361881785283,
      "grad_norm": 3.8561291694641113,
      "learning_rate": 0.0,
      "loss": 11.042,
      "step": 50
    },
    {
      "epoch": 0.19300361881785283,
      "eval_loss": 11.050453186035156,
      "eval_runtime": 1.8118,
      "eval_samples_per_second": 963.139,
      "eval_steps_per_second": 120.875,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 25,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4202692608000.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}