|
{ |
|
"best_metric": 1.0014017820358276, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-50", |
|
"epoch": 1.1510791366906474, |
|
"eval_steps": 25, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02302158273381295, |
|
"grad_norm": 51.27009582519531, |
|
"learning_rate": 5e-05, |
|
"loss": 59.5547, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.02302158273381295, |
|
"eval_loss": 4.219150543212891, |
|
"eval_runtime": 1.4656, |
|
"eval_samples_per_second": 34.115, |
|
"eval_steps_per_second": 8.87, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0460431654676259, |
|
"grad_norm": 52.14596939086914, |
|
"learning_rate": 0.0001, |
|
"loss": 66.9514, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.06906474820143885, |
|
"grad_norm": 73.6473159790039, |
|
"learning_rate": 9.990365154573717e-05, |
|
"loss": 88.7822, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0920863309352518, |
|
"grad_norm": 76.42533111572266, |
|
"learning_rate": 9.961501876182148e-05, |
|
"loss": 105.8788, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.11510791366906475, |
|
"grad_norm": 88.7105941772461, |
|
"learning_rate": 9.913533761814537e-05, |
|
"loss": 102.5182, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.1381294964028777, |
|
"grad_norm": 101.5471420288086, |
|
"learning_rate": 9.846666218300807e-05, |
|
"loss": 89.6244, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.16115107913669063, |
|
"grad_norm": 119.08177185058594, |
|
"learning_rate": 9.761185582727977e-05, |
|
"loss": 71.6918, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.1841726618705036, |
|
"grad_norm": 113.49226379394531, |
|
"learning_rate": 9.657457896300791e-05, |
|
"loss": 52.1182, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.20719424460431654, |
|
"grad_norm": 64.88825225830078, |
|
"learning_rate": 9.535927336897098e-05, |
|
"loss": 37.5966, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.2302158273381295, |
|
"grad_norm": 67.97850799560547, |
|
"learning_rate": 9.397114317029975e-05, |
|
"loss": 34.7204, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.25323741007194245, |
|
"grad_norm": 80.26464080810547, |
|
"learning_rate": 9.241613255361455e-05, |
|
"loss": 37.317, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.2762589928057554, |
|
"grad_norm": 30.27471160888672, |
|
"learning_rate": 9.070090031310558e-05, |
|
"loss": 50.8477, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.2992805755395683, |
|
"grad_norm": 34.09827423095703, |
|
"learning_rate": 8.883279133655399e-05, |
|
"loss": 42.7292, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.32230215827338127, |
|
"grad_norm": 35.786705017089844, |
|
"learning_rate": 8.681980515339464e-05, |
|
"loss": 44.7788, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.34532374100719426, |
|
"grad_norm": 32.19075012207031, |
|
"learning_rate": 8.467056167950311e-05, |
|
"loss": 36.2925, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.3683453237410072, |
|
"grad_norm": 33.80173873901367, |
|
"learning_rate": 8.239426430539243e-05, |
|
"loss": 34.3814, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.39136690647482014, |
|
"grad_norm": 34.994361877441406, |
|
"learning_rate": 8.000066048588211e-05, |
|
"loss": 34.2221, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.4143884892086331, |
|
"grad_norm": 30.99186897277832, |
|
"learning_rate": 7.75e-05, |
|
"loss": 33.6055, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.437410071942446, |
|
"grad_norm": 50.038936614990234, |
|
"learning_rate": 7.490299105985507e-05, |
|
"loss": 28.7858, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.460431654676259, |
|
"grad_norm": 25.917606353759766, |
|
"learning_rate": 7.222075445642904e-05, |
|
"loss": 28.09, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.48345323741007196, |
|
"grad_norm": 29.549970626831055, |
|
"learning_rate": 6.946477593864228e-05, |
|
"loss": 32.0602, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.5064748201438849, |
|
"grad_norm": 23.611438751220703, |
|
"learning_rate": 6.664685702961344e-05, |
|
"loss": 38.0321, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.5294964028776978, |
|
"grad_norm": 15.553833961486816, |
|
"learning_rate": 6.377906449072578e-05, |
|
"loss": 42.3021, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.5525179856115108, |
|
"grad_norm": 19.091585159301758, |
|
"learning_rate": 6.087367864990233e-05, |
|
"loss": 38.3939, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.5755395683453237, |
|
"grad_norm": 18.050025939941406, |
|
"learning_rate": 5.794314081535644e-05, |
|
"loss": 36.5805, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.5755395683453237, |
|
"eval_loss": 1.0759358406066895, |
|
"eval_runtime": 1.4586, |
|
"eval_samples_per_second": 34.279, |
|
"eval_steps_per_second": 8.913, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.5985611510791367, |
|
"grad_norm": 19.713987350463867, |
|
"learning_rate": 5.500000000000001e-05, |
|
"loss": 32.3912, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.6215827338129496, |
|
"grad_norm": 18.734251022338867, |
|
"learning_rate": 5.205685918464356e-05, |
|
"loss": 33.0844, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.6446043165467625, |
|
"grad_norm": 23.264362335205078, |
|
"learning_rate": 4.912632135009769e-05, |
|
"loss": 30.0175, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.6676258992805756, |
|
"grad_norm": 21.15528106689453, |
|
"learning_rate": 4.6220935509274235e-05, |
|
"loss": 28.8613, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.6906474820143885, |
|
"grad_norm": 21.473705291748047, |
|
"learning_rate": 4.3353142970386564e-05, |
|
"loss": 26.4456, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7136690647482015, |
|
"grad_norm": 22.020238876342773, |
|
"learning_rate": 4.053522406135775e-05, |
|
"loss": 26.6504, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.7366906474820144, |
|
"grad_norm": 23.917692184448242, |
|
"learning_rate": 3.777924554357096e-05, |
|
"loss": 26.535, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.7597122302158273, |
|
"grad_norm": 16.099769592285156, |
|
"learning_rate": 3.509700894014496e-05, |
|
"loss": 38.9816, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.7827338129496403, |
|
"grad_norm": 13.968155860900879, |
|
"learning_rate": 3.250000000000001e-05, |
|
"loss": 38.2441, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.8057553956834532, |
|
"grad_norm": 16.24407196044922, |
|
"learning_rate": 2.9999339514117912e-05, |
|
"loss": 34.9773, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.8287769784172662, |
|
"grad_norm": 18.08936882019043, |
|
"learning_rate": 2.760573569460757e-05, |
|
"loss": 31.861, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.8517985611510791, |
|
"grad_norm": 17.8543643951416, |
|
"learning_rate": 2.53294383204969e-05, |
|
"loss": 28.9791, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.874820143884892, |
|
"grad_norm": 20.712501525878906, |
|
"learning_rate": 2.3180194846605367e-05, |
|
"loss": 31.5953, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.897841726618705, |
|
"grad_norm": 21.663637161254883, |
|
"learning_rate": 2.1167208663446025e-05, |
|
"loss": 26.2142, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.920863309352518, |
|
"grad_norm": 20.295846939086914, |
|
"learning_rate": 1.9299099686894423e-05, |
|
"loss": 29.1029, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.943884892086331, |
|
"grad_norm": 21.162328720092773, |
|
"learning_rate": 1.758386744638546e-05, |
|
"loss": 23.3831, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.9669064748201439, |
|
"grad_norm": 24.621768951416016, |
|
"learning_rate": 1.602885682970026e-05, |
|
"loss": 26.9917, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.9899280575539569, |
|
"grad_norm": 33.076759338378906, |
|
"learning_rate": 1.464072663102903e-05, |
|
"loss": 26.055, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 1.0129496402877698, |
|
"grad_norm": 16.25497055053711, |
|
"learning_rate": 1.3425421036992098e-05, |
|
"loss": 39.5755, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.0359712230215827, |
|
"grad_norm": 11.067312240600586, |
|
"learning_rate": 1.2388144172720251e-05, |
|
"loss": 40.9644, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.0589928057553957, |
|
"grad_norm": 13.398911476135254, |
|
"learning_rate": 1.1533337816991932e-05, |
|
"loss": 36.1482, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 1.0820143884892086, |
|
"grad_norm": 15.000913619995117, |
|
"learning_rate": 1.0864662381854632e-05, |
|
"loss": 36.1772, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.1050359712230216, |
|
"grad_norm": 15.171695709228516, |
|
"learning_rate": 1.0384981238178534e-05, |
|
"loss": 28.492, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.1280575539568345, |
|
"grad_norm": 17.97978973388672, |
|
"learning_rate": 1.0096348454262845e-05, |
|
"loss": 28.6112, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 1.1510791366906474, |
|
"grad_norm": 19.472795486450195, |
|
"learning_rate": 1e-05, |
|
"loss": 28.4342, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.1510791366906474, |
|
"eval_loss": 1.0014017820358276, |
|
"eval_runtime": 1.4573, |
|
"eval_samples_per_second": 34.31, |
|
"eval_steps_per_second": 8.92, |
|
"step": 50 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 50, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 1, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.259156315439104e+17, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|