{ "best_metric": 1.125113606452942, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 0.22701475595913734, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004540295119182747, "grad_norm": 0.5608993172645569, "learning_rate": 2e-05, "loss": 1.2604, "step": 1 }, { "epoch": 0.004540295119182747, "eval_loss": 1.4539767503738403, "eval_runtime": 10.0855, "eval_samples_per_second": 36.785, "eval_steps_per_second": 4.66, "step": 1 }, { "epoch": 0.009080590238365494, "grad_norm": 0.5262722373008728, "learning_rate": 4e-05, "loss": 1.2812, "step": 2 }, { "epoch": 0.01362088535754824, "grad_norm": 0.45176419615745544, "learning_rate": 6e-05, "loss": 1.1693, "step": 3 }, { "epoch": 0.018161180476730987, "grad_norm": 0.4379451870918274, "learning_rate": 8e-05, "loss": 1.1982, "step": 4 }, { "epoch": 0.022701475595913734, "grad_norm": 0.38019585609436035, "learning_rate": 0.0001, "loss": 1.1727, "step": 5 }, { "epoch": 0.02724177071509648, "grad_norm": 0.3477766513824463, "learning_rate": 9.987820251299122e-05, "loss": 1.2451, "step": 6 }, { "epoch": 0.03178206583427923, "grad_norm": 0.35698744654655457, "learning_rate": 9.951340343707852e-05, "loss": 1.2523, "step": 7 }, { "epoch": 0.036322360953461974, "grad_norm": 0.3647920787334442, "learning_rate": 9.890738003669029e-05, "loss": 1.3273, "step": 8 }, { "epoch": 0.04086265607264472, "grad_norm": 0.3550705015659332, "learning_rate": 9.806308479691595e-05, "loss": 1.2223, "step": 9 }, { "epoch": 0.04540295119182747, "grad_norm": 0.43464210629463196, "learning_rate": 9.698463103929542e-05, "loss": 1.3025, "step": 10 }, { "epoch": 0.049943246311010214, "grad_norm": 0.37873485684394836, "learning_rate": 9.567727288213005e-05, "loss": 1.2259, "step": 11 }, { "epoch": 0.05448354143019296, "grad_norm": 0.37317633628845215, "learning_rate": 9.414737964294636e-05, "loss": 1.2298, "step": 12 }, { "epoch": 0.05902383654937571, "grad_norm": 0.40012049674987793, "learning_rate": 9.24024048078213e-05, "loss": 1.215, "step": 13 }, { "epoch": 0.06356413166855845, "grad_norm": 0.3849846422672272, "learning_rate": 9.045084971874738e-05, "loss": 1.2012, "step": 14 }, { "epoch": 0.0681044267877412, "grad_norm": 0.37194129824638367, "learning_rate": 8.83022221559489e-05, "loss": 1.1632, "step": 15 }, { "epoch": 0.07264472190692395, "grad_norm": 0.38879069685935974, "learning_rate": 8.596699001693255e-05, "loss": 1.1127, "step": 16 }, { "epoch": 0.0771850170261067, "grad_norm": 0.3786041736602783, "learning_rate": 8.345653031794292e-05, "loss": 1.1716, "step": 17 }, { "epoch": 0.08172531214528944, "grad_norm": 0.3922478258609772, "learning_rate": 8.07830737662829e-05, "loss": 1.2044, "step": 18 }, { "epoch": 0.08626560726447219, "grad_norm": 0.38386255502700806, "learning_rate": 7.795964517353735e-05, "loss": 1.151, "step": 19 }, { "epoch": 0.09080590238365494, "grad_norm": 0.42602765560150146, "learning_rate": 7.500000000000001e-05, "loss": 1.1694, "step": 20 }, { "epoch": 0.09534619750283768, "grad_norm": 0.39171090722084045, "learning_rate": 7.191855733945387e-05, "loss": 1.175, "step": 21 }, { "epoch": 0.09988649262202043, "grad_norm": 0.3942036032676697, "learning_rate": 6.873032967079561e-05, "loss": 1.1999, "step": 22 }, { "epoch": 0.10442678774120318, "grad_norm": 0.4129440188407898, "learning_rate": 6.545084971874738e-05, "loss": 1.1767, "step": 23 }, { "epoch": 0.10896708286038592, "grad_norm": 0.4210982620716095, "learning_rate": 6.209609477998338e-05, "loss": 1.2759, "step": 24 }, { "epoch": 0.11350737797956867, "grad_norm": 0.4219196140766144, "learning_rate": 5.868240888334653e-05, "loss": 1.1998, "step": 25 }, { "epoch": 0.11350737797956867, "eval_loss": 1.125113606452942, "eval_runtime": 10.0342, "eval_samples_per_second": 36.974, "eval_steps_per_second": 4.684, "step": 25 }, { "epoch": 0.11804767309875142, "grad_norm": 0.45543304085731506, "learning_rate": 5.522642316338268e-05, "loss": 1.2383, "step": 26 }, { "epoch": 0.12258796821793416, "grad_norm": 0.5423667430877686, "learning_rate": 5.174497483512506e-05, "loss": 1.1882, "step": 27 }, { "epoch": 0.1271282633371169, "grad_norm": 0.45114219188690186, "learning_rate": 4.825502516487497e-05, "loss": 1.1249, "step": 28 }, { "epoch": 0.13166855845629966, "grad_norm": 0.4604540467262268, "learning_rate": 4.477357683661734e-05, "loss": 1.1833, "step": 29 }, { "epoch": 0.1362088535754824, "grad_norm": 0.49046003818511963, "learning_rate": 4.131759111665349e-05, "loss": 1.1793, "step": 30 }, { "epoch": 0.14074914869466515, "grad_norm": 0.5115222334861755, "learning_rate": 3.790390522001662e-05, "loss": 1.1548, "step": 31 }, { "epoch": 0.1452894438138479, "grad_norm": 0.48150011897087097, "learning_rate": 3.4549150281252636e-05, "loss": 1.0399, "step": 32 }, { "epoch": 0.14982973893303064, "grad_norm": 0.5298136472702026, "learning_rate": 3.12696703292044e-05, "loss": 1.1891, "step": 33 }, { "epoch": 0.1543700340522134, "grad_norm": 0.4817681908607483, "learning_rate": 2.8081442660546125e-05, "loss": 1.0856, "step": 34 }, { "epoch": 0.15891032917139614, "grad_norm": 0.5052300095558167, "learning_rate": 2.500000000000001e-05, "loss": 1.0734, "step": 35 }, { "epoch": 0.16345062429057888, "grad_norm": 0.5191230177879333, "learning_rate": 2.2040354826462668e-05, "loss": 1.0951, "step": 36 }, { "epoch": 0.16799091940976163, "grad_norm": 0.49014902114868164, "learning_rate": 1.9216926233717085e-05, "loss": 1.0811, "step": 37 }, { "epoch": 0.17253121452894438, "grad_norm": 0.558128297328949, "learning_rate": 1.6543469682057106e-05, "loss": 1.1841, "step": 38 }, { "epoch": 0.17707150964812712, "grad_norm": 0.5571663975715637, "learning_rate": 1.4033009983067452e-05, "loss": 1.0598, "step": 39 }, { "epoch": 0.18161180476730987, "grad_norm": 0.5573685765266418, "learning_rate": 1.1697777844051105e-05, "loss": 1.013, "step": 40 }, { "epoch": 0.18615209988649262, "grad_norm": 0.5625842213630676, "learning_rate": 9.549150281252633e-06, "loss": 1.0995, "step": 41 }, { "epoch": 0.19069239500567536, "grad_norm": 0.6718692183494568, "learning_rate": 7.597595192178702e-06, "loss": 1.0627, "step": 42 }, { "epoch": 0.1952326901248581, "grad_norm": 0.6249908208847046, "learning_rate": 5.852620357053651e-06, "loss": 1.0266, "step": 43 }, { "epoch": 0.19977298524404086, "grad_norm": 0.6505060791969299, "learning_rate": 4.322727117869951e-06, "loss": 1.0113, "step": 44 }, { "epoch": 0.2043132803632236, "grad_norm": 0.7362877130508423, "learning_rate": 3.0153689607045845e-06, "loss": 1.0885, "step": 45 }, { "epoch": 0.20885357548240635, "grad_norm": 0.7237470746040344, "learning_rate": 1.9369152030840556e-06, "loss": 0.9956, "step": 46 }, { "epoch": 0.2133938706015891, "grad_norm": 0.8162877559661865, "learning_rate": 1.0926199633097157e-06, "loss": 1.0847, "step": 47 }, { "epoch": 0.21793416572077184, "grad_norm": 0.8599759340286255, "learning_rate": 4.865965629214819e-07, "loss": 1.0561, "step": 48 }, { "epoch": 0.2224744608399546, "grad_norm": 1.2603548765182495, "learning_rate": 1.2179748700879012e-07, "loss": 1.0098, "step": 49 }, { "epoch": 0.22701475595913734, "grad_norm": 1.499512791633606, "learning_rate": 0.0, "loss": 1.2965, "step": 50 }, { "epoch": 0.22701475595913734, "eval_loss": 1.127669095993042, "eval_runtime": 10.241, "eval_samples_per_second": 36.227, "eval_steps_per_second": 4.589, "step": 50 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 1 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.5934336184352768e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }