{ "best_metric": 1.290708303451538, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.028435345134001564, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0005687069026800313, "grad_norm": 0.7424452900886536, "learning_rate": 5e-05, "loss": 1.3163, "step": 1 }, { "epoch": 0.0005687069026800313, "eval_loss": 1.9984076023101807, "eval_runtime": 778.1587, "eval_samples_per_second": 15.223, "eval_steps_per_second": 1.903, "step": 1 }, { "epoch": 0.0011374138053600626, "grad_norm": 1.080130934715271, "learning_rate": 0.0001, "loss": 1.6109, "step": 2 }, { "epoch": 0.0017061207080400938, "grad_norm": 1.0433323383331299, "learning_rate": 9.989294616193017e-05, "loss": 1.6837, "step": 3 }, { "epoch": 0.002274827610720125, "grad_norm": 0.8534478545188904, "learning_rate": 9.957224306869053e-05, "loss": 1.5771, "step": 4 }, { "epoch": 0.0028435345134001566, "grad_norm": 0.8350222706794739, "learning_rate": 9.903926402016153e-05, "loss": 1.5236, "step": 5 }, { "epoch": 0.0034122414160801876, "grad_norm": 1.1145470142364502, "learning_rate": 9.829629131445342e-05, "loss": 1.4151, "step": 6 }, { "epoch": 0.0039809483187602185, "grad_norm": 0.8083653450012207, "learning_rate": 9.73465064747553e-05, "loss": 1.3308, "step": 7 }, { "epoch": 0.00454965522144025, "grad_norm": 0.5271626114845276, "learning_rate": 9.619397662556435e-05, "loss": 1.4133, "step": 8 }, { "epoch": 0.005118362124120281, "grad_norm": 0.43272873759269714, "learning_rate": 9.484363707663442e-05, "loss": 1.3583, "step": 9 }, { "epoch": 0.005687069026800313, "grad_norm": 0.4580666124820709, "learning_rate": 9.330127018922194e-05, "loss": 1.382, "step": 10 }, { "epoch": 0.006255775929480344, "grad_norm": 0.4700775146484375, "learning_rate": 9.157348061512727e-05, "loss": 1.3653, "step": 11 }, { "epoch": 0.006824482832160375, "grad_norm": 0.46370092034339905, "learning_rate": 8.966766701456177e-05, "loss": 1.3269, "step": 12 }, { "epoch": 0.007393189734840407, "grad_norm": 0.32641956210136414, "learning_rate": 8.759199037394887e-05, "loss": 1.2265, "step": 13 }, { "epoch": 0.007961896637520437, "grad_norm": 0.33604350686073303, "learning_rate": 8.535533905932738e-05, "loss": 1.2166, "step": 14 }, { "epoch": 0.008530603540200469, "grad_norm": 0.3082321584224701, "learning_rate": 8.296729075500344e-05, "loss": 1.265, "step": 15 }, { "epoch": 0.0090993104428805, "grad_norm": 0.3127622902393341, "learning_rate": 8.043807145043604e-05, "loss": 1.3054, "step": 16 }, { "epoch": 0.009668017345560533, "grad_norm": 0.31797119975090027, "learning_rate": 7.777851165098012e-05, "loss": 1.3548, "step": 17 }, { "epoch": 0.010236724248240563, "grad_norm": 0.3794986605644226, "learning_rate": 7.500000000000001e-05, "loss": 1.3261, "step": 18 }, { "epoch": 0.010805431150920594, "grad_norm": 0.3182049095630646, "learning_rate": 7.211443451095007e-05, "loss": 1.3253, "step": 19 }, { "epoch": 0.011374138053600626, "grad_norm": 0.33693861961364746, "learning_rate": 6.91341716182545e-05, "loss": 1.2528, "step": 20 }, { "epoch": 0.011942844956280656, "grad_norm": 0.37158411741256714, "learning_rate": 6.607197326515808e-05, "loss": 1.3638, "step": 21 }, { "epoch": 0.012511551858960688, "grad_norm": 0.3820987343788147, "learning_rate": 6.294095225512603e-05, "loss": 1.3177, "step": 22 }, { "epoch": 0.01308025876164072, "grad_norm": 0.35295188426971436, "learning_rate": 5.9754516100806423e-05, "loss": 1.3014, "step": 23 }, { "epoch": 0.01364896566432075, "grad_norm": 0.3680730164051056, "learning_rate": 5.6526309611002594e-05, "loss": 1.3506, "step": 24 }, { "epoch": 0.014217672567000782, "grad_norm": 0.3996667265892029, "learning_rate": 5.327015646150716e-05, "loss": 1.4373, "step": 25 }, { "epoch": 0.014217672567000782, "eval_loss": 1.2989970445632935, "eval_runtime": 786.3656, "eval_samples_per_second": 15.064, "eval_steps_per_second": 1.883, "step": 25 }, { "epoch": 0.014786379469680814, "grad_norm": 0.2205570936203003, "learning_rate": 5e-05, "loss": 1.1953, "step": 26 }, { "epoch": 0.015355086372360844, "grad_norm": 0.27627643942832947, "learning_rate": 4.6729843538492847e-05, "loss": 1.2833, "step": 27 }, { "epoch": 0.015923793275040874, "grad_norm": 0.2669333815574646, "learning_rate": 4.347369038899744e-05, "loss": 1.2111, "step": 28 }, { "epoch": 0.016492500177720908, "grad_norm": 0.2802363336086273, "learning_rate": 4.0245483899193595e-05, "loss": 1.2611, "step": 29 }, { "epoch": 0.017061207080400938, "grad_norm": 0.2656974196434021, "learning_rate": 3.705904774487396e-05, "loss": 1.232, "step": 30 }, { "epoch": 0.01762991398308097, "grad_norm": 0.27097079157829285, "learning_rate": 3.392802673484193e-05, "loss": 1.2585, "step": 31 }, { "epoch": 0.018198620885761, "grad_norm": 0.28322505950927734, "learning_rate": 3.086582838174551e-05, "loss": 1.228, "step": 32 }, { "epoch": 0.01876732778844103, "grad_norm": 0.28203290700912476, "learning_rate": 2.7885565489049946e-05, "loss": 1.2908, "step": 33 }, { "epoch": 0.019336034691121065, "grad_norm": 0.29397279024124146, "learning_rate": 2.500000000000001e-05, "loss": 1.2699, "step": 34 }, { "epoch": 0.019904741593801095, "grad_norm": 0.35456985235214233, "learning_rate": 2.2221488349019903e-05, "loss": 1.3124, "step": 35 }, { "epoch": 0.020473448496481125, "grad_norm": 0.3785984218120575, "learning_rate": 1.9561928549563968e-05, "loss": 1.3914, "step": 36 }, { "epoch": 0.02104215539916116, "grad_norm": 0.3605644702911377, "learning_rate": 1.703270924499656e-05, "loss": 1.3823, "step": 37 }, { "epoch": 0.02161086230184119, "grad_norm": 0.22077637910842896, "learning_rate": 1.4644660940672627e-05, "loss": 1.1896, "step": 38 }, { "epoch": 0.02217956920452122, "grad_norm": 0.23526205122470856, "learning_rate": 1.2408009626051137e-05, "loss": 1.2117, "step": 39 }, { "epoch": 0.022748276107201253, "grad_norm": 0.3367883563041687, "learning_rate": 1.0332332985438248e-05, "loss": 1.357, "step": 40 }, { "epoch": 0.023316983009881283, "grad_norm": 0.27348804473876953, "learning_rate": 8.426519384872733e-06, "loss": 1.2662, "step": 41 }, { "epoch": 0.023885689912561313, "grad_norm": 0.25156477093696594, "learning_rate": 6.698729810778065e-06, "loss": 1.2183, "step": 42 }, { "epoch": 0.024454396815241346, "grad_norm": 0.2880376875400543, "learning_rate": 5.156362923365588e-06, "loss": 1.2991, "step": 43 }, { "epoch": 0.025023103717921377, "grad_norm": 0.2787708342075348, "learning_rate": 3.8060233744356633e-06, "loss": 1.2286, "step": 44 }, { "epoch": 0.025591810620601407, "grad_norm": 0.2973068058490753, "learning_rate": 2.653493525244721e-06, "loss": 1.2677, "step": 45 }, { "epoch": 0.02616051752328144, "grad_norm": 0.3298507332801819, "learning_rate": 1.70370868554659e-06, "loss": 1.3426, "step": 46 }, { "epoch": 0.02672922442596147, "grad_norm": 0.2946862578392029, "learning_rate": 9.607359798384785e-07, "loss": 1.2525, "step": 47 }, { "epoch": 0.0272979313286415, "grad_norm": 0.3230397701263428, "learning_rate": 4.277569313094809e-07, "loss": 1.3613, "step": 48 }, { "epoch": 0.027866638231321534, "grad_norm": 0.34295061230659485, "learning_rate": 1.0705383806982606e-07, "loss": 1.3333, "step": 49 }, { "epoch": 0.028435345134001564, "grad_norm": 0.37758418917655945, "learning_rate": 0.0, "loss": 1.3443, "step": 50 }, { "epoch": 0.028435345134001564, "eval_loss": 1.290708303451538, "eval_runtime": 777.631, "eval_samples_per_second": 15.233, "eval_steps_per_second": 1.905, "step": 50 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.070663327724339e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }