{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.4639025804581038, "eval_steps": 13, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.009278051609162076, "grad_norm": 0.16432242095470428, "learning_rate": 5e-05, "loss": 10.3715, "step": 1 }, { "epoch": 0.009278051609162076, "eval_loss": 10.37634563446045, "eval_runtime": 1.9843, "eval_samples_per_second": 366.368, "eval_steps_per_second": 91.718, "step": 1 }, { "epoch": 0.01855610321832415, "grad_norm": 0.17450982332229614, "learning_rate": 0.0001, "loss": 10.3866, "step": 2 }, { "epoch": 0.02783415482748623, "grad_norm": 0.20075102150440216, "learning_rate": 9.989294616193017e-05, "loss": 10.3751, "step": 3 }, { "epoch": 0.0371122064366483, "grad_norm": 0.20002366602420807, "learning_rate": 9.957224306869053e-05, "loss": 10.3753, "step": 4 }, { "epoch": 0.04639025804581038, "grad_norm": 0.16843415796756744, "learning_rate": 9.903926402016153e-05, "loss": 10.3802, "step": 5 }, { "epoch": 0.05566830965497246, "grad_norm": 0.18495182693004608, "learning_rate": 9.829629131445342e-05, "loss": 10.3753, "step": 6 }, { "epoch": 0.06494636126413453, "grad_norm": 0.1839418262243271, "learning_rate": 9.73465064747553e-05, "loss": 10.3711, "step": 7 }, { "epoch": 0.0742244128732966, "grad_norm": 0.19310903549194336, "learning_rate": 9.619397662556435e-05, "loss": 10.3778, "step": 8 }, { "epoch": 0.08350246448245868, "grad_norm": 0.18260075151920319, "learning_rate": 9.484363707663442e-05, "loss": 10.3754, "step": 9 }, { "epoch": 0.09278051609162076, "grad_norm": 0.2032376527786255, "learning_rate": 9.330127018922194e-05, "loss": 10.3641, "step": 10 }, { "epoch": 0.10205856770078284, "grad_norm": 0.19692528247833252, "learning_rate": 9.157348061512727e-05, "loss": 10.3718, "step": 11 }, { "epoch": 0.11133661930994491, "grad_norm": 0.1963738352060318, "learning_rate": 8.966766701456177e-05, "loss": 10.3822, "step": 12 }, { "epoch": 0.12061467091910699, "grad_norm": 0.21577142179012299, "learning_rate": 8.759199037394887e-05, "loss": 10.3725, "step": 13 }, { "epoch": 0.12061467091910699, "eval_loss": 10.370198249816895, "eval_runtime": 1.9865, "eval_samples_per_second": 365.979, "eval_steps_per_second": 91.621, "step": 13 }, { "epoch": 0.12989272252826906, "grad_norm": 0.19280284643173218, "learning_rate": 8.535533905932738e-05, "loss": 10.3691, "step": 14 }, { "epoch": 0.13917077413743115, "grad_norm": 0.17874455451965332, "learning_rate": 8.296729075500344e-05, "loss": 10.3717, "step": 15 }, { "epoch": 0.1484488257465932, "grad_norm": 0.18887090682983398, "learning_rate": 8.043807145043604e-05, "loss": 10.3698, "step": 16 }, { "epoch": 0.1577268773557553, "grad_norm": 0.19341035187244415, "learning_rate": 7.777851165098012e-05, "loss": 10.3722, "step": 17 }, { "epoch": 0.16700492896491737, "grad_norm": 0.1979735940694809, "learning_rate": 7.500000000000001e-05, "loss": 10.371, "step": 18 }, { "epoch": 0.17628298057407946, "grad_norm": 0.21226716041564941, "learning_rate": 7.211443451095007e-05, "loss": 10.369, "step": 19 }, { "epoch": 0.18556103218324152, "grad_norm": 0.16405072808265686, "learning_rate": 6.91341716182545e-05, "loss": 10.3669, "step": 20 }, { "epoch": 0.19483908379240358, "grad_norm": 0.22428028285503387, "learning_rate": 6.607197326515808e-05, "loss": 10.3651, "step": 21 }, { "epoch": 0.20411713540156567, "grad_norm": 0.21658191084861755, "learning_rate": 6.294095225512603e-05, "loss": 10.369, "step": 22 }, { "epoch": 0.21339518701072774, "grad_norm": 0.2103334218263626, "learning_rate": 5.9754516100806423e-05, "loss": 10.3723, "step": 23 }, { "epoch": 0.22267323861988983, "grad_norm": 0.23737552762031555, "learning_rate": 5.6526309611002594e-05, "loss": 10.3685, "step": 24 }, { "epoch": 0.2319512902290519, "grad_norm": 0.20295554399490356, "learning_rate": 5.327015646150716e-05, "loss": 10.3618, "step": 25 }, { "epoch": 0.24122934183821398, "grad_norm": 0.21819956600666046, "learning_rate": 5e-05, "loss": 10.3648, "step": 26 }, { "epoch": 0.24122934183821398, "eval_loss": 10.364445686340332, "eval_runtime": 1.9824, "eval_samples_per_second": 366.719, "eval_steps_per_second": 91.806, "step": 26 }, { "epoch": 0.25050739344737605, "grad_norm": 0.2082686722278595, "learning_rate": 4.6729843538492847e-05, "loss": 10.3691, "step": 27 }, { "epoch": 0.2597854450565381, "grad_norm": 0.22001177072525024, "learning_rate": 4.347369038899744e-05, "loss": 10.3636, "step": 28 }, { "epoch": 0.26906349666570023, "grad_norm": 0.23329807817935944, "learning_rate": 4.0245483899193595e-05, "loss": 10.3666, "step": 29 }, { "epoch": 0.2783415482748623, "grad_norm": 0.2456667423248291, "learning_rate": 3.705904774487396e-05, "loss": 10.3611, "step": 30 }, { "epoch": 0.28761959988402436, "grad_norm": 0.27157169580459595, "learning_rate": 3.392802673484193e-05, "loss": 10.3665, "step": 31 }, { "epoch": 0.2968976514931864, "grad_norm": 0.2414681762456894, "learning_rate": 3.086582838174551e-05, "loss": 10.37, "step": 32 }, { "epoch": 0.3061757031023485, "grad_norm": 0.22449567914009094, "learning_rate": 2.7885565489049946e-05, "loss": 10.3723, "step": 33 }, { "epoch": 0.3154537547115106, "grad_norm": 0.23689420521259308, "learning_rate": 2.500000000000001e-05, "loss": 10.3668, "step": 34 }, { "epoch": 0.32473180632067267, "grad_norm": 0.24381740391254425, "learning_rate": 2.2221488349019903e-05, "loss": 10.3639, "step": 35 }, { "epoch": 0.33400985792983473, "grad_norm": 0.24260398745536804, "learning_rate": 1.9561928549563968e-05, "loss": 10.3677, "step": 36 }, { "epoch": 0.3432879095389968, "grad_norm": 0.23076368868350983, "learning_rate": 1.703270924499656e-05, "loss": 10.36, "step": 37 }, { "epoch": 0.3525659611481589, "grad_norm": 0.2268386036157608, "learning_rate": 1.4644660940672627e-05, "loss": 10.3564, "step": 38 }, { "epoch": 0.361844012757321, "grad_norm": 0.20787151157855988, "learning_rate": 1.2408009626051137e-05, "loss": 10.3652, "step": 39 }, { "epoch": 0.361844012757321, "eval_loss": 10.361462593078613, "eval_runtime": 1.9924, "eval_samples_per_second": 364.89, "eval_steps_per_second": 91.348, "step": 39 }, { "epoch": 0.37112206436648304, "grad_norm": 0.22251972556114197, "learning_rate": 1.0332332985438248e-05, "loss": 10.3642, "step": 40 }, { "epoch": 0.3804001159756451, "grad_norm": 0.22692683339118958, "learning_rate": 8.426519384872733e-06, "loss": 10.3622, "step": 41 }, { "epoch": 0.38967816758480717, "grad_norm": 0.23968474566936493, "learning_rate": 6.698729810778065e-06, "loss": 10.3702, "step": 42 }, { "epoch": 0.3989562191939693, "grad_norm": 0.22414839267730713, "learning_rate": 5.156362923365588e-06, "loss": 10.3629, "step": 43 }, { "epoch": 0.40823427080313135, "grad_norm": 0.27029696106910706, "learning_rate": 3.8060233744356633e-06, "loss": 10.3601, "step": 44 }, { "epoch": 0.4175123224122934, "grad_norm": 0.26507827639579773, "learning_rate": 2.653493525244721e-06, "loss": 10.3646, "step": 45 }, { "epoch": 0.4267903740214555, "grad_norm": 0.23490291833877563, "learning_rate": 1.70370868554659e-06, "loss": 10.358, "step": 46 }, { "epoch": 0.4360684256306176, "grad_norm": 0.2541787326335907, "learning_rate": 9.607359798384785e-07, "loss": 10.36, "step": 47 }, { "epoch": 0.44534647723977966, "grad_norm": 0.21563424170017242, "learning_rate": 4.277569313094809e-07, "loss": 10.365, "step": 48 }, { "epoch": 0.4546245288489417, "grad_norm": 0.24226868152618408, "learning_rate": 1.0705383806982606e-07, "loss": 10.3748, "step": 49 }, { "epoch": 0.4639025804581038, "grad_norm": 0.23314820230007172, "learning_rate": 0.0, "loss": 10.3625, "step": 50 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 13, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 83683914547200.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }