{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.009939369843951893,
  "eval_steps": 13,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00019878739687903787,
      "grad_norm": 2.414543867111206,
      "learning_rate": 5e-06,
      "loss": 13.1525,
      "step": 1
    },
    {
      "epoch": 0.00019878739687903787,
      "eval_loss": 3.2820167541503906,
      "eval_runtime": 75.5342,
      "eval_samples_per_second": 28.054,
      "eval_steps_per_second": 14.033,
      "step": 1
    },
    {
      "epoch": 0.00039757479375807575,
      "grad_norm": 2.2050728797912598,
      "learning_rate": 1e-05,
      "loss": 13.7228,
      "step": 2
    },
    {
      "epoch": 0.0005963621906371136,
      "grad_norm": 2.5719025135040283,
      "learning_rate": 1.5e-05,
      "loss": 14.2385,
      "step": 3
    },
    {
      "epoch": 0.0007951495875161515,
      "grad_norm": 2.4067375659942627,
      "learning_rate": 2e-05,
      "loss": 12.0989,
      "step": 4
    },
    {
      "epoch": 0.0009939369843951894,
      "grad_norm": 2.902513027191162,
      "learning_rate": 2.5e-05,
      "loss": 13.3901,
      "step": 5
    },
    {
      "epoch": 0.0011927243812742273,
      "grad_norm": 2.1985535621643066,
      "learning_rate": 3e-05,
      "loss": 13.2702,
      "step": 6
    },
    {
      "epoch": 0.0013915117781532651,
      "grad_norm": 2.53352427482605,
      "learning_rate": 3.5e-05,
      "loss": 14.1685,
      "step": 7
    },
    {
      "epoch": 0.001590299175032303,
      "grad_norm": 2.161381483078003,
      "learning_rate": 4e-05,
      "loss": 13.6176,
      "step": 8
    },
    {
      "epoch": 0.0017890865719113408,
      "grad_norm": 3.1022119522094727,
      "learning_rate": 4.5e-05,
      "loss": 15.6293,
      "step": 9
    },
    {
      "epoch": 0.001987873968790379,
      "grad_norm": 2.5830698013305664,
      "learning_rate": 5e-05,
      "loss": 14.0974,
      "step": 10
    },
    {
      "epoch": 0.0021866613656694165,
      "grad_norm": 3.0659115314483643,
      "learning_rate": 4.99229333433282e-05,
      "loss": 13.9289,
      "step": 11
    },
    {
      "epoch": 0.0023854487625484546,
      "grad_norm": 3.4623639583587646,
      "learning_rate": 4.9692208514878444e-05,
      "loss": 13.4014,
      "step": 12
    },
    {
      "epoch": 0.002584236159427492,
      "grad_norm": 2.969046115875244,
      "learning_rate": 4.9309248009941914e-05,
      "loss": 11.8233,
      "step": 13
    },
    {
      "epoch": 0.002584236159427492,
      "eval_loss": 3.2339165210723877,
      "eval_runtime": 74.0873,
      "eval_samples_per_second": 28.601,
      "eval_steps_per_second": 14.307,
      "step": 13
    },
    {
      "epoch": 0.0027830235563065303,
      "grad_norm": 3.8479912281036377,
      "learning_rate": 4.877641290737884e-05,
      "loss": 13.0678,
      "step": 14
    },
    {
      "epoch": 0.002981810953185568,
      "grad_norm": 4.167858600616455,
      "learning_rate": 4.8096988312782174e-05,
      "loss": 13.3701,
      "step": 15
    },
    {
      "epoch": 0.003180598350064606,
      "grad_norm": 3.2626233100891113,
      "learning_rate": 4.72751631047092e-05,
      "loss": 12.1547,
      "step": 16
    },
    {
      "epoch": 0.0033793857469436436,
      "grad_norm": 3.215256690979004,
      "learning_rate": 4.6316004108852305e-05,
      "loss": 12.0763,
      "step": 17
    },
    {
      "epoch": 0.0035781731438226817,
      "grad_norm": 5.636133670806885,
      "learning_rate": 4.522542485937369e-05,
      "loss": 14.4562,
      "step": 18
    },
    {
      "epoch": 0.0037769605407017197,
      "grad_norm": 3.237064838409424,
      "learning_rate": 4.401014914000078e-05,
      "loss": 11.2384,
      "step": 19
    },
    {
      "epoch": 0.003975747937580758,
      "grad_norm": 2.562439203262329,
      "learning_rate": 4.267766952966369e-05,
      "loss": 12.7868,
      "step": 20
    },
    {
      "epoch": 0.004174535334459795,
      "grad_norm": 3.096048593521118,
      "learning_rate": 4.123620120825459e-05,
      "loss": 13.4175,
      "step": 21
    },
    {
      "epoch": 0.004373322731338833,
      "grad_norm": 3.3756790161132812,
      "learning_rate": 3.969463130731183e-05,
      "loss": 11.9214,
      "step": 22
    },
    {
      "epoch": 0.004572110128217871,
      "grad_norm": 3.1741342544555664,
      "learning_rate": 3.8062464117898724e-05,
      "loss": 11.5485,
      "step": 23
    },
    {
      "epoch": 0.004770897525096909,
      "grad_norm": 3.0276670455932617,
      "learning_rate": 3.634976249348867e-05,
      "loss": 12.0198,
      "step": 24
    },
    {
      "epoch": 0.004969684921975946,
      "grad_norm": 3.186634063720703,
      "learning_rate": 3.456708580912725e-05,
      "loss": 11.7737,
      "step": 25
    },
    {
      "epoch": 0.005168472318854984,
      "grad_norm": 2.610215663909912,
      "learning_rate": 3.272542485937369e-05,
      "loss": 13.0966,
      "step": 26
    },
    {
      "epoch": 0.005168472318854984,
      "eval_loss": 3.0876364707946777,
      "eval_runtime": 74.3552,
      "eval_samples_per_second": 28.498,
      "eval_steps_per_second": 14.256,
      "step": 26
    },
    {
      "epoch": 0.0053672597157340225,
      "grad_norm": 3.7985334396362305,
      "learning_rate": 3.083613409639764e-05,
      "loss": 13.1346,
      "step": 27
    },
    {
      "epoch": 0.0055660471126130606,
      "grad_norm": 2.66591477394104,
      "learning_rate": 2.8910861626005776e-05,
      "loss": 14.0919,
      "step": 28
    },
    {
      "epoch": 0.005764834509492099,
      "grad_norm": 3.770318031311035,
      "learning_rate": 2.6961477393196126e-05,
      "loss": 12.0625,
      "step": 29
    },
    {
      "epoch": 0.005963621906371136,
      "grad_norm": 3.1966161727905273,
      "learning_rate": 2.5e-05,
      "loss": 13.3168,
      "step": 30
    },
    {
      "epoch": 0.006162409303250174,
      "grad_norm": 3.4274048805236816,
      "learning_rate": 2.303852260680388e-05,
      "loss": 13.7754,
      "step": 31
    },
    {
      "epoch": 0.006361196700129212,
      "grad_norm": 3.12906813621521,
      "learning_rate": 2.1089138373994223e-05,
      "loss": 11.2148,
      "step": 32
    },
    {
      "epoch": 0.00655998409700825,
      "grad_norm": 2.4191653728485107,
      "learning_rate": 1.9163865903602374e-05,
      "loss": 11.369,
      "step": 33
    },
    {
      "epoch": 0.006758771493887287,
      "grad_norm": 3.0178472995758057,
      "learning_rate": 1.7274575140626318e-05,
      "loss": 11.5686,
      "step": 34
    },
    {
      "epoch": 0.006957558890766325,
      "grad_norm": 3.4739089012145996,
      "learning_rate": 1.5432914190872757e-05,
      "loss": 13.3696,
      "step": 35
    },
    {
      "epoch": 0.007156346287645363,
      "grad_norm": 4.477285385131836,
      "learning_rate": 1.3650237506511331e-05,
      "loss": 13.9461,
      "step": 36
    },
    {
      "epoch": 0.007355133684524401,
      "grad_norm": 3.5579240322113037,
      "learning_rate": 1.1937535882101281e-05,
      "loss": 12.4578,
      "step": 37
    },
    {
      "epoch": 0.0075539210814034394,
      "grad_norm": 3.2776896953582764,
      "learning_rate": 1.0305368692688174e-05,
      "loss": 11.7642,
      "step": 38
    },
    {
      "epoch": 0.007752708478282477,
      "grad_norm": 2.8097920417785645,
      "learning_rate": 8.763798791745411e-06,
      "loss": 11.8259,
      "step": 39
    },
    {
      "epoch": 0.007752708478282477,
      "eval_loss": 3.0318076610565186,
      "eval_runtime": 73.9722,
      "eval_samples_per_second": 28.646,
      "eval_steps_per_second": 14.33,
      "step": 39
    },
    {
      "epoch": 0.007951495875161516,
      "grad_norm": 3.0600802898406982,
      "learning_rate": 7.3223304703363135e-06,
      "loss": 12.3082,
      "step": 40
    },
    {
      "epoch": 0.008150283272040553,
      "grad_norm": 2.5751516819000244,
      "learning_rate": 5.989850859999227e-06,
      "loss": 11.4527,
      "step": 41
    },
    {
      "epoch": 0.00834907066891959,
      "grad_norm": 3.5589427947998047,
      "learning_rate": 4.7745751406263165e-06,
      "loss": 11.6761,
      "step": 42
    },
    {
      "epoch": 0.008547858065798629,
      "grad_norm": 4.126989364624023,
      "learning_rate": 3.6839958911476957e-06,
      "loss": 11.1792,
      "step": 43
    },
    {
      "epoch": 0.008746645462677666,
      "grad_norm": 2.9589903354644775,
      "learning_rate": 2.7248368952908053e-06,
      "loss": 10.7405,
      "step": 44
    },
    {
      "epoch": 0.008945432859556703,
      "grad_norm": 3.088578224182129,
      "learning_rate": 1.9030116872178316e-06,
      "loss": 12.7116,
      "step": 45
    },
    {
      "epoch": 0.009144220256435742,
      "grad_norm": 2.728550672531128,
      "learning_rate": 1.2235870926211619e-06,
      "loss": 11.8275,
      "step": 46
    },
    {
      "epoch": 0.00934300765331478,
      "grad_norm": 3.458416223526001,
      "learning_rate": 6.907519900580861e-07,
      "loss": 11.6581,
      "step": 47
    },
    {
      "epoch": 0.009541795050193818,
      "grad_norm": 3.6537857055664062,
      "learning_rate": 3.077914851215585e-07,
      "loss": 11.4921,
      "step": 48
    },
    {
      "epoch": 0.009740582447072856,
      "grad_norm": 2.9352803230285645,
      "learning_rate": 7.706665667180091e-08,
      "loss": 11.6247,
      "step": 49
    },
    {
      "epoch": 0.009939369843951893,
      "grad_norm": 3.011121988296509,
      "learning_rate": 0.0,
      "loss": 12.5026,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 13,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8184384007962624.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}