|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0033519553072625, |
|
"eval_steps": 14, |
|
"global_step": 56, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.017877094972067038, |
|
"grad_norm": 0.008051837794482708, |
|
"learning_rate": 1e-05, |
|
"loss": 11.9287, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.017877094972067038, |
|
"eval_loss": 11.928812026977539, |
|
"eval_runtime": 17.7636, |
|
"eval_samples_per_second": 5.348, |
|
"eval_steps_per_second": 2.702, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.035754189944134075, |
|
"grad_norm": 0.008926309645175934, |
|
"learning_rate": 2e-05, |
|
"loss": 11.9296, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.053631284916201116, |
|
"grad_norm": 0.00831968616694212, |
|
"learning_rate": 3e-05, |
|
"loss": 11.9287, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.07150837988826815, |
|
"grad_norm": 0.007635825779289007, |
|
"learning_rate": 4e-05, |
|
"loss": 11.9297, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0893854748603352, |
|
"grad_norm": 0.006136827636510134, |
|
"learning_rate": 5e-05, |
|
"loss": 11.9308, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.10726256983240223, |
|
"grad_norm": 0.008952487260103226, |
|
"learning_rate": 6e-05, |
|
"loss": 11.9298, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.12513966480446928, |
|
"grad_norm": 0.008229999803006649, |
|
"learning_rate": 7e-05, |
|
"loss": 11.9316, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.1430167597765363, |
|
"grad_norm": 0.008337481878697872, |
|
"learning_rate": 8e-05, |
|
"loss": 11.9323, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.16089385474860335, |
|
"grad_norm": 0.0077857039868831635, |
|
"learning_rate": 9e-05, |
|
"loss": 11.9287, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.1787709497206704, |
|
"grad_norm": 0.007779798936098814, |
|
"learning_rate": 0.0001, |
|
"loss": 11.9293, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.19664804469273742, |
|
"grad_norm": 0.010903590358793736, |
|
"learning_rate": 9.988343845952697e-05, |
|
"loss": 11.9297, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.21452513966480447, |
|
"grad_norm": 0.008899732492864132, |
|
"learning_rate": 9.953429730181653e-05, |
|
"loss": 11.929, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.2324022346368715, |
|
"grad_norm": 0.008686481043696404, |
|
"learning_rate": 9.895420438411616e-05, |
|
"loss": 11.9327, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.25027932960893856, |
|
"grad_norm": 0.008174674585461617, |
|
"learning_rate": 9.814586436738998e-05, |
|
"loss": 11.9286, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.25027932960893856, |
|
"eval_loss": 11.928715705871582, |
|
"eval_runtime": 0.3873, |
|
"eval_samples_per_second": 245.313, |
|
"eval_steps_per_second": 123.947, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.2681564245810056, |
|
"grad_norm": 0.008636604063212872, |
|
"learning_rate": 9.711304610594104e-05, |
|
"loss": 11.9307, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.2860335195530726, |
|
"grad_norm": 0.009689634665846825, |
|
"learning_rate": 9.586056507527266e-05, |
|
"loss": 11.9291, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.3039106145251397, |
|
"grad_norm": 0.007181845605373383, |
|
"learning_rate": 9.439426092011875e-05, |
|
"loss": 11.9316, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.3217877094972067, |
|
"grad_norm": 0.00760689377784729, |
|
"learning_rate": 9.272097022732443e-05, |
|
"loss": 11.9297, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.3396648044692737, |
|
"grad_norm": 0.007029213942587376, |
|
"learning_rate": 9.08484946505221e-05, |
|
"loss": 11.9308, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.3575418994413408, |
|
"grad_norm": 0.009038039483129978, |
|
"learning_rate": 8.8785564535221e-05, |
|
"loss": 11.931, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3754189944134078, |
|
"grad_norm": 0.0075072660110890865, |
|
"learning_rate": 8.654179821390621e-05, |
|
"loss": 11.9313, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.39329608938547483, |
|
"grad_norm": 0.00974891148507595, |
|
"learning_rate": 8.412765716093272e-05, |
|
"loss": 11.9313, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.4111731843575419, |
|
"grad_norm": 0.0073928870260715485, |
|
"learning_rate": 8.155439721630264e-05, |
|
"loss": 11.9301, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.42905027932960893, |
|
"grad_norm": 0.009019100107252598, |
|
"learning_rate": 7.883401610574336e-05, |
|
"loss": 11.9291, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.44692737430167595, |
|
"grad_norm": 0.007918241433799267, |
|
"learning_rate": 7.597919750177168e-05, |
|
"loss": 11.9313, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.464804469273743, |
|
"grad_norm": 0.009383410215377808, |
|
"learning_rate": 7.300325188655761e-05, |
|
"loss": 11.9298, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.48268156424581005, |
|
"grad_norm": 0.008459771983325481, |
|
"learning_rate": 6.992005449231208e-05, |
|
"loss": 11.9309, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.5005586592178771, |
|
"grad_norm": 0.008441867306828499, |
|
"learning_rate": 6.674398060854931e-05, |
|
"loss": 11.9304, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.5005586592178771, |
|
"eval_loss": 11.928580284118652, |
|
"eval_runtime": 0.3886, |
|
"eval_samples_per_second": 244.488, |
|
"eval_steps_per_second": 123.531, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.5184357541899441, |
|
"grad_norm": 0.010095364414155483, |
|
"learning_rate": 6.348983855785121e-05, |
|
"loss": 11.9277, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.5363128491620112, |
|
"grad_norm": 0.007604570593684912, |
|
"learning_rate": 6.01728006526317e-05, |
|
"loss": 11.9298, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.5541899441340782, |
|
"grad_norm": 0.0103254783898592, |
|
"learning_rate": 5.680833245481234e-05, |
|
"loss": 11.9283, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.5720670391061452, |
|
"grad_norm": 0.00810755044221878, |
|
"learning_rate": 5.341212066823355e-05, |
|
"loss": 11.931, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.5899441340782123, |
|
"grad_norm": 0.007562727201730013, |
|
"learning_rate": 5e-05, |
|
"loss": 11.9287, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.6078212290502794, |
|
"grad_norm": 0.008853144943714142, |
|
"learning_rate": 4.658787933176646e-05, |
|
"loss": 11.9304, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.6256983240223464, |
|
"grad_norm": 0.009539203718304634, |
|
"learning_rate": 4.319166754518768e-05, |
|
"loss": 11.93, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.6435754189944134, |
|
"grad_norm": 0.009173383004963398, |
|
"learning_rate": 3.982719934736832e-05, |
|
"loss": 11.9296, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.6614525139664804, |
|
"grad_norm": 0.008169720880687237, |
|
"learning_rate": 3.651016144214878e-05, |
|
"loss": 11.9302, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.6793296089385474, |
|
"grad_norm": 0.008827430196106434, |
|
"learning_rate": 3.325601939145069e-05, |
|
"loss": 11.9295, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.6972067039106146, |
|
"grad_norm": 0.010021938011050224, |
|
"learning_rate": 3.007994550768793e-05, |
|
"loss": 11.9299, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.7150837988826816, |
|
"grad_norm": 0.010521038435399532, |
|
"learning_rate": 2.6996748113442394e-05, |
|
"loss": 11.9308, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.7329608938547486, |
|
"grad_norm": 0.009070714004337788, |
|
"learning_rate": 2.4020802498228335e-05, |
|
"loss": 11.93, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.7508379888268156, |
|
"grad_norm": 0.008820487186312675, |
|
"learning_rate": 2.1165983894256647e-05, |
|
"loss": 11.9279, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.7508379888268156, |
|
"eval_loss": 11.928503036499023, |
|
"eval_runtime": 0.3804, |
|
"eval_samples_per_second": 249.752, |
|
"eval_steps_per_second": 126.19, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.7687150837988826, |
|
"grad_norm": 0.010876229964196682, |
|
"learning_rate": 1.8445602783697374e-05, |
|
"loss": 11.9305, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.7865921787709497, |
|
"grad_norm": 0.008084769360721111, |
|
"learning_rate": 1.5872342839067306e-05, |
|
"loss": 11.9299, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.8044692737430168, |
|
"grad_norm": 0.009019813500344753, |
|
"learning_rate": 1.3458201786093794e-05, |
|
"loss": 11.9283, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.8223463687150838, |
|
"grad_norm": 0.008098295889794827, |
|
"learning_rate": 1.1214435464779006e-05, |
|
"loss": 11.9292, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.8402234636871508, |
|
"grad_norm": 0.008133570663630962, |
|
"learning_rate": 9.151505349477902e-06, |
|
"loss": 11.9289, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.8581005586592179, |
|
"grad_norm": 0.012650455348193645, |
|
"learning_rate": 7.2790297726755716e-06, |
|
"loss": 11.9302, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.8759776536312849, |
|
"grad_norm": 0.009691119194030762, |
|
"learning_rate": 5.605739079881239e-06, |
|
"loss": 11.9307, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.8938547486033519, |
|
"grad_norm": 0.009178046137094498, |
|
"learning_rate": 4.139434924727359e-06, |
|
"loss": 11.9297, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.911731843575419, |
|
"grad_norm": 0.008271483704447746, |
|
"learning_rate": 2.88695389405898e-06, |
|
"loss": 11.9291, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.929608938547486, |
|
"grad_norm": 0.00795311015099287, |
|
"learning_rate": 1.8541356326100433e-06, |
|
"loss": 11.9286, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.9474860335195531, |
|
"grad_norm": 0.008445663377642632, |
|
"learning_rate": 1.0457956158838544e-06, |
|
"loss": 11.932, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.9653631284916201, |
|
"grad_norm": 0.009014743380248547, |
|
"learning_rate": 4.6570269818346224e-07, |
|
"loss": 11.9283, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.9832402234636871, |
|
"grad_norm": 0.009628918021917343, |
|
"learning_rate": 1.1656154047303691e-07, |
|
"loss": 11.9317, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 1.0033519553072625, |
|
"grad_norm": 0.00985956471413374, |
|
"learning_rate": 0.0, |
|
"loss": 13.9592, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 1.0033519553072625, |
|
"eval_loss": 11.928487777709961, |
|
"eval_runtime": 0.3738, |
|
"eval_samples_per_second": 254.136, |
|
"eval_steps_per_second": 128.405, |
|
"step": 56 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 56, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 14, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 26468155392.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|