{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9990205680705191,
  "eval_steps": 500,
  "global_step": 510,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.039177277179236046,
      "grad_norm": 0.5212510228157043,
      "learning_rate": 0.00098,
      "loss": 1.6695,
      "step": 20
    },
    {
      "epoch": 0.07835455435847209,
      "grad_norm": 0.46687546372413635,
      "learning_rate": 0.00094,
      "loss": 1.4897,
      "step": 40
    },
    {
      "epoch": 0.11753183153770813,
      "grad_norm": 0.3956817388534546,
      "learning_rate": 0.0009000000000000001,
      "loss": 1.1361,
      "step": 60
    },
    {
      "epoch": 0.15670910871694418,
      "grad_norm": 0.6187575459480286,
      "learning_rate": 0.00086,
      "loss": 1.1006,
      "step": 80
    },
    {
      "epoch": 0.1958863858961802,
      "grad_norm": 0.5952175259590149,
      "learning_rate": 0.00082,
      "loss": 1.1829,
      "step": 100
    },
    {
      "epoch": 0.23506366307541626,
      "grad_norm": 0.5533424019813538,
      "learning_rate": 0.0007800000000000001,
      "loss": 1.1187,
      "step": 120
    },
    {
      "epoch": 0.2742409402546523,
      "grad_norm": 0.49875837564468384,
      "learning_rate": 0.00074,
      "loss": 1.1533,
      "step": 140
    },
    {
      "epoch": 0.31341821743388837,
      "grad_norm": 0.3221249580383301,
      "learning_rate": 0.0007,
      "loss": 1.0684,
      "step": 160
    },
    {
      "epoch": 0.3525954946131244,
      "grad_norm": 0.4444270730018616,
      "learning_rate": 0.00066,
      "loss": 0.9958,
      "step": 180
    },
    {
      "epoch": 0.3917727717923604,
      "grad_norm": 0.3522554337978363,
      "learning_rate": 0.00062,
      "loss": 1.0248,
      "step": 200
    },
    {
      "epoch": 0.4309500489715965,
      "grad_norm": 0.5420783162117004,
      "learning_rate": 0.00058,
      "loss": 0.8882,
      "step": 220
    },
    {
      "epoch": 0.4701273261508325,
      "grad_norm": 0.390200674533844,
      "learning_rate": 0.00054,
      "loss": 1.057,
      "step": 240
    },
    {
      "epoch": 0.5093046033300686,
      "grad_norm": 0.29798322916030884,
      "learning_rate": 0.0005,
      "loss": 0.9993,
      "step": 260
    },
    {
      "epoch": 0.5484818805093046,
      "grad_norm": 0.32027778029441833,
      "learning_rate": 0.00046,
      "loss": 31543.7594,
      "step": 280
    },
    {
      "epoch": 0.5876591576885406,
      "grad_norm": 0.3356141746044159,
      "learning_rate": 0.00042,
      "loss": 1.0387,
      "step": 300
    },
    {
      "epoch": 0.6268364348677767,
      "grad_norm": 0.4466319680213928,
      "learning_rate": 0.00038,
      "loss": 1.0035,
      "step": 320
    },
    {
      "epoch": 0.6660137120470128,
      "grad_norm": 0.34509071707725525,
      "learning_rate": 0.00034,
      "loss": 1.079,
      "step": 340
    },
    {
      "epoch": 0.7051909892262488,
      "grad_norm": 0.3388811945915222,
      "learning_rate": 0.0003,
      "loss": 0.8813,
      "step": 360
    },
    {
      "epoch": 0.7443682664054848,
      "grad_norm": 0.42653968930244446,
      "learning_rate": 0.00026000000000000003,
      "loss": 0.9738,
      "step": 380
    },
    {
      "epoch": 0.7835455435847208,
      "grad_norm": 0.41174277663230896,
      "learning_rate": 0.00022,
      "loss": 1.0102,
      "step": 400
    },
    {
      "epoch": 0.8227228207639569,
      "grad_norm": 0.2753573954105377,
      "learning_rate": 0.00017999999999999998,
      "loss": 0.9923,
      "step": 420
    },
    {
      "epoch": 0.861900097943193,
      "grad_norm": 0.4338860809803009,
      "learning_rate": 0.00014000000000000001,
      "loss": 0.9225,
      "step": 440
    },
    {
      "epoch": 0.901077375122429,
      "grad_norm": 0.3668128252029419,
      "learning_rate": 0.0001,
      "loss": 1.0012,
      "step": 460
    },
    {
      "epoch": 0.940254652301665,
      "grad_norm": 0.20731355249881744,
      "learning_rate": 6e-05,
      "loss": 0.9247,
      "step": 480
    },
    {
      "epoch": 0.9794319294809011,
      "grad_norm": 0.2774331867694855,
      "learning_rate": 2e-05,
      "loss": 1.0422,
      "step": 500
    },
    {
      "epoch": 0.9990205680705191,
      "step": 510,
      "total_flos": 468651156894720.0,
      "train_loss": 1238.0412948421404,
      "train_runtime": 10118.6641,
      "train_samples_per_second": 0.101,
      "train_steps_per_second": 0.05
    }
  ],
  "logging_steps": 20,
  "max_steps": 510,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 468651156894720.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}