{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 1.8475750577367207, | |
"global_step": 500, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.07, | |
"learning_rate": 0.0002, | |
"loss": 1.0758, | |
"step": 20 | |
}, | |
{ | |
"epoch": 0.15, | |
"learning_rate": 0.0002, | |
"loss": 1.0262, | |
"step": 40 | |
}, | |
{ | |
"epoch": 0.22, | |
"learning_rate": 0.0002, | |
"loss": 0.972, | |
"step": 60 | |
}, | |
{ | |
"epoch": 0.3, | |
"learning_rate": 0.0002, | |
"loss": 0.2251, | |
"step": 80 | |
}, | |
{ | |
"epoch": 0.37, | |
"learning_rate": 0.0002, | |
"loss": 0.144, | |
"step": 100 | |
}, | |
{ | |
"epoch": 0.44, | |
"learning_rate": 0.0002, | |
"loss": 0.1766, | |
"step": 120 | |
}, | |
{ | |
"epoch": 0.52, | |
"learning_rate": 0.0002, | |
"loss": 0.1494, | |
"step": 140 | |
}, | |
{ | |
"epoch": 0.59, | |
"learning_rate": 0.0002, | |
"loss": 0.1483, | |
"step": 160 | |
}, | |
{ | |
"epoch": 0.67, | |
"learning_rate": 0.0002, | |
"loss": 0.1188, | |
"step": 180 | |
}, | |
{ | |
"epoch": 0.74, | |
"learning_rate": 0.0002, | |
"loss": 0.0918, | |
"step": 200 | |
}, | |
{ | |
"epoch": 0.81, | |
"learning_rate": 0.0002, | |
"loss": 0.1356, | |
"step": 220 | |
}, | |
{ | |
"epoch": 0.89, | |
"learning_rate": 0.0002, | |
"loss": 0.0969, | |
"step": 240 | |
}, | |
{ | |
"epoch": 0.96, | |
"learning_rate": 0.0002, | |
"loss": 0.1145, | |
"step": 260 | |
}, | |
{ | |
"epoch": 1.03, | |
"learning_rate": 0.0002, | |
"loss": 0.1058, | |
"step": 280 | |
}, | |
{ | |
"epoch": 1.11, | |
"learning_rate": 0.0002, | |
"loss": 0.0827, | |
"step": 300 | |
}, | |
{ | |
"epoch": 1.18, | |
"learning_rate": 0.0002, | |
"loss": 0.0537, | |
"step": 320 | |
}, | |
{ | |
"epoch": 1.26, | |
"learning_rate": 0.0002, | |
"loss": 0.0969, | |
"step": 340 | |
}, | |
{ | |
"epoch": 1.33, | |
"learning_rate": 0.0002, | |
"loss": 0.06, | |
"step": 360 | |
}, | |
{ | |
"epoch": 1.4, | |
"learning_rate": 0.0002, | |
"loss": 0.0777, | |
"step": 380 | |
}, | |
{ | |
"epoch": 1.48, | |
"learning_rate": 0.0002, | |
"loss": 0.071, | |
"step": 400 | |
}, | |
{ | |
"epoch": 1.55, | |
"learning_rate": 0.0002, | |
"loss": 0.0487, | |
"step": 420 | |
}, | |
{ | |
"epoch": 1.63, | |
"learning_rate": 0.0002, | |
"loss": 0.0871, | |
"step": 440 | |
}, | |
{ | |
"epoch": 1.7, | |
"learning_rate": 0.0002, | |
"loss": 0.0577, | |
"step": 460 | |
}, | |
{ | |
"epoch": 1.77, | |
"learning_rate": 0.0002, | |
"loss": 0.073, | |
"step": 480 | |
}, | |
{ | |
"epoch": 1.85, | |
"learning_rate": 0.0002, | |
"loss": 0.0658, | |
"step": 500 | |
}, | |
{ | |
"epoch": 1.85, | |
"step": 500, | |
"total_flos": 5.366331835972915e+17, | |
"train_loss": 0.2142012220621109, | |
"train_runtime": 37295.4093, | |
"train_samples_per_second": 0.429, | |
"train_steps_per_second": 0.013 | |
} | |
], | |
"max_steps": 500, | |
"num_train_epochs": 2, | |
"total_flos": 5.366331835972915e+17, | |
"trial_name": null, | |
"trial_params": null | |
} | |