|
{ |
|
"best_metric": 1.814998745918274, |
|
"best_model_checkpoint": "/home/ubuntu/workplace/NER_simi/output/LLM_05_13/checkpoint-1000", |
|
"epoch": 0.5675368898978433, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.147441434337073e-05, |
|
"loss": 2.3247, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.705117131325856e-05, |
|
"loss": 1.9413, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.031336501627827e-05, |
|
"loss": 1.8845, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.262792828314637e-05, |
|
"loss": 1.8492, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 1.9344485998153687, |
|
"eval_runtime": 336.1736, |
|
"eval_samples_per_second": 95.918, |
|
"eval_steps_per_second": 0.75, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.442324303011218e-05, |
|
"loss": 1.8316, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.589012198616609e-05, |
|
"loss": 1.8137, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.713035047190436e-05, |
|
"loss": 1.7988, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.82046852530342e-05, |
|
"loss": 1.7873, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 1.8653844594955444, |
|
"eval_runtime": 336.5787, |
|
"eval_samples_per_second": 95.802, |
|
"eval_steps_per_second": 0.749, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.915231568918581e-05, |
|
"loss": 1.7599, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7658, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7534, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7399, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 1.8613653182983398, |
|
"eval_runtime": 336.3899, |
|
"eval_samples_per_second": 95.856, |
|
"eval_steps_per_second": 0.749, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7327, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7445, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7255, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7249, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_loss": 1.8731602430343628, |
|
"eval_runtime": 336.4325, |
|
"eval_samples_per_second": 95.844, |
|
"eval_steps_per_second": 0.749, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7257, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7176, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 5e-05, |
|
"loss": 1.708, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 5e-05, |
|
"loss": 1.7116, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 1.814998745918274, |
|
"eval_runtime": 336.1286, |
|
"eval_samples_per_second": 95.931, |
|
"eval_steps_per_second": 0.75, |
|
"step": 1000 |
|
} |
|
], |
|
"max_steps": 1762, |
|
"num_train_epochs": 1, |
|
"total_flos": 196298259038208.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|