|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": null, |
|
"eval_steps": 500, |
|
"global_step": 150, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 5.625e-06, |
|
"loss": 1.0211, |
|
"reward": 7.9638, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 9.989294616193018e-06, |
|
"loss": 0.9096, |
|
"reward": 7.3826, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 9.800249271929645e-06, |
|
"loss": 0.4739, |
|
"reward": 8.4875, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 9.38363377853754e-06, |
|
"loss": 0.3126, |
|
"reward": 9.1497, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 8.759199037394888e-06, |
|
"loss": 0.2466, |
|
"reward": 9.1595, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 7.956548241817914e-06, |
|
"loss": 0.2322, |
|
"reward": 9.4808, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 7.0137334492936875e-06, |
|
"loss": 0.1405, |
|
"reward": 9.9301, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"learning_rate": 5.975451610080643e-06, |
|
"loss": 0.2078, |
|
"reward": 9.595, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 4.890925574827195e-06, |
|
"loss": 0.2104, |
|
"reward": 9.7705, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 3.8115705383691354e-06, |
|
"loss": 0.1774, |
|
"reward": 9.9975, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"learning_rate": 2.7885565489049948e-06, |
|
"loss": 0.1646, |
|
"reward": 9.8489, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 1.8703826390797047e-06, |
|
"loss": 0.1785, |
|
"reward": 10.4678, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 8.6, |
|
"learning_rate": 1.100577584535592e-06, |
|
"loss": 0.0917, |
|
"reward": 10.1096, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 9.27, |
|
"learning_rate": 5.156362923365587e-07, |
|
"loss": 0.1104, |
|
"reward": 10.1309, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 1.4328965093369284e-07, |
|
"loss": 0.0614, |
|
"reward": 10.4662, |
|
"step": 149 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 150, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10.0, |
|
"save_steps": 500, |
|
"stateful_callbacks": {}, |
|
"total_flos": 0, |
|
"train_batch_size": null, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|