{ "best_metric": null, "best_model_checkpoint": null, "epoch": null, "eval_steps": 500, "global_step": 310, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.29, "learning_rate": 2.8125e-06, "loss": 2.9065, "reward": 11.2264, "step": 9 }, { "epoch": 0.61, "learning_rate": 5.9375e-06, "loss": 2.6597, "reward": 11.0573, "step": 19 }, { "epoch": 0.94, "learning_rate": 9.0625e-06, "loss": 1.7609, "reward": 11.9648, "step": 29 }, { "epoch": 1.26, "learning_rate": 9.985430661522333e-06, "loss": 1.2947, "reward": 13.177, "step": 39 }, { "epoch": 1.58, "learning_rate": 9.914274958326507e-06, "loss": 0.7901, "reward": 13.7785, "step": 49 }, { "epoch": 1.9, "learning_rate": 9.784701678661045e-06, "loss": 0.4549, "reward": 14.603, "step": 59 }, { "epoch": 2.23, "learning_rate": 9.598251102025463e-06, "loss": 0.5224, "reward": 15.594, "step": 69 }, { "epoch": 2.55, "learning_rate": 9.357139626751308e-06, "loss": 0.3896, "reward": 15.697, "step": 79 }, { "epoch": 2.87, "learning_rate": 9.064233422958078e-06, "loss": 0.305, "reward": 16.4374, "step": 89 }, { "epoch": 3.19, "learning_rate": 8.723014361461633e-06, "loss": 0.3697, "reward": 16.539, "step": 99 }, { "epoch": 3.52, "learning_rate": 8.337538623649237e-06, "loss": 0.3504, "reward": 17.218, "step": 109 }, { "epoch": 3.84, "learning_rate": 7.912388484339012e-06, "loss": 0.3387, "reward": 16.595, "step": 119 }, { "epoch": 4.16, "learning_rate": 7.4526178407965396e-06, "loss": 0.3369, "reward": 17.3314, "step": 129 }, { "epoch": 4.48, "learning_rate": 6.963692135422872e-06, "loss": 0.1907, "reward": 17.0773, "step": 139 }, { "epoch": 4.81, "learning_rate": 6.451423386272312e-06, "loss": 0.2781, "reward": 17.3675, "step": 149 }, { "epoch": 5.13, "learning_rate": 5.921901097713317e-06, "loss": 0.2708, "reward": 17.4558, "step": 159 }, { "epoch": 5.45, "learning_rate": 5.381419872519763e-06, "loss": 0.2548, "reward": 18.2967, "step": 169 }, { "epoch": 5.77, "learning_rate": 4.83640458589112e-06, "loss": 0.1266, "reward": 18.4324, "step": 179 }, { "epoch": 6.1, "learning_rate": 4.293334010882164e-06, "loss": 0.2178, "reward": 17.8528, "step": 189 }, { "epoch": 6.42, "learning_rate": 3.7586638031314182e-06, "loss": 0.184, "reward": 18.424, "step": 199 }, { "epoch": 6.74, "learning_rate": 3.2387497603938327e-06, "loss": 0.2045, "reward": 18.8729, "step": 209 }, { "epoch": 7.06, "learning_rate": 2.739772269116402e-06, "loss": 0.1521, "reward": 19.1409, "step": 219 }, { "epoch": 7.39, "learning_rate": 2.2676628361847834e-06, "loss": 0.1096, "reward": 18.8351, "step": 229 }, { "epoch": 7.71, "learning_rate": 1.8280335791817733e-06, "loss": 0.1408, "reward": 18.8548, "step": 239 }, { "epoch": 8.03, "learning_rate": 1.4261105133297693e-06, "loss": 0.1674, "reward": 18.8099, "step": 249 }, { "epoch": 8.35, "learning_rate": 1.0666714281569152e-06, "loss": 0.1583, "reward": 18.7122, "step": 259 }, { "epoch": 8.68, "learning_rate": 7.539890923671061e-07, "loss": 0.1407, "reward": 18.8339, "step": 269 }, { "epoch": 9.0, "learning_rate": 4.917804620559202e-07, "loss": 0.1214, "reward": 18.7366, "step": 279 }, { "epoch": 9.32, "learning_rate": 2.8316249605087386e-07, "loss": 0.093, "reward": 18.8959, "step": 289 }, { "epoch": 9.65, "learning_rate": 1.3061510361333186e-07, "loss": 0.1226, "reward": 18.4177, "step": 299 }, { "epoch": 9.97, "learning_rate": 3.59516649547248e-08, "loss": 0.1234, "reward": 18.9226, "step": 309 } ], "logging_steps": 500, "max_steps": 310, "num_input_tokens_seen": 0, "num_train_epochs": 10.0, "save_steps": 500, "stateful_callbacks": {}, "total_flos": 0, "train_batch_size": null, "trial_name": null, "trial_params": null }