{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.01899245050092588, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 9.49622525046294e-05, "eval_loss": 3.8637006282806396, "eval_runtime": 215.5805, "eval_samples_per_second": 20.568, "eval_steps_per_second": 10.284, "step": 1 }, { "epoch": 0.00047481126252314705, "grad_norm": 8.74648380279541, "learning_rate": 5e-05, "loss": 2.958, "step": 5 }, { "epoch": 0.0009496225250462941, "grad_norm": 8.9068603515625, "learning_rate": 0.0001, "loss": 2.3625, "step": 10 }, { "epoch": 0.0014244337875694412, "grad_norm": 5.726607322692871, "learning_rate": 9.98292246503335e-05, "loss": 1.5296, "step": 15 }, { "epoch": 0.0018992450500925882, "grad_norm": 3.613369941711426, "learning_rate": 9.931806517013612e-05, "loss": 1.0262, "step": 20 }, { "epoch": 0.002374056312615735, "grad_norm": 4.800078392028809, "learning_rate": 9.847001329696653e-05, "loss": 1.1287, "step": 25 }, { "epoch": 0.0028488675751388824, "grad_norm": 3.4092705249786377, "learning_rate": 9.729086208503174e-05, "loss": 0.9243, "step": 30 }, { "epoch": 0.003323678837662029, "grad_norm": 4.513568878173828, "learning_rate": 9.578866633275288e-05, "loss": 1.0764, "step": 35 }, { "epoch": 0.0037984901001851764, "grad_norm": 5.311899662017822, "learning_rate": 9.397368756032445e-05, "loss": 0.8982, "step": 40 }, { "epoch": 0.004273301362708324, "grad_norm": 2.6640231609344482, "learning_rate": 9.185832391312644e-05, "loss": 1.0167, "step": 45 }, { "epoch": 0.00474811262523147, "grad_norm": 5.090713024139404, "learning_rate": 8.945702546981969e-05, "loss": 0.8414, "step": 50 }, { "epoch": 0.00474811262523147, "eval_loss": 1.0240230560302734, "eval_runtime": 215.8469, "eval_samples_per_second": 20.542, "eval_steps_per_second": 10.271, "step": 50 }, { "epoch": 0.005222923887754617, "grad_norm": 3.3579554557800293, "learning_rate": 8.678619553365659e-05, "loss": 1.1465, "step": 55 }, { "epoch": 0.005697735150277765, "grad_norm": 4.077099323272705, "learning_rate": 8.386407858128706e-05, "loss": 1.1425, "step": 60 }, { "epoch": 0.006172546412800912, "grad_norm": 4.041563510894775, "learning_rate": 8.07106356344834e-05, "loss": 0.964, "step": 65 }, { "epoch": 0.006647357675324058, "grad_norm": 4.580941677093506, "learning_rate": 7.734740790612136e-05, "loss": 0.8941, "step": 70 }, { "epoch": 0.007122168937847206, "grad_norm": 3.8948450088500977, "learning_rate": 7.379736965185368e-05, "loss": 0.8085, "step": 75 }, { "epoch": 0.007596980200370353, "grad_norm": 5.882622718811035, "learning_rate": 7.008477123264848e-05, "loss": 0.8934, "step": 80 }, { "epoch": 0.0080717914628935, "grad_norm": 6.029172897338867, "learning_rate": 6.623497346023418e-05, "loss": 0.9615, "step": 85 }, { "epoch": 0.008546602725416647, "grad_norm": 3.7642462253570557, "learning_rate": 6.227427435703997e-05, "loss": 0.6377, "step": 90 }, { "epoch": 0.009021413987939793, "grad_norm": 6.879363536834717, "learning_rate": 5.8229729514036705e-05, "loss": 0.8894, "step": 95 }, { "epoch": 0.00949622525046294, "grad_norm": 3.6813063621520996, "learning_rate": 5.4128967273616625e-05, "loss": 0.7169, "step": 100 }, { "epoch": 0.00949622525046294, "eval_loss": 0.9752216339111328, "eval_runtime": 215.9701, "eval_samples_per_second": 20.531, "eval_steps_per_second": 10.265, "step": 100 }, { "epoch": 0.009971036512986088, "grad_norm": 3.527226686477661, "learning_rate": 5e-05, "loss": 0.9917, "step": 105 }, { "epoch": 0.010445847775509234, "grad_norm": 6.216394424438477, "learning_rate": 4.5871032726383386e-05, "loss": 1.0945, "step": 110 }, { "epoch": 0.010920659038032382, "grad_norm": 4.555583953857422, "learning_rate": 4.17702704859633e-05, "loss": 0.9526, "step": 115 }, { "epoch": 0.01139547030055553, "grad_norm": 4.029147148132324, "learning_rate": 3.772572564296005e-05, "loss": 0.9214, "step": 120 }, { "epoch": 0.011870281563078676, "grad_norm": 3.3755929470062256, "learning_rate": 3.3765026539765834e-05, "loss": 0.666, "step": 125 }, { "epoch": 0.012345092825601823, "grad_norm": 4.81097936630249, "learning_rate": 2.991522876735154e-05, "loss": 0.9961, "step": 130 }, { "epoch": 0.012819904088124971, "grad_norm": 3.1247382164001465, "learning_rate": 2.6202630348146324e-05, "loss": 0.7048, "step": 135 }, { "epoch": 0.013294715350648117, "grad_norm": 4.710264205932617, "learning_rate": 2.2652592093878666e-05, "loss": 1.0437, "step": 140 }, { "epoch": 0.013769526613171264, "grad_norm": 4.46340799331665, "learning_rate": 1.928936436551661e-05, "loss": 0.6061, "step": 145 }, { "epoch": 0.014244337875694412, "grad_norm": 5.0146379470825195, "learning_rate": 1.6135921418712956e-05, "loss": 0.7518, "step": 150 }, { "epoch": 0.014244337875694412, "eval_loss": 0.9467316269874573, "eval_runtime": 216.0208, "eval_samples_per_second": 20.526, "eval_steps_per_second": 10.263, "step": 150 }, { "epoch": 0.014719149138217558, "grad_norm": 4.144189834594727, "learning_rate": 1.3213804466343421e-05, "loss": 1.2056, "step": 155 }, { "epoch": 0.015193960400740706, "grad_norm": 3.242121696472168, "learning_rate": 1.0542974530180327e-05, "loss": 1.0121, "step": 160 }, { "epoch": 0.01566877166326385, "grad_norm": 3.6226155757904053, "learning_rate": 8.141676086873572e-06, "loss": 0.7815, "step": 165 }, { "epoch": 0.016143582925787, "grad_norm": 5.783359527587891, "learning_rate": 6.026312439675552e-06, "loss": 0.9146, "step": 170 }, { "epoch": 0.016618394188310147, "grad_norm": 5.85654878616333, "learning_rate": 4.2113336672471245e-06, "loss": 0.8128, "step": 175 }, { "epoch": 0.017093205450833295, "grad_norm": 4.632771015167236, "learning_rate": 2.7091379149682685e-06, "loss": 0.9454, "step": 180 }, { "epoch": 0.017568016713356442, "grad_norm": 5.443551063537598, "learning_rate": 1.5299867030334814e-06, "loss": 0.8043, "step": 185 }, { "epoch": 0.018042827975879586, "grad_norm": 4.281557559967041, "learning_rate": 6.819348298638839e-07, "loss": 0.8816, "step": 190 }, { "epoch": 0.018517639238402734, "grad_norm": 4.872448921203613, "learning_rate": 1.7077534966650766e-07, "loss": 0.8756, "step": 195 }, { "epoch": 0.01899245050092588, "grad_norm": 5.247133255004883, "learning_rate": 0.0, "loss": 0.7511, "step": 200 }, { "epoch": 0.01899245050092588, "eval_loss": 0.942703127861023, "eval_runtime": 215.9816, "eval_samples_per_second": 20.53, "eval_steps_per_second": 10.265, "step": 200 } ], "logging_steps": 5, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.00038932283392e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }