{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.07217610970768676, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00036088054853843375, "eval_loss": 0.7114889621734619, "eval_runtime": 77.4719, "eval_samples_per_second": 15.064, "eval_steps_per_second": 7.538, "step": 1 }, { "epoch": 0.001804402742692169, "grad_norm": 0.7100138664245605, "learning_rate": 5e-05, "loss": 0.6525, "step": 5 }, { "epoch": 0.003608805485384338, "grad_norm": 0.5259695649147034, "learning_rate": 0.0001, "loss": 0.602, "step": 10 }, { "epoch": 0.0054132082280765065, "grad_norm": 0.8943156003952026, "learning_rate": 9.98292246503335e-05, "loss": 0.5045, "step": 15 }, { "epoch": 0.007217610970768676, "grad_norm": 1.1441904306411743, "learning_rate": 9.931806517013612e-05, "loss": 0.3771, "step": 20 }, { "epoch": 0.009022013713460845, "grad_norm": 0.9108137488365173, "learning_rate": 9.847001329696653e-05, "loss": 0.2869, "step": 25 }, { "epoch": 0.010826416456153013, "grad_norm": 0.5597217082977295, "learning_rate": 9.729086208503174e-05, "loss": 0.2311, "step": 30 }, { "epoch": 0.012630819198845183, "grad_norm": 0.4469228684902191, "learning_rate": 9.578866633275288e-05, "loss": 0.2195, "step": 35 }, { "epoch": 0.014435221941537351, "grad_norm": 0.5604591369628906, "learning_rate": 9.397368756032445e-05, "loss": 0.1978, "step": 40 }, { "epoch": 0.01623962468422952, "grad_norm": 0.858860194683075, "learning_rate": 9.185832391312644e-05, "loss": 0.1818, "step": 45 }, { "epoch": 0.01804402742692169, "grad_norm": 0.38272762298583984, "learning_rate": 8.945702546981969e-05, "loss": 0.158, "step": 50 }, { "epoch": 0.01804402742692169, "eval_loss": 0.15961506962776184, "eval_runtime": 77.5459, "eval_samples_per_second": 15.049, "eval_steps_per_second": 7.531, "step": 50 }, { "epoch": 0.019848430169613858, "grad_norm": 0.5038506388664246, "learning_rate": 8.678619553365659e-05, "loss": 0.165, "step": 55 }, { "epoch": 0.021652832912306026, "grad_norm": 0.3890664875507355, "learning_rate": 8.386407858128706e-05, "loss": 0.1747, "step": 60 }, { "epoch": 0.023457235654998194, "grad_norm": 0.5154931545257568, "learning_rate": 8.07106356344834e-05, "loss": 0.1722, "step": 65 }, { "epoch": 0.025261638397690366, "grad_norm": 0.4071500599384308, "learning_rate": 7.734740790612136e-05, "loss": 0.1353, "step": 70 }, { "epoch": 0.027066041140382534, "grad_norm": 1.1991043090820312, "learning_rate": 7.379736965185368e-05, "loss": 0.1566, "step": 75 }, { "epoch": 0.028870443883074703, "grad_norm": 0.5283339619636536, "learning_rate": 7.008477123264848e-05, "loss": 0.138, "step": 80 }, { "epoch": 0.03067484662576687, "grad_norm": 0.5083453059196472, "learning_rate": 6.623497346023418e-05, "loss": 0.1465, "step": 85 }, { "epoch": 0.03247924936845904, "grad_norm": 0.42737603187561035, "learning_rate": 6.227427435703997e-05, "loss": 0.146, "step": 90 }, { "epoch": 0.03428365211115121, "grad_norm": 0.3830859363079071, "learning_rate": 5.8229729514036705e-05, "loss": 0.1378, "step": 95 }, { "epoch": 0.03608805485384338, "grad_norm": 0.5730387568473816, "learning_rate": 5.4128967273616625e-05, "loss": 0.1328, "step": 100 }, { "epoch": 0.03608805485384338, "eval_loss": 0.12604902684688568, "eval_runtime": 77.5118, "eval_samples_per_second": 15.056, "eval_steps_per_second": 7.534, "step": 100 }, { "epoch": 0.03789245759653555, "grad_norm": 0.6017920970916748, "learning_rate": 5e-05, "loss": 0.1328, "step": 105 }, { 
"epoch": 0.039696860339227716, "grad_norm": 0.5493671894073486, "learning_rate": 4.5871032726383386e-05, "loss": 0.1146, "step": 110 }, { "epoch": 0.041501263081919884, "grad_norm": 0.3780815005302429, "learning_rate": 4.17702704859633e-05, "loss": 0.1355, "step": 115 }, { "epoch": 0.04330566582461205, "grad_norm": 0.4471195936203003, "learning_rate": 3.772572564296005e-05, "loss": 0.128, "step": 120 }, { "epoch": 0.04511006856730422, "grad_norm": 0.405582070350647, "learning_rate": 3.3765026539765834e-05, "loss": 0.1148, "step": 125 }, { "epoch": 0.04691447130999639, "grad_norm": 0.5052685141563416, "learning_rate": 2.991522876735154e-05, "loss": 0.1239, "step": 130 }, { "epoch": 0.04871887405268856, "grad_norm": 0.5250007510185242, "learning_rate": 2.6202630348146324e-05, "loss": 0.1355, "step": 135 }, { "epoch": 0.05052327679538073, "grad_norm": 0.5293309092521667, "learning_rate": 2.2652592093878666e-05, "loss": 0.136, "step": 140 }, { "epoch": 0.0523276795380729, "grad_norm": 0.6472458243370056, "learning_rate": 1.928936436551661e-05, "loss": 0.1243, "step": 145 }, { "epoch": 0.05413208228076507, "grad_norm": 0.6686806082725525, "learning_rate": 1.6135921418712956e-05, "loss": 0.1317, "step": 150 }, { "epoch": 0.05413208228076507, "eval_loss": 0.11311367899179459, "eval_runtime": 77.9072, "eval_samples_per_second": 14.979, "eval_steps_per_second": 7.496, "step": 150 }, { "epoch": 0.05593648502345724, "grad_norm": 0.39626944065093994, "learning_rate": 1.3213804466343421e-05, "loss": 0.1199, "step": 155 }, { "epoch": 0.057740887766149405, "grad_norm": 0.5094817280769348, "learning_rate": 1.0542974530180327e-05, "loss": 0.1201, "step": 160 }, { "epoch": 0.059545290508841574, "grad_norm": 0.5557631254196167, "learning_rate": 8.141676086873572e-06, "loss": 0.1154, "step": 165 }, { "epoch": 0.06134969325153374, "grad_norm": 0.6784803867340088, "learning_rate": 6.026312439675552e-06, "loss": 0.1254, "step": 170 }, { "epoch": 0.06315409599422592, "grad_norm": 0.3143410086631775, "learning_rate": 4.2113336672471245e-06, "loss": 0.115, "step": 175 }, { "epoch": 0.06495849873691809, "grad_norm": 0.5348233580589294, "learning_rate": 2.7091379149682685e-06, "loss": 0.1222, "step": 180 }, { "epoch": 0.06676290147961025, "grad_norm": 0.29956620931625366, "learning_rate": 1.5299867030334814e-06, "loss": 0.1101, "step": 185 }, { "epoch": 0.06856730422230242, "grad_norm": 0.3928939998149872, "learning_rate": 6.819348298638839e-07, "loss": 0.1205, "step": 190 }, { "epoch": 0.07037170696499459, "grad_norm": 0.42990320920944214, "learning_rate": 1.7077534966650766e-07, "loss": 0.1136, "step": 195 }, { "epoch": 0.07217610970768676, "grad_norm": 0.6331201791763306, "learning_rate": 0.0, "loss": 0.124, "step": 200 }, { "epoch": 0.07217610970768676, "eval_loss": 0.11012110859155655, "eval_runtime": 77.7727, "eval_samples_per_second": 15.005, "eval_steps_per_second": 7.509, "step": 200 } ], "logging_steps": 5, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.698847801724109e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }