{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.7540983606557377, "eval_steps": 500, "global_step": 21, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13114754098360656, "grad_norm": 39.05072126696723, "learning_rate": 1.6666666666666667e-05, "loss": 2.4376, "step": 1 }, { "epoch": 0.26229508196721313, "grad_norm": 39.016375245765836, "learning_rate": 3.3333333333333335e-05, "loss": 2.4339, "step": 2 }, { "epoch": 0.39344262295081966, "grad_norm": 32.661267867985714, "learning_rate": 5e-05, "loss": 1.9547, "step": 3 }, { "epoch": 0.5245901639344263, "grad_norm": 13.966533792864706, "learning_rate": 4.962019382530521e-05, "loss": 2.0498, "step": 4 }, { "epoch": 0.6557377049180327, "grad_norm": 16.547294305437767, "learning_rate": 4.849231551964771e-05, "loss": 1.9131, "step": 5 }, { "epoch": 0.7868852459016393, "grad_norm": 7.978971381469852, "learning_rate": 4.665063509461097e-05, "loss": 1.5865, "step": 6 }, { "epoch": 0.9180327868852459, "grad_norm": 4.596921803156147, "learning_rate": 4.415111107797445e-05, "loss": 1.39, "step": 7 }, { "epoch": 0.9180327868852459, "eval_loss": 1.3376487493515015, "eval_runtime": 17.0154, "eval_samples_per_second": 11.284, "eval_steps_per_second": 1.41, "step": 7 }, { "epoch": 1.0491803278688525, "grad_norm": 4.90093363719236, "learning_rate": 4.1069690242163484e-05, "loss": 1.2949, "step": 8 }, { "epoch": 1.180327868852459, "grad_norm": 4.892431290254693, "learning_rate": 3.7500000000000003e-05, "loss": 1.2567, "step": 9 }, { "epoch": 1.3114754098360657, "grad_norm": 2.793814810608094, "learning_rate": 3.355050358314172e-05, "loss": 1.1411, "step": 10 }, { "epoch": 1.4426229508196722, "grad_norm": 2.132448746823284, "learning_rate": 2.9341204441673266e-05, "loss": 1.0593, "step": 11 }, { "epoch": 1.5737704918032787, "grad_norm": 1.6858973805764268, "learning_rate": 2.5e-05, "loss": 1.0091, "step": 12 }, { "epoch": 1.7049180327868854, "grad_norm": 2.079280289342001, "learning_rate": 2.0658795558326743e-05, "loss": 0.977, "step": 13 }, { "epoch": 1.8360655737704918, "grad_norm": 1.5764028928084828, "learning_rate": 1.6449496416858284e-05, "loss": 0.9782, "step": 14 }, { "epoch": 1.9672131147540983, "grad_norm": 1.3883568841727698, "learning_rate": 1.2500000000000006e-05, "loss": 0.9541, "step": 15 }, { "epoch": 1.9672131147540983, "eval_loss": 1.0214608907699585, "eval_runtime": 16.9378, "eval_samples_per_second": 11.336, "eval_steps_per_second": 1.417, "step": 15 }, { "epoch": 2.098360655737705, "grad_norm": 1.4882737473631815, "learning_rate": 8.930309757836517e-06, "loss": 0.8517, "step": 16 }, { "epoch": 2.2295081967213113, "grad_norm": 1.3413297585539747, "learning_rate": 5.848888922025553e-06, "loss": 0.7971, "step": 17 }, { "epoch": 2.360655737704918, "grad_norm": 1.1809008492312185, "learning_rate": 3.3493649053890326e-06, "loss": 0.7859, "step": 18 }, { "epoch": 2.4918032786885247, "grad_norm": 1.0943970674897627, "learning_rate": 1.5076844803522922e-06, "loss": 0.7711, "step": 19 }, { "epoch": 2.6229508196721314, "grad_norm": 1.1052118316853639, "learning_rate": 3.7980617469479953e-07, "loss": 0.7603, "step": 20 }, { "epoch": 2.7540983606557377, "grad_norm": 1.0269995716510345, "learning_rate": 0.0, "loss": 0.745, "step": 21 }, { "epoch": 2.7540983606557377, "eval_loss": 1.0121965408325195, "eval_runtime": 9.0436, "eval_samples_per_second": 21.231, "eval_steps_per_second": 2.654, "step": 21 }, { "epoch": 2.7540983606557377, "step": 21, "total_flos": 3971544514560.0, "train_loss": 1.29271438008263, "train_runtime": 605.3238, "train_samples_per_second": 2.409, "train_steps_per_second": 0.035 } ], "logging_steps": 1, "max_steps": 21, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3971544514560.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }