{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8022053756030324, "eval_steps": 291, "global_step": 1164, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06891798759476224, "grad_norm": 4.408621311187744, "learning_rate": 9.92330596776479e-06, "loss": 0.6326, "step": 100 }, { "epoch": 0.13783597518952448, "grad_norm": 5.029026985168457, "learning_rate": 9.62094968320582e-06, "loss": 0.5365, "step": 200 }, { "epoch": 0.2005513439007581, "eval_accuracy": 0.7271186845182692, "eval_loss": 0.5419921875, "eval_runtime": 641.9062, "eval_samples_per_second": 130.552, "eval_steps_per_second": 8.16, "step": 291 }, { "epoch": 0.2067539627842867, "grad_norm": 5.613715648651123, "learning_rate": 9.102819483545054e-06, "loss": 0.4928, "step": 300 }, { "epoch": 0.27567195037904896, "grad_norm": 5.0579833984375, "learning_rate": 8.39310930928775e-06, "loss": 0.4794, "step": 400 }, { "epoch": 0.34458993797381116, "grad_norm": 6.232191562652588, "learning_rate": 7.524958872697738e-06, "loss": 0.4521, "step": 500 }, { "epoch": 0.4011026878015162, "eval_accuracy": 0.7484546908188349, "eval_loss": 0.50341796875, "eval_runtime": 627.5856, "eval_samples_per_second": 133.531, "eval_steps_per_second": 8.346, "step": 582 }, { "epoch": 0.4135079255685734, "grad_norm": 6.169258117675781, "learning_rate": 6.5389062084740715e-06, "loss": 0.4361, "step": 600 }, { "epoch": 0.4824259131633356, "grad_norm": 6.576643466949463, "learning_rate": 5.4809947618915035e-06, "loss": 0.427, "step": 700 }, { "epoch": 0.5513439007580979, "grad_norm": 9.296501159667969, "learning_rate": 4.400623403379149e-06, "loss": 0.3994, "step": 800 }, { "epoch": 0.6016540317022743, "eval_accuracy": 0.7576788143481062, "eval_loss": 0.4892578125, "eval_runtime": 627.8256, "eval_samples_per_second": 133.48, "eval_steps_per_second": 8.343, "step": 873 }, { "epoch": 0.6202618883528601, "grad_norm": 7.580721378326416, "learning_rate": 3.3482397624875462e-06, "loss": 0.3907, "step": 900 }, { "epoch": 0.6891798759476223, "grad_norm": 7.101686477661133, "learning_rate": 2.3729845903505033e-06, "loss": 0.37, "step": 1000 }, { "epoch": 0.7580978635423845, "grad_norm": 7.940699577331543, "learning_rate": 1.5281793713805377e-06, "loss": 0.3596, "step": 1100 }, { "epoch": 0.8022053756030324, "eval_accuracy": 0.7692656499844872, "eval_loss": 0.468505859375, "eval_runtime": 628.1575, "eval_samples_per_second": 133.409, "eval_steps_per_second": 8.339, "step": 1164 } ], "logging_steps": 100, "max_steps": 1451, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 291, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }