{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "global_step": 12272, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16, "learning_rate": 4.808099739243807e-05, "loss": 1.1464, "step": 500 }, { "epoch": 0.33, "learning_rate": 4.604383963494133e-05, "loss": 1.121, "step": 1000 }, { "epoch": 0.49, "learning_rate": 4.400668187744459e-05, "loss": 1.1114, "step": 1500 }, { "epoch": 0.65, "learning_rate": 4.196952411994785e-05, "loss": 1.1053, "step": 2000 }, { "epoch": 0.81, "learning_rate": 3.9932366362451115e-05, "loss": 1.1036, "step": 2500 }, { "epoch": 0.98, "learning_rate": 3.7899282920469364e-05, "loss": 1.1021, "step": 3000 }, { "epoch": 1.0, "eval_accuracy": 0.3860417727967397, "eval_loss": 1.0996018648147583, "eval_runtime": 27.3661, "eval_samples_per_second": 358.655, "eval_steps_per_second": 11.218, "step": 3068 }, { "epoch": 1.14, "learning_rate": 3.586212516297262e-05, "loss": 1.0988, "step": 3500 }, { "epoch": 1.3, "learning_rate": 3.3824967405475883e-05, "loss": 1.0957, "step": 4000 }, { "epoch": 1.47, "learning_rate": 3.178780964797914e-05, "loss": 1.0942, "step": 4500 }, { "epoch": 1.63, "learning_rate": 2.9754726205997396e-05, "loss": 1.092, "step": 5000 }, { "epoch": 1.79, "learning_rate": 2.7717568448500652e-05, "loss": 1.0908, "step": 5500 }, { "epoch": 1.96, "learning_rate": 2.5680410691003915e-05, "loss": 1.0902, "step": 6000 }, { "epoch": 2.0, "eval_accuracy": 0.39470198675496687, "eval_loss": 1.0913997888565063, "eval_runtime": 27.3848, "eval_samples_per_second": 358.41, "eval_steps_per_second": 11.211, "step": 6136 }, { "epoch": 2.12, "learning_rate": 2.364325293350717e-05, "loss": 1.088, "step": 6500 }, { "epoch": 2.28, "learning_rate": 2.1610169491525427e-05, "loss": 1.0875, "step": 7000 }, { "epoch": 2.44, "learning_rate": 1.9573011734028683e-05, "loss": 1.0873, "step": 7500 }, { "epoch": 2.61, "learning_rate": 1.7535853976531943e-05, "loss": 1.0853, "step": 8000 }, { "epoch": 2.77, "learning_rate": 1.5498696219035202e-05, "loss": 1.0865, "step": 8500 }, { "epoch": 2.93, "learning_rate": 1.3465612777053455e-05, "loss": 1.0871, "step": 9000 }, { "epoch": 3.0, "eval_accuracy": 0.3979623025980642, "eval_loss": 1.0878193378448486, "eval_runtime": 27.352, "eval_samples_per_second": 358.84, "eval_steps_per_second": 11.224, "step": 9204 }, { "epoch": 3.1, "learning_rate": 1.1428455019556714e-05, "loss": 1.0866, "step": 9500 }, { "epoch": 3.26, "learning_rate": 9.391297262059974e-06, "loss": 1.0847, "step": 10000 }, { "epoch": 3.42, "learning_rate": 7.354139504563234e-06, "loss": 1.0844, "step": 10500 }, { "epoch": 3.59, "learning_rate": 5.316981747066493e-06, "loss": 1.0849, "step": 11000 }, { "epoch": 3.75, "learning_rate": 3.283898305084746e-06, "loss": 1.0832, "step": 11500 }, { "epoch": 3.91, "learning_rate": 1.2467405475880053e-06, "loss": 1.0871, "step": 12000 }, { "epoch": 4.0, "eval_accuracy": 0.4031584309730005, "eval_loss": 1.0859721899032593, "eval_runtime": 27.3545, "eval_samples_per_second": 358.808, "eval_steps_per_second": 11.223, "step": 12272 }, { "epoch": 4.0, "step": 12272, "total_flos": 5.465758560807813e+17, "train_loss": 1.0949462632001457, "train_runtime": 12056.6812, "train_samples_per_second": 130.285, "train_steps_per_second": 1.018 } ], "max_steps": 12272, "num_train_epochs": 4, "total_flos": 5.465758560807813e+17, "trial_name": null, "trial_params": null }