{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9993935718617344,
  "eval_steps": 500,
  "global_step": 412,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02425712553062462,
      "grad_norm": 86.94617462158203,
      "learning_rate": 1.1904761904761906e-06,
      "loss": 2.9554,
      "step": 10
    },
    {
      "epoch": 0.04851425106124924,
      "grad_norm": 29.724809646606445,
      "learning_rate": 2.380952380952381e-06,
      "loss": 1.5779,
      "step": 20
    },
    {
      "epoch": 0.07277137659187387,
      "grad_norm": 16.428190231323242,
      "learning_rate": 3.5714285714285718e-06,
      "loss": 1.2023,
      "step": 30
    },
    {
      "epoch": 0.09702850212249849,
      "grad_norm": 17.30933380126953,
      "learning_rate": 4.761904761904762e-06,
      "loss": 1.1538,
      "step": 40
    },
    {
      "epoch": 0.1212856276531231,
      "grad_norm": 15.137809753417969,
      "learning_rate": 4.994234734765043e-06,
      "loss": 1.0789,
      "step": 50
    },
    {
      "epoch": 0.14554275318374774,
      "grad_norm": 11.950181007385254,
      "learning_rate": 4.9708589101037306e-06,
      "loss": 1.0103,
      "step": 60
    },
    {
      "epoch": 0.16979987871437235,
      "grad_norm": 14.55068302154541,
      "learning_rate": 4.92968049037552e-06,
      "loss": 1.0078,
      "step": 70
    },
    {
      "epoch": 0.19405700424499697,
      "grad_norm": 27.079729080200195,
      "learning_rate": 4.870996167038154e-06,
      "loss": 1.0301,
      "step": 80
    },
    {
      "epoch": 0.2183141297756216,
      "grad_norm": 16.41851043701172,
      "learning_rate": 4.7952287619860276e-06,
      "loss": 1.0042,
      "step": 90
    },
    {
      "epoch": 0.2425712553062462,
      "grad_norm": 14.759162902832031,
      "learning_rate": 4.702924181108745e-06,
      "loss": 0.9602,
      "step": 100
    },
    {
      "epoch": 0.2668283808368708,
      "grad_norm": 14.426393508911133,
      "learning_rate": 4.594747481026685e-06,
      "loss": 0.978,
      "step": 110
    },
    {
      "epoch": 0.2910855063674955,
      "grad_norm": 11.475569725036621,
      "learning_rate": 4.471478077342798e-06,
      "loss": 0.9809,
      "step": 120
    },
    {
      "epoch": 0.31534263189812006,
      "grad_norm": 14.514723777770996,
      "learning_rate": 4.334004128935342e-06,
      "loss": 0.981,
      "step": 130
    },
    {
      "epoch": 0.3395997574287447,
      "grad_norm": 11.994640350341797,
      "learning_rate": 4.183316138752799e-06,
      "loss": 0.9829,
      "step": 140
    },
    {
      "epoch": 0.3638568829593693,
      "grad_norm": 81.57245635986328,
      "learning_rate": 4.020499817217441e-06,
      "loss": 0.9799,
      "step": 150
    },
    {
      "epoch": 0.38811400848999394,
      "grad_norm": 13.99566650390625,
      "learning_rate": 3.84672825965686e-06,
      "loss": 0.9717,
      "step": 160
    },
    {
      "epoch": 0.41237113402061853,
      "grad_norm": 12.517782211303711,
      "learning_rate": 3.663253494125244e-06,
      "loss": 0.9327,
      "step": 170
    },
    {
      "epoch": 0.4366282595512432,
      "grad_norm": 13.505069732666016,
      "learning_rate": 3.4713974605125634e-06,
      "loss": 0.9839,
      "step": 180
    },
    {
      "epoch": 0.46088538508186777,
      "grad_norm": 12.123452186584473,
      "learning_rate": 3.272542485937369e-06,
      "loss": 0.9508,
      "step": 190
    },
    {
      "epoch": 0.4851425106124924,
      "grad_norm": 29.056503295898438,
      "learning_rate": 3.0681213250482255e-06,
      "loss": 0.9075,
      "step": 200
    },
    {
      "epoch": 0.509399636143117,
      "grad_norm": 11.140923500061035,
      "learning_rate": 2.8596068369936386e-06,
      "loss": 0.9405,
      "step": 210
    },
    {
      "epoch": 0.5336567616737417,
      "grad_norm": 14.713190078735352,
      "learning_rate": 2.648501373438142e-06,
      "loss": 0.8641,
      "step": 220
    },
    {
      "epoch": 0.5579138872043663,
      "grad_norm": 19.95207405090332,
      "learning_rate": 2.436325954084122e-06,
      "loss": 0.9529,
      "step": 230
    },
    {
      "epoch": 0.582171012734991,
      "grad_norm": 11.159050941467285,
      "learning_rate": 2.2246093076900145e-06,
      "loss": 0.9201,
      "step": 240
    },
    {
      "epoch": 0.6064281382656155,
      "grad_norm": 12.661371231079102,
      "learning_rate": 2.014876857544562e-06,
      "loss": 0.9249,
      "step": 250
    },
    {
      "epoch": 0.6306852637962401,
      "grad_norm": 12.855792045593262,
      "learning_rate": 1.8086397307570724e-06,
      "loss": 0.9223,
      "step": 260
    },
    {
      "epoch": 0.6549423893268648,
      "grad_norm": 12.543617248535156,
      "learning_rate": 1.6073838705519618e-06,
      "loss": 0.9151,
      "step": 270
    },
    {
      "epoch": 0.6791995148574894,
      "grad_norm": 11.495418548583984,
      "learning_rate": 1.4125593300137767e-06,
      "loss": 0.8661,
      "step": 280
    },
    {
      "epoch": 0.7034566403881141,
      "grad_norm": 15.451556205749512,
      "learning_rate": 1.2255698244214863e-06,
      "loss": 0.946,
      "step": 290
    },
    {
      "epoch": 0.7277137659187386,
      "grad_norm": 14.111011505126953,
      "learning_rate": 1.0477626174477403e-06,
      "loss": 0.9041,
      "step": 300
    },
    {
      "epoch": 0.7519708914493632,
      "grad_norm": 9.91163444519043,
      "learning_rate": 8.804188140932251e-07,
      "loss": 0.8724,
      "step": 310
    },
    {
      "epoch": 0.7762280169799879,
      "grad_norm": 17.55638313293457,
      "learning_rate": 7.247441302957858e-07,
      "loss": 0.888,
      "step": 320
    },
    {
      "epoch": 0.8004851425106125,
      "grad_norm": 16.491750717163086,
      "learning_rate": 5.818602057194589e-07,
      "loss": 0.8579,
      "step": 330
    },
    {
      "epoch": 0.8247422680412371,
      "grad_norm": 10.457083702087402,
      "learning_rate": 4.527965223149958e-07,
      "loss": 0.8965,
      "step": 340
    },
    {
      "epoch": 0.8489993935718617,
      "grad_norm": 10.995795249938965,
      "learning_rate": 3.3848298687881143e-07,
      "loss": 0.8624,
      "step": 350
    },
    {
      "epoch": 0.8732565191024864,
      "grad_norm": 15.046560287475586,
      "learning_rate": 2.397432310532133e-07,
      "loss": 0.8749,
      "step": 360
    },
    {
      "epoch": 0.897513644633111,
      "grad_norm": 12.04796314239502,
      "learning_rate": 1.5728867704154076e-07,
      "loss": 0.857,
      "step": 370
    },
    {
      "epoch": 0.9217707701637355,
      "grad_norm": 14.930426597595215,
      "learning_rate": 9.171341179489034e-08,
      "loss": 0.8997,
      "step": 380
    },
    {
      "epoch": 0.9460278956943602,
      "grad_norm": 16.0552978515625,
      "learning_rate": 4.348990660201669e-08,
      "loss": 0.8803,
      "step": 390
    },
    {
      "epoch": 0.9702850212249848,
      "grad_norm": 11.000052452087402,
      "learning_rate": 1.296561292287446e-08,
      "loss": 0.95,
      "step": 400
    },
    {
      "epoch": 0.9945421467556095,
      "grad_norm": 14.236842155456543,
      "learning_rate": 3.604589928837832e-10,
      "loss": 0.8863,
      "step": 410
    },
    {
      "epoch": 0.9993935718617344,
      "step": 412,
      "total_flos": 4.874908633965527e+17,
      "train_loss": 1.011520906270129,
      "train_runtime": 2109.7946,
      "train_samples_per_second": 25.008,
      "train_steps_per_second": 0.195
    }
  ],
  "logging_steps": 10,
  "max_steps": 412,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.874908633965527e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}