alexzyqi's picture
Upload trainer_state.json
67271ad verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9993935718617344,
"eval_steps": 500,
"global_step": 412,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02425712553062462,
"grad_norm": 86.94617462158203,
"learning_rate": 1.1904761904761906e-06,
"loss": 2.9554,
"step": 10
},
{
"epoch": 0.04851425106124924,
"grad_norm": 29.724809646606445,
"learning_rate": 2.380952380952381e-06,
"loss": 1.5779,
"step": 20
},
{
"epoch": 0.07277137659187387,
"grad_norm": 16.428190231323242,
"learning_rate": 3.5714285714285718e-06,
"loss": 1.2023,
"step": 30
},
{
"epoch": 0.09702850212249849,
"grad_norm": 17.30933380126953,
"learning_rate": 4.761904761904762e-06,
"loss": 1.1538,
"step": 40
},
{
"epoch": 0.1212856276531231,
"grad_norm": 15.137809753417969,
"learning_rate": 4.994234734765043e-06,
"loss": 1.0789,
"step": 50
},
{
"epoch": 0.14554275318374774,
"grad_norm": 11.950181007385254,
"learning_rate": 4.9708589101037306e-06,
"loss": 1.0103,
"step": 60
},
{
"epoch": 0.16979987871437235,
"grad_norm": 14.55068302154541,
"learning_rate": 4.92968049037552e-06,
"loss": 1.0078,
"step": 70
},
{
"epoch": 0.19405700424499697,
"grad_norm": 27.079729080200195,
"learning_rate": 4.870996167038154e-06,
"loss": 1.0301,
"step": 80
},
{
"epoch": 0.2183141297756216,
"grad_norm": 16.41851043701172,
"learning_rate": 4.7952287619860276e-06,
"loss": 1.0042,
"step": 90
},
{
"epoch": 0.2425712553062462,
"grad_norm": 14.759162902832031,
"learning_rate": 4.702924181108745e-06,
"loss": 0.9602,
"step": 100
},
{
"epoch": 0.2668283808368708,
"grad_norm": 14.426393508911133,
"learning_rate": 4.594747481026685e-06,
"loss": 0.978,
"step": 110
},
{
"epoch": 0.2910855063674955,
"grad_norm": 11.475569725036621,
"learning_rate": 4.471478077342798e-06,
"loss": 0.9809,
"step": 120
},
{
"epoch": 0.31534263189812006,
"grad_norm": 14.514723777770996,
"learning_rate": 4.334004128935342e-06,
"loss": 0.981,
"step": 130
},
{
"epoch": 0.3395997574287447,
"grad_norm": 11.994640350341797,
"learning_rate": 4.183316138752799e-06,
"loss": 0.9829,
"step": 140
},
{
"epoch": 0.3638568829593693,
"grad_norm": 81.57245635986328,
"learning_rate": 4.020499817217441e-06,
"loss": 0.9799,
"step": 150
},
{
"epoch": 0.38811400848999394,
"grad_norm": 13.99566650390625,
"learning_rate": 3.84672825965686e-06,
"loss": 0.9717,
"step": 160
},
{
"epoch": 0.41237113402061853,
"grad_norm": 12.517782211303711,
"learning_rate": 3.663253494125244e-06,
"loss": 0.9327,
"step": 170
},
{
"epoch": 0.4366282595512432,
"grad_norm": 13.505069732666016,
"learning_rate": 3.4713974605125634e-06,
"loss": 0.9839,
"step": 180
},
{
"epoch": 0.46088538508186777,
"grad_norm": 12.123452186584473,
"learning_rate": 3.272542485937369e-06,
"loss": 0.9508,
"step": 190
},
{
"epoch": 0.4851425106124924,
"grad_norm": 29.056503295898438,
"learning_rate": 3.0681213250482255e-06,
"loss": 0.9075,
"step": 200
},
{
"epoch": 0.509399636143117,
"grad_norm": 11.140923500061035,
"learning_rate": 2.8596068369936386e-06,
"loss": 0.9405,
"step": 210
},
{
"epoch": 0.5336567616737417,
"grad_norm": 14.713190078735352,
"learning_rate": 2.648501373438142e-06,
"loss": 0.8641,
"step": 220
},
{
"epoch": 0.5579138872043663,
"grad_norm": 19.95207405090332,
"learning_rate": 2.436325954084122e-06,
"loss": 0.9529,
"step": 230
},
{
"epoch": 0.582171012734991,
"grad_norm": 11.159050941467285,
"learning_rate": 2.2246093076900145e-06,
"loss": 0.9201,
"step": 240
},
{
"epoch": 0.6064281382656155,
"grad_norm": 12.661371231079102,
"learning_rate": 2.014876857544562e-06,
"loss": 0.9249,
"step": 250
},
{
"epoch": 0.6306852637962401,
"grad_norm": 12.855792045593262,
"learning_rate": 1.8086397307570724e-06,
"loss": 0.9223,
"step": 260
},
{
"epoch": 0.6549423893268648,
"grad_norm": 12.543617248535156,
"learning_rate": 1.6073838705519618e-06,
"loss": 0.9151,
"step": 270
},
{
"epoch": 0.6791995148574894,
"grad_norm": 11.495418548583984,
"learning_rate": 1.4125593300137767e-06,
"loss": 0.8661,
"step": 280
},
{
"epoch": 0.7034566403881141,
"grad_norm": 15.451556205749512,
"learning_rate": 1.2255698244214863e-06,
"loss": 0.946,
"step": 290
},
{
"epoch": 0.7277137659187386,
"grad_norm": 14.111011505126953,
"learning_rate": 1.0477626174477403e-06,
"loss": 0.9041,
"step": 300
},
{
"epoch": 0.7519708914493632,
"grad_norm": 9.91163444519043,
"learning_rate": 8.804188140932251e-07,
"loss": 0.8724,
"step": 310
},
{
"epoch": 0.7762280169799879,
"grad_norm": 17.55638313293457,
"learning_rate": 7.247441302957858e-07,
"loss": 0.888,
"step": 320
},
{
"epoch": 0.8004851425106125,
"grad_norm": 16.491750717163086,
"learning_rate": 5.818602057194589e-07,
"loss": 0.8579,
"step": 330
},
{
"epoch": 0.8247422680412371,
"grad_norm": 10.457083702087402,
"learning_rate": 4.527965223149958e-07,
"loss": 0.8965,
"step": 340
},
{
"epoch": 0.8489993935718617,
"grad_norm": 10.995795249938965,
"learning_rate": 3.3848298687881143e-07,
"loss": 0.8624,
"step": 350
},
{
"epoch": 0.8732565191024864,
"grad_norm": 15.046560287475586,
"learning_rate": 2.397432310532133e-07,
"loss": 0.8749,
"step": 360
},
{
"epoch": 0.897513644633111,
"grad_norm": 12.04796314239502,
"learning_rate": 1.5728867704154076e-07,
"loss": 0.857,
"step": 370
},
{
"epoch": 0.9217707701637355,
"grad_norm": 14.930426597595215,
"learning_rate": 9.171341179489034e-08,
"loss": 0.8997,
"step": 380
},
{
"epoch": 0.9460278956943602,
"grad_norm": 16.0552978515625,
"learning_rate": 4.348990660201669e-08,
"loss": 0.8803,
"step": 390
},
{
"epoch": 0.9702850212249848,
"grad_norm": 11.000052452087402,
"learning_rate": 1.296561292287446e-08,
"loss": 0.95,
"step": 400
},
{
"epoch": 0.9945421467556095,
"grad_norm": 14.236842155456543,
"learning_rate": 3.604589928837832e-10,
"loss": 0.8863,
"step": 410
},
{
"epoch": 0.9993935718617344,
"step": 412,
"total_flos": 4.874908633965527e+17,
"train_loss": 1.011520906270129,
"train_runtime": 2109.7946,
"train_samples_per_second": 25.008,
"train_steps_per_second": 0.195
}
],
"logging_steps": 10,
"max_steps": 412,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.874908633965527e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}