{
"best_metric": 0.0171243567019701,
"best_model_checkpoint": "D:\\models/outputsstar/checkpoint-1350",
"epoch": 5.0,
"eval_steps": 500,
"global_step": 1350,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.037037037037037035,
"grad_norm": 1.6629393100738525,
"learning_rate": 1.9851851851851855e-05,
"loss": 0.5627,
"step": 10
},
{
"epoch": 0.07407407407407407,
"grad_norm": 1.3468966484069824,
"learning_rate": 1.9703703703703704e-05,
"loss": 0.3602,
"step": 20
},
{
"epoch": 0.1111111111111111,
"grad_norm": 1.9818885326385498,
"learning_rate": 1.9555555555555557e-05,
"loss": 0.2409,
"step": 30
},
{
"epoch": 0.14814814814814814,
"grad_norm": 0.6068008542060852,
"learning_rate": 1.9407407407407407e-05,
"loss": 0.1136,
"step": 40
},
{
"epoch": 0.18518518518518517,
"grad_norm": 0.5792471170425415,
"learning_rate": 1.925925925925926e-05,
"loss": 0.0912,
"step": 50
},
{
"epoch": 0.2222222222222222,
"grad_norm": 0.36238569021224976,
"learning_rate": 1.9111111111111113e-05,
"loss": 0.0793,
"step": 60
},
{
"epoch": 0.25925925925925924,
"grad_norm": 0.40632012486457825,
"learning_rate": 1.8962962962962966e-05,
"loss": 0.0783,
"step": 70
},
{
"epoch": 0.2962962962962963,
"grad_norm": 0.3358970582485199,
"learning_rate": 1.8814814814814816e-05,
"loss": 0.0976,
"step": 80
},
{
"epoch": 0.3333333333333333,
"grad_norm": 0.22383123636245728,
"learning_rate": 1.866666666666667e-05,
"loss": 0.0453,
"step": 90
},
{
"epoch": 0.37037037037037035,
"grad_norm": 0.17972876131534576,
"learning_rate": 1.851851851851852e-05,
"loss": 0.0435,
"step": 100
},
{
"epoch": 0.4074074074074074,
"grad_norm": 0.17635494470596313,
"learning_rate": 1.837037037037037e-05,
"loss": 0.0255,
"step": 110
},
{
"epoch": 0.4444444444444444,
"grad_norm": 2.2137696743011475,
"learning_rate": 1.8222222222222224e-05,
"loss": 0.0251,
"step": 120
},
{
"epoch": 0.48148148148148145,
"grad_norm": 0.1810089647769928,
"learning_rate": 1.8074074074074074e-05,
"loss": 0.0588,
"step": 130
},
{
"epoch": 0.5185185185185185,
"grad_norm": 0.1574196070432663,
"learning_rate": 1.7925925925925927e-05,
"loss": 0.043,
"step": 140
},
{
"epoch": 0.5555555555555556,
"grad_norm": 0.13450497388839722,
"learning_rate": 1.7777777777777777e-05,
"loss": 0.0647,
"step": 150
},
{
"epoch": 0.5925925925925926,
"grad_norm": 0.14055782556533813,
"learning_rate": 1.7629629629629633e-05,
"loss": 0.025,
"step": 160
},
{
"epoch": 0.6296296296296297,
"grad_norm": 2.997166395187378,
"learning_rate": 1.7481481481481483e-05,
"loss": 0.0926,
"step": 170
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.12140078842639923,
"learning_rate": 1.7333333333333336e-05,
"loss": 0.015,
"step": 180
},
{
"epoch": 0.7037037037037037,
"grad_norm": 0.11859618127346039,
"learning_rate": 1.7185185185185185e-05,
"loss": 0.058,
"step": 190
},
{
"epoch": 0.7407407407407407,
"grad_norm": 0.09671270847320557,
"learning_rate": 1.7037037037037038e-05,
"loss": 0.0127,
"step": 200
},
{
"epoch": 0.7777777777777778,
"grad_norm": 0.100140281021595,
"learning_rate": 1.688888888888889e-05,
"loss": 0.0384,
"step": 210
},
{
"epoch": 0.8148148148148148,
"grad_norm": 0.10083166509866714,
"learning_rate": 1.674074074074074e-05,
"loss": 0.0121,
"step": 220
},
{
"epoch": 0.8518518518518519,
"grad_norm": 1.3739676475524902,
"learning_rate": 1.6592592592592594e-05,
"loss": 0.089,
"step": 230
},
{
"epoch": 0.8888888888888888,
"grad_norm": 0.10838634520769119,
"learning_rate": 1.6444444444444444e-05,
"loss": 0.1098,
"step": 240
},
{
"epoch": 0.9259259259259259,
"grad_norm": 0.1810838133096695,
"learning_rate": 1.6296296296296297e-05,
"loss": 0.0287,
"step": 250
},
{
"epoch": 0.9629629629629629,
"grad_norm": 0.11047308892011642,
"learning_rate": 1.614814814814815e-05,
"loss": 0.0266,
"step": 260
},
{
"epoch": 1.0,
"grad_norm": 2.2907960414886475,
"learning_rate": 1.6000000000000003e-05,
"loss": 0.0394,
"step": 270
},
{
"epoch": 1.0,
"eval_accuracy": 0.989501312335958,
"eval_loss": 0.04065331816673279,
"eval_runtime": 52.3322,
"eval_samples_per_second": 7.28,
"eval_steps_per_second": 0.917,
"step": 270
},
{
"epoch": 1.037037037037037,
"grad_norm": 0.08815015852451324,
"learning_rate": 1.5851851851851852e-05,
"loss": 0.0255,
"step": 280
},
{
"epoch": 1.074074074074074,
"grad_norm": 0.09346853941679001,
"learning_rate": 1.5703703703703705e-05,
"loss": 0.0107,
"step": 290
},
{
"epoch": 1.1111111111111112,
"grad_norm": 0.07555428147315979,
"learning_rate": 1.555555555555556e-05,
"loss": 0.0188,
"step": 300
},
{
"epoch": 1.1481481481481481,
"grad_norm": 0.08554250746965408,
"learning_rate": 1.5407407407407408e-05,
"loss": 0.056,
"step": 310
},
{
"epoch": 1.1851851851851851,
"grad_norm": 0.07133428752422333,
"learning_rate": 1.525925925925926e-05,
"loss": 0.0376,
"step": 320
},
{
"epoch": 1.2222222222222223,
"grad_norm": 0.07578225433826447,
"learning_rate": 1.5111111111111112e-05,
"loss": 0.0115,
"step": 330
},
{
"epoch": 1.2592592592592593,
"grad_norm": 0.07077538967132568,
"learning_rate": 1.4962962962962964e-05,
"loss": 0.0091,
"step": 340
},
{
"epoch": 1.2962962962962963,
"grad_norm": 0.06836975365877151,
"learning_rate": 1.4814814814814815e-05,
"loss": 0.0089,
"step": 350
},
{
"epoch": 1.3333333333333333,
"grad_norm": 23.20952606201172,
"learning_rate": 1.4666666666666666e-05,
"loss": 0.0995,
"step": 360
},
{
"epoch": 1.3703703703703702,
"grad_norm": 0.07452990114688873,
"learning_rate": 1.4518518518518521e-05,
"loss": 0.0089,
"step": 370
},
{
"epoch": 1.4074074074074074,
"grad_norm": 0.07127617299556732,
"learning_rate": 1.4370370370370372e-05,
"loss": 0.0104,
"step": 380
},
{
"epoch": 1.4444444444444444,
"grad_norm": 0.06184624508023262,
"learning_rate": 1.4222222222222224e-05,
"loss": 0.0081,
"step": 390
},
{
"epoch": 1.4814814814814814,
"grad_norm": 0.08141285926103592,
"learning_rate": 1.4074074074074075e-05,
"loss": 0.0104,
"step": 400
},
{
"epoch": 1.5185185185185186,
"grad_norm": 0.06248854473233223,
"learning_rate": 1.3925925925925928e-05,
"loss": 0.095,
"step": 410
},
{
"epoch": 1.5555555555555556,
"grad_norm": 0.07375632971525192,
"learning_rate": 1.377777777777778e-05,
"loss": 0.0918,
"step": 420
},
{
"epoch": 1.5925925925925926,
"grad_norm": 0.06345756351947784,
"learning_rate": 1.362962962962963e-05,
"loss": 0.0077,
"step": 430
},
{
"epoch": 1.6296296296296298,
"grad_norm": 0.06055911257863045,
"learning_rate": 1.3481481481481482e-05,
"loss": 0.0237,
"step": 440
},
{
"epoch": 1.6666666666666665,
"grad_norm": 0.05871434509754181,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.0072,
"step": 450
},
{
"epoch": 1.7037037037037037,
"grad_norm": 0.0945742130279541,
"learning_rate": 1.3185185185185185e-05,
"loss": 0.0674,
"step": 460
},
{
"epoch": 1.7407407407407407,
"grad_norm": 0.0636238381266594,
"learning_rate": 1.303703703703704e-05,
"loss": 0.0623,
"step": 470
},
{
"epoch": 1.7777777777777777,
"grad_norm": 0.08100485056638718,
"learning_rate": 1.288888888888889e-05,
"loss": 0.0103,
"step": 480
},
{
"epoch": 1.8148148148148149,
"grad_norm": 0.06018243730068207,
"learning_rate": 1.2740740740740742e-05,
"loss": 0.0072,
"step": 490
},
{
"epoch": 1.8518518518518519,
"grad_norm": 0.06805320084095001,
"learning_rate": 1.2592592592592593e-05,
"loss": 0.0602,
"step": 500
},
{
"epoch": 1.8888888888888888,
"grad_norm": 9.21893310546875,
"learning_rate": 1.2444444444444446e-05,
"loss": 0.0842,
"step": 510
},
{
"epoch": 1.925925925925926,
"grad_norm": 0.07045555114746094,
"learning_rate": 1.2296296296296298e-05,
"loss": 0.1148,
"step": 520
},
{
"epoch": 1.9629629629629628,
"grad_norm": 0.07674137502908707,
"learning_rate": 1.2148148148148149e-05,
"loss": 0.0094,
"step": 530
},
{
"epoch": 2.0,
"grad_norm": 62.091854095458984,
"learning_rate": 1.2e-05,
"loss": 0.0973,
"step": 540
},
{
"epoch": 2.0,
"eval_accuracy": 0.984251968503937,
"eval_loss": 0.07090622931718826,
"eval_runtime": 52.2703,
"eval_samples_per_second": 7.289,
"eval_steps_per_second": 0.918,
"step": 540
},
{
"epoch": 2.037037037037037,
"grad_norm": 0.05459068343043327,
"learning_rate": 1.1851851851851852e-05,
"loss": 0.0663,
"step": 550
},
{
"epoch": 2.074074074074074,
"grad_norm": 0.06096257269382477,
"learning_rate": 1.1703703703703703e-05,
"loss": 0.0113,
"step": 560
},
{
"epoch": 2.111111111111111,
"grad_norm": 0.05534420534968376,
"learning_rate": 1.1555555555555556e-05,
"loss": 0.0072,
"step": 570
},
{
"epoch": 2.148148148148148,
"grad_norm": 3.7594122886657715,
"learning_rate": 1.1407407407407409e-05,
"loss": 0.0592,
"step": 580
},
{
"epoch": 2.185185185185185,
"grad_norm": 0.05278675630688667,
"learning_rate": 1.125925925925926e-05,
"loss": 0.007,
"step": 590
},
{
"epoch": 2.2222222222222223,
"grad_norm": 0.050314392894506454,
"learning_rate": 1.1111111111111113e-05,
"loss": 0.0081,
"step": 600
},
{
"epoch": 2.259259259259259,
"grad_norm": 0.0523914210498333,
"learning_rate": 1.0962962962962965e-05,
"loss": 0.0104,
"step": 610
},
{
"epoch": 2.2962962962962963,
"grad_norm": 0.06894738972187042,
"learning_rate": 1.0814814814814816e-05,
"loss": 0.0071,
"step": 620
},
{
"epoch": 2.3333333333333335,
"grad_norm": 0.05035197734832764,
"learning_rate": 1.0666666666666667e-05,
"loss": 0.0062,
"step": 630
},
{
"epoch": 2.3703703703703702,
"grad_norm": 0.04814285784959793,
"learning_rate": 1.0518518518518519e-05,
"loss": 0.1157,
"step": 640
},
{
"epoch": 2.4074074074074074,
"grad_norm": 2.7678489685058594,
"learning_rate": 1.037037037037037e-05,
"loss": 0.0565,
"step": 650
},
{
"epoch": 2.4444444444444446,
"grad_norm": 3.185380220413208,
"learning_rate": 1.0222222222222223e-05,
"loss": 0.0508,
"step": 660
},
{
"epoch": 2.4814814814814814,
"grad_norm": 0.05677470192313194,
"learning_rate": 1.0074074074074074e-05,
"loss": 0.0074,
"step": 670
},
{
"epoch": 2.5185185185185186,
"grad_norm": 0.3886967599391937,
"learning_rate": 9.925925925925927e-06,
"loss": 0.0129,
"step": 680
},
{
"epoch": 2.5555555555555554,
"grad_norm": 0.0454292856156826,
"learning_rate": 9.777777777777779e-06,
"loss": 0.0958,
"step": 690
},
{
"epoch": 2.5925925925925926,
"grad_norm": 0.04909510165452957,
"learning_rate": 9.62962962962963e-06,
"loss": 0.0064,
"step": 700
},
{
"epoch": 2.6296296296296298,
"grad_norm": 0.04817191883921623,
"learning_rate": 9.481481481481483e-06,
"loss": 0.0192,
"step": 710
},
{
"epoch": 2.6666666666666665,
"grad_norm": 3.5723814964294434,
"learning_rate": 9.333333333333334e-06,
"loss": 0.2254,
"step": 720
},
{
"epoch": 2.7037037037037037,
"grad_norm": 0.8698053359985352,
"learning_rate": 9.185185185185186e-06,
"loss": 0.0099,
"step": 730
},
{
"epoch": 2.7407407407407405,
"grad_norm": 3.2689051628112793,
"learning_rate": 9.037037037037037e-06,
"loss": 0.0534,
"step": 740
},
{
"epoch": 2.7777777777777777,
"grad_norm": 0.1893489956855774,
"learning_rate": 8.888888888888888e-06,
"loss": 0.0465,
"step": 750
},
{
"epoch": 2.814814814814815,
"grad_norm": 0.05454478785395622,
"learning_rate": 8.740740740740741e-06,
"loss": 0.0106,
"step": 760
},
{
"epoch": 2.851851851851852,
"grad_norm": 0.0487472228705883,
"learning_rate": 8.592592592592593e-06,
"loss": 0.0914,
"step": 770
},
{
"epoch": 2.888888888888889,
"grad_norm": 0.05404801294207573,
"learning_rate": 8.444444444444446e-06,
"loss": 0.0168,
"step": 780
},
{
"epoch": 2.925925925925926,
"grad_norm": 0.043936073780059814,
"learning_rate": 8.296296296296297e-06,
"loss": 0.0079,
"step": 790
},
{
"epoch": 2.962962962962963,
"grad_norm": 0.05625942721962929,
"learning_rate": 8.148148148148148e-06,
"loss": 0.0148,
"step": 800
},
{
"epoch": 3.0,
"grad_norm": 0.04617544263601303,
"learning_rate": 8.000000000000001e-06,
"loss": 0.0057,
"step": 810
},
{
"epoch": 3.0,
"eval_accuracy": 0.9868766404199475,
"eval_loss": 0.04252076894044876,
"eval_runtime": 52.5906,
"eval_samples_per_second": 7.245,
"eval_steps_per_second": 0.913,
"step": 810
},
{
"epoch": 3.037037037037037,
"grad_norm": 0.5136130452156067,
"learning_rate": 7.851851851851853e-06,
"loss": 0.0257,
"step": 820
},
{
"epoch": 3.074074074074074,
"grad_norm": 2.9422848224639893,
"learning_rate": 7.703703703703704e-06,
"loss": 0.0544,
"step": 830
},
{
"epoch": 3.111111111111111,
"grad_norm": 0.0419733040034771,
"learning_rate": 7.555555555555556e-06,
"loss": 0.0073,
"step": 840
},
{
"epoch": 3.148148148148148,
"grad_norm": 0.052699312567710876,
"learning_rate": 7.4074074074074075e-06,
"loss": 0.0068,
"step": 850
},
{
"epoch": 3.185185185185185,
"grad_norm": 0.04446011409163475,
"learning_rate": 7.2592592592592605e-06,
"loss": 0.0086,
"step": 860
},
{
"epoch": 3.2222222222222223,
"grad_norm": 0.04285755380988121,
"learning_rate": 7.111111111111112e-06,
"loss": 0.0063,
"step": 870
},
{
"epoch": 3.259259259259259,
"grad_norm": 0.04666002467274666,
"learning_rate": 6.962962962962964e-06,
"loss": 0.0586,
"step": 880
},
{
"epoch": 3.2962962962962963,
"grad_norm": 0.04253947734832764,
"learning_rate": 6.814814814814815e-06,
"loss": 0.0561,
"step": 890
},
{
"epoch": 3.3333333333333335,
"grad_norm": 0.04394235461950302,
"learning_rate": 6.666666666666667e-06,
"loss": 0.0059,
"step": 900
},
{
"epoch": 3.3703703703703702,
"grad_norm": 0.04241425171494484,
"learning_rate": 6.51851851851852e-06,
"loss": 0.0056,
"step": 910
},
{
"epoch": 3.4074074074074074,
"grad_norm": 0.08582015335559845,
"learning_rate": 6.370370370370371e-06,
"loss": 0.0067,
"step": 920
},
{
"epoch": 3.4444444444444446,
"grad_norm": 0.03971382975578308,
"learning_rate": 6.222222222222223e-06,
"loss": 0.0393,
"step": 930
},
{
"epoch": 3.4814814814814814,
"grad_norm": 0.04343000799417496,
"learning_rate": 6.0740740740740745e-06,
"loss": 0.0056,
"step": 940
},
{
"epoch": 3.5185185185185186,
"grad_norm": 0.04006602242588997,
"learning_rate": 5.925925925925926e-06,
"loss": 0.0533,
"step": 950
},
{
"epoch": 3.5555555555555554,
"grad_norm": 0.040533244609832764,
"learning_rate": 5.777777777777778e-06,
"loss": 0.0277,
"step": 960
},
{
"epoch": 3.5925925925925926,
"grad_norm": 0.0381636768579483,
"learning_rate": 5.62962962962963e-06,
"loss": 0.022,
"step": 970
},
{
"epoch": 3.6296296296296298,
"grad_norm": 0.0465327687561512,
"learning_rate": 5.481481481481482e-06,
"loss": 0.0086,
"step": 980
},
{
"epoch": 3.6666666666666665,
"grad_norm": 0.039545219391584396,
"learning_rate": 5.333333333333334e-06,
"loss": 0.0053,
"step": 990
},
{
"epoch": 3.7037037037037037,
"grad_norm": 0.038673460483551025,
"learning_rate": 5.185185185185185e-06,
"loss": 0.0787,
"step": 1000
},
{
"epoch": 3.7407407407407405,
"grad_norm": 0.04055389016866684,
"learning_rate": 5.037037037037037e-06,
"loss": 0.0062,
"step": 1010
},
{
"epoch": 3.7777777777777777,
"grad_norm": 9.543475151062012,
"learning_rate": 4.888888888888889e-06,
"loss": 0.044,
"step": 1020
},
{
"epoch": 3.814814814814815,
"grad_norm": 0.050516992807388306,
"learning_rate": 4.7407407407407415e-06,
"loss": 0.0531,
"step": 1030
},
{
"epoch": 3.851851851851852,
"grad_norm": 0.040959686040878296,
"learning_rate": 4.592592592592593e-06,
"loss": 0.0054,
"step": 1040
},
{
"epoch": 3.888888888888889,
"grad_norm": 0.2083148956298828,
"learning_rate": 4.444444444444444e-06,
"loss": 0.0055,
"step": 1050
},
{
"epoch": 3.925925925925926,
"grad_norm": 0.04079532250761986,
"learning_rate": 4.296296296296296e-06,
"loss": 0.0758,
"step": 1060
},
{
"epoch": 3.962962962962963,
"grad_norm": 3.1428794860839844,
"learning_rate": 4.1481481481481485e-06,
"loss": 0.1065,
"step": 1070
},
{
"epoch": 4.0,
"grad_norm": 0.04060327261686325,
"learning_rate": 4.000000000000001e-06,
"loss": 0.0403,
"step": 1080
},
{
"epoch": 4.0,
"eval_accuracy": 0.9868766404199475,
"eval_loss": 0.04992913454771042,
"eval_runtime": 52.4443,
"eval_samples_per_second": 7.265,
"eval_steps_per_second": 0.915,
"step": 1080
},
{
"epoch": 4.037037037037037,
"grad_norm": 0.037297219038009644,
"learning_rate": 3.851851851851852e-06,
"loss": 0.005,
"step": 1090
},
{
"epoch": 4.074074074074074,
"grad_norm": 0.040778644382953644,
"learning_rate": 3.7037037037037037e-06,
"loss": 0.005,
"step": 1100
},
{
"epoch": 4.111111111111111,
"grad_norm": 0.03788766264915466,
"learning_rate": 3.555555555555556e-06,
"loss": 0.005,
"step": 1110
},
{
"epoch": 4.148148148148148,
"grad_norm": 0.04125545546412468,
"learning_rate": 3.4074074074074077e-06,
"loss": 0.0047,
"step": 1120
},
{
"epoch": 4.185185185185185,
"grad_norm": 0.03932119160890579,
"learning_rate": 3.25925925925926e-06,
"loss": 0.0209,
"step": 1130
},
{
"epoch": 4.222222222222222,
"grad_norm": 0.040045417845249176,
"learning_rate": 3.1111111111111116e-06,
"loss": 0.005,
"step": 1140
},
{
"epoch": 4.2592592592592595,
"grad_norm": 0.04132600873708725,
"learning_rate": 2.962962962962963e-06,
"loss": 0.0567,
"step": 1150
},
{
"epoch": 4.296296296296296,
"grad_norm": 0.16079649329185486,
"learning_rate": 2.814814814814815e-06,
"loss": 0.0058,
"step": 1160
},
{
"epoch": 4.333333333333333,
"grad_norm": 0.03831535950303078,
"learning_rate": 2.666666666666667e-06,
"loss": 0.0473,
"step": 1170
},
{
"epoch": 4.37037037037037,
"grad_norm": 0.03685208782553673,
"learning_rate": 2.5185185185185186e-06,
"loss": 0.0051,
"step": 1180
},
{
"epoch": 4.407407407407407,
"grad_norm": 0.08284825831651688,
"learning_rate": 2.3703703703703707e-06,
"loss": 0.0053,
"step": 1190
},
{
"epoch": 4.444444444444445,
"grad_norm": 0.037671931087970734,
"learning_rate": 2.222222222222222e-06,
"loss": 0.0048,
"step": 1200
},
{
"epoch": 4.481481481481482,
"grad_norm": 0.048201784491539,
"learning_rate": 2.0740740740740742e-06,
"loss": 0.0046,
"step": 1210
},
{
"epoch": 4.518518518518518,
"grad_norm": 0.036273613572120667,
"learning_rate": 1.925925925925926e-06,
"loss": 0.008,
"step": 1220
},
{
"epoch": 4.555555555555555,
"grad_norm": 0.03639749437570572,
"learning_rate": 1.777777777777778e-06,
"loss": 0.0268,
"step": 1230
},
{
"epoch": 4.592592592592593,
"grad_norm": 0.03778412565588951,
"learning_rate": 1.62962962962963e-06,
"loss": 0.0047,
"step": 1240
},
{
"epoch": 4.62962962962963,
"grad_norm": 0.037292417138814926,
"learning_rate": 1.4814814814814815e-06,
"loss": 0.0522,
"step": 1250
},
{
"epoch": 4.666666666666667,
"grad_norm": 0.03608781844377518,
"learning_rate": 1.3333333333333334e-06,
"loss": 0.0156,
"step": 1260
},
{
"epoch": 4.703703703703704,
"grad_norm": 0.04082287847995758,
"learning_rate": 1.1851851851851854e-06,
"loss": 0.0072,
"step": 1270
},
{
"epoch": 4.7407407407407405,
"grad_norm": 0.03990180045366287,
"learning_rate": 1.0370370370370371e-06,
"loss": 0.027,
"step": 1280
},
{
"epoch": 4.777777777777778,
"grad_norm": 0.2720341086387634,
"learning_rate": 8.88888888888889e-07,
"loss": 0.0384,
"step": 1290
},
{
"epoch": 4.814814814814815,
"grad_norm": 0.21008038520812988,
"learning_rate": 7.407407407407407e-07,
"loss": 0.0495,
"step": 1300
},
{
"epoch": 4.851851851851852,
"grad_norm": 0.038784921169281006,
"learning_rate": 5.925925925925927e-07,
"loss": 0.0605,
"step": 1310
},
{
"epoch": 4.888888888888889,
"grad_norm": 0.03550861403346062,
"learning_rate": 4.444444444444445e-07,
"loss": 0.007,
"step": 1320
},
{
"epoch": 4.925925925925926,
"grad_norm": 0.03672279790043831,
"learning_rate": 2.9629629629629634e-07,
"loss": 0.005,
"step": 1330
},
{
"epoch": 4.962962962962963,
"grad_norm": 0.04202316328883171,
"learning_rate": 1.4814814814814817e-07,
"loss": 0.0475,
"step": 1340
},
{
"epoch": 5.0,
"grad_norm": 0.0387168787419796,
"learning_rate": 0.0,
"loss": 0.0608,
"step": 1350
},
{
"epoch": 5.0,
"eval_accuracy": 0.994750656167979,
"eval_loss": 0.0171243567019701,
"eval_runtime": 52.6363,
"eval_samples_per_second": 7.238,
"eval_steps_per_second": 0.912,
"step": 1350
},
{
"epoch": 5.0,
"step": 1350,
"total_flos": 8.345887281491558e+17,
"train_loss": 0.04418781167379132,
"train_runtime": 3285.5702,
"train_samples_per_second": 3.278,
"train_steps_per_second": 0.411
}
],
"logging_steps": 10,
"max_steps": 1350,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.345887281491558e+17,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}
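
# The JSON above follows the schema that the Hugging Face Trainer writes to
# trainer_state.json. Below is a minimal, hedged sketch (not part of the original
# file) of how the log_history could be loaded and summarized with Python's
# standard json module; the filename "trainer_state.json" is assumed.
#
# import json
#
# with open("trainer_state.json") as f:
#     state = json.load(f)
#
# # Split the log into per-step training entries and per-epoch eval entries.
# train_logs = [e for e in state["log_history"] if "loss" in e]
# eval_logs = [e for e in state["log_history"] if "eval_loss" in e]
#
# print(f"best checkpoint: {state['best_model_checkpoint']} "
#       f"(eval_loss={state['best_metric']:.4f})")
# for e in eval_logs:
#     print(f"epoch {e['epoch']:.0f}: eval_loss={e['eval_loss']:.4f}, "
#           f"eval_accuracy={e['eval_accuracy']:.4f}")
#
# # Training loss is logged every `logging_steps` (10) optimizer steps.
# last = train_logs[-1]
# print(f"last logged train loss: {last['loss']} at step {last['step']}")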