Training in progress, step 1000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/config.json +1 -1
last-checkpoint/generation_config.json +1 -1
last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +90 -957
last-checkpoint/training_args.bin +1 -1

last-checkpoint/config.json CHANGED Viewed

@@ -45,7 +45,7 @@
   "scale_embedding": false,
   "suppress_tokens": [],
   "torch_dtype": "float32",
-  "transformers_version": "4.40.0",
   "use_cache": false,
   "use_weighted_layer_sum": false,
   "vocab_size": 51865

   "scale_embedding": false,
   "suppress_tokens": [],
   "torch_dtype": "float32",
+  "transformers_version": "4.40.1",
   "use_cache": false,
   "use_weighted_layer_sum": false,
   "vocab_size": 51865

last-checkpoint/generation_config.json CHANGED Viewed

@@ -262,5 +262,5 @@
     "transcribe": 50359,
     "translate": 50358
   },
-  "transformers_version": "4.40.0"
 }

     "transcribe": 50359,
     "translate": 50358
   },
+  "transformers_version": "4.40.1"
 }

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b810b7f0fe83995300521a646137d370d36d549e50472fcf6c1da2a177919ec3
 size 966995080

 version https://git-lfs.github.com/spec/v1
+oid sha256:bf5c2c11f76e4faaf0e73a2e3d071ca3a84e4e462cec1f6ed89d2fd5ee8fcff9
 size 966995080

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a70bf563a60fc19871fd719056a40461333af72260151f6bade95b428cbc93c
 size 1925064044

 version https://git-lfs.github.com/spec/v1
+oid sha256:5440cc8dfb8091af2b204e8cabf6857c82af16c9a6dd368570a677a0b7da3055
 size 1925064044

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7c93a397e9322e49f4ed50d18f810eaf2c39ecdb2985c95d248cd7a2fa2aa47
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:51bea2a28f129bf069e5a02ae44edfec13f51109373355626e9228154b0d41f5
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd54311344b834087a4b1c20d06544579c7f43d33908960b6b3b61734dbde46d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2d378b4bd36bf44babbc26f567786bedc31fd4875330753b97c0f677a367397
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,1168 +1,301 @@
 {
-  "best_metric": 29.080310880829014,
-  "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-4000",
-  "epoch": 2.5806451612903225,
   "eval_steps": 1000,
-  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.016129032258064516,
-      "grad_norm": 229.72373962402344,
       "learning_rate": 5.376344086021506e-07,
-      "loss": 7.9674,
       "step": 25
     },
     {
       "epoch": 0.03225806451612903,
-      "grad_norm": 50.686302185058594,
       "learning_rate": 1.0752688172043011e-06,
-      "loss": 5.7026,
       "step": 50
     },
     {
       "epoch": 0.04838709677419355,
-      "grad_norm": 32.474510192871094,
       "learning_rate": 1.6129032258064516e-06,
-      "loss": 3.7065,
       "step": 75
     },
     {
       "epoch": 0.06451612903225806,
-      "grad_norm": 30.973085403442383,
       "learning_rate": 2.1505376344086023e-06,
-      "loss": 2.6906,
       "step": 100
     },
     {
       "epoch": 0.08064516129032258,
-      "grad_norm": 28.370464324951172,
       "learning_rate": 2.688172043010753e-06,
-      "loss": 2.3087,
       "step": 125
     },
     {
       "epoch": 0.0967741935483871,
-      "grad_norm": 29.259729385375977,
       "learning_rate": 3.225806451612903e-06,
-      "loss": 2.0589,
       "step": 150
     },
     {
       "epoch": 0.11290322580645161,
-      "grad_norm": 29.08380699157715,
       "learning_rate": 3.763440860215054e-06,
-      "loss": 1.8731,
       "step": 175
     },
     {
       "epoch": 0.12903225806451613,
-      "grad_norm": 22.745624542236328,
       "learning_rate": 4.3010752688172045e-06,
-      "loss": 1.5257,
       "step": 200
     },
     {
       "epoch": 0.14516129032258066,
-      "grad_norm": 16.694580078125,
       "learning_rate": 4.838709677419355e-06,
-      "loss": 1.4005,
       "step": 225
     },
     {
       "epoch": 0.16129032258064516,
-      "grad_norm": 18.02663803100586,
       "learning_rate": 5.376344086021506e-06,
-      "loss": 1.3308,
       "step": 250
     },
     {
       "epoch": 0.1774193548387097,
-      "grad_norm": 14.609949111938477,
       "learning_rate": 5.9139784946236566e-06,
-      "loss": 1.2143,
       "step": 275
     },
     {
       "epoch": 0.1935483870967742,
-      "grad_norm": 16.727527618408203,
       "learning_rate": 6.451612903225806e-06,
-      "loss": 1.1925,
       "step": 300
     },
     {
       "epoch": 0.20967741935483872,
-      "grad_norm": 15.254867553710938,
       "learning_rate": 6.989247311827958e-06,
-      "loss": 1.1482,
       "step": 325
     },
     {
       "epoch": 0.22580645161290322,
-      "grad_norm": 16.119234085083008,
       "learning_rate": 7.526881720430108e-06,
-      "loss": 1.0825,
       "step": 350
     },
     {
       "epoch": 0.24193548387096775,
-      "grad_norm": 13.577301025390625,
       "learning_rate": 8.064516129032258e-06,
-      "loss": 1.099,
       "step": 375
     },
     {
       "epoch": 0.25806451612903225,
-      "grad_norm": 15.483856201171875,
       "learning_rate": 8.602150537634409e-06,
-      "loss": 1.0654,
       "step": 400
     },
     {
       "epoch": 0.27419354838709675,
-      "grad_norm": 15.842108726501465,
       "learning_rate": 9.13978494623656e-06,
-      "loss": 0.9747,
       "step": 425
     },
     {
       "epoch": 0.2903225806451613,
-      "grad_norm": 13.010821342468262,
       "learning_rate": 9.67741935483871e-06,
-      "loss": 0.9679,
       "step": 450
     },
     {
       "epoch": 0.3064516129032258,
-      "grad_norm": 15.315924644470215,
       "learning_rate": 9.97610513739546e-06,
-      "loss": 0.9001,
       "step": 475
     },
     {
       "epoch": 0.3225806451612903,
-      "grad_norm": 15.252881050109863,
       "learning_rate": 9.916367980884111e-06,
-      "loss": 0.9019,
       "step": 500
     },
     {
       "epoch": 0.3387096774193548,
-      "grad_norm": 15.013239860534668,
       "learning_rate": 9.856630824372761e-06,
-      "loss": 0.9167,
       "step": 525
     },
     {
       "epoch": 0.3548387096774194,
-      "grad_norm": 12.44570255279541,
       "learning_rate": 9.79689366786141e-06,
-      "loss": 0.8644,
       "step": 550
     },
     {
       "epoch": 0.3709677419354839,
-      "grad_norm": 13.266128540039062,
       "learning_rate": 9.737156511350062e-06,
-      "loss": 0.8954,
       "step": 575
     },
     {
       "epoch": 0.3870967741935484,
-      "grad_norm": 13.153059005737305,
       "learning_rate": 9.67741935483871e-06,
-      "loss": 0.8364,
       "step": 600
     },
     {
       "epoch": 0.4032258064516129,
-      "grad_norm": 15.848042488098145,
       "learning_rate": 9.61768219832736e-06,
-      "loss": 0.8667,
       "step": 625
     },
     {
       "epoch": 0.41935483870967744,
-      "grad_norm": 13.445392608642578,
       "learning_rate": 9.557945041816011e-06,
-      "loss": 0.8155,
       "step": 650
     },
     {
       "epoch": 0.43548387096774194,
-      "grad_norm": 13.883005142211914,
       "learning_rate": 9.49820788530466e-06,
-      "loss": 0.8446,
       "step": 675
     },
     {
       "epoch": 0.45161290322580644,
-      "grad_norm": 13.22021198272705,
       "learning_rate": 9.43847072879331e-06,
-      "loss": 0.8255,
       "step": 700
     },
     {
       "epoch": 0.46774193548387094,
-      "grad_norm": 14.165966987609863,
       "learning_rate": 9.37873357228196e-06,
-      "loss": 0.8034,
       "step": 725
     },
     {
       "epoch": 0.4838709677419355,
-      "grad_norm": 12.320103645324707,
       "learning_rate": 9.31899641577061e-06,
-      "loss": 0.7439,
       "step": 750
     },
     {
       "epoch": 0.5,
-      "grad_norm": 13.079719543457031,
       "learning_rate": 9.25925925925926e-06,
-      "loss": 0.7574,
       "step": 775
     },
     {
       "epoch": 0.5161290322580645,
-      "grad_norm": 12.108668327331543,
       "learning_rate": 9.19952210274791e-06,
-      "loss": 0.7844,
       "step": 800
     },
     {
       "epoch": 0.532258064516129,
-      "grad_norm": 12.974024772644043,
       "learning_rate": 9.13978494623656e-06,
-      "loss": 0.783,
       "step": 825
     },
     {
       "epoch": 0.5483870967741935,
-      "grad_norm": 14.670340538024902,
       "learning_rate": 9.08004778972521e-06,
-      "loss": 0.7084,
       "step": 850
     },
     {
       "epoch": 0.5645161290322581,
-      "grad_norm": 15.380485534667969,
       "learning_rate": 9.02031063321386e-06,
-      "loss": 0.7624,
       "step": 875
     },
     {
       "epoch": 0.5806451612903226,
-      "grad_norm": 14.00020694732666,
       "learning_rate": 8.96057347670251e-06,
-      "loss": 0.7031,
       "step": 900
     },
     {
       "epoch": 0.5967741935483871,
-      "grad_norm": 11.307880401611328,
       "learning_rate": 8.90083632019116e-06,
-      "loss": 0.6797,
       "step": 925
     },
     {
       "epoch": 0.6129032258064516,
-      "grad_norm": 14.682994842529297,
       "learning_rate": 8.84109916367981e-06,
-      "loss": 0.6679,
       "step": 950
     },
     {
       "epoch": 0.6290322580645161,
-      "grad_norm": 14.844277381896973,
       "learning_rate": 8.78136200716846e-06,
-      "loss": 0.7079,
       "step": 975
     },
     {
       "epoch": 0.6451612903225806,
-      "grad_norm": 14.752099990844727,
       "learning_rate": 8.72162485065711e-06,
-      "loss": 0.6757,
       "step": 1000
     },
     {
       "epoch": 0.6451612903225806,
-      "eval_cer": 43.3160621761658,
-      "eval_loss": 0.6164932250976562,
-      "eval_runtime": 945.7136,
-      "eval_samples_per_second": 2.412,
-      "eval_steps_per_second": 0.302,
       "step": 1000
-    },
-    {
-      "epoch": 0.6612903225806451,
-      "grad_norm": 12.722296714782715,
-      "learning_rate": 8.66188769414576e-06,
-      "loss": 0.6687,
-      "step": 1025
-    },
-    {
-      "epoch": 0.6774193548387096,
-      "grad_norm": 19.90687370300293,
-      "learning_rate": 8.602150537634409e-06,
-      "loss": 0.6705,
-      "step": 1050
-    },
-    {
-      "epoch": 0.6935483870967742,
-      "grad_norm": 12.18825626373291,
-      "learning_rate": 8.54241338112306e-06,
-      "loss": 0.6708,
-      "step": 1075
-    },
-    {
-      "epoch": 0.7096774193548387,
-      "grad_norm": 12.795165061950684,
-      "learning_rate": 8.48267622461171e-06,
-      "loss": 0.6697,
-      "step": 1100
-    },
-    {
-      "epoch": 0.7258064516129032,
-      "grad_norm": 12.366995811462402,
-      "learning_rate": 8.422939068100358e-06,
-      "loss": 0.6053,
-      "step": 1125
-    },
-    {
-      "epoch": 0.7419354838709677,
-      "grad_norm": 11.671553611755371,
-      "learning_rate": 8.36320191158901e-06,
-      "loss": 0.6733,
-      "step": 1150
-    },
-    {
-      "epoch": 0.7580645161290323,
-      "grad_norm": 14.575772285461426,
-      "learning_rate": 8.303464755077659e-06,
-      "loss": 0.6173,
-      "step": 1175
-    },
-    {
-      "epoch": 0.7741935483870968,
-      "grad_norm": 10.553525924682617,
-      "learning_rate": 8.24372759856631e-06,
-      "loss": 0.6529,
-      "step": 1200
-    },
-    {
-      "epoch": 0.7903225806451613,
-      "grad_norm": 10.87187671661377,
-      "learning_rate": 8.18399044205496e-06,
-      "loss": 0.6273,
-      "step": 1225
-    },
-    {
-      "epoch": 0.8064516129032258,
-      "grad_norm": 13.998808860778809,
-      "learning_rate": 8.124253285543608e-06,
-      "loss": 0.6209,
-      "step": 1250
-    },
-    {
-      "epoch": 0.8225806451612904,
-      "grad_norm": 13.7244234085083,
-      "learning_rate": 8.064516129032258e-06,
-      "loss": 0.6448,
-      "step": 1275
-    },
-    {
-      "epoch": 0.8387096774193549,
-      "grad_norm": 12.110133171081543,
-      "learning_rate": 8.004778972520909e-06,
-      "loss": 0.6347,
-      "step": 1300
-    },
-    {
-      "epoch": 0.8548387096774194,
-      "grad_norm": 15.724874496459961,
-      "learning_rate": 7.945041816009559e-06,
-      "loss": 0.5888,
-      "step": 1325
-    },
-    {
-      "epoch": 0.8709677419354839,
-      "grad_norm": 12.077081680297852,
-      "learning_rate": 7.88530465949821e-06,
-      "loss": 0.6066,
-      "step": 1350
-    },
-    {
-      "epoch": 0.8870967741935484,
-      "grad_norm": 12.401660919189453,
-      "learning_rate": 7.825567502986858e-06,
-      "loss": 0.5831,
-      "step": 1375
-    },
-    {
-      "epoch": 0.9032258064516129,
-      "grad_norm": 12.884041786193848,
-      "learning_rate": 7.765830346475508e-06,
-      "loss": 0.6,
-      "step": 1400
-    },
-    {
-      "epoch": 0.9193548387096774,
-      "grad_norm": 12.485610008239746,
-      "learning_rate": 7.706093189964159e-06,
-      "loss": 0.542,
-      "step": 1425
-    },
-    {
-      "epoch": 0.9354838709677419,
-      "grad_norm": 13.256093978881836,
-      "learning_rate": 7.646356033452809e-06,
-      "loss": 0.5802,
-      "step": 1450
-    },
-    {
-      "epoch": 0.9516129032258065,
-      "grad_norm": 10.507469177246094,
-      "learning_rate": 7.586618876941458e-06,
-      "loss": 0.5468,
-      "step": 1475
-    },
-    {
-      "epoch": 0.967741935483871,
-      "grad_norm": 9.836853981018066,
-      "learning_rate": 7.526881720430108e-06,
-      "loss": 0.5252,
-      "step": 1500
-    },
-    {
-      "epoch": 0.9838709677419355,
-      "grad_norm": 12.627049446105957,
-      "learning_rate": 7.467144563918758e-06,
-      "loss": 0.5501,
-      "step": 1525
-    },
-    {
-      "epoch": 1.0,
-      "grad_norm": 13.873695373535156,
-      "learning_rate": 7.4074074074074075e-06,
-      "loss": 0.5414,
-      "step": 1550
-    },
-    {
-      "epoch": 1.0161290322580645,
-      "grad_norm": 8.701884269714355,
-      "learning_rate": 7.347670250896059e-06,
-      "loss": 0.3627,
-      "step": 1575
-    },
-    {
-      "epoch": 1.032258064516129,
-      "grad_norm": 10.089194297790527,
-      "learning_rate": 7.287933094384708e-06,
-      "loss": 0.3602,
-      "step": 1600
-    },
-    {
-      "epoch": 1.0483870967741935,
-      "grad_norm": 8.33105182647705,
-      "learning_rate": 7.2281959378733575e-06,
-      "loss": 0.3706,
-      "step": 1625
-    },
-    {
-      "epoch": 1.064516129032258,
-      "grad_norm": 12.283960342407227,
-      "learning_rate": 7.168458781362008e-06,
-      "loss": 0.3886,
-      "step": 1650
-    },
-    {
-      "epoch": 1.0806451612903225,
-      "grad_norm": 10.99679183959961,
-      "learning_rate": 7.108721624850657e-06,
-      "loss": 0.4035,
-      "step": 1675
-    },
-    {
-      "epoch": 1.096774193548387,
-      "grad_norm": 8.689805030822754,
-      "learning_rate": 7.048984468339307e-06,
-      "loss": 0.3571,
-      "step": 1700
-    },
-    {
-      "epoch": 1.1129032258064515,
-      "grad_norm": 7.141482353210449,
-      "learning_rate": 6.989247311827958e-06,
-      "loss": 0.3557,
-      "step": 1725
-    },
-    {
-      "epoch": 1.129032258064516,
-      "grad_norm": 10.56028938293457,
-      "learning_rate": 6.929510155316607e-06,
-      "loss": 0.3614,
-      "step": 1750
-    },
-    {
-      "epoch": 1.1451612903225807,
-      "grad_norm": 11.50129508972168,
-      "learning_rate": 6.869772998805258e-06,
-      "loss": 0.3763,
-      "step": 1775
-    },
-    {
-      "epoch": 1.1612903225806452,
-      "grad_norm": 10.562152862548828,
-      "learning_rate": 6.810035842293907e-06,
-      "loss": 0.3497,
-      "step": 1800
-    },
-    {
-      "epoch": 1.1774193548387097,
-      "grad_norm": 12.868457794189453,
-      "learning_rate": 6.7502986857825566e-06,
-      "loss": 0.3562,
-      "step": 1825
-    },
-    {
-      "epoch": 1.1935483870967742,
-      "grad_norm": 11.193254470825195,
-      "learning_rate": 6.690561529271207e-06,
-      "loss": 0.3662,
-      "step": 1850
-    },
-    {
-      "epoch": 1.2096774193548387,
-      "grad_norm": 9.065006256103516,
-      "learning_rate": 6.630824372759857e-06,
-      "loss": 0.3547,
-      "step": 1875
-    },
-    {
-      "epoch": 1.2258064516129032,
-      "grad_norm": 10.713186264038086,
-      "learning_rate": 6.5710872162485075e-06,
-      "loss": 0.3703,
-      "step": 1900
-    },
-    {
-      "epoch": 1.2419354838709677,
-      "grad_norm": 11.31541919708252,
-      "learning_rate": 6.511350059737157e-06,
-      "loss": 0.3814,
-      "step": 1925
-    },
-    {
-      "epoch": 1.2580645161290323,
-      "grad_norm": 12.24356746673584,
-      "learning_rate": 6.451612903225806e-06,
-      "loss": 0.346,
-      "step": 1950
-    },
-    {
-      "epoch": 1.2741935483870968,
-      "grad_norm": 10.248428344726562,
-      "learning_rate": 6.391875746714457e-06,
-      "loss": 0.3581,
-      "step": 1975
-    },
-    {
-      "epoch": 1.2903225806451613,
-      "grad_norm": 9.441059112548828,
-      "learning_rate": 6.332138590203107e-06,
-      "loss": 0.3539,
-      "step": 2000
-    },
-    {
-      "epoch": 1.2903225806451613,
-      "eval_cer": 34.83160621761658,
-      "eval_loss": 0.4711998403072357,
-      "eval_runtime": 956.2234,
-      "eval_samples_per_second": 2.385,
-      "eval_steps_per_second": 0.299,
-      "step": 2000
-    },
-    {
-      "epoch": 1.3064516129032258,
-      "grad_norm": 11.437358856201172,
-      "learning_rate": 6.272401433691757e-06,
-      "loss": 0.377,
-      "step": 2025
-    },
-    {
-      "epoch": 1.3225806451612903,
-      "grad_norm": 8.866403579711914,
-      "learning_rate": 6.212664277180407e-06,
-      "loss": 0.3137,
-      "step": 2050
-    },
-    {
-      "epoch": 1.3387096774193548,
-      "grad_norm": 10.96147632598877,
-      "learning_rate": 6.152927120669057e-06,
-      "loss": 0.3657,
-      "step": 2075
-    },
-    {
-      "epoch": 1.3548387096774195,
-      "grad_norm": 16.82151985168457,
-      "learning_rate": 6.0931899641577065e-06,
-      "loss": 0.3575,
-      "step": 2100
-    },
-    {
-      "epoch": 1.370967741935484,
-      "grad_norm": 10.459049224853516,
-      "learning_rate": 6.033452807646356e-06,
-      "loss": 0.3414,
-      "step": 2125
-    },
-    {
-      "epoch": 1.3870967741935485,
-      "grad_norm": 9.4818696975708,
-      "learning_rate": 5.973715651135007e-06,
-      "loss": 0.3497,
-      "step": 2150
-    },
-    {
-      "epoch": 1.403225806451613,
-      "grad_norm": 8.424386978149414,
-      "learning_rate": 5.9139784946236566e-06,
-      "loss": 0.3414,
-      "step": 2175
-    },
-    {
-      "epoch": 1.4193548387096775,
-      "grad_norm": 10.135176658630371,
-      "learning_rate": 5.854241338112307e-06,
-      "loss": 0.3569,
-      "step": 2200
-    },
-    {
-      "epoch": 1.435483870967742,
-      "grad_norm": 9.196470260620117,
-      "learning_rate": 5.794504181600956e-06,
-      "loss": 0.3767,
-      "step": 2225
-    },
-    {
-      "epoch": 1.4516129032258065,
-      "grad_norm": 9.483991622924805,
-      "learning_rate": 5.734767025089606e-06,
-      "loss": 0.3302,
-      "step": 2250
-    },
-    {
-      "epoch": 1.467741935483871,
-      "grad_norm": 11.096484184265137,
-      "learning_rate": 5.675029868578256e-06,
-      "loss": 0.3648,
-      "step": 2275
-    },
-    {
-      "epoch": 1.4838709677419355,
-      "grad_norm": 11.095719337463379,
-      "learning_rate": 5.615292712066906e-06,
-      "loss": 0.348,
-      "step": 2300
-    },
-    {
-      "epoch": 1.5,
-      "grad_norm": 8.295551300048828,
-      "learning_rate": 5.555555555555557e-06,
-      "loss": 0.3429,
-      "step": 2325
-    },
-    {
-      "epoch": 1.5161290322580645,
-      "grad_norm": 9.586627960205078,
-      "learning_rate": 5.495818399044206e-06,
-      "loss": 0.3101,
-      "step": 2350
-    },
-    {
-      "epoch": 1.532258064516129,
-      "grad_norm": 9.516448974609375,
-      "learning_rate": 5.436081242532856e-06,
-      "loss": 0.3268,
-      "step": 2375
-    },
-    {
-      "epoch": 1.5483870967741935,
-      "grad_norm": 11.90730094909668,
-      "learning_rate": 5.376344086021506e-06,
-      "loss": 0.3218,
-      "step": 2400
-    },
-    {
-      "epoch": 1.564516129032258,
-      "grad_norm": 10.183462142944336,
-      "learning_rate": 5.316606929510155e-06,
-      "loss": 0.3228,
-      "step": 2425
-    },
-    {
-      "epoch": 1.5806451612903225,
-      "grad_norm": 9.693060874938965,
-      "learning_rate": 5.2568697729988065e-06,
-      "loss": 0.3043,
-      "step": 2450
-    },
-    {
-      "epoch": 1.596774193548387,
-      "grad_norm": 10.416152000427246,
-      "learning_rate": 5.197132616487456e-06,
-      "loss": 0.3859,
-      "step": 2475
-    },
-    {
-      "epoch": 1.6129032258064515,
-      "grad_norm": 11.155983924865723,
-      "learning_rate": 5.137395459976105e-06,
-      "loss": 0.321,
-      "step": 2500
-    },
-    {
-      "epoch": 1.629032258064516,
-      "grad_norm": 8.956007957458496,
-      "learning_rate": 5.077658303464756e-06,
-      "loss": 0.3101,
-      "step": 2525
-    },
-    {
-      "epoch": 1.6451612903225805,
-      "grad_norm": 11.339639663696289,
-      "learning_rate": 5.017921146953405e-06,
-      "loss": 0.3276,
-      "step": 2550
-    },
-    {
-      "epoch": 1.661290322580645,
-      "grad_norm": 6.775766372680664,
-      "learning_rate": 4.9581839904420555e-06,
-      "loss": 0.3226,
-      "step": 2575
-    },
-    {
-      "epoch": 1.6774193548387095,
-      "grad_norm": 9.266929626464844,
-      "learning_rate": 4.898446833930705e-06,
-      "loss": 0.317,
-      "step": 2600
-    },
-    {
-      "epoch": 1.6935483870967742,
-      "grad_norm": 7.660613059997559,
-      "learning_rate": 4.838709677419355e-06,
-      "loss": 0.3209,
-      "step": 2625
-    },
-    {
-      "epoch": 1.7096774193548387,
-      "grad_norm": 8.688915252685547,
-      "learning_rate": 4.7789725209080055e-06,
-      "loss": 0.3352,
-      "step": 2650
-    },
-    {
-      "epoch": 1.7258064516129032,
-      "grad_norm": 7.915940761566162,
-      "learning_rate": 4.719235364396655e-06,
-      "loss": 0.3693,
-      "step": 2675
-    },
-    {
-      "epoch": 1.7419354838709677,
-      "grad_norm": 9.707711219787598,
-      "learning_rate": 4.659498207885305e-06,
-      "loss": 0.3232,
-      "step": 2700
-    },
-    {
-      "epoch": 1.7580645161290323,
-      "grad_norm": 9.361932754516602,
-      "learning_rate": 4.599761051373955e-06,
-      "loss": 0.3674,
-      "step": 2725
-    },
-    {
-      "epoch": 1.7741935483870968,
-      "grad_norm": 11.118302345275879,
-      "learning_rate": 4.540023894862605e-06,
-      "loss": 0.3367,
-      "step": 2750
-    },
-    {
-      "epoch": 1.7903225806451613,
-      "grad_norm": 9.054045677185059,
-      "learning_rate": 4.480286738351255e-06,
-      "loss": 0.326,
-      "step": 2775
-    },
-    {
-      "epoch": 1.8064516129032258,
-      "grad_norm": 10.975425720214844,
-      "learning_rate": 4.420549581839905e-06,
-      "loss": 0.3553,
-      "step": 2800
-    },
-    {
-      "epoch": 1.8225806451612905,
-      "grad_norm": 10.127399444580078,
-      "learning_rate": 4.360812425328555e-06,
-      "loss": 0.3321,
-      "step": 2825
-    },
-    {
-      "epoch": 1.838709677419355,
-      "grad_norm": 11.241859436035156,
-      "learning_rate": 4.3010752688172045e-06,
-      "loss": 0.3287,
-      "step": 2850
-    },
-    {
-      "epoch": 1.8548387096774195,
-      "grad_norm": 8.5289888381958,
-      "learning_rate": 4.241338112305855e-06,
-      "loss": 0.2897,
-      "step": 2875
-    },
-    {
-      "epoch": 1.870967741935484,
-      "grad_norm": 15.53427505493164,
-      "learning_rate": 4.181600955794505e-06,
-      "loss": 0.3113,
-      "step": 2900
-    },
-    {
-      "epoch": 1.8870967741935485,
-      "grad_norm": 9.797106742858887,
-      "learning_rate": 4.121863799283155e-06,
-      "loss": 0.3404,
-      "step": 2925
-    },
-    {
-      "epoch": 1.903225806451613,
-      "grad_norm": 11.643150329589844,
-      "learning_rate": 4.062126642771804e-06,
-      "loss": 0.329,
-      "step": 2950
-    },
-    {
-      "epoch": 1.9193548387096775,
-      "grad_norm": 7.331206798553467,
-      "learning_rate": 4.002389486260454e-06,
-      "loss": 0.2925,
-      "step": 2975
-    },
-    {
-      "epoch": 1.935483870967742,
-      "grad_norm": 10.384967803955078,
-      "learning_rate": 3.942652329749105e-06,
-      "loss": 0.2883,
-      "step": 3000
-    },
-    {
-      "epoch": 1.935483870967742,
-      "eval_cer": 31.230569948186528,
-      "eval_loss": 0.4035734534263611,
-      "eval_runtime": 956.3419,
-      "eval_samples_per_second": 2.385,
-      "eval_steps_per_second": 0.299,
-      "step": 3000
-    },
-    {
-      "epoch": 1.9516129032258065,
-      "grad_norm": 9.115569114685059,
-      "learning_rate": 3.882915173237754e-06,
-      "loss": 0.2929,
-      "step": 3025
-    },
-    {
-      "epoch": 1.967741935483871,
-      "grad_norm": 10.029779434204102,
-      "learning_rate": 3.823178016726404e-06,
-      "loss": 0.2939,
-      "step": 3050
-    },
-    {
-      "epoch": 1.9838709677419355,
-      "grad_norm": 10.219369888305664,
-      "learning_rate": 3.763440860215054e-06,
-      "loss": 0.3228,
-      "step": 3075
-    },
-    {
-      "epoch": 2.0,
-      "grad_norm": 10.639910697937012,
-      "learning_rate": 3.7037037037037037e-06,
-      "loss": 0.3236,
-      "step": 3100
-    },
-    {
-      "epoch": 2.0161290322580645,
-      "grad_norm": 4.525390148162842,
-      "learning_rate": 3.643966547192354e-06,
-      "loss": 0.1713,
-      "step": 3125
-    },
-    {
-      "epoch": 2.032258064516129,
-      "grad_norm": 6.762115478515625,
-      "learning_rate": 3.584229390681004e-06,
-      "loss": 0.1892,
-      "step": 3150
-    },
-    {
-      "epoch": 2.0483870967741935,
-      "grad_norm": 8.670353889465332,
-      "learning_rate": 3.5244922341696534e-06,
-      "loss": 0.1902,
-      "step": 3175
-    },
-    {
-      "epoch": 2.064516129032258,
-      "grad_norm": 6.066471576690674,
-      "learning_rate": 3.4647550776583037e-06,
-      "loss": 0.157,
-      "step": 3200
-    },
-    {
-      "epoch": 2.0806451612903225,
-      "grad_norm": 5.877708911895752,
-      "learning_rate": 3.4050179211469536e-06,
-      "loss": 0.18,
-      "step": 3225
-    },
-    {
-      "epoch": 2.096774193548387,
-      "grad_norm": 5.906160354614258,
-      "learning_rate": 3.3452807646356034e-06,
-      "loss": 0.198,
-      "step": 3250
-    },
-    {
-      "epoch": 2.1129032258064515,
-      "grad_norm": 7.664149761199951,
-      "learning_rate": 3.2855436081242537e-06,
-      "loss": 0.1889,
-      "step": 3275
-    },
-    {
-      "epoch": 2.129032258064516,
-      "grad_norm": 6.261497497558594,
-      "learning_rate": 3.225806451612903e-06,
-      "loss": 0.157,
-      "step": 3300
-    },
-    {
-      "epoch": 2.1451612903225805,
-      "grad_norm": 6.205556869506836,
-      "learning_rate": 3.1660692951015535e-06,
-      "loss": 0.1635,
-      "step": 3325
-    },
-    {
-      "epoch": 2.161290322580645,
-      "grad_norm": 5.549154758453369,
-      "learning_rate": 3.1063321385902034e-06,
-      "loss": 0.1799,
-      "step": 3350
-    },
-    {
-      "epoch": 2.1774193548387095,
-      "grad_norm": 7.939329624176025,
-      "learning_rate": 3.0465949820788532e-06,
-      "loss": 0.159,
-      "step": 3375
-    },
-    {
-      "epoch": 2.193548387096774,
-      "grad_norm": 7.168279647827148,
-      "learning_rate": 2.9868578255675035e-06,
-      "loss": 0.1772,
-      "step": 3400
-    },
-    {
-      "epoch": 2.2096774193548385,
-      "grad_norm": 7.339049816131592,
-      "learning_rate": 2.9271206690561534e-06,
-      "loss": 0.161,
-      "step": 3425
-    },
-    {
-      "epoch": 2.225806451612903,
-      "grad_norm": 7.939656734466553,
-      "learning_rate": 2.867383512544803e-06,
-      "loss": 0.163,
-      "step": 3450
-    },
-    {
-      "epoch": 2.241935483870968,
-      "grad_norm": 6.338183403015137,
-      "learning_rate": 2.807646356033453e-06,
-      "loss": 0.1671,
-      "step": 3475
-    },
-    {
-      "epoch": 2.258064516129032,
-      "grad_norm": 4.27256441116333,
-      "learning_rate": 2.747909199522103e-06,
-      "loss": 0.1696,
-      "step": 3500
-    },
-    {
-      "epoch": 2.274193548387097,
-      "grad_norm": 7.593271255493164,
-      "learning_rate": 2.688172043010753e-06,
-      "loss": 0.1558,
-      "step": 3525
-    },
-    {
-      "epoch": 2.2903225806451615,
-      "grad_norm": 3.984323501586914,
-      "learning_rate": 2.6284348864994032e-06,
-      "loss": 0.1727,
-      "step": 3550
-    },
-    {
-      "epoch": 2.306451612903226,
-      "grad_norm": 6.01830530166626,
-      "learning_rate": 2.5686977299880527e-06,
-      "loss": 0.1681,
-      "step": 3575
-    },
-    {
-      "epoch": 2.3225806451612905,
-      "grad_norm": 8.704211235046387,
-      "learning_rate": 2.5089605734767026e-06,
-      "loss": 0.1703,
-      "step": 3600
-    },
-    {
-      "epoch": 2.338709677419355,
-      "grad_norm": 7.5924506187438965,
-      "learning_rate": 2.4492234169653525e-06,
-      "loss": 0.1723,
-      "step": 3625
-    },
-    {
-      "epoch": 2.3548387096774195,
-      "grad_norm": 5.732957363128662,
-      "learning_rate": 2.3894862604540028e-06,
-      "loss": 0.1586,
-      "step": 3650
-    },
-    {
-      "epoch": 2.370967741935484,
-      "grad_norm": 7.640561580657959,
-      "learning_rate": 2.3297491039426526e-06,
-      "loss": 0.1781,
-      "step": 3675
-    },
-    {
-      "epoch": 2.3870967741935485,
-      "grad_norm": 7.3015313148498535,
-      "learning_rate": 2.2700119474313025e-06,
-      "loss": 0.1571,
-      "step": 3700
-    },
-    {
-      "epoch": 2.403225806451613,
-      "grad_norm": 7.6669440269470215,
-      "learning_rate": 2.2102747909199524e-06,
-      "loss": 0.1532,
-      "step": 3725
-    },
-    {
-      "epoch": 2.4193548387096775,
-      "grad_norm": 4.207671165466309,
-      "learning_rate": 2.1505376344086023e-06,
-      "loss": 0.1658,
-      "step": 3750
-    },
-    {
-      "epoch": 2.435483870967742,
-      "grad_norm": 6.316219806671143,
-      "learning_rate": 2.0908004778972526e-06,
-      "loss": 0.1681,
-      "step": 3775
-    },
-    {
-      "epoch": 2.4516129032258065,
-      "grad_norm": 6.380753040313721,
-      "learning_rate": 2.031063321385902e-06,
-      "loss": 0.1595,
-      "step": 3800
-    },
-    {
-      "epoch": 2.467741935483871,
-      "grad_norm": 7.425994873046875,
-      "learning_rate": 1.9713261648745523e-06,
-      "loss": 0.1457,
-      "step": 3825
-    },
-    {
-      "epoch": 2.4838709677419355,
-      "grad_norm": 7.0207600593566895,
-      "learning_rate": 1.911589008363202e-06,
-      "loss": 0.1557,
-      "step": 3850
-    },
-    {
-      "epoch": 2.5,
-      "grad_norm": 7.421679496765137,
-      "learning_rate": 1.8518518518518519e-06,
-      "loss": 0.17,
-      "step": 3875
-    },
-    {
-      "epoch": 2.5161290322580645,
-      "grad_norm": 7.43884801864624,
-      "learning_rate": 1.792114695340502e-06,
-      "loss": 0.1605,
-      "step": 3900
-    },
-    {
-      "epoch": 2.532258064516129,
-      "grad_norm": 6.753660202026367,
-      "learning_rate": 1.7323775388291518e-06,
-      "loss": 0.1467,
-      "step": 3925
-    },
-    {
-      "epoch": 2.5483870967741935,
-      "grad_norm": 5.872158050537109,
-      "learning_rate": 1.6726403823178017e-06,
-      "loss": 0.1511,
-      "step": 3950
-    },
-    {
-      "epoch": 2.564516129032258,
-      "grad_norm": 5.571343421936035,
-      "learning_rate": 1.6129032258064516e-06,
-      "loss": 0.1476,
-      "step": 3975
-    },
-    {
-      "epoch": 2.5806451612903225,
-      "grad_norm": 9.308771133422852,
-      "learning_rate": 1.5531660692951017e-06,
-      "loss": 0.1566,
-      "step": 4000
-    },
-    {
-      "epoch": 2.5806451612903225,
-      "eval_cer": 29.080310880829014,
-      "eval_loss": 0.37441545724868774,
-      "eval_runtime": 959.8469,
-      "eval_samples_per_second": 2.376,
-      "eval_steps_per_second": 0.298,
-      "step": 4000
     }
   ],
   "logging_steps": 25,
@@ -1170,7 +303,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 1000,
-  "total_flos": 1.84665797664768e+19,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 60.21685813863431,
+  "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-1000",
+  "epoch": 0.6451612903225806,
   "eval_steps": 1000,
+  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.016129032258064516,
+      "grad_norm": 241.39755249023438,
       "learning_rate": 5.376344086021506e-07,
+      "loss": 8.0646,
       "step": 25
     },
     {
       "epoch": 0.03225806451612903,
+      "grad_norm": 52.91600799560547,
       "learning_rate": 1.0752688172043011e-06,
+      "loss": 5.6903,
       "step": 50
     },
     {
       "epoch": 0.04838709677419355,
+      "grad_norm": 32.09747314453125,
       "learning_rate": 1.6129032258064516e-06,
+      "loss": 3.6353,
       "step": 75
     },
     {
       "epoch": 0.06451612903225806,
+      "grad_norm": 31.451000213623047,
       "learning_rate": 2.1505376344086023e-06,
+      "loss": 2.6364,
       "step": 100
     },
     {
       "epoch": 0.08064516129032258,
+      "grad_norm": 29.471986770629883,
       "learning_rate": 2.688172043010753e-06,
+      "loss": 2.3125,
       "step": 125
     },
     {
       "epoch": 0.0967741935483871,
+      "grad_norm": 28.64345932006836,
       "learning_rate": 3.225806451612903e-06,
+      "loss": 2.1281,
       "step": 150
     },
     {
       "epoch": 0.11290322580645161,
+      "grad_norm": 28.750173568725586,
       "learning_rate": 3.763440860215054e-06,
+      "loss": 1.9073,
       "step": 175
     },
     {
       "epoch": 0.12903225806451613,
+      "grad_norm": 23.051420211791992,
       "learning_rate": 4.3010752688172045e-06,
+      "loss": 1.5977,
       "step": 200
     },
     {
       "epoch": 0.14516129032258066,
+      "grad_norm": 18.67135238647461,
       "learning_rate": 4.838709677419355e-06,
+      "loss": 1.5081,
       "step": 225
     },
     {
       "epoch": 0.16129032258064516,
+      "grad_norm": 15.335652351379395,
       "learning_rate": 5.376344086021506e-06,
+      "loss": 1.4169,
       "step": 250
     },
     {
       "epoch": 0.1774193548387097,
+      "grad_norm": 16.2917537689209,
       "learning_rate": 5.9139784946236566e-06,
+      "loss": 1.3469,
       "step": 275
     },
     {
       "epoch": 0.1935483870967742,
+      "grad_norm": 15.212031364440918,
       "learning_rate": 6.451612903225806e-06,
+      "loss": 1.4059,
       "step": 300
     },
     {
       "epoch": 0.20967741935483872,
+      "grad_norm": 15.661399841308594,
       "learning_rate": 6.989247311827958e-06,
+      "loss": 1.333,
       "step": 325
     },
     {
       "epoch": 0.22580645161290322,
+      "grad_norm": 16.841798782348633,
       "learning_rate": 7.526881720430108e-06,
+      "loss": 1.2252,
       "step": 350
     },
     {
       "epoch": 0.24193548387096775,
+      "grad_norm": 17.468032836914062,
       "learning_rate": 8.064516129032258e-06,
+      "loss": 1.2996,
       "step": 375
     },
     {
       "epoch": 0.25806451612903225,
+      "grad_norm": 16.684844970703125,
       "learning_rate": 8.602150537634409e-06,
+      "loss": 1.2653,
       "step": 400
     },
     {
       "epoch": 0.27419354838709675,
+      "grad_norm": 14.749136924743652,
       "learning_rate": 9.13978494623656e-06,
+      "loss": 1.1967,
       "step": 425
     },
     {
       "epoch": 0.2903225806451613,
+      "grad_norm": 13.751141548156738,
       "learning_rate": 9.67741935483871e-06,
+      "loss": 1.1865,
       "step": 450
     },
     {
       "epoch": 0.3064516129032258,
+      "grad_norm": 16.48873519897461,
       "learning_rate": 9.97610513739546e-06,
+      "loss": 1.1636,
       "step": 475
     },
     {
       "epoch": 0.3225806451612903,
+      "grad_norm": 14.694608688354492,
       "learning_rate": 9.916367980884111e-06,
+      "loss": 1.1796,
       "step": 500
     },
     {
       "epoch": 0.3387096774193548,
+      "grad_norm": 15.619414329528809,
       "learning_rate": 9.856630824372761e-06,
+      "loss": 1.1655,
       "step": 525
     },
     {
       "epoch": 0.3548387096774194,
+      "grad_norm": 13.177242279052734,
       "learning_rate": 9.79689366786141e-06,
+      "loss": 1.143,
       "step": 550
     },
     {
       "epoch": 0.3709677419354839,
+      "grad_norm": 15.957605361938477,
       "learning_rate": 9.737156511350062e-06,
+      "loss": 1.1414,
       "step": 575
     },
     {
       "epoch": 0.3870967741935484,
+      "grad_norm": 12.467620849609375,
       "learning_rate": 9.67741935483871e-06,
+      "loss": 1.0964,
       "step": 600
     },
     {
       "epoch": 0.4032258064516129,
+      "grad_norm": 15.435978889465332,
       "learning_rate": 9.61768219832736e-06,
+      "loss": 1.1512,
       "step": 625
     },
     {
       "epoch": 0.41935483870967744,
+      "grad_norm": 13.087624549865723,
       "learning_rate": 9.557945041816011e-06,
+      "loss": 1.1338,
       "step": 650
     },
     {
       "epoch": 0.43548387096774194,
+      "grad_norm": 15.716456413269043,
       "learning_rate": 9.49820788530466e-06,
+      "loss": 1.0783,
       "step": 675
     },
     {
       "epoch": 0.45161290322580644,
+      "grad_norm": 14.517507553100586,
       "learning_rate": 9.43847072879331e-06,
+      "loss": 1.0728,
       "step": 700
     },
     {
       "epoch": 0.46774193548387094,
+      "grad_norm": 17.37009620666504,
       "learning_rate": 9.37873357228196e-06,
+      "loss": 1.0317,
       "step": 725
     },
     {
       "epoch": 0.4838709677419355,
+      "grad_norm": 14.03701400756836,
       "learning_rate": 9.31899641577061e-06,
+      "loss": 1.0347,
       "step": 750
     },
     {
       "epoch": 0.5,
+      "grad_norm": 12.431659698486328,
       "learning_rate": 9.25925925925926e-06,
+      "loss": 1.0524,
       "step": 775
     },
     {
       "epoch": 0.5161290322580645,
+      "grad_norm": 12.746413230895996,
       "learning_rate": 9.19952210274791e-06,
+      "loss": 1.0826,
       "step": 800
     },
     {
       "epoch": 0.532258064516129,
+      "grad_norm": 15.521408081054688,
       "learning_rate": 9.13978494623656e-06,
+      "loss": 1.0377,
       "step": 825
     },
     {
       "epoch": 0.5483870967741935,
+      "grad_norm": 15.342901229858398,
       "learning_rate": 9.08004778972521e-06,
+      "loss": 0.9762,
       "step": 850
     },
     {
       "epoch": 0.5645161290322581,
+      "grad_norm": 16.137371063232422,
       "learning_rate": 9.02031063321386e-06,
+      "loss": 1.0725,
       "step": 875
     },
     {
       "epoch": 0.5806451612903226,
+      "grad_norm": 14.61146068572998,
       "learning_rate": 8.96057347670251e-06,
+      "loss": 0.9554,
       "step": 900
     },
     {
       "epoch": 0.5967741935483871,
+      "grad_norm": 13.561723709106445,
       "learning_rate": 8.90083632019116e-06,
+      "loss": 1.0127,
       "step": 925
     },
     {
       "epoch": 0.6129032258064516,
+      "grad_norm": 16.037729263305664,
       "learning_rate": 8.84109916367981e-06,
+      "loss": 0.9621,
       "step": 950
     },
     {
       "epoch": 0.6290322580645161,
+      "grad_norm": 13.945268630981445,
       "learning_rate": 8.78136200716846e-06,
+      "loss": 0.9479,
       "step": 975
     },
     {
       "epoch": 0.6451612903225806,
+      "grad_norm": 15.826567649841309,
       "learning_rate": 8.72162485065711e-06,
+      "loss": 0.9789,
       "step": 1000
     },
     {
       "epoch": 0.6451612903225806,
+      "eval_cer": 60.21685813863431,
+      "eval_loss": 0.9020848870277405,
+      "eval_runtime": 953.7359,
+      "eval_samples_per_second": 2.392,
+      "eval_steps_per_second": 0.3,
       "step": 1000
     }
   ],
   "logging_steps": 25,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 1000,
+  "total_flos": 4.61736640512e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fdbd7ffd023398f8cec6e5726c887d0bce38c6797a0f638b634302be6e3c8ab1
 size 5176

 version https://git-lfs.github.com/spec/v1
+oid sha256:5780b3fe6cf6a2b7abc711d493a9d31fc1181c9fff73c0fc0a79ae423a23e2fb
 size 5176