End of training

Browse files

Files changed (7) hide show

README.md +13 -13
config.json +14 -21
model.safetensors +2 -2
runs/Apr29_07-25-51_eac612c9136b/events.out.tfevents.1714375573.eac612c9136b.2924.1 +3 -0
runs/Apr29_07-28-43_eac612c9136b/events.out.tfevents.1714375750.eac612c9136b.2924.2 +3 -0
runs/Apr29_07-30-02_eac612c9136b/events.out.tfevents.1714375826.eac612c9136b.2924.3 +3 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
-license: apache-2.0
-base_model: facebook/wav2vec2-large-xlsr-53
 tags:
 - generated_from_trainer
 datasets:
@@ -22,7 +22,7 @@ model-index:
     metrics:
     - name: Wer
       type: wer
-      value: 0.8004270028466857
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -30,10 +30,10 @@ should probably proofread and complete it, then remove this comment. -->
 # wav2vec2-common-voice-16_1_vi
-This model is a fine-tuned version of [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on the common_voice_16_1 dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.4999
-- Wer: 0.8004
 ## Model description
@@ -66,13 +66,13 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch   | Step | Validation Loss | Wer    |
 |:-------------:|:-------:|:----:|:---------------:|:------:|
-| 16.2607       | 4.2373  | 500  | 3.5522          | 0.9999 |
-| 3.4445        | 8.4746  | 1000 | 3.3740          | 0.9999 |
-| 2.2065        | 12.7119 | 1500 | 1.5781          | 0.9709 |
-| 0.7589        | 16.9492 | 2000 | 1.4173          | 0.8542 |
-| 0.4407        | 21.1864 | 2500 | 1.4711          | 0.8281 |
-| 0.3235        | 25.4237 | 3000 | 1.4836          | 0.8097 |
-| 0.2751        | 29.6610 | 3500 | 1.4999          | 0.8004 |
 ### Framework versions

 ---
+license: cc-by-nc-4.0
+base_model: nguyenvulebinh/wav2vec2-base-vi
 tags:
 - generated_from_trainer
 datasets:
     metrics:
     - name: Wer
       type: wer
+      value: 0.9998983326555511
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 # wav2vec2-common-voice-16_1_vi
+This model is a fine-tuned version of [nguyenvulebinh/wav2vec2-base-vi](https://huggingface.co/nguyenvulebinh/wav2vec2-base-vi) on the common_voice_16_1 dataset.
 It achieves the following results on the evaluation set:
+- Loss: 3.5320
+- Wer: 0.9999
 ## Model description
 | Training Loss | Epoch   | Step | Validation Loss | Wer    |
 |:-------------:|:-------:|:----:|:---------------:|:------:|
+| 23.1642       | 4.2373  | 500  | 9.3416          | 0.9999 |
+| 4.3182        | 8.4746  | 1000 | 3.5487          | 0.9999 |
+| 3.4768        | 12.7119 | 1500 | 3.5353          | 0.9999 |
+| 3.4681        | 16.9492 | 2000 | 3.5314          | 0.9999 |
+| 3.4635        | 21.1864 | 2500 | 3.5318          | 0.9999 |
+| 3.4642        | 25.4237 | 3000 | 3.5313          | 0.9999 |
+| 3.4753        | 29.6610 | 3500 | 3.5320          | 0.9999 |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
-  "_name_or_path": "facebook/wav2vec2-large-xlsr-53",
-  "activation_dropout": 0.0,
   "adapter_attn_dim": null,
   "adapter_kernel_size": 3,
   "adapter_stride": 2,
@@ -12,9 +12,9 @@
   "attention_dropout": 0.1,
   "bos_token_id": 1,
   "classifier_proj_size": 256,
-  "codevector_dim": 768,
   "contrastive_logits_temperature": 0.1,
-  "conv_bias": true,
   "conv_dim": [
     512,
     512,
@@ -45,48 +45,41 @@
   "ctc_loss_reduction": "mean",
   "ctc_zero_infinity": false,
   "diversity_loss_weight": 0.1,
-  "do_stable_layer_norm": true,
   "eos_token_id": 2,
   "feat_extract_activation": "gelu",
   "feat_extract_dropout": 0.0,
-  "feat_extract_norm": "layer",
   "feat_proj_dropout": 0.0,
   "feat_quantizer_dropout": 0.0,
-  "final_dropout": 0.0,
   "hidden_act": "gelu",
   "hidden_dropout": 0.1,
-  "hidden_size": 1024,
   "initializer_range": 0.02,
-  "intermediate_size": 4096,
   "layer_norm_eps": 1e-05,
   "layerdrop": 0.1,
-  "mask_channel_length": 10,
-  "mask_channel_min_space": 1,
-  "mask_channel_other": 0.0,
-  "mask_channel_prob": 0.0,
-  "mask_channel_selection": "static",
   "mask_feature_length": 10,
   "mask_feature_min_masks": 0,
   "mask_feature_prob": 0.0,
   "mask_time_length": 10,
   "mask_time_min_masks": 2,
-  "mask_time_min_space": 1,
-  "mask_time_other": 0.0,
   "mask_time_prob": 0.05,
-  "mask_time_selection": "static",
   "model_type": "wav2vec2",
   "num_adapter_layers": 3,
-  "num_attention_heads": 16,
   "num_codevector_groups": 2,
   "num_codevectors_per_group": 320,
   "num_conv_pos_embedding_groups": 16,
   "num_conv_pos_embeddings": 128,
   "num_feat_extract_layers": 7,
-  "num_hidden_layers": 24,
   "num_negatives": 100,
-  "output_hidden_size": 1024,
   "pad_token_id": 95,
-  "proj_codevector_dim": 768,
   "tdnn_dilation": [
     1,
     2,

 {
+  "_name_or_path": "nguyenvulebinh/wav2vec2-base-vi",
+  "activation_dropout": 0.1,
   "adapter_attn_dim": null,
   "adapter_kernel_size": 3,
   "adapter_stride": 2,
   "attention_dropout": 0.1,
   "bos_token_id": 1,
   "classifier_proj_size": 256,
+  "codevector_dim": 256,
   "contrastive_logits_temperature": 0.1,
+  "conv_bias": false,
   "conv_dim": [
     512,
     512,
   "ctc_loss_reduction": "mean",
   "ctc_zero_infinity": false,
   "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": false,
   "eos_token_id": 2,
   "feat_extract_activation": "gelu",
   "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "group",
   "feat_proj_dropout": 0.0,
   "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.1,
   "hidden_act": "gelu",
   "hidden_dropout": 0.1,
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
   "initializer_range": 0.02,
+  "intermediate_size": 3072,
   "layer_norm_eps": 1e-05,
   "layerdrop": 0.1,
   "mask_feature_length": 10,
   "mask_feature_min_masks": 0,
   "mask_feature_prob": 0.0,
   "mask_time_length": 10,
   "mask_time_min_masks": 2,
   "mask_time_prob": 0.05,
   "model_type": "wav2vec2",
   "num_adapter_layers": 3,
+  "num_attention_heads": 12,
   "num_codevector_groups": 2,
   "num_codevectors_per_group": 320,
   "num_conv_pos_embedding_groups": 16,
   "num_conv_pos_embeddings": 128,
   "num_feat_extract_layers": 7,
+  "num_hidden_layers": 12,
   "num_negatives": 100,
+  "output_hidden_size": 768,
   "pad_token_id": 95,
+  "proj_codevector_dim": 256,
   "tdnn_dilation": [
     1,
     2,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:49fb8ef39dc1607843cf0729138aa25835218471b0ef8be9a9c0421b49d5f1c4
-size 1262209280

 version https://git-lfs.github.com/spec/v1
+oid sha256:fc78b1dc88a4788f6cf467df5c7da42542a41f217c24692eaf9528d507a0267b
+size 377814144

runs/Apr29_07-25-51_eac612c9136b/events.out.tfevents.1714375573.eac612c9136b.2924.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6ced30ff9f1d1f30943a88c43254014114a0bb30229f7a23924686f32995fdfa
+size 6160

runs/Apr29_07-28-43_eac612c9136b/events.out.tfevents.1714375750.eac612c9136b.2924.2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1ad6d24d230e8157848480d9bde27064ed3af84c0d24a794146c888c566b41bf
+size 6160

runs/Apr29_07-30-02_eac612c9136b/events.out.tfevents.1714375826.eac612c9136b.2924.3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:36bf264da5669452a0657923a56a36231a6b668c282e3ace2325148036095ca3
+size 10217

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b4b35a856eac15d8e3780e1cd2bdfc60f1b306b18f0e6193d3ad7808e58a8c8b
 size 4984

 version https://git-lfs.github.com/spec/v1
+oid sha256:3fbfe89347d7ce2c188a7039b74b5f952a27772e7681f5bac72af43f4f5e98d6
 size 4984