marma committed on
Commit
913c468
·
1 Parent(s): 60b4398
config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_name_or_path": "marma/wav2vec2-large-swedish-bart-base",
3
  "architectures": [
4
  "SpeechEncoderDecoderModel"
5
  ],
@@ -88,22 +87,22 @@
88
  },
89
  "decoder_start_token_id": 0,
90
  "encoder": {
91
- "_name_or_path": "KBLab/wav2vec2-large-voxrex-swedish",
92
- "activation_dropout": 0.05,
93
  "adapter_kernel_size": 3,
94
  "adapter_stride": 2,
95
- "add_adapter": false,
96
  "add_cross_attention": false,
97
  "apply_spec_augment": true,
98
  "architectures": [
99
- "Wav2Vec2ForCTC"
100
  ],
101
  "attention_dropout": 0.1,
102
  "bad_words_ids": null,
103
  "bos_token_id": 1,
104
  "chunk_size_feed_forward": 0,
105
  "classifier_proj_size": 256,
106
- "codevector_dim": 256,
107
  "contrastive_logits_temperature": 0.1,
108
  "conv_bias": true,
109
  "conv_dim": [
@@ -134,8 +133,8 @@
134
  2
135
  ],
136
  "cross_attention_hidden_size": null,
137
- "ctc_loss_reduction": "mean",
138
- "ctc_zero_infinity": true,
139
  "decoder_start_token_id": null,
140
  "diversity_loss_weight": 0.1,
141
  "diversity_penalty": 0.0,
@@ -147,14 +146,15 @@
147
  "feat_extract_activation": "gelu",
148
  "feat_extract_dropout": 0.0,
149
  "feat_extract_norm": "layer",
150
- "feat_proj_dropout": 0.05,
151
  "feat_quantizer_dropout": 0.0,
152
  "final_dropout": 0.0,
153
  "finetuning_task": null,
154
  "forced_bos_token_id": null,
155
  "forced_eos_token_id": null,
 
156
  "hidden_act": "gelu",
157
- "hidden_dropout": 0.05,
158
  "hidden_size": 1024,
159
  "id2label": {
160
  "0": "LABEL_0",
@@ -169,7 +169,7 @@
169
  "LABEL_1": 1
170
  },
171
  "layer_norm_eps": 1e-05,
172
- "layerdrop": 0.05,
173
  "length_penalty": 1.0,
174
  "mask_channel_length": 10,
175
  "mask_channel_min_space": 1,
@@ -183,7 +183,7 @@
183
  "mask_time_min_masks": 2,
184
  "mask_time_min_space": 1,
185
  "mask_time_other": 0.0,
186
- "mask_time_prob": 0.05,
187
  "mask_time_selection": "static",
188
  "max_length": 20,
189
  "min_length": 0,
@@ -208,7 +208,7 @@
208
  "pad_token_id": 0,
209
  "prefix": null,
210
  "problem_type": null,
211
- "proj_codevector_dim": 256,
212
  "pruned_heads": {},
213
  "remove_invalid_values": false,
214
  "repetition_penalty": 1.0,
@@ -248,19 +248,17 @@
248
  "transformers_version": "4.16.0.dev0",
249
  "use_bfloat16": false,
250
  "use_weighted_layer_sum": false,
251
- "vocab_size": 46,
252
  "xvector_output_dim": 512
253
  },
254
- "encoder_add_adapter": true,
255
- "encoder_feat_proj_dropout": 0.0,
256
- "encoder_layerdrop": 0.0,
257
  "eos_token_id": 2,
258
  "is_encoder_decoder": true,
259
- "max_length": 200,
260
  "model_type": "speech-encoder-decoder",
261
- "num_beams": 5,
262
  "pad_token_id": 1,
 
263
  "tie_word_embeddings": false,
264
  "torch_dtype": "float32",
265
- "transformers_version": null
 
266
  }
 
1
  {
 
2
  "architectures": [
3
  "SpeechEncoderDecoderModel"
4
  ],
 
87
  },
88
  "decoder_start_token_id": 0,
89
  "encoder": {
90
+ "_name_or_path": "KBLab/wav2vec2-large-voxrex",
91
+ "activation_dropout": 0.0,
92
  "adapter_kernel_size": 3,
93
  "adapter_stride": 2,
94
+ "add_adapter": true,
95
  "add_cross_attention": false,
96
  "apply_spec_augment": true,
97
  "architectures": [
98
+ "Wav2Vec2ForPreTraining"
99
  ],
100
  "attention_dropout": 0.1,
101
  "bad_words_ids": null,
102
  "bos_token_id": 1,
103
  "chunk_size_feed_forward": 0,
104
  "classifier_proj_size": 256,
105
+ "codevector_dim": 768,
106
  "contrastive_logits_temperature": 0.1,
107
  "conv_bias": true,
108
  "conv_dim": [
 
133
  2
134
  ],
135
  "cross_attention_hidden_size": null,
136
+ "ctc_loss_reduction": "sum",
137
+ "ctc_zero_infinity": false,
138
  "decoder_start_token_id": null,
139
  "diversity_loss_weight": 0.1,
140
  "diversity_penalty": 0.0,
 
146
  "feat_extract_activation": "gelu",
147
  "feat_extract_dropout": 0.0,
148
  "feat_extract_norm": "layer",
149
+ "feat_proj_dropout": 0.0,
150
  "feat_quantizer_dropout": 0.0,
151
  "final_dropout": 0.0,
152
  "finetuning_task": null,
153
  "forced_bos_token_id": null,
154
  "forced_eos_token_id": null,
155
+ "gradient_checkpointing": false,
156
  "hidden_act": "gelu",
157
+ "hidden_dropout": 0.1,
158
  "hidden_size": 1024,
159
  "id2label": {
160
  "0": "LABEL_0",
 
169
  "LABEL_1": 1
170
  },
171
  "layer_norm_eps": 1e-05,
172
+ "layerdrop": 0.0,
173
  "length_penalty": 1.0,
174
  "mask_channel_length": 10,
175
  "mask_channel_min_space": 1,
 
183
  "mask_time_min_masks": 2,
184
  "mask_time_min_space": 1,
185
  "mask_time_other": 0.0,
186
+ "mask_time_prob": 0.0,
187
  "mask_time_selection": "static",
188
  "max_length": 20,
189
  "min_length": 0,
 
208
  "pad_token_id": 0,
209
  "prefix": null,
210
  "problem_type": null,
211
+ "proj_codevector_dim": 768,
212
  "pruned_heads": {},
213
  "remove_invalid_values": false,
214
  "repetition_penalty": 1.0,
 
248
  "transformers_version": "4.16.0.dev0",
249
  "use_bfloat16": false,
250
  "use_weighted_layer_sum": false,
251
+ "vocab_size": 32,
252
  "xvector_output_dim": 512
253
  },
 
 
 
254
  "eos_token_id": 2,
255
  "is_encoder_decoder": true,
256
+ "max_length": 40,
257
  "model_type": "speech-encoder-decoder",
 
258
  "pad_token_id": 1,
259
+ "processor_class": "Wav2Vec2Processor",
260
  "tie_word_embeddings": false,
261
  "torch_dtype": "float32",
262
+ "transformers_version": null,
263
+ "use_cache": false
264
  }
preprocessor_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "do_normalize": true,
3
  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
  "feature_size": 1,
 
1
  {
2
+ "_processor_class": null,
3
  "do_normalize": true,
4
  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
5
  "feature_size": 1,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1591efa33cd05727ab96974eb5b4c1718ef40b94f0e0f8455fa51d82294582e
3
- size 1649302897
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a6fb5a66ede6750cf6e94c6360fc4840d4363847a89a69a7a84369c6a549e2b
3
+ size 1649294705
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>", "cls_token": "</s>", "sep_token": "</s>", "special_tokens_map_file": "/home/marmal/.cache/huggingface/transformers/eeba8b7f723def8f95951bb301a16f684eb8a2b6bfbc13e715d200a2df8a7e61.4405f81af4d0c9b936ce3368cc07277d6d61ec031eac1137ba8e4d9acedd61ee", "name_or_path": "KBLab/bart-base-swedish-cased", "tokenizer_class": "PreTrainedTokenizerFast"}
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "mask_token": "<mask>", "cls_token": "</s>", "sep_token": "</s>", "special_tokens_map_file": null, "name_or_path": "KBLab/bart-base-swedish-cased", "tokenizer_class": "PreTrainedTokenizerFast"}