SR Steinkamp
committed on
Commit
·
4bd6647
1
Parent(s):
6f1a318
updated WER score
Browse files
README.md
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
language: nl
|
2 |
datasets:
|
3 |
- common_voicemetrics:
|
@@ -11,7 +12,7 @@ license: apache-2.0
|
|
11 |
model-index:
|
12 |
- name: `simonsr XLSR Wav2Vec2 Large 53`
|
13 |
results:
|
14 |
-
- task:
|
15 |
name: Speech Recognition
|
16 |
type: automatic-speech-recognition
|
17 |
dataset:
|
@@ -21,7 +22,7 @@ model-index:
|
|
21 |
metrics:
|
22 |
- name: Test WER
|
23 |
type: wer
|
24 |
-
value:
|
25 |
---
|
26 |
|
27 |
# Wav2Vec2-Large-XLSR-53-Dutch
|
@@ -79,7 +80,7 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
|
79 |
import unidecode
|
80 |
import re
|
81 |
|
82 |
-
test_dataset = load_dataset("common_voice", "nl", split="test")
|
83 |
wer = load_metric("wer")
|
84 |
|
85 |
processor = Wav2Vec2Processor.from_pretrained("{model_id}") #TODO: replace {model_id} with your model id. The model id consists of {your_username}/{your_modelname}, *e.g.* `elgeish/wav2vec2-large-xlsr-53-arabic`
|
@@ -91,7 +92,7 @@ resampler = torchaudio.transforms.Resample(48_000, 16_000)
|
|
91 |
|
92 |
# Preprocessing the datasets.
|
93 |
# We need to read the audio files as arrays
|
94 |
-
def speech_file_to_array_fn(batch):
|
95 |
batch["sentence"] = unidecode.unidecode(batch["sentence"])
|
96 |
batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
|
97 |
speech_array, sampling_rate = torchaudio.load(batch["path"])
|
@@ -117,8 +118,7 @@ result = test_dataset.map(evaluate, batched=True, batch_size=8)
|
|
117 |
print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
|
118 |
```
|
119 |
|
120 |
-
**Test Result**:
|
121 |
-
|
122 |
|
123 |
## Training
|
124 |
|
|
|
1 |
+
---
|
2 |
language: nl
|
3 |
datasets:
|
4 |
- common_voicemetrics:
|
|
|
12 |
model-index:
|
13 |
- name: `simonsr XLSR Wav2Vec2 Large 53`
|
14 |
results:
|
15 |
+
- task:
|
16 |
name: Speech Recognition
|
17 |
type: automatic-speech-recognition
|
18 |
dataset:
|
|
|
22 |
metrics:
|
23 |
- name: Test WER
|
24 |
type: wer
|
25 |
+
value: 38.74
|
26 |
---
|
27 |
|
28 |
# Wav2Vec2-Large-XLSR-53-Dutch
|
|
|
80 |
import unidecode
|
81 |
import re
|
82 |
|
83 |
+
test_dataset = load_dataset("common_voice", "nl", split="test")
|
84 |
wer = load_metric("wer")
|
85 |
|
86 |
processor = Wav2Vec2Processor.from_pretrained("{model_id}") #TODO: replace {model_id} with your model id. The model id consists of {your_username}/{your_modelname}, *e.g.* `elgeish/wav2vec2-large-xlsr-53-arabic`
|
|
|
92 |
|
93 |
# Preprocessing the datasets.
|
94 |
# We need to read the audio files as arrays
|
95 |
+
def speech_file_to_array_fn(batch):
|
96 |
batch["sentence"] = unidecode.unidecode(batch["sentence"])
|
97 |
batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
|
98 |
speech_array, sampling_rate = torchaudio.load(batch["path"])
|
|
|
118 |
print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
|
119 |
```
|
120 |
|
121 |
+
**Test Result**: 38.74 %
|
|
|
122 |
|
123 |
## Training
|
124 |
|