Temur committed on
Commit
afcaf68
·
1 Parent(s): d1b82b4

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +17 -18
README.md CHANGED
@@ -24,7 +24,7 @@ model-index:
24
  metrics:
25
  - name: Test WER
26
  type: wer
27
- value: {wer_result_on_test} #TODO (IMPORTANT): replace {wer_result_on_test} with the WER error rate you achieved on the common_voice test set. It should be in the format XX.XX (don't add the % sign here). **Please** remember to fill out this value after you evaluated your model, so that your model appears on the leaderboard. If you fill out this model card before evaluating your model, please remember to edit the model card afterward to fill in your value
28
  ---
29
 
30
  # Wav2Vec2-Large-XLSR-53-Georgian
@@ -52,15 +52,15 @@ resampler = torchaudio.transforms.Resample(48_000, 16_000)
52
  # Preprocessing the datasets.
53
  # We need to read the audio files as arrays
54
  def speech_file_to_array_fn(batch):
55
- \tspeech_array, sampling_rate = torchaudio.load(batch["path"])
56
- \tbatch["speech"] = resampler(speech_array).squeeze().numpy()
57
- \treturn batch
58
 
59
  test_dataset = test_dataset.map(speech_file_to_array_fn)
60
  inputs = processor(test_dataset["speech"][:2], sampling_rate=16_000, return_tensors="pt", padding=True)
61
 
62
  with torch.no_grad():
63
- \tlogits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
64
 
65
  predicted_ids = torch.argmax(logits, dim=-1)
66
 
@@ -88,38 +88,37 @@ processor = Wav2Vec2Processor.from_pretrained("Temur/wav2vec2-Georgian-Daytona")
88
  model = Wav2Vec2ForCTC.from_pretrained("Temur/wav2vec2-Georgian-Daytona")
89
  model.to("cuda")
90
 
91
- chars_to_ignore_regex = '[\\,\\?\\.\\!\\-\\;\\:\\"\\“]' # TODO: adapt this list to include all special characters you removed from the data
92
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
93
 
94
  # Preprocessing the datasets.
95
  # We need to read the audio files as arrays
96
  def speech_file_to_array_fn(batch):
97
- \tbatch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
98
- \tspeech_array, sampling_rate = torchaudio.load(batch["path"])
99
- \tbatch["speech"] = resampler(speech_array).squeeze().numpy()
100
- \treturn batch
101
 
102
  test_dataset = test_dataset.map(speech_file_to_array_fn)
103
 
104
  # Preprocessing the datasets.
105
  # We need to read the audio files as arrays
106
  def evaluate(batch):
107
- \tinputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
108
 
109
- \twith torch.no_grad():
110
- \t\tlogits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda")).logits
111
 
112
- \tpred_ids = torch.argmax(logits, dim=-1)
113
- \tbatch["pred_strings"] = processor.batch_decode(pred_ids)
114
- \treturn batch
115
 
116
  result = test_dataset.map(evaluate, batched=True, batch_size=8)
117
 
118
  print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
119
  ```
120
 
121
- **Test Result**: XX.XX % # TODO: write output of print here. IMPORTANT: Please remember to also replace {wer_result_on_test} at the top of with this value here. tags.
122
-
123
 
124
  ## Training
125
 
 
24
  metrics:
25
  - name: Test WER
26
  type: wer
27
+ value: 48.34
28
  ---
29
 
30
  # Wav2Vec2-Large-XLSR-53-Georgian
 
52
  # Preprocessing the datasets.
53
  # We need to read the audio files as arrays
54
  def speech_file_to_array_fn(batch):
55
+ \\tspeech_array, sampling_rate = torchaudio.load(batch["path"])
56
+ \\tbatch["speech"] = resampler(speech_array).squeeze().numpy()
57
+ \\treturn batch
58
 
59
  test_dataset = test_dataset.map(speech_file_to_array_fn)
60
  inputs = processor(test_dataset["speech"][:2], sampling_rate=16_000, return_tensors="pt", padding=True)
61
 
62
  with torch.no_grad():
63
+ \\tlogits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
64
 
65
  predicted_ids = torch.argmax(logits, dim=-1)
66
 
 
88
  model = Wav2Vec2ForCTC.from_pretrained("Temur/wav2vec2-Georgian-Daytona")
89
  model.to("cuda")
90
 
91
+ chars_to_ignore_regex = '[\\\\,\\\\?\\\\.\\\\!\\\\-\\\\;\\\\:\\\\"\\\\“]' # TODO: adapt this list to include all special characters you removed from the data
92
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
93
 
94
  # Preprocessing the datasets.
95
  # We need to read the audio files as arrays
96
  def speech_file_to_array_fn(batch):
97
+ \\tbatch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
98
+ \\tspeech_array, sampling_rate = torchaudio.load(batch["path"])
99
+ \\tbatch["speech"] = resampler(speech_array).squeeze().numpy()
100
+ \\treturn batch
101
 
102
  test_dataset = test_dataset.map(speech_file_to_array_fn)
103
 
104
  # Preprocessing the datasets.
105
  # We need to read the audio files as arrays
106
  def evaluate(batch):
107
+ \\tinputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
108
 
109
+ \\twith torch.no_grad():
110
+ \\t\\tlogits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda")).logits
111
 
112
+ \\tpred_ids = torch.argmax(logits, dim=-1)
113
+ \\tbatch["pred_strings"] = processor.batch_decode(pred_ids)
114
+ \\treturn batch
115
 
116
  result = test_dataset.map(evaluate, batched=True, batch_size=8)
117
 
118
  print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
119
  ```
120
 
121
+ **Test Result**: 48.34 %
 
122
 
123
  ## Training
124