patrickvonplaten committed on
Commit
521ad3f
·
1 Parent(s): e249a0c

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +6 -2
README.md CHANGED
@@ -40,7 +40,7 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
40
 
41
  # config
42
  wakati = MeCab.Tagger("-Owakati")
43
- chars_to_ignore_regex = '[\,\、\。\.\「\」\…\?\・]'
44
 
45
  # load data, processor and model
46
  test_dataset = load_dataset("common_voice", "ja", split="test[:2%]")
@@ -66,6 +66,10 @@ print("Reference:", test_dataset["sentence"][:2])
66
  ## Evaluation
67
  The model can be evaluated as follows on the Japanese test data of Common Voice.
68
  ```python
 
 
 
 
69
  import torch
70
  import librosa
71
  import torchaudio
@@ -75,7 +79,7 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
75
 
76
  #config
77
  wakati = MeCab.Tagger("-Owakati")
78
- chars_to_ignore_regex = '[\,\、\。\.\「\」\…\?\・]'
79
 
80
  # load data, processor and model
81
  test_dataset = load_dataset("common_voice", "ja", split="test")
 
40
 
41
  # config
42
  wakati = MeCab.Tagger("-Owakati")
43
+ chars_to_ignore_regex = '[\\,\\、\\。\\.\\「\\」\\…\\?\\・]'
44
 
45
  # load data, processor and model
46
  test_dataset = load_dataset("common_voice", "ja", split="test[:2%]")
 
66
  ## Evaluation
67
  The model can be evaluated as follows on the Japanese test data of Common Voice.
68
  ```python
69
+ !pip install mecab-python3
70
+ !pip install unidic-lite
71
+ !python -m unidic download
72
+
73
  import torch
74
  import librosa
75
  import torchaudio
 
79
 
80
  #config
81
  wakati = MeCab.Tagger("-Owakati")
82
+ chars_to_ignore_regex = '[\\,\\、\\。\\.\\「\\」\\…\\?\\・]'
83
 
84
  # load data, processor and model
85
  test_dataset = load_dataset("common_voice", "ja", split="test")