wetdog committed on
Commit
daa90f5
·
1 Parent(s): 92df4f5

add models, configs and utils

Browse files
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Tts Vocos Onnx
3
- emoji: 🌍
4
  colorFrom: purple
5
- colorTo: gray
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
 
1
  ---
2
+ title: tts vocos Onnx Comparison
3
+ emoji: 🐨
4
  colorFrom: purple
5
+ colorTo: yellow
6
  sdk: docker
7
  pinned: false
8
  license: apache-2.0
config_22khz.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ feature_extractor:
2
+ class_path: vocos.feature_extractors.MelSpectrogramFeatures
3
+ init_args:
4
+ sample_rate: 22050
5
+ n_fft: 1024
6
+ hop_length: 256
7
+ n_mels: 80
8
+ padding: center
9
+
10
+ backbone:
11
+ class_path: vocos.models.VocosBackbone
12
+ init_args:
13
+ input_channels: 80
14
+ dim: 512
15
+ intermediate_dim: 1536
16
+ num_layers: 8
17
+
18
+ head:
19
+ class_path: vocos.heads.ISTFTHead
20
+ init_args:
21
+ dim: 512
22
+ n_fft: 1024
23
+ hop_length: 256
24
+ padding: center
matcha_hifigan_multispeaker_cat.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5927b5a9a5f7890d4a8c353266ff00a1d9c4376eb1294020ffe43afa622b72f
3
+ size 142073725
matcha_multispeaker_cat_opset_15.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5b53370f69b8f4ca3d510634b644f6d815f34ee7a2944d0fb3a5588f6286b88
3
+ size 102285286
mel_spec_22khz.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15485817350df1e1cf50f75058497ec4b5273acb8903591bb41c6b5fb62daf2b
3
+ size 53870258
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ onnxruntime
2
+ phonemizer
3
+ torch
4
+ unidecode
5
+ gradio
6
+ soundfile
text/LICENSE ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright (c) 2017 Keith Ito
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ THE SOFTWARE.
text/__init__.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ from https://github.com/keithito/tacotron """
2
+ from text import cleaners
3
+ from text.symbols import symbols
4
+
5
+
6
# Lookup tables between each symbol and its integer ID (its index in `symbols`):
_symbol_to_id = {symbol: index for index, symbol in enumerate(symbols)}
_id_to_symbol = dict(enumerate(symbols))
9
+
10
+
11
def text_to_sequence(text, cleaner_names):
    """Clean `text` with the named cleaners and encode the result as symbol IDs.

    Args:
      text: string to convert to a sequence
      cleaner_names: names of the cleaner functions to run the text through, in order
    Returns:
      List of integer IDs, one per character of the cleaned text that is present
      in the symbol table. Characters that are not in the table are skipped.
    """
    cleaned = _clean_text(text, cleaner_names)
    return [_symbol_to_id[char] for char in cleaned if char in _symbol_to_id]
29
+
30
+
31
def cleaned_text_to_sequence(cleaned_text):
    """Encode already-cleaned text as symbol IDs (no cleaners are run here).

    Args:
      cleaned_text: string to convert to a sequence
    Returns:
      List of integer IDs, one per character of `cleaned_text` that is present
      in the symbol table. Characters that are not in the table are skipped.
    """
    return [_symbol_to_id[char] for char in cleaned_text if char in _symbol_to_id]
47
+
48
+
49
def sequence_to_text(sequence):
    """Decode an iterable of symbol IDs into the string of their symbols.

    An ID that is not in the ID-to-symbol table raises KeyError.
    """
    return "".join(_id_to_symbol[symbol_id] for symbol_id in sequence)
56
+
57
+
58
def _clean_text(text, cleaner_names):
    """Apply each named cleaner from the `cleaners` module to `text`, in order.

    Args:
      text: string to clean
      cleaner_names: iterable of attribute names to look up on `cleaners`
    Returns:
      The text after every named cleaner has been applied.
    Raises:
      Exception: if a name does not resolve to an attribute of `cleaners`.
    """
    for name in cleaner_names:
        # The None default matters: two-argument getattr raises AttributeError for
        # a missing name, which would skip the "Unknown cleaner" error below.
        cleaner = getattr(cleaners, name, None)
        if not cleaner:
            raise Exception("Unknown cleaner: %s" % name)
        text = cleaner(text)
    return text
text/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (2.04 kB). View file
 
text/__pycache__/cleaners.cpython-310.pyc ADDED
Binary file (3.19 kB). View file
 
text/__pycache__/symbols.cpython-310.pyc ADDED
Binary file (693 Bytes). View file
 
text/cleaners.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ from https://github.com/keithito/tacotron """
2
+
3
+ """
4
+ Cleaners are transformations that run over the input text at both training and eval time.
5
+
6
+ Cleaners can be selected by passing a comma-delimited list of cleaner names as the "cleaners"
7
+ hyperparameter. Some cleaners are English-specific. You'll typically want to use:
8
+ 1. "english_cleaners" for English text
9
+ 2. "transliteration_cleaners" for non-English text that can be transliterated to ASCII using
10
+ the Unidecode library (https://pypi.python.org/pypi/Unidecode)
11
+ 3. "basic_cleaners" if you do not want to transliterate (in this case, you should also update
12
+ the symbols in symbols.py to match your data).
13
+ """
14
+
15
+ import re
16
+ from unidecode import unidecode
17
+ from phonemizer import phonemize
18
+ from phonemizer.backend import EspeakBackend
19
+
20
# Module-level espeak phonemizer backends, constructed once when this module is
# imported. Both are created with preserve_punctuation=True and with_stress=True.
# "ca" backend, used by catalan_cleaners:
backend = EspeakBackend("ca", preserve_punctuation=True, with_stress=True)
# "en-us" backend, used by english_cleaners3:
backend_en = EspeakBackend("en-us", preserve_punctuation=True, with_stress=True)
22
+
23
# Matches any run of one or more whitespace characters:
_whitespace_re = re.compile(r"\s+")

# (compiled pattern, expansion) pairs. Each pattern matches the abbreviation at a
# word boundary followed by a literal period, ignoring case:
_abbreviations = [
    (re.compile(rf"\b{abbreviation}\.", re.IGNORECASE), expansion)
    for abbreviation, expansion in (
        ("mrs", "misess"),
        ("mr", "mister"),
        ("dr", "doctor"),
        ("st", "saint"),
        ("co", "company"),
        ("jr", "junior"),
        ("maj", "major"),
        ("gen", "general"),
        ("drs", "doctors"),
        ("rev", "reverend"),
        ("lt", "lieutenant"),
        ("hon", "honorable"),
        ("sgt", "sergeant"),
        ("capt", "captain"),
        ("esq", "esquire"),
        ("ltd", "limited"),
        ("col", "colonel"),
        ("ft", "fort"),
    )
]
50
+
51
+
52
def expand_abbreviations(text):
    """Replace every abbreviation listed in `_abbreviations` with its expansion.

    The substitutions are applied one after another, in table order.
    """
    for pattern, expansion in _abbreviations:
        text = pattern.sub(expansion, text)
    return text
56
+
57
+
58
def expand_numbers(text):
    """Return `text` as processed by `normalize_numbers`."""
    # NOTE(review): `normalize_numbers` is neither defined nor imported in the
    # visible part of this module, so calling this function is expected to raise
    # NameError. Confirm, and restore the missing import if number expansion is needed.
    return normalize_numbers(text)
60
+
61
+
62
def lowercase(text):
    """Return a lower-cased copy of `text`."""
    lowered = text.lower()
    return lowered
64
+
65
+
66
def collapse_whitespace(text):
    """Replace every run of whitespace in `text` with a single space."""
    return _whitespace_re.sub(" ", text)
68
+
69
+
70
def convert_to_ascii(text):
    """Return `text` transliterated by the Unidecode library's `unidecode`."""
    ascii_text = unidecode(text)
    return ascii_text
72
+
73
+
74
def basic_cleaners(text):
    """Lowercase `text`, then collapse whitespace; no transliteration is applied."""
    return collapse_whitespace(lowercase(text))
79
+
80
+
81
def transliteration_cleaners(text):
    """Transliterate `text` to ASCII, lowercase it, then collapse whitespace."""
    ascii_lowered = lowercase(convert_to_ascii(text))
    return collapse_whitespace(ascii_lowered)
87
+
88
+
89
def english_cleaners(text):
    """English pipeline: normalize the text, then phonemize it with espeak (en-us).

    Normalization is ASCII transliteration, lowercasing and abbreviation
    expansion, in that order. Whitespace in the phonemized output is collapsed.
    """
    normalized = expand_abbreviations(lowercase(convert_to_ascii(text)))
    phonemized = phonemize(normalized, language="en-us", backend="espeak", strip=True)
    return collapse_whitespace(phonemized)
97
+
98
+
99
def english_cleaners2(text):
    """English pipeline like `english_cleaners`, but keeping punctuation and stress.

    The text is transliterated to ASCII, lowercased and abbreviation-expanded,
    then phonemized with espeak (en-us) using preserve_punctuation=True and
    with_stress=True. Whitespace in the phonemized output is collapsed.
    """
    normalized = expand_abbreviations(lowercase(convert_to_ascii(text)))
    phonemized = phonemize(
        normalized,
        language="en-us",
        backend="espeak",
        strip=True,
        preserve_punctuation=True,
        with_stress=True,
    )
    return collapse_whitespace(phonemized)
114
+
115
+
116
def english_cleaners3(text):
    """English pipeline that phonemizes with the module-level `backend_en`.

    The text is transliterated to ASCII, lowercased and abbreviation-expanded,
    then passed to `backend_en.phonemize` as a single-item list. Whitespace in
    the first (only) result is collapsed.
    """
    normalized = expand_abbreviations(lowercase(convert_to_ascii(text)))
    phonemized = backend_en.phonemize([normalized], strip=True)[0]
    return collapse_whitespace(phonemized)
124
+
125
+
126
def catalan_cleaners(text):
    """Catalan pipeline: lowercase, phonemize with the module-level `backend`.

    Unlike the English pipelines, no ASCII transliteration or abbreviation
    expansion is applied. Whitespace in the phonemized output is collapsed.
    """
    lowered = lowercase(text)
    phonemized = backend.phonemize([lowered], strip=True)[0]
    return collapse_whitespace(phonemized)
text/symbols.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ from https://github.com/keithito/tacotron """
2
+
3
+ """
4
+ Defines the set of symbols used in text input to the model.
5
+ """
6
# Symbol groups; their concatenation order below determines each symbol's index.
# Padding symbol (placed first, so it is at index 0 of `symbols`):
_pad = "_"
# Punctuation characters, including the space character as the last entry:
_punctuation = ';:,.!?¡¿—…"«»“” '
# ASCII letters, upper case then lower case:
_letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
# IPA characters. NOTE: the apostrophe (') occurs twice near the end of this string.
_letters_ipa = "ɑɐɒæɓʙβɔɕçɗɖðʤəɘɚɛɜɝɞɟʄɡɠɢʛɦɧħɥʜɨɪʝɭɬɫɮʟɱɯɰŋɳɲɴøɵɸθœɶʘɹɺɾɻʀʁɽʂʃʈʧʉʊʋⱱʌɣɤʍχʎʏʑʐʒʔʡʕʢǀǁǂǃˈˌːˑʼʴʰʱʲʷˠˤ˞↓↑→↗↘'̩'ᵻ"


# Export all symbols:
# Plain concatenation with no de-duplication, so a character repeated in the
# strings above appears more than once in this list.
symbols = [_pad] + list(_punctuation) + list(_letters) + list(_letters_ipa)

# Special symbol ids
# Index of the space character within `symbols`:
SPACE_ID = symbols.index(" ")
utils.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
class HParams:
    """Hyperparameter container with both attribute and item access.

    Every keyword argument becomes an instance attribute. Values that are
    dicts (including dict subclasses) are converted recursively into nested
    `HParams` objects, so `hp.model.dim` and `hp["model"]["dim"]` both work.
    """

    def __init__(self, **kwargs):
        for k, v in kwargs.items():
            # isinstance rather than an exact type comparison, so dict
            # subclasses such as OrderedDict are wrapped as well.
            if isinstance(v, dict):
                v = HParams(**v)
            self[k] = v

    def keys(self):
        """Return a view of the stored parameter names."""
        return self.__dict__.keys()

    def items(self):
        """Return a view of the stored (name, value) pairs."""
        return self.__dict__.items()

    def values(self):
        """Return a view of the stored parameter values."""
        return self.__dict__.values()

    def __len__(self):
        return len(self.__dict__)

    def __getitem__(self, key):
        # Item access delegates to attribute access, so a missing key raises
        # AttributeError rather than KeyError.
        return getattr(self, key)

    def __setitem__(self, key, value):
        return setattr(self, key, value)

    def __contains__(self, key):
        return key in self.__dict__

    def __repr__(self):
        return self.__dict__.__repr__()
33
+
34
def get_hparams_from_file(config_path):
    """Load a JSON config file and return its contents as an `HParams` object.

    Args:
      config_path: path of the JSON file to read; its top-level value must be
        an object, because it is expanded into keyword arguments.
    Returns:
      An `HParams` instance built from the parsed JSON.
    """
    # Decode explicitly as UTF-8 rather than with the platform's default
    # encoding, so non-ASCII config values load the same way everywhere.
    with open(config_path, "r", encoding="utf-8") as f:
        config = json.load(f)

    return HParams(**config)