demo fx
Browse files
- README.md +4 -2
- demo.py +5 -13
- msinference.py +6 -6
README.md
CHANGED
@@ -26,9 +26,11 @@ Expansion of [SHIFT TTS tool](https://github.com/audeering/shift) with [AudioGen
|
|
26 |
|
27 |
## Available Voices
|
28 |
|
29 |
-
<a href="https://audeering.github.io/shift/">Native English
|
30 |
|
31 |
-
|
|
|
|
|
32 |
|
33 |
## API
|
34 |
|
|
|
26 |
|
27 |
## Available Voices
|
28 |
|
29 |
+
<a href="https://audeering.github.io/shift/">Native English!</a> / <a href="https://huggingface.co/dkounadis/artificial-styletts2/discussions/1#6783e3b00e7d90facec060c6">Non-native English accents!</a> / <a href="https://huggingface.co/dkounadis/artificial-styletts2/blob/main/Utils/all_langs.csv">Foreign languages</a>
|
30 |
|
31 |
+
##
|
32 |
+
|
33 |
+
[TTS Demo](https://huggingface.co/dkounadis/artificial-styletts2/blob/main/demo.py)
|
34 |
|
35 |
## API
|
36 |
|
demo.py
CHANGED
@@ -4,7 +4,7 @@ import msinference
|
|
4 |
|
5 |
|
6 |
def tts_entry(text='A quick brown fox jumps over the lazy dog. Sweet dreams are made of this, I traveled the world and the seven seas.',
|
7 |
-
voice='
|
8 |
speed=1.4, # only for non-english
|
9 |
affect = True # False = high clarity for partially sight
|
10 |
):
|
@@ -14,11 +14,11 @@ def tts_entry(text='A quick brown fox jumps over the lazy dog. Sweet dreams are
|
|
14 |
|
15 |
or
|
16 |
|
17 |
-
voice :
|
18 |
|
19 |
or
|
20 |
|
21 |
-
voice : 'deu' #
|
22 |
'''
|
23 |
|
24 |
# StyleTTS2 - En
|
@@ -33,11 +33,7 @@ def tts_entry(text='A quick brown fox jumps over the lazy dog. Sweet dreams are
|
|
33 |
'_low', '') + '.wav')
|
34 |
|
35 |
x = msinference.inference(text,
|
36 |
-
style_vector
|
37 |
-
alpha=0.3,
|
38 |
-
beta=0.7,
|
39 |
-
diffusion_steps=7,
|
40 |
-
embedding_scale=1)
|
41 |
|
42 |
# mimic-3 format of voice (English text - Foreign accent)
|
43 |
|
@@ -48,11 +44,7 @@ def tts_entry(text='A quick brown fox jumps over the lazy dog. Sweet dreams are
|
|
48 |
'_low', '') + '.wav')
|
49 |
|
50 |
x = msinference.inference(text,
|
51 |
-
style_vector
|
52 |
-
alpha=0.3,
|
53 |
-
beta=0.7,
|
54 |
-
diffusion_steps=7,
|
55 |
-
embedding_scale=1)
|
56 |
|
57 |
|
58 |
# Fallback - MMS TTS - Non-English
|
|
|
4 |
|
5 |
|
6 |
def tts_entry(text='A quick brown fox jumps over the lazy dog. Sweet dreams are made of this, I traveled the world and the seven seas.',
|
7 |
+
voice='af_ZA_google-nwu_1919', # 'serbian', # 'en_US/vctk_low#p276', 'isl', 'abi',
|
8 |
speed=1.4, # only for non-english
|
9 |
affect = True # False = high clarity for partially sight
|
10 |
):
|
|
|
14 |
|
15 |
or
|
16 |
|
17 |
+
voice : 'af_ZA_google-nwu_1919' # from english non-native accents -> https://huggingface.co/dkounadis/artificial-styletts2/discussions/1#6783e3b00e7d90facec060c6
|
18 |
|
19 |
or
|
20 |
|
21 |
+
voice : 'deu' # foreign langs -> https://huggingface.co/dkounadis/artificial-styletts2/blob/main/Utils/all_langs.csv
|
22 |
'''
|
23 |
|
24 |
# StyleTTS2 - En
|
|
|
33 |
'_low', '') + '.wav')
|
34 |
|
35 |
x = msinference.inference(text,
|
36 |
+
style_vector)
|
|
|
|
|
|
|
|
|
37 |
|
38 |
# mimic-3 format of voice (English text - Foreign accent)
|
39 |
|
|
|
44 |
'_low', '') + '.wav')
|
45 |
|
46 |
x = msinference.inference(text,
|
47 |
+
style_vector)
|
|
|
|
|
|
|
|
|
48 |
|
49 |
|
50 |
# Fallback - MMS TTS - Non-English
|
msinference.py
CHANGED
@@ -169,12 +169,12 @@ sampler = DiffusionSampler(
|
|
169 |
clamp=False
|
170 |
)
|
171 |
|
172 |
-
def inference(text,
|
173 |
-
ref_s,
|
174 |
-
alpha = 0.3,
|
175 |
-
beta = 0.7,
|
176 |
-
diffusion_steps=
|
177 |
-
embedding_scale=1,
|
178 |
use_gruut=False):
|
179 |
text = text.strip()
|
180 |
ps = global_phonemizer.phonemize([text])
|
|
|
169 |
clamp=False
|
170 |
)
|
171 |
|
172 |
+
def inference(text,
|
173 |
+
ref_s,
|
174 |
+
alpha = 0.3,
|
175 |
+
beta = 0.7,
|
176 |
+
diffusion_steps=7,
|
177 |
+
embedding_scale=1,
|
178 |
use_gruut=False):
|
179 |
text = text.strip()
|
180 |
ps = global_phonemizer.phonemize([text])
|