Spaces:
Running
Running
Yurii Paniv
committed on
Commit
·
f9e5028
1
Parent(s):
a63a536
Add automatic stress support
Browse files- .gitmodules +3 -0
- app.py +19 -8
- stress.py +60 -0
- ukrainian-accentor +1 -0
.gitmodules
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
[submodule "ukrainian-accentor"]
|
2 |
+
path = ukrainian-accentor
|
3 |
+
url = https://github.com/egorsmkv/ukrainian-accentor.git
|
app.py
CHANGED
@@ -8,6 +8,12 @@ import requests
|
|
8 |
from os.path import exists
|
9 |
from formatter import preprocess_text
|
10 |
from datetime import datetime
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
MODEL_NAMES = [
|
13 |
"uk/mykyta/vits-tts"
|
@@ -30,7 +36,7 @@ def download(url, file_name):
|
|
30 |
for MODEL_NAME in MODEL_NAMES:
|
31 |
print(f"downloading {MODEL_NAME}")
|
32 |
release_number = "v2.0.0-beta"
|
33 |
-
model_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/model.pth"
|
34 |
config_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/config.json"
|
35 |
|
36 |
model_path = "model.pth"
|
@@ -43,13 +49,14 @@ for MODEL_NAME in MODEL_NAMES:
|
|
43 |
#MODELS[MODEL_NAME] = synthesizer
|
44 |
|
45 |
|
46 |
-
def tts(text: str):
|
47 |
synthesizer = Synthesizer(
|
48 |
model_path, config_path, None, None, None,
|
49 |
)
|
50 |
text = preprocess_text(text)
|
51 |
-
text_limit =
|
52 |
text = text if len(text) < text_limit else text[0:text_limit] # mitigate crashes on hf space
|
|
|
53 |
print(text, datetime.utcnow())
|
54 |
if synthesizer is None:
|
55 |
raise NameError("model not found")
|
@@ -68,10 +75,10 @@ iface = gr.Interface(
|
|
68 |
label="Input",
|
69 |
default="Введ+іть, б+удь л+аска, сво+є р+ечення.",
|
70 |
),
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
],
|
76 |
outputs=gr.outputs.Audio(label="Output"),
|
77 |
title="🐸💬🇺🇦 - Coqui TTS",
|
@@ -79,5 +86,9 @@ iface = gr.Interface(
|
|
79 |
description="Україномовний🇺🇦 TTS за допомогою Coqui TTS (для наголосу використовуйте + перед голосною)",
|
80 |
article="Якщо вам подобається, підтримайте за посиланням: [SUPPORT LINK](https://send.monobank.ua/jar/48iHq4xAXm), " +
|
81 |
"Github: [https://github.com/robinhad/ukrainian-tts](https://github.com/robinhad/ukrainian-tts)",
|
|
|
|
|
|
|
|
|
82 |
)
|
83 |
-
iface.launch(enable_queue=True)
|
|
|
8 |
from os.path import exists
|
9 |
from formatter import preprocess_text
|
10 |
from datetime import datetime
|
11 |
+
from stress import sentence_to_stress
|
12 |
+
from enum import Enum
|
13 |
+
|
14 |
+
class StressOption(Enum):
    """Ways of obtaining stress marks for the text sent to the synthesizer.

    The member values are the Ukrainian labels offered as radio-button
    choices in the interface; ``tts`` compares the selected label against
    ``AutomaticStress.value`` to decide whether to run the accentor.
    """

    # The text is synthesized as typed (after preprocessing and truncation).
    ManualStress = "Наголоси вручну"
    # The text is first passed through ``sentence_to_stress``.
    AutomaticStress = "Автоматичні наголоси (Beta)"
|
17 |
|
18 |
MODEL_NAMES = [
|
19 |
"uk/mykyta/vits-tts"
|
|
|
36 |
for MODEL_NAME in MODEL_NAMES:
|
37 |
print(f"downloading {MODEL_NAME}")
|
38 |
release_number = "v2.0.0-beta"
|
39 |
+
model_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/model-inference.pth"
|
40 |
config_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/config.json"
|
41 |
|
42 |
model_path = "model.pth"
|
|
|
49 |
#MODELS[MODEL_NAME] = synthesizer
|
50 |
|
51 |
|
52 |
+
def tts(text: str, stress: str):
|
53 |
synthesizer = Synthesizer(
|
54 |
model_path, config_path, None, None, None,
|
55 |
)
|
56 |
text = preprocess_text(text)
|
57 |
+
text_limit = 150
|
58 |
text = text if len(text) < text_limit else text[0:text_limit] # mitigate crashes on hf space
|
59 |
+
text = sentence_to_stress(text) if stress == StressOption.AutomaticStress.value else text
|
60 |
print(text, datetime.utcnow())
|
61 |
if synthesizer is None:
|
62 |
raise NameError("model not found")
|
|
|
75 |
label="Input",
|
76 |
default="Введ+іть, б+удь л+аска, сво+є р+ечення.",
|
77 |
),
|
78 |
+
gr.inputs.Radio(
|
79 |
+
label="Опції",
|
80 |
+
choices=[option.value for option in StressOption],
|
81 |
+
),
|
82 |
],
|
83 |
outputs=gr.outputs.Audio(label="Output"),
|
84 |
title="🐸💬🇺🇦 - Coqui TTS",
|
|
|
86 |
description="Україномовний🇺🇦 TTS за допомогою Coqui TTS (для наголосу використовуйте + перед голосною)",
|
87 |
article="Якщо вам подобається, підтримайте за посиланням: [SUPPORT LINK](https://send.monobank.ua/jar/48iHq4xAXm), " +
|
88 |
"Github: [https://github.com/robinhad/ukrainian-tts](https://github.com/robinhad/ukrainian-tts)",
|
89 |
+
examples=[
|
90 |
+
["Введ+іть, б+удь л+аска, сво+є р+ечення.", StressOption.ManualStress.value],
|
91 |
+
["Привіт, як тебе звати?", StressOption.AutomaticStress.value]
|
92 |
+
]
|
93 |
)
|
94 |
+
iface.launch(enable_queue=True, prevent_thread_lock=True)
|
stress.py
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from unittest import skip
|
2 |
+
from gruut import sentences
|
3 |
+
import torch
|
4 |
+
|
5 |
+
# The accentor is loaded once at import time from the torch.package archive
# that lives in the ``ukrainian-accentor`` git submodule. The path is relative
# to the current working directory.
_ACCENTOR_ARCHIVE = "ukrainian-accentor/accentor-lite.pt"
_ACCENTOR_PACKAGE = "uk-accentor"

importer = torch.package.PackageImporter(_ACCENTOR_ARCHIVE)
accentor = importer.load_pickle(_ACCENTOR_PACKAGE, "model")
replace_accents = importer.load_pickle(_ACCENTOR_PACKAGE, "replace_accents")

# To run the model on GPU:
# accentor.cuda()
# To move it back to CPU:
# accentor.cpu()
|
13 |
+
|
14 |
+
# Lowercase letters of the Ukrainian alphabet (all 33). Characters outside
# this set are treated as word separators by the stress-marking code.
# The previous value omitted "в", which caused every word containing it to be
# split at that letter and stressed piece by piece.
alphabet = "абвгґдеєжзиіїйклмнопрстуфхцчшщьюя"
|
15 |
+
|
16 |
+
def accent_word(word):
    """Return the accentor's stressed form of a single word.

    The word is sent to ``accentor.predict`` as a one-element batch in
    ``'stress'`` mode, and each prediction is post-processed with
    ``replace_accents`` (presumably converting the model's stress marks to
    the ``+`` notation the TTS input uses -- confirm against the accentor
    package). The first, and for a one-word batch only, result is returned.
    """
    predictions = accentor.predict([word], mode='stress')
    converted = list(map(replace_accents, predictions))
    return converted[0]
|
20 |
+
|
21 |
+
def sentence_to_stress(sentence):
    """Return ``sentence`` with every run of Ukrainian letters stressed.

    The sentence is split on whitespace. Inside each word, maximal runs of
    characters whose lowercase form is in ``alphabet`` are passed to
    ``accent_word``; every other character (punctuation, digits, ``+``,
    apostrophes, hyphens, ...) is copied through unchanged. The processed
    words are joined back with single spaces, so runs of whitespace in the
    input collapse to one space and leading/trailing whitespace is dropped.

    Changes from the previous implementation:
    * Letters are classified one character at a time. The old code built the
      mask from ``word.lower()`` but indexed into ``word``, which raised
      ``IndexError`` for characters whose lowercase form is longer than one
      code point (e.g. "İ").
    * No trailing space is appended after the last word.
    * Tokens are collected in a single linear pass instead of flattening
      with ``sum(..., start=[])`` and re-scanning every token.

    NOTE(review): a word such as "Кам'янець" is split at the apostrophe and
    each part is stressed on its own, as before -- confirm whether the
    accentor should receive such words whole.
    """
    stressed_words = []
    for word in sentence.split():
        pieces = []
        run_start = 0  # index where the current run of letters begins
        for position, character in enumerate(word):
            if character.lower() in alphabet:
                continue
            # A non-letter ends the current run: stress the run (if any) and
            # keep the separator character itself untouched.
            if position > run_start:
                pieces.append(accent_word(word[run_start:position]))
            pieces.append(character)
            run_start = position + 1
        # Trailing run of letters after the last separator (or the whole word).
        if run_start < len(word):
            pieces.append(accent_word(word[run_start:]))
        stressed_words.append("".join(pieces))
    return " ".join(stressed_words)
|
49 |
+
|
50 |
+
|
51 |
+
if __name__ == "__main__":
    # Manual smoke test: stress a full sentence, then show the raw accentor
    # output next to its post-processed form for a single word.
    demo_sentence = "Кам'янець-Подільський - місто в Хмельницькій області України, центр Кам'янець-Подільської міської об'єднаної територіальної громади і Кам'янець-Подільського району."
    print(sentence_to_stress(demo_sentence))

    # Other words that are handy for manual checks:
    # "словотворення", "архаїчний", "програма", "а-ля-фуршет"
    raw_predictions = accentor.predict(["привіт"], mode='stress')
    plus_marked = list(map(replace_accents, raw_predictions))

    print('With stress:', raw_predictions)
    print('With pluses:', plus_marked)
|
ukrainian-accentor
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Subproject commit a3dd2cf9341db200853cfd19df142224a47749b2
|