Spaces:
Running
Running
Yurii Paniv
committed on
Commit
·
9caae98
1
Parent(s):
df56c7b
Add ukrainian-accentor support
Browse files- app.py +6 -4
- formatter.py +4 -4
- requirements.txt +2 -1
- stress.py +14 -9
- stress_with_model.py +33 -0
- ukrainian-accentor +1 -0
app.py
CHANGED
@@ -12,7 +12,8 @@ import torch
|
|
12 |
|
13 |
|
14 |
class StressOption(Enum):
|
15 |
-
AutomaticStress = "Автоматичні наголоси"
|
|
|
16 |
|
17 |
|
18 |
class VoiceOption(Enum):
|
@@ -66,9 +67,9 @@ def tts(text: str, voice: str, stress: str):
|
|
66 |
print("Voice", voice)
|
67 |
print("Stress:", stress)
|
68 |
print("Time:", datetime.utcnow())
|
69 |
-
|
70 |
speaker_name = "male1" if voice == VoiceOption.MaleVoice.value else "female3"
|
71 |
-
text = preprocess_text(text,
|
72 |
text_limit = 1200
|
73 |
text = (
|
74 |
text if len(text) < text_limit else text[0:text_limit]
|
@@ -110,7 +111,8 @@ iface = gr.Interface(
|
|
110 |
+ "Github: [https://github.com/robinhad/ukrainian-tts](https://github.com/robinhad/ukrainian-tts) \n"
|
111 |
+ "Model training - [Yurii Paniv @robinhad](https://github.com/robinhad) \n"
|
112 |
+ "Mykyta and Olena dataset - [Yehor Smoliakov @egorsmkv](https://github.com/egorsmkv) \n"
|
113 |
-
+ "Autostress using [ukrainian-word-stress](https://github.com/lang-uk/ukrainian-word-stress) - [Oleksiy Syvokon @asivokon](https://github.com/asivokon) \n"
|
|
|
114 |
+ f'<center><img src="{badge}" alt="visitors badge"/></center>',
|
115 |
examples=[
|
116 |
[
|
|
|
12 |
|
13 |
|
14 |
class StressOption(Enum):
|
15 |
+
AutomaticStress = "Автоматичні наголоси (за словником)"
|
16 |
+
AutomaticStressWithModel = "Автоматичні наголоси (за допомогою моделі)"
|
17 |
|
18 |
|
19 |
class VoiceOption(Enum):
|
|
|
67 |
print("Voice", voice)
|
68 |
print("Stress:", stress)
|
69 |
print("Time:", datetime.utcnow())
|
70 |
+
autostress_with_model = True if stress == StressOption.AutomaticStressWithModel.value else False
|
71 |
speaker_name = "male1" if voice == VoiceOption.MaleVoice.value else "female3"
|
72 |
+
text = preprocess_text(text, autostress_with_model)
|
73 |
text_limit = 1200
|
74 |
text = (
|
75 |
text if len(text) < text_limit else text[0:text_limit]
|
|
|
111 |
+ "Github: [https://github.com/robinhad/ukrainian-tts](https://github.com/robinhad/ukrainian-tts) \n"
|
112 |
+ "Model training - [Yurii Paniv @robinhad](https://github.com/robinhad) \n"
|
113 |
+ "Mykyta and Olena dataset - [Yehor Smoliakov @egorsmkv](https://github.com/egorsmkv) \n"
|
114 |
+
+ "Autostress (with dictionary) using [ukrainian-word-stress](https://github.com/lang-uk/ukrainian-word-stress) - [Oleksiy Syvokon @asivokon](https://github.com/asivokon) \n"
|
115 |
+
+ "Autostress (with model) using [ukrainian-accentor](https://github.com/egorsmkv/ukrainian-accentor) - [Bohdan Mykhailenko @NeonBohdan](https://github.com/NeonBohdan) + [Yehor Smoliakov @egorsmkv](https://github.com/egorsmkv) \n"
|
116 |
+ f'<center><img src="{badge}" alt="visitors badge"/></center>',
|
117 |
examples=[
|
118 |
[
|
formatter.py
CHANGED
@@ -1,9 +1,10 @@
|
|
1 |
import num2words
|
2 |
import re
|
3 |
-
from stress import sentence_to_stress
|
|
|
4 |
|
5 |
|
6 |
-
def preprocess_text(text,
|
7 |
# currencies
|
8 |
text = text.replace("$", "долар")
|
9 |
text = text.replace("₴", "гривня")
|
@@ -77,8 +78,7 @@ def preprocess_text(text, autostress=False):
|
|
77 |
text = text.replace(english_char.upper(), english[english_char].upper())
|
78 |
text = text.replace(english_char, english[english_char])
|
79 |
|
80 |
-
if
|
81 |
-
text = sentence_to_stress(text)
|
82 |
|
83 |
return text
|
84 |
|
|
|
1 |
import num2words
|
2 |
import re
|
3 |
+
from stress import sentence_to_stress, stress_dict
|
4 |
+
from stress_with_model import stress_with_model
|
5 |
|
6 |
|
7 |
+
def preprocess_text(text, use_autostress_model=False):
|
8 |
# currencies
|
9 |
text = text.replace("$", "долар")
|
10 |
text = text.replace("₴", "гривня")
|
|
|
78 |
text = text.replace(english_char.upper(), english[english_char].upper())
|
79 |
text = text.replace(english_char, english[english_char])
|
80 |
|
81 |
+
text = sentence_to_stress(text, stress_with_model if use_autostress_model else stress_dict)
|
|
|
82 |
|
83 |
return text
|
84 |
|
requirements.txt
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
TTS==0.7.1
|
2 |
-
ukrainian-word-stress==1.0.0
|
|
|
|
1 |
TTS==0.7.1
|
2 |
+
ukrainian-word-stress==1.0.0
|
3 |
+
-r ukrainian-accentor/requirements.txt
|
stress.py
CHANGED
@@ -4,15 +4,7 @@ from ukrainian_word_stress import Stressifier, StressSymbol
|
|
4 |
stressify = Stressifier(stress_symbol=StressSymbol.CombiningAcuteAccent)
|
5 |
|
6 |
|
7 |
-
def
|
8 |
-
# save custom stress positions
|
9 |
-
all_stresses = []
|
10 |
-
orig_words = sentence.split(" ")
|
11 |
-
for i in range(0, len(orig_words)):
|
12 |
-
if "+" in orig_words[i]:
|
13 |
-
all_stresses.append(i)
|
14 |
-
|
15 |
-
# add stress before vowel
|
16 |
stressed = stressify(sentence.replace("+", "")).replace(StressSymbol.CombiningAcuteAccent, "+")
|
17 |
new_stressed = ""
|
18 |
start = 0
|
@@ -29,6 +21,19 @@ def sentence_to_stress(sentence: str) -> str:
|
|
29 |
else:
|
30 |
new_stressed += stressed[last:]
|
31 |
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
# replace already stressed words
|
34 |
if len(all_stresses) > 0:
|
|
|
4 |
stressify = Stressifier(stress_symbol=StressSymbol.CombiningAcuteAccent)
|
5 |
|
6 |
|
7 |
+
def stress_dict(sentence: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
stressed = stressify(sentence.replace("+", "")).replace(StressSymbol.CombiningAcuteAccent, "+")
|
9 |
new_stressed = ""
|
10 |
start = 0
|
|
|
21 |
else:
|
22 |
new_stressed += stressed[last:]
|
23 |
break
|
24 |
+
return new_stressed
|
25 |
+
|
26 |
+
|
27 |
+
def sentence_to_stress(sentence: str, stress_function=stress_dict) -> str:
|
28 |
+
# save custom stress positions
|
29 |
+
all_stresses = []
|
30 |
+
orig_words = sentence.split(" ")
|
31 |
+
for i in range(0, len(orig_words)):
|
32 |
+
if "+" in orig_words[i]:
|
33 |
+
all_stresses.append(i)
|
34 |
+
|
35 |
+
# add stress before vowel
|
36 |
+
new_stressed = stress_function(sentence)
|
37 |
|
38 |
# replace already stressed words
|
39 |
if len(all_stresses) > 0:
|
stress_with_model.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import torch
|
2 |
+
|
3 |
+
# Load the accentor model from its torch.package archive once, at import time
|
4 |
+
importer = torch.package.PackageImporter("ukrainian-accentor/accentor-lite.pt")
|
5 |
+
accentor = importer.load_pickle("uk-accentor", "model")
|
6 |
+
|
7 |
+
# Run the loaded accentor model on a piece of text
|
8 |
+
def stress_with_model(text: str):
    """Lowercase *text* and pass it through the accentor model.

    Returns whatever ``accentor.process(..., mode='plus')`` produces for the
    lowercased input. If the model raises ``ValueError``, the lowercased
    text is returned without any stress marks added.

    NOTE(review): ``mode='plus'`` presumably makes the model mark stress
    with a "+" character -- confirm against the ukrainian-accentor docs.
    """
    lowered = text.lower()
    try:
        return accentor.process(lowered, mode='plus')
    except ValueError:  # TODO: apply fix for cases when there are no vowels
        # Best-effort fallback: hand back the (lowercased) input untouched.
        return lowered
|
15 |
+
|
16 |
+
|
17 |
+
if __name__ == "__main__":
    # Manual smoke test: print the model output for a handful of sentences,
    # including ones with user-supplied "+" marks and ones without vowels.
    # The last sentence is intentionally listed twice, as in the original
    # sequence of calls.
    demo_sentences = (
        "Кам'янець-Подільський - місто в Хмельницькій області України, центр Кам'янець-Подільської міської об'єднаної територіальної громади і Кам'янець-Подільського району.",
        "Привіт, як тебе звати?",
        "АННА - український панк-рок гурт",
        "Не тільки в Україні таке може бути.",
        "Не тільки в +Укра+їні т+аке може бути.",
        "два + два",
        "Н тльк в крн тк мж бт.",
        "Н тльк в крн тк мж бт.",
    )
    for sentence in demo_sentences:
        print(stress_with_model(sentence))
|
ukrainian-accentor
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Subproject commit 44f178282efd7eb3770fd082cab2b795351efe76
|