Spaces:
Running
Running
Yurii Paniv
committed on
Commit
·
349b2ad
1
Parent(s):
1ce668d
Improve number handling
Browse files
- app.py +8 -4
- formatter.py +42 -13
app.py
CHANGED
@@ -7,7 +7,6 @@ import requests
|
|
7 |
from os.path import exists
|
8 |
from formatter import preprocess_text
|
9 |
from datetime import datetime
|
10 |
-
from stress import sentence_to_stress
|
11 |
from enum import Enum
|
12 |
import torch
|
13 |
|
@@ -46,11 +45,15 @@ if synthesizer is None:
|
|
46 |
raise NameError("model not found")
|
47 |
|
48 |
def tts(text: str, stress: str):
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
50 |
text_limit = 1200
|
51 |
text = text if len(text) < text_limit else text[0:text_limit] # mitigate crashes on hf space
|
52 |
-
|
53 |
-
print(text, stress, datetime.utcnow())
|
54 |
|
55 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
|
56 |
with torch.no_grad():
|
@@ -81,6 +84,7 @@ iface = gr.Interface(
|
|
81 |
["Введ+іть, б+удь л+аска, сво+є р+ечення.", StressOption.ManualStress.value],
|
82 |
["Введіть, будь ласка, своє речення.", StressOption.ManualStress.value],
|
83 |
["Привіт, як тебе звати?", StressOption.AutomaticStress.value],
|
|
|
84 |
]
|
85 |
)
|
86 |
iface.launch(enable_queue=True, prevent_thread_lock=True)
|
|
|
7 |
from os.path import exists
|
8 |
from formatter import preprocess_text
|
9 |
from datetime import datetime
|
|
|
10 |
from enum import Enum
|
11 |
import torch
|
12 |
|
|
|
45 |
raise NameError("model not found")
|
46 |
|
47 |
def tts(text: str, stress: str):
|
48 |
+
print("============================")
|
49 |
+
print("Original text:", text)
|
50 |
+
print("Stress:", stress)
|
51 |
+
print("Time:", datetime.utcnow())
|
52 |
+
autostress = True if stress == StressOption.AutomaticStress.value else False
|
53 |
+
text = preprocess_text(text, autostress)
|
54 |
text_limit = 1200
|
55 |
text = text if len(text) < text_limit else text[0:text_limit] # mitigate crashes on hf space
|
56 |
+
print("Converted:", text)
|
|
|
57 |
|
58 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
|
59 |
with torch.no_grad():
|
|
|
84 |
["Введ+іть, б+удь л+аска, сво+є р+ечення.", StressOption.ManualStress.value],
|
85 |
["Введіть, будь ласка, своє речення.", StressOption.ManualStress.value],
|
86 |
["Привіт, як тебе звати?", StressOption.AutomaticStress.value],
|
87 |
+
["Договір підписано 4 квітня 1949 року.", StressOption.AutomaticStress.value],
|
88 |
]
|
89 |
)
|
90 |
iface.launch(enable_queue=True, prevent_thread_lock=True)
|
formatter.py
CHANGED
@@ -1,18 +1,42 @@
|
|
1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
# replace apostrophe
|
3 |
text = text.replace("`", "'")
|
4 |
text = text.replace("ʼ", "'")
|
5 |
# numbers
|
6 |
-
text =
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
# speak english alphabet using brute force transliteration
|
17 |
english = {
|
18 |
"a": "а",
|
@@ -47,9 +71,14 @@ def preprocess_text(text):
|
|
47 |
text = text.replace(english_char.upper(), english[english_char].upper())
|
48 |
text = text.replace(english_char, english[english_char])
|
49 |
|
50 |
-
|
|
|
|
|
51 |
return text
|
52 |
|
53 |
|
54 |
if __name__ == "__main__":
|
55 |
-
print(preprocess_text("Quality of life update"))
|
|
|
|
|
|
|
|
1 |
+
import num2words
|
2 |
+
import re
|
3 |
+
from stress import sentence_to_stress
|
4 |
+
|
5 |
+
def preprocess_text(text, autostress=False):
|
6 |
+
# currencies
|
7 |
+
text = text.replace("$", "долар")
|
8 |
+
text = text.replace("₴", "гривня")
|
9 |
+
text = text.replace("€", "євро")
|
10 |
# replace apostrophe
|
11 |
text = text.replace("`", "'")
|
12 |
text = text.replace("ʼ", "'")
|
13 |
# numbers
|
14 |
+
text = re.sub(r'(\d)\s+(\d)', r'\1\2', text)
|
15 |
+
|
16 |
+
def detect_num_and_convert(word):
|
17 |
+
numbers = "0123456789,."
|
18 |
+
is_number = all(map(lambda x: x in numbers, word))
|
19 |
+
if is_number:
|
20 |
+
try:
|
21 |
+
return num2words.num2words(word, lang="uk")
|
22 |
+
except:
|
23 |
+
return word
|
24 |
+
else:
|
25 |
+
return word
|
26 |
+
|
27 |
+
text = " ".join([detect_num_and_convert(word) for word in text.split(" ")])
|
28 |
+
|
29 |
+
# fallback numbers
|
30 |
+
text = text.replace("1", "один ")
|
31 |
+
text = text.replace("2", "два ")
|
32 |
+
text = text.replace("3", "три ")
|
33 |
+
text = text.replace("4", "чотири ")
|
34 |
+
text = text.replace("5", "п'ять ")
|
35 |
+
text = text.replace("6", "шість ")
|
36 |
+
text = text.replace("7", "сім ")
|
37 |
+
text = text.replace("8", "вісім ")
|
38 |
+
text = text.replace("9", "дев'ять ")
|
39 |
+
text = text.replace("0", "нуль ")
|
40 |
# speak english alphabet using brute force transliteration
|
41 |
english = {
|
42 |
"a": "а",
|
|
|
71 |
text = text.replace(english_char.upper(), english[english_char].upper())
|
72 |
text = text.replace(english_char, english[english_char])
|
73 |
|
74 |
+
if autostress:
|
75 |
+
text = sentence_to_stress(text)
|
76 |
+
|
77 |
return text
|
78 |
|
79 |
|
80 |
if __name__ == "__main__":
|
81 |
+
print(preprocess_text("Quality of life update"))
|
82 |
+
print(preprocess_text("Він украв 20000000 $"))
|
83 |
+
print(preprocess_text("111 000 000 000 доларів державного боргу."))
|
84 |
+
print(preprocess_text("11100000001 доларів державного боргу."))
|