Spaces:
Sleeping
Sleeping
test-rtechs
committed on
Update soni_translate/text_to_speech.py
Browse files
- soni_translate/text_to_speech.py +12 -11
soni_translate/text_to_speech.py
CHANGED
@@ -15,6 +15,7 @@ from .utils import (
|
|
15 |
remove_directory_contents,
|
16 |
remove_files,
|
17 |
run_command,
|
|
|
18 |
)
|
19 |
import numpy as np
|
20 |
from typing import Any, Dict
|
@@ -59,7 +60,7 @@ def error_handling_in_tts(error, segment, TRANSLATE_AUDIO_TO, filename):
|
|
59 |
# Read audio data from the TemporaryFile using soundfile
|
60 |
audio_data, samplerate = sf.read(f)
|
61 |
f.close() # Close the TemporaryFile
|
62 |
-
|
63 |
filename, audio_data, samplerate, format="ogg", subtype="vorbis"
|
64 |
)
|
65 |
|
@@ -73,7 +74,7 @@ def error_handling_in_tts(error, segment, TRANSLATE_AUDIO_TO, filename):
|
|
73 |
sample_rate_aux = 22050
|
74 |
duration = float(segment["end"]) - float(segment["start"])
|
75 |
data = np.zeros(int(sample_rate_aux * duration)).astype(np.float32)
|
76 |
-
|
77 |
filename, data, sample_rate_aux, format="ogg", subtype="vorbis"
|
78 |
)
|
79 |
logger.error("Audio will be replaced -> [silent audio].")
|
@@ -181,7 +182,7 @@ def segments_egde_tts(filtered_edge_segments, TRANSLATE_AUDIO_TO, is_gui):
|
|
181 |
# os.remove(temp_file)
|
182 |
|
183 |
# Save file
|
184 |
-
|
185 |
file=filename,
|
186 |
samplerate=sample_rate,
|
187 |
data=data,
|
@@ -256,7 +257,7 @@ def segments_bark_tts(
|
|
256 |
speech_output.cpu().numpy().squeeze().astype(np.float32),
|
257 |
sampling_rate,
|
258 |
)
|
259 |
-
|
260 |
file=filename,
|
261 |
samplerate=sampling_rate,
|
262 |
data=data_tts,
|
@@ -299,7 +300,7 @@ def uromanize(input_string):
|
|
299 |
stderr=subprocess.PIPE,
|
300 |
)
|
301 |
stdout, stderr = process.communicate()
|
302 |
-
script_path = os.path.join("./uroman", "
|
303 |
|
304 |
command = ["perl", script_path]
|
305 |
|
@@ -362,7 +363,7 @@ def segments_vits_tts(filtered_vits_segments, TRANSLATE_AUDIO_TO):
|
|
362 |
sampling_rate,
|
363 |
)
|
364 |
# Save file
|
365 |
-
|
366 |
file=filename,
|
367 |
samplerate=sampling_rate,
|
368 |
data=data_tts,
|
@@ -667,7 +668,7 @@ def segments_coqui_tts(
|
|
667 |
sampling_rate,
|
668 |
)
|
669 |
# Save file
|
670 |
-
|
671 |
file=filename,
|
672 |
samplerate=sampling_rate,
|
673 |
data=data_tts,
|
@@ -855,7 +856,7 @@ def segments_vits_onnx_tts(filtered_onnx_vits_segments, TRANSLATE_AUDIO_TO):
|
|
855 |
sampling_rate,
|
856 |
)
|
857 |
# Save file
|
858 |
-
|
859 |
file=filename,
|
860 |
samplerate=sampling_rate,
|
861 |
data=data_tts,
|
@@ -925,7 +926,7 @@ def segments_openai_tts(
|
|
925 |
sampling_rate,
|
926 |
)
|
927 |
|
928 |
-
|
929 |
file=filename,
|
930 |
samplerate=sampling_rate,
|
931 |
data=data_tts,
|
@@ -1509,7 +1510,7 @@ def toneconverter_freevc(
|
|
1509 |
target_wav=original_wav_audio_segment,
|
1510 |
)
|
1511 |
|
1512 |
-
|
1513 |
file=save_path,
|
1514 |
samplerate=tts.voice_converter.vc_config.audio.output_sample_rate,
|
1515 |
data=wav,
|
@@ -1571,4 +1572,4 @@ if __name__ == "__main__":
|
|
1571 |
tts_voice03="en-GB-SoniaNeural-Female",
|
1572 |
tts_voice04="en-NZ-MitchellNeural-Male",
|
1573 |
tts_voice05="en-GB-MaisieNeural-Female",
|
1574 |
-
)
|
|
|
15 |
remove_directory_contents,
|
16 |
remove_files,
|
17 |
run_command,
|
18 |
+
write_chunked,
|
19 |
)
|
20 |
import numpy as np
|
21 |
from typing import Any, Dict
|
|
|
60 |
# Read audio data from the TemporaryFile using soundfile
|
61 |
audio_data, samplerate = sf.read(f)
|
62 |
f.close() # Close the TemporaryFile
|
63 |
+
write_chunked(
|
64 |
filename, audio_data, samplerate, format="ogg", subtype="vorbis"
|
65 |
)
|
66 |
|
|
|
74 |
sample_rate_aux = 22050
|
75 |
duration = float(segment["end"]) - float(segment["start"])
|
76 |
data = np.zeros(int(sample_rate_aux * duration)).astype(np.float32)
|
77 |
+
write_chunked(
|
78 |
filename, data, sample_rate_aux, format="ogg", subtype="vorbis"
|
79 |
)
|
80 |
logger.error("Audio will be replaced -> [silent audio].")
|
|
|
182 |
# os.remove(temp_file)
|
183 |
|
184 |
# Save file
|
185 |
+
write_chunked(
|
186 |
file=filename,
|
187 |
samplerate=sample_rate,
|
188 |
data=data,
|
|
|
257 |
speech_output.cpu().numpy().squeeze().astype(np.float32),
|
258 |
sampling_rate,
|
259 |
)
|
260 |
+
write_chunked(
|
261 |
file=filename,
|
262 |
samplerate=sampling_rate,
|
263 |
data=data_tts,
|
|
|
300 |
stderr=subprocess.PIPE,
|
301 |
)
|
302 |
stdout, stderr = process.communicate()
|
303 |
+
script_path = os.path.join("./uroman", "uroman", "uroman.pl")
|
304 |
|
305 |
command = ["perl", script_path]
|
306 |
|
|
|
363 |
sampling_rate,
|
364 |
)
|
365 |
# Save file
|
366 |
+
write_chunked(
|
367 |
file=filename,
|
368 |
samplerate=sampling_rate,
|
369 |
data=data_tts,
|
|
|
668 |
sampling_rate,
|
669 |
)
|
670 |
# Save file
|
671 |
+
write_chunked(
|
672 |
file=filename,
|
673 |
samplerate=sampling_rate,
|
674 |
data=data_tts,
|
|
|
856 |
sampling_rate,
|
857 |
)
|
858 |
# Save file
|
859 |
+
write_chunked(
|
860 |
file=filename,
|
861 |
samplerate=sampling_rate,
|
862 |
data=data_tts,
|
|
|
926 |
sampling_rate,
|
927 |
)
|
928 |
|
929 |
+
write_chunked(
|
930 |
file=filename,
|
931 |
samplerate=sampling_rate,
|
932 |
data=data_tts,
|
|
|
1510 |
target_wav=original_wav_audio_segment,
|
1511 |
)
|
1512 |
|
1513 |
+
write_chunked(
|
1514 |
file=save_path,
|
1515 |
samplerate=tts.voice_converter.vc_config.audio.output_sample_rate,
|
1516 |
data=wav,
|
|
|
1572 |
tts_voice03="en-GB-SoniaNeural-Female",
|
1573 |
tts_voice04="en-NZ-MitchellNeural-Male",
|
1574 |
tts_voice05="en-GB-MaisieNeural-Female",
|
1575 |
+
)
|