Spaces:
Running
on
Zero
Running
on
Zero
update
Browse files
app.py
CHANGED
@@ -32,13 +32,19 @@ tts = FireRedTTS(
|
|
32 |
)
|
33 |
|
34 |
def tts_inference(text, prompt_wav='examples/prompt_1.wav', lang='zh'):
    """Synthesize speech for ``text`` and return it as 16-bit samples.

    Args:
        text: Text to synthesize.
        prompt_wav: Path of the prompt audio forwarded to ``tts.synthesize``.
        lang: Language code forwarded to ``tts.synthesize``.

    Returns:
        A ``(sampling_rate, samples)`` tuple where ``samples`` is an
        ``np.int16`` array and ``sampling_rate`` is the module-level value
        defined elsewhere in this file.
    """
    # The first element of the result is converted to a NumPy array;
    # presumably it is a torch tensor (it exposes detach/cpu/numpy).
    outputs = tts.synthesize(prompt_wav=prompt_wav, text=text, lang=lang)
    waveform = outputs[0].detach().cpu().numpy()

    # Scale the float waveform to the int16 range and cast.
    pcm = (waveform * 32768).astype(np.int16)
    return sampling_rate, pcm
|
43 |
|
44 |
|
@@ -50,7 +56,7 @@ iface = gr.Interface(
|
|
50 |
gr.Dropdown(["en", "zh"], label="Select language"),
|
51 |
],
|
52 |
outputs=gr.Audio(label="Generated audio"),
|
53 |
-
title="
|
54 |
# description="Enter some text and listen to the generated speech."
|
55 |
)
|
56 |
|
|
|
32 |
)
|
33 |
|
34 |
def normalize_to_int16(audio, peak=0.9):
    """Peak-normalize a float waveform and convert it to int16 samples.

    Args:
        audio: Array-like float waveform.
        peak: Target peak amplitude after normalization, as a fraction of
            full scale. Defaults to 0.9, the value used by ``tts_inference``.

    Returns:
        An ``np.int16`` array with the same shape as ``audio``.
    """
    audio = np.asarray(audio)

    # An empty or all-zero waveform has no peak to normalize against;
    # dividing by it would raise (empty) or fill the array with NaNs (zeros).
    max_abs = np.max(np.abs(audio)) if audio.size else 0.0
    if max_abs > 0:
        audio = audio / max_abs * peak

    # Clip before casting so out-of-range values saturate instead of
    # wrapping around in the int16 conversion.
    return np.clip(audio * 32768, -32768, 32767).astype(np.int16)


def tts_inference(text, prompt_wav='examples/prompt_1.wav', lang='zh'):
    """Synthesize speech for ``text`` and return volume-normalized audio.

    Args:
        text: Text to synthesize.
        prompt_wav: Path of the prompt audio forwarded to ``tts.synthesize``.
        lang: Language code forwarded to ``tts.synthesize``.

    Returns:
        A ``(sampling_rate, samples)`` tuple where ``samples`` is an
        ``np.int16`` array and ``sampling_rate`` is the module-level value
        defined elsewhere in this file.
    """
    # Model inference. The first element of the result is converted to a
    # NumPy array; presumably it is a torch tensor (detach/cpu/numpy).
    syn_audio = tts.synthesize(
        prompt_wav=prompt_wav,
        text=text,
        lang=lang,
    )[0].detach().cpu().numpy()

    # Normalize volume and convert audio data type
    return sampling_rate, normalize_to_int16(syn_audio)
|
49 |
|
50 |
|
|
|
56 |
gr.Dropdown(["en", "zh"], label="Select language"),
|
57 |
],
|
58 |
outputs=gr.Audio(label="Generated audio"),
|
59 |
+
title="FireRedTTS: A Foundation Text-To-Speech Framework for Industry-Level Generative Speech Applications",
|
60 |
# description="Enter some text and listen to the generated speech."
|
61 |
)
|
62 |
|