Spaces:

fireredteam
/

FireRedTTS

Running on Zero

hhguo committed on Sep 24, 2024

Commit

9c31cdf

1 Parent(s): 1ed7dcd

update

Files changed (1) hide show

app.py CHANGED Viewed

@@ -32,13 +32,19 @@ tts = FireRedTTS(
 )
 def tts_inference(text, prompt_wav='examples/prompt_1.wav', lang='zh'):
     syn_audio = tts.synthesize(
         prompt_wav=prompt_wav,
         text=text,
         lang=lang,
     )[0].detach().cpu().numpy()
-    print(f'Generate waveform with the shape of {syn_audio.shape}')
     syn_audio = (syn_audio * 32768).astype(np.int16)
     return sampling_rate, syn_audio
@@ -50,7 +56,7 @@ iface = gr.Interface(
         gr.Dropdown(["en", "zh"], label="Select language"),
     ],
     outputs=gr.Audio(label="Generated audio"),
-    title="TTS Demo",
     # description="Enter some text and listen to the generated speech."
 )

 )
 def tts_inference(text, prompt_wav='examples/prompt_1.wav', lang='zh'):
+    # Model inference
     syn_audio = tts.synthesize(
         prompt_wav=prompt_wav,
         text=text,
         lang=lang,
     )[0].detach().cpu().numpy()
+    # Normalize volume
+    syn_audio = syn_audio / np.max(np.abs(syn_audio)) * 0.9
+    # Convert audio data type
     syn_audio = (syn_audio * 32768).astype(np.int16)
     return sampling_rate, syn_audio
         gr.Dropdown(["en", "zh"], label="Select language"),
     ],
     outputs=gr.Audio(label="Generated audio"),
+    title="FireRedTTS: A Foundation Text-To-Speech Framework for Industry-Level Generative Speech Applications",
     # description="Enter some text and listen to the generated speech."
 )