# Hugging Face Space (page status: Running on Zero)
import gradio as gr | |
import numpy as np | |
import os | |
import requests | |
from fireredtts.fireredtts import FireRedTTS | |
def download_file(url, filename, timeout=(10, 60), chunk_size=1 << 20):
    """Download ``url`` and store the response body at ``filename``.

    The body is streamed to disk in chunks instead of being buffered in
    memory. Data is first written to ``<filename>.part`` and renamed once
    the transfer completes, so an interrupted download never leaves a
    truncated file at ``filename``.

    Args:
        url: HTTP(S) address to fetch.
        filename: Destination path. Its parent directory is created when
            it does not exist yet.
        timeout: Passed to ``requests.get``; ``(connect, read)`` seconds.
        chunk_size: Number of bytes requested per streamed chunk.

    Returns:
        None. The outcome is reported on stdout. A non-200 response is
        reported and nothing is written.

    Raises:
        requests.RequestException: Propagated from ``requests`` on
            connection problems or timeouts.
        OSError: If the destination cannot be written.
    """
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            print(f"Failed to download file: HTTP {response.status_code}")
            return

        parent_dir = os.path.dirname(filename)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        partial_path = f"{filename}.part"
        try:
            with open(partial_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    file.write(chunk)
            # Publish the file only after every byte has been written.
            os.replace(partial_path, filename)
        finally:
            # After a successful rename the partial file is gone; this
            # only removes leftovers from an interrupted transfer.
            if os.path.exists(partial_path):
                os.remove(partial_path)

    print(f"File downloaded successfully: {filename}")
# Fetch every checkpoint file that is missing locally before the model is
# constructed. Each file is checked on its own, so a partially populated
# directory is completed rather than skipped.
_CHECKPOINT_DIR = 'pretrained_models'
_CHECKPOINT_BASE_URL = 'https://huggingface.co/fireredteam/FireRedTTS/resolve/main'
_CHECKPOINT_FILES = (
    'fireredtts_gpt.pt',
    'fireredtts_speaker.bin',
    'fireredtts_token2wav.pt',
)

_missing_checkpoints = [
    name for name in _CHECKPOINT_FILES
    if not os.path.exists(f'{_CHECKPOINT_DIR}/{name}')
]
if _missing_checkpoints:
    print("Start to download checkpoints...")
    # The target directory must exist before files can be opened inside it.
    os.makedirs(_CHECKPOINT_DIR, exist_ok=True)
    for _name in _missing_checkpoints:
        download_file(f'{_CHECKPOINT_BASE_URL}/{_name}',
                      f'{_CHECKPOINT_DIR}/{_name}')

# Module-level model instance used by tts_inference().
tts = FireRedTTS(
    config_path="configs/config_24k.json",
    pretrained_path=_CHECKPOINT_DIR,
)
def tts_inference(text, prompt_wav='examples/prompt_1.wav', lang='zh'):
    """Synthesize ``text`` with the module-level ``tts`` model.

    Args:
        text: Text to synthesize.
        prompt_wav: Path to the reference audio. ``None`` (what the UI
            sends when nothing was uploaded) falls back to the default
            prompt.
        lang: Language code handed to ``tts.synthesize``. ``None`` (no
            dropdown selection) falls back to ``'zh'``.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``samples`` is an
        ``int16`` NumPy array.
    """
    # UI callers pass None explicitly for empty inputs, which bypasses the
    # Python-level defaults; restore them here.
    if prompt_wav is None:
        prompt_wav = 'examples/prompt_1.wav'
    if lang is None:
        lang = 'zh'

    syn_audio = tts.synthesize(
        prompt_wav=prompt_wav,
        text=text,
        lang=lang,
    ).detach().cpu().numpy()
    print(f'Generate waveform with the shape of {syn_audio.shape}')

    # Clip before the cast: a sample at or above 1.0 scales to >= 32768,
    # which does not fit in int16 and would otherwise wrap around.
    # Assumes the model emits floats nominally in [-1, 1] -- TODO confirm.
    syn_audio = np.clip(syn_audio * 32768, -32768, 32767).astype(np.int16)

    # NOTE(review): the model is loaded from config_24k.json but 48000 is
    # reported here -- confirm the intended output sample rate.
    return 48000, syn_audio
# Input widgets, in the positional order expected by tts_inference().
_text_input = gr.Textbox(label="Input text here")
# NOTE(review): `source=` is the Gradio 3.x keyword; newer releases use
# `sources=[...]` -- confirm against the pinned Gradio version.
_prompt_input = gr.Audio(
    source="upload",
    type="filepath",
    label="Upload reference audio",
)
_lang_input = gr.Dropdown(["en", "zh"], label="Select language")

# Output widget that plays the value returned by tts_inference().
_audio_output = gr.Audio(label="Generated audio")

# Web UI wiring the three inputs to the synthesis function.
iface = gr.Interface(
    fn=tts_inference,
    inputs=[_text_input, _prompt_input, _lang_input],
    outputs=_audio_output,
    title="TTS Demo",
    description="Enter some text and listen to the generated speech.",
)

# Start the web server only when executed as a script.
if __name__ == "__main__":
    iface.launch()