"""Gradio front end for XTTS voice cloning with a fixed set of creator voices.

The script loads a Coqui XTTS model once at import time, exposes a single
text-to-speech handler, and serves it behind basic authentication.
"""

import asyncio
import locale
import os
import tempfile
import uuid
from datetime import timedelta

import gradio as gr
import torch
from TTS.api import TTS


def _utf8_preferred_encoding(do_setlocale: bool = True) -> str:
    """Report UTF-8 as the preferred encoding regardless of the host locale.

    Accepts the optional ``do_setlocale`` argument of the stdlib function it
    replaces so that callers using ``locale.getpreferredencoding(False)`` do
    not fail with a ``TypeError``.
    """
    return "UTF-8"


# Force UTF-8 for any code that consults the locale's preferred encoding.
locale.getpreferredencoding = _utf8_preferred_encoding

device = "cuda" if torch.cuda.is_available() else "cpu"

# Must be set before the model is instantiated below.
os.environ["COQUI_TOS_AGREED"] = "1"

# Load the pinned XTTS release once and place it on the selected device.
# (Previously a first model was loaded onto `device` and then immediately
# replaced by this one, which was never moved off the default device.)
tts = TTS("xtts_v2.0.2").to(device)

# NOTE(review): credentials can now be supplied through the environment.
# The literal fallbacks are kept only so existing deployments keep working;
# they are committed to source and should be rotated and removed.
SECRET_USER = os.environ.get("TTS_APP_USER", "admin_kr")
SECRET_PASSWORD = os.environ.get("TTS_APP_PASSWORD", "9NLa2)597,J^")

# Reference recordings used to condition the model for each creator voice.
REFERENCE_VOICES: dict[str, list[str]] = {
    "Roomie": [
        "./assets/roomie/roomie_emocionado_base_1.wav",
        "./assets/roomie/ref_12.wav",
        "./assets/roomie/ref_11.wav",
        "./assets/roomie/ref_10.wav",
        "./assets/roomie/ref_1.wav",
        "./assets/roomie/ref_6.wav",
        "./assets/roomie/ref_7.wav",
        "./assets/roomie/ref_8.wav",
        "./assets/roomie/roomie_emocionado_base_2.wav",
    ],
    "Xavy": [
        "./assets/xavy/neutro_3.wav",
        "./assets/xavy/neutro_1.wav",
        "./assets/xavy/neutro_2.wav",
    ],
    "Bella": [
        "./assets/bella/neutro_2.wav",
        "./assets/bella/neutro_1.wav",
        "./assets/bella/neutro_3.wav",
    ],
    "Julia": [
        "assets/julia/neutro_4_Final_fast.wav",
        "assets/julia/enfadado_1_Final.wav",
        "assets/julia/enfadado_2_Final.wav",
        "assets/julia/enfadado_3_Final.wav",
        "assets/julia/emocionada_1.wav",
        "assets/julia/emocionada_2_Final.wav",
    ],
}


async def generate_audio(
    text_input: str, creator: str, top_k_input: int
) -> gr.Audio:
    """Synthesize ``text_input`` in the voice of ``creator``.

    Args:
        text_input: Text to be spoken.
        creator: Key into ``REFERENCE_VOICES`` selecting the reference clips.
        top_k_input: Value forwarded to the model as ``top_k``.

    Returns:
        A ``gr.Audio`` component whose value is the path of the generated
        WAV file.

    Raises:
        gr.Error: If the text is blank or ``creator`` is not a known voice.
    """
    if not text_input or not text_input.strip():
        raise gr.Error("Please enter some text to synthesize.")

    refer_voices = REFERENCE_VOICES.get(creator)
    if refer_voices is None:
        raise gr.Error("Please choose a creator before generating audio.")

    # One file per request so overlapping requests never overwrite each
    # other's output. The file is left in the temp directory afterwards.
    output_file = os.path.join(
        tempfile.gettempdir(), f"tts_{uuid.uuid4()}.wav"
    )

    # tts_to_file is a blocking call; running it in a worker thread keeps
    # the event loop this coroutine runs on free while audio is generated.
    # NOTE(review): language is fixed to "en" although the UI help text is
    # Spanish - confirm this is intended.
    await asyncio.to_thread(
        tts.tts_to_file,
        text=text_input,
        file_path=output_file,
        speaker_wav=refer_voices,
        language="en",
        split_sentences=True,
        top_k=int(top_k_input),
    )

    return gr.Audio(value=output_file)


# NOTE(review): the labels below contain typos ('Speach', 'Coice'); they are
# left untouched here because they are user-facing runtime strings.
app = gr.Interface(
    fn=generate_audio,
    inputs=[
        gr.Textbox(label='Text to Speach'),
        gr.Dropdown(list(REFERENCE_VOICES), label="Coice your creator"),
        gr.Slider(
            0,
            100,
            value=50,
            label='Emotion',
            info='Valores mas altos para tonos mas emocionado, valores bajos para tonos mas aburrido ',
        ),
    ],
    outputs=['audio'],
)

if __name__ == "__main__":
    app.launch(auth=(SECRET_USER, SECRET_PASSWORD))