Spaces:

archivartaunik
/

Bextts

Running on Zero

App Files Files Community

archivartaunik committed on Dec 27, 2024

Commit

e2bbfc7

verified ·

1 Parent(s): f07b8c9

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -26

app.py CHANGED Viewed

@@ -1,5 +1,14 @@
-import os
 import sys
 # Клонуем рэпазіторый, калі ён яшчэ не загружаны
 if not os.path.exists("XTTSv2-Finetuning-for-New-Languages"):
@@ -12,25 +21,12 @@ if os.path.exists("XTTSv2-Finetuning-for-New-Languages/TTS"):
 # Дадаем тэчку TTS у PYTHONPATH
 sys.path.append("./TTS")
-# Усталёўваем залежнасці з рэпазіторыя
-#os.system("pip install -r XTTSv2-Finetuning-for-New-Languages/requirements.txt")
-# Імпартуем патрэбныя модулі
-import gradio as gr
-import torch
-from tqdm import tqdm
-from underthesea import sent_tokenize
-from TTS.tts.configs.xtts_config import XttsConfig
-from TTS.tts.models.xtts import Xtts
-from huggingface_hub import hf_hub_download
 # Вызначэнне прылады (выкарыстоўваецца GPU, калі даступна)
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 # Шлях да мадэлі ў Hugging Face
-repo_id = "archivartaunik/GPT_XTTS_V2_40EP"
 checkpoint_file = hf_hub_download(repo_id, filename="model.pth")
 config_file = hf_hub_download(repo_id, filename="config.json")
 vocab_file = hf_hub_download(repo_id, filename="vocab.json")
@@ -62,13 +58,10 @@ def text_to_speech(belarusian_story, lang="be", speaker_audio_file=default_voice
     )
-import tempfile
-from scipy.io.wavfile import write
-def text_to_speech(belarusian_story, lang="be", speaker_audio_file=default_voice_file):
-    # Правяраем, ці пададзены файл голасу
-    if speaker_audio_file is None:
-        raise ValueError("Speaker audio file is not provided.")
     # Атрыманне латэнтных умоў і эмацый
     gpt_cond_latent, speaker_embedding = XTTS_MODEL.get_conditioning_latents(
@@ -107,18 +100,19 @@ def text_to_speech(belarusian_story, lang="be", speaker_audio_file=default_voice
     return temp_file.name
-# Інтэрфейс Gradio
 demo = gr.Interface(
     fn=text_to_speech,
     inputs=[
         gr.Textbox(lines=5, label="Тэкст на беларускай мове"),
         gr.Textbox(value="be", label="Мова (па змаўчанні BE)", visible=False),
-        gr.File(label="Файл голасу (па змаўчанні voice.wav)", file_types=[".wav"], interactive=True),
     ],
     outputs="audio",
-    title="XTTS Belarusian TTS Demo",
-    description="Увядзіце тэкст, і мадэль пераўтворыць яго ў аўдыя. Вы можаце выкарыстоўваць голас па змаўчанні або загрузіць уласны.",
 )
 if __name__ == "__main__":
     demo.launch()

+# Імпартуем патрэбныя модулі
+import gradio as gr
+import torch
+from tqdm import tqdm
+from underthesea import sent_tokenize
+from TTS.tts.configs.xtts_config import XttsConfig
+from TTS.tts.models.xtts import Xtts
+from huggingface_hub import hf_hub_download
+import os
 import sys
+import tempfile
+from scipy.io.wavfile import write
 # Клонуем рэпазіторый, калі ён яшчэ не загружаны
 if not os.path.exists("XTTSv2-Finetuning-for-New-Languages"):
 # Дадаем тэчку TTS у PYTHONPATH
 sys.path.append("./TTS")
 # Вызначэнне прылады (выкарыстоўваецца GPU, калі даступна)
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 # Шлях да мадэлі ў Hugging Face
+repo_id = "archivartaunik/BE_XTTS_V2_60epoch3Dataset"
 checkpoint_file = hf_hub_download(repo_id, filename="model.pth")
 config_file = hf_hub_download(repo_id, filename="config.json")
 vocab_file = hf_hub_download(repo_id, filename="vocab.json")
     )
+def text_to_speech(belarusian_story, lang="be", speaker_audio_file=None):
+    # Калі файл не пададзены, выкарыстоўваем голас па змаўчанні
+    if not speaker_audio_file or (not isinstance(speaker_audio_file, str) and speaker_audio_file.name == ""):
+        speaker_audio_file = default_voice_file
     # Атрыманне латэнтных умоў і эмацый
     gpt_cond_latent, speaker_embedding = XTTS_MODEL.get_conditioning_latents(
     return temp_file.name
 demo = gr.Interface(
     fn=text_to_speech,
     inputs=[
         gr.Textbox(lines=5, label="Тэкст на беларускай мове"),
         gr.Textbox(value="be", label="Мова (па змаўчанні BE)", visible=False),
+        gr.Audio(source="microphone", type="filepath", label="Запішыце або загрузіце файл голасу (без іншых гукаў)", interactive=True),
     ],
     outputs="audio",
+    title="Belarusian TTS Demo",
+    description="Увядзіце тэкст і мадэль пераўтворыць яго ў аўдыя. Вы можаце выкарыстоўваць голас па змаўчанні, загрузіць уласны, або запісаць з мікрафона.",
 )
 if __name__ == "__main__":
     demo.launch()