Spaces:

archivartaunik
/

Bextts

Running on Zero

App Files Files Community

archivartaunik committed on Dec 29, 2024

Commit

3e36194

verified ·

1 Parent(s): 666d1ff

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -20

app.py CHANGED Viewed

@@ -5,7 +5,6 @@ from huggingface_hub import hf_hub_download
 import os
 import sys
 import tempfile
-from pathlib import Path
 from scipy.io.wavfile import write
 import numpy as np
 from tqdm import tqdm
@@ -22,37 +21,37 @@ except ImportError:
     from TTS.tts.configs.xtts_config import XttsConfig
     from TTS.tts.models.xtts import Xtts
-# Шляхі да файлаў
 repo_id = "archivartaunik/BE_XTTS_V2_60epoch3Dataset"
-model_path = Path("./model")
-model_path.mkdir(exist_ok=True)
-checkpoint_file = model_path / "model.pth"
-config_file = model_path / "config.json"
-vocab_file = model_path / "vocab.json"
-default_voice_file = model_path / "voice.wav"
-if not checkpoint_file.exists():
-    hf_hub_download(repo_id, filename="model.pth", local_dir=model_path)
-if not config_file.exists():
-    hf_hub_download(repo_id, filename="config.json", local_dir=model_path)
-if not vocab_file.exists():
-    hf_hub_download(repo_id, filename="vocab.json", local_dir=model_path)
-if not default_voice_file.exists():
-    hf_hub_download(repo_id, filename="voice.wav", local_dir=model_path)
 # Загрузка канфігурацыі і мадэлі адзін раз
 config = XttsConfig()
 config.load_json(config_file)
 XTTS_MODEL = Xtts.init_from_config(config)
-XTTS_MODEL.load_checkpoint(config, checkpoint_path=checkpoint_file, vocab_path=vocab_file, use_deepspeed=False)
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 XTTS_MODEL.to(device)
 sampling_rate = XTTS_MODEL.config.audio["sample_rate"]
 @spaces.GPU(duration=60)
-def text_to_speech(belarusian_story, speaker_audio_file=None): # Прыбралі аргумент lang
     if not speaker_audio_file or (not isinstance(speaker_audio_file, str) and speaker_audio_file.name == ""):
-        speaker_audio_file = str(default_voice_file)
     try:
         gpt_cond_latent, speaker_embedding = XTTS_MODEL.get_conditioning_latents(
@@ -75,7 +74,7 @@ def text_to_speech(belarusian_story, speaker_audio_file=None): # Прыбрал
             with torch.no_grad():
                 wav_chunk = XTTS_MODEL.inference(
                     text=text,
-                    language="be", # Зафіксавалі мову як "be"
                     gpt_cond_latent=gpt_cond_latent,
                     speaker_embedding=speaker_embedding,
                     temperature=0.1,

 import os
 import sys
 import tempfile
 from scipy.io.wavfile import write
 import numpy as np
 from tqdm import tqdm
     from TTS.tts.configs.xtts_config import XttsConfig
     from TTS.tts.models.xtts import Xtts
+# Шляхі да файлаў (цяпер як радкі)
 repo_id = "archivartaunik/BE_XTTS_V2_60epoch3Dataset"
+model_dir = "./model"  # Дырэкторыя для захавання мадэлі
+os.makedirs(model_dir, exist_ok=True) # Ствараем дырэкторыю, калі яе няма
+checkpoint_file = os.path.join(model_dir, "model.pth")
+config_file = os.path.join(model_dir, "config.json")
+vocab_file = os.path.join(model_dir, "vocab.json")
+default_voice_file = os.path.join(model_dir, "voice.wav")
+if not os.path.exists(checkpoint_file):
+    hf_hub_download(repo_id, filename="model.pth", local_dir=model_dir)
+if not os.path.exists(config_file):
+    hf_hub_download(repo_id, filename="config.json", local_dir=model_dir)
+if not os.path.exists(vocab_file):
+    hf_hub_download(repo_id, filename="vocab.json", local_dir=model_dir)
+if not os.path.exists(default_voice_file):
+    hf_hub_download(repo_id, filename="voice.wav", local_dir=model_dir)
 # Загрузка канфігурацыі і мадэлі адзін раз
 config = XttsConfig()
 config.load_json(config_file)
 XTTS_MODEL = Xtts.init_from_config(config)
+XTTS_MODEL.load_checkpoint(config, checkpoint_path=checkpoint_file, vocab_path=vocab_file, use_deepspeed=False) # Тут выпраўленне
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 XTTS_MODEL.to(device)
 sampling_rate = XTTS_MODEL.config.audio["sample_rate"]
 @spaces.GPU(duration=60)
+def text_to_speech(belarusian_story, speaker_audio_file=None):
     if not speaker_audio_file or (not isinstance(speaker_audio_file, str) and speaker_audio_file.name == ""):
+        speaker_audio_file = default_voice_file
     try:
         gpt_cond_latent, speaker_embedding = XTTS_MODEL.get_conditioning_latents(
             with torch.no_grad():
                 wav_chunk = XTTS_MODEL.inference(
                     text=text,
+                    language="be",
                     gpt_cond_latent=gpt_cond_latent,
                     speaker_embedding=speaker_embedding,
                     temperature=0.1,