archivartaunik committed
Update app.py

app.py CHANGED
@@ -5,7 +5,6 @@ from huggingface_hub import hf_hub_download
 import os
 import sys
 import tempfile
-from pathlib import Path
 from scipy.io.wavfile import write
 import numpy as np
 from tqdm import tqdm
@@ -22,37 +21,37 @@ except ImportError:
 from TTS.tts.configs.xtts_config import XttsConfig
 from TTS.tts.models.xtts import Xtts

-# Paths to the files
+# Paths to the files (now plain strings)
 repo_id = "archivartaunik/BE_XTTS_V2_60epoch3Dataset"
-
-
-checkpoint_file =
-config_file =
-vocab_file =
-default_voice_file =
+model_dir = "./model"  # Directory where the model files are stored
+os.makedirs(model_dir, exist_ok=True)  # Create the directory if it does not exist
+checkpoint_file = os.path.join(model_dir, "model.pth")
+config_file = os.path.join(model_dir, "config.json")
+vocab_file = os.path.join(model_dir, "vocab.json")
+default_voice_file = os.path.join(model_dir, "voice.wav")

-if not
-    hf_hub_download(repo_id, filename="model.pth", local_dir=
-if not
-    hf_hub_download(repo_id, filename="config.json", local_dir=
-if not
-    hf_hub_download(repo_id, filename="vocab.json", local_dir=
-if not
-    hf_hub_download(repo_id, filename="voice.wav", local_dir=
+if not os.path.exists(checkpoint_file):
+    hf_hub_download(repo_id, filename="model.pth", local_dir=model_dir)
+if not os.path.exists(config_file):
+    hf_hub_download(repo_id, filename="config.json", local_dir=model_dir)
+if not os.path.exists(vocab_file):
+    hf_hub_download(repo_id, filename="vocab.json", local_dir=model_dir)
+if not os.path.exists(default_voice_file):
+    hf_hub_download(repo_id, filename="voice.wav", local_dir=model_dir)

 # Load the configuration and the model once
 config = XttsConfig()
 config.load_json(config_file)
 XTTS_MODEL = Xtts.init_from_config(config)
-XTTS_MODEL.load_checkpoint(config, checkpoint_path=checkpoint_file, vocab_path=vocab_file, use_deepspeed=False)
+XTTS_MODEL.load_checkpoint(config, checkpoint_path=checkpoint_file, vocab_path=vocab_file, use_deepspeed=False)  # The fix is here
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 XTTS_MODEL.to(device)
 sampling_rate = XTTS_MODEL.config.audio["sample_rate"]

 @spaces.GPU(duration=60)
-def text_to_speech(belarusian_story, speaker_audio_file=None):
+def text_to_speech(belarusian_story, speaker_audio_file=None):
     if not speaker_audio_file or (not isinstance(speaker_audio_file, str) and speaker_audio_file.name == ""):
-        speaker_audio_file =
+        speaker_audio_file = default_voice_file

     try:
         gpt_cond_latent, speaker_embedding = XTTS_MODEL.get_conditioning_latents(
@@ -75,7 +74,7 @@ def text_to_speech(belarusian_story, speaker_audio_file=None): # Прыбрал
         with torch.no_grad():
             wav_chunk = XTTS_MODEL.inference(
                 text=text,
-                language="be",
+                language="be",
                 gpt_cond_latent=gpt_cond_latent,
                 speaker_embedding=speaker_embedding,
                 temperature=0.1,
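A side note on the download block above (not part of the commit): each file is fetched into ./model only when it is missing from disk. The same logic can be factored into a small helper; the sketch below assumes only what the diff shows, and ensure_file is a hypothetical name rather than something defined in app.py.

import os
from huggingface_hub import hf_hub_download

repo_id = "archivartaunik/BE_XTTS_V2_60epoch3Dataset"
model_dir = "./model"
os.makedirs(model_dir, exist_ok=True)

def ensure_file(filename: str) -> str:
    # Download `filename` from the repo into model_dir unless it is already on disk.
    local_path = os.path.join(model_dir, filename)
    if not os.path.exists(local_path):
        hf_hub_download(repo_id, filename=filename, local_dir=model_dir)
    return local_path

checkpoint_file = ensure_file("model.pth")
config_file = ensure_file("config.json")
vocab_file = ensure_file("vocab.json")
default_voice_file = ensure_file("voice.wav")

Either form leaves the rest of the script unchanged, since the four *_file variables end up holding the same "./model/..." paths.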
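One more side note (also not part of the commit): app.py already imports scipy.io.wavfile.write and numpy, and sampling_rate is read from the model config, so the float waveform that XTTS ultimately returns can be written to disk as 16-bit PCM. The snippet below is self-contained: the sine wave merely stands in for a real waveform, and the 24 kHz rate is an assumption (the app takes the actual value from XTTS_MODEL.config).

import numpy as np
from scipy.io.wavfile import write

sampling_rate = 24000  # assumed here; app.py reads the real value from the XTTS config
t = np.linspace(0, 1.0, sampling_rate, endpoint=False)
wav_chunk = 0.3 * np.sin(2 * np.pi * 440 * t)  # placeholder float waveform in [-1, 1]
write("output.wav", sampling_rate, (wav_chunk * 32767).astype(np.int16))  # int16 PCM WAV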