archivartaunik committed on
Commit
3e36194
·
verified ·
1 Parent(s): 666d1ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -20
app.py CHANGED
@@ -5,7 +5,6 @@ from huggingface_hub import hf_hub_download
5
  import os
6
  import sys
7
  import tempfile
8
- from pathlib import Path
9
  from scipy.io.wavfile import write
10
  import numpy as np
11
  from tqdm import tqdm
@@ -22,37 +21,37 @@ except ImportError:
22
  from TTS.tts.configs.xtts_config import XttsConfig
23
  from TTS.tts.models.xtts import Xtts
24
 
25
- # Шляхі да файлаў
26
  repo_id = "archivartaunik/BE_XTTS_V2_60epoch3Dataset"
27
- model_path = Path("./model")
28
- model_path.mkdir(exist_ok=True)
29
- checkpoint_file = model_path / "model.pth"
30
- config_file = model_path / "config.json"
31
- vocab_file = model_path / "vocab.json"
32
- default_voice_file = model_path / "voice.wav"
33
 
34
- if not checkpoint_file.exists():
35
- hf_hub_download(repo_id, filename="model.pth", local_dir=model_path)
36
- if not config_file.exists():
37
- hf_hub_download(repo_id, filename="config.json", local_dir=model_path)
38
- if not vocab_file.exists():
39
- hf_hub_download(repo_id, filename="vocab.json", local_dir=model_path)
40
- if not default_voice_file.exists():
41
- hf_hub_download(repo_id, filename="voice.wav", local_dir=model_path)
42
 
43
  # Загрузка канфігурацыі і мадэлі адзін раз
44
  config = XttsConfig()
45
  config.load_json(config_file)
46
  XTTS_MODEL = Xtts.init_from_config(config)
47
- XTTS_MODEL.load_checkpoint(config, checkpoint_path=checkpoint_file, vocab_path=vocab_file, use_deepspeed=False)
48
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
49
  XTTS_MODEL.to(device)
50
  sampling_rate = XTTS_MODEL.config.audio["sample_rate"]
51
 
52
  @spaces.GPU(duration=60)
53
- def text_to_speech(belarusian_story, speaker_audio_file=None): # Прыбралі аргумент lang
54
  if not speaker_audio_file or (not isinstance(speaker_audio_file, str) and speaker_audio_file.name == ""):
55
- speaker_audio_file = str(default_voice_file)
56
 
57
  try:
58
  gpt_cond_latent, speaker_embedding = XTTS_MODEL.get_conditioning_latents(
@@ -75,7 +74,7 @@ def text_to_speech(belarusian_story, speaker_audio_file=None): # Прыбрал
75
  with torch.no_grad():
76
  wav_chunk = XTTS_MODEL.inference(
77
  text=text,
78
- language="be", # Зафіксавалі мову як "be"
79
  gpt_cond_latent=gpt_cond_latent,
80
  speaker_embedding=speaker_embedding,
81
  temperature=0.1,
 
5
  import os
6
  import sys
7
  import tempfile
 
8
  from scipy.io.wavfile import write
9
  import numpy as np
10
  from tqdm import tqdm
 
21
  from TTS.tts.configs.xtts_config import XttsConfig
22
  from TTS.tts.models.xtts import Xtts
23
 
24
+ # Шляхі да файлаў (цяпер як радкі)
25
  repo_id = "archivartaunik/BE_XTTS_V2_60epoch3Dataset"
26
+ model_dir = "./model" # Дырэкторыя для захавання мадэлі
27
+ os.makedirs(model_dir, exist_ok=True) # Ствараем дырэкторыю, калі яе няма
28
+ checkpoint_file = os.path.join(model_dir, "model.pth")
29
+ config_file = os.path.join(model_dir, "config.json")
30
+ vocab_file = os.path.join(model_dir, "vocab.json")
31
+ default_voice_file = os.path.join(model_dir, "voice.wav")
32
 
33
+ if not os.path.exists(checkpoint_file):
34
+ hf_hub_download(repo_id, filename="model.pth", local_dir=model_dir)
35
+ if not os.path.exists(config_file):
36
+ hf_hub_download(repo_id, filename="config.json", local_dir=model_dir)
37
+ if not os.path.exists(vocab_file):
38
+ hf_hub_download(repo_id, filename="vocab.json", local_dir=model_dir)
39
+ if not os.path.exists(default_voice_file):
40
+ hf_hub_download(repo_id, filename="voice.wav", local_dir=model_dir)
41
 
42
  # Загрузка канфігурацыі і мадэлі адзін раз
43
  config = XttsConfig()
44
  config.load_json(config_file)
45
  XTTS_MODEL = Xtts.init_from_config(config)
46
+ XTTS_MODEL.load_checkpoint(config, checkpoint_path=checkpoint_file, vocab_path=vocab_file, use_deepspeed=False) # Тут выпраўленне
47
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
48
  XTTS_MODEL.to(device)
49
  sampling_rate = XTTS_MODEL.config.audio["sample_rate"]
50
 
51
  @spaces.GPU(duration=60)
52
+ def text_to_speech(belarusian_story, speaker_audio_file=None):
53
  if not speaker_audio_file or (not isinstance(speaker_audio_file, str) and speaker_audio_file.name == ""):
54
+ speaker_audio_file = default_voice_file
55
 
56
  try:
57
  gpt_cond_latent, speaker_embedding = XTTS_MODEL.get_conditioning_latents(
 
74
  with torch.no_grad():
75
  wav_chunk = XTTS_MODEL.inference(
76
  text=text,
77
+ language="be",
78
  gpt_cond_latent=gpt_cond_latent,
79
  speaker_embedding=speaker_embedding,
80
  temperature=0.1,