Spaces:

dmeck
/

RVC-Speakers

Sleeping

App Files Community

glide-the committed on Aug 24, 2023

Commit

5ff06d1

1 Parent(s): 155b8bc

Add file to Git LFS tracking

Browse files

Files changed (3) hide show

bark/mode_load.py +9 -4
speakers/processors/bark_to_voice.py +10 -5
speakers/speakers.yaml +1 -0

bark/mode_load.py CHANGED Viewed

@@ -1,3 +1,5 @@
 from bark.model_fine import FineGPT, FineGPTConfig
 from bark.model import GPT, GPTConfig
 from huggingface_hub import hf_hub_download
@@ -139,8 +141,8 @@ def _download(self, from_hf_path, file_name, local_dir):
     hf_hub_download(repo_id=from_hf_path, filename=file_name, local_dir=local_dir)
-def _load_codec_model(device):
-    model = EncodecModel.encodec_model_24khz()
     model.set_target_bandwidth(6.0)
     model.eval()
     model.to(device)
@@ -214,7 +216,7 @@ class BarkModelLoader:
     _tokenizer_path: str = "bert-base-multilingual-cased"
     _encodec: EncodecModel
-    def __init__(self, tokenizer_path: str, text_path: str, coarse_path: str, fine_path: str, device: str):
         if tokenizer_path:
             self._tokenizer_path = tokenizer_path
@@ -222,6 +224,10 @@ class BarkModelLoader:
         self._tokenizer = BertTokenizer.from_pretrained(self._tokenizer_path)
         logger.info(f"BertTokenizer loaded")
         self._text_model.model_path = text_path
         self._coarse_model.model_path = coarse_path
         self._fine_model.model_path = fine_path
@@ -286,7 +292,6 @@ class BarkModelLoader:
             self._coarse_model.model = model
         elif model_type.model_type == "fine_model":
             self._fine_model.model = model
-        self._encodec = _load_codec_model(device)
     def generate_text_semantic(
             self,

+from pathlib import Path
 from bark.model_fine import FineGPT, FineGPTConfig
 from bark.model import GPT, GPTConfig
 from huggingface_hub import hf_hub_download
     hf_hub_download(repo_id=from_hf_path, filename=file_name, local_dir=local_dir)
+def _load_codec_model(device,codec_repository_path: str):
+    model = EncodecModel.encodec_model_24khz(pretrained=True, repository=Path(codec_repository_path))
     model.set_target_bandwidth(6.0)
     model.eval()
     model.to(device)
     _tokenizer_path: str = "bert-base-multilingual-cased"
     _encodec: EncodecModel
+    def __init__(self, codec_repository_path: str, tokenizer_path: str, text_path: str, coarse_path: str, fine_path: str, device: str):
         if tokenizer_path:
             self._tokenizer_path = tokenizer_path
         self._tokenizer = BertTokenizer.from_pretrained(self._tokenizer_path)
         logger.info(f"BertTokenizer loaded")
+        logger.info(f"_encodec load.")
+        self._encodec = _load_codec_model(device=device, codec_repository_path=codec_repository_path)
+        logger.info(f"_encodec loaded")
         self._text_model.model_path = text_path
         self._coarse_model.model_path = coarse_path
         self._fine_model.model_path = fine_path
             self._coarse_model.model = model
         elif model_type.model_type == "fine_model":
             self._fine_model.model = model
     def generate_text_semantic(
             self,

speakers/processors/bark_to_voice.py CHANGED Viewed

@@ -56,9 +56,10 @@ class BarkProcessorData(ProcessorData):
 @registry.register_processor("bark_to_voice")
 class BarkToVoice(BaseProcessor):
-    def __init__(self, tokenizer_path: str, text_path: str, coarse_path: str, fine_path: str):
         super().__init__()
-        self._load_bark_mode(tokenizer_path=tokenizer_path,
                              text_path=text_path,
                              coarse_path=coarse_path,
                              fine_path=fine_path)
@@ -92,12 +93,15 @@ class BarkToVoice(BaseProcessor):
         if cfg is None:
             raise RuntimeError("from_config cfg is None.")
         tokenizer_path = cfg.get("tokenizer_path", "")
         text_model_path = cfg.get("text_model_path", "")
         coarse_model_path = cfg.get("coarse_model_path", "")
         fine_model_path = cfg.get("fine_model_path", "")
-        return cls(tokenizer_path=os.path.join(registry.get_path("bark_library_root"),
                                                tokenizer_path),
                    text_path=os.path.join(registry.get_path("bark_library_root"),
                                           text_model_path),
@@ -110,10 +114,11 @@ class BarkToVoice(BaseProcessor):
     def match(self, data: ProcessorData):
         return "BARK" in data.type
-    def _load_bark_mode(self, tokenizer_path: str, text_path: str, coarse_path: str, fine_path: str):
         logger.info(f'Bark model loading')
-        self.bark_load = BarkModelLoader(tokenizer_path=tokenizer_path,
                                          text_path=text_path,
                                          coarse_path=coarse_path,
                                          fine_path=fine_path,

 @registry.register_processor("bark_to_voice")
 class BarkToVoice(BaseProcessor):
+    def __init__(self,codec_repository_path: str, tokenizer_path: str, text_path: str, coarse_path: str, fine_path: str):
         super().__init__()
+        self._load_bark_mode(codec_repository_path=codec_repository_path,
+                             tokenizer_path=tokenizer_path,
                              text_path=text_path,
                              coarse_path=coarse_path,
                              fine_path=fine_path)
         if cfg is None:
             raise RuntimeError("from_config cfg is None.")
+        codec_repository_path = cfg.get("codec_repository_path", "")
         tokenizer_path = cfg.get("tokenizer_path", "")
         text_model_path = cfg.get("text_model_path", "")
         coarse_model_path = cfg.get("coarse_model_path", "")
         fine_model_path = cfg.get("fine_model_path", "")
+        return cls(codec_repository_path=os.path.join(registry.get_path("bark_library_root"),
+                                                     codec_repository_path),
+                   tokenizer_path=os.path.join(registry.get_path("bark_library_root"),
                                                tokenizer_path),
                    text_path=os.path.join(registry.get_path("bark_library_root"),
                                           text_model_path),
     def match(self, data: ProcessorData):
         return "BARK" in data.type
+    def _load_bark_mode(self, codec_repository_path: str, tokenizer_path: str, text_path: str, coarse_path: str, fine_path: str):
         logger.info(f'Bark model loading')
+        self.bark_load = BarkModelLoader(codec_repository_path=codec_repository_path,
+                                         tokenizer_path=tokenizer_path,
                                          text_path=text_path,
                                          coarse_path=coarse_path,
                                          fine_path=fine_path,

speakers/speakers.yaml CHANGED Viewed

@@ -14,6 +14,7 @@ preprocess:
         rvc_config_file: "rvc.yaml"
     - bark_processor:
         name: "bark_to_voice"
         tokenizer_path: "model/bert-base-multilingual-cased"
         text_model_path: "model/suno/bark_v0/text_2.pt"
         coarse_model_path: "model/suno/bark_v0/coarse_2.pt"

         rvc_config_file: "rvc.yaml"
     - bark_processor:
         name: "bark_to_voice"
+        codec_repository_path: "model/codec"
         tokenizer_path: "model/bert-base-multilingual-cased"
         text_model_path: "model/suno/bark_v0/text_2.pt"
         coarse_model_path: "model/suno/bark_v0/coarse_2.pt"