import os

import soundfile as sf
from espnet_onnx import Text2Speech
from pydantic import BaseModel


class TextInput(BaseModel):
    """Request payload holding the text to synthesize."""

    # Defaults to the empty string when the field is omitted.
    text: str = ""


class TTSModel:
    """Wrap an espnet_onnx ``Text2Speech`` model and write its audio to disk.

    The model is not loaded at construction time; call :meth:`load_model`
    before :meth:`generate`.
    """

    def __init__(self):
        # Populated by load_model(); None means "not loaded yet".
        self.model = None
        # Base directory that load_model() resolves model names against.
        self.model_path = "models/tts"
        # File that generate() (re)writes on every call.
        self.output_path = "template/sample.wav"

    def load_model(self, model_path):
        """Load the model stored under ``<self.model_path>/<model_path>``.

        Args:
            model_path: Sub-path of the model directory, relative to
                ``self.model_path``.
        """
        self.model = Text2Speech(model_dir=f"{self.model_path}/{model_path}")

    def generate(self, text_input: TextInput, *, sample_rate: int = 16000):
        """Synthesize ``text_input.text`` and write it to ``self.output_path``.

        Args:
            text_input: Payload whose ``text`` field is passed to the model.
            sample_rate: Sample rate handed to ``soundfile.write``. Defaults
                to 16000, the value this method previously hard-coded.
                NOTE(review): this should match the rate the loaded model
                actually produces -- confirm against the model's config.

        Raises:
            RuntimeError: If no model has been loaded yet.
        """
        if self.model is None:
            raise RuntimeError("Model is not loaded.")

        # Drop any previous output before synthesizing, so that a failure in
        # the model call cannot leave an older file in place. Attempting the
        # removal directly (instead of checking os.path.exists first) avoids
        # a race where the file disappears between the check and the remove.
        try:
            os.remove(self.output_path)
        except FileNotFoundError:
            pass

        audio = self.model(text_input.text)["wav"]
        sf.write(self.output_path, audio, sample_rate)