Aliaksandr
e2e audio book generation (#5)
c2fa877 unverified
raw
history blame
1.78 kB
import typing as t
from dotenv import load_dotenv
from elevenlabs.client import AsyncElevenLabs, ElevenLabs
from elevenlabs import VoiceSettings
load_dotenv()
from src.config import logger, ELEVENLABS_API_KEY
# Shared ElevenLabs clients, both created once at import time from the same
# API key: the blocking client is used by the synchronous helpers below, the
# asyncio client by the async generators.
ELEVEN_CLIENT = ElevenLabs(api_key=ELEVENLABS_API_KEY)
ELEVEN_CLIENT_ASYNC = AsyncElevenLabs(api_key=ELEVENLABS_API_KEY)
def tts_stream(voice_id: str, text: str) -> t.Iterator[bytes]:
    """Yield the non-empty chunks of a synchronous text-to-speech request.

    Args:
        voice_id: Identifier of the voice passed to the ElevenLabs client.
        text: Text to convert to speech.

    Yields:
        Every truthy chunk produced by ``text_to_speech.convert``, in order.
    """
    speech_chunks = ELEVEN_CLIENT.text_to_speech.convert(
        voice_id=voice_id, text=text
    )
    for piece in speech_chunks:
        # Skip empty chunks so consumers only ever receive real data.
        if not piece:
            continue
        yield piece
def tts(voice_id: str, text: str):
    """Run text-to-speech and return the whole result as one bytes object.

    Drains ``tts_stream`` for the given voice and text and concatenates
    every chunk it yields.
    """
    return b"".join(tts_stream(voice_id=voice_id, text=text))
async def tts_astream(
    voice_id: str, text: str, params: dict | None = None
) -> t.AsyncIterator[bytes]:
    """Asynchronously yield the non-empty chunks of a text-to-speech request.

    Args:
        voice_id: Identifier of the voice passed to the ElevenLabs client.
        text: Text to convert to speech.
        params: Optional mapping of voice settings. When given, its
            ``stability``, ``similarity_boost`` and ``style`` entries
            (``None`` for any missing key) are wrapped in a
            ``VoiceSettings`` and sent as ``voice_settings``.

    Yields:
        Every truthy chunk produced by the async ``text_to_speech.convert``.
    """
    params_all = {"voice_id": voice_id, "text": text}
    if params is not None:
        setting_keys = ("stability", "similarity_boost", "style")
        # Missing keys resolve to None via .get, exactly one kwarg per key.
        settings = VoiceSettings(**{key: params.get(key) for key in setting_keys})
        params_all["voice_settings"] = settings  # type: ignore

    logger.info(f"call to 11labs TTS endpoint with params: {params_all}")

    async for piece in ELEVEN_CLIENT_ASYNC.text_to_speech.convert(**params_all):
        # Skip empty chunks so consumers only ever receive real data.
        if not piece:
            continue
        yield piece
async def sound_generation_astream(
    sound_generation_data: dict,
) -> t.AsyncIterator[bytes]:
    """Asynchronously yield the non-empty chunks of a sound-effect request.

    Args:
        sound_generation_data: Mapping that must contain the keys ``text``,
            ``duration_seconds`` and ``prompt_influence``; each value is
            forwarded unchanged to ``text_to_sound_effects.convert``.

    Yields:
        Every truthy chunk produced by the async client call.

    Raises:
        KeyError: If one of the three required keys is missing.
    """
    prompt = sound_generation_data["text"]
    duration = sound_generation_data["duration_seconds"]
    influence = sound_generation_data["prompt_influence"]

    effect_chunks = ELEVEN_CLIENT_ASYNC.text_to_sound_effects.convert(
        text=prompt,
        duration_seconds=duration,
        prompt_influence=influence,
    )
    async for piece in effect_chunks:
        # Skip empty chunks so consumers only ever receive real data.
        if not piece:
            continue
        yield piece