import typing as t

from dotenv import load_dotenv
from elevenlabs.client import AsyncElevenLabs, ElevenLabs
from elevenlabs import VoiceSettings

# Load variables from a .env file into the process environment. This runs
# before the `src.config` import below -- presumably because that module reads
# settings such as the API key from the environment (TODO confirm).
load_dotenv()

from src.config import logger, ELEVENLABS_API_KEY

# Synchronous ElevenLabs client; used by `tts_stream`.
ELEVEN_CLIENT = ElevenLabs(api_key=ELEVENLABS_API_KEY)

# Asynchronous ElevenLabs client; used by `tts_astream` and
# `sound_generation_astream`.
ELEVEN_CLIENT_ASYNC = AsyncElevenLabs(api_key=ELEVENLABS_API_KEY)


def tts_stream(voice_id: str, text: str) -> t.Iterator[bytes]:
    """Yield the non-empty chunks of a synchronous text-to-speech request.

    Args:
        voice_id: Voice identifier forwarded to the ElevenLabs client.
        text: Text forwarded to the ElevenLabs client for conversion.

    Yields:
        Every truthy chunk produced by ``text_to_speech.convert``; falsy
        (e.g. empty) chunks are skipped.
    """
    response_chunks = ELEVEN_CLIENT.text_to_speech.convert(
        voice_id=voice_id, text=text
    )
    # filter(None, ...) keeps only truthy items, i.e. drops empty chunks.
    yield from filter(None, response_chunks)


def tts(voice_id: str, text: str):
    """Run a text-to-speech request and return the result as one bytes object.

    Drains ``tts_stream`` completely and concatenates its chunks, so the
    whole response is held in memory before returning.

    Args:
        voice_id: Voice identifier passed through to ``tts_stream``.
        text: Text passed through to ``tts_stream``.

    Returns:
        The concatenation of all chunks yielded by ``tts_stream``
        (``b""`` if it yields nothing).
    """
    return b"".join(tts_stream(voice_id=voice_id, text=text))


async def tts_astream(
    voice_id: str, text: str, params: dict | None = None
) -> t.AsyncIterator[bytes]:
    """Yield the non-empty chunks of an asynchronous text-to-speech request.

    Args:
        voice_id: Voice identifier forwarded to the ElevenLabs client.
        text: Text forwarded to the ElevenLabs client for conversion.
        params: Optional mapping with the keys ``"stability"``,
            ``"similarity_boost"`` and ``"style"``. When it is not ``None``
            (an empty mapping counts), a ``VoiceSettings`` is built from it
            and sent as ``voice_settings``; any key that is absent is passed
            to ``VoiceSettings`` as ``None``.

    Yields:
        Every truthy chunk produced by the async ``text_to_speech.convert``;
        falsy (e.g. empty) chunks are skipped.
    """
    request_kwargs: dict[str, t.Any]
    if params is None:
        request_kwargs = {"voice_id": voice_id, "text": text}
    else:
        request_kwargs = {
            "voice_id": voice_id,
            "text": text,
            "voice_settings": VoiceSettings(
                stability=params.get("stability"),
                similarity_boost=params.get("similarity_boost"),
                style=params.get("style"),
            ),
        }

    # The full request (including the text) is written to the log at info level.
    logger.info(f"call to 11labs TTS endpoint with params: {request_kwargs}")

    async for chunk in ELEVEN_CLIENT_ASYNC.text_to_speech.convert(**request_kwargs):
        if not chunk:
            continue
        yield chunk


async def sound_generation_astream(
    sound_generation_data: dict,
) -> t.AsyncIterator[bytes]:
    """Yield the non-empty chunks of an asynchronous sound-effect request.

    Args:
        sound_generation_data: Mapping that must contain the keys
            ``"text"``, ``"duration_seconds"`` and ``"prompt_influence"``;
            their values are forwarded unchanged to
            ``text_to_sound_effects.convert``.

    Yields:
        Every truthy chunk produced by the client; falsy (e.g. empty)
        chunks are skipped.

    Raises:
        KeyError: If one of the required keys is missing (raised when
            iteration starts, since this is an async generator).
    """
    required_keys = ("text", "duration_seconds", "prompt_influence")
    # Subscript access (not .get) so a missing key fails loudly.
    request_kwargs = {key: sound_generation_data[key] for key in required_keys}

    effect_chunks = ELEVEN_CLIENT_ASYNC.text_to_sound_effects.convert(
        **request_kwargs
    )
    async for chunk in effect_chunks:
        if not chunk:
            continue
        yield chunk