import json
from abc import ABC, abstractmethod
from typing import Optional

import openai
from requests import HTTPError

from .prompts import (
    SOUND_EFFECT_GENERATION,
    SOUND_EFFECT_GENERATION_WITHOUT_DURATION_PREDICTION,
    TEXT_MODIFICATION,
    TEXT_MODIFICATION_WITH_SSML,
)
from .utils import get_audio_duration
from src.config import logger


class AbstractEffectGenerator(ABC):
    """Interface shared by the synchronous and asynchronous effect generators."""

    @abstractmethod
    def generate_text_for_sound_effect(self, text: str) -> dict:
        """Return the model's JSON description of a sound effect for *text*."""

    @abstractmethod
    def generate_parameters_for_sound_effect(
        self, text: str, generated_audio_file: Optional[str] = None
    ) -> dict:
        """Return sound-effect parameters for *text*.

        Implementations in this module overwrite ``duration_seconds`` with the
        value measured from *generated_audio_file* when one is supplied.
        """

    @abstractmethod
    def add_emotion_to_text(self, text: str) -> dict:
        """Return the model's JSON response for the text-modification prompt."""


class EffectGenerator(AbstractEffectGenerator):
    """Blocking effect generator backed by ``openai.OpenAI``."""

    def __init__(
        self,
        api_key: str,
        predict_duration: bool = True,
        model_type: str = 'gpt-4o',
    ):
        """Create the client and select the system prompts.

        Args:
            api_key: Key passed to ``openai.OpenAI``.
            predict_duration: Selects ``SOUND_EFFECT_GENERATION`` when true,
                otherwise ``SOUND_EFFECT_GENERATION_WITHOUT_DURATION_PREDICTION``.
            model_type: Model name sent with every chat-completion request.
        """
        self.client = openai.OpenAI(api_key=api_key)
        self.sound_effect_prompt = (
            SOUND_EFFECT_GENERATION
            if predict_duration
            else SOUND_EFFECT_GENERATION_WITHOUT_DURATION_PREDICTION
        )
        self.text_modification_prompt = TEXT_MODIFICATION_WITH_SSML
        self.model_type = model_type
        logger.info(f"EffectGenerator initialized with model_type: {model_type}, predict_duration: {predict_duration}")

    def generate_text_for_sound_effect(self, text: str) -> dict:
        """Generate sound effect description and parameters based on input text.

        Args:
            text: User message sent alongside the sound-effect system prompt.

        Returns:
            The model response parsed from JSON.

        Raises:
            RuntimeError: If the request fails or the response is not valid
                JSON. The underlying exception is attached as ``__cause__``.
        """
        try:
            completion = self.client.chat.completions.create(
                model=self.model_type,
                messages=[
                    {"role": "system", "content": self.sound_effect_prompt},
                    {"role": "user", "content": text},
                ],
                response_format={"type": "json_object"},
            )
            # Extracting the output
            chatgpt_output = completion.choices[0].message.content

            # Parse and return JSON response
            output_dict = json.loads(chatgpt_output)
            logger.info("Successfully generated sound effect description: %s", output_dict)
            return output_dict

        except json.JSONDecodeError as e:
            # chatgpt_output is always bound here: only json.loads raises this.
            logger.error("Failed to parse the output text as JSON: %s", e)
            raise RuntimeError(
                f"Error: Failed to parse the output text as JSON.\nOutput: {chatgpt_output}"
            ) from e
        except HTTPError as e:
            # NOTE(review): this is requests.HTTPError; the OpenAI client
            # presumably raises its own error types, which would land in the
            # generic handler below instead - confirm.
            logger.error("HTTP error occurred: %s", e)
            raise RuntimeError(f"HTTP Error: {e}") from e
        except Exception as e:
            logger.error("Unexpected error occurred: %s", e)
            raise RuntimeError(f"Unexpected Error: {e}") from e

    def generate_parameters_for_sound_effect(
        self, text: str, generated_audio_file: Optional[str] = None
    ) -> dict:
        """Return the model's sound-effect output, optionally with a measured duration.

        Args:
            text: Text forwarded to ``generate_text_for_sound_effect``.
            generated_audio_file: When not ``None``, passed to
                ``get_audio_duration`` and the result is stored under
                ``duration_seconds``, replacing any value from the model.

        Returns:
            The (possibly updated) dictionary produced by the model.
        """
        llm_output = self.generate_text_for_sound_effect(text)
        if generated_audio_file is not None:
            llm_output['duration_seconds'] = get_audio_duration(generated_audio_file)
            logger.info(
                "Added duration_seconds to output based on generated audio file: %s",
                generated_audio_file,
            )
        return llm_output

    def add_emotion_to_text(self, text: str) -> dict:
        """Send *text* with the text-modification prompt and parse the JSON reply.

        Args:
            text: User message sent alongside the text-modification prompt.

        Returns:
            The model response parsed from JSON.

        Raises:
            RuntimeError: If the response is not valid JSON. Errors raised by
                the request itself propagate unchanged.
        """
        completion = self.client.chat.completions.create(
            model=self.model_type,
            messages=[
                {"role": "system", "content": self.text_modification_prompt},
                {"role": "user", "content": text},
            ],
            response_format={"type": "json_object"},
        )
        chatgpt_output = completion.choices[0].message.content
        try:
            output_dict = json.loads(chatgpt_output)
            logger.info("Successfully modified text with emotional cues: %s", output_dict)
            return output_dict
        except json.JSONDecodeError as e:
            logger.error("Error in parsing the modified text: %s", e)
            # Raising a bare string is itself a TypeError; wrap the message in
            # a real exception so callers see the offending model output.
            raise RuntimeError(f"error, output_text: {chatgpt_output}") from e


class EffectGeneratorAsync(AbstractEffectGenerator):
    """Coroutine-based effect generator backed by ``openai.AsyncOpenAI``."""

    def __init__(
        self,
        api_key: str,
        predict_duration: bool = True,
        model_type: str = 'gpt-4o',
    ):
        """Create the async client and select the system prompts.

        Args:
            api_key: Key passed to ``openai.AsyncOpenAI``.
            predict_duration: Selects ``SOUND_EFFECT_GENERATION`` when true,
                otherwise ``SOUND_EFFECT_GENERATION_WITHOUT_DURATION_PREDICTION``.
            model_type: Model name sent with every chat-completion request.
        """
        self.client = openai.AsyncOpenAI(api_key=api_key)
        self.sound_effect_prompt = (
            SOUND_EFFECT_GENERATION
            if predict_duration
            else SOUND_EFFECT_GENERATION_WITHOUT_DURATION_PREDICTION
        )
        self.text_modification_prompt = TEXT_MODIFICATION_WITH_SSML
        self.model_type = model_type

    async def generate_text_for_sound_effect(self, text: str) -> dict:
        """Asynchronous version to generate sound effect description.

        Args:
            text: User message sent alongside the sound-effect system prompt.

        Returns:
            The model response parsed from JSON.

        Raises:
            RuntimeError: If the request fails or the response is not valid
                JSON. The underlying exception is attached as ``__cause__``.
        """
        try:
            completion = await self.client.chat.completions.create(
                model=self.model_type,
                messages=[
                    {"role": "system", "content": self.sound_effect_prompt},
                    {"role": "user", "content": text},
                ],
                response_format={"type": "json_object"},
            )
            # Extracting the output
            chatgpt_output = completion.choices[0].message.content

            # Parse and return JSON response
            output_dict = json.loads(chatgpt_output)
            logger.info("Successfully generated sound effect description: %s", output_dict)
            return output_dict

        except json.JSONDecodeError as e:
            # chatgpt_output is always bound here: only json.loads raises this.
            logger.error("Failed to parse the output text as JSON: %s", e)
            raise RuntimeError(
                f"Error: Failed to parse the output text as JSON.\nOutput: {chatgpt_output}"
            ) from e
        except HTTPError as e:
            # NOTE(review): this is requests.HTTPError; the OpenAI client
            # presumably raises its own error types, which would land in the
            # generic handler below instead - confirm.
            logger.error("HTTP error occurred: %s", e)
            raise RuntimeError(f"HTTP Error: {e}") from e
        except Exception as e:
            logger.error("Unexpected error occurred: %s", e)
            raise RuntimeError(f"Unexpected Error: {e}") from e

    async def generate_parameters_for_sound_effect(
        self, text: str, generated_audio_file: Optional[str] = None
    ) -> dict:
        """Return the model's sound-effect output, optionally with a measured duration.

        Args:
            text: Text forwarded to ``generate_text_for_sound_effect``.
            generated_audio_file: When not ``None``, passed to
                ``get_audio_duration`` and the result is stored under
                ``duration_seconds``, replacing any value from the model.

        Returns:
            The (possibly updated) dictionary produced by the model.
        """
        llm_output = await self.generate_text_for_sound_effect(text)
        if generated_audio_file is not None:
            llm_output['duration_seconds'] = get_audio_duration(generated_audio_file)
            logger.info(
                "Added duration_seconds to output based on generated audio file: %s",
                generated_audio_file,
            )
        return llm_output

    async def add_emotion_to_text(self, text: str) -> dict:
        """Send *text* with the text-modification prompt and parse the JSON reply.

        Args:
            text: User message sent alongside the text-modification prompt.

        Returns:
            The model response parsed from JSON.

        Raises:
            RuntimeError: If the response is not valid JSON. Errors raised by
                the request itself propagate unchanged.
        """
        completion = await self.client.chat.completions.create(
            model=self.model_type,
            messages=[
                {"role": "system", "content": self.text_modification_prompt},
                {"role": "user", "content": text},
            ],
            response_format={"type": "json_object"},
        )
        chatgpt_output = completion.choices[0].message.content
        try:
            output_dict = json.loads(chatgpt_output)
            logger.info("Successfully modified text with emotional cues: %s", output_dict)
            return output_dict
        except json.JSONDecodeError as e:
            logger.error("Error in parsing the modified text: %s", e)
            # Raising a bare string is itself a TypeError; wrap the message in
            # a real exception so callers see the offending model output.
            raise RuntimeError(f"error, output_text: {chatgpt_output}") from e