bl4dylion committed on
Commit
93a309d
·
1 Parent(s): 367a693

main logic of generation text for sound

Browse files
Files changed (1) hide show
  1. src/emotions/generation.py +148 -0
src/emotions/generation.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import openai
2
+ import json
3
+ from requests import HTTPError
4
+ from abc import ABC, abstractmethod
5
+
6
+ from .prompts import SOUND_EFFECT_GENERATION, SOUND_EFFECT_GENERATION_WITHOUT_DURATION_PREDICTION, TEXT_MODIFICATION
7
+ from .utils import get_audio_duration
8
+ from src.config import logger
9
+
10
+
11
class AbstractEffectGenerator(ABC):
    """Interface for objects that turn text into sound-effect / emotion data.

    Every method takes the source text and is annotated as returning a
    ``dict``. Implementations in this module provide both plain and
    coroutine versions of these methods, so callers need to know which kind
    of generator they hold before calling (or awaiting) them.
    """

    @abstractmethod
    def generate_text_for_sound_effect(self, text: str) -> dict:
        """Produce the sound-effect description for ``text``.

        Args:
            text: Source text the sound effect should be derived from.

        Returns:
            A dictionary describing the sound effect.
        """

    @abstractmethod
    def generate_parameters_for_sound_effect(
        self, text: str, generated_audio_file: str = None
    ) -> dict:
        """Produce sound-effect parameters for ``text``.

        Args:
            text: Source text the sound effect should be derived from.
            generated_audio_file: Optional path to an already generated
                audio file. Defaults to ``None`` so the contract matches
                the implementations, which treat ``None`` as "no file".

        Returns:
            A dictionary of sound-effect parameters.
        """

    @abstractmethod
    def add_emotion_to_text(self, text: str) -> dict:
        """Produce a version of ``text`` enriched with emotional cues.

        Args:
            text: Source text to modify.

        Returns:
            A dictionary holding the modified text.
        """
23
+
24
class EffectGenerator(AbstractEffectGenerator):
    """Synchronous generator backed by an OpenAI chat-completions model.

    Each request sends a system prompt plus the user text and asks the model
    for a JSON object, which is parsed and returned as a ``dict``.
    """

    def __init__(self, api_key: str, predict_duration: bool = True, model_type: str = 'gpt-4o'):
        """Create the OpenAI client and select the prompts to use.

        Args:
            api_key: API key passed to ``openai.OpenAI``.
            predict_duration: When true, use ``SOUND_EFFECT_GENERATION`` as
                the sound-effect system prompt; otherwise use
                ``SOUND_EFFECT_GENERATION_WITHOUT_DURATION_PREDICTION``.
            model_type: Model name sent with every completion request.
        """
        self.client = openai.OpenAI(api_key=api_key)
        self.sound_effect_prompt = (
            SOUND_EFFECT_GENERATION
            if predict_duration
            else SOUND_EFFECT_GENERATION_WITHOUT_DURATION_PREDICTION
        )
        self.text_modification_prompt = TEXT_MODIFICATION
        self.model_type = model_type
        logger.info(f"EffectGenerator initialized with model_type: {model_type}, predict_duration: {predict_duration}")

    def generate_text_for_sound_effect(self, text: str) -> dict:
        """Generate sound effect description and parameters based on input text.

        Args:
            text: User text sent to the model alongside the sound-effect
                system prompt.

        Returns:
            The model's JSON reply parsed into a dictionary.

        Raises:
            RuntimeError: If the reply is not valid JSON, if the API call
                fails, or if any other error occurs. The original exception
                is attached as ``__cause__``.
        """
        try:
            completion = self.client.chat.completions.create(
                model=self.model_type,
                messages=[
                    {"role": "system", "content": self.sound_effect_prompt},
                    {"role": "user", "content": text},
                ],
                response_format={"type": "json_object"},
            )
            # Extracting the output
            chatgpt_output = completion.choices[0].message.content

            # Parse and return JSON response
            output_dict = json.loads(chatgpt_output)
            logger.info("Successfully generated sound effect description: %s", output_dict)
            return output_dict

        except json.JSONDecodeError as e:
            logger.error("Failed to parse the output text as JSON: %s", e)
            raise RuntimeError(
                f"Error: Failed to parse the output text as JSON.\nOutput: {chatgpt_output}"
            ) from e

        # The openai client reports transport/status failures as
        # openai.APIError subclasses rather than requests.HTTPError, so both
        # are handled here; HTTPError is kept for backward compatibility.
        except (HTTPError, openai.APIError) as e:
            logger.error("HTTP error occurred: %s", e)
            raise RuntimeError(f"HTTP Error: {e}") from e

        except Exception as e:
            logger.error("Unexpected error occurred: %s", e)
            raise RuntimeError(f"Unexpected Error: {e}") from e

    def generate_parameters_for_sound_effect(self, text: str, generated_audio_file: str = None) -> dict:
        """Generate sound-effect parameters, optionally using a real audio duration.

        Args:
            text: User text forwarded to ``generate_text_for_sound_effect``.
            generated_audio_file: Optional audio file; when given, its
                duration (from ``get_audio_duration``) is stored under the
                ``'duration_seconds'`` key, overwriting any value the model
                returned for that key.

        Returns:
            The dictionary produced by ``generate_text_for_sound_effect``,
            possibly with ``'duration_seconds'`` replaced.

        Raises:
            RuntimeError: Propagated from ``generate_text_for_sound_effect``.
        """
        llm_output = self.generate_text_for_sound_effect(text)
        if generated_audio_file is not None:
            llm_output['duration_seconds'] = get_audio_duration(generated_audio_file)
            logger.info("Added duration_seconds to output based on generated audio file: %s", generated_audio_file)
        return llm_output

    def add_emotion_to_text(self, text: str) -> dict:
        """Ask the model to rewrite ``text`` using the text-modification prompt.

        Args:
            text: User text sent to the model alongside the
                text-modification system prompt.

        Returns:
            The model's JSON reply parsed into a dictionary.

        Raises:
            RuntimeError: If the reply cannot be parsed as JSON (including
                the case where the reply content is ``None``).
            Exception: Errors raised by the API call itself are not wrapped
                and propagate unchanged.
        """
        completion = self.client.chat.completions.create(
            model=self.model_type,
            messages=[
                {"role": "system", "content": self.text_modification_prompt},
                {"role": "user", "content": text},
            ],
            response_format={"type": "json_object"},
        )
        chatgpt_output = completion.choices[0].message.content
        try:
            output_dict = json.loads(chatgpt_output)
        except (json.JSONDecodeError, TypeError) as e:
            # TypeError covers a ``None`` content value, which json.loads rejects.
            logger.error("Error in parsing the modified text: %s", e)
            # Raising a bare string is itself a TypeError in Python 3, so a
            # real exception type is required to surface this message.
            raise RuntimeError(f"error, output_text: {chatgpt_output}") from e
        logger.info("Successfully modified text with emotional cues: %s", output_dict)
        return output_dict
85
+
86
+
87
class EffectGeneratorAsync(AbstractEffectGenerator):
    """Asynchronous generator backed by an OpenAI chat-completions model.

    All public methods are coroutines and must be awaited. Each request sends
    a system prompt plus the user text and asks the model for a JSON object,
    which is parsed and returned as a ``dict``.
    """

    def __init__(self, api_key: str, predict_duration: bool = True, model_type: str = 'gpt-4o'):
        """Create the async OpenAI client and select the prompts to use.

        Args:
            api_key: API key passed to ``openai.AsyncOpenAI``.
            predict_duration: When true, use ``SOUND_EFFECT_GENERATION`` as
                the sound-effect system prompt; otherwise use
                ``SOUND_EFFECT_GENERATION_WITHOUT_DURATION_PREDICTION``.
            model_type: Model name sent with every completion request.
        """
        self.client = openai.AsyncOpenAI(api_key=api_key)
        self.sound_effect_prompt = (
            SOUND_EFFECT_GENERATION
            if predict_duration
            else SOUND_EFFECT_GENERATION_WITHOUT_DURATION_PREDICTION
        )
        self.text_modification_prompt = TEXT_MODIFICATION
        self.model_type = model_type
        logger.info(
            "EffectGeneratorAsync initialized with model_type: %s, predict_duration: %s",
            model_type,
            predict_duration,
        )

    async def generate_text_for_sound_effect(self, text: str) -> dict:
        """Asynchronous version to generate sound effect description.

        Args:
            text: User text sent to the model alongside the sound-effect
                system prompt.

        Returns:
            The model's JSON reply parsed into a dictionary.

        Raises:
            RuntimeError: If the reply is not valid JSON, if the API call
                fails, or if any other error occurs. The original exception
                is attached as ``__cause__``.
        """
        try:
            completion = await self.client.chat.completions.create(
                model=self.model_type,
                messages=[
                    {"role": "system", "content": self.sound_effect_prompt},
                    {"role": "user", "content": text},
                ],
                response_format={"type": "json_object"},
            )
            # Extracting the output
            chatgpt_output = completion.choices[0].message.content

            # Parse and return JSON response
            output_dict = json.loads(chatgpt_output)
            logger.info("Successfully generated sound effect description: %s", output_dict)
            return output_dict

        except json.JSONDecodeError as e:
            logger.error("Failed to parse the output text as JSON: %s", e)
            raise RuntimeError(
                f"Error: Failed to parse the output text as JSON.\nOutput: {chatgpt_output}"
            ) from e

        # The openai client reports transport/status failures as
        # openai.APIError subclasses rather than requests.HTTPError, so both
        # are handled here; HTTPError is kept for backward compatibility.
        except (HTTPError, openai.APIError) as e:
            logger.error("HTTP error occurred: %s", e)
            raise RuntimeError(f"HTTP Error: {e}") from e

        except Exception as e:
            logger.error("Unexpected error occurred: %s", e)
            raise RuntimeError(f"Unexpected Error: {e}") from e

    async def generate_parameters_for_sound_effect(self, text: str, generated_audio_file: str = None) -> dict:
        """Generate sound-effect parameters, optionally using a real audio duration.

        Args:
            text: User text forwarded to ``generate_text_for_sound_effect``.
            generated_audio_file: Optional audio file; when given, its
                duration (from ``get_audio_duration``) is stored under the
                ``'duration_seconds'`` key, overwriting any value the model
                returned for that key. Defaults to ``None`` to match the
                ``is not None`` check below.

        Returns:
            The dictionary produced by ``generate_text_for_sound_effect``,
            possibly with ``'duration_seconds'`` replaced.

        Raises:
            RuntimeError: Propagated from ``generate_text_for_sound_effect``.
        """
        llm_output = await self.generate_text_for_sound_effect(text)
        if generated_audio_file is not None:
            # NOTE(review): get_audio_duration is called synchronously here; if
            # it performs blocking file I/O it will stall the event loop —
            # confirm whether it should be off-loaded to a worker thread.
            llm_output['duration_seconds'] = get_audio_duration(generated_audio_file)
            logger.info("Added duration_seconds to output based on generated audio file: %s", generated_audio_file)
        return llm_output

    async def add_emotion_to_text(self, text: str) -> dict:
        """Ask the model to rewrite ``text`` using the text-modification prompt.

        Args:
            text: User text sent to the model alongside the
                text-modification system prompt.

        Returns:
            The model's JSON reply parsed into a dictionary.

        Raises:
            RuntimeError: If the reply cannot be parsed as JSON (including
                the case where the reply content is ``None``).
            Exception: Errors raised by the API call itself are not wrapped
                and propagate unchanged.
        """
        completion = await self.client.chat.completions.create(
            model=self.model_type,
            messages=[
                {"role": "system", "content": self.text_modification_prompt},
                {"role": "user", "content": text},
            ],
            response_format={"type": "json_object"},
        )
        chatgpt_output = completion.choices[0].message.content
        try:
            output_dict = json.loads(chatgpt_output)
        except (json.JSONDecodeError, TypeError) as e:
            # TypeError covers a ``None`` content value, which json.loads rejects.
            logger.error("Error in parsing the modified text: %s", e)
            # Raising a bare string is itself a TypeError in Python 3, so a
            # real exception type is required to surface this message.
            raise RuntimeError(f"error, output_text: {chatgpt_output}") from e
        logger.info("Successfully modified text with emotional cues: %s", output_dict)
        return output_dict
148
+