Andrei Kulchyk Andrei Kulchyk committed on
Commit
8797a8a
·
unverified ·
1 Parent(s): aefca6a

Add checkbox for effects generation (#11)

Browse files

Co-authored-by: Andrei Kulchyk <[email protected]>

Files changed (3) hide show
  1. app.py +9 -3
  2. src/audio_generators.py +9 -4
  3. src/builder.py +4 -3
app.py CHANGED
@@ -45,7 +45,11 @@ def load_text_from_file(uploaded_file):
45
  return text
46
 
47
 
48
- async def respond(text: str, uploaded_file) -> tuple[Path | None, str]:
 
 
 
 
49
  if uploaded_file is not None:
50
  try:
51
  text = load_text_from_file(uploaded_file=uploaded_file)
@@ -54,7 +58,7 @@ async def respond(text: str, uploaded_file) -> tuple[Path | None, str]:
54
  return (None, str(e))
55
 
56
  builder = AudiobookBuilder()
57
- audio_fp = await builder.run(text=text)
58
  return audio_fp, ""
59
 
60
 
@@ -78,10 +82,12 @@ with gr.Blocks(title="Audiobooks Generation") as ui:
78
  label="Error Messages", interactive=False, visible=False
79
  ) # Initially hidden
80
 
 
 
81
  submit_button = gr.Button("Submit")
82
  submit_button.click(
83
  fn=respond,
84
- inputs=[text_input, file_input], # Include the uploaded file as an input
85
  outputs=[
86
  audio_output,
87
  error_output,
 
45
  return text
46
 
47
 
48
+ async def respond(
49
+ text: str,
50
+ uploaded_file,
51
+ generate_effects: bool,
52
+ ) -> tuple[Path | None, str]:
53
  if uploaded_file is not None:
54
  try:
55
  text = load_text_from_file(uploaded_file=uploaded_file)
 
58
  return (None, str(e))
59
 
60
  builder = AudiobookBuilder()
61
+ audio_fp = await builder.run(text=text, generate_effects=generate_effects)
62
  return audio_fp, ""
63
 
64
 
 
82
  label="Error Messages", interactive=False, visible=False
83
  ) # Initially hidden
84
 
85
+ effects_generation_checkbox = gr.Checkbox(label="Generate background effects")
86
+
87
  submit_button = gr.Button("Submit")
88
  submit_button.click(
89
  fn=respond,
90
+ inputs=[text_input, file_input, effects_generation_checkbox], # Include the uploaded file as an input
91
  outputs=[
92
  audio_output,
93
  error_output,
src/audio_generators.py CHANGED
@@ -67,10 +67,15 @@ class AudioGeneratorWithEffects:
67
  text_split: SplitTextOutput,
68
  character_to_voice: dict[str, str],
69
  out_path: Path | None = None,
 
 
70
  ) -> Path:
71
  """Main method to generate the audiobook with TTS, emotion, and sound effects."""
72
  num_lines = len(text_split.phrases)
73
- lines_for_sound_effect = self._select_lines_for_sound_effect(num_lines)
 
 
 
74
 
75
  # Step 1: Process and modify text
76
  modified_texts, sound_emotion_results = await self._process_and_modify_text(
@@ -96,9 +101,9 @@ class AudioGeneratorWithEffects:
96
 
97
  return final_output
98
 
99
- def _select_lines_for_sound_effect(self, num_lines: int) -> list[int]:
100
- """Select 20% of the lines randomly for sound effect generation."""
101
- return random.sample(range(num_lines), k=int(0.0 * num_lines))
102
 
103
  async def _process_and_modify_text(
104
  self, text_split: SplitTextOutput, lines_for_sound_effect: list[int]
 
67
  text_split: SplitTextOutput,
68
  character_to_voice: dict[str, str],
69
  out_path: Path | None = None,
70
+ *,
71
+ generate_effects: bool = True,
72
  ) -> Path:
73
  """Main method to generate the audiobook with TTS, emotion, and sound effects."""
74
  num_lines = len(text_split.phrases)
75
+ lines_for_sound_effect = self._select_lines_for_sound_effect(
76
+ num_lines, fraction=float(0.2 * generate_effects),
77
+ )
78
+ logger.info(f"{generate_effects = }, {lines_for_sound_effect = }")
79
 
80
  # Step 1: Process and modify text
81
  modified_texts, sound_emotion_results = await self._process_and_modify_text(
 
101
 
102
  return final_output
103
 
104
+ def _select_lines_for_sound_effect(self, num_lines: int, fraction: float) -> list[int]:
105
+ """Select % of the lines randomly for sound effect generation."""
106
+ return random.sample(range(num_lines), k=int(fraction * num_lines))
107
 
108
  async def _process_and_modify_text(
109
  self, text_split: SplitTextOutput, lines_for_sound_effect: list[int]
src/builder.py CHANGED
@@ -1,6 +1,6 @@
1
  from langchain_community.callbacks import get_openai_callback
2
 
3
- from src.audio_generators import AudioGeneratorSimple, AudioGeneratorWithEffects
4
  from src.lc_callbacks import LCMessageLoggerAsync
5
  from src.select_voice_chain import SelectVoiceChainOutput, VoiceSelector
6
  from src.text_split_chain import SplitTextOutput, create_split_text_chain
@@ -9,7 +9,7 @@ from src.utils import GPTModels
9
 
10
  class AudiobookBuilder:
11
 
12
- def __init__(self):
13
  self.voice_selector = VoiceSelector(
14
  csv_table_fp="data/11labs_available_tts_voices.csv"
15
  )
@@ -39,7 +39,7 @@ class AudiobookBuilder:
39
  )
40
  return chain_out
41
 
42
- async def run(self, text: str):
43
  text_split = await self.split_text(text)
44
  select_voice_chain_out = await self.map_characters_to_voices(
45
  text_split=text_split
@@ -48,5 +48,6 @@ class AudiobookBuilder:
48
  out_path = await self.audio_generator.generate_audio(
49
  text_split=text_split,
50
  character_to_voice=select_voice_chain_out.character2voice,
 
51
  )
52
  return out_path
 
1
  from langchain_community.callbacks import get_openai_callback
2
 
3
+ from src.audio_generators import AudioGeneratorWithEffects
4
  from src.lc_callbacks import LCMessageLoggerAsync
5
  from src.select_voice_chain import SelectVoiceChainOutput, VoiceSelector
6
  from src.text_split_chain import SplitTextOutput, create_split_text_chain
 
9
 
10
  class AudiobookBuilder:
11
 
12
+ def __init__(self) -> None:
13
  self.voice_selector = VoiceSelector(
14
  csv_table_fp="data/11labs_available_tts_voices.csv"
15
  )
 
39
  )
40
  return chain_out
41
 
42
+ async def run(self, text: str, *, generate_effects: bool):
43
  text_split = await self.split_text(text)
44
  select_voice_chain_out = await self.map_characters_to_voices(
45
  text_split=text_split
 
48
  out_path = await self.audio_generator.generate_audio(
49
  text_split=text_split,
50
  character_to_voice=select_voice_chain_out.character2voice,
51
+ generate_effects=generate_effects,
52
  )
53
  return out_path