Spaces:

ales
/

ai-audio-books

Sleeping

Andrei Kulchyk committed on Oct 11, 2024

Commit

8797a8a

unverified ·

1 Parent(s): aefca6a

Add checkbox for effects generation (#11)

Co-authored-by: Andrei Kulchyk <[email protected]>

Files changed (3) hide show

app.py CHANGED Viewed

@@ -45,7 +45,11 @@ def load_text_from_file(uploaded_file):
     return text
-async def respond(text: str, uploaded_file) -> tuple[Path | None, str]:
     if uploaded_file is not None:
         try:
             text = load_text_from_file(uploaded_file=uploaded_file)
@@ -54,7 +58,7 @@ async def respond(text: str, uploaded_file) -> tuple[Path | None, str]:
             return (None, str(e))
     builder = AudiobookBuilder()
-    audio_fp = await builder.run(text=text)
     return audio_fp, ""
@@ -78,10 +82,12 @@ with gr.Blocks(title="Audiobooks Generation") as ui:
             label="Error Messages", interactive=False, visible=False
         )  # Initially hidden
     submit_button = gr.Button("Submit")
     submit_button.click(
         fn=respond,
-        inputs=[text_input, file_input],  # Include the uploaded file as an input
         outputs=[
             audio_output,
             error_output,

     return text
+async def respond(
+    text: str,
+    uploaded_file,
+    generate_effects: bool,
+) -> tuple[Path | None, str]:
     if uploaded_file is not None:
         try:
             text = load_text_from_file(uploaded_file=uploaded_file)
             return (None, str(e))
     builder = AudiobookBuilder()
+    audio_fp = await builder.run(text=text, generate_effects=generate_effects)
     return audio_fp, ""
             label="Error Messages", interactive=False, visible=False
         )  # Initially hidden
+    effects_generation_checkbox = gr.Checkbox(label="Generate background effects")
     submit_button = gr.Button("Submit")
     submit_button.click(
         fn=respond,
+        inputs=[text_input, file_input, effects_generation_checkbox],  # Include the uploaded file as an input
         outputs=[
             audio_output,
             error_output,

src/audio_generators.py CHANGED Viewed

@@ -67,10 +67,15 @@ class AudioGeneratorWithEffects:
         text_split: SplitTextOutput,
         character_to_voice: dict[str, str],
         out_path: Path | None = None,
     ) -> Path:
         """Main method to generate the audiobook with TTS, emotion, and sound effects."""
         num_lines = len(text_split.phrases)
-        lines_for_sound_effect = self._select_lines_for_sound_effect(num_lines)
         # Step 1: Process and modify text
         modified_texts, sound_emotion_results = await self._process_and_modify_text(
@@ -96,9 +101,9 @@ class AudioGeneratorWithEffects:
         return final_output
-    def _select_lines_for_sound_effect(self, num_lines: int) -> list[int]:
-        """Select 20% of the lines randomly for sound effect generation."""
-        return random.sample(range(num_lines), k=int(0.0 * num_lines))
     async def _process_and_modify_text(
         self, text_split: SplitTextOutput, lines_for_sound_effect: list[int]

         text_split: SplitTextOutput,
         character_to_voice: dict[str, str],
         out_path: Path | None = None,
+        *,
+        generate_effects: bool = True,
     ) -> Path:
         """Main method to generate the audiobook with TTS, emotion, and sound effects."""
         num_lines = len(text_split.phrases)
+        lines_for_sound_effect = self._select_lines_for_sound_effect(
+            num_lines, fraction=float(0.2 * generate_effects),
+        )
+        logger.info(f"{generate_effects = }, {lines_for_sound_effect = }")
         # Step 1: Process and modify text
         modified_texts, sound_emotion_results = await self._process_and_modify_text(
         return final_output
+    def _select_lines_for_sound_effect(self, num_lines: int, fraction: float) -> list[int]:
+        """Select % of the lines randomly for sound effect generation."""
+        return random.sample(range(num_lines), k=int(fraction * num_lines))
     async def _process_and_modify_text(
         self, text_split: SplitTextOutput, lines_for_sound_effect: list[int]

src/builder.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from langchain_community.callbacks import get_openai_callback
-from src.audio_generators import AudioGeneratorSimple, AudioGeneratorWithEffects
 from src.lc_callbacks import LCMessageLoggerAsync
 from src.select_voice_chain import SelectVoiceChainOutput, VoiceSelector
 from src.text_split_chain import SplitTextOutput, create_split_text_chain
@@ -9,7 +9,7 @@ from src.utils import GPTModels
 class AudiobookBuilder:
-    def __init__(self):
         self.voice_selector = VoiceSelector(
             csv_table_fp="data/11labs_available_tts_voices.csv"
         )
@@ -39,7 +39,7 @@ class AudiobookBuilder:
             )
         return chain_out
-    async def run(self, text: str):
         text_split = await self.split_text(text)
         select_voice_chain_out = await self.map_characters_to_voices(
             text_split=text_split
@@ -48,5 +48,6 @@ class AudiobookBuilder:
         out_path = await self.audio_generator.generate_audio(
             text_split=text_split,
             character_to_voice=select_voice_chain_out.character2voice,
         )
         return out_path

 from langchain_community.callbacks import get_openai_callback
+from src.audio_generators import AudioGeneratorWithEffects
 from src.lc_callbacks import LCMessageLoggerAsync
 from src.select_voice_chain import SelectVoiceChainOutput, VoiceSelector
 from src.text_split_chain import SplitTextOutput, create_split_text_chain
 class AudiobookBuilder:
+    def __init__(self) -> None:
         self.voice_selector = VoiceSelector(
             csv_table_fp="data/11labs_available_tts_voices.csv"
         )
             )
         return chain_out
+    async def run(self, text: str, *, generate_effects: bool):
         text_split = await self.split_text(text)
         select_voice_chain_out = await self.map_characters_to_voices(
             text_split=text_split
         out_path = await self.audio_generator.generate_audio(
             text_split=text_split,
             character_to_voice=select_voice_chain_out.character2voice,
+            generate_effects=generate_effects,
         )
         return out_path