Spaces:
Sleeping
Sleeping
Andrei Kulchyk
Andrei Kulchyk
committed on
Add checkbox for effects generation (#11)
Browse files
Co-authored-by: Andrei Kulchyk <[email protected]>
- app.py +9 -3
- src/audio_generators.py +9 -4
- src/builder.py +4 -3
app.py
CHANGED
@@ -45,7 +45,11 @@ def load_text_from_file(uploaded_file):
|
|
45 |
return text
|
46 |
|
47 |
|
48 |
-
async def respond(
|
|
|
|
|
|
|
|
|
49 |
if uploaded_file is not None:
|
50 |
try:
|
51 |
text = load_text_from_file(uploaded_file=uploaded_file)
|
@@ -54,7 +58,7 @@ async def respond(text: str, uploaded_file) -> tuple[Path | None, str]:
|
|
54 |
return (None, str(e))
|
55 |
|
56 |
builder = AudiobookBuilder()
|
57 |
-
audio_fp = await builder.run(text=text)
|
58 |
return audio_fp, ""
|
59 |
|
60 |
|
@@ -78,10 +82,12 @@ with gr.Blocks(title="Audiobooks Generation") as ui:
|
|
78 |
label="Error Messages", interactive=False, visible=False
|
79 |
) # Initially hidden
|
80 |
|
|
|
|
|
81 |
submit_button = gr.Button("Submit")
|
82 |
submit_button.click(
|
83 |
fn=respond,
|
84 |
-
inputs=[text_input, file_input], # Include the uploaded file as an input
|
85 |
outputs=[
|
86 |
audio_output,
|
87 |
error_output,
|
|
|
45 |
return text
|
46 |
|
47 |
|
48 |
+
async def respond(
|
49 |
+
text: str,
|
50 |
+
uploaded_file,
|
51 |
+
generate_effects: bool,
|
52 |
+
) -> tuple[Path | None, str]:
|
53 |
if uploaded_file is not None:
|
54 |
try:
|
55 |
text = load_text_from_file(uploaded_file=uploaded_file)
|
|
|
58 |
return (None, str(e))
|
59 |
|
60 |
builder = AudiobookBuilder()
|
61 |
+
audio_fp = await builder.run(text=text, generate_effects=generate_effects)
|
62 |
return audio_fp, ""
|
63 |
|
64 |
|
|
|
82 |
label="Error Messages", interactive=False, visible=False
|
83 |
) # Initially hidden
|
84 |
|
85 |
+
effects_generation_checkbox = gr.Checkbox(label="Generate background effects")
|
86 |
+
|
87 |
submit_button = gr.Button("Submit")
|
88 |
submit_button.click(
|
89 |
fn=respond,
|
90 |
+
inputs=[text_input, file_input, effects_generation_checkbox], # Include the uploaded file as an input
|
91 |
outputs=[
|
92 |
audio_output,
|
93 |
error_output,
|
src/audio_generators.py
CHANGED
@@ -67,10 +67,15 @@ class AudioGeneratorWithEffects:
|
|
67 |
text_split: SplitTextOutput,
|
68 |
character_to_voice: dict[str, str],
|
69 |
out_path: Path | None = None,
|
|
|
|
|
70 |
) -> Path:
|
71 |
"""Main method to generate the audiobook with TTS, emotion, and sound effects."""
|
72 |
num_lines = len(text_split.phrases)
|
73 |
-
lines_for_sound_effect = self._select_lines_for_sound_effect(
|
|
|
|
|
|
|
74 |
|
75 |
# Step 1: Process and modify text
|
76 |
modified_texts, sound_emotion_results = await self._process_and_modify_text(
|
@@ -96,9 +101,9 @@ class AudioGeneratorWithEffects:
|
|
96 |
|
97 |
return final_output
|
98 |
|
99 |
-
def _select_lines_for_sound_effect(self, num_lines: int) -> list[int]:
|
100 |
-
"""Select
|
101 |
-
return random.sample(range(num_lines), k=int(
|
102 |
|
103 |
async def _process_and_modify_text(
|
104 |
self, text_split: SplitTextOutput, lines_for_sound_effect: list[int]
|
|
|
67 |
text_split: SplitTextOutput,
|
68 |
character_to_voice: dict[str, str],
|
69 |
out_path: Path | None = None,
|
70 |
+
*,
|
71 |
+
generate_effects: bool = True,
|
72 |
) -> Path:
|
73 |
"""Main method to generate the audiobook with TTS, emotion, and sound effects."""
|
74 |
num_lines = len(text_split.phrases)
|
75 |
+
lines_for_sound_effect = self._select_lines_for_sound_effect(
|
76 |
+
num_lines, fraction=float(0.2 * generate_effects),
|
77 |
+
)
|
78 |
+
logger.info(f"{generate_effects = }, {lines_for_sound_effect = }")
|
79 |
|
80 |
# Step 1: Process and modify text
|
81 |
modified_texts, sound_emotion_results = await self._process_and_modify_text(
|
|
|
101 |
|
102 |
return final_output
|
103 |
|
104 |
+
def _select_lines_for_sound_effect(self, num_lines: int, fraction: float) -> list[int]:
|
105 |
+
"""Select % of the lines randomly for sound effect generation."""
|
106 |
+
return random.sample(range(num_lines), k=int(fraction * num_lines))
|
107 |
|
108 |
async def _process_and_modify_text(
|
109 |
self, text_split: SplitTextOutput, lines_for_sound_effect: list[int]
|
src/builder.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
from langchain_community.callbacks import get_openai_callback
|
2 |
|
3 |
-
from src.audio_generators import
|
4 |
from src.lc_callbacks import LCMessageLoggerAsync
|
5 |
from src.select_voice_chain import SelectVoiceChainOutput, VoiceSelector
|
6 |
from src.text_split_chain import SplitTextOutput, create_split_text_chain
|
@@ -9,7 +9,7 @@ from src.utils import GPTModels
|
|
9 |
|
10 |
class AudiobookBuilder:
|
11 |
|
12 |
-
def __init__(self):
|
13 |
self.voice_selector = VoiceSelector(
|
14 |
csv_table_fp="data/11labs_available_tts_voices.csv"
|
15 |
)
|
@@ -39,7 +39,7 @@ class AudiobookBuilder:
|
|
39 |
)
|
40 |
return chain_out
|
41 |
|
42 |
-
async def run(self, text: str):
|
43 |
text_split = await self.split_text(text)
|
44 |
select_voice_chain_out = await self.map_characters_to_voices(
|
45 |
text_split=text_split
|
@@ -48,5 +48,6 @@ class AudiobookBuilder:
|
|
48 |
out_path = await self.audio_generator.generate_audio(
|
49 |
text_split=text_split,
|
50 |
character_to_voice=select_voice_chain_out.character2voice,
|
|
|
51 |
)
|
52 |
return out_path
|
|
|
1 |
from langchain_community.callbacks import get_openai_callback
|
2 |
|
3 |
+
from src.audio_generators import AudioGeneratorWithEffects
|
4 |
from src.lc_callbacks import LCMessageLoggerAsync
|
5 |
from src.select_voice_chain import SelectVoiceChainOutput, VoiceSelector
|
6 |
from src.text_split_chain import SplitTextOutput, create_split_text_chain
|
|
|
9 |
|
10 |
class AudiobookBuilder:
|
11 |
|
12 |
+
def __init__(self) -> None:
|
13 |
self.voice_selector = VoiceSelector(
|
14 |
csv_table_fp="data/11labs_available_tts_voices.csv"
|
15 |
)
|
|
|
39 |
)
|
40 |
return chain_out
|
41 |
|
42 |
+
async def run(self, text: str, *, generate_effects: bool):
|
43 |
text_split = await self.split_text(text)
|
44 |
select_voice_chain_out = await self.map_characters_to_voices(
|
45 |
text_split=text_split
|
|
|
48 |
out_path = await self.audio_generator.generate_audio(
|
49 |
text_split=text_split,
|
50 |
character_to_voice=select_voice_chain_out.character2voice,
|
51 |
+
generate_effects=generate_effects,
|
52 |
)
|
53 |
return out_path
|