Andrei Kulchyk Andrei Kulchyk committed on
Commit
1decf45
·
unverified ·
1 Parent(s): 38a2004

Fix UI, gather TTS tasks (#4)

Browse files

* Fix UI, gather TTS tasks

* Remove hardcoding

---------

Co-authored-by: Andrei Kulchyk <[email protected]>

Files changed (1) hide show
  1. app.py +16 -14
app.py CHANGED
@@ -1,10 +1,10 @@
 
1
  import json
2
  import os
3
  import re
4
  from pathlib import Path
5
  from uuid import uuid4
6
 
7
- import librosa
8
  import requests
9
  import gradio as gr
10
  import pandas as pd
@@ -71,6 +71,10 @@ clear and consistent manner, suitable for subsequent text-to-speech processing.
71
  VOICES = pd.read_csv("data/11labs_tts_voices.csv").query("language == 'en'")
72
 
73
 
 
 
 
 
74
  class AudiobookBuilder:
75
  def __init__(
76
  self,
@@ -120,7 +124,7 @@ class AudiobookBuilder:
120
  annotated_text: str,
121
  character_to_voice: dict[str, str],
122
  ) -> Path:
123
- results = []
124
  current_character = "narrator"
125
  for line in annotated_text.splitlines():
126
  cleaned_line = line.strip().lower()
@@ -132,16 +136,16 @@ class AudiobookBuilder:
132
  pass
133
  voice_id = character_to_voice[current_character]
134
  character_text = cleaned_line[cleaned_line.rfind("]")+1:].lstrip()
135
- results.append(tts_astream(voice_id=voice_id, text=character_text))
136
 
 
137
  save_dir = Path("data") / "books"
138
  save_dir.mkdir(exist_ok=True)
139
  save_path = save_dir / f"{uuid4()}.wav"
140
  with open(save_path, "wb") as ab:
141
  for result in results:
142
- async for chunk in result:
143
- if chunk:
144
- ab.write(chunk)
145
  return save_path
146
 
147
  @staticmethod
@@ -219,7 +223,7 @@ def parse_pdf(file_path):
219
  return "\n".join([doc.page_content for doc in documents])
220
 
221
 
222
- async def respond(text, uploaded_file):
223
  # Check if a file is uploaded
224
  if uploaded_file is not None:
225
  # Save the uploaded file temporarily to check its size
@@ -249,9 +253,7 @@ async def respond(text, uploaded_file):
249
  character_to_gender = builder.classify_characters(text, unique_characters)
250
  character_to_voice = builder.map_characters_to_voices(character_to_gender)
251
  save_path = await builder.generate_audio(annotated_text, character_to_voice)
252
-
253
- audio, sr = librosa.load(str(save_path), sr=None)
254
- return (sr, audio)
255
 
256
 
257
  def refresh():
@@ -267,7 +269,7 @@ with gr.Blocks(title="Audiobooks Generation") as ui:
267
  file_input = gr.File(label="Upload a text file or PDF", file_types=['.txt', '.pdf'])
268
 
269
  with gr.Row(variant="panel"):
270
- audio_output = gr.Audio(label="Generated audio", type="numpy")
271
  error_output = gr.Textbox(label="Error Messages", interactive=False, visible=False) # Initially hidden
272
 
273
  submit_button = gr.Button("Submit")
@@ -286,20 +288,20 @@ with gr.Blocks(title="Audiobooks Generation") as ui:
286
 
287
  # Hide error message dynamically when input is received
288
  text_input.change(
289
- fn=lambda: gr.update(visible=False), # Hide the error field
290
  inputs=[text_input],
291
  outputs=error_output
292
  )
293
 
294
  file_input.change(
295
- fn=lambda: gr.update(visible=False), # Hide the error field
296
  inputs=[file_input],
297
  outputs=error_output
298
  )
299
 
300
  # To clear error field when refreshing
301
  refresh_button.click(
302
- fn=lambda: gr.update(visible=False), # Hide the error field
303
  inputs=[],
304
  outputs=error_output,
305
  )
 
1
+ import asyncio
2
  import json
3
  import os
4
  import re
5
  from pathlib import Path
6
  from uuid import uuid4
7
 
 
8
  import requests
9
  import gradio as gr
10
  import pandas as pd
 
71
  VOICES = pd.read_csv("data/11labs_tts_voices.csv").query("language == 'en'")
72
 
73
 
74
+ async def consume_aiter(aiterator):
75
+ return [x async for x in aiterator]
76
+
77
+
78
  class AudiobookBuilder:
79
  def __init__(
80
  self,
 
124
  annotated_text: str,
125
  character_to_voice: dict[str, str],
126
  ) -> Path:
127
+ tasks = []
128
  current_character = "narrator"
129
  for line in annotated_text.splitlines():
130
  cleaned_line = line.strip().lower()
 
136
  pass
137
  voice_id = character_to_voice[current_character]
138
  character_text = cleaned_line[cleaned_line.rfind("]")+1:].lstrip()
139
+ tasks.append(tts_astream(voice_id=voice_id, text=character_text))
140
 
141
+ results = await asyncio.gather(*(consume_aiter(t) for t in tasks))
142
  save_dir = Path("data") / "books"
143
  save_dir.mkdir(exist_ok=True)
144
  save_path = save_dir / f"{uuid4()}.wav"
145
  with open(save_path, "wb") as ab:
146
  for result in results:
147
+ for chunk in result:
148
+ ab.write(chunk)
 
149
  return save_path
150
 
151
  @staticmethod
 
223
  return "\n".join([doc.page_content for doc in documents])
224
 
225
 
226
+ async def respond(text: str, uploaded_file) -> tuple[Path | None, str]:
227
  # Check if a file is uploaded
228
  if uploaded_file is not None:
229
  # Save the uploaded file temporarily to check its size
 
253
  character_to_gender = builder.classify_characters(text, unique_characters)
254
  character_to_voice = builder.map_characters_to_voices(character_to_gender)
255
  save_path = await builder.generate_audio(annotated_text, character_to_voice)
256
+ return save_path, ""
 
 
257
 
258
 
259
  def refresh():
 
269
  file_input = gr.File(label="Upload a text file or PDF", file_types=['.txt', '.pdf'])
270
 
271
  with gr.Row(variant="panel"):
272
+ audio_output = gr.Audio(label="Generated audio", type="filepath")
273
  error_output = gr.Textbox(label="Error Messages", interactive=False, visible=False) # Initially hidden
274
 
275
  submit_button = gr.Button("Submit")
 
288
 
289
  # Hide error message dynamically when input is received
290
  text_input.change(
291
+ fn=lambda _: gr.update(visible=False), # Hide the error field
292
  inputs=[text_input],
293
  outputs=error_output
294
  )
295
 
296
  file_input.change(
297
+ fn=lambda _: gr.update(visible=False), # Hide the error field
298
  inputs=[file_input],
299
  outputs=error_output
300
  )
301
 
302
  # To clear error field when refreshing
303
  refresh_button.click(
304
+ fn=lambda _: gr.update(visible=False), # Hide the error field
305
  inputs=[],
306
  outputs=error_output,
307
  )