Spaces:
Running
e2e audio book generation (#5)
Browse files
* add character to voice props mapping; refactor
* Remove unused method from audiobook builder
* Export available TTS voices by API key
* Ignore `.DS_Store`
* Edit columns of the exported csv file
* Read and preprocess voices
* Add more voices and re-export the CSV file (#7)
Co-authored-by: Andrei Kulchyk <[email protected]>
* Add more old female shared voices (#9)
Co-authored-by: Andrei Kulchyk <[email protected]>
* Add emotion and text modification (#8)
* add default param
* add voice setting to tts, add sound_generation_astream
* add text_modification, sound generation, overlay main and effect
* return old audio function, split new audio function
* map characters to voices; refactor
* upd app.py
* refactor keys usage
* fix simple audio generator
* fix simple audio generator
---------
Co-authored-by: Andrei Kulchyk <[email protected]>
Co-authored-by: Andrei Kulchyk <[email protected]>
Co-authored-by: Maksim Liutisch <[email protected]>
- .env.template +2 -1
- .gitignore +7 -3
- 11labs_available_tts_voices.csv +23 -0
- app.py +45 -240
- data/11labs_available_tts_voices.csv +0 -0
- data/11labs_tts_old_female_voices.csv +102 -0
- pg.ipynb +965 -25
- readme.md +5 -0
- scripts/add_voices.py +51 -0
- scripts/export_available_voices.py +49 -0
- src/audio_generators.py +239 -0
- src/builder.py +52 -0
- src/config.py +9 -0
- src/emotions/generation.py +1 -1
- generate_emotional_voice.py → src/generate_emotional_voice.py +0 -0
- src/lc_callbacks.py +59 -0
- src/prompts.py +47 -0
- src/select_voice_chain.py +181 -0
- src/text_split_chain.py +63 -37
- src/tts.py +31 -6
- src/utils.py +4 -0
@@ -1,2 +1,3 @@
|
|
1 |
OPENAI_API_KEY="..."
|
2 |
-
11LABS_API_KEY="..."
|
|
|
|
1 |
OPENAI_API_KEY="..."
|
2 |
+
11LABS_API_KEY="..."
|
3 |
+
AIML_API_KEY="..."
|
@@ -1,5 +1,9 @@
|
|
1 |
__pycache__
|
2 |
-
|
3 |
-
.env
|
4 |
venv
|
5 |
-
.python-version
|
|
|
|
|
|
|
|
|
|
|
|
1 |
__pycache__
|
2 |
+
.vscode
|
|
|
3 |
venv
|
4 |
+
.python-version
|
5 |
+
.DS_Store
|
6 |
+
|
7 |
+
data/books
|
8 |
+
|
9 |
+
.env
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
voice_id,name,preview_url,owner_id,permission_on_resource,is_legacy,is_mixed,accent,description,age,gender,category,language,descriptive
|
2 |
+
9BWtsMINqrJLrRacOk9x,Aria,https://storage.googleapis.com/eleven-public-prod/premade/voices/9BWtsMINqrJLrRacOk9x/405766b8-1f4e-4d3c-aba1-6f25333823ec.mp3,,,False,False,American,expressive,middle-aged,female,social media,,
|
3 |
+
CwhRBWXzGAHq8TQ4Fs17,Roger,https://storage.googleapis.com/eleven-public-prod/premade/voices/CwhRBWXzGAHq8TQ4Fs17/58ee3ff5-f6f2-4628-93b8-e38eb31806b0.mp3,,,False,False,American,confident,middle-aged,male,social media,,
|
4 |
+
EXAVITQu4vr4xnSDxMaL,Sarah,https://storage.googleapis.com/eleven-public-prod/premade/voices/EXAVITQu4vr4xnSDxMaL/01a3e33c-6e99-4ee7-8543-ff2216a32186.mp3,,,False,False,american,soft,young,female,news,,
|
5 |
+
FGY2WhTYpPnrIDTdsKH5,Laura,https://storage.googleapis.com/eleven-public-prod/premade/voices/FGY2WhTYpPnrIDTdsKH5/67341759-ad08-41a5-be6e-de12fe448618.mp3,,,False,False,American,upbeat,young,female,social media,,
|
6 |
+
IKne3meq5aSn9XLyUdCD,Charlie,https://storage.googleapis.com/eleven-public-prod/premade/voices/IKne3meq5aSn9XLyUdCD/102de6f2-22ed-43e0-a1f1-111fa75c5481.mp3,,,False,False,Australian,natural,middle aged,male,conversational,,
|
7 |
+
JBFqnCBsd6RMkjVDRZzb,George,https://storage.googleapis.com/eleven-public-prod/premade/voices/JBFqnCBsd6RMkjVDRZzb/e6206d1a-0721-4787-aafb-06a6e705cac5.mp3,,,False,False,British,warm,middle aged,male,narration,,
|
8 |
+
N2lVS1w4EtoT3dr4eOWO,Callum,https://storage.googleapis.com/eleven-public-prod/premade/voices/N2lVS1w4EtoT3dr4eOWO/ac833bd8-ffda-4938-9ebc-b0f99ca25481.mp3,,,False,False,Transatlantic,intense,middle-aged,male,characters,,
|
9 |
+
SAz9YHcvj6GT2YYXdXww,River,https://storage.googleapis.com/eleven-public-prod/premade/voices/SAz9YHcvj6GT2YYXdXww/e6c95f0b-2227-491a-b3d7-2249240decb7.mp3,,,False,False,American,confident,middle-aged,non-binary,social media,,
|
10 |
+
TX3LPaxmHKxFdv7VOQHJ,Liam,https://storage.googleapis.com/eleven-public-prod/premade/voices/TX3LPaxmHKxFdv7VOQHJ/63148076-6363-42db-aea8-31424308b92c.mp3,,,False,False,American,articulate,young,male,narration,,
|
11 |
+
XB0fDUnXU5powFXDhCwa,Charlotte,https://storage.googleapis.com/eleven-public-prod/premade/voices/XB0fDUnXU5powFXDhCwa/942356dc-f10d-4d89-bda5-4f8505ee038b.mp3,,,False,False,Swedish,seductive,young,female,characters,,
|
12 |
+
Xb7hH8MSUJpSbSDYk0k2,Alice,https://storage.googleapis.com/eleven-public-prod/premade/voices/Xb7hH8MSUJpSbSDYk0k2/d10f7534-11f6-41fe-a012-2de1e482d336.mp3,,,False,False,British,confident,middle-aged,female,news,,
|
13 |
+
XrExE9yKIg1WjnnlVkGX,Matilda,https://storage.googleapis.com/eleven-public-prod/premade/voices/XrExE9yKIg1WjnnlVkGX/b930e18d-6b4d-466e-bab2-0ae97c6d8535.mp3,,,False,False,American,friendly,middle-aged,female,narration,,
|
14 |
+
bIHbv24MWmeRgasZH58o,Will,https://storage.googleapis.com/eleven-public-prod/premade/voices/bIHbv24MWmeRgasZH58o/8caf8f3d-ad29-4980-af41-53f20c72d7a4.mp3,,,False,False,American,friendly,young,male,social media,,
|
15 |
+
cgSgspJ2msm6clMCkdW9,Jessica,https://storage.googleapis.com/eleven-public-prod/premade/voices/cgSgspJ2msm6clMCkdW9/56a97bf8-b69b-448f-846c-c3a11683d45a.mp3,,,False,False,American,expressive,young,female,conversational,,
|
16 |
+
cjVigY5qzO86Huf0OWal,Eric,https://storage.googleapis.com/eleven-public-prod/premade/voices/cjVigY5qzO86Huf0OWal/d098fda0-6456-4030-b3d8-63aa048c9070.mp3,,,False,False,American,friendly,middle-aged,male,conversational,,
|
17 |
+
iP95p4xoKVk53GoZ742B,Chris,https://storage.googleapis.com/eleven-public-prod/premade/voices/iP95p4xoKVk53GoZ742B/3f4bde72-cc48-40dd-829f-57fbf906f4d7.mp3,,,False,False,American,casual,middle-aged,male,conversational,,
|
18 |
+
nPczCjzI2devNBz1zQrb,Brian,https://storage.googleapis.com/eleven-public-prod/premade/voices/nPczCjzI2devNBz1zQrb/2dd3e72c-4fd3-42f1-93ea-abc5d4e5aa1d.mp3,,,False,False,American,deep,middle-aged,male,narration,,
|
19 |
+
onwK4e9ZLuTAKqWW03F9,Daniel,https://storage.googleapis.com/eleven-public-prod/premade/voices/onwK4e9ZLuTAKqWW03F9/7eee0236-1a72-4b86-b303-5dcadc007ba9.mp3,,,False,False,British,authoritative,middle-aged,male,news,,
|
20 |
+
pFZP5JQG7iQjIQuC4Bku,Lily,https://storage.googleapis.com/eleven-public-prod/premade/voices/pFZP5JQG7iQjIQuC4Bku/89b68b35-b3dd-4348-a84a-a3c13a3c2b30.mp3,,,False,False,British,warm,middle-aged,female,narration,,
|
21 |
+
pqHfZKP75CvOlQylNhV4,Bill,https://storage.googleapis.com/eleven-public-prod/premade/voices/pqHfZKP75CvOlQylNhV4/d782b3ff-84ba-4029-848c-acf01285524d.mp3,,,False,False,American,trustworthy,old,male,narration,,
|
22 |
+
jnmgvhGiCSW0gr6y3lc8,Jafar - Deep Narrator,https://storage.googleapis.com/eleven-public-prod/database/user/sD92HnMHS9WZLXKNTKxmnC8XmJ32/voices/jnmgvhGiCSW0gr6y3lc8/Id5ipSQDNJCELSTy4SpT.mp3,,admin,False,False,modern standard,,middle_aged,male,narrative_story,ar,deep
|
23 |
+
vfaqCOvlrKi4Zp7C2IAm,Demon Monster,https://storage.googleapis.com/eleven-public-prod/custom/voices/vfaqCOvlrKi4Zp7C2IAm/mLg8Cp6Jq1r0xAbqjgwJ.mp3,,admin,False,False,american,,middle_aged,neutral,characters_animation,en,deep
|
@@ -1,259 +1,54 @@
|
|
1 |
-
import asyncio
|
2 |
-
import json
|
3 |
import os
|
4 |
-
import re
|
5 |
from pathlib import Path
|
6 |
-
from uuid import uuid4
|
7 |
|
8 |
-
import requests
|
9 |
import gradio as gr
|
10 |
-
import pandas as pd
|
11 |
from dotenv import load_dotenv
|
12 |
-
from elevenlabs import AsyncElevenLabs
|
13 |
from langchain_community.document_loaders import PyPDFLoader
|
14 |
-
from openai import OpenAI
|
15 |
-
|
16 |
-
from src.tts import tts_astream
|
17 |
-
|
18 |
|
19 |
load_dotenv()
|
20 |
|
|
|
|
|
21 |
|
22 |
-
api_key = os.getenv("AIML_API_KEY")
|
23 |
-
FILE_SIZE_MAX = 0.5 #in mb
|
24 |
-
|
25 |
-
CHARACTER_CLASSIFICATION_PROMPT = """
|
26 |
-
**Task:**
|
27 |
-
Analyze the provided story text and classify each character in the given list \
|
28 |
-
by their gender. Use `"M"` for Male and `"F"` for Female. Classify the \
|
29 |
-
characters based on contextual clues such as names, pronouns, descriptions, \
|
30 |
-
roles, and interactions within the story.
|
31 |
-
|
32 |
-
**Output Format:**
|
33 |
-
Provide the classification in a JSON object where each key is a character's \
|
34 |
-
name, and the value is `"M"` or `"F"`.
|
35 |
-
|
36 |
-
**Example Input:**
|
37 |
-
```
|
38 |
-
### Story
|
39 |
-
Once upon a time Alice met Bob and Charlie.
|
40 |
-
|
41 |
-
### Characters
|
42 |
-
["alice", "bob", "charlie"]
|
43 |
-
```
|
44 |
-
|
45 |
-
**Example Output:**
|
46 |
-
```json
|
47 |
-
{
|
48 |
-
"alice": "F",
|
49 |
-
"bob": "M",
|
50 |
-
"charlie": "M"
|
51 |
-
}
|
52 |
-
"""
|
53 |
-
|
54 |
-
|
55 |
-
TEXT_ANNOTATION_PROMPT = """\
|
56 |
-
**Task:**
|
57 |
-
Analyze the provided text and annotate each segment by indicating whether it is \
|
58 |
-
part of the narration or spoken by a specific character. Use "Narrator" for \
|
59 |
-
narration and the character's name for dialogues. Format the annotated text in a \
|
60 |
-
clear and consistent manner, suitable for subsequent text-to-speech processing.
|
61 |
-
|
62 |
-
**Formatting Guidelines:**
|
63 |
-
|
64 |
-
- Narration: Prefix with `[Narrator]`
|
65 |
-
- Character Dialogue: Prefix with `[Character Name]`
|
66 |
-
- Multiple Characters Speaking: Prefix with `[Character Name 1] [Character Name 2] ... [Character Name N]`
|
67 |
-
- Consistent Line Breaks: Ensure each labeled segment starts on a new line for clarity.
|
68 |
-
"""
|
69 |
-
|
70 |
-
|
71 |
-
VOICES = pd.read_csv("data/11labs_tts_voices.csv").query("language == 'en'")
|
72 |
-
|
73 |
-
|
74 |
-
async def consume_aiter(aiterator):
|
75 |
-
return [x async for x in aiterator]
|
76 |
-
|
77 |
-
|
78 |
-
class AudiobookBuilder:
|
79 |
-
def __init__(
|
80 |
-
self,
|
81 |
-
*,
|
82 |
-
aiml_api_key: str | None = None,
|
83 |
-
aiml_base_url: str = "https://api.aimlapi.com/v1",
|
84 |
-
eleven_api_key: str | None = None,
|
85 |
-
) -> None:
|
86 |
-
self._aiml_api_key = aiml_api_key or os.environ["AIML_API_KEY"]
|
87 |
-
self._aiml_base_url = aiml_base_url
|
88 |
-
self._aiml_client = OpenAI(api_key=api_key, base_url=self._aiml_base_url)
|
89 |
-
self._default_narrator_voice = "ALY2WaJPY0oBJlqpQbfW"
|
90 |
-
self._eleven_api_key = eleven_api_key or os.environ["11LABS_API_KEY"]
|
91 |
-
self._eleven_client = AsyncElevenLabs(api_key=self._eleven_api_key)
|
92 |
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
},
|
99 |
-
{
|
100 |
-
"role": "user",
|
101 |
-
"content": text,
|
102 |
-
}
|
103 |
-
])
|
104 |
-
return response["choices"][0]["message"]["content"]
|
105 |
-
|
106 |
-
def classify_characters(self, annotated_text: str, unique_characters: list[str]) -> dict:
|
107 |
-
response = self._send_request_to_llm(
|
108 |
-
messages=[
|
109 |
-
{
|
110 |
-
"role": "system",
|
111 |
-
"content": CHARACTER_CLASSIFICATION_PROMPT,
|
112 |
-
},
|
113 |
-
{
|
114 |
-
"role": "user",
|
115 |
-
"content": f"### Story\n\n{annotated_text}\n\n### Characters\n\n{unique_characters}",
|
116 |
-
},
|
117 |
-
],
|
118 |
-
response_format={"type": "json_object"},
|
119 |
-
)
|
120 |
-
return json.loads(response["choices"][0]["message"]["content"])
|
121 |
-
|
122 |
-
async def generate_audio(
|
123 |
-
self,
|
124 |
-
annotated_text: str,
|
125 |
-
character_to_voice: dict[str, str],
|
126 |
-
) -> Path:
|
127 |
-
tasks = []
|
128 |
-
current_character = "narrator"
|
129 |
-
for line in annotated_text.splitlines():
|
130 |
-
cleaned_line = line.strip().lower()
|
131 |
-
if not cleaned_line:
|
132 |
-
continue
|
133 |
-
try:
|
134 |
-
current_character = re.findall(r"\[[\w\s]+\]", cleaned_line)[0][1:-1]
|
135 |
-
except:
|
136 |
-
pass
|
137 |
-
voice_id = character_to_voice[current_character]
|
138 |
-
character_text = cleaned_line[cleaned_line.rfind("]")+1:].lstrip()
|
139 |
-
tasks.append(tts_astream(voice_id=voice_id, text=character_text))
|
140 |
|
141 |
-
results = await asyncio.gather(*(consume_aiter(t) for t in tasks))
|
142 |
-
save_dir = Path("data") / "books"
|
143 |
-
save_dir.mkdir(exist_ok=True)
|
144 |
-
save_path = save_dir / f"{uuid4()}.wav"
|
145 |
-
with open(save_path, "wb") as ab:
|
146 |
-
for result in results:
|
147 |
-
for chunk in result:
|
148 |
-
ab.write(chunk)
|
149 |
-
return save_path
|
150 |
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
for line in annotated_text.splitlines():
|
155 |
-
cleaned_line = line.strip().lower()
|
156 |
-
if not cleaned_line.startswith("["):
|
157 |
-
continue
|
158 |
-
line_characters = re.findall(r"\[[\w\s]+\]", cleaned_line)
|
159 |
-
characters = characters.union(ch[1:-1] for ch in line_characters)
|
160 |
-
return list(characters - {"narrator"})
|
161 |
-
|
162 |
-
def map_characters_to_voices(self, character_to_gender: dict[str, str]) -> dict[str, str]:
|
163 |
-
character_to_voice = {"narrator": self._default_narrator_voice}
|
164 |
-
|
165 |
-
# Damy vperyod!
|
166 |
-
f_characters = [character for character, gender in character_to_gender.items() if gender.strip().lower() == "f"]
|
167 |
-
if f_characters:
|
168 |
-
f_voices = VOICES.query("gender == 'female'").iloc[:len(f_characters)].copy()
|
169 |
-
f_voices["character"] = f_characters
|
170 |
-
character_to_voice |= f_voices.set_index("character")["voice_id"].to_dict()
|
171 |
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
m_voices["character"] = m_characters
|
176 |
-
character_to_voice |= m_voices.set_index("character")["voice_id"].to_dict()
|
177 |
-
|
178 |
-
return character_to_voice
|
179 |
-
|
180 |
-
def _send_request_to_llm(self, messages: list[dict], **kwargs) -> dict:
|
181 |
-
response = requests.post(
|
182 |
-
url=f"{self._aiml_base_url}/chat/completions",
|
183 |
-
headers={
|
184 |
-
"Authorization": f"Bearer {self._aiml_api_key}",
|
185 |
-
"Content-Type": "application/json",
|
186 |
-
},
|
187 |
-
data=json.dumps({
|
188 |
-
"model": "gpt-4o",
|
189 |
-
"temperature": 0.0,
|
190 |
-
"messages": messages,
|
191 |
-
"stream": False,
|
192 |
-
"max_tokens": 16_384,
|
193 |
-
**kwargs,
|
194 |
-
}),
|
195 |
)
|
196 |
-
response.raise_for_status()
|
197 |
-
return response.json()
|
198 |
-
|
199 |
-
def _send_request_to_tts(self, voice_id: str, text: str):
|
200 |
-
url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
|
201 |
-
headers = {
|
202 |
-
"Accept": "audio/mpeg",
|
203 |
-
"Content-Type": "application/json",
|
204 |
-
"xi-api-key": self._eleven_api_key,
|
205 |
-
}
|
206 |
-
data = {
|
207 |
-
"text": text,
|
208 |
-
"model_id": "eleven_monolingual_v1",
|
209 |
-
"voice_settings": {
|
210 |
-
"stability": 0.5,
|
211 |
-
"similarity_boost": 0.5
|
212 |
-
}
|
213 |
-
}
|
214 |
-
response = requests.post(url, json=data, headers=headers)
|
215 |
-
response.raise_for_status()
|
216 |
-
return response
|
217 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
|
219 |
-
|
220 |
-
"""Parse the PDF file and return the text content."""
|
221 |
-
loader = PyPDFLoader(file_path)
|
222 |
-
documents = loader.load()
|
223 |
-
return "\n".join([doc.page_content for doc in documents])
|
224 |
|
225 |
|
226 |
async def respond(text: str, uploaded_file) -> tuple[Path | None, str]:
|
227 |
-
# Check if a file is uploaded
|
228 |
if uploaded_file is not None:
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
error_message = f"Error: The uploaded file exceeds the size limit of {FILE_SIZE_MAX} MB."
|
235 |
-
return None, error_message # Return None for audio output and the error message
|
236 |
-
|
237 |
-
# Determine file type
|
238 |
-
if uploaded_file.name.endswith('.txt'):
|
239 |
-
# Read the text from the uploaded .txt file
|
240 |
-
with open(temp_file_path, 'r', encoding='utf-8') as file:
|
241 |
-
text = file.read()
|
242 |
-
elif uploaded_file.name.endswith('.pdf'):
|
243 |
-
# Parse the PDF file and extract text
|
244 |
-
text = parse_pdf(temp_file_path)
|
245 |
-
else:
|
246 |
-
error_message = "Error: Unsupported file type. Please upload a .txt or .pdf file."
|
247 |
-
return None, error_message
|
248 |
|
249 |
-
# Proceed with the audiobook generation
|
250 |
builder = AudiobookBuilder()
|
251 |
-
|
252 |
-
|
253 |
-
character_to_gender = builder.classify_characters(text, unique_characters)
|
254 |
-
character_to_voice = builder.map_characters_to_voices(character_to_gender)
|
255 |
-
save_path = await builder.generate_audio(annotated_text, character_to_voice)
|
256 |
-
return save_path, ""
|
257 |
|
258 |
|
259 |
def refresh():
|
@@ -266,37 +61,48 @@ with gr.Blocks(title="Audiobooks Generation") as ui:
|
|
266 |
with gr.Row(variant="panel"):
|
267 |
text_input = gr.Textbox(label="Enter the book text", lines=20)
|
268 |
# Add a file upload field for .txt and .pdf files
|
269 |
-
file_input = gr.File(
|
|
|
|
|
270 |
|
271 |
with gr.Row(variant="panel"):
|
272 |
audio_output = gr.Audio(label="Generated audio", type="filepath")
|
273 |
-
error_output = gr.Textbox(
|
|
|
|
|
274 |
|
275 |
submit_button = gr.Button("Submit")
|
276 |
submit_button.click(
|
277 |
fn=respond,
|
278 |
inputs=[text_input, file_input], # Include the uploaded file as an input
|
279 |
-
outputs=[
|
|
|
|
|
|
|
280 |
)
|
281 |
|
282 |
refresh_button = gr.Button("Refresh")
|
283 |
refresh_button.click(
|
284 |
fn=refresh,
|
285 |
inputs=[],
|
286 |
-
outputs=[
|
|
|
|
|
|
|
|
|
287 |
)
|
288 |
|
289 |
# Hide error message dynamically when input is received
|
290 |
text_input.change(
|
291 |
fn=lambda _: gr.update(visible=False), # Hide the error field
|
292 |
inputs=[text_input],
|
293 |
-
outputs=error_output
|
294 |
)
|
295 |
|
296 |
file_input.change(
|
297 |
fn=lambda _: gr.update(visible=False), # Hide the error field
|
298 |
inputs=[file_input],
|
299 |
-
outputs=error_output
|
300 |
)
|
301 |
|
302 |
# To clear error field when refreshing
|
@@ -307,4 +113,3 @@ with gr.Blocks(title="Audiobooks Generation") as ui:
|
|
307 |
)
|
308 |
|
309 |
ui.launch()
|
310 |
-
|
|
|
|
|
|
|
1 |
import os
|
|
|
2 |
from pathlib import Path
|
|
|
3 |
|
|
|
4 |
import gradio as gr
|
|
|
5 |
from dotenv import load_dotenv
|
|
|
6 |
from langchain_community.document_loaders import PyPDFLoader
|
|
|
|
|
|
|
|
|
7 |
|
8 |
load_dotenv()
|
9 |
|
10 |
+
from src.builder import AudiobookBuilder
|
11 |
+
from src.config import logger, FILE_SIZE_MAX
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
+
def parse_pdf(file_path):
|
15 |
+
"""Parse the PDF file and return the text content."""
|
16 |
+
loader = PyPDFLoader(file_path)
|
17 |
+
documents = loader.load()
|
18 |
+
return "\n".join([doc.page_content for doc in documents])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
+
def load_text_from_file(uploaded_file):
|
22 |
+
# Save the uploaded file temporarily to check its size
|
23 |
+
temp_file_path = uploaded_file.name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
|
25 |
+
if os.path.getsize(temp_file_path) > FILE_SIZE_MAX * 1024 * 1024:
|
26 |
+
raise ValueError(
|
27 |
+
f"The uploaded file exceeds the size limit of {FILE_SIZE_MAX} MB."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
+
if uploaded_file.name.endswith(".txt"):
|
31 |
+
with open(temp_file_path, "r", encoding="utf-8") as file:
|
32 |
+
text = file.read()
|
33 |
+
elif uploaded_file.name.endswith(".pdf"):
|
34 |
+
text = parse_pdf(temp_file_path)
|
35 |
+
else:
|
36 |
+
raise ValueError("Unsupported file type. Please upload a .txt or .pdf file.")
|
37 |
|
38 |
+
return text
|
|
|
|
|
|
|
|
|
39 |
|
40 |
|
41 |
async def respond(text: str, uploaded_file) -> tuple[Path | None, str]:
|
|
|
42 |
if uploaded_file is not None:
|
43 |
+
try:
|
44 |
+
text = load_text_from_file(uploaded_file=uploaded_file)
|
45 |
+
except Exception as e:
|
46 |
+
logger.exception(e)
|
47 |
+
return (None, str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
|
|
49 |
builder = AudiobookBuilder()
|
50 |
+
audio_fp = await builder.run(text=text)
|
51 |
+
return audio_fp, ""
|
|
|
|
|
|
|
|
|
52 |
|
53 |
|
54 |
def refresh():
|
|
|
61 |
with gr.Row(variant="panel"):
|
62 |
text_input = gr.Textbox(label="Enter the book text", lines=20)
|
63 |
# Add a file upload field for .txt and .pdf files
|
64 |
+
file_input = gr.File(
|
65 |
+
label="Upload a text file or PDF", file_types=[".txt", ".pdf"]
|
66 |
+
)
|
67 |
|
68 |
with gr.Row(variant="panel"):
|
69 |
audio_output = gr.Audio(label="Generated audio", type="filepath")
|
70 |
+
error_output = gr.Textbox(
|
71 |
+
label="Error Messages", interactive=False, visible=False
|
72 |
+
) # Initially hidden
|
73 |
|
74 |
submit_button = gr.Button("Submit")
|
75 |
submit_button.click(
|
76 |
fn=respond,
|
77 |
inputs=[text_input, file_input], # Include the uploaded file as an input
|
78 |
+
outputs=[
|
79 |
+
audio_output,
|
80 |
+
error_output,
|
81 |
+
], # Include the audio output and error message output
|
82 |
)
|
83 |
|
84 |
refresh_button = gr.Button("Refresh")
|
85 |
refresh_button.click(
|
86 |
fn=refresh,
|
87 |
inputs=[],
|
88 |
+
outputs=[
|
89 |
+
audio_output,
|
90 |
+
error_output,
|
91 |
+
file_input,
|
92 |
+
], # Reset audio output, error message, and uploaded file
|
93 |
)
|
94 |
|
95 |
# Hide error message dynamically when input is received
|
96 |
text_input.change(
|
97 |
fn=lambda _: gr.update(visible=False), # Hide the error field
|
98 |
inputs=[text_input],
|
99 |
+
outputs=error_output,
|
100 |
)
|
101 |
|
102 |
file_input.change(
|
103 |
fn=lambda _: gr.update(visible=False), # Hide the error field
|
104 |
inputs=[file_input],
|
105 |
+
outputs=error_output,
|
106 |
)
|
107 |
|
108 |
# To clear error field when refreshing
|
|
|
113 |
)
|
114 |
|
115 |
ui.launch()
|
|
The diff for this file is too large to render.
See raw diff
|
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
public_owner_id,voice_id,date_unix,name,accent,gender,age,descriptive,use_case,category,language,description,preview_url,usage_character_count_1_y,usage_character_count_7_d,play_api_usage_character_count_1_y,cloned_by_count,rate,free_users_allowed,live_moderation_enabled,featured,notice_period,instagram_username,twitter_username,youtube_username,tiktok_username,image_url
|
2 |
+
09297050488006f9753bb196dd095ff5cf32ccfb63b8f08b9663b2f1d47dc6e8,lNABL6eI3BpPT8BvSqjK,1725571069,Jacqui Griffin,australian,female,old,chill,conversational,professional,en,Australian female voice. Works well for conversations.,https://storage.googleapis.com/eleven-public-prod/database/user/TOGdTXMbSsdvQf3ERegntWNS8fb2/voices/lNABL6eI3BpPT8BvSqjK/IzCltgswHo7KvyEzLcuT.mp3,797865,63207,0,207,1.0,True,False,False,,,,,,
|
3 |
+
b574841da3a907e3e0e9abfc0abd798b052d806b440d85389612d0733d312f4f,aM1H4Rj2mHP2jNxWV9Hi,1718992057,"Queen Rosamund - British, Older Woman",british,female,old,formal,characters_animation,generated,,Old British female voice. Perfect for Character in a Story.,https://storage.googleapis.com/eleven-public-prod/rm4oNHot8CNsnaq47GCYK3qi0973/voices/aM1H4Rj2mHP2jNxWV9Hi/0277eecb-022f-4c0f-aab0-898e6643033f.mp3,1463219,380249,0,586,1.0,True,False,False,,,,,,
|
4 |
+
d004fac99c70608d38bdf9287e732194e2720362bed6f0724204550794083a61,jjadftdbI7mhF1E015Z6,1718452567,Sita 2,indian,female,old,meditative,narrative_story,generated,,Old Indian female voice. Perfect for Narrations.,https://storage.googleapis.com/eleven-public-prod/CkPyanZ3KxZD3IblrdIGnr2gjPG3/voices/jjadftdbI7mhF1E015Z6/c6b9db28-ac02-4769-8aab-381430892d4f.mp3,272870,20834,79,85,1.0,True,False,False,,,,,,
|
5 |
+
8305443ce50a5dba3fcb6735634e88257c3b6365fb539fe4389b842565c2c53f,VJ4AboK3yq9TAHn1iqoZ,1717152747,"Kirsten - Elegant, Knowledgeable, and Reassuring",british,female,old,formal,narrative_story,generated,,"Step into the world of Kirsten, where every word is delivered with a touch of elegance and a wealth of knowledge. Imagine the calm, reassuring presence of a seasoned educator, combined with the refined tones of a classic British accent. Kirsten's voice is like a soothing cup of tea on a rainy day, offering comfort and wisdom with every phrase. It's the sound of experience and grace, making each message not just heard, but felt and cherished.",https://storage.googleapis.com/eleven-public-prod/lA5gT7FFPLVt66XJyVjpVR1ljPA2/voices/VJ4AboK3yq9TAHn1iqoZ/848d1c92-6383-458c-8fee-b1ca5babd474.mp3,800058,15526,0,189,1.0,True,False,False,,,,,,
|
6 |
+
8305443ce50a5dba3fcb6735634e88257c3b6365fb539fe4389b842565c2c53f,IGTHGcpKolOGcaYmL2cj,1717152590,"Dorothy - Graceful, Wise, and Warm",british,female,old,wise,narrative_story,generated,,"Step into the timeless elegance of Dorothy, where every word is laced with decades of wisdom and a comforting warmth. Imagine the soothing presence of a beloved grandmother, paired with the refined sophistication of a seasoned storyteller. Dorothy's voice is like a gentle embrace, offering solace and insight, wrapped in the rich, melodic tones of a classic British accent. It's the voice of cherished traditions and heartfelt advice, making each message not just conveyed, but treasured and revered",https://storage.googleapis.com/eleven-public-prod/lA5gT7FFPLVt66XJyVjpVR1ljPA2/voices/IGTHGcpKolOGcaYmL2cj/d0284caf-18de-4f61-8889-8662123f7f6e.mp3,2831666,226064,0,780,1.0,True,False,False,,,,,,
|
7 |
+
9104f466a293c99963e66f347b8faf904dc65dec464c358ebfd0a893a325ef96,3rvQA7NoZyH1zO3Q6Vf0,1716850162,"Amrita - soft, wise older Indian female",indian,female,old,wise,narrative_story,generated,,"Amrita's lovely voice comes to life with unparalleled presence and clarity. The wisdom of her years shines through in every syllable, captivating listeners with its warmth and sincerity. Elevate your project with the rich, soulful sound of an extraordinary Indian voice - choose Amrita and let her timeless elegance leave a lasting impression on your audience.",https://storage.googleapis.com/eleven-public-prod/eLaM21XgxcfeBSAwAsC2RpZjzo73/voices/3rvQA7NoZyH1zO3Q6Vf0/911d2d1a-8fac-4650-8d64-2b787c43496a.mp3,1251597,177815,0,214,1.0,True,False,False,,,,,,
|
8 |
+
977860ffec693c67ffac96239bdb4df008992d8d92eb768e204d822338c9e76d,69ZSWTIwfCFv75X87JU2,1716238221,Margaret - British Minister,british,female,old,classy,conversational,generated,,"A mature, clear, female voice, with strong & refined British accent that expresses confidence, authority and clarity.",https://storage.googleapis.com/eleven-public-prod/tg6yj6NvyqMkkhbZTnL2gZIfBRX2/voices/69ZSWTIwfCFv75X87JU2/68b445ea-9edd-44e4-9c0b-9bd22b14d335.mp3,771785,42289,0,281,1.0,True,False,False,,,,,,
|
9 |
+
a03680ee07adb36e2bc67b48a7c38483fff4c6afac7a92ef3be18cf41ada24ce,f3SgjPzocHWjSn3VXeKy,1715703436,Heather - Mature British lady,british,female,old,mature,conversational,generated,,"A mature British female voice, perfect for conversation, audiobooks and voiceovers.",https://storage.googleapis.com/eleven-public-prod/28U448fBuufmkz5lMBQX6JLm8sw2/voices/f3SgjPzocHWjSn3VXeKy/c9a06d6e-50fa-4ce3-9e7f-f6fd8c252898.mp3,8093207,55663,0,245,1.0,True,False,False,,,,,,
|
10 |
+
51995843ef183917ccc6e566d63b3d8e459c6ebfcb96d233e02721effda5cb92,WuGSSUryPegJHo50QGgT,1715413267,Judith,british,female,old,serious,informative_educational,generated,,"An old woman with a British accent, works also well for German. Good for serious text.",https://storage.googleapis.com/eleven-public-prod/onxO2ObRCRW3Qzhb9LmVbv77uAz2/voices/WuGSSUryPegJHo50QGgT/038d3f91-ddd4-4e74-8ac7-061f12cff5d3.mp3,3402384,179112,61895,712,1.0,True,False,False,,,,,,
|
11 |
+
977860ffec693c67ffac96239bdb4df008992d8d92eb768e204d822338c9e76d,QYnGzKou48JismUzBHvo,1714891652,"Victoria, Queen of England",british,female,old,formal,narrative_story,generated,,"An old British female voice, aristocratic, posh, royal style with a characteristic vintage tone. Ideal for narrations, stories and philosophical quotes.",https://storage.googleapis.com/eleven-public-prod/tg6yj6NvyqMkkhbZTnL2gZIfBRX2/voices/QYnGzKou48JismUzBHvo/119dfc33-3ef8-4268-9847-9c23af5fb9e9.mp3,4260098,393261,0,728,1.0,True,False,False,,,,,,
|
12 |
+
42471e883c16fbd1b9a4b9ae8a3ece94ba3b8dddda6e3aec270208680cd6d4c6,r8SplNeU9vfxsNrJFstn,1714407069,Kay - 65 years old,british,female,old,calm,conversational,generated,,"Old British female with a Calm, peaceful and positive voice. Suitable for conversations.",https://storage.googleapis.com/eleven-public-prod/8UfhnmuNoPSOxBcoJqW3NsegvUG2/voices/r8SplNeU9vfxsNrJFstn/823adabf-a99a-4537-ac95-90e2ee315d2d.mp3,894353,59103,0,294,1.0,True,False,False,,,,,,
|
13 |
+
8305443ce50a5dba3fcb6735634e88257c3b6365fb539fe4389b842565c2c53f,uQmNx4S4cocfWWmwNHT3,1714308585,"Old Osirion Woman - Timeless, Mystical, Nurturing",nigerian,female,old,mature,narrative_story,generated,,"Imagine a voice that echoes through the ages, a gentle whisper that carries the secrets of a thousand lifetimes. The Old Osirion Woman's aged African accent is a testament to a life spent in communion with the ancient mysteries, each word a fragment of the sacred knowledge passed down through generations. Her voice is a soothing presence, a reminder that even in the midst of life's storms, there is always a safe harbor to be found in the wisdom of our ancestors.",https://storage.googleapis.com/eleven-public-prod/lA5gT7FFPLVt66XJyVjpVR1ljPA2/voices/uQmNx4S4cocfWWmwNHT3/0e135507-23a5-43ef-819f-93aaac0e23dd.mp3,1524666,76338,0,416,1.0,True,False,False,,,,,,
|
14 |
+
8305443ce50a5dba3fcb6735634e88257c3b6365fb539fe4389b842565c2c53f,vCQaM78HXEXteeEWauFy,1714308128,"Tira Shabbar - Spirited, Irreverent, Young-at-Heart",australian,female,old,upbeat,narrative_story,generated,,"Picture a voice that crackles with the energy of a life well-lived, a mischievous cackle that belies the weathered face and silver hair. Tira Shabbar's aged Australian accent is a testament to a spirit that refuses to be tamed by the passage of time, each word a defiant declaration of a woman who knows that age is just a number. Her voice is a reminder that growing old doesn't mean growing dull, and that there's always room for a bit of irreverence and a whole lot of laughter.",https://storage.googleapis.com/eleven-public-prod/lA5gT7FFPLVt66XJyVjpVR1ljPA2/voices/vCQaM78HXEXteeEWauFy/22b88f00-da4b-43cb-b18d-98bf0c40ff63.mp3,130741,3712,0,71,1.0,True,False,False,,,,,,
|
15 |
+
1150f5b541afb62e981002a8067180c156d245861d82047cd2e5752d322fa12f,R83VJaKz4Mij1dGaO2fg,1714101019,Granny,british,female,old,serious,narrative_story,generated,,Old British woman. Voice works well for children stories.,https://storage.googleapis.com/eleven-public-prod/Zjk0AixKhQcOmS4vXLTw9VDqTZs1/voices/R83VJaKz4Mij1dGaO2fg/1bdc82dd-cb0e-40ce-bdf9-06f8778fb51a.mp3,3245857,218265,0,760,1.0,True,False,False,,,,,,
|
16 |
+
283370bb8371ee841816259d6a8cea4daa60aff22387f6b3863171179b230ee8,2nUnj3DfV6Rng6S1nxpc,1713496408,Beth,australian,female,old,mature,narrative_story,generated,,An old Australian woman with a mature tone. Great for storytelling.,https://storage.googleapis.com/eleven-public-prod/mUrWJ9JzNYYcuxwAk2qxG6vVprD3/voices/2nUnj3DfV6Rng6S1nxpc/594125c3-9e4d-45c8-b702-3afd18fe5638.mp3,2396684,174369,0,562,1.0,True,False,False,,,,,,
|
17 |
+
22a8b42c88d15c30699616cfcd7d04c15d5568644ef9690aa88a92e6c950adf0,XVUdMPWYupdlPLINhTLD,1711924485,Linda - a warm knowledgeable old soul,british,female,old,calm,narrative_story,generated,,"A warm, knowledgeable, old female voice with calming British accent. Perfect for story telling.",https://storage.googleapis.com/eleven-public-prod/NEdxyb7lQtRYFoUmcIKtRWwuIiz2/voices/XVUdMPWYupdlPLINhTLD/165d3f2c-31d7-441e-9f70-fdcda8fb7930.mp3,904978,11160,0,199,1.0,True,False,False,,,,,,
|
18 |
+
22a8b42c88d15c30699616cfcd7d04c15d5568644ef9690aa88a92e6c950adf0,wOIqSB8LjATp0zzjRILp,1711629658,Vivian - knowledgeable voice ,british,female,old,pleasant,narrative_story,generated,,An old and knowledgeable female voice with calming British accent. Perfect for story telling.,https://storage.googleapis.com/eleven-public-prod/NEdxyb7lQtRYFoUmcIKtRWwuIiz2/voices/wOIqSB8LjATp0zzjRILp/fc2eaa6e-6be5-4f13-8608-00bf781339ba.mp3,46099885,3883672,0,5113,1.0,True,False,False,,,,,,
|
19 |
+
1099aa9e6f219adc12f1a2f9d4ca116ce01b0b293cd825b2ec4db471a0bfed7c,pW8kAbWAgAwDqbZSMUhQ,1711485734,Ingrid,british,female,old,mature,narrative_story,generated,,An old British woman with a mature tone. Good for storytelling.,https://storage.googleapis.com/eleven-public-prod/mr4YzL9ZZMYq1OqQNnttWvMA7S13/voices/pW8kAbWAgAwDqbZSMUhQ/161ac904-cece-4785-8379-98c0dafea579.mp3,47407,292,0,38,1.0,True,False,False,,,,,,
|
20 |
+
8305443ce50a5dba3fcb6735634e88257c3b6365fb539fe4389b842565c2c53f,jaGvGi01GvObcoSABZwh,1711467819,"Mora of Maragall - Resilient, Compassionate, Inspiring",american,female,old,wise,narrative_story,generated,,"Imagine a voice that carries the weight of a thousand untold stories, a tapestry woven from the threads of a life filled with love, loss, and unbreakable resilience. Mora of Maragall's aged American accent is a testament to the enduring power of the human spirit, each word a brushstroke in a masterpiece of survival. Her voice is a soothing whisper, a gentle reminder that even in our darkest moments, we are never truly alone.",https://storage.googleapis.com/eleven-public-prod/lA5gT7FFPLVt66XJyVjpVR1ljPA2/voices/jaGvGi01GvObcoSABZwh/3662fb1a-bd49-4f6d-881e-2c9b0502300c.mp3,1292564,74118,0,329,1.0,True,False,False,,,,,,
|
21 |
+
8305443ce50a5dba3fcb6735634e88257c3b6365fb539fe4389b842565c2c53f,kf7joTHJRyu5pb0eY4gx,1711353027,"Sajvara - Spirited, Unconventional, Insightful",australian,female,old,confident,narrative_story,generated,,"Picture a voice that carries the warmth of the Australian sun, a melodic drawl that invites you to sit down and listen to the tales of a life well-lived. Sajvara's mature accent is a testament to her unyielding spirit, each word a defiant declaration of a woman who has always marched to the beat of her own drum. Her voice is a reminder that age is merely a number, and that the fire of youth can burn brightly within us no matter how many years have passed. ",https://storage.googleapis.com/eleven-public-prod/lA5gT7FFPLVt66XJyVjpVR1ljPA2/voices/kf7joTHJRyu5pb0eY4gx/11c099b8-079d-42bb-8eff-f6b199c3f357.mp3,193949,20979,0,100,1.0,True,False,False,,,,,,
|
22 |
+
8305443ce50a5dba3fcb6735634e88257c3b6365fb539fe4389b842565c2c53f,fxQFZRT7hIpyRq0yZeOQ,1711352972,"Carmin Isandre - Wise, Soothing, Nurturing",american,female,old,wise,narrative_story,generated,,"Imagine a voice that wraps around you like a warm embrace, a soothing balm for the soul in a world of chaos. Carmin Isandre's mature American accent is a testament to a life well-lived, each word infused with the wisdom of countless experiences. Her voice is a gentle reminder that even in the darkest of times, there is always a glimmer of hope, a reason to keep pushing forward. It's a voice that speaks of comfort and understanding, of a love that knows no bounds.",https://storage.googleapis.com/eleven-public-prod/lA5gT7FFPLVt66XJyVjpVR1ljPA2/voices/fxQFZRT7hIpyRq0yZeOQ/63a9bb61-00f9-4a66-b7b1-7b4b2ed5fd24.mp3,628085,47311,0,241,1.0,True,False,False,,,,,,
|
23 |
+
3b046d60fcd17976beb7615ba307918ce6e96ccf3669f20e633370be151cea44,63ur1dBC0fbh8vjHcKJM,1711302335,Nova - Wise and Tranquil ,british,female,old,wise,narrative_story,generated,,A wise old woman both distinguished and tranquil.,https://storage.googleapis.com/eleven-public-prod/gDnvR6qy9Rc0kZH6eIhVhVhKmPm2/voices/63ur1dBC0fbh8vjHcKJM/be673720-7c04-4109-8bb3-caa3d17be64b.mp3,3797724,329890,0,596,1.0,True,False,False,,,,,,
|
24 |
+
975806d31867760071b036d17c04648ab4384f1092bd815fc406a22daae477c0,xvJ4s2ai1gMSHMX6ezee,1711266627,Mary Agnus- Bedtime Storyteller ,british,female,old,casual,narrative_story,generated,,A sweet old British woman whose voice is comfy and warm. A very wise woman who's lived a full life and can narrate stories well.,https://storage.googleapis.com/eleven-public-prod/lZwuGoST8hSkpDj4RkkAjULEXNk1/voices/xvJ4s2ai1gMSHMX6ezee/87ac5174-e4ba-432d-bcd8-4137e807d524.mp3,313899,10248,0,144,1.0,True,False,False,,,,,,
|
25 |
+
4d5eb3f090411aedada12c6e513ea6baa924337830360acecd2b9c0ccd881fc0,Zvx9NywaYxmwMRl6DF1n,1710618645,Smart Sara,,female,old,serious,narrative_story,generated,,Old American Smart Soothing female Voice. Perfect for Narrations.,https://storage.googleapis.com/eleven-public-prod/9Db7pWvpiZMs7YYnEIDSKtCRoIv1/voices/Zvx9NywaYxmwMRl6DF1n/3858870d-b8db-4901-9a4c-c9ee87ebe0fc.mp3,365583,27762,0,95,1.0,True,False,False,,,,,,
|
26 |
+
a42905fd2095e55c89320aa75143b885e3ccd020577c280c8c0d0d022f3cfe44,7NsaqHdLuKNFvEfjpUno,1710547544,Seer Morganna,,female,old,wise,characters_animation,professional,en,The voice of an old wise seer woman telling people of their fortunes. Works well for Animations and characters in a story.,https://storage.googleapis.com/eleven-public-prod/custom/voices/7NsaqHdLuKNFvEfjpUno/LyhT6nxqgbsw2z56SfVt.mp3,4714410,160871,0,2030,1.0,True,False,False,730.0,,,,,
|
27 |
+
a0a69bbe9463c5e2efe6837954873fdc4b6a58da6d56674fdcebfe39f7aab8ec,33908jj7LBmsd0deWaxM,1709591869,wise-woman,african,female,old,confident,narrative_story,generated,,An older sounding African woman with a reassuring and wise tone. Great for Narrations & storytelling.,https://storage.googleapis.com/eleven-public-prod/nSmUbArDURbjgDFz5jx6gXxYsNC3/voices/33908jj7LBmsd0deWaxM/f8701a46-3e13-484e-a822-f61981b1b1dc.mp3,9076603,234505,0,1695,1.0,True,False,False,,,,,,
|
28 |
+
a81bbf479abf4a0ffe8765828a23e5405f1ac86229f2f83f706831e240de39ee,ZPRrQNjI1MYprX52TfHx,1708730891,Elizabeth - Wise and wistful,british,female,old,wise,narrative_story,generated,,"An older British lady with a deeper, sadder voice, who would narrate a poem about the passing of time through the eyes of a butterfly.",https://storage.googleapis.com/eleven-public-prod/Q0RSCGTO1zVjAhzqZHLY9UutPPr1/voices/ZPRrQNjI1MYprX52TfHx/564ea0e8-c750-445c-9079-f0ab4001235b.mp3,2661336,43818,0,743,1.0,True,False,False,,,,,,
|
29 |
+
0848a73b2cbe5ac0b5b232990b8935d1eee699360a0f0e9cca7ee5a568d20f75,aZAqu61ePodZWxXkMytx,1708266597,Betty - old-fashioned cottage lady,british,female,old,modulated,news,generated,,Old regal British female voice. perfect for audiobooks and narration.,https://storage.googleapis.com/eleven-public-prod/KPcCRM7adAREA7cQAgSb3rOvAgR2/voices/aZAqu61ePodZWxXkMytx/25a164ba-405a-42e5-b373-71946462d601.mp3,436196,11635,0,150,1.0,True,False,False,,,,,,
|
30 |
+
5d30d4a2ec6598900481a1f72bab23755468fd157c39bf18814bab6fb2f50c59,8Qv9gDuH46pwJr2h4S4Y,1708202961,Susan,indian,female,old,modulated,conversational,generated,,"Old Indian woman with an Amazingly strong, smooth voice with full range of emotions to draw listeners in. Impressive delivery of ups and downs.",https://storage.googleapis.com/eleven-public-prod/63YWf9FcbyS5PpTayiUUxnQ95Cj2/voices/8Qv9gDuH46pwJr2h4S4Y/2f72c490-316f-42e3-a368-4642d46afc3d.mp3,405110,83250,0,216,1.0,True,False,False,,,,,,
|
31 |
+
1e1affa15fbd1d3ad7310126a2dcc7a77b5d061f869b553486261aecdff41ace,eAtUQeq5p2a0JxXknbJJ,1708062288,Judy - Aged and Confident Elder,american,female,old,confident,,generated,,An old American woman with a confident and low tone. Great for storytellers. ,https://storage.googleapis.com/eleven-public-prod/fG8Y3OsnLHbylI1xZwADY3JN8D53/voices/eAtUQeq5p2a0JxXknbJJ/15f3200e-3551-4d99-9740-83dee0bbe310.mp3,5101199,901203,0,1287,1.0,True,False,False,,,,,,
|
32 |
+
5d30d4a2ec6598900481a1f72bab23755468fd157c39bf18814bab6fb2f50c59,Q5I5x63XfdFTP9wEIcHL,1706498836,Annie,african,female,old,cute,narrative_story,generated,,An old African female. Great for Action Novel - excellent story-telling and perfect for engaging listeners.,https://storage.googleapis.com/eleven-public-prod/63YWf9FcbyS5PpTayiUUxnQ95Cj2/voices/Q5I5x63XfdFTP9wEIcHL/b23d7806-575c-4180-a007-da86a585aac0.mp3,500594,96169,0,168,1.0,True,False,False,,,,,,
|
33 |
+
5d30d4a2ec6598900481a1f72bab23755468fd157c39bf18814bab6fb2f50c59,QxTS99N2QuIZNwTkGB5M,1706228568,Jenn,american,female,old,serious,conversational,generated,,"An Old American female
|
34 |
+
with a Stern and solid voice. Works well for conversations.",https://storage.googleapis.com/eleven-public-prod/63YWf9FcbyS5PpTayiUUxnQ95Cj2/voices/QxTS99N2QuIZNwTkGB5M/518786bf-ee01-400e-a690-c75193fee06a.mp3,419637,30782,0,203,1.0,True,False,False,,,,,,
|
35 |
+
f5248a581cb94c9bd4d00bf9460db3aecba20866d7af85aad71396e96abab76c,SfsSbS0dbG2xzhuUuDWr,1706074322,Сергей,american,female,old,cute,,generated,,Old American woman with a kind and sweet voice. Great for storytelling and shorts.,https://storage.googleapis.com/eleven-public-prod/sr5K1y3DOSVaGS5KIqhGn1jmdZI2/voices/SfsSbS0dbG2xzhuUuDWr/862666a6-0976-4249-85c4-5a3d61355f9d.mp3,135519,1248,0,39,1.0,True,False,False,,,,,,
|
36 |
+
a8191b2827241867444bfc070ec8692bd9d53f97a252a9349179439512db1892,bRp2SymzfuLvibautVoX,1705976128,Sybil ,australian,female,old,casual,conversational,generated,,An old female voice with an Australian accent. Good for conversations.,https://storage.googleapis.com/eleven-public-prod/LPSMwp0QPIPEu76c5UE6a2Tg6kU2/voices/bRp2SymzfuLvibautVoX/eb9ad599-df42-4447-8d85-b19ac1333018.mp3,151713,681,0,80,1.0,True,False,False,,,,,,
|
37 |
+
a8191b2827241867444bfc070ec8692bd9d53f97a252a9349179439512db1892,jyT8jhJpDl8Qk8zKi8Bw,1705648950,"Preeti -- Husky, sophisticated confident",indian,female,old,confident,characters_animation,generated,,"Old Indian woman with a luxurious, rich, polished voice. Great for character in a story.",https://storage.googleapis.com/eleven-public-prod/LPSMwp0QPIPEu76c5UE6a2Tg6kU2/voices/jyT8jhJpDl8Qk8zKi8Bw/f2b522e0-df25-4211-8d5f-6147f7352431.mp3,3734284,63612,0,1132,1.0,True,False,False,,,,,,
|
38 |
+
0a2d18668f851f8c09d88b3e339cead767d15c13049b13966010696b0a9142e6,yUku46DmlK2BEPMaZE92,1705036057,Patty - shouty grandma,american,female,old,intense,characters_animation,generated,,Old American woman with a Loud and boisterous sounding tone. Great for character in a story.,https://storage.googleapis.com/eleven-public-prod/o8UgP4tcAscn6EXDwTeRPj0ozlr2/voices/yUku46DmlK2BEPMaZE92/d6c0383e-348f-4c29-8407-8141d02e8522.mp3,1385435,19144,0,627,1.0,True,False,False,,,,,,
|
39 |
+
c8b33ea0fdec4be644fb062b17bcdd3fac4533b45ee3c27a990ead0e589bd7a7,21CxujpA2izHC6XgTymI,1704761850,Beatrice - energetic older female voice,american,female,old,confident,,generated,,Old American woman with active and live voice. Great for storytelling.,https://storage.googleapis.com/eleven-public-prod/wEEKAeCSeBZSdqYiXcgvNykOvKH3/voices/21CxujpA2izHC6XgTymI/b8e4dda8-9b3f-4a55-ab90-4b4f0629e935.mp3,469564,4438,0,145,1.0,True,False,False,,,,,,
|
40 |
+
11c5d6a405f2ea1c0d647a75dab55c5b69449a86cb897eea8b6df2aeef51890d,XAgoCDXnUkcbMCT12gAd,1704706866,Mamaie,,female,old,wise,,generated,romanian,An old Romanian woman. Voice is great for telling stories.,https://storage.googleapis.com/eleven-public-prod/dxfIEGFlD3gkffyZ293MKahG49y1/voices/XAgoCDXnUkcbMCT12gAd/aefc8890-6f90-4402-9ed9-355d26869b73.mp3,1349195,8868,0,395,1.0,True,False,False,,,,,,
|
41 |
+
3e17a650ce2fcfcc835c11e911945bdd94aa23250f8fb78c39de1e025458d560,k9tfrJwHIOEFSLhjCWd5,1704645808,Jenny,british,female,old,formal,entertainment_tv,generated,,Old British Woman with a confident voice. Perfect as a news reader. ,https://storage.googleapis.com/eleven-public-prod/lW2YzHJa1eOZXqscTYeTrHhR1VI3/voices/k9tfrJwHIOEFSLhjCWd5/2002fb60-10fd-4f45-8532-65f0e82a58e0.mp3,604501,6279,0,172,1.0,True,False,False,,,,,,
|
42 |
+
8305443ce50a5dba3fcb6735634e88257c3b6365fb539fe4389b842565c2c53f,3ccpjJKEZpMFMIcjKGzr,1703510268,"Urgathoa - Crisp, Dignified, & Haunting",british,female,old,serious,,generated,,"Urgathoa's voice is a whisper from the past, a crisp articulation that speaks of centuries-old British elegance. Each syllable she utters is wrapped in a dignified air, carrying stories of ancient manors and misty moors. Her haunting timbre lingers in the mind, a ghostly melody that enchants and intrigues, inviting listeners into a world of timeless mystery and grace.",https://storage.googleapis.com/eleven-public-prod/lA5gT7FFPLVt66XJyVjpVR1ljPA2/voices/3ccpjJKEZpMFMIcjKGzr/72bc9181-457c-40e4-9f3c-a7d2470be628.mp3,396071,8368,0,165,1.0,True,False,False,,,,,,
|
43 |
+
0bd919c134b0cf918d32017f6ef3f87bd346e6754cf40d9944dcf763e743720b,sQ2lr2hYxVEofB7IqIFi,1703270357,"Your ""favourite"" English teacher",british,female,old,classy,,generated,en,,https://storage.googleapis.com/eleven-public-prod/d6CgKdQkSye6emRLiJNzGAKsT8b2/voices/sQ2lr2hYxVEofB7IqIFi/cc16887b-17c5-4d82-9bc8-31c4171a42d8.mp3,272472,59,0,90,1.0,True,False,False,,,,,,
|
44 |
+
4ff53b20c22ac93c6e3059476c7d03daba978d4c0bca97536bc05f7fb95150c4,n3goVBXUQvmAJZON7vtF,1703236559,Ava - Old And Deep ,american,female,old,deep,conversational,generated,,An Old American Girl Voice Perfect For conversational Content. ,https://storage.googleapis.com/eleven-public-prod/VtRLZruZ0sVrmVjviMLYEpj3RTk2/voices/n3goVBXUQvmAJZON7vtF/285dd124-2dbc-43a1-85dc-3195069fd570.mp3,13154067,455700,0,1787,1.0,True,False,False,,,,,,
|
45 |
+
4ff53b20c22ac93c6e3059476c7d03daba978d4c0bca97536bc05f7fb95150c4,mh6sjQW5PGNyQIL4bEsC,1703236433,Mia - Old And Confident ,american,female,old,confident,entertainment_tv,generated,,An old American woman's voice. Sounds confident and would be great as a news reader. ,https://storage.googleapis.com/eleven-public-prod/VtRLZruZ0sVrmVjviMLYEpj3RTk2/voices/mh6sjQW5PGNyQIL4bEsC/c738c418-1457-4818-bbc0-145a8a5d8620.mp3,1304258,7048,0,351,1.0,True,False,False,,,,,,
|
46 |
+
a73ca6d475fd58941a94abc1b18ef1bf03492a64774450e0a8d7c3bb48c6e1fd,p9QSgy0OYpLYPPGJGmB1,1703124776,"Alicia - Warm, expressive, posh, old British female",british,female,old,calm,characters_animation,generated,,An old British female voice that's warm and good for nobility characters.,https://storage.googleapis.com/eleven-public-prod/udmG0I9oKegHHyrU3sEvatdvG2p1/voices/p9QSgy0OYpLYPPGJGmB1/9ad14ae6-5918-4d49-85fb-311dc1ec6d0c.mp3,2240393,36639,0,606,1.0,True,False,False,,,,,,
|
47 |
+
5ec9e52b92a371ff47071c25cdfed44341c28fbd33264e6793773ae6a4778b31,QYKxqAoKlq1fAEIkvXN2,1703032997,"Madison - wise old lady - mother, grandma, aunt",american,female,old,wise,informative_educational,generated,,"Old American woman. A wise lady with the perfect sounding voice of a mother, teacher, reporter.",https://storage.googleapis.com/eleven-public-prod/DA7lAIEDyBfTHHO37W8Bkr4lzDw1/voices/QYKxqAoKlq1fAEIkvXN2/050fe016-5417-4c43-ba53-da00abe0fe57.mp3,2178106,82944,0,650,1.0,True,False,False,,,,,,
|
48 |
+
8305443ce50a5dba3fcb6735634e88257c3b6365fb539fe4389b842565c2c53f,TLqtyQVABL0hCk08NhJm,1701433543,"Kherreonoskelis - Majestic, Profound, & Timeless",british,female,old,wise,,generated,,"Old British female voice, embodying a blend of wisdom, elegance, and timelessness. Her rich, resonant tones carry a depth of experience, perfect for narratives that require a touch of classic sophistication and charm. Kherreonoskelis's voice is ideal for storytelling, documentaries, and projects that demand a touch of British heritage and a sense of enduring grace.",https://storage.googleapis.com/eleven-public-prod/lA5gT7FFPLVt66XJyVjpVR1ljPA2/voices/TLqtyQVABL0hCk08NhJm/0a23c704-2e64-44dc-9395-93b74b532ce1.mp3,154788,493,0,60,1.0,True,False,False,,,,,,
|
49 |
+
ebc55c5eda1b3218f480f9221fddc482683561ded4c8ef3bd737eadd4cb1b786,suIOLCiurnF0pyuGnM4m,1699604468,Alice - calm & composed,american,female,old,calm,,generated,,"An old American female voice with calm and composed characteristics. Great for nighttime story narrations, audiobook narration, etc.",https://storage.googleapis.com/eleven-public-prod/9Dv3jzCkGMNqEr6RT7qtT9CsIgv1/voices/suIOLCiurnF0pyuGnM4m/b517801a-d162-4644-8864-255b8cddbcd6.mp3,2449161862,62781673,0,3482,1.0,True,False,False,,,,,,
|
50 |
+
36b58c5e7086239436709a70ded5b021d8bcf9aed915571d657f547414bf3f15,NfQYRtnjZblOOEFPrTDz,1698802724,Florence - Mature Educated,british,female,old,formal,,generated,,An old British woman voice with an educated intonation. Great for conversation.,https://storage.googleapis.com/eleven-public-prod/FTgfQkBx0ofkyQeGUEZlzoPMEMJ2/voices/NfQYRtnjZblOOEFPrTDz/33ef548d-401e-499c-a0e6-b54520d74054.mp3,3700660,3586,0,550,1.0,True,False,False,,,,,,
|
51 |
+
5d30d4a2ec6598900481a1f72bab23755468fd157c39bf18814bab6fb2f50c59,i34yTJEqzPTYSCs2ikVu,1698466039,Mistress Valerie,british,female,old,calm,characters_animation,generated,,Old British woman with a Calm and confident voice. Excellent for secondary character personas.,https://storage.googleapis.com/eleven-public-prod/63YWf9FcbyS5PpTayiUUxnQ95Cj2/voices/i34yTJEqzPTYSCs2ikVu/c82bd308-8956-434a-9090-a4ec8e1c5f61.mp3,1861317,43789,0,760,1.0,True,False,False,,,,,,
|
52 |
+
ecb2ed01ab999a7b8a123bd3aac6e558ec0492cad54c7415fb398cff5f8af599,l9hiSvBi6nMFP7kZNR2h,1697044336,"Sylvia - confident, sensible, wise",british,female,old,wise,,generated,,"An old British woman with a wise voice. This voice works well for narration, dialogue, and other dramatic projects.",https://storage.googleapis.com/eleven-public-prod/hvC8EldP9VRdzrDJBbCcl4McvKC2/voices/l9hiSvBi6nMFP7kZNR2h/32daa4ea-c66f-4981-8ddf-9bd8f380084d.mp3,1004986,7927,0,244,1.0,True,False,False,,,,,,
|
53 |
+
ecb2ed01ab999a7b8a123bd3aac6e558ec0492cad54c7415fb398cff5f8af599,HAKNM6g2wxqt5xaOP2sw,1695537666,"Sigrid - solemn, raspy, wise",british,female,old,raspy,,generated,,Old British woman with a peaceful and raspy voice perfect for narrators and storytellers. ,https://storage.googleapis.com/eleven-public-prod/hvC8EldP9VRdzrDJBbCcl4McvKC2/voices/HAKNM6g2wxqt5xaOP2sw/fc3795a6-2c30-4ddd-8dbd-79f97a92a843.mp3,39976150,746266,3772,2906,1.0,True,False,False,,,,,,
|
54 |
+
8305443ce50a5dba3fcb6735634e88257c3b6365fb539fe4389b842565c2c53f,0pEZm4Ek0U5nghhwauyv,1695495700,Nefreti Clepati,african,female,old,wise,,generated,,Old Woman with an African accent. Voice is a weave of ancient African wisdom and timeless grace. Great for children storytellers.,https://storage.googleapis.com/eleven-public-prod/lA5gT7FFPLVt66XJyVjpVR1ljPA2/voices/0pEZm4Ek0U5nghhwauyv/935336eb-d523-454e-854e-5fa70a9bd424.mp3,570673,9569,0,190,1.0,True,False,False,,,,,,
|
55 |
+
a21f4eaefb435135e0504e63fed17a51db66b700828344658273de01ab61e22a,UDIetJF6suUQ6OyfTeWH,1695370281,Meredith | Old Wise Lady ,british,female,old,,,generated,,,https://storage.googleapis.com/eleven-public-prod/92BbRPA2BlMytDr3JOTY1wz8sGX2/voices/UDIetJF6suUQ6OyfTeWH/cd2ccade-533e-48b3-afc2-ba804ca64df1.mp3,511906,6542,0,202,1.0,True,False,False,,,,,,
|
56 |
+
644eb23e4b354fc185266eb8a26720e051a3e78661c9be7ac6e1edbc38eec5df,uIoHGOWYuIIrThXQj6gQ,1695366841,Molero,american,female,old,,,generated,,,https://storage.googleapis.com/eleven-public-prod/Rn9qBrAUApXaTI32LPiCiVvTzUN2/voices/uIoHGOWYuIIrThXQj6gQ/b08f58ec-9e98-489a-ab28-50782c2696df.mp3,1013482,6258,0,236,1.0,True,False,False,,,,,,
|
57 |
+
d22d22377826be549f36e7e09758ddc52490d88251814dd37b7199a4b64ac451,RSJVEr0ddnllYvtLXb1a,1695042347,Grandma Margaret - Storybook Narrator,british,female,old,calm,,generated,,An old British woman. The perfect voice for relaxing bedtime stories.,https://storage.googleapis.com/eleven-public-prod/LJJ7WdMVGqdBNPYE1pqd5ARQ4ac2/voices/RSJVEr0ddnllYvtLXb1a/e474173d-117f-4f2b-93b2-fd74ad58ed32.mp3,15748822,960445,0,2831,1.0,True,False,False,,,,,,
|
58 |
+
69367947e5f116032e7fbd4fdb8fa146a6786327c6f878154a8efe6cf46997ea,ot3VPPr4ihiiiSDI89UK,1694207767,Cordelia - Shakespearean enthusiast,british,female,old,confident,,generated,,Old British woman with a strong and confident voice great for narration .,https://storage.googleapis.com/eleven-public-prod/XnWQKpONeagjUl8iUXy6rvndEts2/voices/ot3VPPr4ihiiiSDI89UK/e14cbb2a-4a5a-497f-9355-ba5147911220.mp3,192879,12822,0,42,1.0,True,False,False,,,,,,
|
59 |
+
410e4777a4b0a9361e871f17774572072bfc2dc19e0b9b8820c2942794435f3e,XjfzjX8kIJIgTgycgGUA,1694074280,Shelly - Assertive Soulful,british,female,old,wise,,generated,,Old British Woman. Voice is perfect for narrating passionate wisdom and lived experiences into helpful educational dialog.,https://storage.googleapis.com/eleven-public-prod/ikNVj8ML3Lgb9KGOLP3VLBt2lC13/voices/XjfzjX8kIJIgTgycgGUA/99278495-29b5-45b8-ae4a-2ec021829177.mp3,780615,5165,0,180,1.0,True,False,False,,,,,,
|
60 |
+
8305443ce50a5dba3fcb6735634e88257c3b6365fb539fe4389b842565c2c53f,NgXWLvu81XjQ1554TYSI,1693649902,Marta - Officious,american,female,old,serious,,generated,,"An old American female voice that epitomizes bureaucratic efficiency with a dash of stern oversight. Voice is tailor-made for narratives that call for formality, structure, and a touch of rigidity. ",https://storage.googleapis.com/eleven-public-prod/lA5gT7FFPLVt66XJyVjpVR1ljPA2/voices/NgXWLvu81XjQ1554TYSI/c753935c-9e25-4a14-aa2a-a68acf12a2a3.mp3,571967,6933,0,215,1.0,True,False,False,,,,,,
|
61 |
+
379475b3a7fb2bbd16c01afba4f8f69730d9d3bd57f690877204041dcd1de9c2,SlJzuRzvNnTNsSjFRTYB,1693575265,Minerva - Fantasy Professor,british,female,old,grumpy,characters_animation,generated,,"Older female British professor, stern sounding.",https://storage.googleapis.com/eleven-public-prod/ZzIzVo9ATCeBEsjfTWa17GTKTtn1/voices/SlJzuRzvNnTNsSjFRTYB/935ff751-fceb-4c3c-811e-bcc533d13608.mp3,5773534,23839,0,1513,1.0,True,False,False,,,,,,
|
62 |
+
a73ca6d475fd58941a94abc1b18ef1bf03492a64774450e0a8d7c3bb48c6e1fd,wm667EeJWPcj78lX6MOR,1692058663,Clarice - Kind and Trustworthy,british,female,old,husky,narrative_story,generated,,A kind and trustworthy British woman with a slightly husky voice.,https://storage.googleapis.com/eleven-public-prod/udmG0I9oKegHHyrU3sEvatdvG2p1/voices/wm667EeJWPcj78lX6MOR/7a46fb5c-7e66-4ec1-9130-666ba9cce76f.mp3,3615679,23902,0,352,1.0,True,False,False,,,,,,
|
63 |
+
44496be597370a26498388f515f07364af86603f62fec234d810e3b98404c008,6vtgCQ0WCLo5jtLGjdm3,1691661664,Elizabeth - grumpy lady,australian,female,old,formal,characters_animation,generated,,A grumpy old womans voice. For storytelling or other character content. Also works for children's stories.,https://storage.googleapis.com/eleven-public-prod/5BeqKP8nJih9lRRH5x0j5lfF5AY2/voices/6vtgCQ0WCLo5jtLGjdm3/35cc77ad-8c66-4a6e-a110-e643cc1e068f.mp3,1361656,58284,0,835,1.0,True,False,False,,,,,,
|
64 |
+
4c7e0a65d5daaa95241e12cac72dbbae5d5494bcf7509c3ca97798b1a7bcc544,3DtKkssOoROWXYa2hyty,1691615755,Soosii,american,female,old,calm,narrative_story,generated,,"A calm, older American female voice. Would work well for a podcast.",https://storage.googleapis.com/eleven-public-prod/Je2OUP5SdgSZhG40J7umHz4bhXB3/voices/3DtKkssOoROWXYa2hyty/3a6e2d96-5a61-4455-99c0-3218d04f8f6c.mp3,299801,2371,0,115,1.0,True,False,False,,,,,,
|
65 |
+
4c7e0a65d5daaa95241e12cac72dbbae5d5494bcf7509c3ca97798b1a7bcc544,BzmS47m5BBDKC0I02Jq2,1691615190,Rayan,american,female,old,confident,entertainment_tv,generated,,A confident and reassuring older female voice with an American accent. Would work well for a news reader.,https://storage.googleapis.com/eleven-public-prod/Je2OUP5SdgSZhG40J7umHz4bhXB3/voices/BzmS47m5BBDKC0I02Jq2/f30551d4-73af-4171-ac38-56a194b6cad5.mp3,13431178,221407,0,1738,1.0,True,False,False,,,,,,
|
66 |
+
4c7e0a65d5daaa95241e12cac72dbbae5d5494bcf7509c3ca97798b1a7bcc544,RC5YEaJn4ZyXHqHxKdSa,1691614758,Nancy,american,female,old,pleasant,narrative_story,generated,,An older sounding American woman with a reassuring and wise tone. Great for podcasts.,https://storage.googleapis.com/eleven-public-prod/Je2OUP5SdgSZhG40J7umHz4bhXB3/voices/RC5YEaJn4ZyXHqHxKdSa/8c991f98-1975-4b2d-8fa8-1e6ab6d8ebf8.mp3,635376,4304,0,177,1.0,True,False,False,,,,,,
|
67 |
+
bf5a02da2ea165358c78543a1eb4067aec1bf0fc02b2a25ea71b0cc3c8f4981c,uhsqzHkkiYlOv63jxpnz,1691362644,Kim,american,female,old,rough,characters_animation,generated,,Old american woman. Has a rough voice. Great as a character.,https://storage.googleapis.com/eleven-public-prod/vKxgocWeALaiSNSLpR6Vs1n1oD82/voices/uhsqzHkkiYlOv63jxpnz/66f301e8-0349-4091-8f00-2628283ce65c.mp3,2591470,92195,0,824,1.0,True,False,False,,,,,,
|
68 |
+
bf5a02da2ea165358c78543a1eb4067aec1bf0fc02b2a25ea71b0cc3c8f4981c,pgwBOyXqijAkMhjWpUWs,1691362517, Martha - Narration,american,female,old,husky,narrative_story,generated,,Old american woman. Has a husky tone to her voice. Great for Audiobooks.#,https://storage.googleapis.com/eleven-public-prod/vKxgocWeALaiSNSLpR6Vs1n1oD82/voices/pgwBOyXqijAkMhjWpUWs/649b0a84-93b4-44c3-95ef-d45b0176a10e.mp3,1725900,23702,0,540,1.0,True,False,False,,,,,,
|
69 |
+
0c7a0a07052b2f2bb4b73b2963357bde7f22aa26f9e58cd0f7a850c8d695f439,OoxZRfmtuNo7XbScKAUJ,1691348740,Dolores - friendly and strong female,american,female,old,confident,narrative_story,generated,,An older American female voice. Perfect for audiobooks and narration.,https://storage.googleapis.com/eleven-public-prod/DUlqb4sMu2gK8EEQx88MJY3Cyfn2/voices/OoxZRfmtuNo7XbScKAUJ/e91ccbc5-7a05-46ab-a45a-8ceff0a6385d.mp3,10092869,343142,0,1494,1.0,True,False,False,,,,,,
|
70 |
+
99d9061aa71795e5df26eb3a8eb450bea8dbcbd2898840f42c1c447ce796d57a,IFipGg7zg7ZimrDYVgra,1691156875,Margaret,british,female,old,calm,narrative_story,generated,,A calm and older voice which works well for storytelling.,https://storage.googleapis.com/eleven-public-prod/esyl6WJRqhUEedyxBxZhGBXPEga2/voices/IFipGg7zg7ZimrDYVgra/7bcbfb50-e476-4b58-ae78-07afc6d1800a.mp3,711591,2401,0,218,1.0,True,False,False,,,,,,
|
71 |
+
99d9061aa71795e5df26eb3a8eb450bea8dbcbd2898840f42c1c447ce796d57a,kyfbTOVg0ohSPkm8iuWA,1691155967,Edith - soft,british,female,old,meditative,narrative_story,generated,,A soft and calm voice for meditation.,https://storage.googleapis.com/eleven-public-prod/esyl6WJRqhUEedyxBxZhGBXPEga2/voices/kyfbTOVg0ohSPkm8iuWA/8f277366-06f0-4990-8080-02de14d87039.mp3,336436,2312,0,119,1.0,True,False,False,,,,,,
|
72 |
+
99d9061aa71795e5df26eb3a8eb450bea8dbcbd2898840f42c1c447ce796d57a,Hk5QbbI2GcN3oB2Aq9aI,1691155559,Eleanor - gentle,british,female,old,meditative,narrative_story,generated,,A calm and gentle british voice that helps relax for meditation.,https://storage.googleapis.com/eleven-public-prod/esyl6WJRqhUEedyxBxZhGBXPEga2/voices/Hk5QbbI2GcN3oB2Aq9aI/122e8907-ea11-4157-9c5a-c2b4f014a187.mp3,6735585,359583,0,1411,1.0,True,False,False,,,,,,
|
73 |
+
836701f99bab484f5e1bff8e9df42d970fe56c858d8b9d99ea8443e7dffede63,pmuxntG1PPdwuxZ3C3Z8,1691038095,Agatha,australian,female,old,deep,narrative_story,generated,,Good for book narrations,https://storage.googleapis.com/eleven-public-prod/nEuqdw9r52P6G0TUAV73G0Ajpfu1/voices/pmuxntG1PPdwuxZ3C3Z8/1fce58a4-c25d-4460-b9cf-6aef99f27784.mp3,4803110,126277,0,961,1.0,True,False,False,,,,,,
|
74 |
+
836701f99bab484f5e1bff8e9df42d970fe56c858d8b9d99ea8443e7dffede63,ksWHnl0TUhFwuLbukqBl,1691038090,Margareth,australian,female,old,husky,narrative_story,generated,,A strong and mature voice.,https://storage.googleapis.com/eleven-public-prod/nEuqdw9r52P6G0TUAV73G0Ajpfu1/voices/ksWHnl0TUhFwuLbukqBl/f01cefa6-9a4a-4f61-b208-aa986c401093.mp3,7502361,482873,0,1389,1.0,True,False,False,,,,,,
|
75 |
+
0c7a0a07052b2f2bb4b73b2963357bde7f22aa26f9e58cd0f7a850c8d695f439,SQDHdjAFJ4Gv4e67zWBy,1690034258,Edith - older british meditations,british,female,old,meditative,narrative_story,generated,,A crisp and classy older female voice that could be great for guied meditations.,https://storage.googleapis.com/eleven-public-prod/DUlqb4sMu2gK8EEQx88MJY3Cyfn2/voices/SQDHdjAFJ4Gv4e67zWBy/474f96bb-2f22-406b-9399-8228668cbd30.mp3,1113112,4780,0,180,1.0,True,False,False,,,,,,
|
76 |
+
0c7a0a07052b2f2bb4b73b2963357bde7f22aa26f9e58cd0f7a850c8d695f439,eyZvHxtSFu7tpfRh5EkX,1690020253,Darlene - meditative,american,female,old,meditative,narrative_story,generated,,An old female American voice. Great for guided meditations,https://storage.googleapis.com/eleven-public-prod/DUlqb4sMu2gK8EEQx88MJY3Cyfn2/voices/eyZvHxtSFu7tpfRh5EkX/8806a574-313d-4fd4-afbc-f8f724b23487.mp3,795877,23008,0,267,1.0,True,False,False,,,,,,
|
77 |
+
d6294577338bf43f27c71c65dba49dadb4191eed08daf27c5c946376f9dc0655,2limOQJuWEAP3IwwXJbC,1689582163,Renata,american,female,old,formal,characters_animation,generated,,"An orld, well articulated woman.",https://storage.googleapis.com/eleven-public-prod/TFeOnlmn4lhUjj1XGhWdOjBR67c2/voices/2limOQJuWEAP3IwwXJbC/d868bcd3-aed7-45f8-be03-563df6f14c4c.mp3,27366527,502865,0,997,1.0,True,False,False,,,,,,
|
78 |
+
34479111a63cfac869f6b446fdbd75c305674b1a01ada2c0e8c6a733886bbe71,7RLPNAWDxHipPlYOJNOd,1689404539,Ruth - grandmother storyteller,american,female,old,calm,narrative_story,generated,,A calm gradmotherly voice. Great for narration.,https://storage.googleapis.com/eleven-public-prod/Eq8tyIYOx5PoyXO0aMbKV4xG1FW2/voices/7RLPNAWDxHipPlYOJNOd/a8ef974d-e908-4e0c-8afd-ae106bcda06f.mp3,5342785,192107,0,962,1.0,True,False,False,,,,,,
|
79 |
+
2bbbfb2a230bdd92d2f3804c6432d3be70346f5379765e422e13e4bcc5ad13df,wlvYdFzBkPVIYW2RFRTK,1689369962,Nora ,american,female,old,confident,narrative_story,generated,,A confident and trustworthy sounding older American female voice. Great for story telling,https://storage.googleapis.com/eleven-public-prod/JY9vKnZTPXQnDGirHlGqollDbXK2/voices/wlvYdFzBkPVIYW2RFRTK/d162a2d2-a9c2-4486-8eec-6d17a5200a0c.mp3,34323894,1449518,0,3889,1.0,True,False,False,,,,,,
|
80 |
+
8305443ce50a5dba3fcb6735634e88257c3b6365fb539fe4389b842565c2c53f,T7plPRDWMnVk7I62vsHN,1689250268,Aspexia,american,female,old,meditative,narrative_story,generated,,"A calming female voice, good for fortune tellers or old female narrators",https://storage.googleapis.com/eleven-public-prod/lA5gT7FFPLVt66XJyVjpVR1ljPA2/voices/T7plPRDWMnVk7I62vsHN/bf27c933-d8ff-4a03-934e-4249b3f9d979.mp3,1551919,2066,0,453,1.0,True,False,False,,,,,,
|
81 |
+
e000b5d54ff462681322c06f3c0c7a716d5ef95198651b6dbec7398758c7635b,NdwVOwEVotsiBFMZ0Y55,1689202125,Liz - dramatic female,Queens English,female,old,intense,narrative_story,generated,,"A dramatic voice for speeches, poetry recital and storytelling.",https://storage.googleapis.com/eleven-public-prod/8RG7hiW9bPeKX0ABsbBgqsx91yo2/voices/NdwVOwEVotsiBFMZ0Y55/2baf4464-687e-4f90-8a57-149618472a87.mp3,837684,15847,0,343,1.0,True,False,False,,,,,,
|
82 |
+
178d6ec9585322ace54c434731853b4b1cf761c9e857d2c052a006e95c1c4ce4,e9wI4JxQSXpRCJ2jWAQ1,1689144757,Edith - elegant and mature,british,female,old,formal,characters_animation,generated,,"A very elegant, older voice. Brilliant for characters.",https://storage.googleapis.com/eleven-public-prod/Po6p57Agv8gcu7SoJK1UqfoRhWe2/voices/e9wI4JxQSXpRCJ2jWAQ1/714bef46-459d-449c-9fdb-ed77bef59140.mp3,21125912,989929,0,2609,1.0,True,False,False,,,,,,
|
83 |
+
dc1690554d734de44f3a482b9feae535429dd333d2b55eb7fc0854084b7b12b1,Dbha8aDV1Pta0jO1dPFe,1689021646,Agatha,british,female,old,intense,characters_animation,generated,,A bossy and sharp voice. Good for characters.,https://storage.googleapis.com/eleven-public-prod/QqPX80f3uLQ8bhiwZcWgOzeEDiA2/voices/Dbha8aDV1Pta0jO1dPFe/4456bf6c-1d6b-463a-b977-514395fedc8c.mp3,1072349,17797,0,399,1.0,True,False,False,,,,,,
|
84 |
+
be7f9f595e3d07e71d0a01b1ccbab78849989c7eeacce7020ae1536009a3d15c,Yo6STHSbBMineA0zLZBO,1688967103,Michelle - Old and Daring,american,female,old,confident,characters_animation,generated,,A mature and daring woman.,https://storage.googleapis.com/eleven-public-prod/ofJ3ST4rvZcJUjpc7t5B7gqcCz93/voices/Yo6STHSbBMineA0zLZBO/4f1bd6c2-6902-4d6c-b628-94e3246645cb.mp3,11115893,804167,0,287,1.0,True,False,False,,,,,,
|
85 |
+
be7f9f595e3d07e71d0a01b1ccbab78849989c7eeacce7020ae1536009a3d15c,ioxAr77HaV2DuUy5DjKd,1688963182,Petra - Old and Mature,american,female,old,confident,narrative_story,generated,,"A mature older woman with a concise tone, perfect for casual speech or storytelling.",https://storage.googleapis.com/eleven-public-prod/ofJ3ST4rvZcJUjpc7t5B7gqcCz93/voices/ioxAr77HaV2DuUy5DjKd/550303c5-a160-47cc-9ba5-e2650c0d91f5.mp3,351797,12895,0,133,1.0,True,False,False,,,,,,
|
86 |
+
4b113a79483f9f605e3c4f326afdb78977f12f08bdfaea48e0a23936dee96a76,L3YfDZpdhwYj7nQKaiuQ,1688709183,Nicola - School Teacher,American,female,old,intense,narrative_story,generated,,An intense older female voice that sounds serious. American accent with a British twang. Good as a storyteller.,https://storage.googleapis.com/eleven-public-prod/JCg9HEpN0egDS3MqV81P54Oe5hw2/voices/L3YfDZpdhwYj7nQKaiuQ/0c3f99e8-9d7d-4b36-8095-98a3061f1c3c.mp3,5747509,161666,0,468,1.0,True,False,False,,,,,,
|
87 |
+
0eecba332ac86935dc998011c97e4ddc2c61149ec49ca9433384c062876e3505,sFsWdUOcIWXwHiRLXR5c,1688664529,lizzy - refined victorian,british,female,old,casual,conversational,generated,,A refined voice with proper speech and slightly filtered sound.,https://storage.googleapis.com/eleven-public-prod/WRsTli8VFnhT3D38XdcWm8SzCW23/voices/sFsWdUOcIWXwHiRLXR5c/9ff31955-0b0d-414a-a6a3-514203111a4e.mp3,120791993,109177,1939,4659,1.0,True,False,False,,,,,,
|
88 |
+
8305443ce50a5dba3fcb6735634e88257c3b6365fb539fe4389b842565c2c53f,ELOMiPOAHWgSDD6OQ7OL,1688484760,Dath Ilan,african,female,old,calm,conversational,generated,,An old African Woman. good for stories.,https://storage.googleapis.com/eleven-public-prod/lA5gT7FFPLVt66XJyVjpVR1ljPA2/voices/ELOMiPOAHWgSDD6OQ7OL/b36ccb5a-339b-4cb6-bd0b-830e530fba66.mp3,747120,26505,0,327,1.0,True,False,False,,,,,,
|
89 |
+
7b0490fb29b2b7f378afc01e1a396c224abf8a748ac9de9a71d62e6da52e08ab,pHCQtfdVmZlNk4ZQ37Ou,1688233864,Eleanor,british,female,old,wise,narrative_story,generated,,A wise mentor. Great for audiobooks or video games.,https://storage.googleapis.com/eleven-public-prod/lFrBHoLWvjcj6VDp4ZdFvyTNWVb2/voices/pHCQtfdVmZlNk4ZQ37Ou/25ba5491-296b-4b94-8544-4cb5e0472b95.mp3,67347096,777362,100349,7825,1.0,True,False,False,,,,,,
|
90 |
+
be7f9f595e3d07e71d0a01b1ccbab78849989c7eeacce7020ae1536009a3d15c,fjT2oASjaekKvmZP6J60,1688156881,Andria - Wise and Strict,american,female,old,wise,narrative_story,generated,,A wise and strict woman with a tone that's perfect for storytelling.,https://storage.googleapis.com/eleven-public-prod/ofJ3ST4rvZcJUjpc7t5B7gqcCz93/voices/fjT2oASjaekKvmZP6J60/33d4cf4c-5426-4091-babd-86e7b3c3c647.mp3,537675,8661,0,236,1.0,True,False,False,,,,,,
|
91 |
+
be7f9f595e3d07e71d0a01b1ccbab78849989c7eeacce7020ae1536009a3d15c,anTM0b7RpmlXjaq6fqEy,1688156725,Betsy - Wise and Thoughtful,american,female,old,wise,narrative_story,generated,,"A wise woman with a tone that sparks intelligence, perfect for formal speech or characters.",https://storage.googleapis.com/eleven-public-prod/ofJ3ST4rvZcJUjpc7t5B7gqcCz93/voices/anTM0b7RpmlXjaq6fqEy/24840a90-07eb-4aca-a7f5-e6622340bb63.mp3,3754574,81585,1289,1154,1.0,True,False,False,,,,,,
|
92 |
+
950f55b97cd98e992c16302e998cfd2fd771eedbed45a4b56c84183f38b411be,TBWkUFaGLHIFiicYLzJI,1688019005,Enni,american,female,old,husky,characters_animation,generated,,A strong and loud voice. Good for animations.,https://storage.googleapis.com/eleven-public-prod/UwDtqCF44YaL77wxb8DVQlHT5Gp1/voices/TBWkUFaGLHIFiicYLzJI/cef389d9-5168-4e7a-a12b-0a9f78117fea.mp3,1552664,18616,0,455,1.0,True,False,False,,,,,,
|
93 |
+
84f314fe5a331a894bb2e50098eec1db5c108e647fec0e2c2ed0fbe256ad8c07,kE3ZEcODhuIdDkIcndJy,1687978970,Penelope,british,female,old,classy,narrative_story,generated,,"A classy, older British woman. Would work well as a narrator.",https://storage.googleapis.com/eleven-public-prod/Foy81KCMK6USPGMlGpBANcTZqCr1/voices/kE3ZEcODhuIdDkIcndJy/204d648d-bf43-4492-817e-63e3455dd753.mp3,2807740,27680,0,217,1.0,True,False,False,,,,,,
|
94 |
+
d66d15b9ca15b15b4900cc9e11ccebfb79819288de019d64027d91d9d3d0a520,11wGfYcURgJBFn1pJflc,1687901793,Lila - strong older woman,american,female,old,husky,narrative_story,generated,,An older woman's strong voice. Great for storytelling.,https://storage.googleapis.com/eleven-public-prod/6wyZqb4TmkhFCynseqeaEolrAnj2/voices/11wGfYcURgJBFn1pJflc/96428c46-9418-4141-b245-795b39b12dcc.mp3,2194212,25950,0,432,1.0,True,False,False,,,,,,
|
95 |
+
be7f9f595e3d07e71d0a01b1ccbab78849989c7eeacce7020ae1536009a3d15c,NYPVTH5Y1UVSR6DSJUXg,1687841631,Tamika,american,female,old,calm,narrative_story,generated,,Young mother,https://storage.googleapis.com/eleven-public-prod/ofJ3ST4rvZcJUjpc7t5B7gqcCz93/voices/NYPVTH5Y1UVSR6DSJUXg/b10a3566-8a58-45ae-8e48-d542f16c7919.mp3,112780007,1451387,0,6549,1.0,True,False,False,,,,,,
|
96 |
+
be7f9f595e3d07e71d0a01b1ccbab78849989c7eeacce7020ae1536009a3d15c,GBut0BDgVM2ez3OVjaGx,1687840795,Betty,american,female,old,pleasant,characters_animation,generated,,Betty can be an old fortuneteller,https://storage.googleapis.com/eleven-public-prod/ofJ3ST4rvZcJUjpc7t5B7gqcCz93/voices/GBut0BDgVM2ez3OVjaGx/6dc927db-d9be-490a-9aa2-20b1d147c7a6.mp3,4172935,44576,0,574,1.0,True,False,False,,,,,,
|
97 |
+
be7f9f595e3d07e71d0a01b1ccbab78849989c7eeacce7020ae1536009a3d15c,sGVvB6QlbVtq3qLhYB4p,1687840534,Conny - Old and Stubborn,african,female,old,casual,conversational,generated,,"An old woman with a stubborn tone, perfect for casual conversation or characters.",https://storage.googleapis.com/eleven-public-prod/ofJ3ST4rvZcJUjpc7t5B7gqcCz93/voices/sGVvB6QlbVtq3qLhYB4p/2ff498cc-9738-42db-aa80-f2438bd20e8c.mp3,11673143,605991,0,2727,1.0,True,False,False,,,,,,
|
98 |
+
be7f9f595e3d07e71d0a01b1ccbab78849989c7eeacce7020ae1536009a3d15c,4CA4WYmW7ZKs5QbGDH06,1687839240,Brenda,american,female,old,meditative,narrative_story,generated,,Soothing voice for meditation.,https://storage.googleapis.com/eleven-public-prod/ofJ3ST4rvZcJUjpc7t5B7gqcCz93/voices/4CA4WYmW7ZKs5QbGDH06/244ee98e-a42e-4b30-a5c6-cf6c9b4abe1c.mp3,4778907,66730,0,1127,1.0,True,False,False,,,,,,
|
99 |
+
2c81f54893601d7cfb054eb86c33f86de846b2ccc0fe88bd0a8bb2f20aaae870,NVHXc3Kj8Pw5Ua4RnUxy,1687599205,Kathy,british,female,old,meditative,narrative_story,generated,,A pleasant and calm voice.,https://storage.googleapis.com/eleven-public-prod/hQUjuilqoPZmcxuPhS2ICdVngvz1/voices/5xXs4PF9ew1MRp1Ey3lt/91439d9b-8344-4251-9655-0c9af0057a0a.mp3,3590453,33161,0,336,1.0,True,False,False,,,,,,
|
100 |
+
2c81f54893601d7cfb054eb86c33f86de846b2ccc0fe88bd0a8bb2f20aaae870,5xXs4PF9ew1MRp1Ey3lt,1687599205,Kathy,british,female,old,meditative,narrative_story,generated,,A pleasant and calm voice.,https://storage.googleapis.com/eleven-public-prod/hQUjuilqoPZmcxuPhS2ICdVngvz1/voices/5xXs4PF9ew1MRp1Ey3lt/91439d9b-8344-4251-9655-0c9af0057a0a.mp3,3601090,43798,0,610,1.0,True,False,False,,,,,,
|
101 |
+
a73ca6d475fd58941a94abc1b18ef1bf03492a64774450e0a8d7c3bb48c6e1fd,PjpmIm8AmJJCbtkrDrgY,1687576570,Cara - Expressive and Direct,british,female,old,confident,entertainment_tv,generated,,"A woman with a direct and expressive voice, perfect for media creation and voiceovers.",https://storage.googleapis.com/eleven-public-prod/udmG0I9oKegHHyrU3sEvatdvG2p1/voices/PjpmIm8AmJJCbtkrDrgY/a678b834-9606-461f-9f6b-fd63c06cdb41.mp3,4097868,76092,0,932,1.0,True,False,False,,,,,,
|
102 |
+
b574841da3a907e3e0e9abfc0abd798b052d806b440d85389612d0733d312f4f,aM1H4Rj2mHP2jNxWV9Hi,1718992057,"Queen Rosamund - British, Older Woman",british,female,old,formal,characters_animation,generated,,Old British female voice. Perfect for Character in a Story.,https://storage.googleapis.com/eleven-public-prod/rm4oNHot8CNsnaq47GCYK3qi0973/voices/aM1H4Rj2mHP2jNxWV9Hi/0277eecb-022f-4c0f-aab0-898e6643033f.mp3,1463219,380249,0,586,1.0,True,False,False,,,,,,
|
@@ -14,56 +14,709 @@
|
|
14 |
"cell_type": "code",
|
15 |
"execution_count": 2,
|
16 |
"metadata": {},
|
17 |
-
"outputs": [
|
18 |
-
{
|
19 |
-
"data": {
|
20 |
-
"text/plain": [
|
21 |
-
"True"
|
22 |
-
]
|
23 |
-
},
|
24 |
-
"execution_count": 2,
|
25 |
-
"metadata": {},
|
26 |
-
"output_type": "execute_result"
|
27 |
-
}
|
28 |
-
],
|
29 |
"source": [
|
30 |
"import os\n",
|
31 |
"\n",
|
32 |
"import dotenv\n",
|
|
|
33 |
"from httpx import Timeout\n",
|
|
|
34 |
"from langchain_core.prompts import (\n",
|
35 |
" ChatPromptTemplate,\n",
|
36 |
" SystemMessagePromptTemplate,\n",
|
37 |
" HumanMessagePromptTemplate,\n",
|
38 |
")\n",
|
39 |
"from langchain_openai import ChatOpenAI\n",
|
40 |
-
"from pydantic import BaseModel\n",
|
41 |
"from langchain_community.callbacks import get_openai_callback\n",
|
42 |
"\n",
|
43 |
-
"
|
|
|
|
|
|
|
|
|
44 |
]
|
45 |
},
|
46 |
{
|
47 |
"cell_type": "code",
|
48 |
"execution_count": 3,
|
49 |
"metadata": {},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
51 |
"source": [
|
52 |
-
"
|
53 |
-
"from src.utils import GPTModels\n",
|
54 |
-
"from src.text_split_chain import create_split_text_chain_v2"
|
55 |
]
|
56 |
},
|
57 |
{
|
58 |
"cell_type": "code",
|
59 |
"execution_count": 4,
|
60 |
"metadata": {},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
"source": [
|
63 |
-
"chain =
|
64 |
-
"# chain =
|
65 |
"with get_openai_callback() as cb:\n",
|
66 |
-
" res = chain.invoke(
|
|
|
|
|
67 |
]
|
68 |
},
|
69 |
{
|
@@ -74,7 +727,7 @@
|
|
74 |
{
|
75 |
"data": {
|
76 |
"text/plain": [
|
77 |
-
"
|
78 |
]
|
79 |
},
|
80 |
"execution_count": 5,
|
@@ -90,12 +743,93 @@
|
|
90 |
"cell_type": "code",
|
91 |
"execution_count": 6,
|
92 |
"metadata": {},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
"outputs": [
|
94 |
{
|
95 |
"name": "stdout",
|
96 |
"output_type": "stream",
|
97 |
"text": [
|
98 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
"--------------------\n",
|
100 |
"[narrator] Inside, the crimson room bloomed with light. Tom and Miss Baker sat at either end of the long couch and she read aloud to him from the Saturday Evening Postβthe words, murmurous and uninflected, running together in a soothing tune. The lamplight, bright on his boots and dull on the autumn-leaf yellow of her hair, glinted along the paper as she turned a page with a flutter of slender muscles in her arms.\n",
|
101 |
"[narrator] When we came in she held us silent for a moment with a lifted hand.\n",
|
@@ -114,7 +848,7 @@
|
|
114 |
"[Jordan] βGood night,β\n",
|
115 |
"[narrator] she said softly.\n",
|
116 |
"[Jordan] βWake me at eight, wonβt you.β\n",
|
117 |
-
"[
|
118 |
"[Jordan] βI will. Good night, Mr. Carraway. See you anon.β\n",
|
119 |
"[Daisy] βOf course you will,β\n",
|
120 |
"[narrator] confirmed Daisy.\n",
|
@@ -144,8 +878,7 @@
|
|
144 |
}
|
145 |
],
|
146 |
"source": [
|
147 |
-
"
|
148 |
-
"print(annotated_text.to_pretty_text())"
|
149 |
]
|
150 |
},
|
151 |
{
|
@@ -171,6 +904,213 @@
|
|
171 |
"print(f'LLM usage:\\n\\n{cb}')"
|
172 |
]
|
173 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
{
|
175 |
"cell_type": "code",
|
176 |
"execution_count": null,
|
|
|
14 |
"cell_type": "code",
|
15 |
"execution_count": 2,
|
16 |
"metadata": {},
|
17 |
+
"outputs": [],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
"source": [
|
19 |
"import os\n",
|
20 |
"\n",
|
21 |
"import dotenv\n",
|
22 |
+
"import pandas as pd\n",
|
23 |
"from httpx import Timeout\n",
|
24 |
+
"from pydantic import BaseModel\n",
|
25 |
"from langchain_core.prompts import (\n",
|
26 |
" ChatPromptTemplate,\n",
|
27 |
" SystemMessagePromptTemplate,\n",
|
28 |
" HumanMessagePromptTemplate,\n",
|
29 |
")\n",
|
30 |
"from langchain_openai import ChatOpenAI\n",
|
|
|
31 |
"from langchain_community.callbacks import get_openai_callback\n",
|
32 |
"\n",
|
33 |
+
"import data.samples_to_split as samples\n",
|
34 |
+
"\n",
|
35 |
+
"from src.lc_callbacks import LCMessageLoggerAsync\n",
|
36 |
+
"from src.utils import GPTModels\n",
|
37 |
+
"from src.text_split_chain import create_split_text_chain"
|
38 |
]
|
39 |
},
|
40 |
{
|
41 |
"cell_type": "code",
|
42 |
"execution_count": 3,
|
43 |
"metadata": {},
|
44 |
+
"outputs": [
|
45 |
+
{
|
46 |
+
"data": {
|
47 |
+
"text/plain": [
|
48 |
+
"True"
|
49 |
+
]
|
50 |
+
},
|
51 |
+
"execution_count": 3,
|
52 |
+
"metadata": {},
|
53 |
+
"output_type": "execute_result"
|
54 |
+
}
|
55 |
+
],
|
56 |
+
"source": [
|
57 |
+
"dotenv.load_dotenv()"
|
58 |
+
]
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"cell_type": "code",
|
62 |
+
"execution_count": null,
|
63 |
+
"metadata": {},
|
64 |
"outputs": [],
|
65 |
+
"source": []
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"cell_type": "markdown",
|
69 |
+
"metadata": {},
|
70 |
"source": [
|
71 |
+
"## voices eda"
|
|
|
|
|
72 |
]
|
73 |
},
|
74 |
{
|
75 |
"cell_type": "code",
|
76 |
"execution_count": 4,
|
77 |
"metadata": {},
|
78 |
+
"outputs": [
|
79 |
+
{
|
80 |
+
"name": "stdout",
|
81 |
+
"output_type": "stream",
|
82 |
+
"text": [
|
83 |
+
"(468, 14)\n"
|
84 |
+
]
|
85 |
+
}
|
86 |
+
],
|
87 |
+
"source": [
|
88 |
+
"# df = pd.read_csv('data/11labs_tts_voices.csv')\n",
|
89 |
+
"df = pd.read_csv('data/11labs_available_tts_voices.csv')\n",
|
90 |
+
"df[\"age\"] = df[\"age\"].str.replace(\" \", \"_\").str.replace(\"-\", \"_\")\n",
|
91 |
+
"print(df.shape)"
|
92 |
+
]
|
93 |
+
},
|
94 |
+
{
|
95 |
+
"cell_type": "code",
|
96 |
+
"execution_count": 5,
|
97 |
+
"metadata": {},
|
98 |
+
"outputs": [
|
99 |
+
{
|
100 |
+
"data": {
|
101 |
+
"text/plain": [
|
102 |
+
"Index(['voice_id', 'name', 'preview_url', 'owner_id', 'permission_on_resource',\n",
|
103 |
+
" 'is_legacy', 'is_mixed', 'accent', 'description', 'age', 'gender',\n",
|
104 |
+
" 'category', 'language', 'descriptive'],\n",
|
105 |
+
" dtype='object')"
|
106 |
+
]
|
107 |
+
},
|
108 |
+
"execution_count": 5,
|
109 |
+
"metadata": {},
|
110 |
+
"output_type": "execute_result"
|
111 |
+
}
|
112 |
+
],
|
113 |
+
"source": [
|
114 |
+
"df.columns"
|
115 |
+
]
|
116 |
+
},
|
117 |
+
{
|
118 |
+
"cell_type": "code",
|
119 |
+
"execution_count": 6,
|
120 |
+
"metadata": {},
|
121 |
+
"outputs": [
|
122 |
+
{
|
123 |
+
"data": {
|
124 |
+
"text/plain": [
|
125 |
+
"language\n",
|
126 |
+
"NaN 264\n",
|
127 |
+
"en 203\n",
|
128 |
+
"romanian 1\n",
|
129 |
+
"Name: count, dtype: int64"
|
130 |
+
]
|
131 |
+
},
|
132 |
+
"execution_count": 6,
|
133 |
+
"metadata": {},
|
134 |
+
"output_type": "execute_result"
|
135 |
+
}
|
136 |
+
],
|
137 |
+
"source": [
|
138 |
+
"df['language'].value_counts(dropna=False)"
|
139 |
+
]
|
140 |
+
},
|
141 |
+
{
|
142 |
+
"cell_type": "code",
|
143 |
+
"execution_count": 7,
|
144 |
+
"metadata": {},
|
145 |
+
"outputs": [
|
146 |
+
{
|
147 |
+
"data": {
|
148 |
+
"text/plain": [
|
149 |
+
"gender\n",
|
150 |
+
"female 231\n",
|
151 |
+
"male 230\n",
|
152 |
+
"neutral 6\n",
|
153 |
+
"non-binary 1\n",
|
154 |
+
"Name: count, dtype: int64"
|
155 |
+
]
|
156 |
+
},
|
157 |
+
"execution_count": 7,
|
158 |
+
"metadata": {},
|
159 |
+
"output_type": "execute_result"
|
160 |
+
}
|
161 |
+
],
|
162 |
+
"source": [
|
163 |
+
"df['gender'].value_counts(dropna=False)"
|
164 |
+
]
|
165 |
+
},
|
166 |
+
{
|
167 |
+
"cell_type": "code",
|
168 |
+
"execution_count": 8,
|
169 |
+
"metadata": {},
|
170 |
+
"outputs": [
|
171 |
+
{
|
172 |
+
"data": {
|
173 |
+
"text/plain": [
|
174 |
+
"age\n",
|
175 |
+
"middle_aged 183\n",
|
176 |
+
"young 143\n",
|
177 |
+
"old 140\n",
|
178 |
+
"NaN 2\n",
|
179 |
+
"Name: count, dtype: int64"
|
180 |
+
]
|
181 |
+
},
|
182 |
+
"execution_count": 8,
|
183 |
+
"metadata": {},
|
184 |
+
"output_type": "execute_result"
|
185 |
+
}
|
186 |
+
],
|
187 |
+
"source": [
|
188 |
+
"df['age'].value_counts(dropna=False)"
|
189 |
+
]
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"cell_type": "code",
|
193 |
+
"execution_count": 14,
|
194 |
+
"metadata": {},
|
195 |
+
"outputs": [
|
196 |
+
{
|
197 |
+
"data": {
|
198 |
+
"text/html": [
|
199 |
+
"<div>\n",
|
200 |
+
"<style scoped>\n",
|
201 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
202 |
+
" vertical-align: middle;\n",
|
203 |
+
" }\n",
|
204 |
+
"\n",
|
205 |
+
" .dataframe tbody tr th {\n",
|
206 |
+
" vertical-align: top;\n",
|
207 |
+
" }\n",
|
208 |
+
"\n",
|
209 |
+
" .dataframe thead th {\n",
|
210 |
+
" text-align: right;\n",
|
211 |
+
" }\n",
|
212 |
+
"</style>\n",
|
213 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
214 |
+
" <thead>\n",
|
215 |
+
" <tr style=\"text-align: right;\">\n",
|
216 |
+
" <th>gender</th>\n",
|
217 |
+
" <th>female</th>\n",
|
218 |
+
" <th>male</th>\n",
|
219 |
+
" <th>neutral</th>\n",
|
220 |
+
" <th>non-binary</th>\n",
|
221 |
+
" </tr>\n",
|
222 |
+
" <tr>\n",
|
223 |
+
" <th>age</th>\n",
|
224 |
+
" <th></th>\n",
|
225 |
+
" <th></th>\n",
|
226 |
+
" <th></th>\n",
|
227 |
+
" <th></th>\n",
|
228 |
+
" </tr>\n",
|
229 |
+
" </thead>\n",
|
230 |
+
" <tbody>\n",
|
231 |
+
" <tr>\n",
|
232 |
+
" <th>middle_aged</th>\n",
|
233 |
+
" <td>48</td>\n",
|
234 |
+
" <td>130</td>\n",
|
235 |
+
" <td>4</td>\n",
|
236 |
+
" <td>1</td>\n",
|
237 |
+
" </tr>\n",
|
238 |
+
" <tr>\n",
|
239 |
+
" <th>old</th>\n",
|
240 |
+
" <td>100</td>\n",
|
241 |
+
" <td>39</td>\n",
|
242 |
+
" <td>1</td>\n",
|
243 |
+
" <td>0</td>\n",
|
244 |
+
" </tr>\n",
|
245 |
+
" <tr>\n",
|
246 |
+
" <th>young</th>\n",
|
247 |
+
" <td>83</td>\n",
|
248 |
+
" <td>59</td>\n",
|
249 |
+
" <td>1</td>\n",
|
250 |
+
" <td>0</td>\n",
|
251 |
+
" </tr>\n",
|
252 |
+
" <tr>\n",
|
253 |
+
" <th>NaN</th>\n",
|
254 |
+
" <td>0</td>\n",
|
255 |
+
" <td>2</td>\n",
|
256 |
+
" <td>0</td>\n",
|
257 |
+
" <td>0</td>\n",
|
258 |
+
" </tr>\n",
|
259 |
+
" </tbody>\n",
|
260 |
+
"</table>\n",
|
261 |
+
"</div>"
|
262 |
+
],
|
263 |
+
"text/plain": [
|
264 |
+
"gender female male neutral non-binary\n",
|
265 |
+
"age \n",
|
266 |
+
"middle_aged 48 130 4 1\n",
|
267 |
+
"old 100 39 1 0\n",
|
268 |
+
"young 83 59 1 0\n",
|
269 |
+
"NaN 0 2 0 0"
|
270 |
+
]
|
271 |
+
},
|
272 |
+
"execution_count": 14,
|
273 |
+
"metadata": {},
|
274 |
+
"output_type": "execute_result"
|
275 |
+
}
|
276 |
+
],
|
277 |
+
"source": [
|
278 |
+
"df.groupby(['age', 'gender'], dropna=False)['voice_id'].count().unstack(fill_value=0)"
|
279 |
+
]
|
280 |
+
},
|
281 |
+
{
|
282 |
+
"cell_type": "code",
|
283 |
+
"execution_count": 13,
|
284 |
+
"metadata": {},
|
285 |
+
"outputs": [
|
286 |
+
{
|
287 |
+
"data": {
|
288 |
+
"text/html": [
|
289 |
+
"<div>\n",
|
290 |
+
"<style scoped>\n",
|
291 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
292 |
+
" vertical-align: middle;\n",
|
293 |
+
" }\n",
|
294 |
+
"\n",
|
295 |
+
" .dataframe tbody tr th {\n",
|
296 |
+
" vertical-align: top;\n",
|
297 |
+
" }\n",
|
298 |
+
"\n",
|
299 |
+
" .dataframe thead th {\n",
|
300 |
+
" text-align: right;\n",
|
301 |
+
" }\n",
|
302 |
+
"</style>\n",
|
303 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
304 |
+
" <thead>\n",
|
305 |
+
" <tr style=\"text-align: right;\">\n",
|
306 |
+
" <th></th>\n",
|
307 |
+
" <th>gender</th>\n",
|
308 |
+
" <th>female</th>\n",
|
309 |
+
" <th>male</th>\n",
|
310 |
+
" <th>neutral</th>\n",
|
311 |
+
" <th>non-binary</th>\n",
|
312 |
+
" </tr>\n",
|
313 |
+
" <tr>\n",
|
314 |
+
" <th>language</th>\n",
|
315 |
+
" <th>age</th>\n",
|
316 |
+
" <th></th>\n",
|
317 |
+
" <th></th>\n",
|
318 |
+
" <th></th>\n",
|
319 |
+
" <th></th>\n",
|
320 |
+
" </tr>\n",
|
321 |
+
" </thead>\n",
|
322 |
+
" <tbody>\n",
|
323 |
+
" <tr>\n",
|
324 |
+
" <th rowspan=\"4\" valign=\"top\">en</th>\n",
|
325 |
+
" <th>middle_aged</th>\n",
|
326 |
+
" <td>30</td>\n",
|
327 |
+
" <td>91</td>\n",
|
328 |
+
" <td>2</td>\n",
|
329 |
+
" <td>0</td>\n",
|
330 |
+
" </tr>\n",
|
331 |
+
" <tr>\n",
|
332 |
+
" <th>old</th>\n",
|
333 |
+
" <td>3</td>\n",
|
334 |
+
" <td>3</td>\n",
|
335 |
+
" <td>0</td>\n",
|
336 |
+
" <td>0</td>\n",
|
337 |
+
" </tr>\n",
|
338 |
+
" <tr>\n",
|
339 |
+
" <th>young</th>\n",
|
340 |
+
" <td>34</td>\n",
|
341 |
+
" <td>38</td>\n",
|
342 |
+
" <td>0</td>\n",
|
343 |
+
" <td>0</td>\n",
|
344 |
+
" </tr>\n",
|
345 |
+
" <tr>\n",
|
346 |
+
" <th>NaN</th>\n",
|
347 |
+
" <td>0</td>\n",
|
348 |
+
" <td>2</td>\n",
|
349 |
+
" <td>0</td>\n",
|
350 |
+
" <td>0</td>\n",
|
351 |
+
" </tr>\n",
|
352 |
+
" <tr>\n",
|
353 |
+
" <th>romanian</th>\n",
|
354 |
+
" <th>old</th>\n",
|
355 |
+
" <td>1</td>\n",
|
356 |
+
" <td>0</td>\n",
|
357 |
+
" <td>0</td>\n",
|
358 |
+
" <td>0</td>\n",
|
359 |
+
" </tr>\n",
|
360 |
+
" <tr>\n",
|
361 |
+
" <th rowspan=\"3\" valign=\"top\">NaN</th>\n",
|
362 |
+
" <th>middle_aged</th>\n",
|
363 |
+
" <td>18</td>\n",
|
364 |
+
" <td>39</td>\n",
|
365 |
+
" <td>2</td>\n",
|
366 |
+
" <td>1</td>\n",
|
367 |
+
" </tr>\n",
|
368 |
+
" <tr>\n",
|
369 |
+
" <th>old</th>\n",
|
370 |
+
" <td>96</td>\n",
|
371 |
+
" <td>36</td>\n",
|
372 |
+
" <td>1</td>\n",
|
373 |
+
" <td>0</td>\n",
|
374 |
+
" </tr>\n",
|
375 |
+
" <tr>\n",
|
376 |
+
" <th>young</th>\n",
|
377 |
+
" <td>49</td>\n",
|
378 |
+
" <td>21</td>\n",
|
379 |
+
" <td>1</td>\n",
|
380 |
+
" <td>0</td>\n",
|
381 |
+
" </tr>\n",
|
382 |
+
" </tbody>\n",
|
383 |
+
"</table>\n",
|
384 |
+
"</div>"
|
385 |
+
],
|
386 |
+
"text/plain": [
|
387 |
+
"gender female male neutral non-binary\n",
|
388 |
+
"language age \n",
|
389 |
+
"en middle_aged 30 91 2 0\n",
|
390 |
+
" old 3 3 0 0\n",
|
391 |
+
" young 34 38 0 0\n",
|
392 |
+
" NaN 0 2 0 0\n",
|
393 |
+
"romanian old 1 0 0 0\n",
|
394 |
+
"NaN middle_aged 18 39 2 1\n",
|
395 |
+
" old 96 36 1 0\n",
|
396 |
+
" young 49 21 1 0"
|
397 |
+
]
|
398 |
+
},
|
399 |
+
"execution_count": 13,
|
400 |
+
"metadata": {},
|
401 |
+
"output_type": "execute_result"
|
402 |
+
}
|
403 |
+
],
|
404 |
+
"source": [
|
405 |
+
"df.groupby(['language', 'age', 'gender'], dropna=False)['voice_id'].count().unstack(fill_value=0)"
|
406 |
+
]
|
407 |
+
},
|
408 |
+
{
|
409 |
+
"cell_type": "code",
|
410 |
+
"execution_count": 15,
|
411 |
+
"metadata": {},
|
412 |
+
"outputs": [
|
413 |
+
{
|
414 |
+
"data": {
|
415 |
+
"text/plain": [
|
416 |
+
"descriptive\n",
|
417 |
+
"confident 64\n",
|
418 |
+
"calm 44\n",
|
419 |
+
"casual 34\n",
|
420 |
+
"pleasant 31\n",
|
421 |
+
"deep 28\n",
|
422 |
+
"NaN 26\n",
|
423 |
+
"professional 26\n",
|
424 |
+
"upbeat 22\n",
|
425 |
+
"wise 20\n",
|
426 |
+
"formal 17\n",
|
427 |
+
"intense 13\n",
|
428 |
+
"serious 13\n",
|
429 |
+
"meditative 11\n",
|
430 |
+
"modulated 11\n",
|
431 |
+
"excited 10\n",
|
432 |
+
"husky 10\n",
|
433 |
+
"mature 8\n",
|
434 |
+
"classy 8\n",
|
435 |
+
"chill 7\n",
|
436 |
+
"neutral 7\n",
|
437 |
+
"crisp 6\n",
|
438 |
+
"gentle 6\n",
|
439 |
+
"childish 6\n",
|
440 |
+
"hyped 6\n",
|
441 |
+
"cute 5\n",
|
442 |
+
"sassy 4\n",
|
443 |
+
"soft 4\n",
|
444 |
+
"rough 3\n",
|
445 |
+
"grumpy 3\n",
|
446 |
+
"whispery 3\n",
|
447 |
+
"robotic 3\n",
|
448 |
+
"relaxed 3\n",
|
449 |
+
"raspy 2\n",
|
450 |
+
"cheeky 1\n",
|
451 |
+
"sad 1\n",
|
452 |
+
"anxious 1\n",
|
453 |
+
"motivational 1\n",
|
454 |
+
"Name: count, dtype: int64"
|
455 |
+
]
|
456 |
+
},
|
457 |
+
"execution_count": 15,
|
458 |
+
"metadata": {},
|
459 |
+
"output_type": "execute_result"
|
460 |
+
}
|
461 |
+
],
|
462 |
+
"source": [
|
463 |
+
"df['descriptive'].value_counts(dropna=False)"
|
464 |
+
]
|
465 |
+
},
|
466 |
+
{
|
467 |
+
"cell_type": "code",
|
468 |
+
"execution_count": 17,
|
469 |
+
"metadata": {},
|
470 |
+
"outputs": [
|
471 |
+
{
|
472 |
+
"data": {
|
473 |
+
"text/plain": [
|
474 |
+
"(39, 14)"
|
475 |
+
]
|
476 |
+
},
|
477 |
+
"execution_count": 17,
|
478 |
+
"metadata": {},
|
479 |
+
"output_type": "execute_result"
|
480 |
+
}
|
481 |
+
],
|
482 |
+
"source": [
|
483 |
+
"age_group = 'old'\n",
|
484 |
+
"gender = 'male'\n",
|
485 |
+
"df_filtered = df[(df['age'] == age_group) & (df['gender'] == gender)]\n",
|
486 |
+
"df_filtered.shape"
|
487 |
+
]
|
488 |
+
},
|
489 |
+
{
|
490 |
+
"cell_type": "code",
|
491 |
+
"execution_count": 18,
|
492 |
+
"metadata": {},
|
493 |
+
"outputs": [
|
494 |
+
{
|
495 |
+
"data": {
|
496 |
+
"text/html": [
|
497 |
+
"<div>\n",
|
498 |
+
"<style scoped>\n",
|
499 |
+
" .dataframe tbody tr th:only-of-type {\n",
|
500 |
+
" vertical-align: middle;\n",
|
501 |
+
" }\n",
|
502 |
+
"\n",
|
503 |
+
" .dataframe tbody tr th {\n",
|
504 |
+
" vertical-align: top;\n",
|
505 |
+
" }\n",
|
506 |
+
"\n",
|
507 |
+
" .dataframe thead th {\n",
|
508 |
+
" text-align: right;\n",
|
509 |
+
" }\n",
|
510 |
+
"</style>\n",
|
511 |
+
"<table border=\"1\" class=\"dataframe\">\n",
|
512 |
+
" <thead>\n",
|
513 |
+
" <tr style=\"text-align: right;\">\n",
|
514 |
+
" <th></th>\n",
|
515 |
+
" <th>voice_id</th>\n",
|
516 |
+
" <th>name</th>\n",
|
517 |
+
" <th>preview_url</th>\n",
|
518 |
+
" <th>owner_id</th>\n",
|
519 |
+
" <th>permission_on_resource</th>\n",
|
520 |
+
" <th>is_legacy</th>\n",
|
521 |
+
" <th>is_mixed</th>\n",
|
522 |
+
" <th>accent</th>\n",
|
523 |
+
" <th>description</th>\n",
|
524 |
+
" <th>age</th>\n",
|
525 |
+
" <th>gender</th>\n",
|
526 |
+
" <th>category</th>\n",
|
527 |
+
" <th>language</th>\n",
|
528 |
+
" <th>descriptive</th>\n",
|
529 |
+
" </tr>\n",
|
530 |
+
" </thead>\n",
|
531 |
+
" <tbody>\n",
|
532 |
+
" <tr>\n",
|
533 |
+
" <th>245</th>\n",
|
534 |
+
" <td>ugI9yHu7QMtMOjozITa3</td>\n",
|
535 |
+
" <td>Nimbus - deep & meditative</td>\n",
|
536 |
+
" <td>https://storage.googleapis.com/eleven-public-p...</td>\n",
|
537 |
+
" <td>NaN</td>\n",
|
538 |
+
" <td>admin</td>\n",
|
539 |
+
" <td>False</td>\n",
|
540 |
+
" <td>False</td>\n",
|
541 |
+
" <td>american</td>\n",
|
542 |
+
" <td>NaN</td>\n",
|
543 |
+
" <td>old</td>\n",
|
544 |
+
" <td>male</td>\n",
|
545 |
+
" <td>entertainment_tv</td>\n",
|
546 |
+
" <td>NaN</td>\n",
|
547 |
+
" <td>neutral</td>\n",
|
548 |
+
" </tr>\n",
|
549 |
+
" <tr>\n",
|
550 |
+
" <th>284</th>\n",
|
551 |
+
" <td>1SJjcjy45jFu6erSHVWq</td>\n",
|
552 |
+
" <td>Howard - American Radio Voice</td>\n",
|
553 |
+
" <td>https://storage.googleapis.com/eleven-public-p...</td>\n",
|
554 |
+
" <td>NaN</td>\n",
|
555 |
+
" <td>admin</td>\n",
|
556 |
+
" <td>False</td>\n",
|
557 |
+
" <td>False</td>\n",
|
558 |
+
" <td>american</td>\n",
|
559 |
+
" <td>NaN</td>\n",
|
560 |
+
" <td>old</td>\n",
|
561 |
+
" <td>male</td>\n",
|
562 |
+
" <td>advertisement</td>\n",
|
563 |
+
" <td>NaN</td>\n",
|
564 |
+
" <td>modulated</td>\n",
|
565 |
+
" </tr>\n",
|
566 |
+
" <tr>\n",
|
567 |
+
" <th>362</th>\n",
|
568 |
+
" <td>oUAzGw71wG6JCbHMK33s</td>\n",
|
569 |
+
" <td>Mark - calm and wise teacher</td>\n",
|
570 |
+
" <td>https://storage.googleapis.com/eleven-public-p...</td>\n",
|
571 |
+
" <td>NaN</td>\n",
|
572 |
+
" <td>admin</td>\n",
|
573 |
+
" <td>False</td>\n",
|
574 |
+
" <td>False</td>\n",
|
575 |
+
" <td>british</td>\n",
|
576 |
+
" <td>NaN</td>\n",
|
577 |
+
" <td>old</td>\n",
|
578 |
+
" <td>male</td>\n",
|
579 |
+
" <td>informative_educational</td>\n",
|
580 |
+
" <td>NaN</td>\n",
|
581 |
+
" <td>deep</td>\n",
|
582 |
+
" </tr>\n",
|
583 |
+
" </tbody>\n",
|
584 |
+
"</table>\n",
|
585 |
+
"</div>"
|
586 |
+
],
|
587 |
+
"text/plain": [
|
588 |
+
" voice_id name \\\n",
|
589 |
+
"245 ugI9yHu7QMtMOjozITa3 Nimbus - deep & meditative \n",
|
590 |
+
"284 1SJjcjy45jFu6erSHVWq Howard - American Radio Voice \n",
|
591 |
+
"362 oUAzGw71wG6JCbHMK33s Mark - calm and wise teacher \n",
|
592 |
+
"\n",
|
593 |
+
" preview_url owner_id \\\n",
|
594 |
+
"245 https://storage.googleapis.com/eleven-public-p... NaN \n",
|
595 |
+
"284 https://storage.googleapis.com/eleven-public-p... NaN \n",
|
596 |
+
"362 https://storage.googleapis.com/eleven-public-p... NaN \n",
|
597 |
+
"\n",
|
598 |
+
" permission_on_resource is_legacy is_mixed accent description age \\\n",
|
599 |
+
"245 admin False False american NaN old \n",
|
600 |
+
"284 admin False False american NaN old \n",
|
601 |
+
"362 admin False False british NaN old \n",
|
602 |
+
"\n",
|
603 |
+
" gender category language descriptive \n",
|
604 |
+
"245 male entertainment_tv NaN neutral \n",
|
605 |
+
"284 male advertisement NaN modulated \n",
|
606 |
+
"362 male informative_educational NaN deep "
|
607 |
+
]
|
608 |
+
},
|
609 |
+
"execution_count": 18,
|
610 |
+
"metadata": {},
|
611 |
+
"output_type": "execute_result"
|
612 |
+
}
|
613 |
+
],
|
614 |
+
"source": [
|
615 |
+
"df_filtered.sample(3)"
|
616 |
+
]
|
617 |
+
},
|
618 |
+
{
|
619 |
+
"cell_type": "code",
|
620 |
+
"execution_count": 20,
|
621 |
+
"metadata": {},
|
622 |
+
"outputs": [
|
623 |
+
{
|
624 |
+
"data": {
|
625 |
+
"text/plain": [
|
626 |
+
"['HrciSEXYMv69BAJ4ixOW', 'oUAzGw71wG6JCbHMK33s', 'Zl8mecngHM53e1hl151S']"
|
627 |
+
]
|
628 |
+
},
|
629 |
+
"execution_count": 20,
|
630 |
+
"metadata": {},
|
631 |
+
"output_type": "execute_result"
|
632 |
+
}
|
633 |
+
],
|
634 |
+
"source": [
|
635 |
+
"df_filtered.sample(3)['voice_id'].to_list()"
|
636 |
+
]
|
637 |
+
},
|
638 |
+
{
|
639 |
+
"cell_type": "code",
|
640 |
+
"execution_count": null,
|
641 |
+
"metadata": {},
|
642 |
"outputs": [],
|
643 |
+
"source": []
|
644 |
+
},
|
645 |
+
{
|
646 |
+
"cell_type": "markdown",
|
647 |
+
"metadata": {},
|
648 |
+
"source": [
|
649 |
+
"## split text into character phrases"
|
650 |
+
]
|
651 |
+
},
|
652 |
+
{
|
653 |
+
"cell_type": "code",
|
654 |
+
"execution_count": 4,
|
655 |
+
"metadata": {},
|
656 |
+
"outputs": [
|
657 |
+
{
|
658 |
+
"name": "stderr",
|
659 |
+
"output_type": "stream",
|
660 |
+
"text": [
|
661 |
+
"2024-10-10 02:34:52,755 [INFO] audio-books (lc_callbacks.py): call to <failed to determine LLM> with 2 messages:\n",
|
662 |
+
"{'role': 'system', 'content': 'you are provided with the book sample.\\nplease rewrite it and insert xml tags indicating character to whom current phrase belongs.\\nfor example: <narrator>I looked at her</narrator><Jill>What are you looking at?</Jill>\\n\\nNotes:\\n- sometimes narrator is one of characters taking part in the action.\\nin this case use narrator\\'s name (if available) instead of \"narrator\"\\n- if it\\'s impossible to identify character name from the text provided, use codes \"c1\", \"c2\", etc,\\nwhere \"c\" prefix means character and number is used to enumerate unknown characters\\n- all quotes of direct speech must be attributed to characters, for example:\\n<Tom>βSheβs a nice girl,β</Tom><narrator>said Tom after a moment.</narrator>\\nmind that sometimes narrator could also be a character.\\n- use ALL available context to determine the character.\\nsometimes the character name becomes clear from the following phrases\\n- DO NOT include in your response anything except for the original text with character xml tags!!!\\n'}\n",
|
663 |
+
"{'role': 'human', 'content': 'Here is the book sample:\\n---\\nInside, the crimson room bloomed with light. Tom and Miss Baker sat at\\neither end of the long couch and she read aloud to him from the\\nSaturday Evening Postβthe words, murmurous and uninflected, running\\ntogether in a soothing tune. The lamplight, bright on his boots and\\ndull on the autumn-leaf yellow of her hair, glinted along the paper as\\nshe turned a page with a flutter of slender muscles in her arms.\\n\\nWhen we came in she held us silent for a moment with a lifted hand.\\n\\nβTo be continued,β she said, tossing the magazine on the table, βin\\nour very next issue.β\\n\\nHer body asserted itself with a restless movement of her knee, and she\\nstood up.\\n\\nβTen oβclock,β she remarked, apparently finding the time on the\\nceiling. βTime for this good girl to go to bed.β\\n\\nβJordanβs going to play in the tournament tomorrow,β explained Daisy,\\nβover at Westchester.β\\n\\nβOhβyouβre Jordan Baker.β\\n\\nI knew now why her face was familiarβits pleasing contemptuous\\nexpression had looked out at me from many rotogravure pictures of the\\nsporting life at Asheville and Hot Springs and Palm Beach. I had heard\\nsome story of her too, a critical, unpleasant story, but what it was I\\nhad forgotten long ago.\\n\\nβGood night,β she said softly. βWake me at eight, wonβt you.β\\n\\nβIf youβll get up.β\\n\\nβI will. Good night, Mr. Carraway. See you anon.β\\n\\nβOf course you will,β confirmed Daisy. βIn fact I think Iβll arrange a\\nmarriage. Come over often, Nick, and Iβll sort ofβohβfling you\\ntogether. You knowβlock you up accidentally in linen closets and push\\nyou out to sea in a boat, and all that sort of thingββ\\n\\nβGood night,β called Miss Baker from the stairs. βI havenβt heard a\\nword.β\\n\\nβSheβs a nice girl,β said Tom after a moment. 
βThey oughtnβt to let\\nher run around the country this way.β\\n\\nβWho oughtnβt to?β inquired Daisy coldly.\\n\\nβHer family.β\\n\\nβHer family is one aunt about a thousand years old. Besides, Nickβs\\ngoing to look after her, arenβt you, Nick? Sheβs going to spend lots\\nof weekends out here this summer. I think the home influence will be\\nvery good for her.β\\n\\nDaisy and Tom looked at each other for a moment in silence.\\n\\nβIs she from New York?β I asked quickly.\\n\\nβFrom Louisville. Our white girlhood was passed together there. Our\\nbeautiful whiteββ\\n\\nβDid you give Nick a little heart to heart talk on the veranda?β\\ndemanded Tom suddenly.\\n\\nβDid I?β She looked at me. βI canβt seem to remember, but I think we\\ntalked about the Nordic race. Yes, Iβm sure we did. It sort of crept\\nup on us and first thing you knowββ\\n\\nβDonβt believe everything you hear, Nick,β he advised me.\\n'}\n",
|
664 |
+
"2024-10-10 02:35:04,369 [INFO] httpx (_client.py): HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
|
665 |
+
"2024-10-10 02:35:04,383 [INFO] audio-books (lc_callbacks.py): raw LLM response: \"<narrator>Inside, the crimson room bloomed with light. Tom and Miss Baker sat at either end of the long couch and she read aloud to him from the Saturday Evening Postβthe words, murmurous and uninflected, running together in a soothing tune. The lamplight, bright on his boots and dull on the autumn-leaf yellow of her hair, glinted along the paper as she turned a page with a flutter of slender muscles in her arms.</narrator>\n",
|
666 |
+
"\n",
|
667 |
+
"<narrator>When we came in she held us silent for a moment with a lifted hand.</narrator>\n",
|
668 |
+
"\n",
|
669 |
+
"<Jordan>βTo be continued,β</Jordan> <narrator>she said, tossing the magazine on the table,</narrator> <Jordan>βin our very next issue.β</Jordan>\n",
|
670 |
+
"\n",
|
671 |
+
"<narrator>Her body asserted itself with a restless movement of her knee, and she stood up.</narrator>\n",
|
672 |
+
"\n",
|
673 |
+
"<Jordan>βTen oβclock,β</Jordan> <narrator>she remarked, apparently finding the time on the ceiling.</narrator> <Jordan>βTime for this good girl to go to bed.β</Jordan>\n",
|
674 |
+
"\n",
|
675 |
+
"<Daisy>βJordanβs going to play in the tournament tomorrow,β</Daisy> <narrator>explained Daisy,</narrator> <Daisy>βover at Westchester.β</Daisy>\n",
|
676 |
+
"\n",
|
677 |
+
"<narrator>βOhβyouβre Jordan Baker.β</narrator>\n",
|
678 |
+
"\n",
|
679 |
+
"<narrator>I knew now why her face was familiarβits pleasing contemptuous expression had looked out at me from many rotogravure pictures of the sporting life at Asheville and Hot Springs and Palm Beach. I had heard some story of her too, a critical, unpleasant story, but what it was I had forgotten long ago.</narrator>\n",
|
680 |
+
"\n",
|
681 |
+
"<Jordan>βGood night,β</Jordan> <narrator>she said softly.</narrator> <Jordan>βWake me at eight, wonβt you.β</Jordan>\n",
|
682 |
+
"\n",
|
683 |
+
"<Daisy>βIf youβll get up.β</Daisy>\n",
|
684 |
+
"\n",
|
685 |
+
"<Jordan>βI will. Good night, Mr. Carraway. See you anon.β</Jordan>\n",
|
686 |
+
"\n",
|
687 |
+
"<Daisy>βOf course you will,β</Daisy> <narrator>confirmed Daisy.</narrator> <Daisy>βIn fact I think Iβll arrange a marriage. Come over often, Nick, and Iβll sort ofβohβfling you together. You knowβlock you up accidentally in linen closets and push you out to sea in a boat, and all that sort of thingββ</Daisy>\n",
|
688 |
+
"\n",
|
689 |
+
"<Jordan>βGood night,β</Jordan> <narrator>called Miss Baker from the stairs.</narrator> <Jordan>βI havenβt heard a word.β</Jordan>\n",
|
690 |
+
"\n",
|
691 |
+
"<Tom>βSheβs a nice girl,β</Tom> <narrator>said Tom after a moment.</narrator> <Tom>βThey oughtnβt to let her run around the country this way.β</Tom>\n",
|
692 |
+
"\n",
|
693 |
+
"<Daisy>βWho oughtnβt to?β</Daisy> <narrator>inquired Daisy coldly.</narrator>\n",
|
694 |
+
"\n",
|
695 |
+
"<Tom>βHer family.β</Tom>\n",
|
696 |
+
"\n",
|
697 |
+
"<Daisy>βHer family is one aunt about a thousand years old. Besides, Nickβs going to look after her, arenβt you, Nick? Sheβs going to spend lots of weekends out here this summer. I think the home influence will be very good for her.β</Daisy>\n",
|
698 |
+
"\n",
|
699 |
+
"<narrator>Daisy and Tom looked at each other for a moment in silence.</narrator>\n",
|
700 |
+
"\n",
|
701 |
+
"<narrator>βIs she from New York?β</narrator> <narrator>I asked quickly.</narrator>\n",
|
702 |
+
"\n",
|
703 |
+
"<Daisy>βFrom Louisville. Our white girlhood was passed together there. Our beautiful whiteββ</Daisy>\n",
|
704 |
+
"\n",
|
705 |
+
"<Tom>βDid you give Nick a little heart to heart talk on the veranda?β</Tom> <narrator>demanded Tom suddenly.</narrator>\n",
|
706 |
+
"\n",
|
707 |
+
"<Daisy>βDid I?β</Daisy> <narrator>She looked at me.</narrator> <Daisy>βI canβt seem to remember, but I think we talked about the Nordic race. Yes, Iβm sure we did. It sort of crept up on us and first thing you knowββ</Daisy>\n",
|
708 |
+
"\n",
|
709 |
+
"<Tom>βDonβt believe everything you hear, Nick,β</Tom> <narrator>he advised me.</narrator>\"\n"
|
710 |
+
]
|
711 |
+
}
|
712 |
+
],
|
713 |
"source": [
|
714 |
+
"chain = create_split_text_chain(llm_model=GPTModels.GPT_4o)\n",
|
715 |
+
"# chain = create_split_text_chain(llm_model=GPTModels.GPT_4_TURBO_2024_04_09)\n",
|
716 |
"with get_openai_callback() as cb:\n",
|
717 |
+
" res = chain.invoke(\n",
|
718 |
+
" {\"text\": samples.GATSBY_2}, config={\"callbacks\": [LCMessageLoggerAsync()]}\n",
|
719 |
+
" )"
|
720 |
]
|
721 |
},
|
722 |
{
|
|
|
727 |
{
|
728 |
"data": {
|
729 |
"text/plain": [
|
730 |
+
"SplitTextOutput(text_raw='Inside, the crimson room bloomed with light. Tom and Miss Baker sat at\\neither end of the long couch and she read aloud to him from the\\nSaturday Evening Postβthe words, murmurous and uninflected, running\\ntogether in a soothing tune. The lamplight, bright on his boots and\\ndull on the autumn-leaf yellow of her hair, glinted along the paper as\\nshe turned a page with a flutter of slender muscles in her arms.\\n\\nWhen we came in she held us silent for a moment with a lifted hand.\\n\\nβTo be continued,β she said, tossing the magazine on the table, βin\\nour very next issue.β\\n\\nHer body asserted itself with a restless movement of her knee, and she\\nstood up.\\n\\nβTen oβclock,β she remarked, apparently finding the time on the\\nceiling. βTime for this good girl to go to bed.β\\n\\nβJordanβs going to play in the tournament tomorrow,β explained Daisy,\\nβover at Westchester.β\\n\\nβOhβyouβre Jordan Baker.β\\n\\nI knew now why her face was familiarβits pleasing contemptuous\\nexpression had looked out at me from many rotogravure pictures of the\\nsporting life at Asheville and Hot Springs and Palm Beach. I had heard\\nsome story of her too, a critical, unpleasant story, but what it was I\\nhad forgotten long ago.\\n\\nβGood night,β she said softly. βWake me at eight, wonβt you.β\\n\\nβIf youβll get up.β\\n\\nβI will. Good night, Mr. Carraway. See you anon.β\\n\\nβOf course you will,β confirmed Daisy. βIn fact I think Iβll arrange a\\nmarriage. Come over often, Nick, and Iβll sort ofβohβfling you\\ntogether. You knowβlock you up accidentally in linen closets and push\\nyou out to sea in a boat, and all that sort of thingββ\\n\\nβGood night,β called Miss Baker from the stairs. βI havenβt heard a\\nword.β\\n\\nβSheβs a nice girl,β said Tom after a moment. βThey oughtnβt to let\\nher run around the country this way.β\\n\\nβWho oughtnβt to?β inquired Daisy coldly.\\n\\nβHer family.β\\n\\nβHer family is one aunt about a thousand years old. 
Besides, Nickβs\\ngoing to look after her, arenβt you, Nick? Sheβs going to spend lots\\nof weekends out here this summer. I think the home influence will be\\nvery good for her.β\\n\\nDaisy and Tom looked at each other for a moment in silence.\\n\\nβIs she from New York?β I asked quickly.\\n\\nβFrom Louisville. Our white girlhood was passed together there. Our\\nbeautiful whiteββ\\n\\nβDid you give Nick a little heart to heart talk on the veranda?β\\ndemanded Tom suddenly.\\n\\nβDid I?β She looked at me. βI canβt seem to remember, but I think we\\ntalked about the Nordic race. Yes, Iβm sure we did. It sort of crept\\nup on us and first thing you knowββ\\n\\nβDonβt believe everything you hear, Nick,β he advised me.\\n', text_annotated='<narrator>Inside, the crimson room bloomed with light. Tom and Miss Baker sat at either end of the long couch and she read aloud to him from the Saturday Evening Postβthe words, murmurous and uninflected, running together in a soothing tune. The lamplight, bright on his boots and dull on the autumn-leaf yellow of her hair, glinted along the paper as she turned a page with a flutter of slender muscles in her arms.</narrator>\\n\\n<narrator>When we came in she held us silent for a moment with a lifted hand.</narrator>\\n\\n<Jordan>βTo be continued,β</Jordan> <narrator>she said, tossing the magazine on the table,</narrator> <Jordan>βin our very next issue.β</Jordan>\\n\\n<narrator>Her body asserted itself with a restless movement of her knee, and she stood up.</narrator>\\n\\n<Jordan>βTen oβclock,β</Jordan> <narrator>she remarked, apparently finding the time on the ceiling.</narrator> <Jordan>βTime for this good girl to go to bed.β</Jordan>\\n\\n<Daisy>βJordanβs going to play in the tournament tomorrow,β</Daisy> <narrator>explained Daisy,</narrator> <Daisy>βover at Westchester.β</Daisy>\\n\\n<narrator>βOhβyouβre Jordan Baker.β</narrator>\\n\\n<narrator>I knew now why her face was familiarβits pleasing contemptuous expression had looked 
out at me from many rotogravure pictures of the sporting life at Asheville and Hot Springs and Palm Beach. I had heard some story of her too, a critical, unpleasant story, but what it was I had forgotten long ago.</narrator>\\n\\n<Jordan>βGood night,β</Jordan> <narrator>she said softly.</narrator> <Jordan>βWake me at eight, wonβt you.β</Jordan>\\n\\n<Daisy>βIf youβll get up.β</Daisy>\\n\\n<Jordan>βI will. Good night, Mr. Carraway. See you anon.β</Jordan>\\n\\n<Daisy>βOf course you will,β</Daisy> <narrator>confirmed Daisy.</narrator> <Daisy>βIn fact I think Iβll arrange a marriage. Come over often, Nick, and Iβll sort ofβohβfling you together. You knowβlock you up accidentally in linen closets and push you out to sea in a boat, and all that sort of thingββ</Daisy>\\n\\n<Jordan>βGood night,β</Jordan> <narrator>called Miss Baker from the stairs.</narrator> <Jordan>βI havenβt heard a word.β</Jordan>\\n\\n<Tom>βSheβs a nice girl,β</Tom> <narrator>said Tom after a moment.</narrator> <Tom>βThey oughtnβt to let her run around the country this way.β</Tom>\\n\\n<Daisy>βWho oughtnβt to?β</Daisy> <narrator>inquired Daisy coldly.</narrator>\\n\\n<Tom>βHer family.β</Tom>\\n\\n<Daisy>βHer family is one aunt about a thousand years old. Besides, Nickβs going to look after her, arenβt you, Nick? Sheβs going to spend lots of weekends out here this summer. I think the home influence will be very good for her.β</Daisy>\\n\\n<narrator>Daisy and Tom looked at each other for a moment in silence.</narrator>\\n\\n<narrator>βIs she from New York?β</narrator> <narrator>I asked quickly.</narrator>\\n\\n<Daisy>βFrom Louisville. Our white girlhood was passed together there. Our beautiful whiteββ</Daisy>\\n\\n<Tom>βDid you give Nick a little heart to heart talk on the veranda?β</Tom> <narrator>demanded Tom suddenly.</narrator>\\n\\n<Daisy>βDid I?β</Daisy> <narrator>She looked at me.</narrator> <Daisy>βI canβt seem to remember, but I think we talked about the Nordic race. Yes, Iβm sure we did. 
It sort of crept up on us and first thing you knowββ</Daisy>\\n\\n<Tom>βDonβt believe everything you hear, Nick,β</Tom> <narrator>he advised me.</narrator>')"
|
731 |
]
|
732 |
},
|
733 |
"execution_count": 5,
|
|
|
743 |
"cell_type": "code",
|
744 |
"execution_count": 6,
|
745 |
"metadata": {},
|
746 |
+
"outputs": [
|
747 |
+
{
|
748 |
+
"data": {
|
749 |
+
"text/plain": [
|
750 |
+
"['Tom', 'Jordan', 'Daisy', 'narrator']"
|
751 |
+
]
|
752 |
+
},
|
753 |
+
"execution_count": 6,
|
754 |
+
"metadata": {},
|
755 |
+
"output_type": "execute_result"
|
756 |
+
}
|
757 |
+
],
|
758 |
+
"source": [
|
759 |
+
"res.characters"
|
760 |
+
]
|
761 |
+
},
|
762 |
+
{
|
763 |
+
"cell_type": "code",
|
764 |
+
"execution_count": 7,
|
765 |
+
"metadata": {},
|
766 |
"outputs": [
|
767 |
{
|
768 |
"name": "stdout",
|
769 |
"output_type": "stream",
|
770 |
"text": [
|
771 |
+
"<narrator>Inside, the crimson room bloomed with light. Tom and Miss Baker sat at either end of the long couch and she read aloud to him from the Saturday Evening Postβthe words, murmurous and uninflected, running together in a soothing tune. The lamplight, bright on his boots and dull on the autumn-leaf yellow of her hair, glinted along the paper as she turned a page with a flutter of slender muscles in her arms.</narrator>\n",
|
772 |
+
"\n",
|
773 |
+
"<narrator>When we came in she held us silent for a moment with a lifted hand.</narrator>\n",
|
774 |
+
"\n",
|
775 |
+
"<Jordan>βTo be continued,β</Jordan> <narrator>she said, tossing the magazine on the table,</narrator> <Jordan>βin our very next issue.β</Jordan>\n",
|
776 |
+
"\n",
|
777 |
+
"<narrator>Her body asserted itself with a restless movement of her knee, and she stood up.</narrator>\n",
|
778 |
+
"\n",
|
779 |
+
"<Jordan>βTen oβclock,β</Jordan> <narrator>she remarked, apparently finding the time on the ceiling.</narrator> <Jordan>βTime for this good girl to go to bed.β</Jordan>\n",
|
780 |
+
"\n",
|
781 |
+
"<Daisy>βJordanβs going to play in the tournament tomorrow,β</Daisy> <narrator>explained Daisy,</narrator> <Daisy>βover at Westchester.β</Daisy>\n",
|
782 |
+
"\n",
|
783 |
+
"<narrator>βOhβyouβre Jordan Baker.β</narrator>\n",
|
784 |
+
"\n",
|
785 |
+
"<narrator>I knew now why her face was familiarβits pleasing contemptuous expression had looked out at me from many rotogravure pictures of the sporting life at Asheville and Hot Springs and Palm Beach. I had heard some story of her too, a critical, unpleasant story, but what it was I had forgotten long ago.</narrator>\n",
|
786 |
+
"\n",
|
787 |
+
"<Jordan>βGood night,β</Jordan> <narrator>she said softly.</narrator> <Jordan>βWake me at eight, wonβt you.β</Jordan>\n",
|
788 |
+
"\n",
|
789 |
+
"<Daisy>βIf youβll get up.β</Daisy>\n",
|
790 |
+
"\n",
|
791 |
+
"<Jordan>βI will. Good night, Mr. Carraway. See you anon.β</Jordan>\n",
|
792 |
+
"\n",
|
793 |
+
"<Daisy>βOf course you will,β</Daisy> <narrator>confirmed Daisy.</narrator> <Daisy>βIn fact I think Iβll arrange a marriage. Come over often, Nick, and Iβll sort ofβohβfling you together. You knowβlock you up accidentally in linen closets and push you out to sea in a boat, and all that sort of thingββ</Daisy>\n",
|
794 |
+
"\n",
|
795 |
+
"<Jordan>βGood night,β</Jordan> <narrator>called Miss Baker from the stairs.</narrator> <Jordan>βI havenβt heard a word.β</Jordan>\n",
|
796 |
+
"\n",
|
797 |
+
"<Tom>βSheβs a nice girl,β</Tom> <narrator>said Tom after a moment.</narrator> <Tom>βThey oughtnβt to let her run around the country this way.β</Tom>\n",
|
798 |
+
"\n",
|
799 |
+
"<Daisy>βWho oughtnβt to?β</Daisy> <narrator>inquired Daisy coldly.</narrator>\n",
|
800 |
+
"\n",
|
801 |
+
"<Tom>βHer family.β</Tom>\n",
|
802 |
+
"\n",
|
803 |
+
"<Daisy>βHer family is one aunt about a thousand years old. Besides, Nickβs going to look after her, arenβt you, Nick? Sheβs going to spend lots of weekends out here this summer. I think the home influence will be very good for her.β</Daisy>\n",
|
804 |
+
"\n",
|
805 |
+
"<narrator>Daisy and Tom looked at each other for a moment in silence.</narrator>\n",
|
806 |
+
"\n",
|
807 |
+
"<narrator>βIs she from New York?β</narrator> <narrator>I asked quickly.</narrator>\n",
|
808 |
+
"\n",
|
809 |
+
"<Daisy>βFrom Louisville. Our white girlhood was passed together there. Our beautiful whiteββ</Daisy>\n",
|
810 |
+
"\n",
|
811 |
+
"<Tom>βDid you give Nick a little heart to heart talk on the veranda?β</Tom> <narrator>demanded Tom suddenly.</narrator>\n",
|
812 |
+
"\n",
|
813 |
+
"<Daisy>βDid I?β</Daisy> <narrator>She looked at me.</narrator> <Daisy>βI canβt seem to remember, but I think we talked about the Nordic race. Yes, Iβm sure we did. It sort of crept up on us and first thing you knowββ</Daisy>\n",
|
814 |
+
"\n",
|
815 |
+
"<Tom>βDonβt believe everything you hear, Nick,β</Tom> <narrator>he advised me.</narrator>\n"
|
816 |
+
]
|
817 |
+
}
|
818 |
+
],
|
819 |
+
"source": [
|
820 |
+
"print(res.text_annotated)"
|
821 |
+
]
|
822 |
+
},
|
823 |
+
{
|
824 |
+
"cell_type": "code",
|
825 |
+
"execution_count": 8,
|
826 |
+
"metadata": {},
|
827 |
+
"outputs": [
|
828 |
+
{
|
829 |
+
"name": "stdout",
|
830 |
+
"output_type": "stream",
|
831 |
+
"text": [
|
832 |
+
"characters: ['Tom', 'Jordan', 'Daisy', 'narrator']\n",
|
833 |
"--------------------\n",
|
834 |
"[narrator] Inside, the crimson room bloomed with light. Tom and Miss Baker sat at either end of the long couch and she read aloud to him from the Saturday Evening Postβthe words, murmurous and uninflected, running together in a soothing tune. The lamplight, bright on his boots and dull on the autumn-leaf yellow of her hair, glinted along the paper as she turned a page with a flutter of slender muscles in her arms.\n",
|
835 |
"[narrator] When we came in she held us silent for a moment with a lifted hand.\n",
|
|
|
848 |
"[Jordan] βGood night,β\n",
|
849 |
"[narrator] she said softly.\n",
|
850 |
"[Jordan] βWake me at eight, wonβt you.β\n",
|
851 |
+
"[Daisy] βIf youβll get up.β\n",
|
852 |
"[Jordan] βI will. Good night, Mr. Carraway. See you anon.β\n",
|
853 |
"[Daisy] βOf course you will,β\n",
|
854 |
"[narrator] confirmed Daisy.\n",
|
|
|
878 |
}
|
879 |
],
|
880 |
"source": [
|
881 |
+
"print(res.to_pretty_text())"
|
|
|
882 |
]
|
883 |
},
|
884 |
{
|
|
|
904 |
"print(f'LLM usage:\\n\\n{cb}')"
|
905 |
]
|
906 |
},
|
907 |
+
{
|
908 |
+
"cell_type": "code",
|
909 |
+
"execution_count": null,
|
910 |
+
"metadata": {},
|
911 |
+
"outputs": [],
|
912 |
+
"source": []
|
913 |
+
},
|
914 |
+
{
|
915 |
+
"cell_type": "markdown",
|
916 |
+
"metadata": {},
|
917 |
+
"source": [
|
918 |
+
"## map characters to voices"
|
919 |
+
]
|
920 |
+
},
|
921 |
+
{
|
922 |
+
"cell_type": "code",
|
923 |
+
"execution_count": 10,
|
924 |
+
"metadata": {},
|
925 |
+
"outputs": [],
|
926 |
+
"source": [
|
927 |
+
"from src.select_voice_chain import create_voice_mapping_chain"
|
928 |
+
]
|
929 |
+
},
|
930 |
+
{
|
931 |
+
"cell_type": "code",
|
932 |
+
"execution_count": 11,
|
933 |
+
"metadata": {},
|
934 |
+
"outputs": [],
|
935 |
+
"source": [
|
936 |
+
"chain = create_voice_mapping_chain(llm_model=GPTModels.GPT_4_TURBO_2024_04_09)"
|
937 |
+
]
|
938 |
+
},
|
939 |
+
{
|
940 |
+
"cell_type": "code",
|
941 |
+
"execution_count": 12,
|
942 |
+
"metadata": {},
|
943 |
+
"outputs": [
|
944 |
+
{
|
945 |
+
"data": {
|
946 |
+
"text/plain": [
|
947 |
+
"ChatPromptTemplate(input_variables=['characters', 'text'], input_types={}, partial_variables={'available_genders': '\"male\", \"female\"', 'available_age_groups': '\"old\", \"middle_aged\", \"young\"', 'format_instructions': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\\n\\nAs an example, for the schema {\"properties\": {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\\nthe object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema. The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\\n\\nHere is the output schema:\\n```\\n{\"$defs\": {\"CharacterProperties\": {\"properties\": {\"gender\": {\"title\": \"Gender\", \"type\": \"string\"}, \"age_group\": {\"title\": \"Age Group\", \"type\": \"string\"}}, \"required\": [\"gender\", \"age_group\"], \"title\": \"CharacterProperties\", \"type\": \"object\"}}, \"properties\": {\"character2props\": {\"additionalProperties\": {\"$ref\": \"#/$defs/CharacterProperties\"}, \"title\": \"Character2Props\", \"type\": \"object\"}}, \"required\": [\"character2props\"]}\\n```'}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['available_age_groups', 'available_genders', 'format_instructions'], input_types={}, partial_variables={}, template='You are a helpful assistant proficient in literature and psychology.\\nOur goal is to create an audio book from the given text.\\nFor that we need to hire voice actors.\\nPlease help us to find the right actor for each character present in the text.\\n\\nYou are provided with the text split by the characters\\nto whom text parts belong to.\\n\\nYour task is to assign available properties to each character provided.\\nList of available properties:\\n- gender: {available_genders}\\n- age_group: {available_age_groups}\\n\\nNOTES:\\n- assign EXACTLY ONE property value for 
each property\\n- select properties values ONLY from the list of AVAILABLE property values\\n- fill properties for ALL characters from the list provided\\n- DO NOT include any characters absent in the list provided\\n\\n{format_instructions}\\n'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['characters', 'text'], input_types={}, partial_variables={}, template='<text>\\n{text}\\n</text>\\n\\n<characters>\\n{characters}\\n</characters>\\n'), additional_kwargs={})])\n",
|
948 |
+
"| RunnableBinding(bound=ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x174a82d80>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x174a812e0>, root_client=<openai.OpenAI object at 0x174a82d50>, root_async_client=<openai.AsyncOpenAI object at 0x174a81730>, model_name='gpt-4-turbo-2024-04-09', temperature=0.0, model_kwargs={}, openai_api_key=SecretStr('**********'), request_timeout=Timeout(connect=4, read=60, write=60, pool=60)), kwargs={'response_format': {'type': 'json_object'}}, config={}, config_factories=[])\n",
|
949 |
+
"| PydanticOutputParser(pydantic_object=<class 'src.select_voice_chain.AllCharactersProperties'>)"
|
950 |
+
]
|
951 |
+
},
|
952 |
+
"execution_count": 12,
|
953 |
+
"metadata": {},
|
954 |
+
"output_type": "execute_result"
|
955 |
+
}
|
956 |
+
],
|
957 |
+
"source": [
|
958 |
+
"chain"
|
959 |
+
]
|
960 |
+
},
|
961 |
+
{
|
962 |
+
"cell_type": "code",
|
963 |
+
"execution_count": 14,
|
964 |
+
"metadata": {},
|
965 |
+
"outputs": [
|
966 |
+
{
|
967 |
+
"name": "stderr",
|
968 |
+
"output_type": "stream",
|
969 |
+
"text": [
|
970 |
+
"2024-10-10 02:37:46,347 [INFO] audio-books (lc_callbacks.py): call to gpt-4-turbo-2024-04-09 with 2 messages:\n",
|
971 |
+
"{'role': 'system', 'content': 'You are a helpful assistant proficient in literature and psychology.\\nOur goal is to create an audio book from the given text.\\nFor that we need to hire voice actors.\\nPlease help us to find the right actor for each character present in the text.\\n\\nYou are provided with the text split by the characters\\nto whom text parts belong to.\\n\\nYour task is to assign available properties to each character provided.\\nList of available properties:\\n- gender: \"male\", \"female\"\\n- age_group: \"old\", \"middle_aged\", \"young\"\\n\\nNOTES:\\n- assign EXACTLY ONE property value for each property\\n- select properties values ONLY from the list of AVAILABLE property values\\n- fill properties for ALL characters from the list provided\\n- DO NOT include any characters absent in the list provided\\n\\nThe output should be formatted as a JSON instance that conforms to the JSON schema below.\\n\\nAs an example, for the schema {\"properties\": {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\\nthe object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema. The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\\n\\nHere is the output schema:\\n```\\n{\"$defs\": {\"CharacterProperties\": {\"properties\": {\"gender\": {\"title\": \"Gender\", \"type\": \"string\"}, \"age_group\": {\"title\": \"Age Group\", \"type\": \"string\"}}, \"required\": [\"gender\", \"age_group\"], \"title\": \"CharacterProperties\", \"type\": \"object\"}}, \"properties\": {\"character2props\": {\"additionalProperties\": {\"$ref\": \"#/$defs/CharacterProperties\"}, \"title\": \"Character2Props\", \"type\": \"object\"}}, \"required\": [\"character2props\"]}\\n```\\n'}\n",
|
972 |
+
"{'role': 'human', 'content': \"<text>\\n<narrator>Inside, the crimson room bloomed with light. Tom and Miss Baker sat at either end of the long couch and she read aloud to him from the Saturday Evening Postβthe words, murmurous and uninflected, running together in a soothing tune. The lamplight, bright on his boots and dull on the autumn-leaf yellow of her hair, glinted along the paper as she turned a page with a flutter of slender muscles in her arms.</narrator>\\n\\n<narrator>When we came in she held us silent for a moment with a lifted hand.</narrator>\\n\\n<Jordan>βTo be continued,β</Jordan> <narrator>she said, tossing the magazine on the table,</narrator> <Jordan>βin our very next issue.β</Jordan>\\n\\n<narrator>Her body asserted itself with a restless movement of her knee, and she stood up.</narrator>\\n\\n<Jordan>βTen oβclock,β</Jordan> <narrator>she remarked, apparently finding the time on the ceiling.</narrator> <Jordan>βTime for this good girl to go to bed.β</Jordan>\\n\\n<Daisy>βJordanβs going to play in the tournament tomorrow,β</Daisy> <narrator>explained Daisy,</narrator> <Daisy>βover at Westchester.β</Daisy>\\n\\n<narrator>βOhβyouβre Jordan Baker.β</narrator>\\n\\n<narrator>I knew now why her face was familiarβits pleasing contemptuous expression had looked out at me from many rotogravure pictures of the sporting life at Asheville and Hot Springs and Palm Beach. I had heard some story of her too, a critical, unpleasant story, but what it was I had forgotten long ago.</narrator>\\n\\n<Jordan>βGood night,β</Jordan> <narrator>she said softly.</narrator> <Jordan>βWake me at eight, wonβt you.β</Jordan>\\n\\n<Daisy>βIf youβll get up.β</Daisy>\\n\\n<Jordan>βI will. Good night, Mr. Carraway. See you anon.β</Jordan>\\n\\n<Daisy>βOf course you will,β</Daisy> <narrator>confirmed Daisy.</narrator> <Daisy>βIn fact I think Iβll arrange a marriage. Come over often, Nick, and Iβll sort ofβohβfling you together. 
You knowβlock you up accidentally in linen closets and push you out to sea in a boat, and all that sort of thingββ</Daisy>\\n\\n<Jordan>βGood night,β</Jordan> <narrator>called Miss Baker from the stairs.</narrator> <Jordan>βI havenβt heard a word.β</Jordan>\\n\\n<Tom>βSheβs a nice girl,β</Tom> <narrator>said Tom after a moment.</narrator> <Tom>βThey oughtnβt to let her run around the country this way.β</Tom>\\n\\n<Daisy>βWho oughtnβt to?β</Daisy> <narrator>inquired Daisy coldly.</narrator>\\n\\n<Tom>βHer family.β</Tom>\\n\\n<Daisy>βHer family is one aunt about a thousand years old. Besides, Nickβs going to look after her, arenβt you, Nick? Sheβs going to spend lots of weekends out here this summer. I think the home influence will be very good for her.β</Daisy>\\n\\n<narrator>Daisy and Tom looked at each other for a moment in silence.</narrator>\\n\\n<narrator>βIs she from New York?β</narrator> <narrator>I asked quickly.</narrator>\\n\\n<Daisy>βFrom Louisville. Our white girlhood was passed together there. Our beautiful whiteββ</Daisy>\\n\\n<Tom>βDid you give Nick a little heart to heart talk on the veranda?β</Tom> <narrator>demanded Tom suddenly.</narrator>\\n\\n<Daisy>βDid I?β</Daisy> <narrator>She looked at me.</narrator> <Daisy>βI canβt seem to remember, but I think we talked about the Nordic race. Yes, Iβm sure we did. It sort of crept up on us and first thing you knowββ</Daisy>\\n\\n<Tom>βDonβt believe everything you hear, Nick,β</Tom> <narrator>he advised me.</narrator>\\n</text>\\n\\n<characters>\\n['Tom', 'Jordan', 'Daisy', 'narrator']\\n</characters>\\n\"}\n",
|
973 |
+
"2024-10-10 02:37:52,060 [INFO] httpx (_client.py): HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n",
|
974 |
+
"2024-10-10 02:37:52,063 [INFO] audio-books (lc_callbacks.py): raw LLM response: \"{\n",
|
975 |
+
" \"character2props\": {\n",
|
976 |
+
" \"Tom\": {\n",
|
977 |
+
" \"gender\": \"male\",\n",
|
978 |
+
" \"age_group\": \"middle_aged\"\n",
|
979 |
+
" },\n",
|
980 |
+
" \"Jordan\": {\n",
|
981 |
+
" \"gender\": \"female\",\n",
|
982 |
+
" \"age_group\": \"young\"\n",
|
983 |
+
" },\n",
|
984 |
+
" \"Daisy\": {\n",
|
985 |
+
" \"gender\": \"female\",\n",
|
986 |
+
" \"age_group\": \"young\"\n",
|
987 |
+
" },\n",
|
988 |
+
" \"narrator\": {\n",
|
989 |
+
" \"gender\": \"male\",\n",
|
990 |
+
" \"age_group\": \"middle_aged\"\n",
|
991 |
+
" }\n",
|
992 |
+
" }\n",
|
993 |
+
"}\"\n"
|
994 |
+
]
|
995 |
+
}
|
996 |
+
],
|
997 |
+
"source": [
|
998 |
+
"res2 = chain.invoke(\n",
|
999 |
+
" {\"text\": res.text_annotated, \"characters\": res.characters},\n",
|
1000 |
+
" config={\"callbacks\": [LCMessageLoggerAsync()]},\n",
|
1001 |
+
")"
|
1002 |
+
]
|
1003 |
+
},
|
1004 |
+
{
|
1005 |
+
"cell_type": "code",
|
1006 |
+
"execution_count": 15,
|
1007 |
+
"metadata": {},
|
1008 |
+
"outputs": [
|
1009 |
+
{
|
1010 |
+
"data": {
|
1011 |
+
"text/plain": [
|
1012 |
+
"AllCharactersProperties(character2props={'Tom': CharacterProperties(gender='male', age_group='middle_aged'), 'Jordan': CharacterProperties(gender='female', age_group='young'), 'Daisy': CharacterProperties(gender='female', age_group='young'), 'narrator': CharacterProperties(gender='male', age_group='middle_aged')})"
|
1013 |
+
]
|
1014 |
+
},
|
1015 |
+
"execution_count": 15,
|
1016 |
+
"metadata": {},
|
1017 |
+
"output_type": "execute_result"
|
1018 |
+
}
|
1019 |
+
],
|
1020 |
+
"source": [
|
1021 |
+
"res2"
|
1022 |
+
]
|
1023 |
+
},
|
1024 |
+
{
|
1025 |
+
"cell_type": "code",
|
1026 |
+
"execution_count": null,
|
1027 |
+
"metadata": {},
|
1028 |
+
"outputs": [
|
1029 |
+
{
|
1030 |
+
"name": "stdout",
|
1031 |
+
"output_type": "stream",
|
1032 |
+
"text": [
|
1033 |
+
"<class 'pandas.core.frame.DataFrame'>\n",
|
1034 |
+
"RangeIndex: 22 entries, 0 to 21\n",
|
1035 |
+
"Data columns (total 14 columns):\n",
|
1036 |
+
" # Column Non-Null Count Dtype \n",
|
1037 |
+
"--- ------ -------------- ----- \n",
|
1038 |
+
" 0 voice_id 22 non-null object \n",
|
1039 |
+
" 1 name 22 non-null object \n",
|
1040 |
+
" 2 preview_url 22 non-null object \n",
|
1041 |
+
" 3 owner_id 0 non-null float64\n",
|
1042 |
+
" 4 permission_on_resource 2 non-null object \n",
|
1043 |
+
" 5 is_legacy 22 non-null bool \n",
|
1044 |
+
" 6 is_mixed 22 non-null bool \n",
|
1045 |
+
" 7 accent 22 non-null object \n",
|
1046 |
+
" 8 description 20 non-null object \n",
|
1047 |
+
" 9 age 22 non-null object \n",
|
1048 |
+
" 10 gender 22 non-null object \n",
|
1049 |
+
" 11 category 22 non-null object \n",
|
1050 |
+
" 12 language 2 non-null object \n",
|
1051 |
+
" 13 descriptive 2 non-null object \n",
|
1052 |
+
"dtypes: bool(2), float64(1), object(11)\n",
|
1053 |
+
"memory usage: 2.2+ KB\n"
|
1054 |
+
]
|
1055 |
+
}
|
1056 |
+
],
|
1057 |
+
"source": [
|
1058 |
+
"voices = pd.read_csv(\"11labs_available_tts_voices.csv\")\n",
|
1059 |
+
"voices.info()"
|
1060 |
+
]
|
1061 |
+
},
|
1062 |
+
{
|
1063 |
+
"cell_type": "code",
|
1064 |
+
"execution_count": null,
|
1065 |
+
"metadata": {},
|
1066 |
+
"outputs": [
|
1067 |
+
{
|
1068 |
+
"data": {
|
1069 |
+
"text/plain": [
|
1070 |
+
"array(['middle_aged', 'young', 'old'], dtype=object)"
|
1071 |
+
]
|
1072 |
+
},
|
1073 |
+
"metadata": {},
|
1074 |
+
"output_type": "display_data"
|
1075 |
+
}
|
1076 |
+
],
|
1077 |
+
"source": [
|
1078 |
+
"voices[\"age\"].unique()"
|
1079 |
+
]
|
1080 |
+
},
|
1081 |
+
{
|
1082 |
+
"cell_type": "code",
|
1083 |
+
"execution_count": null,
|
1084 |
+
"metadata": {},
|
1085 |
+
"outputs": [
|
1086 |
+
{
|
1087 |
+
"data": {
|
1088 |
+
"text/plain": [
|
1089 |
+
"array(['female', 'male', 'non-binary', 'neutral'], dtype=object)"
|
1090 |
+
]
|
1091 |
+
},
|
1092 |
+
"metadata": {},
|
1093 |
+
"output_type": "display_data"
|
1094 |
+
}
|
1095 |
+
],
|
1096 |
+
"source": [
|
1097 |
+
"voices[\"gender\"].unique()"
|
1098 |
+
]
|
1099 |
+
},
|
1100 |
+
{
|
1101 |
+
"cell_type": "code",
|
1102 |
+
"execution_count": null,
|
1103 |
+
"metadata": {},
|
1104 |
+
"outputs": [],
|
1105 |
+
"source": []
|
1106 |
+
},
|
1107 |
+
{
|
1108 |
+
"cell_type": "code",
|
1109 |
+
"execution_count": null,
|
1110 |
+
"metadata": {},
|
1111 |
+
"outputs": [],
|
1112 |
+
"source": []
|
1113 |
+
},
|
1114 |
{
|
1115 |
"cell_type": "code",
|
1116 |
"execution_count": null,
|
@@ -9,6 +9,11 @@
|
|
9 |
- [x] generate good enough sound effects for background
|
10 |
- [ ] mix effects with narration
|
11 |
- [x] allow file upload (.txt)
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
### Backlog
|
14 |
- [ ] prepare text for TTS
|
|
|
9 |
- [x] generate good enough sound effects for background
|
10 |
- [ ] mix effects with narration
|
11 |
- [x] allow file upload (.txt)
|
12 |
+
- optimizations
|
13 |
+
- [ ] combine sequential phrases of same character in single phrase
|
14 |
+
- [ ] support large texts. use batching. problem: how to ensure same characters?
|
15 |
+
can detect characters in first prompt, then split text in each batch into character phrases
|
16 |
+
- [ ] probably split large phrases into smaller ones
|
17 |
|
18 |
### Backlog
|
19 |
- [ ] prepare text for TTS
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
|
3 |
+
import click
|
4 |
+
import pandas as pd
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
from elevenlabs import ElevenLabs
|
7 |
+
from elevenlabs.core import ApiError
|
8 |
+
from tqdm.auto import tqdm
|
9 |
+
|
10 |
+
|
11 |
+
logging.basicConfig(
|
12 |
+
level=logging.INFO,
|
13 |
+
format="%(asctime)s [%(levelname)s] %(name)s (%(filename)s): %(message)s",
|
14 |
+
)
|
15 |
+
logger = logging.getLogger("add-voices")
|
16 |
+
|
17 |
+
|
18 |
+
load_dotenv()
|
19 |
+
|
20 |
+
|
21 |
+
@click.command()
@click.option("-ak", "--api-key", envvar="11LABS_API_KEY")
@click.option("-i", "--input-csv-path", default="data/11labs_tts_voices.csv")
def main(*, api_key: str | None, input_csv_path: str) -> None:
    """Add every shared voice listed in a CSV file to the ElevenLabs account.

    The CSV is read with pandas and must provide the columns
    ``public_owner_id``, ``voice_id`` and ``name``. Each row results in one
    ``client.voices.add_sharing_voice`` call; a failing row is logged and the
    loop continues with the next one.

    Args:
        api_key: ElevenLabs API key (``--api-key`` or the ``11LABS_API_KEY``
            environment variable).
        input_csv_path: Path of the CSV file describing the voices to add.

    Raises:
        OSError: If no API key was provided.
    """
    if api_key is None:
        raise OSError("Who's gonna set the `11LABS_API_KEY` environmental variable?")

    client = ElevenLabs(api_key=api_key)
    voices_to_import = pd.read_csv(input_csv_path)

    for _, row in tqdm(voices_to_import.iterrows(), total=len(voices_to_import)):
        # Read the row outside the ``try`` so the handler below only ever
        # deals with errors coming from the API call itself.
        public_user_id = row["public_owner_id"]
        voice_id = row["voice_id"]
        name = row["name"]
        try:
            client.voices.add_sharing_voice(
                public_user_id=public_user_id,
                voice_id=voice_id,
                new_name=name,
            )
        except ApiError as error:
            # An ApiError is not necessarily "already added" (it can just as
            # well be an auth, quota or invalid-id failure), so report the
            # actual error instead of guessing the cause.
            logger.error(
                f"Failed to add shared voice with `{public_user_id = }`, "
                f"`{voice_id = }` (it may already be added): {error}"
            )
        else:
            logger.info(
                f"Added shared voice with `{public_user_id = }`, `{voice_id = }`, "
                f"`{name = }`."
            )
|
48 |
+
|
49 |
+
|
50 |
+
if __name__ == "__main__":
|
51 |
+
main()
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
import os
|
3 |
+
|
4 |
+
import click
|
5 |
+
import pandas as pd
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
from elevenlabs import ElevenLabs
|
8 |
+
|
9 |
+
|
10 |
+
logging.basicConfig(
|
11 |
+
level=logging.INFO,
|
12 |
+
format="%(asctime)s [%(levelname)s] %(name)s (%(filename)s): %(message)s",
|
13 |
+
)
|
14 |
+
logger = logging.getLogger("export-available-voices")
|
15 |
+
|
16 |
+
|
17 |
+
load_dotenv()
|
18 |
+
|
19 |
+
|
20 |
+
@click.command()
@click.option("-ak", "--api-key", envvar="11LABS_API_KEY")
@click.option("-o", "--output-csv-path", default="data/11labs_available_tts_voices.csv")
def main(*, api_key: str | None, output_csv_path: str) -> None:
    """Export the voices available to the given API key into a CSV file.

    Each voice returned by ``client.voices.get_all()`` is dumped to a record,
    its ``labels`` mapping is expanded into separate columns (``use_case`` is
    renamed to ``category``) and the resulting table is written without the
    index column.

    Args:
        api_key: ElevenLabs API key (``--api-key`` or the ``11LABS_API_KEY``
            environment variable).
        output_csv_path: Destination path of the CSV file.

    Raises:
        OSError: If no API key was provided.
    """
    if api_key is None:
        raise OSError("Who's gonna set the `11LABS_API_KEY` environmental variable?")

    client = ElevenLabs(api_key=api_key)
    response = client.voices.get_all()
    records = [
        voice.model_dump(
            include={
                "voice_id", "name", "language", "labels", "description", "preview_url",
            },
        )
        for voice in response.voices
    ]
    available_voices = pd.DataFrame.from_records(records)

    # One column per label key; a voice without labels contributes an empty row.
    labels = pd.DataFrame.from_records(
        [record.get("labels") or {} for record in records]
    ).rename(columns={"use_case": "category"})

    available_voices = pd.concat((
        # `include=` above already restricts the dumped fields, so most of the
        # columns listed here are absent; `errors="ignore"` keeps `drop` from
        # raising KeyError for them.
        available_voices.drop(
            columns=[
                "labels", "description", "available_for_tiers", "settings", "sharing",
                "high_quality_base_model_ids", "safety_control", "voice_verification",
                "category", "samples",
            ],
            errors="ignore",
        ),
        labels,
    ), axis=1)

    available_voices.drop(columns="fine_tuning", errors="ignore").to_csv(
        output_csv_path, index=False
    )
|
46 |
+
|
47 |
+
|
48 |
+
if __name__ == "__main__":
|
49 |
+
main()
|
@@ -0,0 +1,239 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
import os
|
3 |
+
import re
|
4 |
+
from pathlib import Path
|
5 |
+
from uuid import uuid4
|
6 |
+
import random
|
7 |
+
|
8 |
+
from langchain_community.callbacks import get_openai_callback
|
9 |
+
from pydub import AudioSegment
|
10 |
+
|
11 |
+
from src.lc_callbacks import LCMessageLoggerAsync
|
12 |
+
from src.tts import tts_astream, sound_generation_astream
|
13 |
+
from src.utils import consume_aiter
|
14 |
+
from src.emotions.generation import EffectGeneratorAsync
|
15 |
+
from src.emotions.utils import add_overlay_for_audio
|
16 |
+
from src.config import AI_ML_API_KEY, ELEVENLABS_MAX_PARALLEL, logger
|
17 |
+
from src.text_split_chain import SplitTextOutput
|
18 |
+
|
19 |
+
|
20 |
+
class AudioGeneratorSimple:
    """Turn a character-split text into one audio file, one TTS call per phrase."""

    async def generate_audio(
        self,
        text_split: SplitTextOutput,
        character_to_voice: dict[str, str],
    ) -> Path:
        """Synthesize every phrase of ``text_split`` and write the combined audio.

        Args:
            text_split: Split text; its ``phrases`` are iterated and each
                phrase's ``character`` and ``text`` attributes are read.
            character_to_voice: Maps a character name to the voice id passed
                to ``tts_astream``.

        Returns:
            Path of the written file, ``data/books/<uuid4>.wav``.

        Raises:
            KeyError: If a phrase's character is missing from
                ``character_to_voice``.
        """
        # At most ELEVENLABS_MAX_PARALLEL TTS streams are consumed at a time.
        limiter = asyncio.Semaphore(ELEVENLABS_MAX_PARALLEL)

        async def synthesize(voice_id: str, text: str):
            async with limiter:
                return await consume_aiter(tts_astream(voice_id=voice_id, text=text))

        pending = [
            synthesize(
                voice_id=character_to_voice[phrase.character], text=phrase.text
            )
            for phrase in text_split.phrases
        ]

        # gather() returns results in the order of `pending`, i.e. phrase order.
        per_phrase_chunks = await asyncio.gather(*pending)

        output_dir = Path("data") / "books"
        output_dir.mkdir(exist_ok=True)
        audio_combined_fp = output_dir / f"{uuid4()}.wav"

        logger.info(f'saving generated audio book to: "{audio_combined_fp}"')
        with open(audio_combined_fp, "wb") as ab:
            for chunks in per_phrase_chunks:
                ab.writelines(chunks)

        return audio_combined_fp
|
56 |
+
|
57 |
+
|
58 |
+
class AudioGeneratorWithEffects:
    """Generate an audiobook with emotion-adjusted speech and sound effects.

    All line indices used by the helpers of this class refer to positions in
    the list of NON-EMPTY lines of the annotated text (see
    ``_non_empty_lines``), so that modified texts, TTS files and sound-effect
    selections always stay aligned.
    """

    # Matches a "[character name]" tag inside an annotated line.
    _CHARACTER_TAG = re.compile(r"\[[\w\s]+\]")

    def __init__(self):
        self.effect_generator = EffectGeneratorAsync(AI_ML_API_KEY)

    async def generate_audio_with_text_modification(
        self,
        annotated_text: str,
        character_to_voice: dict[str, str],
    ) -> Path:
        """Main method to generate the audiobook with TTS, emotion, and sound effects.

        Args:
            annotated_text: Text with one phrase per line; a line may carry a
                ``[character]`` tag naming its speaker.
            character_to_voice: Maps a character name to a TTS voice id.

        Returns:
            Path of the merged audio file.

        Raises:
            ValueError: If ``annotated_text`` has no non-empty lines.
            KeyError: If a character has no entry in ``character_to_voice``.
        """
        num_lines = len(self._non_empty_lines(annotated_text))
        if num_lines == 0:
            raise ValueError("annotated_text contains no non-empty lines")
        lines_for_sound_effect = self._select_lines_for_sound_effect(num_lines)

        # Step 1: Process and modify text
        modified_texts, sound_emotion_results = await self._process_and_modify_text(
            annotated_text, lines_for_sound_effect
        )

        # Step 2: Generate TTS audio for modified text
        tts_results, temp_files = await self._generate_tts_audio(
            annotated_text, modified_texts, character_to_voice
        )

        try:
            # Step 3: Add sound effects to selected lines
            audio_chunks = await self._add_sound_effects(
                tts_results, lines_for_sound_effect, sound_emotion_results, temp_files
            )

            # Step 4: Merge audio files
            final_output = self._merge_audio_files(audio_chunks)
        finally:
            # Clean up temporary files even when step 3 or 4 fails.
            self._cleanup_temp_files(temp_files)

        return final_output

    @staticmethod
    def _non_empty_lines(annotated_text: str) -> list[str]:
        """Return the stripped, non-empty lines of ``annotated_text`` in order."""
        return [
            stripped
            for stripped in (line.strip() for line in annotated_text.splitlines())
            if stripped
        ]

    @staticmethod
    def _resolve_voice(character_to_voice: dict[str, str], character: str) -> str:
        """Look up the voice id of ``character``.

        An exact match is tried first, then a case-insensitive one.

        Raises:
            KeyError: If no entry matches.
        """
        try:
            return character_to_voice[character]
        except KeyError:
            pass
        wanted = character.casefold()
        for known_character, voice_id in character_to_voice.items():
            if known_character.casefold() == wanted:
                return voice_id
        raise KeyError(character)

    def _select_lines_for_sound_effect(self, num_lines: int) -> list[int]:
        """Select 20% of the lines randomly for sound effect generation."""
        return random.sample(range(num_lines), k=int(0.2 * num_lines))

    async def _process_and_modify_text(
        self, annotated_text: str, lines_for_sound_effect: list[int]
    ) -> tuple[list[dict], list[dict]]:
        """Process the text by modifying it and generating tasks for sound effects.

        Args:
            annotated_text: Annotated text, one phrase per line.
            lines_for_sound_effect: Indices (into the non-empty lines) that
                should get a sound effect.

        Returns:
            ``(modified_texts, sound_emotion_results)``: one modified text per
            non-empty line, and one sound-effect parameter set per selected
            line, both in line order.
        """
        effect_lines = set(lines_for_sound_effect)
        tasks_for_text_modification = []
        sound_emotion_tasks = []

        for idx, line in enumerate(self._non_empty_lines(annotated_text)):
            # NOTE(review): the text is lower-cased before it is sent to the
            # effect generator, as in the original implementation -- confirm
            # this is intended.
            cleaned_line = line.lower()

            # Extract character text (everything after the last "]")
            character_text = cleaned_line[cleaned_line.rfind("]") + 1:].lstrip()

            # Add text emotion modification task
            tasks_for_text_modification.append(
                self.effect_generator.add_emotion_to_text(character_text)
            )

            # If this line needs sound effects, generate parameters
            if idx in effect_lines:
                sound_emotion_tasks.append(
                    self.effect_generator.generate_parameters_for_sound_effect(
                        character_text
                    )
                )

        # Await tasks for text modification and sound effects
        modified_texts = await asyncio.gather(*tasks_for_text_modification)
        sound_emotion_results = await asyncio.gather(*sound_emotion_tasks)

        return modified_texts, sound_emotion_results

    async def _generate_tts_audio(
        self,
        annotated_text: str,
        modified_texts: list[dict],  # TODO ? type ?
        character_to_voice: dict[str, str],
    ) -> tuple[list[str], list[str]]:
        """Generate TTS audio for modified text.

        Args:
            annotated_text: Annotated text, one phrase per line.
            modified_texts: One entry per non-empty line; the ``"text"`` and
                ``"params"`` keys of each entry are passed to ``tts_astream``.
            character_to_voice: Maps a character name to a TTS voice id.

        Returns:
            ``(tts_audio_files, temp_files)``: the written file names in line
            order, and a separate list of the same names used to track
            temporary files.

        Raises:
            KeyError: If a character has no entry in ``character_to_voice``.
        """
        # Resolve every voice first so that a missing character fails before
        # any TTS coroutine has been created.
        tts_requests = []
        current_character = "narrator"
        for modified_text, line in zip(
            modified_texts, self._non_empty_lines(annotated_text)
        ):
            # The tag is read from the original-case line: the voice mapping
            # is keyed by the character names as they were detected.
            tag = self._CHARACTER_TAG.search(line)
            if tag is not None:
                current_character = tag.group()[1:-1].strip()
            # A line without a tag keeps the previous speaker.
            voice_id = self._resolve_voice(character_to_voice, current_character)
            tts_requests.append((voice_id, modified_text))

        # Same parallelism cap as AudioGeneratorSimple.
        semaphore = asyncio.Semaphore(ELEVENLABS_MAX_PARALLEL)

        async def synthesize(voice_id: str, modified_text: dict):
            async with semaphore:
                return await consume_aiter(
                    tts_astream(
                        voice_id=voice_id,
                        text=modified_text["text"],  # TODO ? type ?
                        params=modified_text["params"],  # TODO ? type ?
                    )
                )

        # Gather all TTS results (in request order)
        tts_results = await asyncio.gather(
            *(synthesize(voice_id, text) for voice_id, text in tts_requests)
        )

        # Save the results to temporary files
        # NOTE(review): file names are relative to the working directory and
        # not unique per run -- concurrent runs would presumably overwrite
        # each other's files; confirm whether that can happen.
        tts_audio_files = []
        for idx, tts_result in enumerate(tts_results):
            tts_filename = f"tts_output_{idx}.wav"
            with open(tts_filename, "wb") as ab:
                for chunk in tts_result:
                    ab.write(chunk)
            tts_audio_files.append(tts_filename)

        # Return a separate list for temp-file tracking: it is appended to
        # later while the first list is being iterated.
        return tts_audio_files, list(tts_audio_files)

    async def _add_sound_effects(
        self,
        tts_audio_files: list[str],
        lines_for_sound_effect: list[int],
        sound_emotion_results: list[dict],
        temp_files: list[str],
    ) -> list[str]:
        """Add sound effects to the selected lines.

        Args:
            tts_audio_files: TTS file names in line order.
            lines_for_sound_effect: Indices of lines that get a sound effect.
            sound_emotion_results: Sound-effect parameters, one per selected
                line, in line order. The list is not modified.
            temp_files: List that newly created file names are appended to.

        Returns:
            File names to merge, in line order.
        """
        effect_lines = set(lines_for_sound_effect)
        remaining_effects = iter(sound_emotion_results)
        audio_chunks = []

        for idx, tts_filename in enumerate(tts_audio_files):
            if idx not in effect_lines:
                audio_chunks.append(tts_filename)
                continue

            # Get next sound effect data
            sound_effect_data = next(remaining_effects, None)
            if sound_effect_data is None:
                # Best effort: keep the plain narration instead of failing.
                logger.warning(f"no sound effect parameters left for line {idx}")
                audio_chunks.append(tts_filename)
                continue

            sound_effect_filename = f"sound_effect_{idx}.wav"
            # Track the file before writing so it is removed even if a later
            # step raises.
            temp_files.append(sound_effect_filename)

            # Generate sound effect asynchronously
            sound_result = await consume_aiter(
                sound_generation_astream(sound_effect_data)
            )
            with open(sound_effect_filename, "wb") as ab:
                for chunk in sound_result:
                    ab.write(chunk)

            # Add sound effect overlay
            output_filename = add_overlay_for_audio(
                main_audio_filename=tts_filename,
                sound_effect_filename=sound_effect_filename,
                cycling_effect=True,
                decrease_effect_volume=5,
            )
            temp_files.append(output_filename)
            audio_chunks.append(output_filename)

        return audio_chunks

    def _merge_audio_files(self, audio_filenames: list[str]) -> Path:
        """Helper function to merge multiple audio files into one.

        Returns:
            Path of the exported file, ``data/books/<uuid4>.wav``.

        Raises:
            ValueError: If ``audio_filenames`` is empty.
        """
        if not audio_filenames:
            raise ValueError("no audio files to merge")

        combined = AudioSegment.from_file(audio_filenames[0])
        for filename in audio_filenames[1:]:
            combined += AudioSegment.from_file(filename)  # Concatenate the audio

        save_dir = Path("data") / "books"
        save_dir.mkdir(parents=True, exist_ok=True)
        save_path = save_dir / f"{uuid4()}.wav"
        combined.export(save_path, format="wav")
        return save_path

    def _cleanup_temp_files(self, temp_files: list[str]) -> None:
        """Helper function to delete all temporary files; missing files are skipped."""
        for temp_file in temp_files:
            try:
                os.remove(temp_file)
            except FileNotFoundError:
                continue
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from langchain_community.callbacks import get_openai_callback
|
2 |
+
|
3 |
+
from src.audio_generators import AudioGeneratorSimple
|
4 |
+
from src.lc_callbacks import LCMessageLoggerAsync
|
5 |
+
from src.select_voice_chain import SelectVoiceChainOutput, VoiceSelector
|
6 |
+
from src.text_split_chain import SplitTextOutput, create_split_text_chain
|
7 |
+
from src.utils import GPTModels
|
8 |
+
|
9 |
+
|
10 |
+
class AudiobookBuilder:
    """Pipeline: split text by character, map characters to voices, synthesize audio."""

    def __init__(self):
        self.voice_selector = VoiceSelector(
            csv_table_fp="data/11labs_available_tts_voices.csv"
        )
        self.audio_generator = AudioGeneratorSimple()

    async def split_text(self, text: str) -> SplitTextOutput:
        """Run the text-split chain (GPT-4o) on ``text`` and return its output."""
        splitter = create_split_text_chain(llm_model=GPTModels.GPT_4o)
        with get_openai_callback():
            return await splitter.ainvoke(
                {"text": text}, config={"callbacks": [LCMessageLoggerAsync()]}
            )

    async def map_characters_to_voices(
        self, text_split: SplitTextOutput
    ) -> SelectVoiceChainOutput:
        """Run the voice-mapping chain (GPT-4o) on the split text.

        The chain receives the annotated text and the character list taken
        from ``text_split``.
        """
        mapper = self.voice_selector.create_voice_mapping_chain(
            llm_model=GPTModels.GPT_4o
        )
        with get_openai_callback():
            chain_input = {
                "text": text_split.text_annotated,
                "characters": text_split.characters,
            }
            return await mapper.ainvoke(
                chain_input,
                config={"callbacks": [LCMessageLoggerAsync()]},
            )

    async def run(self, text: str):
        """Build an audiobook from ``text`` and return the audio generator's output path."""
        phrases_by_character = await self.split_text(text)
        voice_selection = await self.map_characters_to_voices(
            text_split=phrases_by_character
        )
        # TODO: show select_voice_chain_out.character2props on UI
        return await self.audio_generator.generate_audio(
            text_split=phrases_by_character,
            character_to_voice=voice_selection.character2voice,
        )
|
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import logging
|
2 |
|
3 |
logging.basicConfig(
|
@@ -5,3 +6,11 @@ logging.basicConfig(
|
|
5 |
format="%(asctime)s [%(levelname)s] %(name)s (%(filename)s): %(message)s",
|
6 |
)
|
7 |
logger = logging.getLogger("audio-books")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
import logging
|
3 |
|
4 |
logging.basicConfig(
|
|
|
6 |
format="%(asctime)s [%(levelname)s] %(name)s (%(filename)s): %(message)s",
|
7 |
)
|
8 |
logger = logging.getLogger("audio-books")
|
9 |
+
|
10 |
+
|
11 |
+
ELEVENLABS_API_KEY = os.environ["11LABS_API_KEY"]
|
12 |
+
AI_ML_API_KEY = os.environ["AIML_API_KEY"]
|
13 |
+
|
14 |
+
FILE_SIZE_MAX = 0.5 # in mb
|
15 |
+
|
16 |
+
ELEVENLABS_MAX_PARALLEL = 15 # current limitation of available subscription
|
@@ -123,7 +123,7 @@ class EffectGeneratorAsync(AbstractEffectGenerator):
|
|
123 |
raise RuntimeError(f"Unexpected Error: {e}")
|
124 |
|
125 |
|
126 |
-
async def generate_parameters_for_sound_effect(self, text: str, generated_audio_file: str) -> dict:
|
127 |
llm_output = await self.generate_text_for_sound_effect(text)
|
128 |
if generated_audio_file is not None:
|
129 |
llm_output['duration_seconds'] = get_audio_duration(generated_audio_file)
|
|
|
123 |
raise RuntimeError(f"Unexpected Error: {e}")
|
124 |
|
125 |
|
126 |
+
async def generate_parameters_for_sound_effect(self, text: str, generated_audio_file: str = None) -> dict:
|
127 |
llm_output = await self.generate_text_for_sound_effect(text)
|
128 |
if generated_audio_file is not None:
|
129 |
llm_output['duration_seconds'] = get_audio_duration(generated_audio_file)
|
File without changes
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import typing as t
|
2 |
+
|
3 |
+
from langchain_core.callbacks import AsyncCallbackHandler
|
4 |
+
from langchain_core.outputs import ChatGeneration
|
5 |
+
from langchain_core.outputs.llm_result import LLMResult
|
6 |
+
from langchain_core.messages import BaseMessage
|
7 |
+
|
8 |
+
from src.config import logger
|
9 |
+
|
10 |
+
|
11 |
+
class LCMessageLoggerAsync(AsyncCallbackHandler):
    """Custom callback to make Langchain logs easy to read.

    Logs the messages sent to a chat model when a call starts and, optionally,
    the raw response when the call ends.
    """

    @staticmethod
    def langchain_msg_2_role_content(msg: BaseMessage):
        """Convert a LangChain message into a ``{"role", "content"}`` dict."""
        return {"role": msg.type, "content": msg.content}

    def __init__(self, log_raw_llm_response=True):
        """
        Args:
            log_raw_llm_response: when truthy, the raw LLM response is logged
                in ``on_llm_end``.
        """
        super().__init__()
        self._log_raw_llm_response = log_raw_llm_response

    def on_chat_model_start(
        self,
        serialized: dict[str, t.Any],
        messages: list[list[BaseMessage]],
        **kwargs: t.Any,
    ) -> t.Any:
        """Run when Chat Model starts running.

        Raises:
            ValueError: if ``messages`` does not contain exactly one prompt.
        """
        if len(messages) != 1:
            raise ValueError(f'expected "messages" to have len 1, got: {len(messages)}')

        # Use a dedicated name instead of rebinding the ``**kwargs`` parameter,
        # and tolerate a missing/empty serialized payload instead of crashing
        # inside a logging-only callback.
        model_kwargs = (serialized or {}).get("kwargs") or {}
        model_name = (
            model_kwargs.get("model_name")
            or model_kwargs.get("deployment_name")
            or "<failed to determine LLM>"
        )

        msgs_list = [self.langchain_msg_2_role_content(msg) for msg in messages[0]]
        msgs_str = "\n".join(map(str, msgs_list))

        logger.info(f"call to {model_name} with {len(msgs_list)} messages:\n{msgs_str}")

    def on_llm_end(self, response: LLMResult, **kwargs: t.Any) -> t.Any:
        """Run when LLM ends running.

        Raises:
            ValueError: if the response does not hold exactly one generation.
        """
        generations = response.generations
        if len(generations) != 1:
            raise ValueError(
                f'expected "generations" to have len 1, got: {len(generations)}'
            )
        if len(generations[0]) != 1:
            raise ValueError(
                f'expected "generations[0]" to have len 1, got: {len(generations[0])}'
            )

        if not self._log_raw_llm_response:
            return

        gen = generations[0][0]
        # Chat generations carry a message; fall back to the plain ``text``
        # attribute for non-chat generations so logging never raises.
        ai_msg = getattr(gen, "message", None)
        content = ai_msg.content if ai_msg is not None else gen.text
        logger.info(f'raw LLM response: "{content}"')
|
@@ -109,3 +109,50 @@ sometimes the character name becomes clear from the following phrases
|
|
109 |
Here is the book sample:
|
110 |
---
|
111 |
{text}"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
Here is the book sample:
|
110 |
---
|
111 |
{text}"""
|
112 |
+
|
113 |
+
|
114 |
+
class CharacterVoicePropertiesPrompt:
    """Prompt templates for assigning voice properties to book characters.

    ``SYSTEM`` expects the template variables ``available_genders``,
    ``available_age_groups`` and ``format_instructions``; ``USER`` expects
    ``text`` and ``characters``.
    """

    SYSTEM = """\
You are a helpful assistant proficient in literature and psychology.
Our goal is to create an audio book from the given text.
For that we need to hire voice actors.
Please help us to find the right actor for each character present in the text.

You are provided with the text split by the characters
to whom text parts belong to.

Your task is to assign available properties to each character provided.
List of available properties:
- gender: {available_genders}
- age_group: {available_age_groups}

NOTES:
- assign EXACTLY ONE property value for each property
- select properties values ONLY from the list of AVAILABLE property values
- fill properties for ALL characters from the list provided
- DO NOT include any characters absent in the list provided

{format_instructions}
"""

    # NOTE(review): hand-written output-format instructions kept for reference;
    # presumably superseded by the {format_instructions} placeholder above.
    # You MUST answer with the following JSON:
    # {{
    #     "character2props":
    #     {{
    #         <character_name>:
    #         {{
    #             "gender": <value>,
    #             "age_group": <value>
    #         }}
    #     }}
    # }}

    USER = """\
<text>
{text}
</text>

<characters>
{characters}
</characters>
"""
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from enum import StrEnum
|
2 |
+
|
3 |
+
import pandas as pd
|
4 |
+
from langchain_core.output_parsers import PydanticOutputParser
|
5 |
+
from langchain_core.prompts import (
|
6 |
+
ChatPromptTemplate,
|
7 |
+
HumanMessagePromptTemplate,
|
8 |
+
SystemMessagePromptTemplate,
|
9 |
+
)
|
10 |
+
from langchain_core.runnables import RunnablePassthrough
|
11 |
+
from pydantic import BaseModel
|
12 |
+
|
13 |
+
from src.config import logger
|
14 |
+
from src.prompts import CharacterVoicePropertiesPrompt
|
15 |
+
from src.utils import GPTModels, get_chat_llm
|
16 |
+
|
17 |
+
|
18 |
+
class Property(StrEnum):
    """Names of the voice properties assigned to each character."""

    gender = "gender"
    age_group = "age_group"
|
21 |
+
|
22 |
+
|
23 |
+
class CharacterProperties(BaseModel):
    """Voice properties of a single character, both values required."""

    gender: str
    age_group: str

    def __hash__(self):
        # hash on the field values so instances can be used as dict keys
        return hash((self.gender, self.age_group))
|
29 |
+
|
30 |
+
|
31 |
+
class AllCharactersProperties(BaseModel):
    """Mapping from character name to its ``CharacterProperties``."""

    character2props: dict[str, CharacterProperties]
|
33 |
+
|
34 |
+
|
35 |
+
class CharacterPropertiesNullable(BaseModel):
    """Voice properties of a single character where each value may be ``None``."""

    gender: str | None
    age_group: str | None

    def __hash__(self):
        # hash on the field values so instances can be used as dict keys
        return hash((self.gender, self.age_group))
|
41 |
+
|
42 |
+
|
43 |
+
class AllCharactersPropertiesNullable(BaseModel):
    """Mapping from character name to its ``CharacterPropertiesNullable``."""

    character2props: dict[str, CharacterPropertiesNullable]
|
45 |
+
|
46 |
+
|
47 |
+
class SelectVoiceChainOutput(BaseModel):
    """Result of the voice-selection chain."""

    # character name -> properties (values may be None)
    character2props: dict[str, CharacterPropertiesNullable]
    # character name -> selected voice id
    character2voice: dict[str, str]
|
50 |
+
|
51 |
+
|
52 |
+
class VoiceSelector:
    """Select a TTS voice for every character from LLM-assigned properties.

    Voice metadata is loaded from a CSV table; this class reads its
    ``gender``, ``age`` and ``voice_id`` columns.
    """

    # Values the LLM is allowed to choose from, per property.
    PROPERTY_VALUES = {
        Property.gender: {"male", "female"},
        Property.age_group: {"young", "middle_aged", "old"},
    }

    def __init__(self, csv_table_fp: str):
        """Load the voice table from the CSV file at ``csv_table_fp``."""
        self.df = self.read_data_table(csv_table_fp=csv_table_fp)

    def read_data_table(self, csv_table_fp: str):
        """Read the voice CSV and normalize the ``age`` column.

        Spaces and hyphens in ``age`` are replaced with underscores so the
        values are comparable with ``PROPERTY_VALUES`` (e.g. "middle_aged").
        """
        logger.info(f'reading voice data from: "{csv_table_fp}"')
        df = pd.read_csv(csv_table_fp)
        df["age"] = df["age"].str.replace(" ", "_").str.replace("-", "_")
        return df

    def get_available_properties_str(self, prop: Property):
        """Return the allowed values of ``prop`` as a quoted, comma-separated string.

        Values are sorted: iterating the underlying set directly yields an
        order that can change between interpreter runs, which would make the
        prompt text non-reproducible.
        """
        vals = sorted(self.PROPERTY_VALUES[prop])
        return ", ".join(f'"{v}"' for v in vals)

    def _get_voices_single_props(
        self, character_props: CharacterPropertiesNullable, n_characters: int
    ):
        """Sample ``n_characters`` distinct voice ids matching ``character_props``.

        A property that is ``None`` (or empty) is not used for filtering.

        Raises:
            ValueError: if ``n_characters`` is not positive, or fewer matching
                voices exist than characters that need one.
        """
        if n_characters <= 0:
            raise ValueError(n_characters)

        df_filtered = self.df
        if val := character_props.gender:
            df_filtered = df_filtered[df_filtered["gender"] == val]
        if val := character_props.age_group:
            df_filtered = df_filtered[df_filtered["age"] == val]

        # Fail with an actionable message instead of the generic pandas
        # "Cannot take a larger sample than population" error.
        n_available = len(df_filtered)
        if n_available < n_characters:
            raise ValueError(
                f"not enough voices for properties {character_props!r}: "
                f"requested {n_characters}, available {n_available}"
            )

        voice_ids = df_filtered.sample(n_characters)["voice_id"].to_list()
        return voice_ids

    def get_voices(self, inputs: dict) -> dict:
        """Map every character to a voice id.

        Args:
            inputs: chain state; must hold the cleaned character properties
                under the key ``"charater_props"``.

        Returns:
            dict of character name -> voice id. Characters sharing the same
            properties receive different voices.

        Raises:
            ValueError: if any character has a ``None`` property, or there are
                not enough voices for a property combination.
        """
        # NOTE: the key spelling matches the name assigned in
        # create_voice_mapping_chain and must stay in sync with it.
        character_props: AllCharactersPropertiesNullable = inputs["charater_props"]

        # check for Nones.
        # TODO: for simplicity we raise error if LLM failed to select valid property value.
        # else, we would need to implement clever mapping to avoid overlapping between voices.
        prop2character: dict[CharacterPropertiesNullable, list[str]] = {}
        for character, props in character_props.character2props.items():
            if props.age_group is None or props.gender is None:
                raise ValueError(
                    f'no valid properties selected for character "{character}": {props!r}'
                )
            # group characters by identical properties (keeps input order)
            prop2character.setdefault(props, []).append(character)

        character2voice = {}
        for props, characters in prop2character.items():
            voice_ids = self._get_voices_single_props(
                character_props=props, n_characters=len(characters)
            )
            character2voice.update(zip(characters, voice_ids))

        return character2voice

    def _remove_hallucinations_single_character(
        self, character_props: CharacterProperties
    ):
        """Replace property values outside ``PROPERTY_VALUES`` with ``None``."""

        def _process_prop(prop: Property, value: str):
            if value not in self.PROPERTY_VALUES[prop]:
                logger.warning(
                    f'LLM selected non-available {prop} value: "{value}". defaulting to None'
                )
                return None
            return value

        return CharacterPropertiesNullable(
            gender=_process_prop(prop=Property.gender, value=character_props.gender),
            age_group=_process_prop(
                prop=Property.age_group, value=character_props.age_group
            ),
        )

    def remove_hallucinations(
        self, props: AllCharactersProperties
    ) -> AllCharactersPropertiesNullable:
        """Clean the LLM output for all characters (invalid values become ``None``)."""
        res = AllCharactersPropertiesNullable(
            character2props={
                k: self._remove_hallucinations_single_character(character_props=v)
                for k, v in props.character2props.items()
            }
        )
        return res

    def pack_results(self, inputs: dict):
        """Combine the chain state into a ``SelectVoiceChainOutput``."""
        character_props: AllCharactersPropertiesNullable = inputs["charater_props"]
        character2voice: dict[str, str] = inputs["character2voice"]
        return SelectVoiceChainOutput(
            character2props=character_props.character2props,
            character2voice=character2voice,
        )

    def create_voice_mapping_chain(self, llm_model: GPTModels):
        """Build the chain: LLM property assignment -> cleanup -> voice sampling.

        The chain expects ``text`` and ``characters`` inputs (the variables of
        the user prompt) and produces a ``SelectVoiceChainOutput``.
        """
        llm = get_chat_llm(llm_model=llm_model, temperature=0.0)
        llm = llm.with_structured_output(AllCharactersProperties, method="json_mode")

        # the parser is only used to render format instructions for the prompt
        output_parser = PydanticOutputParser(pydantic_object=AllCharactersProperties)
        format_instructions = output_parser.get_format_instructions()

        prompt = ChatPromptTemplate.from_messages(
            [
                SystemMessagePromptTemplate.from_template(
                    CharacterVoicePropertiesPrompt.SYSTEM
                ),
                HumanMessagePromptTemplate.from_template(
                    CharacterVoicePropertiesPrompt.USER
                ),
            ]
        )
        prompt = prompt.partial(
            **{
                "available_genders": self.get_available_properties_str(Property.gender),
                "available_age_groups": self.get_available_properties_str(
                    Property.age_group
                ),
                "format_instructions": format_instructions,
            }
        )

        chain = (
            RunnablePassthrough.assign(
                charater_props=prompt | llm | self.remove_hallucinations
            )
            | RunnablePassthrough.assign(character2voice=self.get_voices)
            | self.pack_results
        )
        return chain
|
@@ -6,6 +6,7 @@ from langchain_core.prompts import (
|
|
6 |
HumanMessagePromptTemplate,
|
7 |
SystemMessagePromptTemplate,
|
8 |
)
|
|
|
9 |
from pydantic import BaseModel
|
10 |
|
11 |
from src.prompts import SplitTextPromptV1, SplitTextPromptV2
|
@@ -17,16 +18,35 @@ class CharacterPhrase(BaseModel):
|
|
17 |
text: str
|
18 |
|
19 |
|
20 |
-
class
|
21 |
-
|
|
|
|
|
22 |
_characters: list[str]
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
def __init__(self, **data):
|
25 |
super().__init__(**data)
|
|
|
26 |
self._characters = list(set(phrase.character for phrase in self.phrases))
|
|
|
27 |
|
28 |
@property
|
29 |
-
def
|
|
|
|
|
|
|
|
|
30 |
return self._characters
|
31 |
|
32 |
def to_pretty_text(self):
|
@@ -38,65 +58,71 @@ class CharacterAnnotatedText(BaseModel):
|
|
38 |
return res
|
39 |
|
40 |
|
41 |
-
|
42 |
-
characters: list[str]
|
43 |
-
parts: list[CharacterPhrase]
|
44 |
-
|
45 |
-
def to_character_annotated_text(self):
|
46 |
-
return CharacterAnnotatedText(phrases=self.parts)
|
47 |
-
|
48 |
-
|
49 |
-
def create_split_text_chain_v1(llm_model: GPTModels):
|
50 |
llm = get_chat_llm(llm_model=llm_model, temperature=0.0)
|
51 |
-
llm = llm.with_structured_output(SplitTextOutputV1)
|
52 |
|
53 |
prompt = ChatPromptTemplate.from_messages(
|
54 |
[
|
55 |
-
SystemMessagePromptTemplate.from_template(
|
56 |
-
HumanMessagePromptTemplate.from_template(
|
57 |
]
|
58 |
)
|
59 |
|
60 |
-
chain =
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
return chain
|
62 |
|
63 |
|
64 |
-
|
65 |
-
text_raw: str
|
66 |
-
_phrases: list[CharacterPhrase]
|
67 |
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
so we don't check that opening xml tags match closing ones
|
73 |
-
"""
|
74 |
-
pattern = re.compile(r"(?:<([^<>]+)>)(.*?)(?:</\1>)")
|
75 |
-
res = pattern.findall(text)
|
76 |
-
res = [CharacterPhrase(character=x[0], text=x[1]) for x in res]
|
77 |
-
return res
|
78 |
|
79 |
def __init__(self, **data):
|
80 |
super().__init__(**data)
|
81 |
-
self.
|
82 |
|
83 |
@property
|
84 |
-
def
|
85 |
-
return self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
|
87 |
def to_character_annotated_text(self):
|
88 |
-
return CharacterAnnotatedText(phrases=self.
|
89 |
|
90 |
|
91 |
-
def
|
92 |
llm = get_chat_llm(llm_model=llm_model, temperature=0.0)
|
|
|
93 |
|
94 |
prompt = ChatPromptTemplate.from_messages(
|
95 |
[
|
96 |
-
SystemMessagePromptTemplate.from_template(
|
97 |
-
HumanMessagePromptTemplate.from_template(
|
98 |
]
|
99 |
)
|
100 |
|
101 |
-
chain = prompt | llm
|
102 |
return chain
|
|
|
|
|
|
|
|
6 |
HumanMessagePromptTemplate,
|
7 |
SystemMessagePromptTemplate,
|
8 |
)
|
9 |
+
from langchain_core.runnables import RunnablePassthrough
|
10 |
from pydantic import BaseModel
|
11 |
|
12 |
from src.prompts import SplitTextPromptV1, SplitTextPromptV2
|
|
|
18 |
text: str
|
19 |
|
20 |
|
21 |
+
class SplitTextOutput(BaseModel):
|
22 |
+
text_raw: str
|
23 |
+
text_annotated: str
|
24 |
+
_phrases: list[CharacterPhrase]
|
25 |
_characters: list[str]
|
26 |
|
27 |
+
@staticmethod
def _parse_phrases_from_xml_tags(text):
    """
    Extract ``<character>phrase</character>`` spans from the annotated text.

    We rely on the LLM to format the response correctly: only spans whose
    closing tag repeats the opening tag name are returned, anything else is
    ignored.
    """
    # re.DOTALL lets a phrase span several lines; without it "." stops at a
    # newline and multi-line phrases would be silently dropped.
    pattern = re.compile(r"(?:<([^<>]+)>)(.*?)(?:</\1>)", re.DOTALL)
    return [
        CharacterPhrase(character=name, text=phrase)
        for name, phrase in pattern.findall(text)
    ]
|
37 |
+
|
38 |
def __init__(self, **data):
    """Validate the fields, then derive phrases and characters from the annotated text."""
    super().__init__(**data)
    self._phrases = self._parse_phrases_from_xml_tags(self.text_annotated)
    # dict.fromkeys de-duplicates while keeping first-appearance order;
    # list(set(...)) produced an order that could change between runs.
    self._characters = list(
        dict.fromkeys(phrase.character for phrase in self.phrases)
    )
    # TODO: can apply post-processing to merge same adjacent xml tags
|
43 |
|
44 |
@property
def phrases(self) -> list[CharacterPhrase]:
    """Phrases stored in ``_phrases`` (set in ``__init__``)."""
    return self._phrases
|
47 |
+
|
48 |
+
@property
def characters(self) -> list[str]:
    """Unique character names stored in ``_characters`` (set in ``__init__``)."""
    return self._characters
|
51 |
|
52 |
def to_pretty_text(self):
|
|
|
58 |
return res
|
59 |
|
60 |
|
61 |
+
def create_split_text_chain(llm_model: GPTModels):
    """Build the chain that annotates raw text with character tags.

    The chain takes ``{"text": ...}``, stores the LLM answer under
    ``text_annotated`` and wraps both into a ``SplitTextOutput``.
    """
    chat_llm = get_chat_llm(llm_model=llm_model, temperature=0.0)

    message_templates = [
        SystemMessagePromptTemplate.from_template(SplitTextPromptV2.SYSTEM),
        HumanMessagePromptTemplate.from_template(SplitTextPromptV2.USER),
    ]
    annotate = (
        ChatPromptTemplate.from_messages(message_templates)
        | chat_llm
        | StrOutputParser()
    )

    with_annotation = RunnablePassthrough.assign(text_annotated=annotate)
    return with_annotation | (
        lambda state: SplitTextOutput(
            text_raw=state["text"], text_annotated=state["text_annotated"]
        )
    )
|
79 |
|
80 |
|
81 |
+
###### old code ######
|
|
|
|
|
82 |
|
83 |
+
|
84 |
+
class CharacterAnnotatedText(BaseModel):
    """Phrases attributed to characters, plus the derived set of character names."""

    phrases: list[CharacterPhrase]
    _characters: list[str]

    def __init__(self, **data):
        super().__init__(**data)
        # unique character names; order is whatever set iteration yields
        unique_names = {phrase.character for phrase in self.phrases}
        self._characters = list(unique_names)

    @property
    def characters(self):
        """Unique character names found in ``phrases``."""
        return self._characters

    def to_pretty_text(self):
        """Render the characters list, a separator and one line per phrase."""
        header = [f"characters: {self.characters}", "-" * 20]
        body = [f"[{phrase.character}] {phrase.text}" for phrase in self.phrases]
        return "\n".join(header + body)
|
103 |
+
|
104 |
+
|
105 |
+
class SplitTextOutputOld(BaseModel):
    """Structured output of the old split-text chain."""

    characters: list[str]
    parts: list[CharacterPhrase]

    def to_character_annotated_text(self):
        """Wrap ``parts`` into a ``CharacterAnnotatedText``."""
        return CharacterAnnotatedText(phrases=self.parts)
|
111 |
|
112 |
|
113 |
+
def create_split_text_chain_old(llm_model: GPTModels):
    """Build the old split-text chain with JSON-mode structured output.

    The LLM is configured to return a ``SplitTextOutputOld`` and is driven by
    the V1 system/user prompts.
    """
    base_llm = get_chat_llm(llm_model=llm_model, temperature=0.0)
    structured_llm = base_llm.with_structured_output(
        SplitTextOutputOld, method="json_mode"
    )

    message_templates = [
        SystemMessagePromptTemplate.from_template(SplitTextPromptV1.SYSTEM),
        HumanMessagePromptTemplate.from_template(SplitTextPromptV1.USER),
    ]
    return ChatPromptTemplate.from_messages(message_templates) | structured_llm
|
126 |
+
|
127 |
+
|
128 |
+
## end of old code ##
|
@@ -1,16 +1,16 @@
|
|
1 |
-
import os
|
2 |
import typing as t
|
3 |
|
4 |
from dotenv import load_dotenv
|
5 |
from elevenlabs.client import AsyncElevenLabs, ElevenLabs
|
6 |
-
|
7 |
|
8 |
load_dotenv()
|
9 |
|
|
|
10 |
|
11 |
-
ELEVEN_CLIENT = ElevenLabs(api_key=
|
12 |
|
13 |
-
ELEVEN_CLIENT_ASYNC = AsyncElevenLabs(api_key=
|
14 |
|
15 |
|
16 |
def tts_stream(voice_id: str, text: str) -> t.Iterator[bytes]:
|
@@ -26,8 +26,33 @@ def tts(voice_id: str, text: str):
|
|
26 |
return combined
|
27 |
|
28 |
|
29 |
-
async def tts_astream(
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
async for chunk in async_iter:
|
32 |
if chunk:
|
33 |
yield chunk
|
|
|
|
|
1 |
import typing as t
|
2 |
|
3 |
from dotenv import load_dotenv
|
4 |
from elevenlabs.client import AsyncElevenLabs, ElevenLabs
|
5 |
+
from elevenlabs import VoiceSettings
|
6 |
|
7 |
load_dotenv()

# NOTE(review): imported after load_dotenv() — presumably because src.config
# reads the API keys from os.environ at import time; confirm before reordering.
from src.config import logger, ELEVENLABS_API_KEY

# module-level sync client
ELEVEN_CLIENT = ElevenLabs(api_key=ELEVENLABS_API_KEY)

# module-level async client
ELEVEN_CLIENT_ASYNC = AsyncElevenLabs(api_key=ELEVENLABS_API_KEY)
|
14 |
|
15 |
|
16 |
def tts_stream(voice_id: str, text: str) -> t.Iterator[bytes]:
|
|
|
26 |
return combined
|
27 |
|
28 |
|
29 |
+
async def tts_astream(
    voice_id: str, text: str, params: dict | None = None
) -> t.AsyncIterator[bytes]:
    """Stream synthesized speech chunks for ``text`` spoken with ``voice_id``.

    When ``params`` is given, its ``stability``, ``similarity_boost`` and
    ``style`` entries are forwarded as voice settings. Empty chunks are
    skipped.
    """
    request_kwargs = {"voice_id": voice_id, "text": text}

    if params is not None:
        settings = VoiceSettings(
            stability=params.get("stability"),
            similarity_boost=params.get("similarity_boost"),
            style=params.get("style"),
        )
        request_kwargs["voice_settings"] = settings  # type: ignore

    logger.info(f"call to 11labs TTS endpoint with params: {request_kwargs}")
    audio_stream = ELEVEN_CLIENT_ASYNC.text_to_speech.convert(**request_kwargs)
    async for piece in audio_stream:
        if not piece:
            continue
        yield piece
|
46 |
+
|
47 |
+
|
48 |
+
async def sound_generation_astream(
    sound_generation_data: dict,
) -> t.AsyncIterator[bytes]:
    """Stream generated sound-effect chunks.

    ``sound_generation_data`` must contain the keys ``text``,
    ``duration_seconds`` and ``prompt_influence``. Empty chunks are skipped.
    """
    effect_text = sound_generation_data["text"]
    duration = sound_generation_data["duration_seconds"]
    influence = sound_generation_data["prompt_influence"]

    effect_stream = ELEVEN_CLIENT_ASYNC.text_to_sound_effects.convert(
        text=effect_text,
        duration_seconds=duration,
        prompt_influence=influence,
    )
    async for piece in effect_stream:
        if not piece:
            continue
        yield piece
|
@@ -15,3 +15,7 @@ def get_chat_llm(llm_model: GPTModels, temperature=0.0):
|
|
15 |
model=llm_model, temperature=temperature, timeout=Timeout(60, connect=4)
|
16 |
)
|
17 |
return llm
|
|
|
|
|
|
|
|
|
|
15 |
model=llm_model, temperature=temperature, timeout=Timeout(60, connect=4)
|
16 |
)
|
17 |
return llm
|
18 |
+
|
19 |
+
|
20 |
+
async def consume_aiter(aiterator):
    """Drain an async iterator and return its items as a list."""
    collected = []
    async for item in aiterator:
        collected.append(item)
    return collected
|