In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os

import dotenv
import pandas as pd
from httpx import Timeout
from pydantic import BaseModel
from langchain_core.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
from langchain_openai import ChatOpenAI
from langchain_community.callbacks import get_openai_callback

import data.samples_to_split as samples

from src.lc_callbacks import LCMessageLoggerAsync
from src.utils import GPTModels
from src.text_split_chain import create_split_text_chain

In [3]:
dotenv.load_dotenv()

True

## voices eda

In [4]:
# df = pd.read_csv('data/11labs_tts_voices.csv')
df = pd.read_csv('data/11labs_available_tts_voices.csv')
df["age"] = df["age"].str.replace(" ", "_").str.replace("-", "_")
print(df.shape)

(468, 14)


In [5]:
df.columns

Index(['voice_id', 'name', 'preview_url', 'owner_id', 'permission_on_resource',
       'is_legacy', 'is_mixed', 'accent', 'description', 'age', 'gender',
       'category', 'language', 'descriptive'],
      dtype='object')

In [6]:
df['language'].value_counts(dropna=False)

language
NaN         264
en          203
romanian      1
Name: count, dtype: int64

In [7]:
df['gender'].value_counts(dropna=False)

gender
female        231
male          230
neutral         6
non-binary      1
Name: count, dtype: int64

In [8]:
df['age'].value_counts(dropna=False)

age
middle_aged    183
young          143
old            140
NaN              2
Name: count, dtype: int64

In [14]:
df.groupby(['age', 'gender'], dropna=False)['voice_id'].count().unstack(fill_value=0)

gender,female,male,neutral,non-binary
age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
middle_aged,48,130,4,1
old,100,39,1,0
young,83,59,1,0
,0,2,0,0


In [13]:
df.groupby(['language', 'age', 'gender'], dropna=False)['voice_id'].count().unstack(fill_value=0)

Unnamed: 0_level_0,gender,female,male,neutral,non-binary
language,age,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
en,middle_aged,30,91,2,0
en,old,3,3,0,0
en,young,34,38,0,0
en,,0,2,0,0
romanian,old,1,0,0,0
,middle_aged,18,39,2,1
,old,96,36,1,0
,young,49,21,1,0


In [15]:
df['descriptive'].value_counts(dropna=False)

descriptive
confident       64
calm            44
casual          34
pleasant        31
deep            28
NaN             26
professional    26
upbeat          22
wise            20
formal          17
intense         13
serious         13
meditative      11
modulated       11
excited         10
husky           10
mature           8
classy           8
chill            7
neutral          7
crisp            6
gentle           6
childish         6
hyped            6
cute             5
sassy            4
soft             4
rough            3
grumpy           3
whispery         3
robotic          3
relaxed          3
raspy            2
cheeky           1
sad              1
anxious          1
motivational     1
Name: count, dtype: int64

In [17]:
age_group = 'old'
gender = 'male'
df_filtered = df[(df['age'] == age_group) & (df['gender'] == gender)]
df_filtered.shape

(39, 14)

In [18]:
df_filtered.sample(3)

Unnamed: 0,voice_id,name,preview_url,owner_id,permission_on_resource,is_legacy,is_mixed,accent,description,age,gender,category,language,descriptive
245,ugI9yHu7QMtMOjozITa3,Nimbus - deep & meditative,https://storage.googleapis.com/eleven-public-p...,,admin,False,False,american,,old,male,entertainment_tv,,neutral
284,1SJjcjy45jFu6erSHVWq,Howard - American Radio Voice,https://storage.googleapis.com/eleven-public-p...,,admin,False,False,american,,old,male,advertisement,,modulated
362,oUAzGw71wG6JCbHMK33s,Mark - calm and wise teacher,https://storage.googleapis.com/eleven-public-p...,,admin,False,False,british,,old,male,informative_educational,,deep


In [20]:
df_filtered.sample(3)['voice_id'].to_list()

['HrciSEXYMv69BAJ4ixOW', 'oUAzGw71wG6JCbHMK33s', 'Zl8mecngHM53e1hl151S']

## split text into character phrases

In [4]:
chain = create_split_text_chain(llm_model=GPTModels.GPT_4o)
# chain = create_split_text_chain(llm_model=GPTModels.GPT_4_TURBO_2024_04_09)
with get_openai_callback() as cb:
    res = chain.invoke(
        {"text": samples.GATSBY_2}, config={"callbacks": [LCMessageLoggerAsync()]}
    )

2024-10-10 02:34:52,755 [INFO] audio-books (lc_callbacks.py): call to <failed to determine LLM> with 2 messages:
{'role': 'system', 'content': 'you are provided with the book sample.\nplease rewrite it and insert xml tags indicating character to whom current phrase belongs.\nfor example: <narrator>I looked at her</narrator><Jill>What are you looking at?</Jill>\n\nNotes:\n- sometimes narrator is one of characters taking part in the action.\nin this case use narrator\'s name (if available) instead of "narrator"\n- if it\'s impossible to identify character name from the text provided, use codes "c1", "c2", etc,\nwhere "c" prefix means character and number is used to enumerate unknown characters\n- all quotes of direct speech must be attributed to characters, for example:\n<Tom>“She’s a nice girl,”</Tom><narrator>said Tom after a moment.</narrator>\nmind that sometimes narrator could also be a character.\n- use ALL available context to determine the character.\nsometimes the character name

In [5]:
res

SplitTextOutput(text_raw='Inside, the crimson room bloomed with light. Tom and Miss Baker sat at\neither end of the long couch and she read aloud to him from the\nSaturday Evening Post—the words, murmurous and uninflected, running\ntogether in a soothing tune. The lamplight, bright on his boots and\ndull on the autumn-leaf yellow of her hair, glinted along the paper as\nshe turned a page with a flutter of slender muscles in her arms.\n\nWhen we came in she held us silent for a moment with a lifted hand.\n\n“To be continued,” she said, tossing the magazine on the table, “in\nour very next issue.”\n\nHer body asserted itself with a restless movement of her knee, and she\nstood up.\n\n“Ten o’clock,” she remarked, apparently finding the time on the\nceiling. “Time for this good girl to go to bed.”\n\n“Jordan’s going to play in the tournament tomorrow,” explained Daisy,\n“over at Westchester.”\n\n“Oh—you’re Jordan Baker.”\n\nI knew now why her face was familiar—its pleasing contemptuous\nex

In [6]:
res.characters

['Tom', 'Jordan', 'Daisy', 'narrator']

In [7]:
print(res.text_annotated)

<narrator>Inside, the crimson room bloomed with light. Tom and Miss Baker sat at either end of the long couch and she read aloud to him from the Saturday Evening Post—the words, murmurous and uninflected, running together in a soothing tune. The lamplight, bright on his boots and dull on the autumn-leaf yellow of her hair, glinted along the paper as she turned a page with a flutter of slender muscles in her arms.</narrator>

<narrator>When we came in she held us silent for a moment with a lifted hand.</narrator>

<Jordan>“To be continued,”</Jordan> <narrator>she said, tossing the magazine on the table,</narrator> <Jordan>“in our very next issue.”</Jordan>

<narrator>Her body asserted itself with a restless movement of her knee, and she stood up.</narrator>

<Jordan>“Ten o’clock,”</Jordan> <narrator>she remarked, apparently finding the time on the ceiling.</narrator> <Jordan>“Time for this good girl to go to bed.”</Jordan>

<Daisy>“Jordan’s going to play in the tournament tomorrow,”</Da

In [8]:
print(res.to_pretty_text())

characters: ['Tom', 'Jordan', 'Daisy', 'narrator']
--------------------
[narrator] Inside, the crimson room bloomed with light. Tom and Miss Baker sat at either end of the long couch and she read aloud to him from the Saturday Evening Post—the words, murmurous and uninflected, running together in a soothing tune. The lamplight, bright on his boots and dull on the autumn-leaf yellow of her hair, glinted along the paper as she turned a page with a flutter of slender muscles in her arms.
[narrator] When we came in she held us silent for a moment with a lifted hand.
[Jordan] “To be continued,”
[narrator] she said, tossing the magazine on the table,
[Jordan] “in our very next issue.”
[narrator] Her body asserted itself with a restless movement of her knee, and she stood up.
[Jordan] “Ten o’clock,”
[narrator] she remarked, apparently finding the time on the ceiling.
[Jordan] “Time for this good girl to go to bed.”
[Daisy] “Jordan’s going to play in the tournament tomorrow,”
[narrator] explai

In [9]:
print(f'LLM usage:\n\n{cb}')

LLM usage:

Tokens Used: 1817
	Prompt Tokens: 877
	Completion Tokens: 940
Successful Requests: 1
Total Cost (USD): $0.0115925


## map characters to voices

In [10]:
from src.select_voice_chain import create_voice_mapping_chain

In [11]:
chain = create_voice_mapping_chain(llm_model=GPTModels.GPT_4_TURBO_2024_04_09)

In [12]:
chain

ChatPromptTemplate(input_variables=['characters', 'text'], input_types={}, partial_variables={'available_genders': '"male", "female"', 'available_age_groups': '"old", "middle_aged", "young"', 'format_instructions': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"$defs": {"CharacterProperties": {"properties": {"gender": {"title": "Gender", "type": "string"}, "age_group": {"title": "Age Group", "type": "string"}}, "required": ["gender", "age_group"], "title": "CharacterProperties", "type": "object"}}, "properties": {"character2props": {"additionalProperties": {"$ref": "#/$defs/Chara

In [14]:
res2 = chain.invoke(
    {"text": res.text_annotated, "characters": res.characters},
    config={"callbacks": [LCMessageLoggerAsync()]},
)

2024-10-10 02:37:46,347 [INFO] audio-books (lc_callbacks.py): call to gpt-4-turbo-2024-04-09 with 2 messages:
{'role': 'system', 'content': 'You are a helpful assistant proficient in literature and psychology.\nOur goal is to create an audio book from the given text.\nFor that we need to hire voice actors.\nPlease help us to find the right actor for each character present in the text.\n\nYou are provided with the text split by the characters\nto whom text parts belong to.\n\nYour task is to assign available properties to each character provided.\nList of available properties:\n- gender: "male", "female"\n- age_group: "old", "middle_aged", "young"\n\nNOTES:\n- assign EXACTLY ONE property value for each property\n- select properties values ONLY from the list of AVAILABLE property values\n- fill properties for ALL characters from the list provided\n- DO NOT include any characters absent in the list provided\n\nThe output should be formatted as a JSON instance that conforms to the JSON sch

In [15]:
res2

AllCharactersProperties(character2props={'Tom': CharacterProperties(gender='male', age_group='middle_aged'), 'Jordan': CharacterProperties(gender='female', age_group='young'), 'Daisy': CharacterProperties(gender='female', age_group='young'), 'narrator': CharacterProperties(gender='male', age_group='middle_aged')})

In [None]:
voices = pd.read_csv("11labs_available_tts_voices.csv")
voices.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22 entries, 0 to 21
Data columns (total 14 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   voice_id                22 non-null     object 
 1   name                    22 non-null     object 
 2   preview_url             22 non-null     object 
 3   owner_id                0 non-null      float64
 4   permission_on_resource  2 non-null      object 
 5   is_legacy               22 non-null     bool   
 6   is_mixed                22 non-null     bool   
 7   accent                  22 non-null     object 
 8   description             20 non-null     object 
 9   age                     22 non-null     object 
 10  gender                  22 non-null     object 
 11  category                22 non-null     object 
 12  language                2 non-null      object 
 13  descriptive             2 non-null      object 
dtypes: bool(2), float64(1), object(11)
memory us

In [None]:
voices["age"].unique()

array(['middle_aged', 'young', 'old'], dtype=object)

In [None]:
voices["gender"].unique()

array(['female', 'male', 'non-binary', 'neutral'], dtype=object)