{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Audio-book voice prototyping\n", "\n", "Exploratory notebook: (1) EDA of the available TTS voices, (2) splitting a book sample into per-character phrases with an LLM chain, (3) mapping the detected characters to voices." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Auto-reload imported modules before each cell runs, so edits to project code are picked up.\n", "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "import dotenv\n", "import pandas as pd\n", "from httpx import Timeout\n", "from pydantic import BaseModel\n", "from langchain_core.prompts import (\n", "    ChatPromptTemplate,\n", "    SystemMessagePromptTemplate,\n", "    HumanMessagePromptTemplate,\n", ")\n", "from langchain_openai import ChatOpenAI\n", "from langchain_community.callbacks import get_openai_callback\n", "\n", "import data.samples_to_split as samples\n", "\n", "from src.lc_callbacks import LCMessageLoggerAsync\n", "from src.utils import GPTModels\n", "from src.text_split_chain import create_split_text_chain" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pull settings from a local .env file into the process environment.\n", "dotenv.load_dotenv()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Voices EDA" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(468, 14)\n" ] } ], "source": [ "# Load the catalogue of available voices and normalise the `age` labels so that\n", "# spaces and hyphens both become underscores.\n", "# NOTE: 'data/11labs_tts_voices.csv' is an alternative input that used to be commented out here.\n", "df = pd.read_csv('data/11labs_available_tts_voices.csv').assign(\n", "    age=lambda voices: voices[\"age\"].str.replace(r\"[ -]\", \"_\", regex=True)\n", ")\n", "print(df.shape)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['voice_id', 'name', 'preview_url', 'owner_id', 'permission_on_resource',\n", " 'is_legacy', 'is_mixed', 'accent', 'description', 'age', 'gender',\n", " 'category', 'language', 'descriptive'],\n", " dtype='object')" ] }, "execution_count": 5, "metadata": {},
"output_type": "execute_result" } ], "source": [ "df.columns" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "language\n", "NaN 264\n", "en 203\n", "romanian 1\n", "Name: count, dtype: int64" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# dropna=False keeps voices without a language label as their own NaN row.\n", "df[\"language\"].value_counts(dropna=False)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "gender\n", "female 231\n", "male 230\n", "neutral 6\n", "non-binary 1\n", "Name: count, dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Gender distribution of the catalogue (missing values would show up as NaN).\n", "df[\"gender\"].value_counts(dropna=False)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "age\n", "middle_aged 183\n", "young 143\n", "old 140\n", "NaN 2\n", "Name: count, dtype: int64" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Age-group distribution after the underscore normalisation above.\n", "df[\"age\"].value_counts(dropna=False)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genderfemalemaleneutralnon-binary
age
middle_aged4813041
old1003910
young835910
NaN0200
\n", "
" ], "text/plain": [ "gender female male neutral non-binary\n", "age \n", "middle_aged 48 130 4 1\n", "old 100 39 1 0\n", "young 83 59 1 0\n", "NaN 0 2 0 0" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Voice counts per age group (rows) and gender (columns); dropna=False keeps NaN ages as a row.\n", "(\n", "    df.groupby([\"age\", \"gender\"], dropna=False)[\"voice_id\"]\n", "    .count()\n", "    .unstack(fill_value=0)\n", ")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
genderfemalemaleneutralnon-binary
languageage
enmiddle_aged309120
old3300
young343800
NaN0200
romanianold1000
NaNmiddle_aged183921
old963610
young492110
\n", "
" ], "text/plain": [ "gender female male neutral non-binary\n", "language age \n", "en middle_aged 30 91 2 0\n", " old 3 3 0 0\n", " young 34 38 0 0\n", " NaN 0 2 0 0\n", "romanian old 1 0 0 0\n", "NaN middle_aged 18 39 2 1\n", " old 96 36 1 0\n", " young 49 21 1 0" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Voice counts per (language, age) row and gender column; missing labels are kept as NaN groups.\n", "(\n", "    df.groupby([\"language\", \"age\", \"gender\"], dropna=False)[\"voice_id\"]\n", "    .count()\n", "    .unstack(fill_value=0)\n", ")" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "descriptive\n", "confident 64\n", "calm 44\n", "casual 34\n", "pleasant 31\n", "deep 28\n", "NaN 26\n", "professional 26\n", "upbeat 22\n", "wise 20\n", "formal 17\n", "intense 13\n", "serious 13\n", "meditative 11\n", "modulated 11\n", "excited 10\n", "husky 10\n", "mature 8\n", "classy 8\n", "chill 7\n", "neutral 7\n", "crisp 6\n", "gentle 6\n", "childish 6\n", "hyped 6\n", "cute 5\n", "sassy 4\n", "soft 4\n", "rough 3\n", "grumpy 3\n", "whispery 3\n", "robotic 3\n", "relaxed 3\n", "raspy 2\n", "cheeky 1\n", "sad 1\n", "anxious 1\n", "motivational 1\n", "Name: count, dtype: int64" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Frequency of each `descriptive` tag, including voices that have none (NaN).\n", "df[\"descriptive\"].value_counts(dropna=False)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(39, 14)" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Narrow the catalogue to one age/gender bucket; the sampling cells below read `df_filtered`.\n", "age_group, gender = 'old', 'male'\n", "bucket_mask = (df['age'] == age_group) & (df['gender'] == gender)\n", "df_filtered = df.loc[bucket_mask]\n", "df_filtered.shape" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
voice_idnamepreview_urlowner_idpermission_on_resourceis_legacyis_mixedaccentdescriptionagegendercategorylanguagedescriptive
245ugI9yHu7QMtMOjozITa3Nimbus - deep & meditativehttps://storage.googleapis.com/eleven-public-p...NaNadminFalseFalseamericanNaNoldmaleentertainment_tvNaNneutral
2841SJjcjy45jFu6erSHVWqHoward - American Radio Voicehttps://storage.googleapis.com/eleven-public-p...NaNadminFalseFalseamericanNaNoldmaleadvertisementNaNmodulated
362oUAzGw71wG6JCbHMK33sMark - calm and wise teacherhttps://storage.googleapis.com/eleven-public-p...NaNadminFalseFalsebritishNaNoldmaleinformative_educationalNaNdeep
\n", "
" ], "text/plain": [ " voice_id name \\\n", "245 ugI9yHu7QMtMOjozITa3 Nimbus - deep & meditative \n", "284 1SJjcjy45jFu6erSHVWq Howard - American Radio Voice \n", "362 oUAzGw71wG6JCbHMK33s Mark - calm and wise teacher \n", "\n", " preview_url owner_id \\\n", "245 https://storage.googleapis.com/eleven-public-p... NaN \n", "284 https://storage.googleapis.com/eleven-public-p... NaN \n", "362 https://storage.googleapis.com/eleven-public-p... NaN \n", "\n", " permission_on_resource is_legacy is_mixed accent description age \\\n", "245 admin False False american NaN old \n", "284 admin False False american NaN old \n", "362 admin False False british NaN old \n", "\n", " gender category language descriptive \n", "245 male entertainment_tv NaN neutral \n", "284 male advertisement NaN modulated \n", "362 male informative_educational NaN deep " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Preview a few voices from the selected bucket.\n", "# - seeded, so Restart & Run All shows the same rows every time;\n", "# - capped at the bucket size, because DataFrame.sample raises ValueError when n exceeds\n", "#   the number of rows and some age/gender buckets hold fewer than three voices.\n", "N_PREVIEW, PREVIEW_SEED = 3, 42\n", "df_filtered.sample(n=min(N_PREVIEW, len(df_filtered)), random_state=PREVIEW_SEED)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['HrciSEXYMv69BAJ4ixOW', 'oUAzGw71wG6JCbHMK33s', 'Zl8mecngHM53e1hl151S']" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Same n and seed as the preview cell, so these are the IDs of the rows previewed there.\n", "df_filtered.sample(n=min(N_PREVIEW, len(df_filtered)), random_state=PREVIEW_SEED)['voice_id'].to_list()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Split text into character phrases" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-10-10 02:34:52,755 [INFO] audio-books (lc_callbacks.py): call to with 2 messages:\n", "{'role': 'system', 'content': 'you are provided with the book sample.\\nplease rewrite it and insert xml tags indicating character to whom current phrase belongs.\\nfor example: I looked at herWhat are you looking at?\\n\\nNotes:\\n- sometimes narrator is one of characters taking part in the
action.\\nin this case use narrator\\'s name (if available) instead of \"narrator\"\\n- if it\\'s impossible to identify character name from the text provided, use codes \"c1\", \"c2\", etc,\\nwhere \"c\" prefix means character and number is used to enumerate unknown characters\\n- all quotes of direct speech must be attributed to characters, for example:\\n“She’s a nice girl,”said Tom after a moment.\\nmind that sometimes narrator could also be a character.\\n- use ALL available context to determine the character.\\nsometimes the character name becomes clear from the following phrases\\n- DO NOT include in your response anything except for the original text with character xml tags!!!\\n'}\n", "{'role': 'human', 'content': 'Here is the book sample:\\n---\\nInside, the crimson room bloomed with light. Tom and Miss Baker sat at\\neither end of the long couch and she read aloud to him from the\\nSaturday Evening Post—the words, murmurous and uninflected, running\\ntogether in a soothing tune. The lamplight, bright on his boots and\\ndull on the autumn-leaf yellow of her hair, glinted along the paper as\\nshe turned a page with a flutter of slender muscles in her arms.\\n\\nWhen we came in she held us silent for a moment with a lifted hand.\\n\\n“To be continued,” she said, tossing the magazine on the table, “in\\nour very next issue.”\\n\\nHer body asserted itself with a restless movement of her knee, and she\\nstood up.\\n\\n“Ten o’clock,” she remarked, apparently finding the time on the\\nceiling. “Time for this good girl to go to bed.”\\n\\n“Jordan’s going to play in the tournament tomorrow,” explained Daisy,\\n“over at Westchester.”\\n\\n“Oh—you’re Jordan Baker.”\\n\\nI knew now why her face was familiar—its pleasing contemptuous\\nexpression had looked out at me from many rotogravure pictures of the\\nsporting life at Asheville and Hot Springs and Palm Beach. 
I had heard\\nsome story of her too, a critical, unpleasant story, but what it was I\\nhad forgotten long ago.\\n\\n“Good night,” she said softly. “Wake me at eight, won’t you.”\\n\\n“If you’ll get up.”\\n\\n“I will. Good night, Mr. Carraway. See you anon.”\\n\\n“Of course you will,” confirmed Daisy. “In fact I think I’ll arrange a\\nmarriage. Come over often, Nick, and I’ll sort of—oh—fling you\\ntogether. You know—lock you up accidentally in linen closets and push\\nyou out to sea in a boat, and all that sort of thing—”\\n\\n“Good night,” called Miss Baker from the stairs. “I haven’t heard a\\nword.”\\n\\n“She’s a nice girl,” said Tom after a moment. “They oughtn’t to let\\nher run around the country this way.”\\n\\n“Who oughtn’t to?” inquired Daisy coldly.\\n\\n“Her family.”\\n\\n“Her family is one aunt about a thousand years old. Besides, Nick’s\\ngoing to look after her, aren’t you, Nick? She’s going to spend lots\\nof weekends out here this summer. I think the home influence will be\\nvery good for her.”\\n\\nDaisy and Tom looked at each other for a moment in silence.\\n\\n“Is she from New York?” I asked quickly.\\n\\n“From Louisville. Our white girlhood was passed together there. Our\\nbeautiful white—”\\n\\n“Did you give Nick a little heart to heart talk on the veranda?”\\ndemanded Tom suddenly.\\n\\n“Did I?” She looked at me. “I can’t seem to remember, but I think we\\ntalked about the Nordic race. Yes, I’m sure we did. It sort of crept\\nup on us and first thing you know—”\\n\\n“Don’t believe everything you hear, Nick,” he advised me.\\n'}\n", "2024-10-10 02:35:04,369 [INFO] httpx (_client.py): HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", "2024-10-10 02:35:04,383 [INFO] audio-books (lc_callbacks.py): raw LLM response: \"Inside, the crimson room bloomed with light. 
Tom and Miss Baker sat at either end of the long couch and she read aloud to him from the Saturday Evening Post—the words, murmurous and uninflected, running together in a soothing tune. The lamplight, bright on his boots and dull on the autumn-leaf yellow of her hair, glinted along the paper as she turned a page with a flutter of slender muscles in her arms.\n", "\n", "When we came in she held us silent for a moment with a lifted hand.\n", "\n", "“To be continued,” she said, tossing the magazine on the table, “in our very next issue.”\n", "\n", "Her body asserted itself with a restless movement of her knee, and she stood up.\n", "\n", "“Ten o’clock,” she remarked, apparently finding the time on the ceiling. “Time for this good girl to go to bed.”\n", "\n", "“Jordan’s going to play in the tournament tomorrow,” explained Daisy, “over at Westchester.”\n", "\n", "“Oh—you’re Jordan Baker.”\n", "\n", "I knew now why her face was familiar—its pleasing contemptuous expression had looked out at me from many rotogravure pictures of the sporting life at Asheville and Hot Springs and Palm Beach. I had heard some story of her too, a critical, unpleasant story, but what it was I had forgotten long ago.\n", "\n", "“Good night,” she said softly. “Wake me at eight, won’t you.”\n", "\n", "“If you’ll get up.”\n", "\n", "“I will. Good night, Mr. Carraway. See you anon.”\n", "\n", "“Of course you will,” confirmed Daisy. “In fact I think I’ll arrange a marriage. Come over often, Nick, and I’ll sort of—oh—fling you together. You know—lock you up accidentally in linen closets and push you out to sea in a boat, and all that sort of thing—”\n", "\n", "“Good night,” called Miss Baker from the stairs. “I haven’t heard a word.”\n", "\n", "“She’s a nice girl,” said Tom after a moment. “They oughtn’t to let her run around the country this way.”\n", "\n", "“Who oughtn’t to?” inquired Daisy coldly.\n", "\n", "“Her family.”\n", "\n", "“Her family is one aunt about a thousand years old. 
Besides, Nick’s going to look after her, aren’t you, Nick? She’s going to spend lots of weekends out here this summer. I think the home influence will be very good for her.”\n", "\n", "Daisy and Tom looked at each other for a moment in silence.\n", "\n", "“Is she from New York?” I asked quickly.\n", "\n", "“From Louisville. Our white girlhood was passed together there. Our beautiful white—”\n", "\n", "“Did you give Nick a little heart to heart talk on the veranda?” demanded Tom suddenly.\n", "\n", "“Did I?” She looked at me. “I can’t seem to remember, but I think we talked about the Nordic race. Yes, I’m sure we did. It sort of crept up on us and first thing you know—”\n", "\n", "“Don’t believe everything you hear, Nick,” he advised me.\"\n" ] } ], "source": [ "# Build the text-splitting chain on GPT-4o.\n", "# (GPTModels.GPT_4_TURBO_2024_04_09 is the alternative model to try here.)\n", "chain = create_split_text_chain(llm_model=GPTModels.GPT_4o)\n", "\n", "# The logger callback writes the prompt messages and the raw LLM response to the log;\n", "# `cb` collects token usage and cost for the call and is printed further down.\n", "split_inputs = {\"text\": samples.GATSBY_2}\n", "run_config = {\"callbacks\": [LCMessageLoggerAsync()]}\n", "with get_openai_callback() as cb:\n", "    res = chain.invoke(split_inputs, config=run_config)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "SplitTextOutput(text_raw='Inside, the crimson room bloomed with light. Tom and Miss Baker sat at\\neither end of the long couch and she read aloud to him from the\\nSaturday Evening Post—the words, murmurous and uninflected, running\\ntogether in a soothing tune. The lamplight, bright on his boots and\\ndull on the autumn-leaf yellow of her hair, glinted along the paper as\\nshe turned a page with a flutter of slender muscles in her arms.\\n\\nWhen we came in she held us silent for a moment with a lifted hand.\\n\\n“To be continued,” she said, tossing the magazine on the table, “in\\nour very next issue.”\\n\\nHer body asserted itself with a restless movement of her knee, and she\\nstood up.\\n\\n“Ten o’clock,” she remarked, apparently finding the time on the\\nceiling.
“Time for this good girl to go to bed.”\\n\\n“Jordan’s going to play in the tournament tomorrow,” explained Daisy,\\n“over at Westchester.”\\n\\n“Oh—you’re Jordan Baker.”\\n\\nI knew now why her face was familiar—its pleasing contemptuous\\nexpression had looked out at me from many rotogravure pictures of the\\nsporting life at Asheville and Hot Springs and Palm Beach. I had heard\\nsome story of her too, a critical, unpleasant story, but what it was I\\nhad forgotten long ago.\\n\\n“Good night,” she said softly. “Wake me at eight, won’t you.”\\n\\n“If you’ll get up.”\\n\\n“I will. Good night, Mr. Carraway. See you anon.”\\n\\n“Of course you will,” confirmed Daisy. “In fact I think I’ll arrange a\\nmarriage. Come over often, Nick, and I’ll sort of—oh—fling you\\ntogether. You know—lock you up accidentally in linen closets and push\\nyou out to sea in a boat, and all that sort of thing—”\\n\\n“Good night,” called Miss Baker from the stairs. “I haven’t heard a\\nword.”\\n\\n“She’s a nice girl,” said Tom after a moment. “They oughtn’t to let\\nher run around the country this way.”\\n\\n“Who oughtn’t to?” inquired Daisy coldly.\\n\\n“Her family.”\\n\\n“Her family is one aunt about a thousand years old. Besides, Nick’s\\ngoing to look after her, aren’t you, Nick? She’s going to spend lots\\nof weekends out here this summer. I think the home influence will be\\nvery good for her.”\\n\\nDaisy and Tom looked at each other for a moment in silence.\\n\\n“Is she from New York?” I asked quickly.\\n\\n“From Louisville. Our white girlhood was passed together there. Our\\nbeautiful white—”\\n\\n“Did you give Nick a little heart to heart talk on the veranda?”\\ndemanded Tom suddenly.\\n\\n“Did I?” She looked at me. “I can’t seem to remember, but I think we\\ntalked about the Nordic race. Yes, I’m sure we did. 
It sort of crept\\nup on us and first thing you know—”\\n\\n“Don’t believe everything you hear, Nick,” he advised me.\\n', text_annotated='Inside, the crimson room bloomed with light. Tom and Miss Baker sat at either end of the long couch and she read aloud to him from the Saturday Evening Post—the words, murmurous and uninflected, running together in a soothing tune. The lamplight, bright on his boots and dull on the autumn-leaf yellow of her hair, glinted along the paper as she turned a page with a flutter of slender muscles in her arms.\\n\\nWhen we came in she held us silent for a moment with a lifted hand.\\n\\n“To be continued,” she said, tossing the magazine on the table, “in our very next issue.”\\n\\nHer body asserted itself with a restless movement of her knee, and she stood up.\\n\\n“Ten o’clock,” she remarked, apparently finding the time on the ceiling. “Time for this good girl to go to bed.”\\n\\n“Jordan’s going to play in the tournament tomorrow,” explained Daisy, “over at Westchester.”\\n\\n“Oh—you’re Jordan Baker.”\\n\\nI knew now why her face was familiar—its pleasing contemptuous expression had looked out at me from many rotogravure pictures of the sporting life at Asheville and Hot Springs and Palm Beach. I had heard some story of her too, a critical, unpleasant story, but what it was I had forgotten long ago.\\n\\n“Good night,” she said softly. “Wake me at eight, won’t you.”\\n\\n“If you’ll get up.”\\n\\n“I will. Good night, Mr. Carraway. See you anon.”\\n\\n“Of course you will,” confirmed Daisy. “In fact I think I’ll arrange a marriage. Come over often, Nick, and I’ll sort of—oh—fling you together. You know—lock you up accidentally in linen closets and push you out to sea in a boat, and all that sort of thing—”\\n\\n“Good night,” called Miss Baker from the stairs. “I haven’t heard a word.”\\n\\n“She’s a nice girl,” said Tom after a moment. 
“They oughtn’t to let her run around the country this way.”\\n\\n“Who oughtn’t to?” inquired Daisy coldly.\\n\\n“Her family.”\\n\\n“Her family is one aunt about a thousand years old. Besides, Nick’s going to look after her, aren’t you, Nick? She’s going to spend lots of weekends out here this summer. I think the home influence will be very good for her.”\\n\\nDaisy and Tom looked at each other for a moment in silence.\\n\\n“Is she from New York?” I asked quickly.\\n\\n“From Louisville. Our white girlhood was passed together there. Our beautiful white—”\\n\\n“Did you give Nick a little heart to heart talk on the veranda?” demanded Tom suddenly.\\n\\n“Did I?” She looked at me. “I can’t seem to remember, but I think we talked about the Nordic race. Yes, I’m sure we did. It sort of crept up on us and first thing you know—”\\n\\n“Don’t believe everything you hear, Nick,” he advised me.')" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Tom', 'Jordan', 'Daisy', 'narrator']" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res.characters" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Inside, the crimson room bloomed with light. Tom and Miss Baker sat at either end of the long couch and she read aloud to him from the Saturday Evening Post—the words, murmurous and uninflected, running together in a soothing tune. 
The lamplight, bright on his boots and dull on the autumn-leaf yellow of her hair, glinted along the paper as she turned a page with a flutter of slender muscles in her arms.\n", "\n", "When we came in she held us silent for a moment with a lifted hand.\n", "\n", "“To be continued,” she said, tossing the magazine on the table, “in our very next issue.”\n", "\n", "Her body asserted itself with a restless movement of her knee, and she stood up.\n", "\n", "“Ten o’clock,” she remarked, apparently finding the time on the ceiling. “Time for this good girl to go to bed.”\n", "\n", "“Jordan’s going to play in the tournament tomorrow,” explained Daisy, “over at Westchester.”\n", "\n", "“Oh—you’re Jordan Baker.”\n", "\n", "I knew now why her face was familiar—its pleasing contemptuous expression had looked out at me from many rotogravure pictures of the sporting life at Asheville and Hot Springs and Palm Beach. I had heard some story of her too, a critical, unpleasant story, but what it was I had forgotten long ago.\n", "\n", "“Good night,” she said softly. “Wake me at eight, won’t you.”\n", "\n", "“If you’ll get up.”\n", "\n", "“I will. Good night, Mr. Carraway. See you anon.”\n", "\n", "“Of course you will,” confirmed Daisy. “In fact I think I’ll arrange a marriage. Come over often, Nick, and I’ll sort of—oh—fling you together. You know—lock you up accidentally in linen closets and push you out to sea in a boat, and all that sort of thing—”\n", "\n", "“Good night,” called Miss Baker from the stairs. “I haven’t heard a word.”\n", "\n", "“She’s a nice girl,” said Tom after a moment. “They oughtn’t to let her run around the country this way.”\n", "\n", "“Who oughtn’t to?” inquired Daisy coldly.\n", "\n", "“Her family.”\n", "\n", "“Her family is one aunt about a thousand years old. Besides, Nick’s going to look after her, aren’t you, Nick? She’s going to spend lots of weekends out here this summer. 
I think the home influence will be very good for her.”\n", "\n", "Daisy and Tom looked at each other for a moment in silence.\n", "\n", "“Is she from New York?” I asked quickly.\n", "\n", "“From Louisville. Our white girlhood was passed together there. Our beautiful white—”\n", "\n", "“Did you give Nick a little heart to heart talk on the veranda?” demanded Tom suddenly.\n", "\n", "“Did I?” She looked at me. “I can’t seem to remember, but I think we talked about the Nordic race. Yes, I’m sure we did. It sort of crept up on us and first thing you know—”\n", "\n", "“Don’t believe everything you hear, Nick,” he advised me.\n" ] } ], "source": [ "print(res.text_annotated)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "characters: ['Tom', 'Jordan', 'Daisy', 'narrator']\n", "--------------------\n", "[narrator] Inside, the crimson room bloomed with light. Tom and Miss Baker sat at either end of the long couch and she read aloud to him from the Saturday Evening Post—the words, murmurous and uninflected, running together in a soothing tune. 
The lamplight, bright on his boots and dull on the autumn-leaf yellow of her hair, glinted along the paper as she turned a page with a flutter of slender muscles in her arms.\n", "[narrator] When we came in she held us silent for a moment with a lifted hand.\n", "[Jordan] “To be continued,”\n", "[narrator] she said, tossing the magazine on the table,\n", "[Jordan] “in our very next issue.”\n", "[narrator] Her body asserted itself with a restless movement of her knee, and she stood up.\n", "[Jordan] “Ten o’clock,”\n", "[narrator] she remarked, apparently finding the time on the ceiling.\n", "[Jordan] “Time for this good girl to go to bed.”\n", "[Daisy] “Jordan’s going to play in the tournament tomorrow,”\n", "[narrator] explained Daisy,\n", "[Daisy] “over at Westchester.”\n", "[narrator] “Oh—you’re Jordan Baker.”\n", "[narrator] I knew now why her face was familiar—its pleasing contemptuous expression had looked out at me from many rotogravure pictures of the sporting life at Asheville and Hot Springs and Palm Beach. I had heard some story of her too, a critical, unpleasant story, but what it was I had forgotten long ago.\n", "[Jordan] “Good night,”\n", "[narrator] she said softly.\n", "[Jordan] “Wake me at eight, won’t you.”\n", "[Daisy] “If you’ll get up.”\n", "[Jordan] “I will. Good night, Mr. Carraway. See you anon.”\n", "[Daisy] “Of course you will,”\n", "[narrator] confirmed Daisy.\n", "[Daisy] “In fact I think I’ll arrange a marriage. Come over often, Nick, and I’ll sort of—oh—fling you together. 
You know—lock you up accidentally in linen closets and push you out to sea in a boat, and all that sort of thing—”\n", "[Jordan] “Good night,”\n", "[narrator] called Miss Baker from the stairs.\n", "[Jordan] “I haven’t heard a word.”\n", "[Tom] “She’s a nice girl,”\n", "[narrator] said Tom after a moment.\n", "[Tom] “They oughtn’t to let her run around the country this way.”\n", "[Daisy] “Who oughtn’t to?”\n", "[narrator] inquired Daisy coldly.\n", "[Tom] “Her family.”\n", "[Daisy] “Her family is one aunt about a thousand years old. Besides, Nick’s going to look after her, aren’t you, Nick? She’s going to spend lots of weekends out here this summer. I think the home influence will be very good for her.”\n", "[narrator] Daisy and Tom looked at each other for a moment in silence.\n", "[narrator] “Is she from New York?”\n", "[narrator] I asked quickly.\n", "[Daisy] “From Louisville. Our white girlhood was passed together there. Our beautiful white—”\n", "[Tom] “Did you give Nick a little heart to heart talk on the veranda?”\n", "[narrator] demanded Tom suddenly.\n", "[Daisy] “Did I?”\n", "[narrator] She looked at me.\n", "[Daisy] “I can’t seem to remember, but I think we talked about the Nordic race. Yes, I’m sure we did. 
It sort of crept up on us and first thing you know—”\n", "[Tom] “Don’t believe everything you hear, Nick,”\n", "[narrator] he advised me.\n" ] } ], "source": [ "# One line per phrase, prefixed with the character it was attributed to.\n", "print(res.to_pretty_text())" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "LLM usage:\n", "\n", "Tokens Used: 1817\n", "\tPrompt Tokens: 877\n", "\tCompletion Tokens: 940\n", "Successful Requests: 1\n", "Total Cost (USD): $0.0115925\n" ] } ], "source": [ "# Token and cost summary collected by the OpenAI callback during the split call.\n", "print(\"LLM usage:\", cb, sep=\"\\n\\n\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Map characters to voices" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# NOTE(review): imported here rather than in the top import cell; consider moving it there.\n", "from src.select_voice_chain import create_voice_mapping_chain" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# NOTE: this rebinds `chain` - from here on it is the voice-mapping chain, not the text-splitting one.\n", "chain = create_voice_mapping_chain(llm_model=GPTModels.GPT_4_TURBO_2024_04_09)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "ChatPromptTemplate(input_variables=['characters', 'text'], input_types={}, partial_variables={'available_genders': '\"male\", \"female\"', 'available_age_groups': '\"old\", \"middle_aged\", \"young\"', 'format_instructions': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\\n\\nAs an example, for the schema {\"properties\": {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\\nthe object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema.
The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\\n\\nHere is the output schema:\\n```\\n{\"$defs\": {\"CharacterProperties\": {\"properties\": {\"gender\": {\"title\": \"Gender\", \"type\": \"string\"}, \"age_group\": {\"title\": \"Age Group\", \"type\": \"string\"}}, \"required\": [\"gender\", \"age_group\"], \"title\": \"CharacterProperties\", \"type\": \"object\"}}, \"properties\": {\"character2props\": {\"additionalProperties\": {\"$ref\": \"#/$defs/CharacterProperties\"}, \"title\": \"Character2Props\", \"type\": \"object\"}}, \"required\": [\"character2props\"]}\\n```'}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['available_age_groups', 'available_genders', 'format_instructions'], input_types={}, partial_variables={}, template='You are a helpful assistant proficient in literature and psychology.\\nOur goal is to create an audio book from the given text.\\nFor that we need to hire voice actors.\\nPlease help us to find the right actor for each character present in the text.\\n\\nYou are provided with the text split by the characters\\nto whom text parts belong to.\\n\\nYour task is to assign available properties to each character provided.\\nList of available properties:\\n- gender: {available_genders}\\n- age_group: {available_age_groups}\\n\\nNOTES:\\n- assign EXACTLY ONE property value for each property\\n- select properties values ONLY from the list of AVAILABLE property values\\n- fill properties for ALL characters from the list provided\\n- DO NOT include any characters absent in the list provided\\n\\n{format_instructions}\\n'), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['characters', 'text'], input_types={}, partial_variables={}, template='\\n{text}\\n\\n\\n\\n{characters}\\n\\n'), additional_kwargs={})])\n", "| RunnableBinding(bound=ChatOpenAI(client=, async_client=, root_client=, root_async_client=, model_name='gpt-4-turbo-2024-04-09', 
temperature=0.0, model_kwargs={}, openai_api_key=SecretStr('**********'), request_timeout=Timeout(connect=4, read=60, write=60, pool=60)), kwargs={'response_format': {'type': 'json_object'}}, config={}, config_factories=[])\n", "| PydanticOutputParser(pydantic_object=)" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chain" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-10-10 02:37:46,347 [INFO] audio-books (lc_callbacks.py): call to gpt-4-turbo-2024-04-09 with 2 messages:\n", "{'role': 'system', 'content': 'You are a helpful assistant proficient in literature and psychology.\\nOur goal is to create an audio book from the given text.\\nFor that we need to hire voice actors.\\nPlease help us to find the right actor for each character present in the text.\\n\\nYou are provided with the text split by the characters\\nto whom text parts belong to.\\n\\nYour task is to assign available properties to each character provided.\\nList of available properties:\\n- gender: \"male\", \"female\"\\n- age_group: \"old\", \"middle_aged\", \"young\"\\n\\nNOTES:\\n- assign EXACTLY ONE property value for each property\\n- select properties values ONLY from the list of AVAILABLE property values\\n- fill properties for ALL characters from the list provided\\n- DO NOT include any characters absent in the list provided\\n\\nThe output should be formatted as a JSON instance that conforms to the JSON schema below.\\n\\nAs an example, for the schema {\"properties\": {\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\": \"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\\nthe object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema. 
The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\\n\\nHere is the output schema:\\n```\\n{\"$defs\": {\"CharacterProperties\": {\"properties\": {\"gender\": {\"title\": \"Gender\", \"type\": \"string\"}, \"age_group\": {\"title\": \"Age Group\", \"type\": \"string\"}}, \"required\": [\"gender\", \"age_group\"], \"title\": \"CharacterProperties\", \"type\": \"object\"}}, \"properties\": {\"character2props\": {\"additionalProperties\": {\"$ref\": \"#/$defs/CharacterProperties\"}, \"title\": \"Character2Props\", \"type\": \"object\"}}, \"required\": [\"character2props\"]}\\n```\\n'}\n", "{'role': 'human', 'content': \"\\nInside, the crimson room bloomed with light. Tom and Miss Baker sat at either end of the long couch and she read aloud to him from the Saturday Evening Post—the words, murmurous and uninflected, running together in a soothing tune. The lamplight, bright on his boots and dull on the autumn-leaf yellow of her hair, glinted along the paper as she turned a page with a flutter of slender muscles in her arms.\\n\\nWhen we came in she held us silent for a moment with a lifted hand.\\n\\n“To be continued,” she said, tossing the magazine on the table, “in our very next issue.”\\n\\nHer body asserted itself with a restless movement of her knee, and she stood up.\\n\\n“Ten o’clock,” she remarked, apparently finding the time on the ceiling. “Time for this good girl to go to bed.”\\n\\n“Jordan’s going to play in the tournament tomorrow,” explained Daisy, “over at Westchester.”\\n\\n“Oh—you’re Jordan Baker.”\\n\\nI knew now why her face was familiar—its pleasing contemptuous expression had looked out at me from many rotogravure pictures of the sporting life at Asheville and Hot Springs and Palm Beach. I had heard some story of her too, a critical, unpleasant story, but what it was I had forgotten long ago.\\n\\n“Good night,” she said softly. “Wake me at eight, won’t you.”\\n\\n“If you’ll get up.”\\n\\n“I will. Good night, Mr. 
Carraway. See you anon.”\\n\\n“Of course you will,” confirmed Daisy. “In fact I think I’ll arrange a marriage. Come over often, Nick, and I’ll sort of—oh—fling you together. You know—lock you up accidentally in linen closets and push you out to sea in a boat, and all that sort of thing—”\\n\\n“Good night,” called Miss Baker from the stairs. “I haven’t heard a word.”\\n\\n“She’s a nice girl,” said Tom after a moment. “They oughtn’t to let her run around the country this way.”\\n\\n“Who oughtn’t to?” inquired Daisy coldly.\\n\\n“Her family.”\\n\\n“Her family is one aunt about a thousand years old. Besides, Nick’s going to look after her, aren’t you, Nick? She’s going to spend lots of weekends out here this summer. I think the home influence will be very good for her.”\\n\\nDaisy and Tom looked at each other for a moment in silence.\\n\\n“Is she from New York?” I asked quickly.\\n\\n“From Louisville. Our white girlhood was passed together there. Our beautiful white—”\\n\\n“Did you give Nick a little heart to heart talk on the veranda?” demanded Tom suddenly.\\n\\n“Did I?” She looked at me. “I can’t seem to remember, but I think we talked about the Nordic race. Yes, I’m sure we did. 
It sort of crept up on us and first thing you know—”\\n\\n“Don’t believe everything you hear, Nick,” he advised me.\\n\\n\\n\\n['Tom', 'Jordan', 'Daisy', 'narrator']\\n\\n\"}\n", "2024-10-10 02:37:52,060 [INFO] httpx (_client.py): HTTP Request: POST https://api.openai.com/v1/chat/completions \"HTTP/1.1 200 OK\"\n", "2024-10-10 02:37:52,063 [INFO] audio-books (lc_callbacks.py): raw LLM response: \"{\n", " \"character2props\": {\n", " \"Tom\": {\n", " \"gender\": \"male\",\n", " \"age_group\": \"middle_aged\"\n", " },\n", " \"Jordan\": {\n", " \"gender\": \"female\",\n", " \"age_group\": \"young\"\n", " },\n", " \"Daisy\": {\n", " \"gender\": \"female\",\n", " \"age_group\": \"young\"\n", " },\n", " \"narrator\": {\n", " \"gender\": \"male\",\n", " \"age_group\": \"middle_aged\"\n", " }\n", " }\n", "}\"\n" ] } ], "source": [ "res2 = chain.invoke(\n", " {\"text\": res.text_annotated, \"characters\": res.characters},\n", " config={\"callbacks\": [LCMessageLoggerAsync()]},\n", ")" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AllCharactersProperties(character2props={'Tom': CharacterProperties(gender='male', age_group='middle_aged'), 'Jordan': CharacterProperties(gender='female', age_group='young'), 'Daisy': CharacterProperties(gender='female', age_group='young'), 'narrator': CharacterProperties(gender='male', age_group='middle_aged')})" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 22 entries, 0 to 21\n", "Data columns (total 14 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 voice_id 22 non-null object \n", " 1 name 22 non-null object \n", " 2 preview_url 22 non-null object \n", " 3 owner_id 0 non-null float64\n", " 4 permission_on_resource 2 non-null 
object \n", " 5 is_legacy 22 non-null bool \n", " 6 is_mixed 22 non-null bool \n", " 7 accent 22 non-null object \n", " 8 description 20 non-null object \n", " 9 age 22 non-null object \n", " 10 gender 22 non-null object \n", " 11 category 22 non-null object \n", " 12 language 2 non-null object \n", " 13 descriptive 2 non-null object \n", "dtypes: bool(2), float64(1), object(11)\n", "memory usage: 2.2+ KB\n" ] } ], "source": [ "# Read the voices table from data/, the same location the voices EDA cell uses.\n", "voices = pd.read_csv(\"data/11labs_available_tts_voices.csv\")\n", "voices.info()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['middle_aged', 'young', 'old'], dtype=object)" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "voices[\"age\"].unique()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['female', 'male', 'non-binary', 'neutral'], dtype=object)" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "voices[\"gender\"].unique()" ] } ], "metadata": { "kernelspec": { "display_name": "ai-audio-books", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.4" } }, "nbformat": 4, "nbformat_minor": 2 }