{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Change a video's audio by detecting onsets\n",
"This notebook demonstrates a method that replaces the sound of a target video with a given audio track."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Load packages"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {},
"outputs": [],
"source": [
"import IPython\n",
"import os\n",
"import numpy as np\n",
"from moviepy.editor import *\n",
"import librosa\n",
"from IPython.display import Audio\n",
"from IPython.display import Video"
]
},
{
"cell_type": "code",
"execution_count": 119,
"metadata": {},
"outputs": [],
"source": [
"# Locations of the two input videos\n",
"origin_video_path = 'data/target.mp4'\n",
"conditional_video_path = 'data/conditional.mp4'\n",
"# Alternative conditional video:\n",
"# conditional_video_path = 'data/dog_bark.mp4'\n",
"\n",
"# Open both files as moviepy clips\n",
"ori_videoclip = VideoFileClip(filename=origin_video_path)\n",
"con_videoclip = VideoFileClip(filename=conditional_video_path)\n"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"execution_count": 120,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Video(origin_video_path, width=640)"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"execution_count": 121,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"Video(conditional_video_path, width=640)"
]
},
{
"cell_type": "code",
"execution_count": 122,
"metadata": {},
"outputs": [],
"source": [
"def clip_to_mono(audioclip, sample_rate):\n",
"    \"\"\"Return the samples of a moviepy audio clip as a resampled mono signal.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    audioclip : moviepy audio clip\n",
"        Object providing `to_soundarray()` and `fps`.\n",
"    sample_rate : int\n",
"        Sampling rate (Hz) of the returned signal.\n",
"\n",
"    Returns\n",
"    -------\n",
"    np.ndarray\n",
"        The clip's samples, averaged over channels and resampled to\n",
"        `sample_rate`.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If `audioclip` is None (the video has no audio track).\n",
"    \"\"\"\n",
"    if audioclip is None:\n",
"        raise ValueError('video has no audio track')\n",
"    samples = audioclip.to_soundarray()\n",
"    # Average over the channel axis only when there is one: `.mean(-1)` on an\n",
"    # already 1-D signal would collapse it to a single scalar.\n",
"    if samples.ndim > 1:\n",
"        samples = samples.mean(axis=-1)\n",
"    return librosa.resample(samples, orig_sr=audioclip.fps, target_sr=sample_rate)\n",
"\n",
"\n",
"# Both tracks are resampled to this common rate (Hz)\n",
"target_sr = 22050\n",
"\n",
"# get the audio track from video\n",
"ori_audioclip = ori_videoclip.audio\n",
"con_audioclip = con_videoclip.audio\n",
"\n",
"ori_audio = clip_to_mono(ori_audioclip, target_sr)\n",
"con_audio = clip_to_mono(con_audioclip, target_sr)\n",
"\n",
"# After resampling, both signals share the same sampling rate\n",
"ori_sr, con_sr = target_sr, target_sr"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {},
"outputs": [],
"source": [
"def detect_onset_of_audio(audio, sample_rate, delta=0.3):\n",
"    \"\"\"Detect onset positions in an audio signal with librosa.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    audio : np.ndarray\n",
"        Audio time series, passed to librosa as `y`.\n",
"    sample_rate : int\n",
"        Sampling rate of `audio`, passed to librosa as `sr`.\n",
"    delta : float, optional\n",
"        Threshold offset forwarded to librosa's peak picking. Defaults to\n",
"        0.3, the value that was previously hard-coded.\n",
"\n",
"    Returns\n",
"    -------\n",
"    np.ndarray\n",
"        Detected onset positions as sample indices (`units='samples'`).\n",
"    \"\"\"\n",
"    return librosa.onset.onset_detect(\n",
"        y=audio, sr=sample_rate, units='samples', delta=delta)\n"
]
},
{
"cell_type": "code",
"execution_count": 124,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
"