Spaces:
Sleeping
Sleeping
kyunghun
committed on
Commit
·
9178cf3
1
Parent(s):
6ebcd6e
Add application file
Browse files
app.py
ADDED
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import matplotlib.pyplot as plt
|
2 |
+
import librosa
|
3 |
+
import librosa.display
|
4 |
+
import numpy as np
|
5 |
+
import os,sys
|
6 |
+
import ruptures as rpt
|
7 |
+
from glob import glob
|
8 |
+
from tqdm import tqdm
|
9 |
+
import soundfile
|
10 |
+
import pandas as pd
|
11 |
+
import csv
|
12 |
+
import gradio as gr
|
13 |
+
|
14 |
+
def fig_ax(figsize=(15, 5), dpi=150):
    """Create a matplotlib figure together with its axes.

    `figsize` and `dpi` are forwarded unchanged to `plt.subplots`; the
    `(figure, axes)` pair it produces is returned.
    """
    figure, axes = plt.subplots(figsize=figsize, dpi=dpi)
    return figure, axes
|
17 |
+
|
18 |
+
def get_sum_of_cost(algo, n_bkps) -> float:
    """Return the total segmentation cost obtained with `n_bkps` change points.

    The change points are asked from `algo.predict` and then scored with
    `algo.cost.sum_of_costs`.
    """
    return algo.cost.sum_of_costs(algo.predict(n_bkps=n_bkps))
|
22 |
+
def variable_outputs(k, n_outputs=10):
    """Return visibility updates that show the first `k` audio slots.

    Args:
        k: Number of result slots to make visible; converted with `int`.
        n_outputs: Total number of audio slots being updated. Defaults to 10,
            the number of result players the UI creates.

    Returns:
        A list of exactly `n_outputs` `gr.Audio` updates: `k` visible ones
        followed by hidden ones.
    """
    # Clamp so the returned list always has n_outputs entries; without this
    # a k above n_outputs (or below zero) yields the wrong number of updates.
    k = max(0, min(int(k), n_outputs))
    return [gr.Audio(visible=True)] * k + [gr.Audio(visible=False)] * (n_outputs - k)
|
25 |
+
def _select_n_bkps(costs, default=1):
    """Choose a number of change points from per-count segmentation costs.

    `costs[j]` is the cost of the segmentation with `j + 1` change points.
    The largest absolute cost difference between consecutive entries is
    located at index `i`, and `i + 2` is returned (one more than the count
    at which that drop was observed). `default` is returned when no
    non-zero difference exists, so the caller never sees an unbound value.
    """
    largest_drop = 0
    chosen = default
    for i in range(1, len(costs)):
        drop = abs(costs[i] - costs[i - 1])
        if drop > largest_drop:
            largest_drop = drop
            chosen = i + 2
    return chosen


def _filter_breakpoints(indexes, sampling_rate):
    """Drop change points that would create too-short segments.

    `indexes` are sample positions in increasing order, the last one being
    the end of the signal. Lists with at most two entries are returned as-is.
    Otherwise: the first point is kept only if it lies at least 10 s in,
    an interior point only if it is at least 10 s after its predecessor,
    and the final point is always kept; if the final point is less than 5 s
    after its predecessor, the most recently kept point is removed first.
    """
    if len(indexes) <= 2:
        return list(indexes)
    kept = []
    last = len(indexes) - 1
    for i, index in enumerate(indexes):
        if i == 0:
            if index >= 10 * sampling_rate:
                kept.append(index)
        elif i == last:
            # Guard on `kept`: nothing may have been kept so far.
            if kept and index - indexes[i - 1] < 5 * sampling_rate:
                kept.pop()
            kept.append(index)
        elif index - indexes[i - 1] >= 10 * sampling_rate:
            kept.append(index)
    return kept


def _cap_segments(indexes, max_segments):
    """Limit the segment count by merging trailing segments into the last."""
    if len(indexes) <= max_segments:
        return indexes
    return indexes[: max_segments - 1] + [indexes[-1]]


def generate(wavfile,target_sampling_rate,hop_length_tempo,n_bkps_max):
    """Split an audio file into segments at detected tempo change points.

    A tempogram of the audio is computed, change points are detected on it
    with a linear-kernel `rpt.KernelCPD`, short segments are filtered out,
    and each remaining segment is written to `output_<n>.mp3`.

    Args:
        wavfile: Path of the audio file to load.
        target_sampling_rate: Sampling rate passed to `librosa.load`; when
            None, `librosa.load` is called without `sr`.
        hop_length_tempo: Hop length for the onset envelope and tempogram;
            falls back to 512 when empty or zero.
        n_bkps_max: Largest number of change points evaluated by the
            cost heuristic; falls back to 10 when empty or zero.

    Returns:
        A tuple `(fig, seg_len, *paths)`: the tempogram figure with the
        change points drawn on it, the number of segments written, and
        exactly 10 entries holding the segment file names followed by
        `None` for unused slots.
    """
    # The UI wires exactly 10 audio result slots to this callback.
    max_outputs = 10

    if target_sampling_rate is not None:
        signal2, sampling_rate = librosa.load(wavfile,sr=target_sampling_rate,mono=False)
    else:
        signal2, sampling_rate = librosa.load(wavfile,mono=False)
    # With mono=False a single-channel file loads as a 1-D array; promote it
    # to (channels, samples) so the channel mix and the slicing below work.
    signal2 = np.atleast_2d(signal2)
    # Channel average (equals sum / 2 for stereo input).
    signal = signal2.mean(axis=0)

    # Honour the caller-supplied settings instead of overwriting them; cast
    # to int because they come from numeric UI fields (may arrive as floats
    # -- TODO confirm).
    hop_length_tempo = int(hop_length_tempo) if hop_length_tempo else 512
    n_bkps_max = max(1, int(n_bkps_max)) if n_bkps_max else 10

    # Compute the onset strength
    oenv = librosa.onset.onset_strength(
        y=signal, sr=sampling_rate, hop_length=hop_length_tempo
    )
    # Compute the tempogram
    tempogram = librosa.feature.tempogram(
        onset_envelope=oenv,
        sr=sampling_rate,
        hop_length=hop_length_tempo,
    )
    algo = rpt.KernelCPD(kernel="linear").fit(tempogram.T)

    # Choose the number of changes (elbow heuristic).
    # Start by computing the segmentation with most changes.
    # After start, all segmentations with 1, 2,..., K_max-1 changes are also available for free.
    _ = algo.predict(n_bkps_max)
    array_of_n_bkps = np.arange(1, n_bkps_max + 1)
    costs = [get_sum_of_cost(algo=algo, n_bkps=n_bkps) for n_bkps in array_of_n_bkps]
    n_bkps = _select_n_bkps(costs)

    bkps = algo.predict(n_bkps=n_bkps)
    # Convert the estimated change points (frame counts) to actual timestamps
    bkps_times = librosa.frames_to_time(bkps, sr=sampling_rate, hop_length=hop_length_tempo)

    # Compute change points corresponding indexes in original signal
    bkps_time_indexes = (sampling_rate * bkps_times).astype(int).tolist()
    bkps_time_indexes = _filter_breakpoints(bkps_time_indexes, sampling_rate)
    # Never produce more segments than there are output slots.
    bkps_time_indexes = _cap_segments(bkps_time_indexes, max_outputs)

    fig, ax = fig_ax()
    _ = librosa.display.specshow(
        tempogram,
        ax=ax,
        x_axis="s",
        y_axis="tempo",
        hop_length=hop_length_tempo,
        sr=sampling_rate,
    )
    # Mark every retained change point (in seconds) on the tempogram.
    for b in (x / sampling_rate for x in bkps_time_indexes):
        ax.axvline(b, ls="--", color="white", lw=4)

    seg_list = []
    for segment_number, (start, end) in enumerate(
        rpt.utils.pairwise([0] + bkps_time_indexes), start=1
    ):
        save_name = f"output_{segment_number}.mp3"
        segment = signal2[:, start:end]
        seg_list.append(save_name)
        # soundfile expects (samples, channels), hence the transpose.
        soundfile.write(save_name,
                        segment.T,
                        int(sampling_rate),
                        format='MP3'
                        )
    seg_len = len(seg_list)
    # Pad to the fixed number of outputs; None leaves an audio slot empty,
    # whereas the string "None" would be treated as a file path.
    seg_list.extend([None] * (max_outputs - seg_len))
    return fig,seg_len,*seg_list
|
109 |
+
def list_map(lists):
    """Copy the string form of each item of `lists` into `RESULTS`.

    Prints the lengths of `lists` and of the module-level `RESULTS`, then
    overwrites `RESULTS[i]` with `str(lists[i])` for every position of
    `lists` and returns `RESULTS`.
    """
    print(len(lists), len(RESULTS))
    for position, item in enumerate(lists):
        RESULTS[position] = str(item)
    return RESULTS
|
114 |
+
# Build the Gradio UI: an upload widget and submit button, a plot of the
# segmentation result, a collapsed settings panel, and ten hidden audio
# players that are revealed according to how many segments were produced.
# NOTE(review): the source lost its indentation, so the nesting of the
# layout containers below is a reconstruction -- confirm against the
# intended layout.
with gr.Blocks() as demo:
    # Page heading. The literal's lines are kept at column 0 so the text
    # passed to gr.Markdown stays exactly as written.
    gr.Markdown(
'''
# Demo of Music Segmentation(Intro, Verse, Outro..) using Change Detection Algoritm
'''
    )
    # Only referenced by the commented-out handler further down.
    result_list = gr.State()
    with gr.Column():
        with gr.Row():
            with gr.Column():
                # Input audio is handed to the callback as a file path.
                wavfile = gr.Audio(sources="upload", type="filepath")
                btn_submit = gr.Button()
            result_image = gr.Plot(label="result")
        with gr.Accordion(label="Settings", open=False):
            # These three values are passed to `generate` as inputs.
            target_sampling_rate = gr.Number(label="target_sampling_rate", value=44100, interactive=True)
            hop_length_tempo = gr.Number(label="hop_length_tempo", value=512, interactive=True)
            n_bkps_max = gr.Number(label="n_bkps_max", value=10, interactive=True)
    # Read-only field receiving the second value returned by `generate`;
    # its change event drives which audio players are visible.
    result_len = gr.Number(label="result_len",value=10,interactive=False)
    # Module-level list of the ten result players (also read by `list_map`).
    RESULTS = []
    with gr.Column():
        for i in range(1,11):
            # Players start hidden; `variable_outputs` toggles visibility.
            w = gr.Audio(label=f"result part {i}",visible=False,type="filepath")
            RESULTS.append(w)
    result_len.change(variable_outputs,result_len,RESULTS)
    # result_len.change(list_map,result_list,RESULTS)
    # `generate` returns the figure, the segment count, and one value per
    # entry of RESULTS, matching the outputs list below.
    btn_submit.click(
        fn=generate,
        inputs=[
            wavfile,target_sampling_rate,hop_length_tempo,n_bkps_max
        ],
        outputs=[
            result_image,result_len,*RESULTS
        ],
    )

# Enable the request queue and start the server at import/run time.
# server_name="0.0.0.0" is presumably there so the app is reachable from
# outside its container -- confirm against the deployment.
demo.queue().launch(server_name="0.0.0.0")
|
150 |
+
|