kyunghun committed on
Commit
9178cf3
·
1 Parent(s): 6ebcd6e

Add application file

Browse files
Files changed (1) hide show
  1. app.py +150 -0
app.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import librosa
3
+ import librosa.display
4
+ import numpy as np
5
+ import os,sys
6
+ import ruptures as rpt
7
+ from glob import glob
8
+ from tqdm import tqdm
9
+ import soundfile
10
+ import pandas as pd
11
+ import csv
12
+ import gradio as gr
13
+
14
def fig_ax(figsize=(15, 5), dpi=150):
    """Create a matplotlib figure holding a single axes.

    Args:
        figsize: Figure size forwarded to ``plt.subplots``.
        dpi: Figure resolution forwarded to ``plt.subplots``.

    Returns:
        The ``(figure, axes)`` pair produced by ``plt.subplots``.
    """
    figure, axes = plt.subplots(figsize=figsize, dpi=dpi)
    return figure, axes
17
+
18
def get_sum_of_cost(algo, n_bkps) -> float:
    """Return the total segmentation cost when `algo` predicts `n_bkps` changes.

    Args:
        algo: A fitted detector exposing ``predict(n_bkps=...)`` and a
            ``cost`` object with a ``sum_of_costs`` method.
        n_bkps: Number of change points to request from ``algo.predict``.

    Returns:
        Whatever ``algo.cost.sum_of_costs`` returns for the predicted
        breakpoints.
    """
    breakpoints = algo.predict(n_bkps=n_bkps)
    cost_model = algo.cost
    return cost_model.sum_of_costs(breakpoints)
22
def variable_outputs(k):
    """Build visibility updates for the ten result audio players.

    The first `k` players are made visible and the remaining ones hidden.
    The returned list always has exactly ten entries so that it matches the
    ten output components this callback is wired to, whatever `k` is.

    Args:
        k: Number of players to show. Anything that cannot be converted to
            an int (e.g. ``None``) is treated as 0; values outside 0..10
            are clamped into that range.

    Returns:
        A list of ten ``gr.Audio`` update objects.
    """
    total_slots = 10
    try:
        k = int(k)
    except (TypeError, ValueError):
        k = 0
    # Without clamping, k > 10 or k < 0 would yield more than ten entries.
    k = max(0, min(k, total_slots))
    return [gr.Audio(visible=True)] * k + [gr.Audio(visible=False)] * (total_slots - k)
25
def _prune_breakpoints(indexes, sampling_rate):
    """Drop breakpoints that would create very short segments.

    Lists with two or fewer entries are returned unchanged. Otherwise:
    the first breakpoint is kept only if it lies at least 10 s into the
    signal; an inner breakpoint is kept only if it is at least 10 s after
    the preceding detected breakpoint; the final breakpoint is always kept,
    and the breakpoint directly before it is discarded when the two are
    less than 5 s apart.
    """
    if len(indexes) <= 2:
        return list(indexes)

    min_gap = 10 * sampling_rate
    min_tail = 5 * sampling_rate
    last = len(indexes) - 1
    kept = []
    for i, idx in enumerate(indexes):
        if i == 0:
            if idx >= min_gap:
                kept.append(idx)
        elif i == last:
            previous = indexes[i - 1]
            # Only merge the tail into the segment before it when that
            # neighbour was actually kept; never pop from an empty list or
            # discard an unrelated earlier breakpoint.
            if idx - previous < min_tail and kept and kept[-1] == previous:
                kept.pop()
            kept.append(idx)
        elif idx - indexes[i - 1] >= min_gap:
            kept.append(idx)
    return kept


def generate(wavfile, target_sampling_rate, hop_length_tempo, n_bkps_max):
    """Segment an audio file at tempo changes and export each segment.

    The file is loaded with librosa, a tempogram is computed from its onset
    strength, and change points are detected on the tempogram with a
    linear-kernel ``rpt.KernelCPD``. The number of change points is chosen
    from the largest drop in the sum of costs. Each resulting segment is
    written to ``output_<n>.mp3`` in the current working directory.

    Args:
        wavfile: Path of the audio file to segment.
        target_sampling_rate: Sampling rate passed to ``librosa.load``; when
            ``None`` librosa's default rate is used.
        hop_length_tempo: Hop length (in samples) for the onset envelope and
            tempogram. Falls back to 512 when missing or not positive.
        n_bkps_max: Largest number of change points to consider. Falls back
            to 10 when missing or below 1.

    Returns:
        A tuple ``(fig, seg_len, *paths)`` where ``fig`` is the tempogram
        figure with the retained breakpoints drawn on it, ``seg_len`` is the
        number of exported segments (at most 10) and ``paths`` always holds
        exactly 10 entries: the exported file names followed by ``None`` for
        unused slots.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if wavfile is None:
        raise gr.Error("Please upload an audio file first.")

    # The UI wires this callback to exactly ten audio outputs.
    max_segments = 10

    # Honour the values coming from the settings panel instead of overwriting
    # them; numeric inputs may arrive as floats or None.
    if hop_length_tempo and hop_length_tempo > 0:
        hop_length_tempo = int(hop_length_tempo)
    else:
        hop_length_tempo = 512
    if n_bkps_max and n_bkps_max >= 1:
        n_bkps_max = int(n_bkps_max)
    else:
        n_bkps_max = 10

    if target_sampling_rate is not None:
        signal2, sampling_rate = librosa.load(wavfile, sr=target_sampling_rate, mono=False)
    else:
        signal2, sampling_rate = librosa.load(wavfile, mono=False)

    # With mono=False a single-channel file comes back 1-D; force a
    # (channels, samples) layout so the channel axis always exists.
    signal2 = np.atleast_2d(signal2)
    # Channel average; for stereo input this equals the former sum / 2.
    signal = signal2.mean(axis=0)

    # Compute the onset strength
    oenv = librosa.onset.onset_strength(
        y=signal, sr=sampling_rate, hop_length=hop_length_tempo
    )
    # Compute the tempogram
    tempogram = librosa.feature.tempogram(
        onset_envelope=oenv,
        sr=sampling_rate,
        hop_length=hop_length_tempo,
    )
    algo = rpt.KernelCPD(kernel="linear").fit(tempogram.T)

    # Choose the number of changes (elbow heuristic).
    # Start by computing the segmentation with most changes.
    _ = algo.predict(n_bkps_max)
    costs = [
        get_sum_of_cost(algo=algo, n_bkps=count)
        for count in range(1, n_bkps_max + 1)
    ]

    # costs[i] belongs to i + 1 change points. The original heuristic selects
    # one change point beyond the steepest drop; that offset is kept, but the
    # result is clamped to n_bkps_max and defaults to 1 when no drop exists.
    n_bkps = 1
    largest_drop = 0
    for i in range(1, len(costs)):
        drop = abs(costs[i] - costs[i - 1])
        if drop > largest_drop:
            largest_drop = drop
            n_bkps = min(i + 2, n_bkps_max)

    bkps = algo.predict(n_bkps=n_bkps)
    # Convert the estimated change points (frame counts) to actual timestamps
    bkps_times = librosa.frames_to_time(bkps, sr=sampling_rate, hop_length=hop_length_tempo)

    # Compute change points corresponding indexes in original signal
    bkps_time_indexes = (sampling_rate * bkps_times).astype(int).tolist()
    bkps_time_indexes = _prune_breakpoints(bkps_time_indexes, sampling_rate)

    # Never produce more segments than there are output slots: keep the
    # earliest breakpoints and the final one so the whole signal is covered.
    if len(bkps_time_indexes) > max_segments:
        bkps_time_indexes = (
            bkps_time_indexes[: max_segments - 1] + bkps_time_indexes[-1:]
        )

    fig, ax = fig_ax()
    _ = librosa.display.specshow(
        tempogram,
        ax=ax,
        x_axis="s",
        y_axis="tempo",
        hop_length=hop_length_tempo,
        sr=sampling_rate,
    )
    for boundary in bkps_time_indexes:
        ax.axvline(boundary / sampling_rate, ls="--", color="white", lw=4)

    seg_list = []
    for segment_number, (start, end) in enumerate(
        rpt.utils.pairwise([0] + bkps_time_indexes), start=1
    ):
        save_name = f"output_{segment_number}.mp3"
        segment = signal2[:, start:end]
        seg_list.append(save_name)
        # soundfile expects (samples, channels), hence the transpose.
        soundfile.write(
            save_name,
            segment.T,
            int(sampling_rate),
            format='MP3',
        )

    seg_len = len(seg_list)
    # Pad with None (not the string "None") so unused players receive no file.
    seg_list.extend([None] * (max_segments - seg_len))
    return fig, seg_len, *seg_list
109
def list_map(lists):
    """Overwrite the leading entries of the module-level ``RESULTS`` list.

    Prints the lengths of `lists` and ``RESULTS``, then replaces
    ``RESULTS[i]`` with ``str(lists[i])`` for every index of `lists`.

    Args:
        lists: Sequence whose items are stringified into ``RESULTS``.

    Returns:
        The (mutated) ``RESULTS`` list.
    """
    print(len(lists), len(RESULTS))
    for position, item in enumerate(lists):
        RESULTS[position] = str(item)
    return RESULTS
114
# Gradio UI: upload an audio file, run `generate`, show the tempogram plot and
# up to ten result players.
# NOTE(review): the container nesting below is reconstructed from statement
# order only — confirm it against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown(
        '''
        # Demo of Music Segmentation(Intro, Verse, Outro..) using Change Detection Algorithm
        '''
    )
    result_list = gr.State()
    with gr.Column():
        with gr.Row():
            with gr.Column():
                wavfile = gr.Audio(sources="upload", type="filepath")
                btn_submit = gr.Button()
            result_image = gr.Plot(label="result")
        with gr.Accordion(label="Settings", open=False):
            target_sampling_rate = gr.Number(label="target_sampling_rate", value=44100, interactive=True)
            hop_length_tempo = gr.Number(label="hop_length_tempo", value=512, interactive=True)
            n_bkps_max = gr.Number(label="n_bkps_max", value=10, interactive=True)
        result_len = gr.Number(label="result_len", value=10, interactive=False)

    # Ten hidden players; `variable_outputs` reveals as many as there are
    # segments whenever `result_len` changes.
    RESULTS = []
    with gr.Column():
        for i in range(1, 11):
            w = gr.Audio(label=f"result part {i}", visible=False, type="filepath")
            RESULTS.append(w)
    result_len.change(variable_outputs, result_len, RESULTS)
    # result_len.change(list_map,result_list,RESULTS)
    btn_submit.click(
        fn=generate,
        inputs=[
            wavfile, target_sampling_rate, hop_length_tempo, n_bkps_max
        ],
        outputs=[
            result_image, result_len, *RESULTS
        ],
    )

demo.queue().launch(server_name="0.0.0.0")
150
+