|
import torch
|
|
from tqdm import tqdm
|
|
from multiprocessing import Pool
|
|
from mel_processing import spectrogram_torch, mel_spectrogram_torch
|
|
from utils import load_wav_to_torch
|
|
|
|
|
|
class AudioProcessor:
    """Build and cache the spectrogram that belongs to an audio file.

    The constructor only stores settings. ``process_audio`` loads one file,
    scales it, and returns a spectrogram that is read from an on-disk cache
    when possible and computed (then cached) otherwise.
    """

    def __init__(
        self,
        max_wav_value,
        use_mel_spec_posterior,
        filter_length,
        n_mel_channels,
        sampling_rate,
        hop_length,
        win_length,
        mel_fmin,
        mel_fmax,
    ):
        """Store the processing settings.

        Args:
            max_wav_value: Divisor applied to the loaded audio samples.
            use_mel_spec_posterior: When true, ``mel_spectrogram_torch`` is
                used and the cache file ends in ``.mel.pt``; otherwise
                ``spectrogram_torch`` is used and it ends in ``.spec.pt``.
            filter_length: Forwarded to the spectrogram function.
            n_mel_channels: Forwarded to ``mel_spectrogram_torch`` only.
            sampling_rate: Forwarded to the spectrogram function.
            hop_length: Forwarded to the spectrogram function.
            win_length: Forwarded to the spectrogram function.
            mel_fmin: Forwarded to ``mel_spectrogram_torch`` only.
            mel_fmax: Forwarded to ``mel_spectrogram_torch`` only.
        """
        self.max_wav_value = max_wav_value
        self.use_mel_spec_posterior = use_mel_spec_posterior
        self.filter_length = filter_length
        self.n_mel_channels = n_mel_channels
        self.sampling_rate = sampling_rate
        self.hop_length = hop_length
        self.win_length = win_length
        self.mel_fmin = mel_fmin
        self.mel_fmax = mel_fmax

    def _spec_path(self, filename):
        """Return the cache path derived from ``filename``.

        Raises:
            ValueError: If the derived path equals ``filename`` (the name
                contains no ".wav"), because saving the spectrogram there
                would overwrite the audio file itself.
        """
        # Plain substring replacement is kept on purpose so the derived name
        # stays the same as before for every path that contains ".wav".
        spec_filename = filename.replace(".wav", ".spec.pt")
        if self.use_mel_spec_posterior:
            spec_filename = spec_filename.replace(".spec.pt", ".mel.pt")
        if spec_filename == filename:
            raise ValueError(
                f"cannot derive a spectrogram cache path from {filename!r}: "
                "the name does not contain '.wav'"
            )
        return spec_filename

    def _compute_spec(self, audio_norm):
        """Compute a fresh spectrogram for ``audio_norm``, dropping dim 0."""
        if self.use_mel_spec_posterior:
            spec = mel_spectrogram_torch(
                audio_norm,
                self.filter_length,
                self.n_mel_channels,
                self.sampling_rate,
                self.hop_length,
                self.win_length,
                self.mel_fmin,
                self.mel_fmax,
                center=False,
            )
        else:
            spec = spectrogram_torch(
                audio_norm,
                self.filter_length,
                self.sampling_rate,
                self.hop_length,
                self.win_length,
                center=False,
            )
        return torch.squeeze(spec, 0)

    def process_audio(self, filename):
        """Return ``(spec, audio_norm)`` for one audio file.

        Args:
            filename: Path handed to ``load_wav_to_torch``. It must contain
                ".wav" so that a distinct cache path can be derived.

        Returns:
            A tuple ``(spec, audio_norm)``. ``spec`` is whatever
            ``torch.load`` reads from the cache file, or a newly computed
            spectrogram that has also been saved to that file.
            ``audio_norm`` is the loaded audio divided by ``max_wav_value``
            with a leading dimension added.

        Raises:
            ValueError: If no distinct cache path can be derived from
                ``filename``.
        """
        # Resolve (and validate) the cache path before touching any file.
        spec_filename = self._spec_path(filename)

        # The sample rate reported by the loader is not used here.
        audio, _ = load_wav_to_torch(filename)
        audio_norm = (audio / self.max_wav_value).unsqueeze(0)

        try:
            spec = torch.load(spec_filename)
        except Exception:
            # A missing or unreadable cache is rebuilt. KeyboardInterrupt and
            # SystemExit are deliberately not caught so that a worker can
            # still be interrupted.
            spec = self._compute_spec(audio_norm)
            torch.save(spec, spec_filename)
        return spec, audio_norm
|
|
|
|
|
|
|
|
# Shared processor whose bound method is dispatched to the worker pool.
processor = AudioProcessor(
    max_wav_value=32768.0,
    use_mel_spec_posterior=False,
    filter_length=2048,
    n_mel_channels=128,
    sampling_rate=44100,
    hop_length=512,
    win_length=2048,
    mel_fmin=0.0,
    # Was the string "null" (a JSON ``null`` copied verbatim); ``None`` is the
    # Python equivalent. NOTE(review): presumably mel_spectrogram_torch
    # accepts None for "no upper limit" - confirm against mel_processing.
    mel_fmax=None,
)


def main():
    """Run ``processor.process_audio`` on every file in the training list.

    Each non-blank line of ``filelists/train.list`` contributes the text
    before its first "|" as an audio path. The paths are processed by a pool
    of 32 worker processes while a progress bar counts completed files.
    """
    with open("filelists/train.list", "r", encoding="utf-8") as f:
        # Skip blank lines so that a stray newline is never used as a path.
        filepaths = [line.split("|")[0].strip() for line in f if line.strip()]

    with Pool(processes=32) as pool, tqdm(total=len(filepaths)) as pbar:
        # Results are discarded; the useful effect is the cache files that
        # process_audio writes.
        for _ in pool.imap_unordered(processor.process_audio, filepaths):
            pbar.update()


# Required for multiprocessing start methods that re-import this module in
# each child process: without the guard every child would start its own pool.
if __name__ == "__main__":
    main()
|
|
|