# General run settings: pipeline stage, corpus layout, data/output
# locations, and feature/vocoder selection.
general:
  stage: "pretrain"
  corpus_type: "multi-unseen"
  # All locations below are relative paths ("./...").
  source_path: "./data/jvs_22k-low"
  aux_path: "./data/jvs_22k"
  preprocessed_path: "./preprocessed/jvs"
  output_path: "./output/vocfeats/pretrain"
  test_wav_path: null  # explicitly unset
  feature_type: "vocfeats"
  # NOTE(review): the "40d" in this path presumably corresponds to
  # cep_order: 40 in the preprocess section -- confirm before changing
  # either value.
  hifigan_path: "./hifigan/hifigan_jvs_40d_600k"
  power_norm: true
  use_gst: false

# Dataset split sizes and acoustic-feature extraction parameters.
preprocess:
  # The three n_* counts sum to 100.
  n_train: 90
  n_val: 5
  n_test: 5
  sampling_rate: 22050  # presumably Hz -- TODO confirm
  # frame_length and fft_length are set to the same value; units are
  # presumably samples -- TODO confirm.
  frame_length: 1024
  frame_shift: 256
  fft_length: 1024
  fmin: 0
  fmax: 8000
  n_mels: 80
  cep_order: 40
  f0_extractor: "dio"
  comp_factor: 1.0
  min_magnitude: 0.00001
  max_wav_value: 32768.0  # 2**15
  segment_length: 2  # unit not stated here (seconds?) -- TODO confirm

# Optimisation and training-loop settings.
train:
  batchsize: 8
  epoch: 50
  alpha: 0.1
  augment: true
  multi_gpu_mode: false
  num_workers: 4
  learning_rate: 0.005
  grad_clip_thresh: 1.0
  logger_step: 1000
  # No checkpoint is configured: loading is off and the path is unset.
  load_pretrained: false
  pretrained_path: null
  early_stopping: false
  # NOTE(review): indentation was lost in the original text. The two keys
  # with no value (multi_scale_loss, feature_loss) are rebuilt as nested
  # mappings under train, each holding the keys that follow it -- confirm
  # against the consumer's schema.
  multi_scale_loss:
    use_linear: true
    gamma: 1.0
  feature_loss:
    type: "mae"