wavlm_vrs_ivack_k688 / hyperparams.yaml
mtauro's picture
Update hyperparams.yaml
52d242f
# ############################################################################
# Model: WavLM large (microsoft/wavlm-large) for Emotion Recognition
############################################################################
# Required hyperparameter names
HPARAMS_NEEDED:
    - "out_n_neurons"
    - "label_encoder"
# Required module names
MODULES_NEEDED:
    - "transf"
    - "avg_pool"
    - "enc"
    - "classifier"
# Feature extractor: hub identifier passed as `source` to the transf module
wavlm_hub: "microsoft/wavlm-large"

# Pretrained folder (HuggingFace) that the pretrainer paths are built from
pretrained_path: "mtauro/wavlm_vrs_ivack_k688"

# Model parameters
# encoder_dim: 768   (disabled; enc and classifier in this file use 1024)
# Number of classifier outputs (referenced by classifier.out_neurons)
out_n_neurons: 2
# Pretrained feature extractor, built through SpeechBrain's HuggingFace
# wav2vec2-family wrapper. The arguments must be nested under the key so
# they are passed to the constructor rather than parsed as top-level keys.
transf: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
    source: !ref <wavlm_hub>
    # NOTE(review): presumably the local directory where the hub weights are
    # cached -- confirm against the installed SpeechBrain version.
    save_path: wavlm_checkpoint
# Pooling layer applied to the extracted features. Standard deviation
# output is switched off explicitly.
avg_pool: !new:speechbrain.nnet.pooling.StatisticsPooling
    return_std: False
# Single-block DNN placed between pooling and the classifier.
enc: !new:speechbrain.lobes.models.VanillaNN.VanillaNN
    input_shape: [null, null, 1024]
    dnn_blocks: 1
    dnn_neurons: 1024  # this will be the output size of the 3rd dimension
# Classification head; its output width follows out_n_neurons.
classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
    input_size: 1024  # matches dnn_neurons of enc (the 192 default is for ECAPA embeddings)
    out_neurons: !ref <out_n_neurons>

# Activation object instantiated with its default arguments.
softmax: !new:speechbrain.nnet.activations.Softmax
# Groups the pooling, encoder and classifier modules in one ModuleList.
# The sequence below is the positional-argument list of the constructor:
# a single argument, itself a list of three modules. transf is not included.
model: !new:torch.nn.ModuleList
    - [!ref <avg_pool>, !ref <enc>, !ref <classifier>]
# Name -> module mapping. Entries must be nested under `modules`;
# left flush-left they would redefine the top-level keys of the same name.
modules:
    transf: !ref <transf>
    avg_pool: !ref <avg_pool>
    enc: !ref <enc>
    classifier: !ref <classifier>
# Label <-> index encoder; its contents are loaded by the pretrainer below.
label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder

# Pretrainer: `loadables` names the objects to restore and `paths` gives the
# file for each one, built from pretrained_path. Both mappings use the same
# five keys and must be nested under the constructor.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        transf: !ref <transf>
        avg_pool: !ref <avg_pool>
        enc: !ref <enc>
        classifier: !ref <classifier>
        label_encoder: !ref <label_encoder>
    paths:
        transf: !ref <pretrained_path>/transf.ckpt
        avg_pool: !ref <pretrained_path>/avg_pool.ckpt
        enc: !ref <pretrained_path>/enc.ckpt
        classifier: !ref <pretrained_path>/classifier.ckpt
        label_encoder: !ref <pretrained_path>/label_encoder.txt