Spaces:

naonauno
/

dialogs2-factory

Paused

dialogs2-factory / config /Vq32ToVq8192.json

Upload 855 files

d66c48f verified 23 days ago

1.72 kB

	{
		// Configuration for an "AutoregressiveTransformer" model.
		"model_type": "AutoregressiveTransformer",

		// Dataset names used with this configuration.
		"dataset": [
			"emilia"
		],

		// Audio / feature-extraction settings.
		"preprocess": {
			"hop_size": 320,
			"sample_rate": 16000, // HuBERT and WavLM both use 16000
			"n_fft": 1920,
			"num_mels": 128,
			"win_size": 1920,
			"fmin": 0,
			// NOTE(review): fmax (12000) is above sample_rate / 2 (8000) -- confirm
			// whether these mel settings are actually used with 16000 Hz audio.
			"fmax": 12000,
			"mel_var": 8.14,
			"mel_mean": -4.92,
			"processed_dir": "",
			"valid_file": "valid.json",
			"train_file": "train.json",
			"min_dur": 3,
			"max_dur": 30,
			"load_phone": true
		},

		"model": {
			"autoregressive_transformer": {
				// 1056 = 32 + 1024; presumably vc_input_vocab_size + tts_input_vocab_size
				// below -- TODO confirm against the model code.
				"input_vocab_size": 1056,
				// Same value as output_repcodec.codebook_size below.
				"output_vocab_size": 8192,
				"hidden_size": 1536,
				"intermediate_size": 6144,
				"num_hidden_layers": 12,
				"num_attention_heads": 16,
				"use_global_style_encoder": false
			},

			// Input / output token settings for the VC and TTS paths.
			"train_both_vc_and_tts": true,
			"vc_input_token_type": "hubert_vevo_codec",
			"vc_input_vocab_size": 32,
			"tts_input_token_type": "g2p",
			"tts_input_vocab_size": 1024,
			"output_token_type": "hubert_codec",
			"representation_stat_mean_var_path": "./Amphion/models/vc/vevo/config/hubert_large_l18_mean_std.npz",

			// Codec that produces the input tokens; its settings live in a separate YAML file.
			"input_repcodec": {
				"config_path": "./Amphion/models/vc/vevo/config/hubert_large_l18_c32.yaml"
			},

			// Codec that defines the output tokens.
			"output_repcodec": {
				"codebook_size": 8192, // size of the VQ codebook
				"hidden_size": 1024, // dimension of the representations
				"codebook_dim": 8,
				"vocos_dim": 384,
				"vocos_intermediate_dim": 2048,
				"vocos_num_layers": 12
			}
		}
	}