{ "gpt": { "gpt_max_audio_tokens": 630, "gpt_max_text_tokens": 402, "gpt_max_prompt_tokens": 70, "gpt_layers": 30, "gpt_n_model_channels": 1024, "gpt_n_heads": 16, "gpt_number_text_tokens": 57341, "gpt_start_text_token": 57187, "gpt_stop_text_token": 57184, "gpt_num_audio_tokens": 16386, "gpt_start_audio_token": 16384, "gpt_stop_audio_token": 16385, "gpt_code_stride_len": 640, "duration_const": 102400, "min_conditioning_length": 48000, "max_conditioning_length": 128000, "max_wav_length": 320000, "max_text_length": 200 }, "flow": { "output_size": 100, "input_embedding": { "out_channels": 512, "codebook_path": "fireredtts/modules/flow/codebook.npy", "freeze": true }, "encoder": { "input_size": 512, "output_size": 512, "attention_heads": 8, "linear_units": 2048, "num_blocks": 6, "dropout_rate": 0.01, "srcattention_start_index": 0, "srcattention_end_index": 2, "attention_dropout_rate": 0.01, "positional_dropout_rate": 0.01, "key_bias": true, "normalize_before": true }, "length_regulator": { "channels": 512, "num_blocks": 4 }, "mel_encoder": { "in_channels": 100, "out_channels": 512, "hidden_channels": 384, "reduction_rate": 4, "n_layers": 2, "n_blocks": 5, "kernel_size": 3 }, "decoder": { "t_scheduler": "cosine", "inference_cfg_rate": 0.7, "estimator": { "in_channels": 200, "out_channels": 100, "channels": [ 256, 256 ], "dropout": 0, "attention_head_dim": 64, "n_blocks": 4, "num_mid_blocks": 12, "num_heads": 8, "act_fn": "gelu" } } }, "bigvgan": { "num_mels": 100, "upsample_initial_channel": 1536, "upsample_rates": [ 5, 3, 2, 2, 2, 2 ], "upsample_kernel_sizes": [ 11, 7, 4, 4, 4, 4 ], "resblock_kernel_sizes": [ 3, 7, 11 ], "resblock_dilation_sizes": [ [ 1, 3, 5 ], [ 1, 3, 5 ], [ 1, 3, 5 ] ], "resblock_type": "1", "snake_logscale": true, "activation": "snakebeta", "use_tanh_at_final": false, "use_bias_at_final": false, "use_cuda_kernel": false } }