Spaces:

fireredteam
/

FireRedTTS

Running on Zero

App Files Files Community

hhguo committed on Sep 23, 2024

Commit

c29beed

1 Parent(s): 8cc3802

remove .DS_Store and add icons

Browse files

Files changed (1) hide show

configs/config_24k.json +123 -0

configs/config_24k.json ADDED Viewed

	@@ -0,0 +1,123 @@

+{
+    "gpt": {
+        "gpt_max_audio_tokens": 630,
+        "gpt_max_text_tokens": 402,
+        "gpt_max_prompt_tokens": 70,
+        "gpt_layers": 30,
+        "gpt_n_model_channels": 1024,
+        "gpt_n_heads": 16,
+        "gpt_number_text_tokens": 57341,
+        "gpt_start_text_token": 57187,
+        "gpt_stop_text_token": 57184,
+        "gpt_num_audio_tokens": 16386,
+        "gpt_start_audio_token": 16384,
+        "gpt_stop_audio_token": 16385,
+        "gpt_code_stride_len": 640,
+        "duration_const": 102400,
+        "min_conditioning_length": 48000,
+        "max_conditioning_length": 128000,
+        "max_wav_length": 320000,
+        "max_text_length": 200
+    },
+    "flow": {
+        "output_size": 100,
+        "input_embedding": {
+            "out_channels": 512,
+            "codebook_path": "fireredtts/modules/flow/codebook.npy",
+            "freeze": true
+        },
+        "encoder": {
+            "input_size": 512,
+            "output_size": 512,
+            "attention_heads": 8,
+            "linear_units": 2048,
+            "num_blocks": 6,
+            "dropout_rate": 0.01,
+            "srcattention_start_index": 0,
+            "srcattention_end_index": 2,
+            "attention_dropout_rate": 0.01,
+            "positional_dropout_rate": 0.01,
+            "key_bias": true,
+            "normalize_before": true
+        },
+        "length_regulator": {
+            "channels": 512,
+            "num_blocks": 4
+        },
+        "mel_encoder": {
+            "in_channels": 100,
+            "out_channels": 512,
+            "hidden_channels": 384,
+            "reduction_rate": 4,
+            "n_layers": 2,
+            "n_blocks": 5,
+            "kernel_size": 3
+        },
+        "decoder": {
+            "t_scheduler": "cosine",
+            "inference_cfg_rate": 0.7,
+            "estimator": {
+                "in_channels": 200,
+                "out_channels": 100,
+                "channels": [
+                    256,
+                    256
+                ],
+                "dropout": 0,
+                "attention_head_dim": 64,
+                "n_blocks": 4,
+                "num_mid_blocks": 12,
+                "num_heads": 8,
+                "act_fn": "gelu"
+            }
+        }
+    },
+    "bigvgan": {
+        "num_mels": 100,
+        "upsample_initial_channel": 1536,
+        "upsample_rates": [
+            5,
+            3,
+            2,
+            2,
+            2,
+            2
+        ],
+        "upsample_kernel_sizes": [
+            11,
+            7,
+            4,
+            4,
+            4,
+            4
+        ],
+        "resblock_kernel_sizes": [
+            3,
+            7,
+            11
+        ],
+        "resblock_dilation_sizes": [
+            [
+                1,
+                3,
+                5
+            ],
+            [
+                1,
+                3,
+                5
+            ],
+            [
+                1,
+                3,
+                5
+            ]
+        ],
+        "resblock_type": "1",
+        "snake_logscale": true,
+        "activation": "snakebeta",
+        "use_tanh_at_final": false,
+        "use_bias_at_final": false,
+        "use_cuda_kernel": false
+    }
+}