hhguo committed on
Commit
c29beed
·
1 Parent(s): 8cc3802

remove .DS_Store and add icons

Browse files
Files changed (1) hide show
  1. configs/config_24k.json +123 -0
configs/config_24k.json ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "gpt": {
3
+ "gpt_max_audio_tokens": 630,
4
+ "gpt_max_text_tokens": 402,
5
+ "gpt_max_prompt_tokens": 70,
6
+ "gpt_layers": 30,
7
+ "gpt_n_model_channels": 1024,
8
+ "gpt_n_heads": 16,
9
+ "gpt_number_text_tokens": 57341,
10
+ "gpt_start_text_token": 57187,
11
+ "gpt_stop_text_token": 57184,
12
+ "gpt_num_audio_tokens": 16386,
13
+ "gpt_start_audio_token": 16384,
14
+ "gpt_stop_audio_token": 16385,
15
+ "gpt_code_stride_len": 640,
16
+ "duration_const": 102400,
17
+ "min_conditioning_length": 48000,
18
+ "max_conditioning_length": 128000,
19
+ "max_wav_length": 320000,
20
+ "max_text_length": 200
21
+ },
22
+ "flow": {
23
+ "output_size": 100,
24
+ "input_embedding": {
25
+ "out_channels": 512,
26
+ "codebook_path": "fireredtts/modules/flow/codebook.npy",
27
+ "freeze": true
28
+ },
29
+ "encoder": {
30
+ "input_size": 512,
31
+ "output_size": 512,
32
+ "attention_heads": 8,
33
+ "linear_units": 2048,
34
+ "num_blocks": 6,
35
+ "dropout_rate": 0.01,
36
+ "srcattention_start_index": 0,
37
+ "srcattention_end_index": 2,
38
+ "attention_dropout_rate": 0.01,
39
+ "positional_dropout_rate": 0.01,
40
+ "key_bias": true,
41
+ "normalize_before": true
42
+ },
43
+ "length_regulator": {
44
+ "channels": 512,
45
+ "num_blocks": 4
46
+ },
47
+ "mel_encoder": {
48
+ "in_channels": 100,
49
+ "out_channels": 512,
50
+ "hidden_channels": 384,
51
+ "reduction_rate": 4,
52
+ "n_layers": 2,
53
+ "n_blocks": 5,
54
+ "kernel_size": 3
55
+ },
56
+ "decoder": {
57
+ "t_scheduler": "cosine",
58
+ "inference_cfg_rate": 0.7,
59
+ "estimator": {
60
+ "in_channels": 200,
61
+ "out_channels": 100,
62
+ "channels": [
63
+ 256,
64
+ 256
65
+ ],
66
+ "dropout": 0,
67
+ "attention_head_dim": 64,
68
+ "n_blocks": 4,
69
+ "num_mid_blocks": 12,
70
+ "num_heads": 8,
71
+ "act_fn": "gelu"
72
+ }
73
+ }
74
+ },
75
+ "bigvgan": {
76
+ "num_mels": 100,
77
+ "upsample_initial_channel": 1536,
78
+ "upsample_rates": [
79
+ 5,
80
+ 3,
81
+ 2,
82
+ 2,
83
+ 2,
84
+ 2
85
+ ],
86
+ "upsample_kernel_sizes": [
87
+ 11,
88
+ 7,
89
+ 4,
90
+ 4,
91
+ 4,
92
+ 4
93
+ ],
94
+ "resblock_kernel_sizes": [
95
+ 3,
96
+ 7,
97
+ 11
98
+ ],
99
+ "resblock_dilation_sizes": [
100
+ [
101
+ 1,
102
+ 3,
103
+ 5
104
+ ],
105
+ [
106
+ 1,
107
+ 3,
108
+ 5
109
+ ],
110
+ [
111
+ 1,
112
+ 3,
113
+ 5
114
+ ]
115
+ ],
116
+ "resblock_type": "1",
117
+ "snake_logscale": true,
118
+ "activation": "snakebeta",
119
+ "use_tanh_at_final": false,
120
+ "use_bias_at_final": false,
121
+ "use_cuda_kernel": false
122
+ }
123
+ }