Dongchao committed on
Commit
9552794
·
1 Parent(s): 229c19a

Upload 7 files

Browse files
codec/tts_model/ckpt_01215000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e8c35c42dc3cb9960a1d00e06956b17e5506829df4b132b5b11a55e2e14f116
3
+ size 276692277
codec/tts_model/config.yaml ADDED
@@ -0,0 +1,158 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ generator:
2
+ name: SoundStream
3
+ config:
4
+ n_filters: 32
5
+ D: 128
6
+ target_bandwidths:
7
+ - 0.5
8
+ - 1
9
+ - 1.5
10
+ - 2
11
+ - 4
12
+ - 6
13
+ ratios:
14
+ - 8
15
+ - 5
16
+ - 4
17
+ - 2
18
+ sample_rate: 16000
19
+ bins: 1024
20
+ d_list:
21
+ - mfd
22
+ mfd:
23
+ name: MultiFrequencyDiscriminator
24
+ config:
25
+ hop_lengths:
26
+ - 32
27
+ - 64
28
+ - 128
29
+ - 256
30
+ - 512
31
+ - 1024
32
+ hidden_channels:
33
+ - 64
34
+ - 128
35
+ - 256
36
+ - 512
37
+ - 512
38
+ - 512
39
+ domain: double
40
+ mel_scale: true
41
+ sample_rate: 16000
42
+ mpd:
43
+ name: MultiPeriodDiscriminator
44
+ config:
45
+ period_sizes:
46
+ - 2
47
+ - 3
48
+ - 5
49
+ - 7
50
+ - 11
51
+ period_kernel_size: 5
52
+ msd:
53
+ name: MultiScaleDiscriminator
54
+ config:
55
+ num_scales: 3
56
+ pool_kernel_size: 4
57
+ pool_stride: 2
58
+ optimizer:
59
+ g:
60
+ name: AdamW
61
+ config:
62
+ lr: 0.0002
63
+ betas:
64
+ - 0.8
65
+ - 0.99
66
+ eps: 1.0e-06
67
+ d:
68
+ name: AdamW
69
+ config:
70
+ lr: 0.0002
71
+ betas:
72
+ - 0.8
73
+ - 0.99
74
+ eps: 1.0e-06
75
+ lr_scheduler:
76
+ g:
77
+ name: ExponentialLR
78
+ config:
79
+ gamma: 0.999
80
+ d:
81
+ name: ExponentialLR
82
+ config:
83
+ gamma: 0.999
84
+ criterion:
85
+ g_criterion:
86
+ name: losses.generator_loss.GeneratorSTFTLoss
87
+ config:
88
+ use_mel_loss: false
89
+ adv_criterion: MSEGLoss
90
+ mel_loss_weight: 45
91
+ use_feature_match: true
92
+ feat_match_loss_weight: 20
93
+ use_full_stft_loss: true
94
+ use_sub_stft_loss: true
95
+ full_stft_loss_weight: 1
96
+ sub_stft_loss_weight: 1
97
+ mel_scale_loss:
98
+ sampling_rate: 16000
99
+ n_fft: 1024
100
+ num_mels: 80
101
+ hop_size: 160
102
+ win_size: 800
103
+ fmin: 0
104
+ full_multi_scale_stft_loss:
105
+ fft_sizes:
106
+ - 512
107
+ - 1024
108
+ - 2048
109
+ win_sizes:
110
+ - 480
111
+ - 960
112
+ - 1200
113
+ hop_sizes:
114
+ - 120
115
+ - 240
116
+ - 300
117
+ sub_multi_scale_stft_loss:
118
+ num_bands: 6
119
+ fft_sizes:
120
+ - 128
121
+ - 256
122
+ - 256
123
+ win_sizes:
124
+ - 80
125
+ - 120
126
+ - 200
127
+ hop_sizes:
128
+ - 20
129
+ - 40
130
+ - 50
131
+ d_criterion:
132
+ name: losses.discriminator_loss.MSEDiscriminatorLoss
133
+ config: null
134
+ commit_loss_weight: 1.0
135
+ training_file: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/data/codec_data_24k/train_valid_lists/train.lst
136
+ validation_file: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/data/codec_data_24k/train_valid_lists/valid_256.lst
137
+ seed: 2333
138
+ cudnn_deterministic: false
139
+ tensorboard: true
140
+ checkpoint_interval: 5000
141
+ summary_interval: 100
142
+ validation_interval: 5000
143
+ num_epoches: 5000
144
+ print_freq: 10
145
+ discriminator_iter_start: 0
146
+ num_ckpt_keep: 10
147
+ segment_size: 48000
148
+ audio_norm_scale: 0.95
149
+ batch_size: 8
150
+ num_workers: 8
151
+ num_plots: 8
152
+ local_rank: 0
153
+ basic_model_config: config/encodec_16k_6kbps_v3_vqdp.yaml
154
+ exp_model_config: null
155
+ log_dir: exp_log/encodec_16k_6kbps_v3_vqdp_1disc
156
+ ngpus_per_node: 8
157
+ sample_rate: 16000
158
+ model_ckpt_dir: exp_log/encodec_16k_6kbps_v3_vqdp_1disc/model_ckpts
codec/tts_model/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e8c35c42dc3cb9960a1d00e06956b17e5506829df4b132b5b11a55e2e14f116
3
+ size 276692277
codec/universal_model/ckpt_01455000.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a497cc8ef0e0819c23e9aaf7fd15d0b3bd7bb28817818f51c03cf591ca29e25
3
+ size 291880869
codec/universal_model/config.yaml ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ generator:
2
+ name: SoundStream
3
+ config:
4
+ n_filters: 32
5
+ D: 256
6
+ target_bandwidths:
7
+ - 0.5
8
+ - 1
9
+ - 1.5
10
+ - 2
11
+ - 4
12
+ ratios:
13
+ - 8
14
+ - 5
15
+ - 4
16
+ - 2
17
+ sample_rate: 16000
18
+ bins: 1024
19
+ d_list:
20
+ - mfd
21
+ mfd:
22
+ name: MultiFrequencyDiscriminator
23
+ config:
24
+ hop_lengths:
25
+ - 32
26
+ - 64
27
+ - 128
28
+ - 256
29
+ - 512
30
+ - 1024
31
+ hidden_channels:
32
+ - 64
33
+ - 128
34
+ - 256
35
+ - 512
36
+ - 512
37
+ - 512
38
+ domain: double
39
+ mel_scale: true
40
+ sample_rate: 16000
41
+ mpd:
42
+ name: MultiPeriodDiscriminator
43
+ config:
44
+ period_sizes:
45
+ - 2
46
+ - 3
47
+ - 5
48
+ - 7
49
+ - 11
50
+ period_kernel_size: 5
51
+ msd:
52
+ name: MultiScaleDiscriminator
53
+ config:
54
+ num_scales: 3
55
+ pool_kernel_size: 4
56
+ pool_stride: 2
57
+ optimizer:
58
+ g:
59
+ name: AdamW
60
+ config:
61
+ lr: 0.0002
62
+ betas:
63
+ - 0.8
64
+ - 0.99
65
+ eps: 1.0e-06
66
+ d:
67
+ name: AdamW
68
+ config:
69
+ lr: 0.0002
70
+ betas:
71
+ - 0.8
72
+ - 0.99
73
+ eps: 1.0e-06
74
+ lr_scheduler:
75
+ g:
76
+ name: ExponentialLR
77
+ config:
78
+ gamma: 0.999
79
+ d:
80
+ name: ExponentialLR
81
+ config:
82
+ gamma: 0.999
83
+ criterion:
84
+ g_criterion:
85
+ name: losses.generator_loss.GeneratorSTFTLoss
86
+ config:
87
+ use_mel_loss: false
88
+ adv_criterion: MSEGLoss
89
+ mel_loss_weight: 45
90
+ use_feature_match: true
91
+ feat_match_loss_weight: 20
92
+ use_full_stft_loss: true
93
+ use_sub_stft_loss: true
94
+ full_stft_loss_weight: 1
95
+ sub_stft_loss_weight: 1
96
+ mel_scale_loss:
97
+ sampling_rate: 16000
98
+ n_fft: 1024
99
+ num_mels: 80
100
+ hop_size: 160
101
+ win_size: 800
102
+ fmin: 0
103
+ full_multi_scale_stft_loss:
104
+ fft_sizes:
105
+ - 512
106
+ - 1024
107
+ - 2048
108
+ win_sizes:
109
+ - 480
110
+ - 960
111
+ - 1200
112
+ hop_sizes:
113
+ - 120
114
+ - 240
115
+ - 300
116
+ sub_multi_scale_stft_loss:
117
+ num_bands: 6
118
+ fft_sizes:
119
+ - 128
120
+ - 256
121
+ - 256
122
+ win_sizes:
123
+ - 80
124
+ - 120
125
+ - 200
126
+ hop_sizes:
127
+ - 20
128
+ - 40
129
+ - 50
130
+ d_criterion:
131
+ name: losses.discriminator_loss.MSEDiscriminatorLoss
132
+ config: null
133
+ commit_loss_weight: 1.0
134
+ training_file: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/audio_encodec/group_vqvae_16k_res2/big_data/train.lst
135
+ validation_file: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/audio_encodec/group_vqvae_16k_res2/big_data/val.lst
136
+ seed: 2333
137
+ cudnn_deterministic: false
138
+ tensorboard: true
139
+ checkpoint_interval: 5000
140
+ summary_interval: 100
141
+ validation_interval: 5000
142
+ num_epoches: 5000
143
+ print_freq: 10
144
+ discriminator_iter_start: 0
145
+ num_ckpt_keep: 10
146
+ segment_size: 32000
147
+ audio_norm_scale: 0.95
148
+ batch_size: 8
149
+ num_workers: 8
150
+ num_plots: 8
151
+ local_rank: 1
152
+ basic_model_config: config/encodec_16k_6kbps_v3_vqdp.yaml
153
+ exp_model_config: null
154
+ log_dir: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/SoundStream2/log/2022-11-14-19-48/exp/encodec_16k_6kbps_v3_vqdp_1disc
155
+ ngpus_per_node: 8
156
+ sample_rate: 16000
157
+ model_ckpt_dir: /apdcephfs_cq2/share_1297902/speech_user/shaunxliu/dongchao/code/SoundStream2/log/2022-11-14-19-48/exp/encodec_16k_6kbps_v3_vqdp_1disc/model_ckpts
codec/universal_model/model.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a497cc8ef0e0819c23e9aaf7fd15d0b3bd7bb28817818f51c03cf591ca29e25
3
+ size 291880869
hubert_base_ls960.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1703cf8d2cdc76f8c046f5f6a9bcd224e0e6caf4744cad1a1f4199c32cac8c8d
3
+ size 1136468879