Text-to-Speech
ESPnet
Icelandic
GunnarThor committed on
Commit
3cc9d65
·
1 Parent(s): f878369

add model files

Browse files
Files changed (23) hide show
  1. dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark +0 -0
  2. dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.scp +40 -0
  3. dump/xvector/eval1_1and2_spk_avg_phn/spk_xvector.ark +0 -0
  4. dump/xvector/eval1_1and2_spk_avg_phn/spk_xvector.scp +40 -0
  5. dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark +0 -0
  6. dump/xvector/train_1and2_spk_avg_phn/spk_xvector.scp +44 -0
  7. exp/tts_stats_raw_phn_none/train/feats_stats.npz +3 -0
  8. exp/tts_xvector_fastspeech2_spk_avg_combined/config.yaml +298 -0
  9. exp/tts_xvector_fastspeech2_spk_avg_combined/images/backward_time.png +0 -0
  10. exp/tts_xvector_fastspeech2_spk_avg_combined/images/decoder_alpha.png +0 -0
  11. exp/tts_xvector_fastspeech2_spk_avg_combined/images/duration_loss.png +0 -0
  12. exp/tts_xvector_fastspeech2_spk_avg_combined/images/encoder_alpha.png +0 -0
  13. exp/tts_xvector_fastspeech2_spk_avg_combined/images/energy_loss.png +0 -0
  14. exp/tts_xvector_fastspeech2_spk_avg_combined/images/forward_time.png +0 -0
  15. exp/tts_xvector_fastspeech2_spk_avg_combined/images/gpu_max_cached_mem_GB.png +0 -0
  16. exp/tts_xvector_fastspeech2_spk_avg_combined/images/iter_time.png +0 -0
  17. exp/tts_xvector_fastspeech2_spk_avg_combined/images/l1_loss.png +0 -0
  18. exp/tts_xvector_fastspeech2_spk_avg_combined/images/loss.png +0 -0
  19. exp/tts_xvector_fastspeech2_spk_avg_combined/images/optim0_lr0.png +0 -0
  20. exp/tts_xvector_fastspeech2_spk_avg_combined/images/optim_step_time.png +0 -0
  21. exp/tts_xvector_fastspeech2_spk_avg_combined/images/pitch_loss.png +0 -0
  22. exp/tts_xvector_fastspeech2_spk_avg_combined/images/train_time.png +0 -0
  23. exp/tts_xvector_fastspeech2_spk_avg_combined/valid.loss.ave_5best.pth +3 -0
dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark ADDED
Binary file (82.5 kB). View file
 
dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.scp ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ a dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:2
2
+ b dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:2062
3
+ c dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:4122
4
+ d dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:6182
5
+ e dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:8242
6
+ f dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:10302
7
+ g dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:12362
8
+ h dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:14422
9
+ s124 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:16485
10
+ s146 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:18548
11
+ s157 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:20611
12
+ s162 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:22674
13
+ s169 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:24737
14
+ s176 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:26800
15
+ s180 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:28863
16
+ s181 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:30926
17
+ s185 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:32989
18
+ s187 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:35052
19
+ s188 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:37115
20
+ s200 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:39178
21
+ s206 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:41241
22
+ s208 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:43304
23
+ s209 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:45367
24
+ s214 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:47430
25
+ s215 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:49493
26
+ s216 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:51556
27
+ s220 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:53619
28
+ s223 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:55682
29
+ s225 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:57745
30
+ s226 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:59808
31
+ s228 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:61871
32
+ s231 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:63934
33
+ s235 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:65997
34
+ s247 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:68060
35
+ s250 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:70123
36
+ s251 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:72186
37
+ s256 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:74249
38
+ s264 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:76312
39
+ s268 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:78375
40
+ s273 dump/xvector/dev_1and2_spk_avg_phn/spk_xvector.ark:80438
dump/xvector/eval1_1and2_spk_avg_phn/spk_xvector.ark ADDED
Binary file (82.5 kB). View file
 
dump/xvector/eval1_1and2_spk_avg_phn/spk_xvector.scp ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ s124 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:5
2
+ s146 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:2068
3
+ s157 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:4131
4
+ s162 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:6194
5
+ s169 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:8257
6
+ s176 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:10320
7
+ s178 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:12383
8
+ s180 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:14446
9
+ s181 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:16509
10
+ s185 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:18572
11
+ s186 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:20635
12
+ s187 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:22698
13
+ s188 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:24761
14
+ s200 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:26824
15
+ s206 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:28887
16
+ s208 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:30950
17
+ s209 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:33013
18
+ s214 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:35076
19
+ s215 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:37139
20
+ s216 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:39202
21
+ s220 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:41265
22
+ s221 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:43328
23
+ s222 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:45391
24
+ s223 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:47454
25
+ s225 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:49517
26
+ s226 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:51580
27
+ s228 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:53643
28
+ s231 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:55706
29
+ s234 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:57769
30
+ s235 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:59832
31
+ s236 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:61895
32
+ s240 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:63958
33
+ s247 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:66021
34
+ s250 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:68084
35
+ s251 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:70147
36
+ s256 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:72210
37
+ s258 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:74273
38
+ s264 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:76336
39
+ s268 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:78399
40
+ s273 dump/xvector/eval1_aug_clean2_phn/spk_xvector.ark:80462
dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark ADDED
Binary file (90.7 kB). View file
 
dump/xvector/train_1and2_spk_avg_phn/spk_xvector.scp ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ a dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:2
2
+ b dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:2062
3
+ c dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:4122
4
+ d dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:6182
5
+ e dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:8242
6
+ f dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:10302
7
+ g dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:12362
8
+ h dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:14422
9
+ s124 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:16485
10
+ s146 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:18548
11
+ s157 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:20611
12
+ s162 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:22674
13
+ s169 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:24737
14
+ s176 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:26800
15
+ s178 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:28863
16
+ s180 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:30926
17
+ s181 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:32989
18
+ s185 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:35052
19
+ s186 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:37115
20
+ s187 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:39178
21
+ s188 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:41241
22
+ s200 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:43304
23
+ s206 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:45367
24
+ s208 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:47430
25
+ s209 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:49493
26
+ s215 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:51556
27
+ s216 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:53619
28
+ s221 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:55682
29
+ s222 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:57745
30
+ s223 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:59808
31
+ s225 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:61871
32
+ s226 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:63934
33
+ s231 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:65997
34
+ s234 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:68060
35
+ s235 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:70123
36
+ s236 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:72186
37
+ s240 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:74249
38
+ s247 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:76312
39
+ s251 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:78375
40
+ s256 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:80438
41
+ s258 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:82501
42
+ s264 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:84564
43
+ s268 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:86627
44
+ s273 dump/xvector/train_1and2_spk_avg_phn/spk_xvector.ark:88690
exp/tts_stats_raw_phn_none/train/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2144ef98aeb791059fe387320b5015c24d31f5138c480b0e5104f394b187e2e0
3
+ size 1402
exp/tts_xvector_fastspeech2_spk_avg_combined/config.yaml ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: ./conf/tuning/train_xvector_fastspeech2.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/tts_xvector_fastspeech2_spk_avg_combined
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 300
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - loss
39
+ - min
40
+ - - train
41
+ - loss
42
+ - min
43
+ keep_nbest_models: 5
44
+ nbest_averaging_interval: 0
45
+ grad_clip: 1.0
46
+ grad_clip_type: 2.0
47
+ grad_noise: false
48
+ accum_grad: 8
49
+ no_forward_run: false
50
+ resume: true
51
+ train_dtype: float32
52
+ use_amp: false
53
+ log_interval: null
54
+ use_matplotlib: true
55
+ use_tensorboard: true
56
+ create_graph_in_tensorboard: false
57
+ use_wandb: false
58
+ wandb_project: null
59
+ wandb_id: null
60
+ wandb_entity: null
61
+ wandb_name: null
62
+ wandb_model_log_interval: -1
63
+ detect_anomaly: false
64
+ pretrain_path: null
65
+ init_param: []
66
+ ignore_init_mismatch: false
67
+ freeze_param: []
68
+ num_iters_per_epoch: 800
69
+ batch_size: 20
70
+ valid_batch_size: null
71
+ batch_bins: 4500000
72
+ valid_batch_bins: null
73
+ train_shape_file:
74
+ - exp/tts_stats_raw_phn_none/train/text_shape.phn
75
+ - exp/tts_stats_raw_phn_none/train/speech_shape
76
+ valid_shape_file:
77
+ - exp/tts_stats_raw_phn_none/valid/text_shape.phn
78
+ - exp/tts_stats_raw_phn_none/valid/speech_shape
79
+ batch_type: numel
80
+ valid_batch_type: null
81
+ fold_length:
82
+ - 150
83
+ - 204800
84
+ sort_in_batch: descending
85
+ sort_batch: descending
86
+ multiple_iterator: false
87
+ chunk_length: 500
88
+ chunk_shift_ratio: 0.5
89
+ num_cache_chunks: 1024
90
+ train_data_path_and_name_and_type:
91
+ - - dump/raw/train_1and2_spk_avg_phn/text
92
+ - text
93
+ - text
94
+ - - data/train_1and2_spk_avg_phn/durations
95
+ - durations
96
+ - text_int
97
+ - - dump/raw/train_1and2_spk_avg_phn/wav.scp
98
+ - speech
99
+ - sound
100
+ - - exp/tts_stats_raw_phn_none/train/collect_feats/pitch.scp
101
+ - pitch
102
+ - npy
103
+ - - exp/tts_stats_raw_phn_none/train/collect_feats/energy.scp
104
+ - energy
105
+ - npy
106
+ - - dump/xvector/train_1and2_spk_avg_phn/xvector.scp
107
+ - spembs
108
+ - kaldi_ark
109
+ valid_data_path_and_name_and_type:
110
+ - - dump/raw/dev_1and2_spk_avg_phn/text
111
+ - text
112
+ - text
113
+ - - data/dev_1and2_spk_avg_phn/durations
114
+ - durations
115
+ - text_int
116
+ - - dump/raw/dev_1and2_spk_avg_phn/wav.scp
117
+ - speech
118
+ - sound
119
+ - - exp/tts_stats_raw_phn_none/valid/collect_feats/pitch.scp
120
+ - pitch
121
+ - npy
122
+ - - exp/tts_stats_raw_phn_none/valid/collect_feats/energy.scp
123
+ - energy
124
+ - npy
125
+ - - dump/xvector/dev_1and2_spk_avg_phn/xvector.scp
126
+ - spembs
127
+ - kaldi_ark
128
+ allow_variable_data_keys: false
129
+ max_cache_size: 0.0
130
+ max_cache_fd: 32
131
+ valid_max_cache_size: null
132
+ exclude_weight_decay: false
133
+ exclude_weight_decay_conf: {}
134
+ optim: adam
135
+ optim_conf:
136
+ lr: 1.0
137
+ scheduler: noamlr
138
+ scheduler_conf:
139
+ model_size: 384
140
+ warmup_steps: 4000
141
+ token_list:
142
+ - <blank>
143
+ - <unk>
144
+ - a
145
+ - r
146
+ - sil
147
+ - I
148
+ - t
149
+ - n
150
+ - s
151
+ - D
152
+ - Y
153
+ - E
154
+ - l
155
+ - v
156
+ - m
157
+ - h
158
+ - k
159
+ - j
160
+ - G
161
+ - T
162
+ - f
163
+ - p
164
+ - 'E:'
165
+ - c
166
+ - i
167
+ - 'au:'
168
+ - 'O:'
169
+ - 'a:'
170
+ - ei
171
+ - 'i:'
172
+ - r_0
173
+ - t_h
174
+ - O
175
+ - k_h
176
+ - ou
177
+ - ai
178
+ - '9'
179
+ - au
180
+ - 'I:'
181
+ - 'ou:'
182
+ - u
183
+ - 'ei:'
184
+ - N
185
+ - l_0
186
+ - 'u:'
187
+ - n_0
188
+ - '9:'
189
+ - 'ai:'
190
+ - 9i
191
+ - c_h
192
+ - p_h
193
+ - x
194
+ - C
195
+ - '9i:'
196
+ - 'Y:'
197
+ - J
198
+ - N_0
199
+ - m_0
200
+ - Oi
201
+ - Yi
202
+ - J_0
203
+ - spn
204
+ - '1'
205
+ - '7'
206
+ - <sos/eos>
207
+ odim: null
208
+ model_conf: {}
209
+ use_preprocessor: true
210
+ token_type: phn
211
+ bpemodel: null
212
+ non_linguistic_symbols: null
213
+ cleaner: null
214
+ g2p: g2p_is
215
+ feats_extract: fbank
216
+ feats_extract_conf:
217
+ n_fft: 1024
218
+ hop_length: 256
219
+ win_length: null
220
+ fs: 22050
221
+ fmin: 80
222
+ fmax: 7600
223
+ n_mels: 80
224
+ normalize: global_mvn
225
+ normalize_conf:
226
+ stats_file: exp/tts_stats_raw_phn_none/train/feats_stats.npz
227
+ tts: fastspeech2
228
+ tts_conf:
229
+ adim: 384
230
+ aheads: 2
231
+ elayers: 4
232
+ eunits: 1536
233
+ dlayers: 4
234
+ dunits: 1536
235
+ positionwise_layer_type: conv1d
236
+ positionwise_conv_kernel_size: 3
237
+ duration_predictor_layers: 2
238
+ duration_predictor_chans: 256
239
+ duration_predictor_kernel_size: 3
240
+ postnet_layers: 5
241
+ postnet_filts: 5
242
+ postnet_chans: 256
243
+ use_masking: true
244
+ use_scaled_pos_enc: true
245
+ encoder_normalize_before: true
246
+ decoder_normalize_before: true
247
+ reduction_factor: 1
248
+ init_type: xavier_uniform
249
+ init_enc_alpha: 1.0
250
+ init_dec_alpha: 1.0
251
+ transformer_enc_dropout_rate: 0.2
252
+ transformer_enc_positional_dropout_rate: 0.2
253
+ transformer_enc_attn_dropout_rate: 0.2
254
+ transformer_dec_dropout_rate: 0.2
255
+ transformer_dec_positional_dropout_rate: 0.2
256
+ transformer_dec_attn_dropout_rate: 0.2
257
+ pitch_predictor_layers: 5
258
+ pitch_predictor_chans: 256
259
+ pitch_predictor_kernel_size: 5
260
+ pitch_predictor_dropout: 0.5
261
+ pitch_embed_kernel_size: 1
262
+ pitch_embed_dropout: 0.0
263
+ stop_gradient_from_pitch_predictor: true
264
+ energy_predictor_layers: 2
265
+ energy_predictor_chans: 256
266
+ energy_predictor_kernel_size: 3
267
+ energy_predictor_dropout: 0.5
268
+ energy_embed_kernel_size: 1
269
+ energy_embed_dropout: 0.0
270
+ stop_gradient_from_energy_predictor: false
271
+ spk_embed_dim: 512
272
+ spk_embed_integration_type: add
273
+ pitch_extract: dio
274
+ pitch_extract_conf:
275
+ fs: 22050
276
+ n_fft: 1024
277
+ hop_length: 256
278
+ f0max: 400
279
+ f0min: 80
280
+ reduction_factor: 1
281
+ pitch_normalize: global_mvn
282
+ pitch_normalize_conf:
283
+ stats_file: exp/tts_stats_raw_phn_none/train/pitch_stats.npz
284
+ energy_extract: energy
285
+ energy_extract_conf:
286
+ fs: 22050
287
+ n_fft: 1024
288
+ hop_length: 256
289
+ win_length: null
290
+ reduction_factor: 1
291
+ energy_normalize: global_mvn
292
+ energy_normalize_conf:
293
+ stats_file: exp/tts_stats_raw_phn_none/train/energy_stats.npz
294
+ required:
295
+ - output_dir
296
+ - token_list
297
+ version: '202301'
298
+ distributed: false
exp/tts_xvector_fastspeech2_spk_avg_combined/images/backward_time.png ADDED
exp/tts_xvector_fastspeech2_spk_avg_combined/images/decoder_alpha.png ADDED
exp/tts_xvector_fastspeech2_spk_avg_combined/images/duration_loss.png ADDED
exp/tts_xvector_fastspeech2_spk_avg_combined/images/encoder_alpha.png ADDED
exp/tts_xvector_fastspeech2_spk_avg_combined/images/energy_loss.png ADDED
exp/tts_xvector_fastspeech2_spk_avg_combined/images/forward_time.png ADDED
exp/tts_xvector_fastspeech2_spk_avg_combined/images/gpu_max_cached_mem_GB.png ADDED
exp/tts_xvector_fastspeech2_spk_avg_combined/images/iter_time.png ADDED
exp/tts_xvector_fastspeech2_spk_avg_combined/images/l1_loss.png ADDED
exp/tts_xvector_fastspeech2_spk_avg_combined/images/loss.png ADDED
exp/tts_xvector_fastspeech2_spk_avg_combined/images/optim0_lr0.png ADDED
exp/tts_xvector_fastspeech2_spk_avg_combined/images/optim_step_time.png ADDED
exp/tts_xvector_fastspeech2_spk_avg_combined/images/pitch_loss.png ADDED
exp/tts_xvector_fastspeech2_spk_avg_combined/images/train_time.png ADDED
exp/tts_xvector_fastspeech2_spk_avg_combined/valid.loss.ave_5best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bed0827def029bb698090861215ffdd30cf688b7ce3a2ae4a7157979e5c84e34
3
+ size 149493989