Florian Lux committed on
Commit
35dfe6e
·
1 Parent(s): f23c138

add package requirements

Browse files
InferenceInterfaces/Meta_FastSpeech2.py CHANGED
@@ -1,9 +1,7 @@
1
- import itertools
2
  import os
3
 
4
  import librosa.display as lbd
5
  import matplotlib.pyplot as plt
6
- import sounddevice
7
  import soundfile
8
  import torch
9
 
@@ -75,46 +73,3 @@ class Meta_FastSpeech2(torch.nn.Module):
75
  plt.subplots_adjust(left=0.05, bottom=0.1, right=0.95, top=.9, wspace=0.0, hspace=0.0)
76
  plt.show()
77
  return wave
78
-
79
- def read_to_file(self, text_list, file_location, silent=False, dur_list=None, pitch_list=None, energy_list=None):
80
- """
81
- :param silent: Whether to be verbose about the process
82
- :param text_list: A list of strings to be read
83
- :param file_location: The path and name of the file it should be saved to
84
- """
85
- if not dur_list:
86
- dur_list = []
87
- if not pitch_list:
88
- pitch_list = []
89
- if not energy_list:
90
- energy_list = []
91
- wav = None
92
- silence = torch.zeros([24000])
93
- for (text, durations, pitch, energy) in itertools.zip_longest(text_list, dur_list, pitch_list, energy_list):
94
- if text.strip() != "":
95
- if not silent:
96
- print("Now synthesizing: {}".format(text))
97
- if wav is None:
98
- if durations is not None:
99
- durations = durations.to(self.device)
100
- if pitch is not None:
101
- pitch = pitch.to(self.device)
102
- if energy is not None:
103
- energy = energy.to(self.device)
104
- wav = self(text, durations=durations, pitch=pitch, energy=energy).cpu()
105
- wav = torch.cat((wav, silence), 0)
106
- else:
107
- wav = torch.cat((wav, self(text, durations=durations.to(self.device), pitch=pitch.to(self.device), energy=energy.to(self.device)).cpu()), 0)
108
- wav = torch.cat((wav, silence), 0)
109
- soundfile.write(file=file_location, data=wav.cpu().numpy(), samplerate=48000)
110
-
111
- def read_aloud(self, text, view=False, blocking=False):
112
- if text.strip() == "":
113
- return
114
- wav = self(text, view).cpu()
115
- wav = torch.cat((wav, torch.zeros([24000])), 0)
116
- if not blocking:
117
- sounddevice.play(wav.numpy(), samplerate=48000)
118
- else:
119
- sounddevice.play(torch.cat((wav, torch.zeros([12000])), 0).numpy(), samplerate=48000)
120
- sounddevice.wait()
 
 
1
  import os
2
 
3
  import librosa.display as lbd
4
  import matplotlib.pyplot as plt
 
5
  import soundfile
6
  import torch
7
 
 
73
  plt.subplots_adjust(left=0.05, bottom=0.1, right=0.95, top=.9, wspace=0.0, hspace=0.0)
74
  plt.show()
75
  return wave
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
packages.txt.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ espeak-ng
2
+ libsndfile