Spaces:
Running
on
Zero
Running
on
Zero
Upload app.py
Browse files
app.py
CHANGED
@@ -115,12 +115,13 @@ SAMPLE_RATE = 24000
|
|
115 |
|
116 |
@spaces.GPU(duration=10)
|
117 |
@torch.no_grad()
|
118 |
-
def forward(tokens, ref_s, speed):
|
119 |
tokens = torch.LongTensor([[0, *tokens, 0]]).to(device)
|
120 |
input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)
|
121 |
text_mask = length_to_mask(input_lengths).to(device)
|
122 |
bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())
|
123 |
d_en = model.bert_encoder(bert_dur).transpose(-1, -2)
|
|
|
124 |
s = ref_s[:, 128:]
|
125 |
d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask)
|
126 |
x, _ = model.predictor.lstm(d)
|
@@ -147,8 +148,7 @@ def generate(text, voice, ps=None, speed=1.0, reduce_noise=0.5, opening_cut=5000
|
|
147 |
elif len(tokens) > 510:
|
148 |
tokens = tokens[:510]
|
149 |
ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
|
150 |
-
|
151 |
-
out = forward(tokens, ref_s, speed)
|
152 |
if reduce_noise > 0:
|
153 |
out = nr.reduce_noise(y=out, sr=SAMPLE_RATE, prop_decrease=reduce_noise, n_fft=512)
|
154 |
opening_cut = max(0, int(opening_cut / speed))
|
|
|
115 |
|
116 |
@spaces.GPU(duration=10)
|
117 |
@torch.no_grad()
|
118 |
+
def forward(tokens, voice, speed):
|
119 |
tokens = torch.LongTensor([[0, *tokens, 0]]).to(device)
|
120 |
input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)
|
121 |
text_mask = length_to_mask(input_lengths).to(device)
|
122 |
bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())
|
123 |
d_en = model.bert_encoder(bert_dur).transpose(-1, -2)
|
124 |
+
ref_s = VOICES[voice]
|
125 |
s = ref_s[:, 128:]
|
126 |
d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask)
|
127 |
x, _ = model.predictor.lstm(d)
|
|
|
148 |
elif len(tokens) > 510:
|
149 |
tokens = tokens[:510]
|
150 |
ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
|
151 |
+
out = forward(tokens, voice, speed)
|
|
|
152 |
if reduce_noise > 0:
|
153 |
out = nr.reduce_noise(y=out, sr=SAMPLE_RATE, prop_decrease=reduce_noise, n_fft=512)
|
154 |
opening_cut = max(0, int(opening_cut / speed))
|