hexgrad committed on
Commit e5a80f9 · verified · 1 Parent(s): cbd3e80

Upload app.py

Files changed (1)
  1. app.py +3 -3
app.py CHANGED
@@ -115,12 +115,13 @@ SAMPLE_RATE = 24000
 
 @spaces.GPU(duration=10)
 @torch.no_grad()
-def forward(tokens, ref_s, speed):
+def forward(tokens, voice, speed):
     tokens = torch.LongTensor([[0, *tokens, 0]]).to(device)
     input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)
     text_mask = length_to_mask(input_lengths).to(device)
     bert_dur = model.bert(tokens, attention_mask=(~text_mask).int())
     d_en = model.bert_encoder(bert_dur).transpose(-1, -2)
+    ref_s = VOICES[voice]
     s = ref_s[:, 128:]
     d = model.predictor.text_encoder(d_en, s, input_lengths, text_mask)
     x, _ = model.predictor.lstm(d)
@@ -147,8 +148,7 @@ def generate(text, voice, ps=None, speed=1.0, reduce_noise=0.5, opening_cut=5000
     elif len(tokens) > 510:
         tokens = tokens[:510]
     ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
-    ref_s = VOICES[voice]
-    out = forward(tokens, ref_s, speed)
+    out = forward(tokens, voice, speed)
     if reduce_noise > 0:
         out = nr.reduce_noise(y=out, sr=SAMPLE_RATE, prop_decrease=reduce_noise, n_fft=512)
     opening_cut = max(0, int(opening_cut / speed))
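
For readers following the refactor, here is a minimal, hypothetical sketch of the pattern this commit moves to: generate() now passes the voice name (a plain string) and the @spaces.GPU-decorated forward() resolves the reference style tensor itself via VOICES[voice]. The model, the real VOICES contents, and the decorator live elsewhere in app.py, so the values and shapes below are stand-ins, not the repository's actual code.

import torch

# Hypothetical stand-ins for objects defined elsewhere in app.py.
VOICES = {'af': torch.zeros(1, 256)}
SAMPLE_RATE = 24000

def forward(tokens, voice, speed):
    # As of this commit, the voice-to-style lookup happens inside the
    # GPU-decorated function, so only a short string crosses the
    # @spaces.GPU boundary instead of a tensor.
    ref_s = VOICES[voice]
    s = ref_s[:, 128:]  # style half of the reference embedding, as in the diff
    # ... model inference elided; the real function returns a waveform ...
    return torch.zeros(SAMPLE_RATE)  # placeholder 1-second silent output

out = forward([16, 43, 102], 'af', speed=1.0)  # call site mirrors generate()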