Dionyssos committed
Commit bdfb608 · 1 Parent(s): a596c10

debug determinism - vits - duration predictor

Files changed (2)
  1. Modules/vits/models.py +3 -29
  2. msinference.py +1 -1
Modules/vits/models.py CHANGED
@@ -56,37 +56,11 @@ class StochasticDurationPredictor(nn.Module):
         x = self.proj(x) * x_mask
 
         if not reverse:
-            flows = self.flows
-            assert w is not None
-
-            logdet_tot_q = 0
-            h_w = self.post_pre(w)
-            h_w = self.post_convs(h_w, x_mask)
-            h_w = self.post_proj(h_w) * x_mask
-            e_q = torch.randn(w.size(0), 2, w.size(2)).to(device=x.device, dtype=x.dtype) * x_mask
-            z_q = e_q
-            for flow in self.post_flows:
-                z_q, logdet_q = flow(z_q, x_mask, g=(x + h_w))
-                logdet_tot_q += logdet_q
-            z_u, z1 = torch.split(z_q, [1, 1], 1)
-            u = torch.sigmoid(z_u) * x_mask
-            z0 = (w - u) * x_mask
-            logdet_tot_q += torch.sum((F.logsigmoid(z_u) + F.logsigmoid(-z_u)) * x_mask, [1,2])
-            logq = torch.sum(-0.5 * (math.log(2*math.pi) + (e_q**2)) * x_mask, [1,2]) - logdet_tot_q
-
-            logdet_tot = 0
-            z0, logdet = self.log_flow(z0, x_mask)
-            logdet_tot += logdet
-            z = torch.cat([z0, z1], 1)
-            for flow in flows:
-                z, logdet = flow(z, x_mask, g=x, reverse=reverse)
-                logdet_tot = logdet_tot + logdet
-            nll = torch.sum(0.5 * (math.log(2*math.pi) + (z**2)) * x_mask, [1,2]) - logdet_tot
-            return nll + logq # [b]
+            raise ValueError
         else:
             flows = list(reversed(self.flows))
             flows = flows[:-2] + [flows[-1]] # remove a useless vflow
-            z = torch.randn(x.size(0), 2, x.size(2)).to(device=x.device, dtype=x.dtype) * noise_scale
+            z = torch.zeros(x.size(0), 2, x.size(2)).to(device=x.device, dtype=x.dtype) #* noise_scale
             for flow in flows:
                 z = flow(z, x_mask, g=x, reverse=reverse)
             z0, z1 = torch.split(z, [1, 1], 1)
@@ -316,7 +290,7 @@ class SynthesizerTrn(nn.Module):
         m_p = torch.matmul(attn.squeeze(1), m_p.transpose(1, 2)).transpose(1, 2) # [b, t', t], [b, t, d] -> [b, d, t']
         logs_p = torch.matmul(attn.squeeze(1), logs_p.transpose(1, 2)).transpose(1, 2) # [b, t', t], [b, t, d] -> [b, d, t']
 
-        z_p = m_p + torch.randn_like(m_p) * torch.exp(logs_p) * noise_scale
+        z_p = m_p + torch.zeros_like(m_p) * torch.exp(logs_p)#* noise_scale
         z = self.flow(z_p, y_mask, g=g, reverse=True)
         o = self.dec((z * y_mask)[:,:,:max_len], g=g)
         return o, attn, y_mask, (z, z_p, m_p, logs_p)
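
Taken together, the models.py edits replace every noise draw on the inference path with a zero tensor: the duration-predictor flow now starts from zeros regardless of noise_scale_w, and the prior sample z_p collapses to the predicted mean m_p. Below is a minimal, standalone sketch of that principle; the helper names sample_prior / mean_prior and the toy tensor shapes are illustrative only and do not exist in the repository.

import torch

def sample_prior(m_p, logs_p, noise_scale):
    # Stochastic prior sample, as in the removed line:
    # z_p = m_p + randn_like(m_p) * exp(logs_p) * noise_scale
    return m_p + torch.randn_like(m_p) * torch.exp(logs_p) * noise_scale

def mean_prior(m_p, logs_p):
    # Deterministic variant after this commit: the noise tensor is all zeros,
    # so z_p reduces to the predicted mean m_p.
    return m_p + torch.zeros_like(m_p) * torch.exp(logs_p)

m_p = torch.randn(1, 192, 40)     # toy prior mean, [b, d, t']
logs_p = torch.randn(1, 192, 40)  # toy prior log-std

a = sample_prior(m_p, logs_p, 0.667)
b = sample_prior(m_p, logs_p, 0.667)
print(torch.equal(a, b))                          # False: two runs differ
print(torch.equal(mean_prior(m_p, logs_p), m_p))  # True: output is exactly m_p

The zeros_like multiplication is kept only to mirror the shape of the original line; numerically it is a no-op, so the deterministic path returns m_p exactly.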
msinference.py CHANGED
@@ -468,7 +468,7 @@ def foreign(text=None, # list of text
         net_g.infer(
             x_tst,
             x_tst_lengths,
-            noise_scale=0.667,
+            noise_scale=0, #0.667,
             noise_scale_w=1, #0, #0.8,
             length_scale=1.0 / speed)[0][0, 0].cpu().float().numpy()
         )
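
The msinference.py side makes the matching change at the call site: noise_scale=0 removes the last stochastic term from synthesis, so repeated calls on identical inputs should yield bit-identical audio. A hedged sketch of such a determinism check follows; infer_deterministic is a hypothetical helper, and net_g, x_tst, x_tst_lengths are assumed to be the model and input tensors already prepared inside foreign().

import numpy as np
import torch

def infer_deterministic(net_g, x_tst, x_tst_lengths, speed=1.0):
    # Sketch of the call made inside foreign() after this commit: with
    # noise_scale=0 here and the duration-predictor noise zeroed in
    # models.py, the synthesis path has no remaining random draws.
    with torch.no_grad():
        return net_g.infer(
            x_tst,
            x_tst_lengths,
            noise_scale=0,    # prior noise disabled by this commit
            noise_scale_w=1,  # no longer matters: the duration flow starts from zeros
            length_scale=1.0 / speed)[0][0, 0].cpu().float().numpy()

# wav_a = infer_deterministic(net_g, x_tst, x_tst_lengths)
# wav_b = infer_deterministic(net_g, x_tst, x_tst_lengths)
# assert np.array_equal(wav_a, wav_b)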