ArkanDash committed on
Commit
d03d6be
·
1 Parent(s): 7e7d175

feat: update infer

Browse files
Files changed (2) hide show
  1. config.py +0 -2
  2. vc_infer_pipeline.py +13 -11
config.py CHANGED
@@ -71,8 +71,6 @@ class Config:
71
  / 1024
72
  + 0.4
73
  )
74
- if self.gpu_mem <= 4:
75
- nope = None
76
  elif torch.backends.mps.is_available():
77
  print("没有发现支持的N卡, 使用MPS进行推理")
78
  self.device = "mps"
 
71
  / 1024
72
  + 0.4
73
  )
 
 
74
  elif torch.backends.mps.is_available():
75
  print("没有发现支持的N卡, 使用MPS进行推理")
76
  self.device = "mps"
vc_infer_pipeline.py CHANGED
@@ -162,7 +162,7 @@ class VC(object):
162
  big_npy,
163
  index_rate,
164
  version,
165
- protect
166
  ): # ,file_index,file_big_npy
167
  feats = torch.from_numpy(audio0)
168
  if self.is_half:
@@ -184,8 +184,8 @@ class VC(object):
184
  with torch.no_grad():
185
  logits = model.extract_features(**inputs)
186
  feats = model.final_proj(logits[0]) if version == "v1" else logits[0]
187
- if(protect<0.5):
188
- feats0=feats.clone()
189
  if (
190
  isinstance(index, type(None)) == False
191
  and isinstance(big_npy, type(None)) == False
@@ -211,8 +211,10 @@ class VC(object):
211
  )
212
 
213
  feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
214
- if(protect<0.5):
215
- feats0 = F.interpolate(feats0.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
 
 
216
  t1 = ttime()
217
  p_len = audio0.shape[0] // self.window
218
  if feats.shape[1] < p_len:
@@ -221,13 +223,13 @@ class VC(object):
221
  pitch = pitch[:, :p_len]
222
  pitchf = pitchf[:, :p_len]
223
 
224
- if(protect<0.5):
225
  pitchff = pitchf.clone()
226
  pitchff[pitchf > 0] = 1
227
  pitchff[pitchf < 1] = protect
228
  pitchff = pitchff.unsqueeze(-1)
229
  feats = feats * pitchff + feats0 * (1 - pitchff)
230
- feats=feats.to(feats0.dtype)
231
  p_len = torch.tensor([p_len], device=self.device).long()
232
  with torch.no_grad():
233
  if pitch != None and pitchf != None:
@@ -356,7 +358,7 @@ class VC(object):
356
  big_npy,
357
  index_rate,
358
  version,
359
- protect
360
  )[self.t_pad_tgt : -self.t_pad_tgt]
361
  )
362
  else:
@@ -373,7 +375,7 @@ class VC(object):
373
  big_npy,
374
  index_rate,
375
  version,
376
- protect
377
  )[self.t_pad_tgt : -self.t_pad_tgt]
378
  )
379
  s = t
@@ -391,7 +393,7 @@ class VC(object):
391
  big_npy,
392
  index_rate,
393
  version,
394
- protect
395
  )[self.t_pad_tgt : -self.t_pad_tgt]
396
  )
397
  else:
@@ -408,7 +410,7 @@ class VC(object):
408
  big_npy,
409
  index_rate,
410
  version,
411
- protect
412
  )[self.t_pad_tgt : -self.t_pad_tgt]
413
  )
414
  audio_opt = np.concatenate(audio_opt)
 
162
  big_npy,
163
  index_rate,
164
  version,
165
+ protect,
166
  ): # ,file_index,file_big_npy
167
  feats = torch.from_numpy(audio0)
168
  if self.is_half:
 
184
  with torch.no_grad():
185
  logits = model.extract_features(**inputs)
186
  feats = model.final_proj(logits[0]) if version == "v1" else logits[0]
187
+ if protect < 0.5:
188
+ feats0 = feats.clone()
189
  if (
190
  isinstance(index, type(None)) == False
191
  and isinstance(big_npy, type(None)) == False
 
211
  )
212
 
213
  feats = F.interpolate(feats.permute(0, 2, 1), scale_factor=2).permute(0, 2, 1)
214
+ if protect < 0.5:
215
+ feats0 = F.interpolate(feats0.permute(0, 2, 1), scale_factor=2).permute(
216
+ 0, 2, 1
217
+ )
218
  t1 = ttime()
219
  p_len = audio0.shape[0] // self.window
220
  if feats.shape[1] < p_len:
 
223
  pitch = pitch[:, :p_len]
224
  pitchf = pitchf[:, :p_len]
225
 
226
+ if protect < 0.5:
227
  pitchff = pitchf.clone()
228
  pitchff[pitchf > 0] = 1
229
  pitchff[pitchf < 1] = protect
230
  pitchff = pitchff.unsqueeze(-1)
231
  feats = feats * pitchff + feats0 * (1 - pitchff)
232
+ feats = feats.to(feats0.dtype)
233
  p_len = torch.tensor([p_len], device=self.device).long()
234
  with torch.no_grad():
235
  if pitch != None and pitchf != None:
 
358
  big_npy,
359
  index_rate,
360
  version,
361
+ protect,
362
  )[self.t_pad_tgt : -self.t_pad_tgt]
363
  )
364
  else:
 
375
  big_npy,
376
  index_rate,
377
  version,
378
+ protect,
379
  )[self.t_pad_tgt : -self.t_pad_tgt]
380
  )
381
  s = t
 
393
  big_npy,
394
  index_rate,
395
  version,
396
+ protect,
397
  )[self.t_pad_tgt : -self.t_pad_tgt]
398
  )
399
  else:
 
410
  big_npy,
411
  index_rate,
412
  version,
413
+ protect,
414
  )[self.t_pad_tgt : -self.t_pad_tgt]
415
  )
416
  audio_opt = np.concatenate(audio_opt)