Politrees committed on
Commit 9103fdb • 1 Parent(s): 9e66ad7

Update src/vc_infer_pipeline.py

Files changed (1)
  1. src/vc_infer_pipeline.py +32 -39
src/vc_infer_pipeline.py CHANGED
@@ -1,13 +1,14 @@
+from functools import lru_cache
 import numpy as np, parselmouth, torch, pdb, sys, os
 from time import time as ttime
 import torch.nn.functional as F
 import torchcrepe
-from torch import Tensor
-import scipy.signal as signal
-import pyworld, os, traceback, faiss, librosa, torchcrepe
 from scipy import signal
-from functools import lru_cache
-import gc, re
+from torch import Tensor
+import pyworld, os, faiss, librosa, torchcrepe
+import random
+import gc
+import re
 
 BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 now_dir = os.path.join(BASE_DIR, 'src')
@@ -36,19 +37,20 @@ def cache_harvest_f0(input_audio_path, fs, f0max, f0min, frame_period):
 
 
 def change_rms(data1, sr1, data2, sr2, rate):
-    rms1 = librosa.feature.rms(
-        y=data1, frame_length=sr1 // 2 * 2, hop_length=sr1 // 2
-    )
+    rms1 = librosa.feature.rms(y=data1, frame_length=sr1 // 2 * 2, hop_length=sr1 // 2)
     rms2 = librosa.feature.rms(y=data2, frame_length=sr2 // 2 * 2, hop_length=sr2 // 2)
+
     rms1 = torch.from_numpy(rms1)
     rms1 = F.interpolate(
         rms1.unsqueeze(0), size=data2.shape[0], mode="linear"
     ).squeeze()
+
     rms2 = torch.from_numpy(rms2)
     rms2 = F.interpolate(
         rms2.unsqueeze(0), size=data2.shape[0], mode="linear"
     ).squeeze()
     rms2 = torch.max(rms2, torch.zeros_like(rms2) + 1e-6)
+
     data2 *= (
         torch.pow(rms1, torch.tensor(1 - rate))
         * torch.pow(rms2, torch.tensor(rate - 1))
@@ -78,9 +80,7 @@ class VC(object):
 
     def get_optimal_torch_device(self, index: int = 0) -> torch.device:
         if torch.cuda.is_available():
-            return torch.device(
-                f"cuda:{index % torch.cuda.device_count()}"
-            )
+            return torch.device(f"cuda:{index % torch.cuda.device_count()}")
         elif torch.backends.mps.is_available():
             return torch.device("mps")
         return torch.device("cpu")
@@ -94,9 +94,7 @@ class VC(object):
         hop_length=160,
         model="full",
     ):
-        x = x.astype(
-            np.float32
-        )
+        x = x.astype(np.float32)
        x /= np.quantile(np.abs(x), 0.999)
         torch_device = self.get_optimal_torch_device()
         audio = torch.from_numpy(x).to(torch_device, copy=True)
@@ -152,12 +150,6 @@ class VC(object):
         f0 = f0[0].cpu().numpy()
         return f0
 
-    def get_f0_pyin_computation(self, x, f0_min, f0_max):
-        y, sr = librosa.load("saudio/Sidney.wav", self.sr, mono=True)
-        f0, _, _ = librosa.pyin(y, sr=self.sr, fmin=f0_min, fmax=f0_max)
-        f0 = f0[1:]
-        return f0
-
     def get_f0_hybrid_computation(
         self,
         methods_str,
@@ -180,8 +172,9 @@ class VC(object):
         for method in methods:
             f0 = None
             if method == "crepe":
-                f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max)
-                f0 = f0[1:]
+                f0 = self.get_f0_crepe_computation(
+                    x, f0_min, f0_max, p_len
+                )
             elif method == "mangio-crepe":
                 f0 = self.get_f0_crepe_computation(
                     x, f0_min, f0_max, p_len, crepe_hop_length
@@ -228,11 +221,13 @@ class VC(object):
         filter_radius,
         crepe_hop_length,
         inp_f0=None,
+        f0_min=50,
+        f0_max=1100,
     ):
         global input_audio_path2wav
         time_step = self.window / self.sr * 1000
-        f0_min = 50
-        f0_max = 1100
+        #f0_min = 50
+        #f0_max = 1100
         f0_mel_min = 1127 * np.log(1 + f0_min / 700)
         f0_mel_max = 1127 * np.log(1 + f0_max / 700)
         if f0_method == "pm":
@@ -248,9 +243,7 @@ class VC(object):
             )
             pad_size = (p_len - len(f0) + 1) // 2
             if pad_size > 0 or p_len - len(f0) - pad_size > 0:
-                f0 = np.pad(
-                    f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant"
-                )
+                f0 = np.pad(f0, [[pad_size, p_len - len(f0) - pad_size]], mode="constant")
 
         elif f0_method == "harvest":
             input_audio_path2wav[input_audio_path] = x.astype(np.double)
@@ -268,10 +261,10 @@ class VC(object):
             )
             f0 = pyworld.stonemask(x.astype(np.double), f0, t, self.sr)
             f0 = signal.medfilt(f0, 3)
-
-        elif f0_method == "crepe":
-            f0 = self.get_f0_official_crepe_computation(x, f0_min, f0_max)
 
+        elif f0_method == "crepe":
+            f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len)
+
         elif f0_method == "mangio-crepe":
             f0 = self.get_f0_crepe_computation(x, f0_min, f0_max, p_len, crepe_hop_length)
 
@@ -476,17 +469,15 @@ class VC(object):
         protect,
         crepe_hop_length,
         f0_file=None,
+        f0_min=50,
+        f0_max=1100,
     ):
-        if (
-            file_index != ""
-            and os.path.exists(file_index) == True
-            and index_rate != 0
-        ):
+        if file_index != "" and os.path.exists(file_index) == True and index_rate != 0:
             try:
                 index = faiss.read_index(file_index)
                 big_npy = index.reconstruct_n(0, index.ntotal)
-            except:
-                traceback.print_exc()
+            except Exception as error:
+                print(error)
                 index = big_npy = None
         else:
             index = big_npy = None
@@ -521,8 +512,8 @@ class VC(object):
                 for line in lines:
                     inp_f0.append([float(i) for i in line.split(",")])
                 inp_f0 = np.array(inp_f0, dtype="float32")
-            except:
-                traceback.print_exc()
+            except Exception as error:
+                print(error)
         sid = torch.tensor(sid, device=self.device).unsqueeze(0).long()
         pitch, pitchf = None, None
         if if_f0 == 1:
@@ -535,6 +526,8 @@ class VC(object):
                 filter_radius,
                 crepe_hop_length,
                 inp_f0,
+                f0_min,
+                f0_max,
             )
             pitch = pitch[:p_len]
             pitchf = pitchf[:p_len]
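
For reference, the change_rms helper reformatted above blends the RMS (loudness) envelope of the source audio into the converted audio. Below is a minimal standalone sketch of that envelope-mixing idea, assuming two mono float32 arrays and a blend factor rate in [0, 1]; the function name mix_rms is illustrative and not part of the repo.

# Minimal sketch (not from the repo): blend the RMS envelope of `source`
# into `target`, the same idea change_rms implements in the diff above.
import numpy as np
import torch
import torch.nn.functional as F
import librosa

def mix_rms(source, sr_source, target, sr_target, rate):
    # Frame-wise RMS envelopes, roughly one frame per half second of audio.
    rms1 = librosa.feature.rms(y=source, frame_length=sr_source // 2 * 2, hop_length=sr_source // 2)
    rms2 = librosa.feature.rms(y=target, frame_length=sr_target // 2 * 2, hop_length=sr_target // 2)
    # Stretch both envelopes to one value per target sample.
    rms1 = F.interpolate(torch.from_numpy(rms1).unsqueeze(0), size=target.shape[0], mode="linear").squeeze()
    rms2 = F.interpolate(torch.from_numpy(rms2).unsqueeze(0), size=target.shape[0], mode="linear").squeeze()
    rms2 = torch.max(rms2, torch.zeros_like(rms2) + 1e-6)  # guard against division by zero
    # rate=1 keeps the target's own loudness; rate=0 fully imposes the source envelope.
    gain = torch.pow(rms1, torch.tensor(1 - rate)) * torch.pow(rms2, torch.tensor(rate - 1))
    return target * gain.numpy()

# Example: make the converted take follow the loudness of the original vocal.
# converted = mix_rms(original_vocal, 16000, converted, 16000, rate=0.25)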