|
from multiprocessing import cpu_count
|
|
import os
|
|
import sys
|
|
|
|
from scipy import signal
|
|
from scipy.io import wavfile
|
|
import librosa
|
|
import numpy as np
|
|
|
|
# Add the current working directory to sys.path so the local ``rvc`` package
# resolves below (assumes the script is launched from the project root — TODO confirm).
now_directory = os.getcwd()

sys.path.append(now_directory)


from rvc.lib.utils import load_audio
from rvc.train.slicer import Slicer
|
|
|
|
# CLI arguments (parsed at import time):
#   argv[1] = experiment/output directory
#   argv[2] = directory containing the raw input audio files
#   argv[3] = target sampling rate in Hz
#   argv[4] = segment length in seconds (despite the name, this is passed as
#             the ``per`` clip-length parameter, not a percentage)
experiment_directory = sys.argv[1]

input_root = sys.argv[2]

sampling_rate = int(sys.argv[3])

percentage = float(sys.argv[4])

# One worker process per available CPU core.
num_processes = cpu_count()


import multiprocessing
|
|
|
|
|
|
class PreProcess:
    """Slice raw training audio into fixed-length clips and write two copies.

    Output layout under ``exp_dir``:
      - ``0_gt_wavs/``:  clips at the original sampling rate ``sr``
      - ``1_16k_wavs/``: the same clips resampled to 16 kHz
    """

    def __init__(self, sr, exp_dir, per=3.0):
        """
        Args:
            sr: sampling rate the source audio is loaded/processed at.
            exp_dir: experiment directory in which output folders are created.
            per: clip length in seconds for each written segment.
        """
        # Silence-based slicer: splits the audio stream on quiet regions.
        self.slicer = Slicer(
            sr=sr,
            threshold=-42,
            min_length=1500,
            min_interval=400,
            hop_size=15,
            max_sil_kept=500,
        )
        self.sr = sr
        # 5th-order Butterworth high-pass at 48 Hz to remove DC offset / rumble.
        self.b_high, self.a_high = signal.butter(N=5, Wn=48, btype="high", fs=self.sr)
        self.per = per
        self.overlap = 0.3  # seconds of overlap between consecutive clips
        self.tail = self.per + self.overlap  # minimum audio left to cut a full clip
        self.max_amplitude = 0.9  # target peak level after normalization
        self.alpha = 0.75  # blend between peak-normalized and raw signal
        self.exp_dir = exp_dir
        self.gt_wavs_dir = f"{exp_dir}/0_gt_wavs"
        self.wavs16k_dir = f"{exp_dir}/1_16k_wavs"
        os.makedirs(self.exp_dir, exist_ok=True)
        os.makedirs(self.gt_wavs_dir, exist_ok=True)
        os.makedirs(self.wavs16k_dir, exist_ok=True)

    def normalize_and_write(self, tmp_audio, idx0, idx1):
        """Loudness-normalize one clip and write the gt-rate and 16 kHz wavs.

        Clips whose peak exceeds 2.5 (badly clipped input) or whose peak is
        zero (pure silence) are skipped rather than written.
        """
        tmp_max = np.abs(tmp_audio).max()
        if tmp_max > 2.5:
            print(f"{idx0}-{idx1}-{tmp_max}-filtered")
            return
        # Bug fix: guard against all-zero segments — dividing by a zero peak
        # below would emit NaN/inf samples into the output wav.
        if tmp_max == 0:
            print(f"{idx0}-{idx1}-{tmp_max}-filtered")
            return
        # Soft normalization: mix the peak-normalized signal with the raw one.
        tmp_audio = (tmp_audio / tmp_max * (self.max_amplitude * self.alpha)) + (
            1 - self.alpha
        ) * tmp_audio
        wavfile.write(
            f"{self.gt_wavs_dir}/{idx0}_{idx1}.wav",
            self.sr,
            tmp_audio.astype(np.float32),
        )
        tmp_audio = librosa.resample(
            tmp_audio, orig_sr=self.sr, target_sr=16000
        )
        wavfile.write(
            f"{self.wavs16k_dir}/{idx0}_{idx1}.wav",
            16000,
            tmp_audio.astype(np.float32),
        )

    def process_audio(self, path, idx0):
        """High-pass filter one input file, slice it on silence, and cut each
        slice into overlapping ``per``-second clips written via
        :meth:`normalize_and_write`. Errors are logged, not raised."""
        try:
            audio = load_audio(path, self.sr)
            audio = signal.lfilter(self.b_high, self.a_high, audio)

            idx1 = 0
            for audio_segment in self.slicer.slice(audio):
                i = 0
                while 1:
                    # Each clip starts (per - overlap) seconds after the last.
                    start = int(self.sr * (self.per - self.overlap) * i)
                    i += 1
                    if len(audio_segment[start:]) > self.tail * self.sr:
                        tmp_audio = audio_segment[
                            start : start + int(self.per * self.sr)
                        ]
                        self.normalize_and_write(tmp_audio, idx0, idx1)
                        idx1 += 1
                    else:
                        # Remainder shorter than ``tail``: keep it as the
                        # segment's final clip.
                        # NOTE(review): idx1 is incremented before the write
                        # below, so one index per segment is skipped in the
                        # filenames. Preserved as-is: downstream stages list
                        # the directory and do not require contiguous indices.
                        tmp_audio = audio_segment[start:]
                        idx1 += 1
                        break
                self.normalize_and_write(tmp_audio, idx0, idx1)
        except Exception as error:
            print(f"{path}: {error}")

    def process_audio_multiprocessing(self, infos):
        """Worker entry point: process (path, index) pairs sequentially."""
        for path, idx0 in infos:
            self.process_audio(path, idx0)

    def process_audio_multiprocessing_input_directory(self, input_root, num_processes):
        """Fan the files in ``input_root`` out across ``num_processes`` workers.

        Work is distributed round-robin: worker ``i`` handles
        ``infos[i::num_processes]``. Errors are printed, not raised.
        """
        try:
            infos = [
                (f"{input_root}/{name}", idx)
                for idx, name in enumerate(sorted(os.listdir(input_root)))
            ]
            processes = []
            for i in range(num_processes):
                p = multiprocessing.Process(
                    target=self.process_audio_multiprocessing,
                    args=(infos[i::num_processes],),
                )
                processes.append(p)
                p.start()
            for p in processes:
                p.join()
        except Exception as error:
            print(error)
|
|
|
|
|
|
def preprocess_training_set(input_root, sr, num_processes, exp_dir, per):
    """Build a :class:`PreProcess` instance and run it over ``input_root``.

    Args:
        input_root: directory containing the raw audio files.
        sr: sampling rate to load/process the audio at.
        num_processes: number of worker processes to spawn.
        exp_dir: experiment directory that receives the output folders.
        per: clip length in seconds.
    """
    preprocessor = PreProcess(sr, exp_dir, per)
    print("Starting preprocessing...")
    preprocessor.process_audio_multiprocessing_input_directory(
        input_root, num_processes
    )
    print("Preprocessing completed!")
|
|
|
|
|
|
if __name__ == "__main__":
    # Entry point: all arguments were parsed from sys.argv at module import
    # time; ``percentage`` is forwarded as the ``per`` clip-length (seconds).
    preprocess_training_set(
        input_root, sampling_rate, num_processes, experiment_directory, percentage
    )
|
|
|