|
import sys, os
|
|
import subprocess
|
|
import torch
|
|
from functools import lru_cache
|
|
import shutil
|
|
from pedalboard import Pedalboard, Reverb
|
|
from pedalboard.io import AudioFile
|
|
from pydub import AudioSegment
|
|
from audio_separator.separator import Separator
|
|
import logging
|
|
import yaml
|
|
|
|
now_dir = os.getcwd()
|
|
sys.path.append(now_dir)
|
|
from programs.applio_code.rvc.infer.infer import VoiceConverter
|
|
from programs.applio_code.rvc.lib.tools.model_download import model_download_pipeline
|
|
from programs.music_separation_code.inference import proc_file
|
|
|
|
models_vocals = [
|
|
{
|
|
"name": "Mel-Roformer by KimberleyJSN",
|
|
"path": os.path.join(now_dir, "models", "mel-vocals"),
|
|
"model": os.path.join(now_dir, "models", "mel-vocals", "model.ckpt"),
|
|
"config": os.path.join(now_dir, "models", "mel-vocals", "config.yaml"),
|
|
"type": "mel_band_roformer",
|
|
"config_url": "https://raw.githubusercontent.com/ZFTurbo/Music-Source-Separation-Training/main/configs/KimberleyJensen/config_vocals_mel_band_roformer_kj.yaml",
|
|
"model_url": "https://huggingface.co/KimberleyJSN/melbandroformer/resolve/main/MelBandRoformer.ckpt",
|
|
},
|
|
{
|
|
"name": "BS-Roformer by ViperX",
|
|
"path": os.path.join(now_dir, "models", "bs-vocals"),
|
|
"model": os.path.join(now_dir, "models", "bs-vocals", "model.ckpt"),
|
|
"config": os.path.join(now_dir, "models", "bs-vocals", "config.yaml"),
|
|
"type": "bs_roformer",
|
|
"config_url": "https://raw.githubusercontent.com/ZFTurbo/Music-Source-Separation-Training/main/configs/viperx/model_bs_roformer_ep_317_sdr_12.9755.yaml",
|
|
"model_url": "https://github.com/TRvlvr/model_repo/releases/download/all_public_uvr_models/model_bs_roformer_ep_317_sdr_12.9755.ckpt",
|
|
},
|
|
{
|
|
"name": "MDX23C",
|
|
"path": os.path.join(now_dir, "models", "mdx23c-vocals"),
|
|
"model": os.path.join(now_dir, "models", "mdx23c-vocals", "model.ckpt"),
|
|
"config": os.path.join(now_dir, "models", "mdx23c-vocals", "config.yaml"),
|
|
"type": "mdx23c",
|
|
"config_url": "https://raw.githubusercontent.com/ZFTurbo/Music-Source-Separation-Training/main/configs/config_vocals_mdx23c.yaml",
|
|
"model_url": "https://github.com/ZFTurbo/Music-Source-Separation-Training/releases/download/v1.0.0/model_vocals_mdx23c_sdr_10.17.ckpt",
|
|
},
|
|
]
|
|
|
|
karaoke_models = [
|
|
{
|
|
"name": "Mel-Roformer Karaoke by aufr33 and viperx",
|
|
"path": os.path.join(now_dir, "models", "mel-kara"),
|
|
"model": os.path.join(now_dir, "models", "mel-kara", "model.ckpt"),
|
|
"config": os.path.join(now_dir, "models", "mel-kara", "config.yaml"),
|
|
"type": "mel_band_roformer",
|
|
"config_url": "https://huggingface.co/shiromiya/audio-separation-models/resolve/main/mel_band_roformer_karaoke_aufr33_viperx/config_mel_band_roformer_karaoke.yaml",
|
|
"model_url": "https://huggingface.co/shiromiya/audio-separation-models/resolve/main/mel_band_roformer_karaoke_aufr33_viperx/mel_band_roformer_karaoke_aufr33_viperx_sdr_10.1956.ckpt",
|
|
},
|
|
{
|
|
"name": "UVR-BVE",
|
|
"full_name": "UVR-BVE-4B_SN-44100-1.pth",
|
|
"arch": "vr",
|
|
},
|
|
]
|
|
|
|
denoise_models = [
|
|
{
|
|
"name": "Mel-Roformer Denoise Normal by aufr33",
|
|
"path": os.path.join(now_dir, "models", "mel-denoise"),
|
|
"model": os.path.join(now_dir, "models", "mel-denoise", "model.ckpt"),
|
|
"config": os.path.join(now_dir, "models", "mel-denoise", "config.yaml"),
|
|
"type": "mel_band_roformer",
|
|
"config_url": "https://huggingface.co/shiromiya/audio-separation-models/resolve/main/mel-denoise/model_mel_band_roformer_denoise.yaml",
|
|
"model_url": "https://huggingface.co/jarredou/aufr33_MelBand_Denoise/resolve/main/denoise_mel_band_roformer_aufr33_sdr_27.9959.ckpt",
|
|
},
|
|
{
|
|
"name": "Mel-Roformer Denoise Aggressive by aufr33",
|
|
"path": os.path.join(now_dir, "models", "mel-denoise-aggr"),
|
|
"model": os.path.join(now_dir, "models", "mel-denoise-aggr", "model.ckpt"),
|
|
"config": os.path.join(now_dir, "models", "mel-denoise-aggr", "config.yaml"),
|
|
"type": "mel_band_roformer",
|
|
"config_url": "https://huggingface.co/shiromiya/audio-separation-models/resolve/main/mel-denoise/model_mel_band_roformer_denoise.yaml",
|
|
"model_url": "https://huggingface.co/jarredou/aufr33_MelBand_Denoise/resolve/main/denoise_mel_band_roformer_aufr33_aggr_sdr_27.9768.ckpt",
|
|
},
|
|
{
|
|
"name": "UVR Denoise",
|
|
"full_name": "UVR-DeNoise.pth",
|
|
"arch": "vr",
|
|
},
|
|
]
|
|
|
|
dereverb_models = [
|
|
{
|
|
"name": "MDX23C DeReverb by aufr33 and jarredou",
|
|
"path": os.path.join(now_dir, "models", "mdx23c-dereveb"),
|
|
"model": os.path.join(now_dir, "models", "mdx23c-dereveb", "model.ckpt"),
|
|
"config": os.path.join(now_dir, "models", "mdx23c-dereveb", "config.yaml"),
|
|
"type": "mdx23c",
|
|
"config_url": "https://huggingface.co/jarredou/aufr33_jarredou_MDXv3_DeReverb/resolve/main/config_dereverb_mdx23c.yaml",
|
|
"model_url": "https://huggingface.co/jarredou/aufr33_jarredou_MDXv3_DeReverb/resolve/main/dereverb_mdx23c_sdr_6.9096.ckpt",
|
|
},
|
|
{
|
|
"name": "BS-Roformer Dereverb by anvuew",
|
|
"path": os.path.join(now_dir, "models", "mdx23c-dereveb"),
|
|
"model": os.path.join(now_dir, "models", "mdx23c-dereveb", "model.ckpt"),
|
|
"config": os.path.join(now_dir, "models", "mdx23c-dereveb", "config.yaml"),
|
|
"type": "bs_roformer",
|
|
"config_url": "https://huggingface.co/anvuew/deverb_bs_roformer/resolve/main/deverb_bs_roformer_8_384dim_10depth.yaml",
|
|
"model_url": "https://huggingface.co/anvuew/deverb_bs_roformer/resolve/main/deverb_bs_roformer_8_384dim_10depth.ckpt",
|
|
},
|
|
{
|
|
"name": "UVR-Deecho-Dereverb",
|
|
"full_name": "UVR-DeEcho-DeReverb.pth",
|
|
"arch": "vr",
|
|
},
|
|
{
|
|
"name": "MDX Reverb HQ by FoxJoy",
|
|
"full_name": "Reverb_HQ_By_FoxJoy.onnx",
|
|
"arch": "mdx",
|
|
},
|
|
]
|
|
|
|
deecho_models = [
|
|
{
|
|
"name": "UVR-Deecho-Normal",
|
|
"full_name": "UVR-De-Echo-Normal.pth",
|
|
"arch": "vr",
|
|
},
|
|
{
|
|
"name": "UVR-Deecho-Agggressive",
|
|
"full_name": "UVR-De-Echo-Aggressive.pth",
|
|
"arch": "vr",
|
|
},
|
|
]
|
|
|
|
|
|
@lru_cache(maxsize=None)
|
|
def import_voice_converter():
|
|
from programs.applio_code.rvc.infer.infer import VoiceConverter
|
|
|
|
return VoiceConverter()
|
|
|
|
|
|
@lru_cache(maxsize=1)
|
|
def get_config():
|
|
from programs.applio_code.rvc.configs.config import Config
|
|
|
|
return Config()
|
|
|
|
|
|
def download_file(url, path, filename):
|
|
os.makedirs(path, exist_ok=True)
|
|
file_path = os.path.join(path, filename)
|
|
|
|
if os.path.exists(file_path):
|
|
print(f"File '{filename}' already exists at '{path}'.")
|
|
return
|
|
|
|
try:
|
|
response = torch.hub.download_url_to_file(url, file_path)
|
|
print(f"File '{filename}' downloaded successfully")
|
|
except Exception as e:
|
|
print(f"Error downloading file '{filename}' from '{url}': {e}")
|
|
|
|
|
|
def get_model_info_by_name(model_name):
|
|
all_models = (
|
|
models_vocals
|
|
+ karaoke_models
|
|
+ dereverb_models
|
|
+ deecho_models
|
|
+ denoise_models
|
|
)
|
|
for model in all_models:
|
|
if model["name"] == model_name:
|
|
return model
|
|
return None
|
|
|
|
|
|
def get_last_modified_file(pasta):
|
|
if not os.path.isdir(pasta):
|
|
raise NotADirectoryError(f"{pasta} is not a valid directory.")
|
|
arquivos = [f for f in os.listdir(pasta) if os.path.isfile(os.path.join(pasta, f))]
|
|
if not arquivos:
|
|
return None
|
|
return max(arquivos, key=lambda x: os.path.getmtime(os.path.join(pasta, x)))
|
|
|
|
|
|
def search_with_word(folder, word):
|
|
if not os.path.isdir(folder):
|
|
raise NotADirectoryError(f"{folder} is not a valid directory.")
|
|
file_with_word = [file for file in os.listdir(folder) if word in file]
|
|
if not file_with_word:
|
|
return None
|
|
most_recent_file = max(
|
|
file_with_word, key=lambda file: os.path.getmtime(os.path.join(folder, file))
|
|
)
|
|
return most_recent_file
|
|
|
|
|
|
def search_with_two_words(folder, word1, word2):
|
|
if not os.path.isdir(folder):
|
|
raise NotADirectoryError(f"{folder} is not a valid directory.")
|
|
file_with_words = [
|
|
file for file in os.listdir(folder) if word1 in file and word2 in file
|
|
]
|
|
if not file_with_words:
|
|
return None
|
|
most_recent_file = max(
|
|
file_with_words, key=lambda file: os.path.getmtime(os.path.join(folder, file))
|
|
)
|
|
return most_recent_file
|
|
|
|
|
|
def get_last_modified_folder(path):
|
|
directories = [
|
|
os.path.join(path, d)
|
|
for d in os.listdir(path)
|
|
if os.path.isdir(os.path.join(path, d))
|
|
]
|
|
if not directories:
|
|
return None
|
|
last_modified_folder = max(directories, key=os.path.getmtime)
|
|
return last_modified_folder
|
|
|
|
|
|
def add_audio_effects(
|
|
audio_path,
|
|
reverb_size,
|
|
reverb_wet,
|
|
reverb_dry,
|
|
reverb_damping,
|
|
reverb_width,
|
|
output_path,
|
|
):
|
|
board = Pedalboard([])
|
|
board.append(
|
|
Reverb(
|
|
room_size=reverb_size,
|
|
dry_level=reverb_dry,
|
|
wet_level=reverb_wet,
|
|
damping=reverb_damping,
|
|
width=reverb_width,
|
|
)
|
|
)
|
|
with AudioFile(audio_path) as f:
|
|
with AudioFile(output_path, "w", f.samplerate, f.num_channels) as o:
|
|
while f.tell() < f.frames:
|
|
chunk = f.read(int(f.samplerate))
|
|
effected = board(chunk, f.samplerate, reset=False)
|
|
o.write(effected)
|
|
return output_path
|
|
|
|
|
|
def merge_audios(
|
|
vocals_path,
|
|
inst_path,
|
|
backing_path,
|
|
output_path,
|
|
main_gain,
|
|
inst_gain,
|
|
backing_Vol,
|
|
output_format,
|
|
):
|
|
main_vocal_audio = AudioSegment.from_file(vocals_path, format="flac") + main_gain
|
|
instrumental_audio = AudioSegment.from_file(inst_path, format="flac") + inst_gain
|
|
backing_vocal_audio = (
|
|
AudioSegment.from_file(backing_path, format="flac") + backing_Vol
|
|
)
|
|
combined_audio = main_vocal_audio.overlay(
|
|
instrumental_audio.overlay(backing_vocal_audio)
|
|
)
|
|
combined_audio.export(output_path, format=output_format)
|
|
return output_path
|
|
|
|
|
|
def check_fp16_support(device):
|
|
i_device = int(str(device).split(":")[-1])
|
|
gpu_name = torch.cuda.get_device_name(i_device)
|
|
low_end_gpus = ["16", "P40", "P10", "1060", "1070", "1080"]
|
|
if any(gpu in gpu_name for gpu in low_end_gpus) and "V100" not in gpu_name.upper():
|
|
print(f"Your GPU {gpu_name} not support FP16 inference. Using FP32 instead.")
|
|
return False
|
|
return True
|
|
|
|
|
|
def full_inference_program(
|
|
model_path,
|
|
index_path,
|
|
input_audio_path,
|
|
output_path,
|
|
export_format_rvc,
|
|
split_audio,
|
|
autotune,
|
|
vocal_model,
|
|
karaoke_model,
|
|
dereverb_model,
|
|
deecho,
|
|
deecho_model,
|
|
denoise,
|
|
denoise_model,
|
|
reverb,
|
|
vocals_volume,
|
|
instrumentals_volume,
|
|
backing_vocals_volume,
|
|
export_format_final,
|
|
devices,
|
|
pitch,
|
|
filter_radius,
|
|
index_rate,
|
|
rms_mix_rate,
|
|
protect,
|
|
pitch_extract,
|
|
hop_lenght,
|
|
reverb_room_size,
|
|
reverb_damping,
|
|
reverb_wet_gain,
|
|
reverb_dry_gain,
|
|
reverb_width,
|
|
embedder_model,
|
|
delete_audios,
|
|
use_tta,
|
|
batch_size,
|
|
infer_backing_vocals,
|
|
infer_backing_vocals_model,
|
|
infer_backing_vocals_index,
|
|
change_inst_pitch,
|
|
pitch_back,
|
|
filter_radius_back,
|
|
index_rate_back,
|
|
rms_mix_rate_back,
|
|
protect_back,
|
|
pitch_extract_back,
|
|
hop_length_back,
|
|
export_format_rvc_back,
|
|
split_audio_back,
|
|
autotune_back,
|
|
embedder_model_back,
|
|
):
|
|
if torch.cuda.is_available():
|
|
n_gpu = torch.cuda.device_count()
|
|
devices = devices.replace("-", " ")
|
|
print(f"Number of GPUs available: {n_gpu}")
|
|
first_device = devices.split()[0]
|
|
fp16 = check_fp16_support(first_device)
|
|
else:
|
|
devices = "cpu"
|
|
print("Using CPU")
|
|
fp16 = False
|
|
|
|
music_folder = os.path.splitext(os.path.basename(input_audio_path))[0]
|
|
|
|
|
|
model_info = get_model_info_by_name(vocal_model)
|
|
model_ckpt_path = os.path.join(model_info["path"], "model.ckpt")
|
|
if not os.path.exists(model_ckpt_path):
|
|
download_file(
|
|
model_info["model_url"],
|
|
model_info["path"],
|
|
"model.ckpt",
|
|
)
|
|
config_json_path = os.path.join(model_info["path"], "config.yaml")
|
|
if not os.path.exists(config_json_path):
|
|
download_file(
|
|
model_info["config_url"],
|
|
model_info["path"],
|
|
"config.yaml",
|
|
)
|
|
if not fp16:
|
|
with open(model_info["config"], "r") as file:
|
|
config = yaml.safe_load(file)
|
|
|
|
config["training"]["use_amp"] = False
|
|
|
|
with open(model_info["config"], "w") as file:
|
|
yaml.safe_dump(config, file)
|
|
store_dir = os.path.join(now_dir, "audio_files", music_folder, "vocals")
|
|
inst_dir = os.path.join(now_dir, "audio_files", music_folder, "instrumentals")
|
|
os.makedirs(store_dir, exist_ok=True)
|
|
os.makedirs(inst_dir, exist_ok=True)
|
|
input_audio_basename = os.path.splitext(os.path.basename(input_audio_path))[0]
|
|
search_result = search_with_word(store_dir, "vocals")
|
|
if search_result:
|
|
print("Vocals already separated"),
|
|
else:
|
|
print("Separating vocals")
|
|
command = [
|
|
"python",
|
|
os.path.join(now_dir, "programs", "music_separation_code", "inference.py"),
|
|
"--model_type",
|
|
model_info["type"],
|
|
"--config_path",
|
|
model_info["config"],
|
|
"--start_check_point",
|
|
model_info["model"],
|
|
"--input_file",
|
|
input_audio_path,
|
|
"--store_dir",
|
|
store_dir,
|
|
"--flac_file",
|
|
"--pcm_type",
|
|
"PCM_16",
|
|
"--extract_instrumental",
|
|
]
|
|
|
|
if devices == "cpu":
|
|
command.append("--force_cpu")
|
|
else:
|
|
device_ids = [str(int(device)) for device in devices.split()]
|
|
command.extend(["--device_ids"] + device_ids)
|
|
|
|
subprocess.run(command)
|
|
os.rename(
|
|
os.path.join(
|
|
store_dir,
|
|
search_with_two_words(
|
|
store_dir,
|
|
os.path.basename(input_audio_path).split(".")[0],
|
|
"instrumental",
|
|
),
|
|
),
|
|
os.path.join(
|
|
inst_dir,
|
|
f"{os.path.basename(input_audio_path).split('.')[0]}_instrumentals.flac",
|
|
),
|
|
)
|
|
inst_file = os.path.join(
|
|
inst_dir,
|
|
search_with_two_words(
|
|
inst_dir, os.path.basename(input_audio_path).split(".")[0], "instrumentals"
|
|
),
|
|
)
|
|
|
|
|
|
model_info = get_model_info_by_name(karaoke_model)
|
|
store_dir = os.path.join(now_dir, "audio_files", music_folder, "karaoke")
|
|
os.makedirs(store_dir, exist_ok=True)
|
|
vocals_path = os.path.join(now_dir, "audio_files", music_folder, "vocals")
|
|
input_file = search_with_word(vocals_path, "vocals")
|
|
karaoke_exists = search_with_word(store_dir, "karaoke") is not None
|
|
|
|
if karaoke_exists:
|
|
print("Backing vocals already separated")
|
|
else:
|
|
if input_file:
|
|
input_file = os.path.join(vocals_path, input_file)
|
|
print("Separating Backing vocals")
|
|
if model_info["name"] == "Mel-Roformer Karaoke by aufr33 and viperx":
|
|
model_ckpt_path = os.path.join(model_info["path"], "model.ckpt")
|
|
if not os.path.exists(model_ckpt_path):
|
|
download_file(
|
|
model_info["model_url"],
|
|
model_info["path"],
|
|
"model.ckpt",
|
|
)
|
|
config_json_path = os.path.join(model_info["path"], "config.yaml")
|
|
if not os.path.exists(config_json_path):
|
|
download_file(
|
|
model_info["config_url"],
|
|
model_info["path"],
|
|
"config.yaml",
|
|
)
|
|
if not fp16:
|
|
with open(model_info["config"], "r") as file:
|
|
config = yaml.safe_load(file)
|
|
|
|
config["training"]["use_amp"] = False
|
|
|
|
with open(model_info["config"], "w") as file:
|
|
yaml.safe_dump(config, file)
|
|
|
|
command = [
|
|
"python",
|
|
os.path.join(
|
|
now_dir, "programs", "music_separation_code", "inference.py"
|
|
),
|
|
"--model_type",
|
|
model_info["type"],
|
|
"--config_path",
|
|
model_info["config"],
|
|
"--start_check_point",
|
|
model_info["model"],
|
|
"--input_file",
|
|
input_file,
|
|
"--store_dir",
|
|
store_dir,
|
|
"--flac_file",
|
|
"--pcm_type",
|
|
"PCM_16",
|
|
"--extract_instrumental",
|
|
]
|
|
|
|
if devices == "cpu":
|
|
command.append("--force_cpu")
|
|
else:
|
|
device_ids = [str(int(device)) for device in devices.split()]
|
|
command.extend(["--device_ids"] + device_ids)
|
|
|
|
subprocess.run(command)
|
|
else:
|
|
separator = Separator(
|
|
model_file_dir=os.path.join(now_dir, "models", "karaoke"),
|
|
log_level=logging.WARNING,
|
|
normalization_threshold=1.0,
|
|
output_format="flac",
|
|
output_dir=store_dir,
|
|
vr_params={
|
|
"batch_size": batch_size,
|
|
"enable_tta": use_tta,
|
|
},
|
|
)
|
|
separator.load_model(model_filename=model_info["full_name"])
|
|
separator.separate(input_file)
|
|
karaoke_path = os.path.join(now_dir, "audio_files", music_folder, "karaoke")
|
|
vocals_result = search_with_two_words(
|
|
karaoke_path,
|
|
os.path.basename(input_audio_path).split(".")[0],
|
|
"Vocals",
|
|
)
|
|
instrumental_result = search_with_two_words(
|
|
karaoke_path,
|
|
os.path.basename(input_audio_path).split(".")[0],
|
|
"Instrumental",
|
|
)
|
|
if "UVR-BVE-4B_SN-44100-1" in os.path.basename(vocals_result):
|
|
os.rename(
|
|
os.path.join(karaoke_path, vocals_result),
|
|
os.path.join(
|
|
karaoke_path,
|
|
f"{os.path.basename(input_audio_path).split('.')[0]}_karaoke.flac",
|
|
),
|
|
)
|
|
if "UVR-BVE-4B_SN-44100-1" in os.path.basename(instrumental_result):
|
|
os.rename(
|
|
os.path.join(karaoke_path, instrumental_result),
|
|
os.path.join(
|
|
karaoke_path,
|
|
f"{os.path.basename(input_audio_path).split('.')[0]}_instrumental.flac",
|
|
),
|
|
)
|
|
|
|
|
|
model_info = get_model_info_by_name(dereverb_model)
|
|
store_dir = os.path.join(now_dir, "audio_files", music_folder, "dereverb")
|
|
os.makedirs(store_dir, exist_ok=True)
|
|
karaoke_path = os.path.join(now_dir, "audio_files", music_folder, "karaoke")
|
|
input_file = search_with_word(karaoke_path, "karaoke")
|
|
noreverb_exists = search_with_word(store_dir, "noreverb") is not None
|
|
if noreverb_exists:
|
|
print("Reverb already removed")
|
|
else:
|
|
if input_file:
|
|
input_file = os.path.join(karaoke_path, input_file)
|
|
print("Removing reverb")
|
|
if (
|
|
model_info["name"] == "BS-Roformer Dereverb by anvuew"
|
|
or model_info["name"] == "MDX23C DeReverb by aufr33 and jarredou"
|
|
):
|
|
model_ckpt_path = os.path.join(model_info["path"], "model.ckpt")
|
|
if not os.path.exists(model_ckpt_path):
|
|
download_file(
|
|
model_info["model_url"],
|
|
model_info["path"],
|
|
"model.ckpt",
|
|
)
|
|
config_json_path = os.path.join(model_info["path"], "config.yaml")
|
|
if not os.path.exists(config_json_path):
|
|
download_file(
|
|
model_info["config_url"],
|
|
model_info["path"],
|
|
"config.yaml",
|
|
)
|
|
if not fp16:
|
|
with open(model_info["config"], "r") as file:
|
|
config = yaml.safe_load(file)
|
|
|
|
config["training"]["use_amp"] = False
|
|
|
|
with open(model_info["config"], "w") as file:
|
|
yaml.safe_dump(config, file)
|
|
command = [
|
|
"python",
|
|
os.path.join(
|
|
now_dir, "programs", "music_separation_code", "inference.py"
|
|
),
|
|
"--model_type",
|
|
model_info["type"],
|
|
"--config_path",
|
|
model_info["config"],
|
|
"--start_check_point",
|
|
model_info["model"],
|
|
"--input_file",
|
|
input_file,
|
|
"--store_dir",
|
|
store_dir,
|
|
"--flac_file",
|
|
"--pcm_type",
|
|
"PCM_16",
|
|
]
|
|
|
|
if devices == "cpu":
|
|
command.append("--force_cpu")
|
|
else:
|
|
device_ids = [str(int(device)) for device in devices.split()]
|
|
command.extend(["--device_ids"] + device_ids)
|
|
|
|
subprocess.run(command)
|
|
else:
|
|
if model_info["arch"] == "vr":
|
|
separator = Separator(
|
|
model_file_dir=os.path.join(now_dir, "models", "dereverb"),
|
|
log_level=logging.WARNING,
|
|
normalization_threshold=1.0,
|
|
output_format="flac",
|
|
output_dir=store_dir,
|
|
output_single_stem="No Reverb",
|
|
vr_params={
|
|
"batch_size": batch_size,
|
|
"enable_tta": use_tta,
|
|
},
|
|
)
|
|
else:
|
|
separator = Separator(
|
|
model_file_dir=os.path.join(now_dir, "models", "dereverb"),
|
|
log_level=logging.WARNING,
|
|
normalization_threshold=1.0,
|
|
output_format="flac",
|
|
output_dir=store_dir,
|
|
output_single_stem="No Reverb",
|
|
)
|
|
separator.load_model(model_filename=model_info["full_name"])
|
|
separator.separate(input_file)
|
|
dereverb_path = os.path.join(
|
|
now_dir, "audio_files", music_folder, "dereverb"
|
|
)
|
|
search_result = search_with_two_words(
|
|
dereverb_path,
|
|
os.path.basename(input_audio_path).split(".")[0],
|
|
"No Reverb",
|
|
)
|
|
if "UVR-DeEcho-DeReverb" in os.path.basename(
|
|
search_result
|
|
) or "MDX Reverb HQ by FoxJoy" in os.path.basename(search_result):
|
|
os.rename(
|
|
os.path.join(dereverb_path, search_result),
|
|
os.path.join(
|
|
dereverb_path,
|
|
f"{os.path.basename(input_audio_path).split('.')[0]}_noreverb.flac",
|
|
),
|
|
)
|
|
|
|
|
|
store_dir = os.path.join(now_dir, "audio_files", music_folder, "deecho")
|
|
os.makedirs(store_dir, exist_ok=True)
|
|
if deecho:
|
|
no_echo_exists = search_with_word(store_dir, "noecho") is not None
|
|
if no_echo_exists:
|
|
print("Echo already removed")
|
|
else:
|
|
print("Removing echo")
|
|
model_info = get_model_info_by_name(deecho_model)
|
|
|
|
dereverb_path = os.path.join(
|
|
now_dir, "audio_files", music_folder, "dereverb"
|
|
)
|
|
noreverb_file = search_with_word(dereverb_path, "noreverb")
|
|
|
|
input_file = os.path.join(dereverb_path, noreverb_file)
|
|
|
|
separator = Separator(
|
|
model_file_dir=os.path.join(now_dir, "models", "deecho"),
|
|
log_level=logging.WARNING,
|
|
normalization_threshold=1.0,
|
|
output_format="flac",
|
|
output_dir=store_dir,
|
|
output_single_stem="No Echo",
|
|
vr_params={
|
|
"batch_size": batch_size,
|
|
"enable_tta": use_tta,
|
|
},
|
|
)
|
|
separator.load_model(model_filename=model_info["full_name"])
|
|
separator.separate(input_file)
|
|
deecho_path = os.path.join(now_dir, "audio_files", music_folder, "deecho")
|
|
search_result = search_with_two_words(
|
|
deecho_path,
|
|
os.path.basename(input_audio_path).split(".")[0],
|
|
"No Echo",
|
|
)
|
|
if "UVR-De-Echo-Normal" in os.path.basename(
|
|
search_result
|
|
) or "UVR-Deecho-Agggressive" in os.path.basename(search_result):
|
|
os.rename(
|
|
os.path.join(deecho_path, search_result),
|
|
os.path.join(
|
|
deecho_path,
|
|
f"{os.path.basename(input_audio_path).split('.')[0]}_noecho.flac",
|
|
),
|
|
)
|
|
|
|
|
|
store_dir = os.path.join(now_dir, "audio_files", music_folder, "denoise")
|
|
os.makedirs(store_dir, exist_ok=True)
|
|
if denoise:
|
|
no_noise_exists = search_with_word(store_dir, "dry") is not None
|
|
if no_noise_exists:
|
|
print("Noise already removed")
|
|
else:
|
|
model_info = get_model_info_by_name(denoise_model)
|
|
print("Removing noise")
|
|
input_file = (
|
|
os.path.join(
|
|
now_dir,
|
|
"audio_files",
|
|
music_folder,
|
|
"deecho",
|
|
search_with_word(
|
|
os.path.join(now_dir, "audio_files", music_folder, "deecho"),
|
|
"noecho",
|
|
),
|
|
)
|
|
if deecho
|
|
else os.path.join(
|
|
now_dir,
|
|
"audio_files",
|
|
music_folder,
|
|
"dereverb",
|
|
search_with_word(
|
|
os.path.join(now_dir, "audio_files", music_folder, "dereverb"),
|
|
"noreverb",
|
|
),
|
|
)
|
|
)
|
|
|
|
if (
|
|
model_info["name"] == "Mel-Roformer Denoise Normal by aufr33"
|
|
or model_info["name"] == "Mel-Roformer Denoise Aggressive by aufr33"
|
|
):
|
|
model_ckpt_path = os.path.join(model_info["path"], "model.ckpt")
|
|
if not os.path.exists(model_ckpt_path):
|
|
download_file(
|
|
model_info["model_url"],
|
|
model_info["path"],
|
|
"model.ckpt",
|
|
)
|
|
config_json_path = os.path.join(model_info["path"], "config.yaml")
|
|
if not os.path.exists(config_json_path):
|
|
download_file(
|
|
model_info["config_url"], model_info["path"], "config.yaml"
|
|
)
|
|
if not fp16:
|
|
with open(model_info["config"], "r") as file:
|
|
config = yaml.safe_load(file)
|
|
|
|
config["training"]["use_amp"] = False
|
|
|
|
with open(model_info["config"], "w") as file:
|
|
yaml.safe_dump(config, file)
|
|
command = [
|
|
"python",
|
|
os.path.join(
|
|
now_dir, "programs", "music_separation_code", "inference.py"
|
|
),
|
|
"--model_type",
|
|
model_info["type"],
|
|
"--config_path",
|
|
model_info["config"],
|
|
"--start_check_point",
|
|
model_info["model"],
|
|
"--input_file",
|
|
input_file,
|
|
"--store_dir",
|
|
store_dir,
|
|
"--flac_file",
|
|
"--pcm_type",
|
|
"PCM_16",
|
|
]
|
|
|
|
if devices == "cpu":
|
|
command.append("--force_cpu")
|
|
else:
|
|
device_ids = [str(int(device)) for device in devices.split()]
|
|
command.extend(["--device_ids"] + device_ids)
|
|
|
|
subprocess.run(command)
|
|
else:
|
|
separator = Separator(
|
|
model_file_dir=os.path.join(now_dir, "models", "denoise"),
|
|
log_level=logging.WARNING,
|
|
normalization_threshold=1.0,
|
|
output_format="flac",
|
|
output_dir=store_dir,
|
|
output_single_stem="No Noise",
|
|
vr_params={
|
|
"batch_size": batch_size,
|
|
"enable_tta": use_tta,
|
|
},
|
|
)
|
|
separator.load_model(model_filename=model_info["full_name"])
|
|
separator.separate(input_file)
|
|
search_result = search_with_two_words(
|
|
deecho_path,
|
|
os.path.basename(input_audio_path).split(".")[0],
|
|
"No Noise",
|
|
)
|
|
if "UVR Denoise" in os.path.basename(search_result):
|
|
os.rename(
|
|
os.path.join(deecho_path, search_result),
|
|
os.path.join(
|
|
deecho_path,
|
|
f"{os.path.basename(input_audio_path).split('.')[0]}_dry.flac",
|
|
),
|
|
)
|
|
|
|
|
|
denoise_path = os.path.join(now_dir, "audio_files", music_folder, "denoise")
|
|
deecho_path = os.path.join(now_dir, "audio_files", music_folder, "deecho")
|
|
dereverb_path = os.path.join(now_dir, "audio_files", music_folder, "dereverb")
|
|
|
|
denoise_audio = search_with_two_words(
|
|
denoise_path, os.path.basename(input_audio_path).split(".")[0], "dry"
|
|
)
|
|
deecho_audio = search_with_two_words(
|
|
deecho_path, os.path.basename(input_audio_path).split(".")[0], "noecho"
|
|
)
|
|
dereverb = search_with_two_words(
|
|
dereverb_path, os.path.basename(input_audio_path).split(".")[0], "noreverb"
|
|
)
|
|
|
|
if denoise_audio:
|
|
final_path = os.path.join(
|
|
now_dir, "audio_files", music_folder, "denoise", denoise_audio
|
|
)
|
|
elif deecho_audio:
|
|
final_path = os.path.join(
|
|
now_dir, "audio_files", music_folder, "deecho", deecho_audio
|
|
)
|
|
elif dereverb:
|
|
final_path = os.path.join(
|
|
now_dir, "audio_files", music_folder, "dereverb", dereverb
|
|
)
|
|
else:
|
|
final_path = None
|
|
|
|
store_dir = os.path.join(now_dir, "audio_files", music_folder, "rvc")
|
|
os.makedirs(store_dir, exist_ok=True)
|
|
print("Making RVC inference")
|
|
output_rvc = os.path.join(
|
|
now_dir,
|
|
"audio_files",
|
|
music_folder,
|
|
"rvc",
|
|
f"{os.path.basename(input_audio_path).split('.')[0]}_rvc.wav",
|
|
)
|
|
inference_vc = import_voice_converter()
|
|
inference_vc.convert_audio(
|
|
audio_input_path=final_path,
|
|
audio_output_path=output_rvc,
|
|
model_path=model_path,
|
|
index_path=index_path,
|
|
embedder_model=embedder_model,
|
|
pitch=pitch,
|
|
f0_file=None,
|
|
f0_method=pitch_extract,
|
|
filter_radius=filter_radius,
|
|
index_rate=index_rate,
|
|
volume_envelope=rms_mix_rate,
|
|
protect=protect,
|
|
split_audio=split_audio,
|
|
f0_autotune=autotune,
|
|
hop_length=hop_lenght,
|
|
export_format=export_format_rvc,
|
|
embedder_model_custom=None,
|
|
)
|
|
backing_vocals = os.path.join(
|
|
karaoke_path, search_with_word(karaoke_path, "instrumental")
|
|
)
|
|
|
|
if infer_backing_vocals:
|
|
print("Infering backing vocals")
|
|
karaoke_path = os.path.join(now_dir, "audio_files", music_folder, "karaoke")
|
|
instrumental_file = search_with_word(karaoke_path, "instrumental")
|
|
backing_vocals = os.path.join(karaoke_path, instrumental_file)
|
|
output_backing_vocals = os.path.join(
|
|
karaoke_path, f"{input_audio_basename}_instrumental_output.wav"
|
|
)
|
|
inference_vc.convert_audio(
|
|
audio_input_path=backing_vocals,
|
|
audio_output_path=output_backing_vocals,
|
|
model_path=infer_backing_vocals_model,
|
|
index_path=infer_backing_vocals_index,
|
|
embedder_model=embedder_model_back,
|
|
pitch=pitch_back,
|
|
f0_file=None,
|
|
f0_method=pitch_extract_back,
|
|
filter_radius=filter_radius_back,
|
|
index_rate=index_rate_back,
|
|
volume_envelope=rms_mix_rate_back,
|
|
protect=protect_back,
|
|
split_audio=split_audio_back,
|
|
f0_autotune=autotune_back,
|
|
hop_length=hop_length_back,
|
|
export_format=export_format_rvc_back,
|
|
embedder_model_custom=None,
|
|
)
|
|
backing_vocals = output_backing_vocals
|
|
|
|
|
|
if reverb:
|
|
add_audio_effects(
|
|
os.path.join(
|
|
now_dir,
|
|
"audio_files",
|
|
music_folder,
|
|
"rvc",
|
|
get_last_modified_file(
|
|
os.path.join(now_dir, "audio_files", music_folder, "rvc")
|
|
),
|
|
),
|
|
reverb_room_size,
|
|
reverb_wet_gain,
|
|
reverb_dry_gain,
|
|
reverb_damping,
|
|
reverb_width,
|
|
os.path.join(
|
|
now_dir,
|
|
"audio_files",
|
|
music_folder,
|
|
"rvc",
|
|
os.path.basename(input_audio_path),
|
|
),
|
|
)
|
|
if change_inst_pitch != 0:
|
|
print("Changing instrumental pitch")
|
|
inst_path = os.path.join(
|
|
now_dir,
|
|
"audio_files",
|
|
music_folder,
|
|
"instrumentals",
|
|
search_with_word(
|
|
os.path.join(now_dir, "audio_files", music_folder, "instrumentals"),
|
|
"instrumentals",
|
|
),
|
|
)
|
|
audio = AudioSegment.from_file(inst_path)
|
|
|
|
factor = 2 ** (change_inst_pitch / 12)
|
|
|
|
new_frame_rate = int(audio.frame_rate * factor)
|
|
audio = audio._spawn(audio.raw_data, overrides={"frame_rate": new_frame_rate})
|
|
|
|
audio = audio.set_frame_rate(audio.frame_rate)
|
|
output_dir_pitch = os.path.join(
|
|
now_dir, "audio_files", music_folder, "instrumentals"
|
|
)
|
|
output_path_pitch = os.path.join(
|
|
output_dir_pitch, "inst_with_changed_pitch.flac"
|
|
)
|
|
audio.export(output_path_pitch, format="flac")
|
|
|
|
|
|
store_dir = os.path.join(now_dir, "audio_files", music_folder, "final")
|
|
os.makedirs(store_dir, exist_ok=True)
|
|
|
|
vocals_path = os.path.join(now_dir, "audio_files", music_folder, "rvc")
|
|
vocals_file = get_last_modified_file(
|
|
os.path.join(now_dir, "audio_files", music_folder, "rvc")
|
|
)
|
|
vocals_file = os.path.join(vocals_path, vocals_file)
|
|
|
|
karaoke_path = os.path.join(now_dir, "audio_files", music_folder, "karaoke")
|
|
karaoke_file = search_with_word(karaoke_path, "Instrumental") or search_with_word(
|
|
karaoke_path, "instrumental"
|
|
)
|
|
karaoke_file = os.path.join(karaoke_path, karaoke_file)
|
|
final_output_path = os.path.join(
|
|
now_dir,
|
|
"audio_files",
|
|
music_folder,
|
|
"final",
|
|
f"{os.path.basename(input_audio_path).split('.')[0]}_final.{export_format_final.lower()}",
|
|
)
|
|
print("Merging audios")
|
|
result = merge_audios(
|
|
vocals_file,
|
|
inst_file,
|
|
backing_vocals,
|
|
final_output_path,
|
|
vocals_volume,
|
|
instrumentals_volume,
|
|
backing_vocals_volume,
|
|
export_format_final,
|
|
)
|
|
print("Audios merged!")
|
|
if delete_audios:
|
|
main_directory = os.path.join(now_dir, "audio_files", music_folder)
|
|
folder_to_keep = "final"
|
|
for folder_name in os.listdir(main_directory):
|
|
folder_path = os.path.join(main_directory, folder_name)
|
|
if os.path.isdir(folder_path) and folder_name != folder_to_keep:
|
|
shutil.rmtree(folder_path)
|
|
return (
|
|
f"Audio file {os.path.basename(input_audio_path).split('.')[0]} converted with success",
|
|
result,
|
|
)
|
|
|
|
|
|
def download_model(link):
|
|
model_download_pipeline(link)
|
|
return "Model downloaded with success"
|
|
|
|
|
|
def download_music(link):
|
|
os.makedirs(os.path.join(now_dir, "audio_files", "original_files"), exist_ok=True)
|
|
command = [
|
|
"yt-dlp",
|
|
"-x",
|
|
"--output",
|
|
os.path.join(now_dir, "audio_files", "original_files", "%(title)s.%(ext)s"),
|
|
link,
|
|
]
|
|
subprocess.run(command)
|
|
return "Music downloaded with success"
|
|
|