import os
import re
import random

import numpy as np
import gradio as gr
import yt_dlp
from scipy.io.wavfile import write, read
# Model dictionaries and lists
# Keys are the display names shown in the UI; values are the checkpoint filenames
# passed to the audio-separator CLI via --model_filename.
roformer_models = {
    'BS-Roformer-Viperx-1297.ckpt': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt',
    'BS-Roformer-Viperx-1296.ckpt': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt',
    'BS-Roformer-Viperx-1053.ckpt': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt',
    'Mel-Roformer-Viperx-1143.ckpt': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt'
}

mdx23c_models = [
    'MDX23C_D1581.ckpt',
    'MDX23C-8KFFT-InstVoc_HQ.ckpt',
    'MDX23C-8KFFT-InstVoc_HQ_2.ckpt',
]

# More model lists...

output_format = ['wav', 'flac', 'mp3']
mdxnet_overlap_values = ['0.25', '0.5', '0.75', '0.99']
vrarch_window_size_values = ['320', '512', '1024']
demucs_overlap_values = ['0.25', '0.50', '0.75', '0.99']
# Function to download audio from a URL with yt-dlp and return it as a NumPy array
def download_audio(url):
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': 'ytdl/%(title)s.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=True)
        # The post-processor swaps the original extension for .wav
        file_path = ydl.prepare_filename(info_dict).rsplit('.', 1)[0] + '.wav'
    sample_rate, audio_data = read(file_path)
    audio_array = np.asarray(audio_data, dtype=np.int16)
    return sample_rate, audio_array
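
# Note (assumptions about download_audio): yt-dlp's FFmpegExtractAudio post-processor
# needs an ffmpeg binary on PATH, and scipy.io.wavfile.read only handles uncompressed
# PCM/float WAV data, so the int16 cast above may clip or misread other sample formats.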
# Function to separate audio using a Roformer model via the audio-separator CLI
def roformer_separator(audio, model, output_format, overlap, segment_size, denoise):
    directory = "./outputs"
    random_id = str(random.randint(10000, 99999))
    os.makedirs(directory, exist_ok=True)
    # Write the (sample_rate, data) tuple to a temporary wav file for the CLI
    write(f'{random_id}.wav', audio[0], audio[1])
    full_roformer_model = roformer_models[model]
    prompt = (
        f"audio-separator {random_id}.wav --model_filename {full_roformer_model} "
        f"--output_dir={directory} --output_format={output_format} --normalization=0.9 "
        f"--mdxc_overlap={overlap} --mdxc_segment_size={segment_size}"
    )
    if denoise:
        prompt += " --mdx_enable_denoise"
    os.system(prompt)
    # Collect the stems tagged with this run's random_id; sort for a stable order
    files_list = sorted(
        os.path.join(directory, file)
        for file in os.listdir(directory)
        if re.search(random_id, file)
    )
    # Pad with None so the three Gradio outputs still resolve if the model yields fewer stems
    files_list += [None] * (3 - len(files_list))
    stem1_file, stem2_file, stem3_file = files_list[:3]
    return stem1_file, stem2_file, stem3_file
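
# Optional sketch: the same audio-separator invocation built with subprocess.run
# instead of os.system, which avoids shell quoting issues and raises on a non-zero
# exit code. run_separator is a hypothetical helper, not used by the app above;
# the flag names simply mirror the ones already passed in roformer_separator.
import subprocess

def run_separator(input_wav, model_filename, output_dir, output_format, overlap, segment_size, denoise=False):
    cmd = [
        "audio-separator", input_wav,
        "--model_filename", model_filename,
        f"--output_dir={output_dir}",
        f"--output_format={output_format}",
        "--normalization=0.9",
        f"--mdxc_overlap={overlap}",
        f"--mdxc_segment_size={segment_size}",
    ]
    if denoise:
        cmd.append("--mdx_enable_denoise")
    # check=True raises CalledProcessError if the separation fails
    subprocess.run(cmd, check=True)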
# Gradio callback: download the audio, then run the Roformer separator on it
def process_audio(url, model, output_format, overlap, segment_size, denoise):
    sample_rate, audio_array = download_audio(url)
    stems = roformer_separator((sample_rate, audio_array), model, output_format, overlap, segment_size, denoise)
    return stems
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# Hex Audio Separator")
    with gr.Row():
        url_input = gr.Textbox(label="YouTube URL")
        model_input = gr.Dropdown(choices=list(roformer_models.keys()), label="Roformer Model")
        format_input = gr.Dropdown(choices=output_format, label="Output Format")
        overlap_input = gr.Dropdown(choices=mdxnet_overlap_values, label="Overlap")
        segment_input = gr.Slider(0, 100, label="Segment Size")
        denoise_input = gr.Checkbox(label="Enable Denoise")
    output1 = gr.Audio(label="Vocals")
    output2 = gr.Audio(label="Instrumental")
    output3 = gr.Audio(label="Backing Vocals")
    submit_button = gr.Button("Process")
    submit_button.click(
        process_audio,
        inputs=[url_input, model_input, format_input, overlap_input, segment_input, denoise_input],
        outputs=[output1, output2, output3]
    )

demo.launch()
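# Note: demo.launch(share=True) would instead expose a temporary public Gradio link
# when running outside Spaces; the default launch() serves locally.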