import os
import re
import random
import subprocess
import uuid

from scipy.io.wavfile import write, read
import numpy as np
import gradio as gr
import yt_dlp

# Model dictionaries and lists
# Maps the name shown in the UI to the checkpoint filename passed to the CLI.
roformer_models = {
    'BS-Roformer-Viperx-1297.ckpt': 'model_bs_roformer_ep_317_sdr_12.9755.ckpt',
    'BS-Roformer-Viperx-1296.ckpt': 'model_bs_roformer_ep_368_sdr_12.9628.ckpt',
    'BS-Roformer-Viperx-1053.ckpt': 'model_bs_roformer_ep_937_sdr_10.5309.ckpt',
    'Mel-Roformer-Viperx-1143.ckpt': 'model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt'
}

mdx23c_models = [
    'MDX23C_D1581.ckpt',
    'MDX23C-8KFFT-InstVoc_HQ.ckpt',
    'MDX23C-8KFFT-InstVoc_HQ_2.ckpt',
]

# More model lists...

output_format = ['wav', 'flac', 'mp3']
mdxnet_overlap_values = ['0.25', '0.5', '0.75', '0.99']
vrarch_window_size_values = ['320', '512', '1024']
demucs_overlap_values = ['0.25', '0.50', '0.75', '0.99']


# Function to download audio
def download_audio(url):
    """Download the audio track of *url* with yt-dlp and load it as a WAV array.

    The file is saved under ``ytdl/`` (named after the video title) and
    converted to WAV by the FFmpegExtractAudio post-processor.

    Args:
        url: Address of the media to download.

    Returns:
        A ``(sample_rate, audio_array)`` tuple, where ``audio_array`` is the
        WAV data converted to ``numpy.int16``.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': 'ytdl/%(title)s.%(ext)s',
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'wav',
            'preferredquality': '192',
        }],
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=True)
        # prepare_filename() reports the pre-conversion name; the
        # post-processor leaves a .wav with the same stem next to it.
        file_path = os.path.splitext(ydl.prepare_filename(info_dict))[0] + '.wav'

    sample_rate, audio_data = read(file_path)
    # NOTE(review): assumes the extracted WAV is 16-bit PCM; other sample
    # formats would be value-cast here rather than rescaled - confirm.
    audio_array = np.asarray(audio_data, dtype=np.int16)
    return sample_rate, audio_array


# Function to separate audio using Roformer
def roformer_separator(audio, model, output_format, overlap, segment_size, denoise):
    """Separate *audio* into stems by running the ``audio-separator`` CLI.

    The audio is written to a temporary WAV file in the working directory,
    the CLI is run with ``./outputs`` as its output directory, and the files
    it produced for this run are collected from there.

    Args:
        audio: ``(sample_rate, samples)`` tuple, as returned by
            ``download_audio``.
        model: Key of ``roformer_models`` selecting the checkpoint.
        output_format: Value for the ``--output_format`` option.
        overlap: Value for the ``--mdxc_overlap`` option.
        segment_size: Value for the ``--mdxc_segment_size`` option.
        denoise: When truthy, ``--mdx_enable_denoise`` is added.

    Returns:
        A 3-tuple of output file paths, sorted by name. If fewer than three
        files were produced, the remaining slots are ``None``.

    Raises:
        KeyError: If *model* is not a key of ``roformer_models``.
        subprocess.CalledProcessError: If the CLI exits with a non-zero status.
        RuntimeError: If the CLI succeeded but produced no output file.
    """
    directory = "./outputs"
    os.makedirs(directory, exist_ok=True)

    full_roformer_model = roformer_models[model]
    sample_rate, samples = audio

    # A UUID (rather than a 5-digit random number) keeps this run's files
    # distinguishable from leftovers of earlier runs in the output directory.
    run_id = uuid.uuid4().hex
    input_path = f"{run_id}.wav"

    # A slider may deliver whole numbers as floats (e.g. 50.0); pass "50".
    if isinstance(segment_size, float) and segment_size.is_integer():
        segment_size = int(segment_size)

    # Argument list instead of a shell string: UI-supplied values can no
    # longer be interpreted by a shell.
    command = [
        "audio-separator",
        input_path,
        "--model_filename", full_roformer_model,
        f"--output_dir={directory}",
        f"--output_format={output_format}",
        "--normalization=0.9",
        f"--mdxc_overlap={overlap}",
        f"--mdxc_segment_size={segment_size}",
    ]
    if denoise:
        command.append("--mdx_enable_denoise")

    write(input_path, sample_rate, samples)
    try:
        # check=True surfaces a failed separation instead of silently
        # continuing with an empty output directory.
        subprocess.run(command, check=True)
    finally:
        # The temporary input is only needed while the CLI runs.
        if os.path.exists(input_path):
            os.remove(input_path)

    # Sorted for a deterministic order (os.listdir order is arbitrary).
    # NOTE(review): name order is not guaranteed to match the UI labels
    # (Vocals / Instrumental / Backing Vocals) - confirm against the CLI's
    # output naming.
    stems = sorted(
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if run_id in name
    )
    if not stems:
        raise RuntimeError(f"audio-separator produced no output files in {directory}")

    # Pad to three so callers always receive three values, even when the
    # model yields fewer stems.
    stem1_file, stem2_file, stem3_file = (stems + [None] * 3)[:3]
    return stem1_file, stem2_file, stem3_file


# Gradio interface
def process_audio(url, model, output_format, overlap, segment_size, denoise):
    """Download the audio at *url* and separate it with the chosen Roformer model.

    Args:
        url: Address of the media to download.
        model: Key of ``roformer_models``.
        output_format: Output format passed on to ``roformer_separator``.
        overlap: Overlap value passed on to ``roformer_separator``.
        segment_size: Segment size passed on to ``roformer_separator``.
        denoise: Whether to enable the denoise option.

    Returns:
        The 3-tuple of stem paths returned by ``roformer_separator``.

    Raises:
        gr.Error: If the URL or one of the dropdown selections is missing.
    """
    url = url.strip() if url else ""
    required = (
        ("YouTube URL", url),
        ("Roformer Model", model),
        ("Output Format", output_format),
        ("Overlap", overlap),
    )
    missing = [label for label, value in required if not value]
    if missing:
        # Fail with a readable message instead of a KeyError / downloader
        # traceback further down.
        raise gr.Error("Missing required input(s): " + ", ".join(missing))

    sample_rate, audio_array = download_audio(url)
    stems = roformer_separator(
        (sample_rate, audio_array), model, output_format, overlap, segment_size, denoise
    )
    return stems


# Gradio UI
# NOTE(review): the original indentation was lost; the nesting of components
# inside gr.Row() below is a best guess - confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# Hex Audio Separator")
    with gr.Row():
        url_input = gr.Textbox(label="YouTube URL")
        model_input = gr.Dropdown(choices=list(roformer_models.keys()), label="Roformer Model")
        format_input = gr.Dropdown(choices=output_format, label="Output Format")
        # NOTE(review): these choices come from mdxnet_overlap_values but are
        # passed to --mdxc_overlap; confirm they are valid for that option.
        overlap_input = gr.Dropdown(choices=mdxnet_overlap_values, label="Overlap")
        segment_input = gr.Slider(0, 100, label="Segment Size")
        denoise_input = gr.Checkbox(label="Enable Denoise")
    output1 = gr.Audio(label="Vocals")
    output2 = gr.Audio(label="Instrumental")
    output3 = gr.Audio(label="Backing Vocals")
    submit_button = gr.Button("Process")
    submit_button.click(
        process_audio,
        inputs=[url_input, model_input, format_input, overlap_input, segment_input, denoise_input],
        outputs=[output1, output2, output3]
    )

demo.launch()