# hex-separator / app.py
# justyoung's picture
# Update app.py
# 265c6e4 verified
# raw
# history blame
# 3.63 kB
import os
import random
import re
import subprocess

import gradio as gr
import numpy as np
import yt_dlp
from scipy.io.wavfile import write, read
# Model dictionaries and lists

# Roformer checkpoints: the key is the name offered in the UI dropdown, the
# value is the filename handed to the separator via --model_filename.
roformer_models = {
    "BS-Roformer-Viperx-1297.ckpt": "model_bs_roformer_ep_317_sdr_12.9755.ckpt",
    "BS-Roformer-Viperx-1296.ckpt": "model_bs_roformer_ep_368_sdr_12.9628.ckpt",
    "BS-Roformer-Viperx-1053.ckpt": "model_bs_roformer_ep_937_sdr_10.5309.ckpt",
    "Mel-Roformer-Viperx-1143.ckpt": "model_mel_band_roformer_ep_3005_sdr_11.4360.ckpt",
}

# MDX23C checkpoint filenames.
mdx23c_models = [
    "MDX23C_D1581.ckpt",
    "MDX23C-8KFFT-InstVoc_HQ.ckpt",
    "MDX23C-8KFFT-InstVoc_HQ_2.ckpt",
]

# More model lists...

# Choices for the UI controls. They are kept as strings because they are
# interpolated directly into command-line options.
output_format = ["wav", "flac", "mp3"]
mdxnet_overlap_values = ["0.25", "0.5", "0.75", "0.99"]
vrarch_window_size_values = ["320", "512", "1024"]
demucs_overlap_values = ["0.25", "0.50", "0.75", "0.99"]
# Function to download audio
def download_audio(url):
    """Download the audio at *url* and return it as 16-bit samples.

    yt-dlp fetches the best available audio stream into ``ytdl/`` and its
    ``FFmpegExtractAudio`` post-processor is asked for a WAV file; that WAV
    is then read back with ``scipy.io.wavfile.read``.

    Args:
        url: Address of the media to download (passed straight to yt-dlp).

    Returns:
        A ``(sample_rate, audio_array)`` tuple where ``audio_array`` is the
        decoded data cast to ``numpy.int16``.
    """
    extract_to_wav = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'wav',
        'preferredquality': '192',
    }
    options = {
        'format': 'bestaudio/best',
        'outtmpl': 'ytdl/%(title)s.%(ext)s',
        'postprocessors': [extract_to_wav],
    }

    with yt_dlp.YoutubeDL(options) as downloader:
        info = downloader.extract_info(url, download=True)
        downloaded_name = downloader.prepare_filename(info)

    # prepare_filename reports the name with the source extension; the
    # converted file is expected at the same path with a .wav suffix.
    stem = downloaded_name.rsplit('.', 1)[0]
    wav_path = stem + '.wav'

    rate, samples = read(wav_path)
    return rate, np.asarray(samples, dtype=np.int16)
# Function to separate audio using Roformer
def roformer_separator(audio, model, output_format, overlap, segment_size, denoise):
    """Split *audio* into stems by running the ``audio-separator`` CLI.

    The samples are written to a temporary ``<id>.wav`` in the working
    directory, the CLI is invoked on it, and the files it leaves in
    ``./outputs`` whose names contain ``<id>`` are collected.

    Args:
        audio: ``(sample_rate, samples)`` pair, as accepted by
            ``scipy.io.wavfile.write``.
        model: Key of ``roformer_models`` selecting the checkpoint.
        output_format: Value for ``--output_format``.
        overlap: Value for ``--mdxc_overlap``.
        segment_size: Value for ``--mdxc_segment_size``. Whole-number floats
            (e.g. ``50.0`` from a slider) are converted to ``int`` first.
        denoise: When truthy, ``--mdx_enable_denoise`` is added.

    Returns:
        A 3-tuple of output file paths in lexicographic order. When fewer
        than three files were produced the remaining slots are ``None``.

    Raises:
        KeyError: If *model* is not a key of ``roformer_models``.
        subprocess.CalledProcessError: If the CLI exits with a non-zero status.
        RuntimeError: If the CLI succeeded but produced no matching file.
    """
    directory = "./outputs"
    # Resolve the model first so an unknown name fails before any file I/O.
    full_roformer_model = roformer_models[model]

    random_id = str(random.randint(10000, 99999))
    os.makedirs(directory, exist_ok=True)

    # Avoid passing "50.0" to an option that is given integer-like values.
    if isinstance(segment_size, float) and segment_size.is_integer():
        segment_size = int(segment_size)

    input_path = f"{random_id}.wav"
    write(input_path, audio[0], audio[1])

    # An argument list (no shell) keeps UI-supplied values from being
    # interpreted as shell syntax.
    command = [
        "audio-separator",
        input_path,
        "--model_filename",
        full_roformer_model,
        f"--output_dir={directory}",
        f"--output_format={output_format}",
        "--normalization=0.9",
        f"--mdxc_overlap={overlap}",
        f"--mdxc_segment_size={segment_size}",
    ]
    if denoise:
        command.append("--mdx_enable_denoise")

    try:
        subprocess.run(command, check=True)
    finally:
        # The temporary input is only needed while the CLI runs.
        if os.path.exists(input_path):
            os.remove(input_path)

    # os.listdir order is arbitrary; sort so the result is deterministic.
    # NOTE(review): which stem lands in which slot depends on the CLI's
    # output naming — confirm it matches the UI labels.
    stems = sorted(
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if random_id in name
    )
    if not stems:
        raise RuntimeError(
            f"audio-separator produced no output files for {input_path}"
        )

    # Always hand back exactly three slots, padding missing stems with None.
    stem1_file, stem2_file, stem3_file = (stems + [None, None, None])[:3]
    return stem1_file, stem2_file, stem3_file
# Gradio interface
def process_audio(url, model, output_format, overlap, segment_size, denoise):
    """Download the audio at *url* and separate it into stems.

    Glue between ``download_audio`` and ``roformer_separator``: the
    downloaded ``(sample_rate, samples)`` pair is forwarded together with
    the separation options.

    Returns:
        Whatever ``roformer_separator`` returns (three stem file paths).
    """
    rate, samples = download_audio(url)
    return roformer_separator(
        (rate, samples),
        model,
        output_format,
        overlap,
        segment_size,
        denoise,
    )
# Gradio UI
# Layout: a row of input controls, three audio players for the resulting
# stems, and a button that runs process_audio on the current inputs.
with gr.Blocks() as demo:
    gr.Markdown("# Hex Audio Separator")
    with gr.Row():
        url_input = gr.Textbox(label="YouTube URL")
        # Explicit defaults: without them some Gradio versions start with no
        # selection and pass None, which breaks the model lookup and the
        # command-line options built from these values.
        model_input = gr.Dropdown(
            choices=list(roformer_models.keys()),
            value=next(iter(roformer_models)),
            label="Roformer Model",
        )
        format_input = gr.Dropdown(
            choices=output_format,
            value=output_format[0],
            label="Output Format",
        )
        overlap_input = gr.Dropdown(
            choices=mdxnet_overlap_values,
            value=mdxnet_overlap_values[0],
            label="Overlap",
        )
        # NOTE(review): the slider starts at its minimum (0); confirm that
        # the separator accepts this range for --mdxc_segment_size.
        segment_input = gr.Slider(0, 100, label="Segment Size")
        denoise_input = gr.Checkbox(label="Enable Denoise")
    output1 = gr.Audio(label="Vocals")
    output2 = gr.Audio(label="Instrumental")
    output3 = gr.Audio(label="Backing Vocals")
    submit_button = gr.Button("Process")
    # Input order must match the parameter order of process_audio.
    submit_button.click(
        process_audio,
        inputs=[
            url_input,
            model_input,
            format_input,
            overlap_input,
            segment_input,
            denoise_input,
        ],
        outputs=[output1, output2, output3],
    )

demo.launch()