# noise-reduction/app.py
import gradio as gr
import numpy as np
import soundfile as sf
import noisereduce as nr
from audio_separator.separator import Separator
import os
import tempfile
import logging
import time
# --- Configuration ---
OUTPUT_DIR = "output_audio"  # Directory holding final outputs until Gradio serves them
TEMP_SEP_DIR_PREFIX = "sep_temp_"  # Prefix for per-request temporary separation directories
# Model filename understood by audio-separator (see the project README for the full
# list of supported UVR models). Other examples: 'UVR_MDXNET_KARA_2.onnx',
# 'UVR-MDX-NET-Inst_HQ_3.onnx'. 'UVR-MDX-NET-Voc_FT.onnx' is a solid default for
# isolating vocals. Model files are downloaded and cached automatically on first use.
MODEL_NAME = "UVR-MDX-NET-Voc_FT.onnx"
# --- Setup ---
# Configure logging for audio-separator and this script
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Create output directory if it doesn't exist
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Initialize the Separator once at start-up (the model itself is downloaded and
# cached the first time a separation runs).
logger.info("Initializing audio separator...")
try:
    # audio-separator picks CUDA/CoreML automatically when a compatible
    # onnxruntime/torch build is installed; no device argument is needed here.
    separator = Separator(log_level=logging.INFO)
    logger.info("Separator initialized successfully.")
except Exception as e:
    logger.error(f"Failed to initialize audio separator: {e}", exc_info=True)
    # Fail fast: without a working separator the app cannot do anything useful.
    raise RuntimeError(f"Failed to initialize audio separator: {e}") from e
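# Optional: current audio-separator releases also accept constructor settings such as
# model_file_dir (where downloaded model weights are cached) and output_single_stem
# (write only one stem and skip the rest). A commented sketch, assuming those keyword
# arguments exist in your installed version; the path below is a placeholder:
# separator = Separator(log_level=logging.INFO,
#                       model_file_dir="/data/uvr_models",
#                       output_single_stem="Vocals")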
# --- Core Processing Function ---
def enhance_vocal(input_audio_path):
"""
Separates vocals using UVR5 (via audio-separator), enhances them using noisereduce,
and returns the path to the enhanced vocal audio file.
"""
if input_audio_path is None:
logger.warning("No input audio file provided.")
return None, "Error: No input audio file provided. Please upload an audio file."
logger.info(f"Processing audio file: {input_audio_path}")
processing_start_time = time.time()
try:
# --- Step 1: Vocal Separation using audio-separator (UVR5) ---
logger.info(f"Starting vocal separation using model: {MODEL_NAME}...") # Log model name here
separation_start_time = time.time()
with tempfile.TemporaryDirectory(prefix=TEMP_SEP_DIR_PREFIX) as temp_sep_dir:
logger.info(f"Using temporary directory for separation: {temp_sep_dir}")
            try:
                # Route this request's stems into the per-request temp directory and run
                # the separation. In current audio-separator releases (>= ~0.16) the model
                # is loaded via load_model() and separate() takes only the audio path;
                # older releases accepted model_name=/output_dir= keyword arguments on
                # separate() instead, so adjust this block if your version differs.
                # The downloaded model weights are cached on disk, so reloading the model
                # here is relatively cheap.
                separator.output_dir = temp_sep_dir
                separator.load_model(model_filename=MODEL_NAME)
                output_paths = separator.separate(input_audio_path)
                # Some versions return bare filenames rather than full paths; normalise
                # them against the temp directory so the checks below work either way.
                output_paths = [
                    p if os.path.isabs(p) else os.path.join(temp_sep_dir, p)
                    for p in output_paths
                ]
separation_duration = time.time() - separation_start_time
logger.info(f"Separation completed in {separation_duration:.2f} seconds.")
logger.info(f"Separation output files: {output_paths}")
                # Identify the vocal (and, if present, instrumental) stems. audio-separator
                # names its outputs like "<input>_(Vocals)_<model>.wav" and
                # "<input>_(Instrumental)_<model>.wav", so a case-insensitive substring
                # match on the filename is sufficient here.
                vocal_track_path = None
                instrumental_track_path = None  # Kept for the optional noise-profile strategy below
                for output_path in output_paths:
                    filename = os.path.basename(output_path).lower()
                    if "vocals" in filename and "no_vocals" not in filename:
                        vocal_track_path = output_path
                        logger.info(f"Found vocal track: {vocal_track_path}")
                    elif "instrumental" in filename or "no_vocals" in filename:
                        instrumental_track_path = output_path
                        logger.info(f"Found instrumental track: {instrumental_track_path}")
if vocal_track_path is None:
logger.error(f"Could not find the vocal track in separation results: {output_paths}")
return None, "Error: Vocal separation failed to produce a recognizable vocal track."
if not os.path.exists(vocal_track_path):
logger.error(f"Identified vocal track path does not exist: {vocal_track_path}")
return None, "Error: Separated vocal track file is missing."
except Exception as sep_exc:
logger.error(f"Error during audio separation process: {sep_exc}", exc_info=True)
return None, f"Error during vocal separation: {str(sep_exc)}"
# --- Step 2: Load the Separated Vocal Track ---
logger.info(f"Loading separated vocal track: {vocal_track_path}")
try:
vocal_data, sr = sf.read(vocal_track_path, dtype='float32')
logger.info(f"Vocal track loaded successfully. Sample Rate: {sr}, Duration: {len(vocal_data)/sr:.2f}s")
# Ensure mono for noisereduce if it's stereo (take average or first channel)
if vocal_data.ndim > 1 and vocal_data.shape[1] > 1:
logger.info("Vocal track is stereo, converting to mono for noise reduction.")
vocal_data = np.mean(vocal_data, axis=1) # Average channels
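                # Note: noisereduce also accepts multichannel input shaped (channels, samples),
                # so a hypothetical stereo-preserving variant of the call in Step 3 would be
                # nr.reduce_noise(y=vocal_data.T, sr=sr, n_jobs=-1).T instead of downmixing here.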
except Exception as read_exc:
logger.error(f"Error reading separated vocal file {vocal_track_path}: {read_exc}", exc_info=True)
return None, f"Error reading separated vocal file: {str(read_exc)}"
# --- Step 3: Noise Reduction using noisereduce ---
logger.info("Applying noise reduction to the vocal track...")
nr_start_time = time.time()
try:
# Simple approach: let noisereduce estimate noise from the vocal track itself
# Parameters can be tuned (e.g., n_fft, hop_length, prop_decrease)
# Look at noisereduce documentation for advanced usage.
reduced_noise_vocals = nr.reduce_noise(y=vocal_data,
sr=sr,
prop_decrease=1.0, # Aggressiveness of reduction
stationary=False, # Good for non-stationary background noise often present in music
n_jobs=-1) # Use all available CPU cores
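                # A gentler, purely illustrative starting point if the defaults above sound
                # too aggressive (values are hypothetical, not tuned for any specific track):
                # reduced_noise_vocals = nr.reduce_noise(y=vocal_data,
                #                                        sr=sr,
                #                                        prop_decrease=0.8,      # keep ~20% of the estimated noise
                #                                        stationary=False,
                #                                        n_fft=2048,
                #                                        time_mask_smooth_ms=100,
                #                                        n_jobs=-1)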
# # --- Alternative (more complex) Noise Reduction Strategy ---
# # If you trust the separation completely, you could potentially use the
# # instrumental track as a noise profile. This is EXPERIMENTAL and might
# # remove desirable vocal characteristics if separation isn't perfect.
# if instrumental_track_path and os.path.exists(instrumental_track_path):
# try:
# logger.info(f"Loading instrumental track for noise profile: {instrumental_track_path}")
# instrumental_data, inst_sr = sf.read(instrumental_track_path, dtype='float32')
# if inst_sr != sr:
# logger.warning(f"Sample rate mismatch! Vocal SR={sr}, Instrumental SR={inst_sr}. Resampling needed or risk error.")
# # Add resampling code here if necessary (e.g., using librosa)
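                #                 # A hypothetical sketch (requires `librosa`, which is not imported at the top of this file):
                #                 # instrumental_data = librosa.resample(instrumental_data, orig_sr=inst_sr, target_sr=sr)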
# else:
# if instrumental_data.ndim > 1 and instrumental_data.shape[1] > 1:
# instrumental_data = np.mean(instrumental_data, axis=1) # Mono
# if len(instrumental_data) > len(vocal_data): # Ensure same length (trim longer)
# instrumental_data = instrumental_data[:len(vocal_data)]
# elif len(vocal_data) > len(instrumental_data): # Pad shorter (less ideal)
# instrumental_data = np.pad(instrumental_data, (0, len(vocal_data) - len(instrumental_data)))
# logger.info("Using instrumental track as noise profile for reduction.")
# reduced_noise_vocals = nr.reduce_noise(y=vocal_data,
# sr=sr,
# y_noise=instrumental_data,
# prop_decrease=1.0,
# stationary=False,
# n_jobs=-1)
# except Exception as noise_profile_exc:
# logger.error(f"Failed to use instrumental track as noise profile: {noise_profile_exc}. Falling back to standard reduction.", exc_info=True)
# # Fallback to standard reduction if using instrumental fails
# reduced_noise_vocals = nr.reduce_noise(y=vocal_data, sr=sr, prop_decrease=1.0, stationary=False, n_jobs=-1)
# # --- End Alternative Strategy ---
nr_duration = time.time() - nr_start_time
logger.info(f"Noise reduction completed in {nr_duration:.2f} seconds.")
except Exception as nr_exc:
logger.error(f"Error during noise reduction: {nr_exc}", exc_info=True)
return None, f"Error during noise reduction: {str(nr_exc)}"
# --- Step 4: Save the Enhanced Vocal Track ---
logger.info("Saving enhanced vocal track...")
try:
# Use NamedTemporaryFile to create a file that Gradio can access
# Ensure it has a .wav extension and delete=False so it persists
# after the 'with' block until Gradio handles it.
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False, dir=OUTPUT_DIR) as temp_out_file:
enhanced_vocal_path = temp_out_file.name
sf.write(enhanced_vocal_path, reduced_noise_vocals, sr, subtype='PCM_16') # Save as 16-bit WAV
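                # Illustrative alternative: write 32-bit float WAV instead of 16-bit PCM to
                # avoid quantising the denoised signal (at the cost of larger files):
                # sf.write(enhanced_vocal_path, reduced_noise_vocals, sr, subtype='FLOAT')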
logger.info(f"Enhanced vocal track saved to: {enhanced_vocal_path}")
processing_duration = time.time() - processing_start_time
logger.info(f"Total processing time: {processing_duration:.2f} seconds.")
# Return the path to the enhanced audio file
return enhanced_vocal_path, f"Processing successful! Total time: {processing_duration:.2f}s" # Return success message
except Exception as write_exc:
logger.error(f"Error saving enhanced vocal file: {write_exc}", exc_info=True)
return None, f"Error saving enhanced vocal file: {str(write_exc)}"
# The temporary directory 'temp_sep_dir' is automatically cleaned up here
except Exception as e:
processing_duration = time.time() - processing_start_time
logger.error(f"An unexpected error occurred during processing: {e}", exc_info=True)
return None, f"An unexpected error occurred after {processing_duration:.2f}s: {str(e)}"
# --- Gradio Interface Definition ---
# Custom CSS for better layout (Optional)
css = """
#status_textbox textarea {
font-style: italic;
color: grey;
}
"""
with gr.Blocks(css=css) as demo:
gr.Markdown(
"""
# Vocal Enhancement App (UVR5 + NoiseReduce) 🎤✨
Upload an audio file (e.g., MP3, WAV, FLAC) containing vocals and background music/noise.
The app will:
1. Use a **UVR5 (MDX-Net)** model (`""" + MODEL_NAME + """`) via `audio-separator` to isolate the vocals.
2. Apply **noise reduction** using the `noisereduce` library to the isolated vocals.
3. Return the enhanced vocal track.
**Note:** Processing, especially the vocal separation step, can take some time depending on the audio length and your computer's performance (GPU acceleration helps significantly if configured). The first run might take longer as the separation model needs to be downloaded.
"""
)
with gr.Row():
with gr.Column(scale=1):
audio_input = gr.Audio(type="filepath", label="Input Audio File")
submit_button = gr.Button("Enhance Vocals", variant="primary")
with gr.Column(scale=1):
audio_output = gr.Audio(type="filepath", label="Enhanced Vocals")
status_output = gr.Textbox(label="Status", elem_id="status_textbox", interactive=False)
submit_button.click(
fn=enhance_vocal,
inputs=audio_input,
outputs=[audio_output, status_output],
api_name="enhance_vocals" # For API usage if needed
)
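    # Hypothetical client-side call against the named endpoint above (requires the
    # `gradio_client` package; helper names differ between versions, and the URL
    # below is a placeholder for your running app or Space):
    # from gradio_client import Client, handle_file
    # client = Client("http://localhost:7860")
    # enhanced_path, status = client.predict(handle_file("song.mp3"), api_name="/enhance_vocals")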
    # Example inputs (optional): add paths to local audio files shipped with the app.
    example_files = [
        # os.path.join(os.path.dirname(__file__), "audio_example_1.mp3"),
        # os.path.join(os.path.dirname(__file__), "audio/example_2.wav"),
    ]
    if example_files:
        gr.Examples(
            examples=example_files,
            inputs=audio_input,
            outputs=[audio_output, status_output],
            fn=enhance_vocal,
            cache_examples=False,  # Set to True only if the examples are static and processing is slow
        )
# --- Launch the App ---
logger.info("Starting Gradio application...")
# Pass share=True to demo.launch() below to create a temporary public link (use with caution).
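# Optional: for multiple concurrent users, enable Gradio's request queue before launching.
# A minimal sketch (the exact queue() arguments differ between Gradio versions):
# demo.queue(max_size=16)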
demo.launch(server_name="0.0.0.0", # Allows access from other devices on the network
server_port=7860) # Standard Gradio port
logger.info("Gradio application stopped.")