Spaces:

Awell00
/

music_drums_separation

Running on Zero

App Files Files Community

Awell00 committed on Sep 7, 2024

Commit

757c094

verified ·

1 Parent(s): b9d3d3a

feat: add inline comments throughout app.py

Browse files

Files changed (1) hide show

app.py +116 -21

app.py CHANGED Viewed

@@ -13,29 +13,59 @@ import spaces
 from pydub.exceptions import CouldntEncodeError
 from transformers import pipeline
 model = pipeline('text-generation', model='EleutherAI/gpt-neo-125M')
 OUTPUT_FOLDER = "separation_results/"
 INPUT_FOLDER = "input"
 download_path = ""
 def sanitize_filename(filename):
     """
     Replace characters that are unsafe in file names with underscores.
     Args:
         filename (str): The original filename
     Returns:
         str: The filename with each of \\ / * ? : " < > | replaced by "_"
     """
     forbidden_chars = re.compile(r'[\\/*?:"<>|]')
     return forbidden_chars.sub('_', filename)
 def delete_input_files(input_dir):
     """
     Delete every "*.wav" file found directly inside "<input_dir>/wav".
     Each deleted path is printed. Files with other extensions are left alone.
     Args:
         input_dir (str): Directory that contains the "wav" subfolder
     """
     wav_files = Path(input_dir, "wav").glob("*.wav")
     for wav_file in wav_files:
         wav_file.unlink()
         print(f"Deleted {wav_file}")
 def standardize_title(input_title):
     title_cleaned = re.sub(r"[\(\[].*?[\)\]]", "", input_title)
     unnecessary_words = ["official", "video", "hd", "4k", "lyrics", "music", "audio", "visualizer", "remix"]
     title_cleaned = re.sub(r"\b(?:{})\b".format("|".join(unnecessary_words)), "", title_cleaned, flags=re.IGNORECASE)
     parts = re.split(r"\s*-\s*|\s*,\s*", title_cleaned)
     if len(parts) >= 2:
         title_part = parts[-1].strip()
         artist_part = ', '.join(parts[:-1]).strip()
@@ -43,27 +73,38 @@ def standardize_title(input_title):
         artist_part = "Unknown Artist"
         title_part = title_cleaned.strip()
     if "with" in input_title.lower() or "feat" in input_title.lower():
         match = re.search(r"\((with|feat\.?) (.*?)\)", input_title, re.IGNORECASE)
         if match:
             additional_artist = match.group(2).strip()
             artist_part = f"{artist_part}, {additional_artist}" if artist_part != "Unknown Artist" else additional_artist
     artist_part = re.sub(r'\s+', ' ', artist_part).title()
     title_part = re.sub(r'\s+', ' ', title_part).title()
     standardized_output = f"{artist_part} - {title_part}"
     return standardized_output.strip()
 def handle_file_upload(file):
     if file is None:
         return None, "No file uploaded"
     filename = os.path.basename(file.name)
     formatted_title = standardize_title(filename)
     formatted_title = sanitize_filename(formatted_title.strip())
     input_path = os.path.join(INPUT_FOLDER, "wav", f"{formatted_title}.wav")
@@ -73,8 +114,21 @@ def handle_file_upload(file):
     return input_path, formatted_title
 def run_inference(model_type, config_path, start_check_point, input_dir, output_dir, device_ids="0"):
     command = [
         "python", "inference.py",
         "--model_type", model_type,
@@ -87,6 +141,12 @@ def run_inference(model_type, config_path, start_check_point, input_dir, output_
     return subprocess.run(command, check=True, capture_output=True, text=True)
 def move_stems_to_parent(input_dir):
     for subdir, dirs, files in os.walk(input_dir):
         if subdir == input_dir:
             continue
@@ -94,42 +154,51 @@ def move_stems_to_parent(input_dir):
         parent_dir = os.path.dirname(subdir)
         song_name = os.path.basename(parent_dir)
         if 'htdemucs' in subdir:
-            print(f"Processing htdemucs in {subdir}")
             bass_path = os.path.join(subdir, f"{song_name}_bass.wav")
             if os.path.exists(bass_path):
                 new_bass_path = os.path.join(parent_dir, "bass.wav")
-                print(f"Moving {bass_path} to {new_bass_path}")
                 shutil.move(bass_path, new_bass_path)
             else:
                 print(f"Bass file not found: {bass_path}")
         elif 'mel_band_roformer' in subdir:
-            print(f"Processing mel_band_roformer in {subdir}")
             vocals_path = os.path.join(subdir, f"{song_name}_vocals.wav")
             if os.path.exists(vocals_path):
                 new_vocals_path = os.path.join(parent_dir, "vocals.wav")
-                print(f"Moving {vocals_path} to {new_vocals_path}")
                 shutil.move(vocals_path, new_vocals_path)
             else:
                 print(f"Vocals file not found: {vocals_path}")
         elif 'scnet' in subdir:
-            print(f"Processing scnet in {subdir}")
             other_path = os.path.join(subdir, f"{song_name}_other.wav")
             if os.path.exists(other_path):
                 new_other_path = os.path.join(parent_dir, "other.wav")
-                print(f"Moving {other_path} to {new_other_path}")
                 shutil.move(other_path, new_other_path)
             else:
                 print(f"Other file not found: {other_path}")
         elif 'bs_roformer' in subdir:
-            print(f"Processing bs_roformer in {subdir}")
             instrumental_path = os.path.join(subdir, f"{song_name}_other.wav")
             if os.path.exists(instrumental_path):
                 new_instrumental_path = os.path.join(parent_dir, "instrumental.wav")
-                print(f"Moving {instrumental_path} to {new_instrumental_path}")
                 shutil.move(instrumental_path, new_instrumental_path)
 def combine_stems_for_all(input_dir, output_format):
     for subdir, _, _ in os.walk(input_dir):
         if subdir == input_dir:
             continue
@@ -144,20 +213,22 @@ def combine_stems_for_all(input_dir, output_format):
             "instrumental": os.path.join(subdir, "instrumental.wav")
         }
         if not all(os.path.exists(path) for path in stem_paths.values()):
             print(f"Skipping {subdir}, not all stems are present.")
             continue
         stems = {name: AudioSegment.from_file(path) for name, path in stem_paths.items()}
         combined = stems["vocals"].overlay(stems["bass"]).overlay(stems["others"]).overlay(stems["instrumental"])
-        # Trim silence from the end of the combined audio
         trimmed_combined = trim_silence_at_end(combined)
-        # Determine the output file format and codec
         output_file = os.path.join(subdir, f"{song_name}")
         try:
             if output_format == "m4a":
                 trimmed_combined.export(output_file, format="ipod", codec="aac")
             else:
@@ -171,11 +242,15 @@ def combine_stems_for_all(input_dir, output_format):
 def trim_silence_at_end(audio_segment, silence_thresh=-50, chunk_size=10):
     """
-    Trims silence at the end of an AudioSegment.
-    :param audio_segment: The audio segment to trim.
-    :param silence_thresh: The threshold in dB below which is considered silence.
-    :param chunk_size: The size of the chunks in milliseconds that are checked for silence.
-    :return: A trimmed AudioSegment with silence removed from the end.
     """
     silence_end = silence.detect_silence(audio_segment, min_silence_len=chunk_size, silence_thresh=silence_thresh)
@@ -186,6 +261,12 @@ def trim_silence_at_end(audio_segment, silence_thresh=-50, chunk_size=10):
         return audio_segment
 def delete_folders_and_files(input_dir):
     folders_to_delete = ['htdemucs', 'mel_band_roformer', 'scnet', 'bs_roformer']
     files_to_delete = ['bass.wav', 'vocals.wav', 'other.wav', 'instrumental.wav']
@@ -193,18 +274,21 @@ def delete_folders_and_files(input_dir):
         if root == input_dir:
             continue
         for folder in folders_to_delete:
             folder_path = os.path.join(root, folder)
             if os.path.isdir(folder_path):
                 print(f"Deleting folder: {folder_path}")
                 shutil.rmtree(folder_path)
         for file in files_to_delete:
             file_path = os.path.join(root, file)
             if os.path.isfile(file_path):
                 print(f"Deleting file: {file_path}")
                 os.remove(file_path)
     for root, dirs, files in os.walk(OUTPUT_FOLDER):
         for dir_name in dirs:
             if dir_name.endswith('_vocals'):
@@ -214,8 +298,17 @@ def delete_folders_and_files(input_dir):
     print("Cleanup completed.")
-@spaces.GPU(duration=120)  # Adjust the duration as needed
 def process_audio(uploaded_file):
     try:
         yield "Processing audio...", None
@@ -226,6 +319,7 @@ def process_audio(uploaded_file):
         else:
             raise ValueError("Please upload a WAV file.")
         yield "Starting SCNet inference...", None
         proc_folder_direct("scnet", "configs/config_scnet_other.yaml", "results/model_scnet_other.ckpt", f"{INPUT_FOLDER}/wav", OUTPUT_FOLDER)
@@ -235,14 +329,15 @@ def process_audio(uploaded_file):
         yield "Starting HTDemucs inference...", None
         proc_folder_direct("htdemucs", "configs/config_htdemucs_bass.yaml", "results/model_htdemucs_bass.th", f"{INPUT_FOLDER}/wav", OUTPUT_FOLDER)
         source_path = f'{OUTPUT_FOLDER}{formatted_title}/mel_band_roformer/{formatted_title}_instrumental.wav'
         destination_path = f'{OUTPUT_FOLDER}{formatted_title}/mel_band_roformer/{formatted_title}.wav'
         os.rename(source_path, destination_path)
         yield "Starting BS Roformer inference...", None
         proc_folder_direct("bs_roformer", "configs/config_bs_roformer_instrumental.yaml", "results/model_bs_roformer_instrumental.ckpt", f'{OUTPUT_FOLDER}{formatted_title}/mel_band_roformer', OUTPUT_FOLDER)
         yield "Moving input files...", None
         delete_input_files(INPUT_FOLDER)
@@ -261,13 +356,12 @@ def process_audio(uploaded_file):
         logging.error(error_msg)
         yield error_msg, None
 with gr.Blocks() as demo:
     gr.Markdown("# Music Player and Processor")
     file_upload = gr.File(label="Upload WAV file", file_types=[".m4a"])
     process_button = gr.Button("Process Audio")
     log_output = gr.Textbox(label="Processing Log", interactive=False)
     processed_audio_output = gr.File(label="Processed Audio")
@@ -278,4 +372,5 @@ with gr.Blocks() as demo:
         show_progress=True
     )
 demo.launch()

 from pydub.exceptions import CouldntEncodeError
 from transformers import pipeline
+# Initialize text generation model
 model = pipeline('text-generation', model='EleutherAI/gpt-neo-125M')
+# Define constants
 OUTPUT_FOLDER = "separation_results/"
 INPUT_FOLDER = "input"
 download_path = ""
 def sanitize_filename(filename):
+    """
+    Remove special characters from filename to ensure it's valid across different file systems.
+    Args:
+        filename (str): The original filename
+    Returns:
+        str: Sanitized filename
+    """
     return re.sub(r'[\\/*?:"<>|]', '_', filename)
 def delete_input_files(input_dir):
+    """
+    Delete all WAV files in the input directory.
+    Args:
+        input_dir (str): Path to the input directory
+    """
     wav_dir = Path(input_dir) / "wav"
     for wav_file in wav_dir.glob("*.wav"):
         wav_file.unlink()
         print(f"Deleted {wav_file}")
 def standardize_title(input_title):
+    """
+    Standardize the title format by removing unnecessary words and rearranging artist and title.
+    Args:
+        input_title (str): The original title
+    Returns:
+        str: Standardized title in "Artist - Title" format
+    """
+    # Remove content within parentheses or brackets
     title_cleaned = re.sub(r"[\(\[].*?[\)\]]", "", input_title)
+    # Remove unnecessary words
     unnecessary_words = ["official", "video", "hd", "4k", "lyrics", "music", "audio", "visualizer", "remix"]
     title_cleaned = re.sub(r"\b(?:{})\b".format("|".join(unnecessary_words)), "", title_cleaned, flags=re.IGNORECASE)
+    # Split title into parts
     parts = re.split(r"\s*-\s*|\s*,\s*", title_cleaned)
+    # Determine artist and title parts
     if len(parts) >= 2:
         title_part = parts[-1].strip()
         artist_part = ', '.join(parts[:-1]).strip()
         artist_part = "Unknown Artist"
         title_part = title_cleaned.strip()
+    # Handle "with" or "feat" in the title
     if "with" in input_title.lower() or "feat" in input_title.lower():
         match = re.search(r"\((with|feat\.?) (.*?)\)", input_title, re.IGNORECASE)
         if match:
             additional_artist = match.group(2).strip()
             artist_part = f"{artist_part}, {additional_artist}" if artist_part != "Unknown Artist" else additional_artist
+    # Clean up and capitalize
     artist_part = re.sub(r'\s+', ' ', artist_part).title()
     title_part = re.sub(r'\s+', ' ', title_part).title()
+    # Combine artist and title
     standardized_output = f"{artist_part} - {title_part}"
     return standardized_output.strip()
 def handle_file_upload(file):
+    """
+    Handle file upload, standardize the filename, and copy it to the input folder.
+    Args:
+        file: Uploaded file object
+    Returns:
+        tuple: (input_path, formatted_title) or (None, error_message)
+    """
     if file is None:
         return None, "No file uploaded"
     filename = os.path.basename(file.name)
     formatted_title = standardize_title(filename)
     formatted_title = sanitize_filename(formatted_title.strip())
     input_path = os.path.join(INPUT_FOLDER, "wav", f"{formatted_title}.wav")
     return input_path, formatted_title
 def run_inference(model_type, config_path, start_check_point, input_dir, output_dir, device_ids="0"):
+    """
+    Run inference using the specified model and parameters.
+    Args:
+        model_type (str): Type of the model
+        config_path (str): Path to the model configuration
+        start_check_point (str): Path to the model checkpoint
+        input_dir (str): Input directory
+        output_dir (str): Output directory
+        device_ids (str): GPU device IDs to use
+    Returns:
+        subprocess.CompletedProcess: Result of the subprocess run
+    """
     command = [
         "python", "inference.py",
         "--model_type", model_type,
     return subprocess.run(command, check=True, capture_output=True, text=True)
 def move_stems_to_parent(input_dir):
+    """
+    Move generated stem files to their parent directories.
+    Args:
+        input_dir (str): Input directory containing stem folders
+    """
     for subdir, dirs, files in os.walk(input_dir):
         if subdir == input_dir:
             continue
         parent_dir = os.path.dirname(subdir)
         song_name = os.path.basename(parent_dir)
+        # Move bass stem
         if 'htdemucs' in subdir:
             bass_path = os.path.join(subdir, f"{song_name}_bass.wav")
             if os.path.exists(bass_path):
                 new_bass_path = os.path.join(parent_dir, "bass.wav")
                 shutil.move(bass_path, new_bass_path)
             else:
                 print(f"Bass file not found: {bass_path}")
+        # Move vocals stem
         elif 'mel_band_roformer' in subdir:
             vocals_path = os.path.join(subdir, f"{song_name}_vocals.wav")
             if os.path.exists(vocals_path):
                 new_vocals_path = os.path.join(parent_dir, "vocals.wav")
                 shutil.move(vocals_path, new_vocals_path)
             else:
                 print(f"Vocals file not found: {vocals_path}")
+        # Move other stem
         elif 'scnet' in subdir:
             other_path = os.path.join(subdir, f"{song_name}_other.wav")
             if os.path.exists(other_path):
                 new_other_path = os.path.join(parent_dir, "other.wav")
                 shutil.move(other_path, new_other_path)
             else:
                 print(f"Other file not found: {other_path}")
+        # Move instrumental stem
         elif 'bs_roformer' in subdir:
             instrumental_path = os.path.join(subdir, f"{song_name}_other.wav")
             if os.path.exists(instrumental_path):
                 new_instrumental_path = os.path.join(parent_dir, "instrumental.wav")
                 shutil.move(instrumental_path, new_instrumental_path)
 def combine_stems_for_all(input_dir, output_format):
+    """
+    Combine all stems for each song in the input directory.
+    Args:
+        input_dir (str): Input directory containing song folders
+        output_format (str): Output audio format (e.g., 'm4a')
+    Returns:
+        str: Path to the combined audio file
+    """
     for subdir, _, _ in os.walk(input_dir):
         if subdir == input_dir:
             continue
             "instrumental": os.path.join(subdir, "instrumental.wav")
         }
+        # Skip if not all stems are present
         if not all(os.path.exists(path) for path in stem_paths.values()):
             print(f"Skipping {subdir}, not all stems are present.")
             continue
+        # Load and combine stems
         stems = {name: AudioSegment.from_file(path) for name, path in stem_paths.items()}
         combined = stems["vocals"].overlay(stems["bass"]).overlay(stems["others"]).overlay(stems["instrumental"])
+        # Trim silence at the end
         trimmed_combined = trim_silence_at_end(combined)
         output_file = os.path.join(subdir, f"{song_name}")
         try:
+            # Export combined audio
             if output_format == "m4a":
                 trimmed_combined.export(output_file, format="ipod", codec="aac")
             else:
 def trim_silence_at_end(audio_segment, silence_thresh=-50, chunk_size=10):
     """
+    Trim silence at the end of an audio segment.
+    Args:
+        audio_segment (AudioSegment): Input audio segment
+        silence_thresh (int): Silence threshold in dB
+        chunk_size (int): Size of chunks to analyze in ms
+    Returns:
+        AudioSegment: Trimmed audio segment
     """
     silence_end = silence.detect_silence(audio_segment, min_silence_len=chunk_size, silence_thresh=silence_thresh)
         return audio_segment
 def delete_folders_and_files(input_dir):
+    """
+    Delete temporary folders and files after processing.
+    Args:
+        input_dir (str): Input directory to clean up
+    """
     folders_to_delete = ['htdemucs', 'mel_band_roformer', 'scnet', 'bs_roformer']
     files_to_delete = ['bass.wav', 'vocals.wav', 'other.wav', 'instrumental.wav']
         if root == input_dir:
             continue
+        # Delete specified folders
         for folder in folders_to_delete:
             folder_path = os.path.join(root, folder)
             if os.path.isdir(folder_path):
                 print(f"Deleting folder: {folder_path}")
                 shutil.rmtree(folder_path)
+        # Delete specified files
         for file in files_to_delete:
             file_path = os.path.join(root, file)
             if os.path.isfile(file_path):
                 print(f"Deleting file: {file_path}")
                 os.remove(file_path)
+    # Delete vocals folders
     for root, dirs, files in os.walk(OUTPUT_FOLDER):
         for dir_name in dirs:
             if dir_name.endswith('_vocals'):
     print("Cleanup completed.")
+@spaces.GPU(duration=120)
 def process_audio(uploaded_file):
+    """
+    Main function to process the uploaded audio file.
+    Args:
+        uploaded_file: Uploaded file object
+    Yields:
+        tuple: (status_message, output_file_path)
+    """
     try:
         yield "Processing audio...", None
         else:
             raise ValueError("Please upload a WAV file.")
+        # Run inference for different models
         yield "Starting SCNet inference...", None
         proc_folder_direct("scnet", "configs/config_scnet_other.yaml", "results/model_scnet_other.ckpt", f"{INPUT_FOLDER}/wav", OUTPUT_FOLDER)
         yield "Starting HTDemucs inference...", None
         proc_folder_direct("htdemucs", "configs/config_htdemucs_bass.yaml", "results/model_htdemucs_bass.th", f"{INPUT_FOLDER}/wav", OUTPUT_FOLDER)
+        # Rename instrumental file
         source_path = f'{OUTPUT_FOLDER}{formatted_title}/mel_band_roformer/{formatted_title}_instrumental.wav'
         destination_path = f'{OUTPUT_FOLDER}{formatted_title}/mel_band_roformer/{formatted_title}.wav'
         os.rename(source_path, destination_path)
         yield "Starting BS Roformer inference...", None
         proc_folder_direct("bs_roformer", "configs/config_bs_roformer_instrumental.yaml", "results/model_bs_roformer_instrumental.ckpt", f'{OUTPUT_FOLDER}{formatted_title}/mel_band_roformer', OUTPUT_FOLDER)
+        # Clean up and organize files
         yield "Moving input files...", None
         delete_input_files(INPUT_FOLDER)
         logging.error(error_msg)
         yield error_msg, None
+# Set up Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# Music Player and Processor")
     file_upload = gr.File(label="Upload WAV file", file_types=[".m4a"])
     process_button = gr.Button("Process Audio")
     log_output = gr.Textbox(label="Processing Log", interactive=False)
     processed_audio_output = gr.File(label="Processed Audio")
         show_progress=True
     )
+# Launch the Gradio app
 demo.launch()