"""Gradio web app that removes background noise from speech recordings.

The app wraps SpeechBrain's pretrained SepFormer enhancement model
(``speechbrain/sepformer-dns4-16k-enhancement``) in a small class and exposes
it through a single-function Gradio interface.
"""

import os
import uuid

import gradio as gr
import torch  # noqa: F401  (kept: imported by the original file)
import torchaudio

try:
    # SpeechBrain >= 1.0 exposes the inference wrappers here.
    from speechbrain.inference.separation import SepformerSeparation as separator
except ImportError:
    # Older SpeechBrain releases only provide the legacy location.
    from speechbrain.pretrained import SepformerSeparation as separator

MODEL_SOURCE = "speechbrain/sepformer-dns4-16k-enhancement"
MODEL_SAVEDIR = "pretrained_models/sepformer-dns4-16k-enhancement"
OUTPUT_DIR = "enhanced_audio"
# Used only when the loaded model does not advertise its own sample rate.
DEFAULT_SAMPLE_RATE = 16000


class AudioDenoiser:
    """Load the SepFormer enhancement model once and denoise files on demand."""

    def __init__(self):
        """Fetch/load the pretrained model and make sure the output dir exists."""
        # Initialize the SepFormer model for audio enhancement
        self.model = separator.from_hparams(
            source=MODEL_SOURCE,
            savedir=MODEL_SAVEDIR,
        )
        # The enhanced signal is written at the model's working rate; fall back
        # to 16 kHz (the rate this checkpoint is named after) if hparams does
        # not expose one.
        self.sample_rate = int(
            getattr(self.model.hparams, "sample_rate", DEFAULT_SAMPLE_RATE)
        )
        # Create output directory if it doesn't exist
        os.makedirs(OUTPUT_DIR, exist_ok=True)

    def enhance_audio(self, audio_path):
        """Denoise one audio file and return the path of the enhanced copy.

        Args:
            audio_path (str): Path to the input audio file. Gradio passes
                ``None`` when the user submits without uploading anything.

        Returns:
            str: Path to the enhanced WAV file inside ``enhanced_audio/``.
                Each call writes to a fresh, uniquely named file so that
                overlapping requests cannot overwrite each other's result.

        Raises:
            gr.Error: If no file was supplied, or if loading, enhancing or
                saving the audio fails (the original exception is chained).
        """
        # Reject an empty submission up front with a readable message instead
        # of letting the model fail on a None path.
        if not audio_path:
            raise gr.Error("Please upload an audio file before submitting.")

        try:
            # Separate and enhance the audio
            est_sources = self.model.separate_file(path=audio_path)

            # Unique name per request: a fixed filename would be clobbered
            # when two users process audio at the same time.
            output_path = os.path.join(
                OUTPUT_DIR, f"enhanced_{uuid.uuid4().hex}.wav"
            )

            # Keep only the first estimated source (index 0 on the last axis);
            # the remaining 2-D tensor is what torchaudio.save is given.
            # NOTE(review): presumably (channels, time) -- confirm against the
            # SpeechBrain separate_file documentation.
            enhanced = est_sources[:, :, 0].detach().cpu()

            # Save the enhanced audio
            torchaudio.save(output_path, enhanced, self.sample_rate)
        except Exception as e:
            # Surface any failure in the Gradio UI while preserving the cause
            # in the traceback for debugging.
            raise gr.Error(f"Error processing audio: {e}") from e

        return output_path


def create_gradio_interface():
    """Build the Gradio interface around a freshly loaded ``AudioDenoiser``.

    Returns:
        gr.Interface: Interface with one audio-file input and one audio output,
            not yet launched.
    """
    # Initialize the denoiser
    denoiser = AudioDenoiser()

    # Create the Gradio interface
    interface = gr.Interface(
        fn=denoiser.enhance_audio,
        inputs=gr.Audio(
            type="filepath",
            label="Upload Noisy Audio",
        ),
        outputs=gr.Audio(
            label="Enhanced Audio",
        ),
        title="Audio Denoising using SepFormer",
        description=(
            " This application uses the SepFormer model from SpeechBrain to"
            " enhance audio quality by removing background noise. Upload any"
            " noisy audio file to get started. "
        ),
        article=(
            " This application uses the SepFormer model trained on the DNS4"
            " dataset. For more information, visit the [SpeechBrain"
            " documentation](https://speechbrain.github.io/). "
        ),
    )
    return interface


if __name__ == "__main__":
    # Create and launch the interface
    demo = create_gradio_interface()
    demo.launch()