"""Gradio web app that denoises uploaded speech with a SpeechBrain SepFormer model."""

import os
import tempfile

import gradio as gr
import torch
import torchaudio
from speechbrain.inference.separation import SepformerSeparation as separator

# Sample rate (Hz) used when writing the enhanced file; matches the "16k"
# checkpoint loaded below.
SAMPLE_RATE = 16000

# Load the enhancement model once at import time so every request reuses it.
model = separator.from_hparams(
    source="speechbrain/sepformer-dns4-16k-enhancement",
    savedir="pretrained_models/sepformer-dns4-16k-enhancement",
)


def _new_temp_wav(prefix):
    """Create an empty, uniquely named ``.wav`` file and return its path."""
    handle, path = tempfile.mkstemp(prefix=prefix, suffix=".wav")
    # Only the reserved name is needed; torchaudio reopens the path itself.
    os.close(handle)
    return path


def enhance_audio(noisy_audio):
    """Denoise an uploaded recording and return the path of the enhanced WAV.

    Args:
        noisy_audio: Filesystem path of the uploaded audio, as delivered by the
            ``gr.Audio(type="filepath")`` input, or ``None`` when the user
            submitted without uploading anything.

    Returns:
        Path of a newly written WAV file holding the enhanced signal.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if noisy_audio is None:
        raise gr.Error("Please upload an audio file before submitting.")

    # Unique scratch path per request: a fixed name such as "temp_audio.wav"
    # would be overwritten when two requests are processed at the same time.
    wav_audio = _new_temp_wav("noisy_")
    try:
        # Convert the upload (e.g. MP3) to WAV.
        waveform, source_rate = torchaudio.load(noisy_audio)
        torchaudio.save(wav_audio, waveform, source_rate)

        # Load and add a batch dimension to the audio tensor.
        noisy = model.load_audio(wav_audio).unsqueeze(0)

        # SepformerSeparation provides ``separate_batch`` (there is no
        # ``enhance_batch`` on this class). Per the SpeechBrain docs it returns
        # (batch, time, sources); the enhanced speech is source 0.
        with torch.no_grad():
            separated = model.separate_batch(noisy)
        enhanced = separated[:, :, 0]
    finally:
        # Clean up the temporary audio file even when decoding or inference fails.
        os.remove(wav_audio)

    # Save enhanced audio to its own file so concurrent requests never share
    # an output path. NOTE: these files are left in the system temp directory
    # for Gradio to serve and are not deleted by this function.
    enhanced_path = _new_temp_wav("enhanced_")
    torchaudio.save(enhanced_path, enhanced.detach().cpu(), SAMPLE_RATE)
    return enhanced_path


# Create the Gradio interface
interface = gr.Interface(
    fn=enhance_audio,
    inputs=gr.Audio(type="filepath", label="Upload Noisy Audio"),
    outputs=gr.Audio(type="filepath", label="Enhanced Audio"),
    title="Speech Enhancement App",
    description=(
        "Upload a noisy audio file to enhance the quality. "
        "The enhanced audio can be downloaded after processing."
    ),
)

# Launch the Gradio app with public link enabled
interface.launch(share=True)