import os

import gradio as gr
import torchaudio
from moviepy.editor import VideoFileClip
from speechbrain.inference.separation import SepformerSeparation as separator

# Pretrained SepFormer speech-enhancement model, loaded once so both Gradio
# tabs share it.  Bug fix: `model` was used in speechbrain() below but never
# created anywhere in the original script (NameError at first request).
# The 8 kHz WHAM enhancement checkpoint matches the 8000 Hz sample rate used
# when saving the output — TODO confirm this is the intended checkpoint.
model = separator.from_hparams(
    source="speechbrain/sepformer-wham-enhancement",
    savedir="pretrained_models/sepformer-wham-enhancement",
)


def convert_video_to_audio(video_input):
    """Extract the audio track of *video_input* into an AAC ``.m4a`` file.

    Parameters
    ----------
    video_input : str
        Filepath of the uploaded video.

    Returns
    -------
    str
        Path of the written audio file (same basename as the video).
    """
    video_clip = VideoFileClip(video_input)
    audio_clip = video_clip.audio
    # Bug fix: the original used video_input.split('.')[0], which truncates
    # paths whose directories contain dots; splitext strips only the extension.
    audio_clip_filepath = os.path.normpath(
        f"{os.path.splitext(video_input)[0]}.m4a"
    )
    try:
        audio_clip.write_audiofile(audio_clip_filepath, codec="aac")
    finally:
        # Always release the clip handles, even if the write fails.
        audio_clip.close()
        video_clip.close()
    return audio_clip_filepath


def speechbrain(input_obj, input_obj_type):
    """Run speech enhancement on an uploaded audio or video file.

    Parameters
    ----------
    input_obj : str
        Filepath of the uploaded media file.
    input_obj_type : str
        Either ``"video"`` or ``"audio"``; videos have their audio track
        extracted first.

    Returns
    -------
    str
        Path of the enhanced WAV file written at 8 kHz.
    """
    # Bug fix: the original assigned `aud` only in the "video" branch, so
    # the Audio tab raised NameError on every request.
    if input_obj_type == "video":
        aud = convert_video_to_audio(input_obj)
    else:
        aud = input_obj
    est_sources = model.separate_file(path=aud)
    # Keep the first separated source as the enhanced speech signal.
    torchaudio.save(
        "clean_audio_file.wav", est_sources[:, :, 0].detach().cpu(), 8000
    )
    return "clean_audio_file.wav"


def main():
    """Build and launch the two-tab (video / audio) Gradio demo."""
    with gr.Blocks(
        title="Speech Enhancement",
        delete_cache=(86400, 86400),
        theme=gr.themes.Base(),
    ) as demo:
        description = (
            "Gradio demo for Speech Enhancement by SpeechBrain. To use it, "
            "simply upload your audio, or click one of the examples to load "
            "them. Read more at the links below."
        )
        # Bug fix: in the original this was an unterminated double-quoted
        # string spanning several physical lines (SyntaxError).  The visible
        # text is preserved; the original likely wrapped it in HTML anchor
        # tags — TODO restore the paper/repo links if available.
        article = "Attention is All You Need in Speech Separation | Github Repo"
        examples = [["samples_audio_samples_test_mixture.wav"]]
        with gr.Tabs(selected="video"):
            with gr.Tab("Video", id="video"):
                gr.Interface(
                    fn=speechbrain,
                    inputs=[
                        gr.Video(type="filepath"),
                        gr.Radio(
                            choices=["video"], value="video", label="File Type"
                        ),
                    ],  # Bug fix: comma before `outputs` was missing (SyntaxError).
                    outputs=[gr.Audio(label="Output Audio", type="filepath")],
                    description=description,
                    article=article,
                    examples=examples,
                )
            with gr.Tab("Audio", id="audio"):
                gr.Interface(
                    fn=speechbrain,
                    inputs=[
                        gr.Audio(type="filepath"),
                        gr.Radio(
                            choices=["audio"], value="audio", label="File Type"
                        ),
                    ],  # Bug fix: comma before `outputs` was missing (SyntaxError).
                    outputs=[gr.Audio(label="Output Audio", type="filepath")],
                    description=description,
                    article=article,
                    examples=examples,
                )
    demo.launch()


if __name__ == "__main__":
    main()