# Spaces status (page residue captured together with this source): Runtime error
import moviepy.editor as mp | |
import librosa | |
import numpy as np | |
import gradio as gr | |
from pytube import YouTube | |
import subprocess | |
def buffer_n_merge(intervals, buffer=0.1):
    """Pad every interval by ``buffer`` on both sides and merge those that then touch or overlap.

    Args:
        intervals: Sequence of ``(start, end)`` pairs, expected to be ordered
            by start. The pairs are only read; the caller's data is never
            modified.
        buffer: Amount subtracted from each start and added to each end.

    Returns:
        A new list of ``[start, end]`` lists. Empty (or ``None``) input gives
        ``[]``. The first start may fall below the original range and the last
        end above it; clamping is left to the caller.
    """
    if not intervals:
        return []

    merged = []
    for start, end in intervals:
        start -= buffer
        end += buffer
        if merged and merged[-1][1] >= start:
            # max() keeps the wider end when this interval is nested inside
            # the previous one instead of shrinking the merged span.
            merged[-1][1] = max(merged[-1][1], end)
        else:
            merged.append([start, end])
    return merged
def download_and_process_video(youtube_url, threshold_db, buffer_sec):
    """Download a YouTube video and write a copy with the silent stretches cut out.

    Args:
        youtube_url: URL handed to ``pytube.YouTube``.
        threshold_db: Passed as ``top_db`` to ``librosa.effects.split``.
        buffer_sec: Seconds of padding kept on each side of every non-silent
            interval before neighbouring intervals are merged.

    Returns:
        Path of the written file, ``'my_concatenation.mp4'``.

    Raises:
        ValueError: If the downloaded video has no audio track, or if no
            non-silent section remains to be written.
    """
    sample_rate = 22000  # sampling rate used for the silence analysis

    # Download the YouTube video
    youtube = YouTube(youtube_url)
    vidpath = 'downloaded_video'
    youtube.streams.first().download(filename=vidpath)

    output_file = 'my_concatenation.mp4'
    video = mp.VideoFileClip(vidpath)
    final_clip = None
    try:
        if video.audio is None:
            raise ValueError("The downloaded video has no audio track.")

        audio = video.audio.to_soundarray(fps=sample_rate)
        # Silence detection runs on the first channel only.
        channel = audio[:, 0] if audio.ndim > 1 else audio
        non_silent_intervals = librosa.effects.split(channel, top_db=threshold_db)

        # librosa reports sample indices; convert them to seconds.
        non_silent_intervals_sec = np.array(non_silent_intervals) / sample_rate
        non_silent_intervals_sec = buffer_n_merge(
            non_silent_intervals_sec.tolist(), buffer=buffer_sec
        )

        # Cut the video at the non-silent intervals, clamped to the clip's
        # bounds because the buffer can push them past either end.
        clips = []
        for start_time, end_time in non_silent_intervals_sec:
            start_time = max(0, start_time)
            end_time = min(end_time, video.duration)
            if end_time > start_time:
                clips.append(video.subclip(start_time, end_time))

        if not clips:
            raise ValueError("No non-silent sections were found at the given threshold.")

        final_clip = mp.concatenate_videoclips(clips)
        final_clip.write_videofile(
            output_file,
            codec='libx264',
            audio_codec='aac',
            temp_audiofile='temp-audio.m4a',
            remove_temp=True,
        )
    finally:
        # Release the reader handles whether or not processing succeeded.
        if final_clip is not None:
            final_clip.close()
        video.close()

    return output_file
# Gradio UI: a URL box and two sliders feed download_and_process_video, whose
# returned file path is shown as a video.
# NOTE(review): ``gr.inputs`` / ``gr.outputs`` is Gradio's legacy namespace;
# newer releases expose ``gr.Textbox`` / ``gr.Slider`` / ``gr.Video`` instead —
# confirm against the installed Gradio version.
iface = gr.Interface(
    fn=download_and_process_video,
    inputs=[
        # The legacy namespace names this component ``Textbox``.
        gr.inputs.Textbox(label="YouTube URL"),
        gr.inputs.Slider(minimum=1, maximum=70, step=1, default=30, label="Threshold (db)"),
        gr.inputs.Slider(minimum=0, maximum=2, step=0.01, default=0.1, label="Buffer (sec)"),
    ],
    outputs=gr.outputs.Video(label="Processed Video"),
    title="YouTube Video Silence Remover",
)
iface.launch()