Omnibus's picture
Update vc.py
a3702dc
raw
history blame
2.99 kB
import gradio as gr
import torch
from pathlib import Path
from pytube import YouTube
from pydub import AudioSegment
from TTS.api import TTS
import uuid
# Random identifier generated once, when the module is imported. Every temp
# file written by the functions in this module is named "<uid>-...", so all
# requests served by this process share the same set of file names.
uid = uuid.uuid4()

# Run the model on the GPU when torch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
def custom_bark(inp, in_aud, trim_aud=None):
    """Speak ``inp`` in the voice of a reference clip stored on disk.

    The reference ("speaker") audio is always read from a file named after
    the module-level ``uid``; the audio arguments only select which file:
    ``{uid}-trim.wav`` when ``trim_aud`` is set, ``{uid}-tmp_aud.mp4``
    otherwise.

    Args:
        inp: Text to synthesize.
        in_aud: Value of the "Audio Source" component. It is accepted
            because the click handler passes it, but it is not read.
        trim_aud: Value of the "Trimmed Audio Source" component. Only its
            presence (``is not None``) matters, not its content.

    Returns:
        The name of the generated audio file, ``{uid}-output.wav``.

    Raises:
        FileNotFoundError: If the selected reference file does not exist.
    """
    if trim_aud is not None:
        speaker_wav = Path(f"{uid}-trim.wav")
    else:
        speaker_wav = Path(f"{uid}-tmp_aud.mp4")

    # Fail early with a readable message instead of constructing the model
    # first and then failing inside the TTS library on a missing file.
    if not speaker_wav.is_file():
        raise FileNotFoundError(
            f"Reference audio '{speaker_wav}' was not found; "
            "load a YouTube URL (and optionally trim it) first."
        )

    output_path = f"{uid}-output.wav"
    # NOTE(review): the model is constructed on every call; consider caching
    # it at module level if request latency matters.
    tts = TTS(
        model_name="tts_models/multilingual/multi-dataset/your_tts",
        progress_bar=False,
    ).to(device)
    tts.tts_to_file(
        inp,
        speaker_wav=str(speaker_wav),
        language="en",
        file_path=output_path,
    )
    return output_path
def load_video_yt(vid):
    """Download a YouTube video and an audio-only stream of it.

    Both files are written under names prefixed with the module-level
    ``uid``: ``{uid}-tmp.mp4`` for the video and ``{uid}-tmp_aud.mp4`` for
    the audio.

    Args:
        vid: The value handed to ``pytube.YouTube`` (the click handler
            passes the contents of the "YouTube URL" textbox).

    Returns:
        A 3-tuple: the result of the video ``download()`` call, the result
        of the audio ``download()`` call, and the audio file name.
    """
    yt = YouTube(vid)
    video_name = f"{uid}-tmp.mp4"
    audio_name = f"{uid}-tmp_aud.mp4"

    # Progressive mp4 streams, sorted by resolution descending; take the first.
    progressive_mp4 = yt.streams.filter(progressive=True, file_extension='mp4')
    top_stream = progressive_mp4.order_by('resolution').desc().first()
    video_result = top_stream.download(filename=video_name)

    # First stream in the audio-only listing.
    audio_only = yt.streams.filter(only_audio=True)
    audio_result = audio_only[0].download(filename=audio_name)

    print (f'Video Length: {yt.length}')
    return video_result, audio_result, audio_name
def _timestamp_to_ms(stamp):
    """Convert an ``SS``, ``M:SS`` or ``H:MM:SS`` string to milliseconds."""
    fields = str(stamp).strip().split(":")
    if len(fields) > 3:
        raise ValueError(f"Invalid timestamp {stamp!r}; expected M:SS")
    try:
        numbers = [int(field) for field in fields]
    except ValueError:
        raise ValueError(f"Invalid timestamp {stamp!r}; expected M:SS") from None
    if any(number < 0 for number in numbers):
        raise ValueError(f"Invalid timestamp {stamp!r}; values must be >= 0")

    # Fold left to right: each field is worth 60x the one after it.
    total_seconds = 0
    for number in numbers:
        total_seconds = total_seconds * 60 + number
    return total_seconds * 1000


def trim_clip(clip, start_t, end_t):
    """Cut a time range out of the downloaded audio and save it as WAV.

    The audio is always read from ``{uid}-tmp_aud.mp4`` and the result is
    written to ``{uid}-trim.wav``, where ``uid`` is the module-level
    identifier.

    Args:
        clip: Value supplied by the click handler. It is accepted for the
            handler's signature but not read.
        start_t: Start of the range, e.g. ``"0:23"``.
        end_t: End of the range, e.g. ``"1:12"``.

    Returns:
        The name of the trimmed file, ``{uid}-trim.wav``.

    Raises:
        ValueError: If a timestamp cannot be parsed, is negative, or the
            end is not after the start.
    """
    # Validate the range before decoding the audio so bad input fails fast.
    start = _timestamp_to_ms(start_t)
    end = _timestamp_to_ms(end_t)
    if end <= start:
        raise ValueError(
            f"End time {end_t!r} must be later than start time {start_t!r}"
        )

    song = AudioSegment.from_file(f"{uid}-tmp_aud.mp4", format="mp4")
    trimmed_name = f"{uid}-trim.wav"
    # The slice bounds are the millisecond offsets computed above.
    song[start:end].export(trimmed_name, format="wav")
    print("New Audio file is created and saved")
    return trimmed_name
# Gradio UI: a text box plus "go" button on top, and below it the controls
# for choosing the reference audio (microphone / upload / YouTube + trim).
# NOTE(review): the nesting below is reconstructed; confirm the layout
# against the running app.
with gr.Blocks() as app:
    with gr.Box():
        with gr.Row():
            in_text = gr.Textbox(lines=6, max_lines=20)
            with gr.Column():
                alt_go_btn = gr.Button()
                out_audio = gr.Audio(interactive=False)
    with gr.Box():
        with gr.Row():
            with gr.Column():
                # The microphone input is displayed but not wired to a handler.
                in_aud_mic = gr.Audio(source='microphone')
                in_aud_file = gr.Audio(label='Audio Source', source='upload', interactive=True)
                # Hidden holder for the third value returned by load_video_yt.
                aud_file = gr.File(interactive=False, visible=False)
                with gr.Row():
                    start_time = gr.Textbox(label="Start", value="0:00", placeholder="0:23")
                    end_time = gr.Textbox(label="End", value="0:01", placeholder="1:12")
                trim_clip_btn = gr.Button("Trim Clip")
                trim_aud = gr.Audio(label='Trimmed Audio Source', source='upload', interactive=False)
            with gr.Column():
                in_aud_yt = gr.Textbox(label="YouTube URL")
                load_yt_btn = gr.Button("Load URL")
                # Keyword was misspelled as `interactiv`; the intended
                # parameter is `interactive`.
                yt_vid = gr.Video(interactive=False)

    # Event wiring: load URL -> video preview + audio source + hidden file;
    # trim -> trimmed audio; go -> synthesized output.
    load_yt_btn.click(load_video_yt, in_aud_yt, [yt_vid, in_aud_file, aud_file])
    trim_clip_btn.click(trim_clip, [aud_file, start_time, end_time], trim_aud)
    alt_go_btn.click(custom_bark, [in_text, in_aud_file, trim_aud], out_audio)

app.launch()