Spaces:

isaakkamau
/

whisper-video-caption

Runtime error

File size: 2,548 Bytes

5377a79
061f1ce
5377a79
061f1ce
 
 
8b7a004
5377a79
 
 
 
 
 
 
 
 
 
061f1ce
5377a79
 
 
 
 
 
 
 
 
 
 
 
061f1ce
bfa484b
5377a79
 
061f1ce
5377a79
 
 
061f1ce
5377a79
 
 
 
 
 
061f1ce
 
 
 
 
 
bfa484b
061f1ce
5377a79
061f1ce
 
5377a79
061f1ce
5377a79
 
061f1ce
5377a79
14eb3d6
 
2aae4d0
14eb3d6
 
 
 
061f1ce
5377a79

import gradio as gr
import subprocess
import os
import whisper
from whisper.utils import write_vtt

# Load the Whisper 'medium' model once at import time; transcribe() below
# reuses this single module-level instance for every request.
model = whisper.load_model('medium')
# Human-readable title for the app.
# NOTE(review): not referenced anywhere in the visible code — confirm whether
# it was meant to be passed to the UI.
title = 'Add Captions(CC) to your videos'

def convert_mp4_mp3(file, output="mp3"):
    """
    Convert the input video file to an audio file (MP4 -> MP3) using FFmpeg.

    The audio file is written next to the input, using the same base name and
    the extension given by ``output``. An existing file at that path is
    overwritten (ffmpeg ``-y``).

    Args:
        file: Path of the input media file.
        output: Extension (without the leading dot) of the audio file.

    Returns:
        Path of the audio file: ``<file without extension>.<output>``.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    filename, _ = os.path.splitext(file)
    audio_file = f'{filename}.{output}'

    # The input already has the requested extension: ffmpeg refuses to write
    # onto its own input, so there is nothing to convert.
    if audio_file == file:
        return audio_file

    completed = subprocess.run(
        ['ffmpeg', '-y', '-i', file, audio_file],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.PIPE,
    )
    if completed.returncode != 0:
        # Surface the failure here instead of returning a path to a file
        # that was never produced; keep only the tail of ffmpeg's log.
        details = completed.stderr.decode(errors='replace').strip()
        raise RuntimeError(
            f'ffmpeg failed to convert {file!r} '
            f'(exit code {completed.returncode}): {details[-500:]}'
        )

    return audio_file

def transcribe(video):
    """
    Generate captions for a video with the Whisper model and burn them
    into a copy of the video.

    Steps: extract the audio track, run Whisper on it with
    ``task='translate'``, write the resulting segments to a ``.vtt`` file
    and render that file onto the video with ffmpeg's ``subtitles`` filter.
    Both the ``.vtt`` file and the subtitled video are written to the
    current working directory, named after the input's base name.

    Args:
        video: Path of the input video file.

    Returns:
        Path of the subtitled video (``<base name>_subtitled.mp4``).

    Raises:
        RuntimeError: If ffmpeg fails to render the subtitled video.
    """
    audio_file = convert_mp4_mp3(video)

    # Decoding configuration forwarded to Whisper.
    options = dict(beam_size=5, best_of=5, fp16=False)
    translate_options = dict(task='translate', **options)
    result = model.transcribe(audio_file, **translate_options)

    output_dir = ''
    audio_path = os.path.splitext(os.path.basename(audio_file))[0]
    subtitle = os.path.join(output_dir, audio_path + '.vtt')
    output_video = os.path.join(output_dir, f'{audio_path}_subtitled.mp4')

    # Write the subtitles to a .vtt file. The encoding is pinned to UTF-8 so
    # non-ASCII captions do not depend on the platform's default locale.
    with open(subtitle, 'w', encoding='utf-8') as f:
        write_vtt(result['segments'], file=f)

    # Burn the subtitles into the input video. An argument list (no shell)
    # keeps paths containing spaces or shell metacharacters intact, and '-y'
    # avoids blocking on ffmpeg's overwrite prompt when the output exists.
    # NOTE(review): characters special to ffmpeg's filter syntax (e.g. ':' or
    # "'") in the subtitle file name are not escaped here.
    completed = subprocess.run(
        ['ffmpeg', '-y', '-i', video, '-vf', f'subtitles={subtitle}', output_video]
    )
    if completed.returncode != 0:
        raise RuntimeError(
            f'ffmpeg failed to render subtitles onto {video!r} '
            f'(exit code {completed.returncode})'
        )

    return output_video

# Footer markup rendered underneath the controls.
_FOOTER_HTML = '''
        <div class="footer">
                    <p>Powered by <a href="https://openai.com/" style="text-decoration: underline;" target="_blank">OpenAI</a> - NOTE: The longer the video, the longer the processing time <a href="https://github.com/Isaakkamau" style="text-decoration: underline;" target="_blank"> Star me on Github</a>
                    </p>
        </div>
        '''

# Build the UI: an input/output video pair side by side, a button that runs
# transcribe() on the uploaded video, and the footer.
with gr.Blocks() as block:
    with gr.Group():
        with gr.Box():
            with gr.Row().style():
                input_video = gr.Video(label="Input Video", type="filepath", mirror_webcam=False)
                output_video = gr.Video()
            btn = gr.Button('Generate Subtitle Video')

        # Clicking the button sends the uploaded video through transcribe()
        # and shows the returned file in the output player.
        btn.click(transcribe, inputs=[input_video], outputs=[output_video])
        gr.HTML(_FOOTER_HTML)

# Start the app with request queuing enabled.
block.launch(enable_queue=True)