# Source listing metadata: file size 2,651 bytes, revision a9fdafe.
import os
import subprocess
import tempfile

import gradio as gr
import moviepy.editor as mp
import torch
import whisper
# Run inference on the GPU when PyTorch can see one; otherwise stay on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Name of the Whisper checkpoint handed to whisper.load_model.
model_name = "tiny"

# Load the checkpoint once at import time and place it on the selected device,
# so every request served by this module reuses the same model object.
whisper_model = whisper.load_model(model_name)
whisper_model = whisper_model.to(device)
def _format_srt_timestamp(seconds):
    """Render a time offset in seconds as an SRT ``HH:MM:SS,mmm`` timestamp."""
    # Convert to whole milliseconds first. Rounding here (instead of
    # truncating the fractional part) keeps values such as 1.001 from
    # collapsing to ",000" through float error, and lets 59.9996 carry
    # cleanly into the next second/minute via divmod.
    total_ms = int(round(float(seconds) * 1000))
    total_seconds, millis = divmod(total_ms, 1000)
    total_minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"


def generate_srt(transcription_result):
    """Build the text of an SRT subtitle file from a transcription result.

    Args:
        transcription_result: Mapping with a ``'segments'`` sequence; each
            segment is a mapping providing ``'start'`` and ``'end'`` offsets
            in seconds and a ``'text'`` string.

    Returns:
        The SRT document as a single string. Cues are numbered from 1, each
        cue is followed by a blank line, and an empty segment list yields
        an empty string.
    """
    cues = []
    for index, segment in enumerate(transcription_result['segments'], start=1):
        start_time = _format_srt_timestamp(segment['start'])
        end_time = _format_srt_timestamp(segment['end'])
        cues.append(
            f"{index}\n{start_time} --> {end_time}\n{segment['text'].strip()}\n\n"
        )
    return "".join(cues)
def extract_audio_ffmpeg(video_file, audio_output):
    """Extract the audio track of a video into a 16 kHz 16-bit PCM file.

    Runs the ``ffmpeg`` executable found on PATH.

    Args:
        video_file: Path of the input video.
        audio_output: Path of the audio file to write; an existing file is
            overwritten.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status, so a failed extraction is not silently followed by
            transcription of a missing or stale audio file.
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    subprocess.run(
        [
            'ffmpeg',
            # Global option: placed before the inputs/outputs rather than
            # as a trailing argument after the output file.
            '-y',
            '-i', video_file,
            '-vn',                    # drop the video stream
            '-acodec', 'pcm_s16le',   # signed 16-bit little-endian PCM
            '-ar', '16000',           # 16 kHz sample rate
            audio_output,
        ],
        check=True,
    )
# Whisper language codes whose audio is translated to English instead of
# being transcribed verbatim: Hausa ("ha") and Yoruba ("yo").
# NOTE(review): Igbo does not appear to be in Whisper's language table, so
# it cannot be reported by detection or requested by code — confirm.
_TRANSLATE_TO_ENGLISH = frozenset({"ha", "yo"})


def transcribe_and_generate_subtitles(video):
    """Transcribe (or translate) a video and burn the subtitles into a copy.

    The audio track is extracted with ffmpeg and transcribed by the
    module-level Whisper model with automatic language detection. When the
    detected language is one of ``_TRANSLATE_TO_ENGLISH`` the audio is
    processed a second time with ``task="translate"``. The segments are
    written as an SRT file and rendered onto the video with ffmpeg's
    ``subtitles`` filter.

    Args:
        video: Path of the uploaded video file.

    Returns:
        A ``(text, output_video)`` tuple: the full transcription or
        translation text, and the path of the subtitled video.

    Raises:
        ValueError: If no video path was supplied.
        subprocess.CalledProcessError: If ffmpeg fails while burning the
            subtitles.
    """
    if not video:
        raise ValueError("No video file was provided.")
    # ffmpeg is run from a scratch directory below, so pin the input path.
    video_path = os.path.abspath(video)

    # Intermediate files live in a per-call scratch directory so concurrent
    # requests cannot overwrite each other's audio or subtitles.
    with tempfile.TemporaryDirectory(prefix="subtitle_work_") as work_dir:
        audio_path = os.path.join(work_dir, "temp_audio.wav")
        extract_audio_ffmpeg(video_path, audio_path)

        # No language is forced on the first pass: forcing one makes the
        # reported language always equal to it, which disables detection.
        transcription_result = whisper_model.transcribe(audio_path, verbose=False)
        detected_language = transcription_result['language']
        if detected_language in _TRANSLATE_TO_ENGLISH:
            transcription_result = whisper_model.transcribe(
                audio_path,
                task="translate",
                language=detected_language,
                verbose=False,
            )

        srt_file = "output_subtitles.srt"
        with open(os.path.join(work_dir, srt_file), "w", encoding="utf-8") as f:
            f.write(generate_srt(transcription_result))

        # The result must outlive the scratch directory, so it gets its own
        # directory that is intentionally not cleaned up here.
        output_dir = tempfile.mkdtemp(prefix="subtitled_video_")
        output_video = os.path.join(output_dir, "video_with_subtitles.mp4")

        # Run from the scratch directory and reference the SRT by bare file
        # name: the subtitles filter argument has its own escaping rules, so
        # an absolute path (drive colons, backslashes) could be misparsed.
        subprocess.run(
            [
                'ffmpeg',
                '-y',
                '-i', video_path,
                '-vf', f"subtitles={srt_file}",
                output_video,
            ],
            cwd=work_dir,
            check=True,
        )

    return transcription_result["text"], output_video
# Gradio UI: one video upload in; the transcription/translation text and the
# subtitled video file out.
interface = gr.Interface(
    fn=transcribe_and_generate_subtitles,
    inputs=gr.Video(label="Upload Video File"),
    outputs=[
        gr.Textbox(label="Transcription or Translation"),
        gr.File(label="Download Video with Subtitles"),
    ],
    title="Video Subtitle Generator",
    description="Upload a video in either English, Hausa, Yoruba, or Igbo. The system will detect the language, transcribe or translate if necessary, and generate a video with subtitles embedded.",
    live=False,
)

# Start the web server as soon as the module is executed.
interface.launch()