"""Gradio app: transcribe a video's audio with Whisper and export SRT subtitles."""

import os
import tempfile

import gradio as gr
import moviepy.editor as mp
import whisper

# Load Whisper model for audio transcription
model_name = "base"
whisper_model = whisper.load_model(model_name)


def _format_srt_timestamp(seconds) -> str:
    """Format a time offset in seconds as an SRT timestamp (``HH:MM:SS,mmm``)."""
    # Round once to whole milliseconds and do the rest in integer arithmetic.
    # Truncating the fractional part instead turns values such as 2.3 s into
    # ",299" because 2.3 % 1 is 0.2999... in binary floating point.
    total_ms = max(0, int(round(float(seconds) * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"


def generate_srt(transcription_result: dict) -> str:
    """Convert a transcription result with timestamps to SRT text.

    Args:
        transcription_result: Mapping with a ``'segments'`` list; each segment
            provides ``'start'`` and ``'end'`` offsets (treated as seconds)
            and a ``'text'`` string.

    Returns:
        The SRT document as one string. Entries are numbered from 1 and each
        is followed by a blank line; an empty segment list yields ``""``.
    """
    entries = []
    for index, segment in enumerate(transcription_result['segments'], start=1):
        start_time = _format_srt_timestamp(segment['start'])
        end_time = _format_srt_timestamp(segment['end'])
        entries.append(
            f"{index}\n{start_time} --> {end_time}\n{segment['text'].strip()}\n\n"
        )
    return "".join(entries)


def transcribe_and_generate_subtitles(video: str) -> tuple:
    """Transcribe the audio track of a video and write an SRT subtitle file.

    Args:
        video: Path of the uploaded video file, as passed in by the Gradio
            video input.

    Returns:
        A ``(transcription_text, srt_file_path)`` tuple.

    Raises:
        gr.Error: If no video was provided or the video has no audio track.
    """
    if not video:
        raise gr.Error("Please upload a video file first.")

    # A per-request scratch directory keeps concurrent requests from
    # overwriting each other's audio and guarantees the WAV is removed.
    with tempfile.TemporaryDirectory() as work_dir:
        audio_path = os.path.join(work_dir, "temp_audio.wav")

        # Step 1: Extract the audio from the video using moviepy
        video_clip = mp.VideoFileClip(video)
        try:
            if video_clip.audio is None:
                raise gr.Error("The uploaded video does not contain an audio track.")
            video_clip.audio.write_audiofile(audio_path, codec='pcm_s16le')
        finally:
            # Release the reader resources held by the clip.
            video_clip.close()

        # Step 2: Transcribe the audio with timestamps using Whisper
        transcription_result = whisper_model.transcribe(
            audio_path, task="transcribe", language="en", verbose=False
        )

    # Step 3: Generate SRT subtitles
    srt_content = generate_srt(transcription_result)

    # Save the SRT file in its own directory so the download keeps the
    # "output_subtitles.srt" name without clashing with other requests.
    # The directory must outlive this call because the file is returned.
    srt_dir = tempfile.mkdtemp(prefix="subtitles_")
    srt_file = os.path.join(srt_dir, "output_subtitles.srt")
    with open(srt_file, "w", encoding="utf-8") as f:
        f.write(srt_content)

    return transcription_result["text"], srt_file


# Set up Gradio interface
interface = gr.Interface(
    fn=transcribe_and_generate_subtitles,
    inputs=gr.Video(label="Upload Video File"),
    outputs=[
        gr.Textbox(label="English Transcription"),
        gr.File(label="Subtitle File (SRT)"),
    ],
    title="Video Subtitle Generator",
    description="Upload a video file, and this will generate a transcription and subtitles in SRT format.",
)

# Launch the interface only when run as a script, so importing this module
# does not start a server.
if __name__ == "__main__":
    interface.launch()