Spaces:
Runtime error
Runtime error
| import whisper | |
| import moviepy.editor as mp | |
| import gradio as gr | |
| import torch | |
| import subprocess | |
# 1. Load the Whisper model, preferring the GPU when CUDA is available.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# 'tiny' trades accuracy for speed; swap in 'base' or larger for better results.
model_name = "tiny"
whisper_model = whisper.load_model(model_name).to(device)
# Helper function to convert Whisper's transcription to SRT format
def _format_srt_timestamp(seconds):
    """Format a time offset in seconds as an SRT timestamp (HH:MM:SS,mmm)."""
    hours = int(seconds // 3600)
    minutes = int((seconds % 3600) // 60)
    secs = int(seconds % 60)
    millis = int((seconds % 1) * 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"


def generate_srt(transcription_result):
    """Convert a Whisper transcription with timestamps to SRT format.

    Args:
        transcription_result: dict with a 'segments' list; each segment is a
            dict with 'start' and 'end' offsets in seconds and a 'text' string.

    Returns:
        The complete subtitle file contents as one SRT-formatted string
        (empty string when there are no segments).
    """
    # Build the entries in a list and join once — avoids quadratic str +=.
    entries = []
    for index, segment in enumerate(transcription_result['segments'], start=1):
        start_time = _format_srt_timestamp(segment['start'])
        end_time = _format_srt_timestamp(segment['end'])
        entries.append(f"{index}\n{start_time} --> {end_time}\n{segment['text'].strip()}\n\n")
    return "".join(entries)
# 2. Function to Extract Audio using optimized FFmpeg subprocess
def extract_audio_ffmpeg(video_file, audio_output):
    """Extract the audio track from a video as 16 kHz 16-bit PCM WAV.

    The sample rate and codec match what the Whisper model consumes.

    Args:
        video_file: path to the input video.
        audio_output: path where the extracted WAV file is written
            (overwritten if it already exists, via '-y').

    Raises:
        subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
    """
    subprocess.run(
        [
            'ffmpeg',
            '-i', video_file,
            '-vn',                   # drop the video stream
            '-acodec', 'pcm_s16le',  # 16-bit PCM
            '-ar', '16000',          # 16 kHz sample rate
            audio_output,
            '-y',                    # overwrite existing output without prompting
        ],
        # check=True: fail loudly instead of silently handing Whisper a
        # missing or stale audio file when ffmpeg errors out.
        check=True,
    )
# 3. Function to Transcribe, Translate (if necessary), and Generate Subtitles
def transcribe_and_generate_subtitles(video):
    """Transcribe (or translate) a video's audio and embed subtitles into it.

    The spoken language is auto-detected; Hausa audio is re-run with
    Whisper's translate task to produce English text, anything else is
    transcribed as-is.

    Args:
        video: path to the uploaded video file.

    Returns:
        A (text, output_video_path) tuple: the transcription/translation
        text and the path to the subtitled video.

    Raises:
        subprocess.CalledProcessError: if either ffmpeg invocation fails.
    """
    # Step 1: Extract audio from video using optimized FFmpeg
    audio_path = "temp_audio.wav"
    extract_audio_ffmpeg(video, audio_path)

    # Step 2: Transcribe, letting Whisper auto-detect the language.
    # BUG FIX: the original passed language="en", which forced English and
    # made the detected language below always "en" — the Hausa branch was
    # unreachable.
    transcription_result = whisper_model.transcribe(audio_path, verbose=False)

    # Step 3: Translate to English if the detected language is Hausa.
    # Whisper reports ISO 639-1 codes, so Hausa is "ha" (not "hau").
    detected_language = transcription_result['language']
    if detected_language == "ha":
        # Re-transcribe with translation to English
        transcription_result = whisper_model.transcribe(audio_path, task="translate", verbose=False)

    # Step 4: Generate SRT subtitles and persist them for ffmpeg to read.
    srt_content = generate_srt(transcription_result)
    srt_file = "output_subtitles.srt"
    with open(srt_file, "w", encoding="utf-8") as f:
        f.write(srt_content)

    # Step 5: Burn the subtitles into the video.
    output_video = "video_with_subtitles.mp4"
    subprocess.run(
        ['ffmpeg', '-i', video, '-vf', f"subtitles={srt_file}", output_video, '-y'],
        check=True,  # surface ffmpeg failures instead of returning a missing file
    )

    return transcription_result["text"], output_video
# 4. Wire the pipeline into a Gradio UI: one video in, text + file out.
interface = gr.Interface(
    transcribe_and_generate_subtitles,
    inputs=gr.Video(label="Upload Video File"),
    outputs=[
        gr.Textbox(label="Transcription or Translation"),
        gr.File(label="Download Video with Subtitles"),
    ],
    title="Video Subtitle Generator",
    description=(
        "Upload a video in either English or Hausa. The system will detect "
        "the language, transcribe or translate if necessary, and generate a "
        "video with subtitles embedded."
    ),
    live=False,  # the pipeline is far too heavy to re-run on every input change
)

# Start serving the app.
interface.launch()