"""Gradio app: upload an audio file and get a Faster Whisper transcript.

The transcript lists every segment with its start/end time, followed by the
word-level timestamps belonging to that segment.
"""

import logging

import gradio as gr
from faster_whisper import WhisperModel

# Configure logging for debugging purposes.
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)

# Initialize the Whisper model with your desired configuration.
model_size = "large-v3"  # Choose the model size
device = "cpu"  # or "cuda" if GPU is available
compute_type = "int8"  # Choose the compute type based on your hardware

# The model identifier is passed positionally: WhisperModel's first parameter
# is named ``model_size_or_path``, so ``model_size=...`` is rejected as an
# unexpected keyword argument.
model = WhisperModel(model_size, device=device, compute_type=compute_type)


def transcribe(audio_file: str) -> str:
    """Transcribe *audio_file* and return the text annotated with timestamps.

    Args:
        audio_file: Path of the audio file to transcribe (the Gradio input
            below is configured with ``type="filepath"``).

    Returns:
        One entry per segment, joined by newlines. Each entry is a
        ``[start - end] text`` header line followed by one indented
        ``[start - end] word`` line per word in that segment.
    """
    # Enable word-level timestamps; the second return value is not used here.
    segments, _ = model.transcribe(audio_file, word_timestamps=True)

    transcription_with_timestamps = []
    for segment in segments:
        segment_text = (
            f"[{segment.start:.2f}s - {segment.end:.2f}s] {segment.text}\n"
        )
        # Guard against a segment that carries no word list so that the
        # join below never iterates over None.
        word_details = "\n".join(
            f"  [{word.start:.2f}s - {word.end:.2f}s] {word.word}"
            for word in (segment.words or ())
        )
        transcription_with_timestamps.append(segment_text + word_details)

    return "\n".join(transcription_with_timestamps)


# Define the Gradio interface.
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources="upload", type="filepath", label="Upload Audio"),
    outputs="text",
    title="Enhanced Whisper Transcription with Timestamps",
    description=(
        "Upload an audio file to get detailed transcription with timestamps "
        "using Faster Whisper."
    ),
)

# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch()