import logging

import gradio as gr
from faster_whisper import WhisperModel

# Configure logging for debugging purposes
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)

# Initialize the Whisper model with your desired configuration
model_size = "small"    # Choose the model size (e.g. tiny, base, small, medium)
device = "cpu"          # "cuda" for GPU, "cpu" for CPU
compute_type = "int8"   # GPU: "float16" or "int8"; CPU: "int8"

model = WhisperModel(model_size, device=device, compute_type=compute_type)


def transcribe(audio_file):
    # Transcribe the audio file without word-level timestamps
    segments, _ = model.transcribe(audio_file)

    # Format and gather the transcription with segment timestamps
    transcription_with_timestamps = [
        f"[{segment.start:.2f}s - {segment.end:.2f}s] {segment.text}"
        for segment in segments
    ]

    return "\n".join(transcription_with_timestamps)


# Define the Gradio interface.
# Note: gr.inputs.Audio(source="upload", type="file") is from the legacy Gradio API;
# gr.Audio with type="filepath" passes the uploaded file's path to transcribe().
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Upload Audio"),
    outputs="text",
    title="Whisper Transcription with Line-by-Line Timestamps",
    description="Upload an audio file to get a transcription with line-by-line timestamps using Faster Whisper.",
)

# Launch the app
if __name__ == "__main__":
    iface.launch()
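

# Optional sketch (not wired into the app above): faster-whisper can also return
# word-level timings by passing word_timestamps=True to transcribe(); each segment
# then exposes a .words list with start, end, and word attributes. This variant is
# only an illustration of that option and could be swapped in for transcribe() above.
def transcribe_with_word_timestamps(audio_file):
    segments, _ = model.transcribe(audio_file, word_timestamps=True)
    lines = [
        f"[{word.start:.2f}s - {word.end:.2f}s] {word.word.strip()}"
        for segment in segments
        for word in segment.words
    ]
    return "\n".join(lines)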