import logging

import gradio as gr
from faster_whisper import WhisperModel

# Configure logging for debugging purposes
logging.basicConfig()
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)

# Initialize the Whisper model with your desired configuration
model_size = "small"    # Choose the model size (e.g. tiny, base, small, medium)
device = "cpu"          # "cuda" for GPU, "cpu" for CPU
compute_type = "int8"   # GPU: "float16" or "int8"; CPU: "int8"

model = WhisperModel(model_size, device=device, compute_type=compute_type)


def transcribe(audio_file):
    # Transcribe the audio file without word-level timestamps
    segments, _ = model.transcribe(audio_file)

    # Format and gather the transcription with segment timestamps
    transcription_with_timestamps = [
        f"[{segment.start:.2f}s - {segment.end:.2f}s] {segment.text}"
        for segment in segments
    ]

    return "\n".join(transcription_with_timestamps)


# Define the Gradio interface.
# Note: gr.inputs.Audio(source="upload", type="file") is from the legacy Gradio API;
# gr.Audio with type="filepath" passes the uploaded file's path to transcribe().
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Upload Audio"),
    outputs="text",
    title="Whisper Transcription with Line-by-Line Timestamps",
    description="Upload an audio file to get a transcription with line-by-line timestamps using Faster Whisper.",
)

# Launch the app
if __name__ == "__main__":
    iface.launch()
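

# Optional sketch (not wired into the app above): faster-whisper can also return
# word-level timings by passing word_timestamps=True to transcribe(); each segment
# then exposes a .words list with start, end, and word attributes. This variant is
# only an illustration of that option and could be swapped in for transcribe() above.
def transcribe_with_word_timestamps(audio_file):
    segments, _ = model.transcribe(audio_file, word_timestamps=True)
    lines = [
        f"[{word.start:.2f}s - {word.end:.2f}s] {word.word.strip()}"
        for segment in segments
        for word in segment.words
    ]
    return "\n".join(lines)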