"""Gradio web app: speech-to-text with the Whisper Large V3 model."""

import gradio as gr
from transformers import pipeline

# Load the ASR pipeline once at module import so every request reuses the
# same model instance instead of re-loading it per call.
pipe = pipeline("automatic-speech-recognition", model="vargha/whisper-large-v3")


def transcribe_audio(audio):
    """Transcribe an uploaded or recorded audio clip to text.

    Args:
        audio: Filesystem path to the audio file (Gradio supplies a path
            because the input component uses ``type="filepath"``), or
            ``None`` when the user submitted without providing audio.

    Returns:
        The transcription string, or a human-readable error message —
        errors are returned as text (not raised) so they render in the UI.
    """
    if audio is None:
        return "No audio file uploaded. Please try again."
    try:
        # The ASR pipeline returns a dict; the transcript is under "text".
        return pipe(audio)["text"]
    except Exception as e:  # UI boundary: surface failure to the user, don't crash
        return f"Error during transcription: {str(e)}"


# Build the interface: audio input (file upload or microphone), a text box
# for the result, and a button wired to the transcription function.
with gr.Blocks() as interface:
    gr.Markdown("# Whisper Large V3 Speech Recognition")
    gr.Markdown("Upload an audio file or use your microphone to transcribe speech to text.")

    audio_input = gr.Audio(type="filepath", label="Input Audio")
    output_text = gr.Textbox(label="Transcription")

    transcribe_button = gr.Button("Transcribe")
    transcribe_button.click(fn=transcribe_audio, inputs=audio_input, outputs=output_text)

# Guard the launch so the module can be imported (e.g. for testing or
# embedding) without starting a web server as a side effect.
if __name__ == "__main__":
    interface.launch()