import datetime

import gradio as gr
import whisper

# Load the Whisper "base" checkpoint once at startup so every request reuses it.
model = whisper.load_model("base")


def transcribe(inputs, timestamp):
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    result = model.transcribe(inputs)

    if timestamp == "Yes":
        # Emit each segment prefixed with its start/end times (H:MM:SS).
        output = ""
        for segment in result["segments"]:
            start = datetime.timedelta(seconds=segment["start"])
            end = datetime.timedelta(seconds=segment["end"])
            output += f"{start} {end}\n{segment['text'].strip()}\n"
    else:
        output = result["text"]
    return output


interface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=["upload", "microphone"], type="filepath"),
        gr.Radio(["Yes", "No"], label="Timestamp", info="Display segment-level timestamps if needed."),
    ],
    outputs="text",
    title="Whisper (base): Transcribe Audio",
    description=(
        "Transcribe long-form microphone or uploaded audio with the click of a button! "
        "The demo runs OpenAI's open-source Whisper model locally."
    ),
)

interface.launch()
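
# Optional sanity check: transcribe() can also be exercised directly, without
# the Gradio UI. A minimal sketch, assuming a local file named "sample.wav"
# exists (the path is hypothetical):
#
#     print(transcribe("sample.wav", "Yes"))
#
# To expose the demo over a temporary public URL, Gradio's launch() also
# accepts share=True, i.e. interface.launch(share=True).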