from transformers import pipeline

import gradio as gr

# Load a pretrained Whisper checkpoint for speech recognition.
pipe = pipeline("automatic-speech-recognition", model="openai/whisper-small")


def transcribe_speech(filepath):
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "english",
        },
        # Split long audio into 30-second chunks and batch them for speed.
        chunk_length_s=30,
        batch_size=8,
    )
    return output["text"]


demo = gr.Blocks()

# Two interfaces share the same transcription function: one records from
# the microphone, the other accepts an uploaded audio file. Both pass the
# audio to the pipeline as a file path.
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs="text",
)

file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs="text",
)

# Expose the two interfaces as tabs inside the Blocks app.
with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

demo.launch()
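# Note: with default settings, launch() serves the app locally
# (http://127.0.0.1:7860). If the script runs on a remote machine or in a
# hosted notebook, a temporary public URL can be requested instead by
# passing share=True:
#
#     demo.launch(share=True)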