"""Gradio speech-to-text demo.

Exposes four tabs (microphone/file x English/Russian) backed by two
Hugging Face automatic-speech-recognition pipelines.
"""

import gradio as gr
from transformers import pipeline

# Both ASR models are loaded once at import time; the Russian model is large,
# so startup can take a while.
pipe2 = pipeline("automatic-speech-recognition", model="distil-whisper/distil-small.en")
pipe3 = pipeline("automatic-speech-recognition", model="antony66/whisper-large-v3-russian")


def _transcribe(asr_pipeline, filepath):
    """Run *asr_pipeline* on the audio file at *filepath*.

    Returns the transcribed text, or "" (after emitting a UI warning) when
    no audio was recorded/uploaded (Gradio passes None in that case).
    """
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""
    output = asr_pipeline(filepath)
    return output["text"]


def transcribe_speech_english(filepath):
    """Transcribe an English audio file; see ``_transcribe``."""
    return _transcribe(pipe2, filepath)


def transcribe_speech_russian(filepath):
    """Transcribe a Russian audio file; see ``_transcribe``."""
    return _transcribe(pipe3, filepath)


def _make_interface(fn, source):
    """Build a transcription Interface for *source* ("microphone" or "upload")."""
    return gr.Interface(
        fn=fn,
        # `sources` is documented as a list of input sources.
        inputs=gr.Audio(sources=[source], type="filepath"),
        outputs=gr.Textbox(label="Transcription", lines=3),
        allow_flagging="never",
    )


mic_transcribe_english = _make_interface(transcribe_speech_english, "microphone")
mic_transcribe_russian = _make_interface(transcribe_speech_russian, "microphone")
file_transcribe_english = _make_interface(transcribe_speech_english, "upload")
file_transcribe_russian = _make_interface(transcribe_speech_russian, "upload")

demo = gr.Blocks()
with demo:
    gr.TabbedInterface(
        [
            mic_transcribe_english,
            file_transcribe_english,
            mic_transcribe_russian,
            file_transcribe_russian,
        ],
        [
            "Transcribe Microphone English",
            "Transcribe Audio File English",
            "Transcribe Microphone Russian",
            "Transcribe Audio File Russian",
        ],
    )

# Guard the server launch so importing this module does not start Gradio.
if __name__ == "__main__":
    demo.launch()