"""Gradio demo that transcribes speech with a wav2vec2 ASR pipeline.

Audio can come from the microphone or from an uploaded file. Each
transcription is appended to a running transcript that is carried between
calls through Gradio's ``state`` input/output pair.
"""

import time

import gradio as gr
from transformers import pipeline

# Built once at module level so every call to transcribe() reuses it.
p = pipeline(
    "automatic-speech-recognition",
    model="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h",
)

# Pause applied before each transcription.
# NOTE(review): the reason for this delay is not visible in this file
# (possibly a crude throttle for live mode) -- confirm before removing.
_PRE_TRANSCRIBE_DELAY_S = 3

# Text appended after every transcribed segment in the running transcript.
_SEGMENT_SEPARATOR = " - "


def transcribe(audio, state="", uploaded_audio=""):
    """Transcribe one audio clip and append it to the running transcript.

    Args:
        audio: File path of the microphone recording; may be empty/None
            when nothing has been recorded.
        state: Transcript accumulated by previous calls.
        uploaded_audio: File path of an uploaded audio file. When present
            it takes precedence over ``audio``.

    Returns:
        A ``(transcript, transcript)`` pair: the first element is shown in
        the textbox output, the second is stored as the next ``state``.
    """
    if uploaded_audio:
        audio = uploaded_audio

    # Guard against a missing state so the concatenation below cannot fail.
    state = state or ""

    # With live=True the callback may fire while neither input holds audio;
    # return the transcript unchanged instead of calling the pipeline on None.
    if not audio:
        return state, state

    time.sleep(_PRE_TRANSCRIBE_DELAY_S)
    text = p(audio)["text"]
    state += text + _SEGMENT_SEPARATOR
    return state, state


gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath"),
        "state",
        # type="filepath" (not "numpy"): the pipeline is handed the value
        # directly, and a (sample_rate, ndarray) tuple is not an input form
        # it accepts, whereas a file path is.
        gr.inputs.Audio(
            label="Upload Audio File",
            type="filepath",
            source="upload",
        ),
    ],
    outputs=[
        "textbox",
        "state",
    ],
    live=True,
).launch()