"""Live microphone speech-to-text demo.

Loads the ``jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn`` model through
the Transformers ``automatic-speech-recognition`` pipeline and serves it via a
Gradio interface that keeps a running transcript in the interface's "state"
slot.
"""
import os
import subprocess
import sys
import time

from transformers import pipeline

# Install the pinned gradio release before importing it.  Running pip through
# the current interpreter (``sys.executable -m pip``) guarantees the package
# lands in the environment this script actually runs in, and the argument list
# avoids going through a shell.  ``check=False`` keeps the original best-effort
# behavior: a failed install does not abort start-up by itself.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "gradio==2.8.7"],
    check=False,
)

# Must come after the install step above, hence not at the top of the file.
import gradio as gr  # noqa: E402

# Speech-recognition pipeline, created once at start-up and reused per request.
p = pipeline(
    "automatic-speech-recognition",
    model="jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn",
)


def transcribe(audio, state=""):
    """Transcribe one audio clip and append it to the running transcript.

    Args:
        audio: Value delivered by the microphone input; the interface is
            configured with ``type="filepath"``, so this is expected to be a
            path to the recorded clip.  ``None`` is tolerated.
        state: Transcript accumulated by earlier calls.  ``None`` is treated
            as an empty transcript.

    Returns:
        A ``(transcript, transcript)`` pair: the first item feeds the textbox
        output, the second is stored back into the "state" slot.
    """
    # Normalise a missing state so the concatenation below cannot fail.
    state = state or ""

    # NOTE(review): with live=True the handler may be invoked without a
    # recording (audio is None) - confirm against the pinned gradio version.
    # Skipping the model call in that case leaves the transcript untouched.
    if audio is None:
        return state, state

    # Fixed delay kept from the original; presumably throttles how often the
    # model runs while the interface is live - TODO confirm.
    time.sleep(2)

    text = p(audio)["text"]
    state += text + " "
    return state, state


gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath"),
        "state",
    ],
    outputs=[
        "textbox",
        "state",
    ],
    live=True,
).launch()