"""Gradio demo: transcribe microphone audio with the hosted openai/whisper-small model."""

import re

import gradio as gr

# Remote inference client for the Hugging Face hosted Whisper model
# (no local weights are downloaded; calls go to the inference API).
whisper = gr.load("models/openai/whisper-small")

# The hosted endpoint returns the *repr* of an AutomaticSpeechRecognitionOutput,
# e.g.  "AutomaticSpeechRecognitionOutput(text=' hello world', chunks=None)".
# Capture just the transcript; DOTALL lets the transcript span newlines.
_OUTPUT_RE = re.compile(
    r"AutomaticSpeechRecognitionOutput\(text=' (.*)', chunks=None\)",
    re.DOTALL,
)


def inference(audio):
    """Transcribe *audio* and return the plain transcript text.

    Parameters
    ----------
    audio : str
        Filepath to the recorded clip (``type="filepath"`` on the Audio input).

    Returns
    -------
    str
        The transcript with the repr wrapper stripped.
    """
    raw = whisper(audio)
    match = _OUTPUT_RE.fullmatch(raw)
    if match:
        return match.group(1)
    # Fallback for any unexpected repr shape: strip the wrapper pieces
    # individually (the original implementation's approach).
    return raw.replace("AutomaticSpeechRecognitionOutput(text=' ", "").replace("', chunks=None)", "")


# Markdown shown below the interface (kept verbatim; it is user-facing content).
article = """
## 参考
- [Innev GitHub](https://github.com/innev)
"""

# Build and serve the UI. launch() blocks and starts the local web server,
# matching the original module-level side effect.
gr.Interface(
    fn=inference,
    api_name="predict",
    inputs=[
        gr.Audio(label="录制语音", source="microphone", type="filepath")
    ],
    outputs=[
        gr.Text(label="识别出的文字")
    ],
    title="Whisper Speech Recognition",
    article=article,
).launch()