"""Gradio demo: transcribe microphone audio with the hosted openai/whisper-small model."""

import re

import gradio as gr

# Remote inference client for the Hugging Face hosted Whisper model
# (no local weights are downloaded; calls go to the inference API).
whisper = gr.load("models/openai/whisper-small")

# The hosted endpoint returns the *repr* of an AutomaticSpeechRecognitionOutput,
# e.g.  "AutomaticSpeechRecognitionOutput(text=' hello world', chunks=None)".
# Capture just the transcript; DOTALL lets the transcript span newlines.
_OUTPUT_RE = re.compile(
    r"AutomaticSpeechRecognitionOutput\(text=' (.*)', chunks=None\)",
    re.DOTALL,
)


def inference(audio):
    """Transcribe *audio* and return the plain transcript text.

    Parameters
    ----------
    audio : str
        Filepath to the recorded clip (``type="filepath"`` on the Audio input).

    Returns
    -------
    str
        The transcript with the repr wrapper stripped.
    """
    raw = whisper(audio)
    match = _OUTPUT_RE.fullmatch(raw)
    if match:
        return match.group(1)
    # Fallback for any unexpected repr shape: strip the wrapper pieces
    # individually (the original implementation's approach).
    return raw.replace("AutomaticSpeechRecognitionOutput(text=' ", "").replace("', chunks=None)", "")


# Markdown shown below the interface (kept verbatim; it is user-facing content).
article = """
## 参考
- [Innev GitHub](https://github.com/innev)
"""

# Build and serve the UI. launch() blocks and starts the local web server,
# matching the original module-level side effect.
gr.Interface(
    fn=inference,
    api_name="predict",
    inputs=[
        gr.Audio(label="录制语音", source="microphone", type="filepath")
    ],
    outputs=[
        gr.Text(label="识别出的文字")
    ],
    title="Whisper Speech Recognition",
    article=article,
).launch()