import json

import gradio as gr
import numpy as np
import wenetruntime as wenet

wenet.set_log_level(2)
decoder = wenet.Decoder(lang='chs')


def recognition(audio):
    print(audio)
    if audio is None:
        return "Input Error! Please enter one audio!"
    sr, y = audio
    assert sr in [48000, 16000]
    if sr == 48000:
        # Naive downsample to 16 kHz: scale to the int16 range, then keep
        # every 3rd sample.
        y = (y / max(np.max(y), 1) * 32767)[::3].astype("int16")
    # Decode the whole utterance in one call; the second argument marks it
    # as the final chunk of audio.
    ans = decoder.decode(y.tobytes(), True)
    if ans is None:
        return "ERROR! No text output! Please try again!"
    # ans is a JSON string, e.g.
    # {
    #     'nbest': [{'sentence': ''}],
    #     'type': 'final_result'
    # }
    ans = json.loads(ans)
    print(ans)
    txt = ans['nbest'][0]['sentence']
    return txt


# Input: microphone audio delivered as (sample_rate, numpy waveform)
inputs = [
    gr.inputs.Audio(source="microphone", type="numpy", label='Speaker#1')
]
output = gr.outputs.Textbox(label="Output Text")
# examples = ['examples/BAC009S0764W0121.wav']

text = "Speech Recognition in WeNet | 基于 WeNet 的语音识别"
description = "WeNet Demo! Try it with your own voice!"
article = ""

interface = gr.Interface(
    fn=recognition,
    inputs=inputs,
    outputs=output,
    title=text,
    description=description,
    article=article,
    theme='huggingface',
)

interface.launch(enable_queue=True)