wenet_demo / app.py
Liangcd
[demo] Initialize the first version
a487abc
raw
history blame
1.51 kB
import json
import gradio as gr
import numpy as np
import wenetruntime as wenet
# Runtime setup, executed once at import time so every request handled by
# this process reuses the same decoder instance.
# NOTE(review): the meaning of log level 2 and of the 'chs' language code is
# defined by wenetruntime — confirm against its documentation.
wenet.set_log_level(2)
decoder = wenet.Decoder(lang="chs")
def _to_int16_pcm(samples):
    """Peak-normalise ``samples`` and return them as an int16 array.

    The divisor is the largest *absolute* sample value, floored at 1 so that
    silence is not amplified; using the absolute value keeps a dominant
    negative peak from overflowing the int16 range.
    """
    # Work in float64 so abs() of the most negative int16 value cannot wrap.
    samples = samples.astype(np.float64)
    peak = max(np.max(np.abs(samples)), 1)
    return (samples / peak * 32767).astype("int16")


def recognition(audio):
    """Transcribe one recording with the module-level WeNet ``decoder``.

    Args:
        audio: ``(sample_rate, samples)`` tuple as delivered by the Gradio
            audio input, or ``None`` when nothing was recorded. Only 16000 Hz
            and 48000 Hz are supported.
            NOTE(review): assumes multi-channel data is shaped
            (samples, channels) — confirm against the Gradio version in use.

    Returns:
        The text of the first n-best hypothesis, or a human-readable error
        message when the input is missing/unsupported or the decoder
        produces no usable result.
    """
    print(audio)
    if audio is None:
        return "Input Error! Please enter one audio!"
    sr, y = audio
    # Report unsupported rates to the user like the other input errors do;
    # an ``assert`` disappears under ``python -O``.
    if sr not in (16000, 48000):
        return (f"Input Error! Unsupported sample rate {sr} Hz, "
                "expected 16000 or 48000!")
    y = np.asarray(y)
    if y.size == 0:
        # Nothing to decode (and np.max would raise on an empty array).
        return "Input Error! Please enter one audio!"
    if y.ndim == 2:
        # Down-mix to mono; the decoder is handed one flat sample stream.
        y = y.mean(axis=1).astype(y.dtype)
    if sr == 48000:
        # Normalise, then keep every third sample (48 kHz -> 16 kHz).
        # This is plain decimation without an anti-aliasing filter.
        y = _to_int16_pcm(y)[::3]
    elif y.dtype != np.int16:
        # The decoder is fed raw 16-bit samples, so convert other dtypes.
        y = _to_int16_pcm(y)
    ans = decoder.decode(y.tobytes(), True)
    if ans is None:
        return "ERROR! No text output! Please try again!"
    # ans is a JSON string shaped like:
    # {
    #   'nbest' : [{"sentence" : ""}], 'type' : 'final_result'
    # }
    ans = json.loads(ans)
    print(ans)
    try:
        return ans['nbest'][0]['sentence']
    except (KeyError, IndexError, TypeError):
        # Well-formed JSON but no hypothesis inside: same outcome for the
        # user as a missing result.
        return "ERROR! No text output! Please try again!"
# Gradio UI wiring: one microphone input delivered to `recognition` as a
# numpy tuple, one text box for the transcript.
# NOTE(review): `gr.inputs` / `gr.outputs` and `enable_queue` belong to the
# older Gradio API — confirm the pinned Gradio version before upgrading.
inputs = [
    gr.inputs.Audio(source="microphone",
                    type="numpy",
                    label='Speaker#1')
]
output = gr.outputs.Textbox(label="Output Text")
# examples = ['examples/BAC009S0764W0121.wav']
text = "Speech Recognition in WeNet | 基于 WeNet 的语音识别"
# Description and footer link name WeNet: this page is the WeNet speech
# recognition demo, so the text must not point at the WeSpeaker project.
description = ("WeNet Demo ! Try it with your own voice !")
article = (
    "<p style='text-align: center'>"
    "<a href='https://github.com/wenet-e2e/wenet' target='_blank'>Github: Learn more about WeNet</a>"
    "</p>")
interface = gr.Interface(
    fn=recognition,
    inputs=inputs,
    outputs=output,
    title=text,
    description=description,
    article=article,
    theme='huggingface',
)
# Start the web server; queuing is enabled so requests are processed in turn.
interface.launch(enable_queue=True)