File size: 740 Bytes
5cea8c6
 
 
 
 
 
a371a8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Startup bootstrap: fetch the ReazonSpeech sources and install its NeMo ASR
# package so that the `reazonspeech` import further down can succeed.
import importlib
import os
import site
import subprocess
import sys

import git
import pip  # kept from the original import list; no longer called directly

_REPO_URL = 'https://github.com/reazon-research/ReazonSpeech'
_REPO_DIR = 'ReazonSpeech'

# Cloning into an existing, non-empty directory fails, so only clone when no
# checkout is present. This keeps a restart in the same directory working.
if not os.path.isdir(os.path.join(_REPO_DIR, '.git')):
    git.Repo.clone_from(_REPO_URL, _REPO_DIR)

# pip offers no supported in-process API (`pip.main` is internal). Its
# documentation recommends running it as a subprocess of the current
# interpreter. check_call raises CalledProcessError on a failed install
# instead of silently continuing to a confusing ImportError later.
subprocess.check_call([
    sys.executable, '-m', 'pip', 'install',
    os.path.join(_REPO_DIR, 'pkg', 'nemo-asr'),
])

# Re-run `site` so path entries created by the install are picked up, and
# invalidate the import system's caches so modules created after interpreter
# start are noticed.
importlib.reload(site)
importlib.invalidate_caches()

import gradio as gr
from reazonspeech.nemo.asr import audio_from_path, load_model, transcribe

# ASR model shared by all requests; stays None until it is first loaded.
model = None


def speech_to_text(audio_file):
    """Transcribe an audio file into text, segments and subwords.

    Args:
        audio_file: Path of the audio file to transcribe. May be ``None`` or
            empty when the form is submitted without an upload.

    Returns:
        A 3-tuple of strings: the full transcription text, the segments
        joined one per line, and the subwords joined one per line. All three
        are empty strings when no audio file was supplied.
    """
    global model

    # Nothing uploaded: return empty outputs rather than handing a missing
    # path to the audio loader.
    if not audio_file:
        return "", "", ""

    # Load once on first use. Compare with None explicitly so the check does
    # not depend on the truthiness of the model object.
    if model is None:
        model = load_model()

    audio = audio_from_path(audio_file)
    ret = transcribe(model, audio)

    # str.join() accepts only strings. The segment/subword items are
    # presumably structured objects rather than plain strings (confirm
    # against the ReazonSpeech API), so convert each one explicitly.
    return (
        ret.text,
        "\n".join(map(str, ret.segments)),
        "\n".join(map(str, ret.subwords)),
    )

# Preload the model at startup so the first request does not pay the loading
# cost. The previous call passed an undefined name (`model_size`), which
# raised NameError before the UI could start, and it discarded the result.
# Storing it in `model` lets speech_to_text reuse the loaded instance.
model = load_model()

# Web UI: one uploaded audio file (passed to the handler as a file path) in,
# three text outputs out, matching the three values speech_to_text returns.
audio_input = gr.Audio(sources="upload", type="filepath")

demo = gr.Interface(
    fn=speech_to_text,
    inputs=[audio_input],
    outputs=["text", "text", "text"],
)
demo.launch()