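# Gradio Space demo: Malayalam speech-to-text with AI4Bharat's IndicConformer
# hybrid model, decoding each clip with both its CTC and RNNT heads.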
import os

# Use the real `spaces` package on ZeroGPU hardware; otherwise fall back to a
# no-op decorator so the same code runs on CPU or an ordinary GPU.
if os.environ.get("SPACES_ZERO_GPU") is not None:
    import spaces
else:
    class spaces:
        def GPU(func):
            def wrapper(*args, **kwargs):
                return func(*args, **kwargs)
            return wrapper
import gradio as gr
import subprocess
# subprocess.run("git clone https://github.com/AI4Bharat/NeMo.git && cd NeMo && git checkout nemo-v2 && bash reinstall.sh", shell=True)
import torch
import nemo.collections.asr as nemo_asr
from pathlib import Path
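# Load the pretrained AI4Bharat IndicConformer hybrid RNNT/CTC checkpoint for
# Malayalam, put it in inference mode, and move it to GPU if one is available.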
model = nemo_asr.models.ASRModel.from_pretrained("ai4bharat/indicconformer_stt_ml_hybrid_rnnt_large")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.freeze()  # inference mode
model = model.to(device)  # transfer model to device
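# infer(): convert the uploaded or recorded audio to 16 kHz mono WAV with ffmpeg,
# transcribe it once with the CTC decoder and once with the RNNT decoder, and
# return both hypotheses.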
@spaces.GPU
def infer(srcfile: str):
    tmpfile = "sample_audio_infer_ready.wav"
    # -y overwrites any leftover temp file; quote the source path in case it contains spaces
    subprocess.run(f'ffmpeg -y -i "{srcfile}" -ac 1 -ar 16000 {tmpfile}', shell=True)

    model.cur_decoder = "ctc"
    ctc_text = model.transcribe([tmpfile], batch_size=1, logprobs=False, language_id='ml')[0]
    print(ctc_text)

    model.cur_decoder = "rnnt"
    rnnt_text = model.transcribe([tmpfile], batch_size=1, language_id='ml')[0]
    print(rnnt_text)

    if Path(tmpfile).exists():
        Path(tmpfile).unlink()
    return ctc_text, rnnt_text
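# Gradio UI: an audio input (upload or microphone), a Run button, and two
# copyable text boxes showing the CTC and RNNT transcriptions side by side.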
with gr.Blocks() as demo:
    input_audio = gr.Audio(label="Input", type="filepath", sources=["upload", "microphone"], format="wav")
    run_button = gr.Button("Run", variant="primary")
    with gr.Row():
        ctc_text = gr.Textbox(label="CTC", value="", show_copy_button=True)
        rnnt_text = gr.Textbox(label="RNNT", value="", show_copy_button=True)
    run_button.click(infer, [input_audio], [ctc_text, rnnt_text])

demo.launch()
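# To try it outside Spaces (assuming this file is saved as app.py and ffmpeg plus
# the AI4Bharat NeMo fork are installed): run `python app.py` and open the local
# Gradio URL printed in the console.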