Spaces:

nata0801
/

RuEn_ASR_with_Voice_Recorder

Runtime error

App Files Files Community

RuEn_ASR_with_Voice_Recorder / app.py

nata0801

Update app.py

9726882 almost 3 years ago

raw

history blame

1.89 kB

	#Importing all the necessary packages
	import nltk
	import librosa
	import torch
	import gradio as gr
	from transformers import Wav2Vec2Tokenizer, Wav2Vec2ForCTC
	# Download NLTK's "punkt" resource when this module is loaded.
	# NOTE(review): nothing in the visible code uses nltk afterwards — confirm this download is still needed.
	nltk.download("punkt")


	# Hugging Face checkpoint used for each language offered in the UI.
	_ASR_CHECKPOINTS = {
	    "English": "facebook/wav2vec2-large-960h-lv60-self",
	    "Russian": "jonatasgrosman/wav2vec2-large-xlsr-53-russian",
	}

	# Sample rate (Hz) the audio is resampled to before it is fed to the model.
	_TARGET_SAMPLE_RATE = 16000


	def asr_transcript(audio_file, language):
	    """Transcribe a recorded audio file into lower-case text.

	    Args:
	        audio_file: Path of the audio file to transcribe.
	        language: Either "English" or "Russian"; selects the checkpoint.

	    Returns:
	        The decoded transcription of the audio, lower-cased.

	    Raises:
	        ValueError: If no audio file was supplied or ``language`` is not
	            one of the supported choices.
	    """
	    # Validate up front so the caller gets a clear message instead of a
	    # NameError/UnboundLocalError from deeper in the function.
	    if not audio_file:
	        raise ValueError("No audio was provided; please record a message first.")

	    checkpoint = _ASR_CHECKPOINTS.get(language)
	    if checkpoint is None:
	        supported = ", ".join(_ASR_CHECKPOINTS)
	        raise ValueError(
	            f"Unsupported language {language!r}; expected one of: {supported}."
	        )

	    # Load the tokenizer and the acoustic model for the chosen language.
	    tokenizer = Wav2Vec2Tokenizer.from_pretrained(checkpoint)
	    model = Wav2Vec2ForCTC.from_pretrained(checkpoint)

	    # Read the recording, downmixed to mono and resampled to 16 kHz.
	    # `sr` is passed by keyword because newer librosa releases no longer
	    # accept it positionally.
	    speech, _ = librosa.load(audio_file, sr=_TARGET_SAMPLE_RATE, mono=True)

	    input_values = tokenizer(speech, return_tensors="pt").input_values

	    # Inference only: no gradients are needed for the forward pass.
	    with torch.no_grad():
	        logits = model(input_values).logits

	    # Greedy decoding: take the most likely token at every time step.
	    predicted_ids = torch.argmax(logits, dim=-1)
	    transcription = tokenizer.batch_decode(predicted_ids)[0]

	    return transcription.lower()


	# Input widgets: a microphone recorder and a language selector.
	audio_input = gr.inputs.Audio(
	    source="microphone",
	    type="filepath",
	    optional=True,
	    label="Please record your message/Пожалуйста, введите Ваше сообщение",
	)
	language_input = gr.inputs.Radio(
	    label="Pick a language/Выберите язык",
	    choices=["English", "Russian"],
	)

	# Output widget that shows the transcript returned by asr_transcript.
	transcript_output = gr.outputs.Textbox(label="Output Text/Результат")

	# Wire the widgets to asr_transcript and start serving the app.
	demo = gr.Interface(
	    asr_transcript,
	    inputs=[audio_input, language_input],
	    outputs=transcript_output,
	    title="Automatic speech recognition with voice recorder in Russian and English",
	    description="This application displays transcribed text for given audio input",
	    theme="grass",
	)
	demo.launch()