Spaces:

ahmedJaafari
/

AnnarabicRecord

Runtime error

App Files Files Community

AnnarabicRecord / app.py

ahmedJaafari

Update app.py

72979b7 almost 3 years ago

raw

history blame

2.26 kB

	import gradio as gr
	import streamlit as st
	import numpy as np
	from transformers.file_utils import cached_path, hf_bucket_url
	import os
	from transformers import Wav2Vec2ProcessorWithLM, AutoModelForCTC
	from datasets import load_dataset
	import torch
	import kenlm
	import torchaudio

	# Local directory handed to `from_pretrained` as its download cache.
	cache_dir = './cache/'

	# The processor and the acoustic model are loaded from the same hub
	# repository with the same cache directory and access token, so the shared
	# arguments are spelled out once.
	_MODEL_REPO = "ahmedJaafari/Annarabic3.2"
	_PRETRAINED_KWARGS = {
	    "cache_dir": cache_dir,
	    "use_auth_token": st.secrets["AnnarabicToken"],
	}

	processor = Wav2Vec2ProcessorWithLM.from_pretrained(_MODEL_REPO, **_PRETRAINED_KWARGS)
	model = AutoModelForCTC.from_pretrained(_MODEL_REPO, **_PRETRAINED_KWARGS)

	def speech_file_to_array_fn(path, max_seconds=10):
	    """Load an audio file as a 16 kHz single-channel NumPy waveform.

	    Args:
	        path: Location of the audio file; passed straight to
	            ``torchaudio.load``.
	        max_seconds: Upper bound on the clip length, in seconds. May be
	            fractional. ``None`` or any value <= 0 keeps the whole clip.

	    Returns:
	        A dict with three keys: ``"file"`` (the input ``path``),
	        ``"speech"`` (the waveform as a NumPy array) and
	        ``"sampling_rate"`` (always 16000).
	    """
	    target_rate = 16000

	    waveform, source_rate = torchaudio.load(path)
	    if source_rate != target_rate:
	        resampler = torchaudio.transforms.Resample(
	            orig_freq=source_rate,
	            new_freq=target_rate,
	        )
	        waveform = resampler(waveform)

	    # Keep only index 0 of the leading axis (the first channel, assuming
	    # torchaudio's default channels-first layout).
	    waveform = waveform[0]

	    if max_seconds is not None and max_seconds > 0:
	        # int() keeps the slice bound valid when max_seconds is fractional.
	        waveform = waveform[:int(max_seconds * target_rate)]

	    return {
	        "file": path,
	        "speech": waveform.numpy(),
	        "sampling_rate": target_rate,
	    }

	def inference(audio):
	    """Transcribe one audio clip with the module-level processor and model.

	    Args:
	        audio: Either an object exposing the file path as ``.name`` (what
	            the Gradio audio input configured with ``type='file'`` in this
	            script is used as), or a plain path string. ``None`` means
	            nothing was recorded.

	    Returns:
	        The decoded text with its last four characters removed, or an empty
	        string when ``audio`` is ``None``.
	    """
	    # Nothing recorded: return an empty transcript instead of failing on
	    # the attribute access below.
	    if audio is None:
	        return ""

	    # Accept both file-like objects (path in .name) and bare path strings.
	    audio_path = getattr(audio, "name", audio)
	    clip = speech_file_to_array_fn(audio_path)

	    # Turn the raw waveform into model input tensors.
	    input_values = processor(
	        clip["speech"],
	        sampling_rate=clip["sampling_rate"],
	        return_tensors="pt",
	    ).input_values

	    # Forward pass only; no gradients are needed for inference.
	    with torch.no_grad():
	        logits = model(input_values).logits

	    # Drop the batch axis, leaving a 2-D score matrix.
	    frame_scores = logits.numpy()[0, :, :]

	    # pad_width [0, 2] appends two zero entries at the END of BOTH axes.
	    # NOTE(review): presumably the two extra columns make the matrix width
	    # match the language-model decoder's vocabulary size - confirm.
	    padded_scores = np.pad(frame_scores, [0, 2], mode='constant')

	    transcript = processor.decode(padded_scores).text

	    # NOTE(review): the last four characters are discarded, presumably
	    # because they are artefacts of the zero padding above - confirm.
	    return transcript[:-4]

	# --- Gradio front end -------------------------------------------------
	# Input widget: audio captured from the microphone and passed to the
	# handler as a file object.
	inputs = gr.inputs.Audio(
	    label="Record Audio",
	    source="microphone",
	    type='file',
	)
	# Output widget: a text box showing the transcription.
	outputs = gr.outputs.Textbox(label="Output Text")

	# Static text shown on the demo page.
	title = "Annarabic Speech Recognition System"
	description = "Gradio demo for Annarabic ASR. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below."

	# Bundled sample recordings offered as one-click examples.
	examples = [
	    ['Aya.mp3'],
	    ['Loubna.mp3'],
	]

	# Wire `inference` to the widgets, then start the web app.
	demo = gr.Interface(
	    fn=inference,
	    inputs=inputs,
	    outputs=outputs,
	    title=title,
	    description=description,
	    examples=examples,
	)
	demo.launch()