Spaces:

ahmedJaafari
/

Annarabic

Runtime error

App Files Files

Annarabic / app.py

ahmedJaafari

Update app.py

f3c4afb almost 3 years ago

raw

history blame

2.55 kB

	import os

	import gradio as gr
	import kenlm
	import numpy as np
	import torch
	import torchaudio
	from datasets import load_dataset
	from transformers import AutoModelForCTC, Wav2Vec2Processor, Wav2Vec2ProcessorWithLM
	from transformers.file_utils import cached_path, hf_bucket_url

	# Directory where the downloaded processor and model files are cached.
	cache_dir = './cache/'

	# Hub repository that holds the acoustic model and its processors.
	_MODEL_ID = "ahmedJaafari/Annarabic3.2"

	# SECURITY: the Hub access token must never live in source control. It is read
	# from the environment instead (on Hugging Face Spaces, define it as a secret).
	# The token that was previously committed in this file should be treated as
	# compromised and revoked. When no variable is set the value is None and the
	# Hub is accessed anonymously.
	_HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")

	# Processor with language-model decoding; used for both feature extraction and
	# decoding in `inference`.
	processor = Wav2Vec2ProcessorWithLM.from_pretrained(
	    _MODEL_ID, cache_dir=cache_dir, use_auth_token=_HF_TOKEN
	)
	# Plain processor without the language model. Not referenced by the code
	# visible in this file; kept so the module-level name stays available.
	processor2 = Wav2Vec2Processor.from_pretrained(
	    _MODEL_ID, cache_dir=cache_dir, use_auth_token=_HF_TOKEN
	)
	# CTC acoustic model that produces per-frame logits.
	model = AutoModelForCTC.from_pretrained(
	    _MODEL_ID, cache_dir=cache_dir, use_auth_token=_HF_TOKEN
	)

	# define function to read in sound file
	def speech_file_to_array_fn(path, max_seconds=10, target_rate=16000):
	    """Load an audio file and return it as a single-channel array.

	    Args:
	        path: Location of the audio file, handed to ``torchaudio.load``.
	        max_seconds: Keep at most this many seconds from the start of the
	            clip. Fractional values are accepted. A value of 0 or less
	            disables truncation.
	        target_rate: Sampling rate, in Hz, of the returned audio. Audio
	            loaded at any other rate is resampled to it.

	    Returns:
	        A dict with three keys: ``"file"`` (the given path), ``"speech"``
	        (the samples as a NumPy array) and ``"sampling_rate"``
	        (``target_rate``).
	    """
	    waveform, source_rate = torchaudio.load(path)
	    if source_rate != target_rate:
	        resample = torchaudio.transforms.Resample(
	            orig_freq=source_rate, new_freq=target_rate
	        )
	        waveform = resample(waveform)

	    # Keep only index 0 along the first axis; any further channels are dropped
	    # (torchaudio loads audio channels-first by default).
	    samples = waveform[0]

	    if max_seconds > 0:
	        # int() keeps the slice bound valid when max_seconds is fractional.
	        samples = samples[: int(max_seconds * target_rate)]

	    return {
	        "file": path,
	        "speech": samples.numpy(),
	        "sampling_rate": target_rate,
	    }

	# transcribe one uploaded audio file (feature extraction, model forward pass, LM decoding)
	def inference(audio):
	    """Transcribe an uploaded audio file to text.

	    Args:
	        audio: The uploaded file as delivered by the Gradio audio input; only
	            its ``name`` attribute (the path on disk) is read. ``None`` is
	            accepted and means nothing was uploaded.

	    Returns:
	        The decoded transcript, or an empty string when ``audio`` is ``None``.
	    """
	    # Nothing was uploaded: return an empty transcript instead of failing on
	    # ``None.name``.
	    if audio is None:
	        return ""

	    # Load the clip (resampled and truncated by the helper).
	    ds = speech_file_to_array_fn(audio.name)

	    # Turn the raw samples into model inputs.
	    input_values = processor(
	        ds["speech"],
	        sampling_rate=ds["sampling_rate"],
	        return_tensors="pt",
	    ).input_values

	    # Forward pass only; no gradients are needed.
	    with torch.no_grad():
	        logits = model(input_values).logits

	    # Logits of the first (only) batch item as a 2-D NumPy array.
	    frame_scores = logits.numpy()[0, :, :]

	    # Append two zero rows and two zero columns before decoding.
	    # NOTE(review): presumably this aligns the logit matrix with the decoder's
	    # vocabulary size, and the trailing trim below removes the text produced
	    # by the padding - confirm against the model/decoder configuration.
	    padded_scores = np.pad(frame_scores, ((0, 2), (0, 2)), mode='constant')

	    output = processor.decode(padded_scores).text

	    # Drop the last four characters of the decoded text (see note above).
	    return output[:-4]

	# --- Gradio user interface -------------------------------------------------
	# Audio upload widget; with type="file" the handler receives a file object
	# (``inference`` reads its ``name`` attribute).
	inputs = gr.inputs.Audio(label="Input Audio", type="file")
	# Text box that shows the transcript returned by ``inference``.
	outputs = gr.outputs.Textbox(label="Output Text")
	title = "Annarabic Speech Recognition System"
	description = "Gradio demo for Annarabic ASR. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below. Currently supports .wav 16_000hz files"
	# Footer HTML. The paragraph tag is opened as well as closed so the markup is
	# well-formed.
	article = "<p><a href='https://huggingface.co/ahmedJaafari' target='_blank'>Pretrained model</a></p>"
	# Example clips are currently disabled; re-enable by passing `examples=` below
	# once the audio files are available next to this script.
	#examples=[['t1_0001-00010.wav'], ['t1_utt000000042.wav'], ['t2_0000006682.wav']]
	gr.Interface(
	    fn=inference,
	    inputs=inputs,
	    outputs=outputs,
	    title=title,
	    description=description,
	    article=article,
	).launch()