import os

import gradio as gr
import kenlm  # kept explicit: the LM-boosted CTC decoder (pyctcdecode) needs kenlm at runtime
import torch
import torchaudio
from transformers import AutoModelForCTC, Wav2Vec2ProcessorWithLM

cache_dir = "./cache/"

# Load the Annarabic checkpoint; the access token is read from the
# AnnarabicToken environment variable (a Space secret).
processor = Wav2Vec2ProcessorWithLM.from_pretrained(
    "ahmedJaafari/Annarabic3.2", cache_dir=cache_dir, use_auth_token=os.getenv("AnnarabicToken")
)
model = AutoModelForCTC.from_pretrained(
    "ahmedJaafari/Annarabic3.2", cache_dir=cache_dir, use_auth_token=os.getenv("AnnarabicToken")
)
# Read an audio file, resample it to 16 kHz, keep the first channel, and truncate it.
def speech_file_to_array_fn(path, max_seconds=120):
    batch = {"file": path}
    speech_array, sampling_rate = torchaudio.load(batch["file"])
    if sampling_rate != 16000:
        transform = torchaudio.transforms.Resample(orig_freq=sampling_rate, new_freq=16000)
        speech_array = transform(speech_array)
    speech_array = speech_array[0]
    if max_seconds > 0:
        speech_array = speech_array[: max_seconds * 16000]
    batch["speech"] = speech_array.numpy()
    batch["sampling_rate"] = 16000
    return batch
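
# Illustration (an assumption, not from the original file): for a hypothetical
# 3-second, 44.1 kHz stereo clip, the returned dict would be
# {"file": path, "speech": float numpy array of shape (48000,), "sampling_rate": 16000}.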
def inference(audio):
    # Read and preprocess the uploaded audio file.
    ds = speech_file_to_array_fn(audio)

    # Turn the waveform into model inputs.
    input_values = processor(
        ds["speech"],
        sampling_rate=ds["sampling_rate"],
        return_tensors="pt",
    ).input_values

    # Run the acoustic model and decode the CTC logits with the language model.
    with torch.no_grad():
        logits = model(input_values).logits
    output = processor.decode(logits.numpy()[0]).text

    print(output)
    return output
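
# Optional local smoke test (a sketch, not part of the original Space): call
# inference() directly on one of the bundled example clips. Guarded by a
# hypothetical environment variable so the Space's normal startup is unchanged.
if os.getenv("ANNARABIC_LOCAL_TEST"):
    print(inference("Omar.wav"))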
# Note: gr.inputs / gr.outputs were removed in recent Gradio releases; gr.Audio and gr.Textbox are the current equivalents.
inputs = gr.Audio(label="Input Audio", type="filepath")
outputs = gr.Textbox(label="Output Text")
title = "Annarabic Speech Recognition System"
description = 'Demo for <b>Annarabic ASR</b>. To use it, simply upload your audio or click on one of the examples to load it. Only the first 10 seconds of the audio are transcribed, and no GPU runtime is used. For more information, contact Ahmed Jaafari by email: <a href="mailto:[email protected]">[email protected]</a> or phone: <a href="tel:+212658537105">+212658537105</a>.'
examples = [["Aya.mp3"], ["Loubna.mp3"], ["Omar.wav"], ["Yassir.wav"]]
article = "* The ASR model was never trained on the given examples."
gr.Interface(inference, inputs, outputs, title=title, description=description, article=article, examples=examples).launch()