Spaces:
Runtime error
Runtime error
import gradio as gr | |
import streamlit as st | |
import numpy as np | |
from transformers.file_utils import cached_path, hf_bucket_url | |
import os | |
from transformers import Wav2Vec2ProcessorWithLM, AutoModelForCTC | |
from datasets import load_dataset | |
import torch | |
import kenlm | |
import torchaudio | |
# Local directory where the downloaded processor/model files are cached.
cache_dir = './cache/'

# Hub repository that provides both the processor (with LM decoder) and the CTC model.
_MODEL_ID = "ahmedJaafari/Annarabic3.2"

# Keyword arguments shared by both ``from_pretrained`` calls.
# NOTE(review): the access token is read through Streamlit's secrets API while
# the UI in this file is built with Gradio -- confirm the secret is actually
# reachable this way in the deployment environment.
_pretrained_kwargs = {
    "cache_dir": cache_dir,
    "use_auth_token": st.secrets["AnnarabicToken"],
}

processor = Wav2Vec2ProcessorWithLM.from_pretrained(_MODEL_ID, **_pretrained_kwargs)
model = AutoModelForCTC.from_pretrained(_MODEL_ID, **_pretrained_kwargs)
# Read a sound file and prepare it as 16 kHz samples for the model.
def speech_file_to_array_fn(path, max_seconds=10):
    """Load an audio file and return its first channel resampled to 16 kHz.

    Args:
        path: Location of the audio file; passed to ``torchaudio.load``.
        max_seconds: Maximum duration to keep, in seconds. Fractional values
            are accepted. ``None``, zero or a negative value keeps the whole
            recording.

    Returns:
        A dict with the keys ``"file"`` (the given path), ``"speech"`` (the
        samples of the first channel as a NumPy array) and
        ``"sampling_rate"`` (always 16000).
    """
    target_rate = 16000

    waveform, source_rate = torchaudio.load(path)
    if source_rate != target_rate:
        resampler = torchaudio.transforms.Resample(
            orig_freq=source_rate,
            new_freq=target_rate,
        )
        waveform = resampler(waveform)

    # Only the first channel is used; any further channels are discarded.
    samples = waveform[0]

    if max_seconds is not None and max_seconds > 0:
        # Slice bounds must be integers, so a fractional duration is
        # truncated to a whole number of samples.
        samples = samples[: int(max_seconds * target_rate)]

    return {
        "file": path,
        "speech": samples.numpy(),
        "sampling_rate": target_rate,
    }
# Transcribe one recording with the CTC model and the LM-backed decoder.
def inference(audio):
    """Transcribe an audio recording to text.

    Args:
        audio: Either an object exposing the recording's path as ``.name``
            (e.g. a temporary file object) or the path itself as a string.
            ``None`` (nothing was recorded) yields an empty string.

    Returns:
        The decoded transcription with its last four characters removed.
    """
    if audio is None:
        # Nothing to transcribe; avoid an AttributeError on ``audio.name``.
        return ""

    # Accept both file-like objects and plain path strings.
    audio_path = getattr(audio, "name", audio)

    # Read the sound file (first channel, 16 kHz, truncated to the default length).
    sample = speech_file_to_array_fn(audio_path)

    # Turn the raw samples into model input tensors.
    input_values = processor(
        sample["speech"],
        sampling_rate=sample["sampling_rate"],
        return_tensors="pt",
    ).input_values

    # Run the acoustic model without tracking gradients.
    with torch.no_grad():
        logits = model(input_values).logits

    # Keep the single batch item as a 2-D NumPy array.
    frame_logits = logits.numpy()[0]

    # ``np.pad`` applies the (0, 2) pad width to every axis, so two zero
    # entries are appended at the end of both dimensions.
    # NOTE(review): presumably this compensates for the decoder expecting two
    # more vocabulary entries than the model emits, and the ``[:-4]`` below
    # strips the text decoded from the padded frames -- confirm before changing.
    padded_logits = np.pad(frame_logits, [0, 2], mode='constant')

    # Decode with the language-model-backed CTC decoder.
    transcription = processor.decode(padded_logits).text
    return transcription[:-4]
# Texts shown on the demo page.
title = "Annarabic Speech Recognition System"
description = "Gradio demo for Annarabic ASR. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below."

# Example recordings offered in the UI, one input row per file.
examples = [
    ['Aya.mp3'],
    ['Loubna.mp3'],
]

# Microphone input handed to ``inference`` as a file object (it reads ``.name``).
inputs = gr.inputs.Audio(
    label="Record Audio",
    source="microphone",
    type='file',
)
# Text box that displays the transcription returned by ``inference``.
outputs = gr.outputs.Textbox(label="Output Text")

# Build the interface and start serving it.
demo = gr.Interface(
    fn=inference,
    inputs=inputs,
    outputs=outputs,
    title=title,
    description=description,
    examples=examples,
)
demo.launch()