# Annarabic ASR demo — Gradio app (Hugging Face Space).
import gradio as gr
import streamlit as st
import numpy as np
from transformers.file_utils import cached_path, hf_bucket_url
import os
from transformers import Wav2Vec2ProcessorWithLM, AutoModelForCTC
from datasets import load_dataset
import torch
import kenlm
import torchaudio
# Local directory where downloaded model/processor files are cached between runs.
cache_dir = './cache/'
# Processor bundles the feature extractor, tokenizer, and a KenLM language
# model for beam-search CTC decoding. The repo is private: the auth token is
# read from Streamlit's secrets store (key "AnnarabicToken").
processor = Wav2Vec2ProcessorWithLM.from_pretrained("ahmedJaafari/Annarabic3.2", cache_dir=cache_dir, use_auth_token=st.secrets["AnnarabicToken"])
# CTC acoustic model from the same private repo; loaded once at module import
# so every inference call reuses it.
model = AutoModelForCTC.from_pretrained("ahmedJaafari/Annarabic3.2", cache_dir=cache_dir, use_auth_token=st.secrets["AnnarabicToken"])
# define function to read in sound file
def speech_file_to_array_fn(path, max_seconds=10):
    """Read an audio file and prepare a mono 16 kHz waveform for the model.

    Parameters
    ----------
    path : str
        Filesystem path of the audio file to load.
    max_seconds : int, optional
        Truncate the waveform to this many seconds (<= 0 disables truncation).

    Returns
    -------
    dict
        Keys: ``file`` (the input path), ``speech`` (1-D float numpy array),
        ``sampling_rate`` (always 16000).
    """
    waveform, orig_sr = torchaudio.load(path)
    # Resample only when the source rate differs from the model's 16 kHz.
    if orig_sr != 16000:
        resampler = torchaudio.transforms.Resample(orig_freq=orig_sr,
                                                   new_freq=16000)
        waveform = resampler(waveform)
    # Keep the first channel only (mono).
    mono = waveform[0]
    # Cap the clip length to bound inference time.
    if max_seconds > 0:
        mono = mono[: max_seconds * 16000]
    return {
        "file": path,
        "speech": mono.numpy(),
        "sampling_rate": 16000,
    }
# tokenize
def inference(audio):
    """Transcribe one audio clip with the Annarabic CTC model + KenLM decoder.

    Parameters
    ----------
    audio : file-like object from Gradio (``type='file'``); only its ``.name``
        attribute (the temp-file path on disk) is used.

    Returns
    -------
    str
        The decoded transcription with its last 4 characters removed.
    """
    # Preprocess: mono, 16 kHz, truncated to 10 s (helper's default).
    ds = speech_file_to_array_fn(audio.name)
    # Convert the raw waveform into model input tensors.
    input_values = processor(
        ds["speech"],
        sampling_rate=ds["sampling_rate"],
        return_tensors="pt"
    ).input_values
    # Forward pass without gradient tracking (inference only).
    with torch.no_grad():
        logits = model(input_values).logits
    #pred_ids = torch.argmax(logits, dim=-1)
    # First (and only) batch element: a (time, vocab) logit matrix.
    h = logits.numpy()[0,:,:]
    # NOTE(review): np.pad with pad_width [0, 2] pads BOTH axes by (0, 2) —
    # i.e. it appends 2 trailing time frames AND 2 trailing vocab columns.
    # Presumably this aligns the logit matrix with the vocabulary size the
    # LM decoder expects — confirm whether padding the time axis is intended.
    v = np.pad(h, [0, 2], mode='constant')
    # Beam-search decode through the processor's KenLM language model.
    output = processor.decode(v).text
    # NOTE(review): drops the last 4 characters — presumably a fixed trailing
    # artifact of the decoder output; verify against sample transcriptions.
    return output[:-4]
# --- Gradio UI wiring -------------------------------------------------------
# Microphone/upload input delivered as a temp file; inference() reads `.name`.
# (Fixes a stray ` |` artifact that previously trailed the launch() call and
# made the file a syntax error.)
inputs = gr.inputs.Audio(label="Record Audio", source="microphone", type='file')
outputs = gr.outputs.Textbox(label="Output Text")
title = "Annarabic Speech Recognition System"
description = "Gradio demo for Annarabic ASR. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below."
# Example clips shipped alongside the app, shown below the interface.
examples = [['Aya.mp3'], ['Loubna.mp3']]
gr.Interface(inference, inputs, outputs, title=title, description=description, examples=examples).launch()