"""Gradio demo that transcribes recorded audio with the Annarabic ASR model."""

import os

import gradio as gr
import streamlit as st
import numpy as np
from transformers.file_utils import cached_path, hf_bucket_url
from transformers import Wav2Vec2ProcessorWithLM, AutoModelForCTC
from datasets import load_dataset
import torch
import kenlm
import torchaudio

# Hub checkpoint used for both the processor (with LM decoder) and the model.
MODEL_ID = "ahmedJaafari/Annarabic3.2"
# Sampling rate, in Hz, that every clip is resampled to before inference.
TARGET_SAMPLE_RATE = 16000

cache_dir = './cache/'
# Read the access token once; it is needed for both downloads below.
_auth_token = st.secrets["AnnarabicToken"]

processor = Wav2Vec2ProcessorWithLM.from_pretrained(
    MODEL_ID, cache_dir=cache_dir, use_auth_token=_auth_token
)
model = AutoModelForCTC.from_pretrained(
    MODEL_ID, cache_dir=cache_dir, use_auth_token=_auth_token
)


def speech_file_to_array_fn(path, max_seconds=10):
    """Load an audio file as a 16 kHz NumPy waveform.

    The file is read with ``torchaudio.load``, resampled to
    ``TARGET_SAMPLE_RATE`` when its native rate differs, reduced to the
    entry at index 0 of the loaded tensor, and optionally truncated.

    Args:
        path: Location of the audio file, passed to ``torchaudio.load``.
        max_seconds: Maximum clip length in seconds. Values ``<= 0``
            disable truncation. Fractional values are accepted; the
            resulting sample count is rounded down.

    Returns:
        A dict with keys ``"file"`` (the input path), ``"speech"`` (the
        waveform as a NumPy array) and ``"sampling_rate"`` (always
        ``TARGET_SAMPLE_RATE``).
    """
    batch = {"file": path}
    waveform, native_rate = torchaudio.load(batch["file"])

    if native_rate != TARGET_SAMPLE_RATE:
        resampler = torchaudio.transforms.Resample(
            orig_freq=native_rate, new_freq=TARGET_SAMPLE_RATE
        )
        waveform = resampler(waveform)

    # Keep only index 0 of the leading dimension.
    # NOTE(review): presumably the first audio channel; any further
    # channels are discarded rather than mixed down -- confirm intent.
    waveform = waveform[0]

    if max_seconds > 0:
        # Slice bounds must be integers, so a fractional max_seconds
        # (e.g. 2.5) is converted to a whole number of samples first.
        waveform = waveform[:int(max_seconds * TARGET_SAMPLE_RATE)]

    batch["speech"] = waveform.numpy()
    batch["sampling_rate"] = TARGET_SAMPLE_RATE
    return batch


def inference(audio):
    """Transcribe one recording supplied by the Gradio audio input.

    Args:
        audio: Object whose ``name`` attribute is the path of the audio
            file to transcribe, or ``None`` when nothing was recorded.

    Returns:
        The decoded text with its last four characters removed, or an
        empty string when ``audio`` is ``None``.
    """
    if audio is None:
        # Nothing to transcribe; avoid an AttributeError on ``audio.name``.
        return ""

    sample = speech_file_to_array_fn(audio.name)

    # Convert the raw waveform into model input tensors.
    input_values = processor(
        sample["speech"],
        sampling_rate=sample["sampling_rate"],
        return_tensors="pt",
    ).input_values

    # Forward pass only; gradients are not needed for inference.
    with torch.no_grad():
        logits = model(input_values).logits

    # Drop the batch dimension: one 2-D array of scores for this clip.
    scores = logits.numpy()[0, :, :]

    # Append two zero entries at the end of *each* axis before decoding,
    # then strip the last four characters of the decoded text.
    # NOTE(review): this looks like a workaround for a size mismatch
    # between the model output and the LM decoder's alphabet -- the reason
    # is not visible here; confirm before changing either number.
    padded_scores = np.pad(scores, [0, 2], mode='constant')
    output = processor.decode(padded_scores).text
    return output[:-4]


inputs = gr.inputs.Audio(label="Record Audio", source="microphone", type='file')
outputs = gr.outputs.Textbox(label="Output Text")

title = "Annarabic Speech Recognition System"
description = "Gradio demo for Annarabic ASR. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below."
examples = [['Aya.mp3'], ['Loubna.mp3']]

gr.Interface(
    inference,
    inputs,
    outputs,
    title=title,
    description=description,
    examples=examples,
).launch()