Spaces:

omarxadel
/

egyptian-arabic-speech-to-text

Running

File size: 630 Bytes

527afaa
6249202
95f30a9
6249202
527afaa
6249202
95f30a9
527afaa
 
 
 
6249202
527afaa
 
 
 
 
6249202
 
527afaa
 
95f30a9
527afaa
 
 
6249202
527afaa

from transformers import pipeline
import gradio as gr
import numpy as np

# Build the speech-recognition pipeline once at import time so every
# streaming callback reuses the same loaded model.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="omarxadel/hubert-large-arabic-egyptian",
)


def transcribe(stream, new_chunk):
    """Append a new microphone chunk to the running audio and transcribe it.

    Args:
        stream: Audio accumulated by previous calls (a float32 NumPy array),
            or ``None`` on the first call.
        new_chunk: ``(sampling_rate, samples)`` tuple for the latest chunk,
            where ``samples`` is a NumPy array.

    Returns:
        A ``(stream, text)`` tuple: the updated accumulated audio and the
        transcription of the whole accumulated audio.
    """
    sr, y = new_chunk

    # The recogniser is fed a single 1-D waveform, so down-mix multi-channel
    # input. Assumes 2-D chunks are laid out (samples, channels) -- TODO
    # confirm against the Gradio version in use.
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalise each chunk. Skip empty or all-zero (silent) chunks:
    # np.max raises on empty input, and dividing by a zero peak would fill
    # the buffer with NaN and corrupt every later transcription.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    stream = y if stream is None else np.concatenate([stream, y])

    # Nothing recorded yet: no point invoking the model on zero samples.
    if stream.size == 0:
        return stream, ""

    return stream, transcriber({"sampling_rate": sr, "raw": stream})["text"]


# Live UI: the "state" slot carries the accumulated audio between calls,
# while the streaming microphone component supplies each new chunk.
mic_input = gr.Audio(sources=["microphone"], streaming=True)

demo = gr.Interface(
    fn=transcribe,
    inputs=["state", mic_input],
    outputs=["state", "text"],
    live=True,
)

demo.launch()