# hsb_stt_demo / app.py
# Gradio demo for Upper Sorbian speech-to-text using OpenAI Whisper pipelines.
# (Uploaded via huggingface_hub; commit c1701de.)
from transformers import pipeline
import gradio as gr
import librosa
import torch
# Select the best available accelerator: CUDA first, then Apple-Silicon MPS,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    mps_usable = (
        hasattr(torch.backends, "mps")
        and torch.backends.mps.is_available()
        and torch.backends.mps.is_built()
    )
    device = torch.device("mps" if mps_usable else "cpu")
# ASR pipeline using the Whisper "base" checkpoint.
# BUG FIX: the original passed model="base", which is not a valid Hugging Face
# Hub repo id (the tokenizer already pointed at "openai/whisper-base").  Use
# the full repo id so model and tokenizer actually match.
pipe1 = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    tokenizer="openai/whisper-base",
    chunk_length_s=26,  # split long audio into 26 s chunks
    device=device,
    stride_length_s=(4, 2),  # (left, right) chunk overlap in seconds
)
# ASR pipeline using the smaller/faster Whisper "tiny" checkpoint.
# BUG FIX: the original passed model="tiny", which is not a valid Hugging Face
# Hub repo id (the tokenizer already pointed at "openai/whisper-tiny").  Use
# the full repo id so model and tokenizer actually match.
pipe2 = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-tiny",
    tokenizer="openai/whisper-tiny",
    chunk_length_s=26,  # split long audio into 26 s chunks
    device=device,
    stride_length_s=(4, 2),  # (left, right) chunk overlap in seconds
)
def transcribe(audio, x, model):
    """Transcribe an audio file with the selected Whisper pipeline.

    Parameters
    ----------
    audio : str | None
        Filepath from the microphone input; ``None`` when nothing was recorded.
    x : str
        Filepath from the upload input, used as a fallback when ``audio`` is
        ``None``.
    model : str
        Which pipeline to use: ``"base"`` or ``"tiny"``.

    Returns
    -------
    str
        The recognized text.

    Raises
    ------
    ValueError
        If ``model`` is neither ``"base"`` nor ``"tiny"``.
    """
    # Prefer the microphone recording; fall back to the uploaded file.
    # (Fixed `audio == None` -> `audio is None`: identity check for None.)
    path = x if audio is None else audio
    # Whisper pipelines expect 16 kHz mono input.
    sample = librosa.load(path, sr=16_000, mono=True)[0]
    if model == "base":
        return pipe1(sample, batch_size=8)["text"]
    if model == "tiny":
        return pipe2(sample, batch_size=8)["text"]
    # Previously an unknown choice raised UnboundLocalError; fail explicitly.
    raise ValueError(f"unknown model choice: {model!r}")
# Build the Gradio UI: microphone and file-upload audio inputs plus a model
# selector dropdown, all wired to transcribe(); then start the app.
mic_input = gr.Audio(source="microphone", type="filepath")
upload_input = gr.Audio(source="upload", type="filepath")
model_selector = gr.Dropdown(
    choices=["base", "tiny"],
    info="model k wuzwolenju",
    value="base",
)
iface = gr.Interface(
    fn=transcribe,
    inputs=[mic_input, upload_input, model_selector],
    outputs="text",
    title="Serbski STT",
    description="Gradio demo za spóznawanje rěće w hornjoserbšćinje",
)
iface.launch(debug=True)