File size: 1,164 Bytes
42c9935 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import copy
import time
import gradio as gr
import numpy as np
import torch
import torchaudio
from loguru import logger
from transformers import pipeline
# Run inference on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Speech-recognition pipeline, built once at import time from the
# `openai/whisper-base.en` checkpoint and placed on the selected device.
transcriber = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base.en",
    device=device,
)
def save_audio_as_wav(data, sample_rate, file_path):
    """Write audio samples to ``file_path`` as a 16-bit signed PCM WAV file.

    Args:
        data: Array-like of audio samples. Input that is not 2-D is flattened
            into a single channel. 2-D input is treated as
            ``(samples, channels)`` when its first dimension is the larger
            one, and as already channels-first otherwise.
        sample_rate: Sampling rate forwarded to ``torchaudio.save``.
        file_path: Destination forwarded to ``torchaudio.save``.
    """
    waveform = torch.tensor(data)
    if waveform.ndim == 2:
        n_rows, n_cols = waveform.shape
        # Multi-channel recordings usually arrive as (samples, channels), but
        # the writer wants (channels, samples). Flattening such an array would
        # interleave the channels into one garbled mono track, so transpose.
        if n_rows > n_cols:
            waveform = waveform.T.contiguous()
    else:
        # Mono (or any other rank): a single channel holding every sample.
        waveform = waveform.reshape(1, -1)
    torchaudio.save(
        file_path,
        waveform,
        sample_rate=sample_rate,
        bits_per_sample=16,
        encoding="PCM_S",
    )
def save_and_transcribe_audio(audio):
    """Save a recording under ``recordings/`` and return its transcription.

    Args:
        audio: ``(sample_rate, data)`` pair, where ``data`` is a NumPy array
            of samples. 2-D data is assumed to be ``(samples, channels)``
            (the layout Gradio documents for its audio component; confirm
            against the caller if another source is used).

    Returns:
        The text produced by the module-level ``transcriber`` pipeline.

    Raises:
        Exception: If saving or transcribing fails. The original error is
            logged and attached as ``__cause__``.
    """
    import os  # local import keeps this block self-contained

    sample_rate, data = audio
    try:
        # Make sure the output directory exists before writing into it.
        os.makedirs("recordings", exist_ok=True)
        # add timestamp to file name
        filename = f"recordings/audio{time.time()}.wav"
        # Save the untouched samples before they are converted for the model.
        save_audio_as_wav(data, sample_rate, filename)

        data = data.astype(np.float32)
        if data.ndim == 2:
            # The ASR pipeline needs a 1-D waveform: downmix channels to mono.
            data = data.mean(axis=1)
        peak = np.max(np.abs(data))
        # Peak-normalise to [-1, 1]; skip silent input so a zero peak does not
        # turn the whole signal into NaNs.
        if peak > 0:
            data /= peak

        text = transcriber({"sampling_rate": sample_rate, "raw": data})["text"]
        gr.Info(f"Transcribed text is: {text}\nProcessing the input...")
    except Exception as e:
        logger.error(f"Error: {e}")
        # Chain the cause so the real failure stays visible in tracebacks.
        raise Exception("Error transcribing audio.") from e
    return text
|