import streamlit as st
from transformers import pipeline
import torch
import numpy as np
from pydub import AudioSegment
import io
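
# Environment note: pydub relies on ffmpeg (or libav) being installed and on
# the system PATH to decode mp3 and flac uploads; plain wav works without it.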
# Load the Whisper ASR pipeline once and cache it across Streamlit reruns,
# so the model is not re-initialized every time the user interacts with the app
@st.cache_resource
def load_pipeline():
    # Run on GPU if available (device=0), otherwise on CPU (device=-1)
    device = 0 if torch.cuda.is_available() else -1
    return pipeline("automatic-speech-recognition",
                    model="openai/whisper-large-v3", device=device)

pipe = load_pipeline()
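
# Note: this setup transcribes short clips in a single pass. For recordings
# longer than ~30 s (Whisper's window), you can pass chunk_length_s=30 (and
# optionally return_timestamps=True) to pipe(...) to enable chunked decoding.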
def audio_to_numpy(audio_file):
    # Load the uploaded bytes into an AudioSegment object
    audio = AudioSegment.from_file(io.BytesIO(audio_file.read()))
    # Whisper expects mono audio sampled at 16 kHz
    audio = audio.set_channels(1).set_frame_rate(16000)
    # Convert to a numpy array of raw integer samples
    samples = np.array(audio.get_array_of_samples())
    # Normalize to float32 in [-1.0, 1.0]; np.iinfo takes a dtype, not a bit
    # count, so query the integer array's dtype for its maximum value
    samples = samples.astype(np.float32) / np.iinfo(samples.dtype).max
    return samples
def transcribe_audio(audio_file):
    # Convert the uploaded audio bytes to a normalized numpy array
    audio_numpy = audio_to_numpy(audio_file)
    # Transcribe; pass the sampling rate explicitly so it is never ambiguous
    transcription = pipe({"raw": audio_numpy, "sampling_rate": 16000})
    return transcription["text"]
# Streamlit UI
st.title("Speech-to-Text Transcription App")
st.write("Upload an audio file to transcribe its content into text.")
uploaded_file = st.file_uploader("Choose an audio file...", type=["wav", "mp3", "flac"])
if uploaded_file is not None:
    try:
        with st.spinner("Transcribing..."):
            text = transcribe_audio(uploaded_file)
        st.subheader("Transcription Result:")
        st.write(text)
    except Exception as e:
        st.error(f"An error occurred: {e}")
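
# Usage sketch (assuming this file is saved as app.py):
#   streamlit run app.py
# then open the local URL Streamlit prints and upload a wav/mp3/flac file.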