import io

import librosa
import numpy as np
import soundfile as sf
import streamlit as st
from transformers import pipeline

# Sampling rate (Hz) that audio is resampled to before it is given to the model.
# Kept in one place so loading and inference can never disagree.
TARGET_SAMPLE_RATE = 16000

# Load the ASR pipeline with the specified model.
# NOTE(review): Streamlit re-executes this script on every user interaction,
# so the model is re-instantiated each time; consider wrapping this in
# Streamlit's resource cache if the installed Streamlit version provides one.
pipe = pipeline(
    "automatic-speech-recognition",
    model="kingabzpro/wav2vec2-large-xls-r-300m-Urdu",
)


def load_audio(audio_file):
    """Decode an uploaded audio file into a waveform at TARGET_SAMPLE_RATE.

    Args:
        audio_file: A binary file-like object exposing ``read()`` (for example
            the object returned by ``st.file_uploader``).

    Returns:
        A ``(samples, sample_rate)`` tuple as returned by ``librosa.load``,
        with ``sample_rate`` equal to ``TARGET_SAMPLE_RATE``.

    Raises:
        ValueError: If the decoded audio contains no samples.
    """
    # Read the raw bytes first, then wrap them in a fresh seekable buffer so
    # the decoder always starts at offset 0 regardless of the upload's
    # current read position.
    audio_bytes = audio_file.read()
    audio = io.BytesIO(audio_bytes)

    # Use librosa to decode and resample the audio in one step.
    audio_np, sr = librosa.load(audio, sr=TARGET_SAMPLE_RATE)
    if audio_np.size == 0:
        raise ValueError("The audio file contains no samples.")
    return audio_np, sr


def transcribe_audio(audio_np):
    """Transcribe a waveform using the module-level ASR pipeline.

    Args:
        audio_np: Audio samples, expected to already be sampled at
            ``TARGET_SAMPLE_RATE`` (as produced by ``load_audio``).

    Returns:
        The transcribed text taken from the pipeline result's ``'text'`` entry.
    """
    # The pipeline is given the raw samples directly. The previous approach
    # passed the return value of ``sf.write``, which is always ``None``, so
    # the model never received any audio.
    samples = np.asarray(audio_np, dtype=np.float32)

    # Transcribe audio
    transcription = pipe(samples)
    return transcription['text']


# Streamlit UI
st.title("Urdu Speech-to-Text Transcription App")
st.write("Upload an audio file to transcribe its content into Urdu text.")

uploaded_file = st.file_uploader("Choose an audio file...", type=["wav", "mp3"])

if uploaded_file is not None:
    # Top-level UI boundary: any failure while decoding or transcribing is
    # reported to the user instead of crashing the app.
    try:
        # Load and process the audio file
        audio_np, _sr = load_audio(uploaded_file)

        # Transcribe the audio
        text = transcribe_audio(audio_np)

        # Display the transcription result
        st.subheader("Transcription Result:")
        st.write(text)
    except Exception as e:
        st.error(f"An error occurred: {e}")