import streamlit as st
from transformers import pipeline
import librosa
import soundfile as sf
import numpy as np
import io

# Hugging Face model identifier used for Urdu speech recognition.
_MODEL_ID = "kingabzpro/wav2vec2-large-xls-r-300m-Urdu"


@st.cache_resource
def _load_asr_pipeline():
    """Build the automatic-speech-recognition pipeline for ``_MODEL_ID``.

    Decorated with ``st.cache_resource`` so the pipeline object is created
    once and then reused, instead of being rebuilt every time Streamlit
    re-executes this script.
    """
    return pipeline("automatic-speech-recognition", model=_MODEL_ID)


# Module-level handle used by the transcription code below.
pipe = _load_asr_pipeline()

def load_audio(audio_file):
    """Decode an uploaded audio file into a waveform.

    Args:
        audio_file: File-like object exposing ``read()``; its full contents
            are copied into an in-memory buffer before decoding.

    Returns:
        The ``(samples, sample_rate)`` pair produced by ``librosa.load``
        when called with ``sr=16000``.
    """
    # librosa is handed a seekable in-memory buffer rather than the upload
    # object itself.
    buffer = io.BytesIO(audio_file.read())
    return librosa.load(buffer, sr=16000)

def transcribe_audio(audio_np, sampling_rate=16000):
    """Transcribe a waveform using the module-level ASR pipeline ``pipe``.

    Args:
        audio_np: Array-like of audio samples, e.g. the first element
            returned by ``load_audio``.
        sampling_rate: Sample rate of ``audio_np`` in Hz. Defaults to 16000,
            the rate ``load_audio`` requests from librosa.

    Returns:
        The ``'text'`` entry of the pipeline's output.

    Raises:
        ValueError: If ``audio_np`` contains no samples.
    """
    samples = np.asarray(audio_np, dtype=np.float32)
    if samples.size == 0:
        # Fail with a readable message instead of an opaque model error.
        raise ValueError("The audio contains no samples to transcribe.")

    # Pass the waveform straight to the pipeline. The earlier approach fed it
    # the return value of sf.write(), which is None, so nothing was ever
    # transcribed. The dict form states the sample rate explicitly.
    transcription = pipe({"raw": samples, "sampling_rate": sampling_rate})

    return transcription['text']

# --- Streamlit page -------------------------------------------------------
st.title("Urdu Speech-to-Text Transcription App")
st.write("Upload an audio file to transcribe its content into Urdu text.")

# Only WAV and MP3 uploads are offered by the picker.
uploaded_file = st.file_uploader(
    "Choose an audio file...",
    type=["wav", "mp3"],
)

# Nothing happens until the user has supplied a file.
if uploaded_file is not None:
    try:
        # Decode the upload, then run it through the recognizer.
        audio_np, sr = load_audio(uploaded_file)
        text = transcribe_audio(audio_np)

        # Show the recognized text under its own heading.
        st.subheader("Transcription Result:")
        st.write(text)
    except Exception as exc:
        # Any failure while loading, transcribing or rendering is reported
        # in the page instead of surfacing as a raw traceback.
        st.error(f"An error occurred: {exc}")