import gradio as gr
import librosa
import numpy as np
from transformers import pipeline

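# Load an automatic-speech-recognition pipeline from a locally saved wav2vec2 checkpoint.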
local_model_name = "wav2vec2_model_pipeline"
speech_recognizer = pipeline("automatic-speech-recognition", model=local_model_name)

def greet_and_transcribe(name, intensity, input_audio):
    # gr.Audio with the default type="numpy" passes a (sample_rate, samples) tuple.
    sample_rate, samples = input_audio
    # Scale integer PCM samples into the [-1, 1] float range the model expects.
    if np.issubdtype(samples.dtype, np.integer):
        samples = samples / np.iinfo(samples.dtype).max
    samples = samples.astype(np.float32)
    # Average stereo recordings down to mono; the pipeline expects a 1-D waveform.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)
    # Resample to the rate the model's feature extractor expects (16 kHz for wav2vec2).
    input_audio_rs = librosa.resample(
        samples,
        orig_sr=sample_rate,
        target_sr=speech_recognizer.feature_extractor.sampling_rate)
    transcribed_audio = speech_recognizer(input_audio_rs)["text"]
    return "Hello, " + name + "!" * int(intensity), transcribed_audio

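# Build the Gradio UI: a name box, an intensity slider, and an audio input,
# mapped to the two text outputs returned by greet_and_transcribe.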
demo = gr.Interface(
    title="A simple audio transcription demo",
    description="An application for testing Gradio functionality",
    fn=greet_and_transcribe,
    inputs=[
        gr.Text(label="Name", placeholder="Input your name here"),
        gr.Slider(label="Greeting intensity", minimum=1, maximum=5, value=3, step=1),
        gr.Audio()],
    outputs=[gr.Text(label="Greeting"), gr.Text(label="Transcribed output")],
    cache_examples="lazy",
    allow_flagging="auto",
    examples=[["Jacob", 3, "example_audio/conference.wav"]],
    # article="<p style='text-align: center'><a href='https://tmabraham.github.io/blog/gradio_hf_spaces_tutorial' target='_blank'>Blog post</a></p>"
)
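# share=True additionally serves the app through a temporary public gradio.live link.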
demo.launch(share=True)