"""Gradio demo: greet the user and transcribe an audio clip with an ASR pipeline."""

from transformers import pipeline
import gradio as gr
import librosa

# Name/path handed to the transformers pipeline as the model to load.
local_model_name = "wav2vec2_model_pipeline"
speech_recognizer = pipeline("automatic-speech-recognition", model=local_model_name)


def _prepare_audio(input_audio, target_sr):
    """Turn a ``(sample_rate, samples)`` pair into a mono float array at *target_sr*."""
    orig_sr, samples = input_audio
    samples = samples.astype(float)
    # Multi-channel audio is assumed to be laid out as (samples, channels), as
    # documented for gr.Audio's numpy output. Average the channels first:
    # librosa.resample works along the last axis, and the ASR pipeline only
    # accepts single-channel (1-D) input.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)
    return librosa.resample(samples, orig_sr=orig_sr, target_sr=target_sr)


def greet_and_transcribe(name, intensity, input_audio):
    """Build a greeting and transcribe the supplied audio.

    Args:
        name: Text to greet.
        intensity: Number of exclamation marks to append (truncated with ``int``).
        input_audio: ``(sample_rate, samples)`` pair from the audio input, or
            ``None`` when no audio was provided.

    Returns:
        A ``(greeting, transcription)`` tuple. The transcription is an empty
        string when no audio was provided.
    """
    exclamations = "!" * int(intensity)
    greeting = f"Hello, {name}{exclamations}"

    # Without this guard a submission with no recording fails on input_audio[1].
    if input_audio is None:
        return greeting, ""

    input_audio_rs = _prepare_audio(
        input_audio,
        target_sr=speech_recognizer.feature_extractor.sampling_rate,
    )
    transcribed_audio = speech_recognizer(input_audio_rs)["text"]
    return greeting, transcribed_audio


demo = gr.Interface(
    title="A simple audio transcribing model",
    description="This is an application to test gradio functionalities",
    fn=greet_and_transcribe,
    inputs=[
        gr.Text(placeholder="input your name here"),
        gr.Slider(minimum=1, maximum=5, value=3),
        gr.Audio(),
    ],
    outputs=[gr.Text(label="Greeting"), gr.Text(label="Transcribed output")],
    cache_examples="lazy",
    allow_flagging="auto",
    examples=[["Jacob", 3, "example_audio/conference.wav"]],
    # article="<p style='text-align: center'><a href='https://tmabraham.github.io/blog/gradio_hf_spaces_tutorial' target='_blank'>Blog post</a></p>"
)

demo.launch(share=True)