"""Gradio demo: greet the user and transcribe an audio clip with a local ASR model."""

import gradio as gr
import librosa
from transformers import pipeline

local_model_name = "wav2vec2_model_pipeline"

# Loaded once at import time so every request reuses the same model instance.
speech_recognizer = pipeline(
    "automatic-speech-recognition",
    model=f"models/{local_model_name}",
)


def greet_and_transcribe(name, intensity, input_audio):
    """Build a greeting and transcribe the supplied audio.

    Args:
        name: Text to greet.
        intensity: Number of exclamation marks appended to the greeting
            (truncated with ``int``).
        input_audio: ``(sample_rate, samples)`` pair as produced by
            ``gr.Audio()``, or ``None`` when the user supplied no audio.

    Returns:
        A ``(greeting, transcription)`` tuple. The transcription is an empty
        string when no audio was provided.
    """
    greeting = f"Hello, {name}" + "!" * int(intensity)

    # gr.Audio() passes None when nothing was recorded or uploaded.
    if input_audio is None:
        return greeting, ""

    source_rate, samples = input_audio
    samples = samples.astype("float32")

    # Multi-channel clips arrive as (samples, channels); the recogniser expects
    # a 1-D signal and librosa resamples along the last axis, so downmix first.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    target_rate = speech_recognizer.feature_extractor.sampling_rate
    resampled = librosa.resample(
        samples,
        orig_sr=source_rate,
        target_sr=target_rate,
    )
    transcription = speech_recognizer(resampled)["text"]
    return greeting, transcription


demo = gr.Interface(
    title="A simple audio transcribing model",
    description="This is an application to test gradio functionalities",
    fn=greet_and_transcribe,
    inputs=[
        gr.Text(placeholder="input your name here"),
        gr.Slider(minimum=1, maximum=5, value=3),
        gr.Audio(),
    ],
    outputs=["text", "text"],
    cache_examples="lazy",
    allow_flagging="auto",
)

# Replaces the deprecated ``enable_queue=True`` constructor argument.
demo.queue()

if __name__ == "__main__":
    demo.launch(share=True)