"""Gradio demo that transcribes Spanish speech with a fine-tuned Whisper model.

The script builds an automatic-speech-recognition pipeline, wraps it in two
Gradio interfaces (microphone recording and audio-file upload), shows them as
tabs, and launches the app.
"""

import torch  # NOTE(review): not referenced directly in this script; kept as-is
import gradio as gr
from pathlib import Path
from transformers import pipeline

model_id = "Sandiago21/whisper-large-v2-spanish"

# "~" is not expanded by pathlib automatically; without expanduser() the check
# below would look for a literal "~" directory relative to the working dir.
cache_path = (Path("~/.cache/huggingface/transformers") / model_id).expanduser()

if cache_path.is_dir():
    # pipeline() expects a string path (or a model object), not a Path.
    pipe = pipeline("automatic-speech-recognition", model=str(cache_path))
else:
    # No local copy at cache_path: let transformers resolve the model id.
    pipe = pipeline("automatic-speech-recognition", model=model_id)


def transcribe_speech(filepath):
    """Transcribe the audio file at *filepath* and return the recognized text.

    Args:
        filepath: Path to an audio file, as supplied by the Gradio ``Audio``
            components below (configured with ``type="filepath"``).

    Returns:
        The ``"text"`` entry of the pipeline output.
    """
    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            "language": "spanish",
        },
        chunk_length_s=30,
        batch_size=8,
    )
    return output["text"]


demo = gr.Blocks()

# Tab 1: record from the microphone.
mic_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=gr.Textbox(),
)

# Tab 2: upload an existing audio file.
file_transcribe = gr.Interface(
    fn=transcribe_speech,
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs=gr.Textbox(),
)

with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

demo.launch()