Spaces:
Running
Running
import gradio as gr | |
from pytube import YouTube | |
import whisper | |
# Transcription callback used by the Gradio interface.
def whisper_transcript(model_size, url, audio_file):
    """Transcribe a YouTube video's audio track or a local audio file.

    A non-blank ``url`` takes precedence over ``audio_file``.

    Args:
        model_size: Model name passed to ``whisper.load_model``. Names
            ending in ".en" pass ``language="english"`` to ``transcribe``;
            every other name passes ``language=None``.
        url: YouTube link. Empty, whitespace-only or ``None`` means
            "use ``audio_file`` instead".
        audio_file: Path of a local audio clip; only used when ``url`` is
            blank.

    Returns:
        The ``"text"`` entry of the result returned by ``transcribe``.

    Raises:
        ValueError: If neither a link nor an audio file was supplied, or
            if the video exposes no audio-only stream.
    """
    import tempfile

    link = url.strip() if url else ""
    # Fail fast, before any download or model load, when there is no input.
    if not link and not audio_file:
        raise ValueError("Provide a YouTube link or upload an audio clip.")

    language = "english" if model_size.endswith(".en") else None

    # A per-call scratch directory keeps downloads from piling up in (or
    # colliding inside) the working directory; it is removed on exit.
    with tempfile.TemporaryDirectory() as workdir:
        if link:
            audio_stream = YouTube(link).streams.filter(only_audio=True).first()
            if audio_stream is None:
                raise ValueError("No audio-only stream found for this video.")
            source = audio_stream.download(
                output_path=workdir, filename="audio.mp4"
            )
        else:
            source = audio_file

        loaded_model = whisper.load_model(model_size)
        transcript = loaded_model.transcribe(source, language=language)
        return transcript["text"]
# Build the Gradio interface: a model picker, a YouTube link box and an
# audio upload/recording widget feed whisper_transcript; the transcript is
# shown in a single text box.
_multilingual_sizes = ("tiny", "base", "small", "medium")

# English-only variants come first, then the multilingual ones, then "large".
_model_picker = gr.Dropdown(
    label="Select Model",
    choices=[f"{size}.en" for size in _multilingual_sizes]
    + [*_multilingual_sizes, "large"],
    value="base",
)
_link_box = gr.Textbox(label="Paste YouTube link here")
# type="filepath" hands the callback a path on disk rather than raw samples.
_audio_picker = gr.Audio(
    label="Upload Audio File",
    sources=["upload", "microphone"],
    type="filepath",
)

gradio_ui = gr.Interface(
    fn=whisper_transcript,
    inputs=[_model_picker, _link_box, _audio_picker],
    outputs=gr.Textbox(label="Whisper Transcript"),
    theme="Nymbo/Nymbo_Theme",
    title="Transcribe multi-lingual audio clips with Whisper",
    description=(
        "**How to use**: Select a model, paste in a Youtube link or upload "
        "an audio clip, then click submit. "
        "If your clip is **100% in English, select models ending in ‘.en’**. "
        "If the clip is in other languages, or a mix of languages, "
        "select models without ‘.en’"
    ),
    article=(
        "**Note**: The larger the model size selected or the longer the "
        "audio clip, the more time it would take to process the transcript."
    ),
)

# Enable request queuing, then start the app.
_queued_ui = gradio_ui.queue()
_queued_ui.launch()