# Spaces: Running on Zero
# Markdown shown at the top of the page (passed to gr.Markdown in the UI).
ABOUT = """
# Fast Whisper Turbo ⚡
Ultra-fast Whisper V3 Turbo inference, with enhancements sourced from [insanely-fast-whisper](https://github.com/Vaibhavs10/insanely-fast-whisper).
"""

# Markdown shown below the transcription controls (passed to gr.Markdown in the UI).
CREDITS = """
## Credits
This project was made possible through the work of several other projects:
- [insanely-fast-whisper](https://github.com/Vaibhavs10/insanely-fast-whisper)
"""
import os
import subprocess

# Install flash-attn at startup, before transformers is imported.
# FLASH_ATTENTION_SKIP_CUDA_BUILD is set for the install; see
# https://huggingface.co/spaces/zero-gpu-explorers/README/discussions/75#666e4681303f0a5d67175a90
#
# The flag is merged into the current environment rather than replacing it:
# passing a bare dict as `env` would drop PATH, HOME, proxy and pip settings
# for the child process.
# The return code is deliberately not checked (best-effort install, as before).
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
)
import gradio as gr
from transformers import pipeline
import torch
import spaces

# Shared speech-recognition pipeline, built once at import time and reused by
# every request: Whisper large-v3-turbo in half precision on the first CUDA
# device, with the FlashAttention-2 attention implementation.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-large-v3-turbo",
    model_kwargs={"attn_implementation": "flash_attention_2"},
    device="cuda:0",
    torch_dtype=torch.float16,
)
# On ZeroGPU a GPU is only attached while a `spaces.GPU`-decorated function is
# running; the decorator has no effect in other environments.
@spaces.GPU
def transcribe(audio, task):
    """Run Whisper on an audio file and return the recognised text.

    Args:
        audio: Path to the audio file (the UI's Audio component is configured
            with ``type="filepath"``). ``None`` when no audio was provided.
        task: Whisper generation task, ``"transcribe"`` or ``"translate"``
            (the two choices offered by the UI).

    Returns:
        The pipeline's ``"text"`` output with surrounding whitespace removed.

    Raises:
        gr.Error: If no audio was provided.
    """
    # Gradio passes None when the button is clicked without any audio; fail
    # with a readable message instead of an opaque pipeline error.
    if audio is None:
        raise gr.Error("Please upload or record an audio file first.")

    gr.Info("Starting transcription task")
    outputs = pipe(
        audio,
        chunk_length_s=30,
        batch_size=128,
        generate_kwargs={"task": task},
        return_timestamps=False,
    )
    gr.Info("Finished transcription task")
    return outputs["text"].strip()
# Page layout, top to bottom: intro text, audio input, task selector,
# action button, read-only result box, credits.
with gr.Blocks() as demo:
    gr.Markdown(ABOUT)

    audio = gr.Audio(type="filepath", label="Audio", interactive=True)
    task = gr.Radio(
        choices=["transcribe", "translate"],
        value="transcribe",
        label="Task",
        interactive=True,
    )
    btn = gr.Button(value="Transcribe", variant="primary")
    output = gr.Textbox(label="Transcription", interactive=False)

    # Clicking the button sends the audio path and chosen task to
    # `transcribe` and shows its return value in the result box.
    btn.click(fn=transcribe, inputs=[audio, task], outputs=output)

    gr.Markdown(CREDITS)

# Enable request queuing, then start the server.
demo.queue().launch()