Spaces:
Sleeping
Sleeping
File size: 1,749 Bytes
75795f2 6186718 f0698ec 33bef80 8197c6e d6bc47b 75795f2 8197c6e d3e099a f0698ec 8197c6e 91f6a7c 8197c6e a61e46a 8197c6e a61e46a 8197c6e f0698ec 8197c6e e0a729c 8197c6e 50a8cbc a61e46a f0698ec 8197c6e e0a729c f0698ec 8197c6e 6186718 e0a729c 8197c6e fe0ca9c 8197c6e 6186718 0e42c86 0547be0 6186718 d6bc47b 75795f2 d6bc47b 6186718 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
from difflib import Differ
import gradio as gr
import torch
from transformers import (
AutoModelForSpeechSeq2Seq,
AutoProcessor,
pipeline,
)
# Intro blurb for the page; handed to gr.Interface as its ``description``.
description = """
<div>
<p>Roll up, roll up come test your diction against a 🤖</p>
</div>
"""
# Markup for the practice phrase (presumably what the user is meant to read
# aloud into the microphone -- confirm with the app owner).
diction_text = """
<div>
<p>How now brown cow</p>
</div>
"""
# HTML component wrapping the phrase; it is listed as the first entry of the
# interface's ``inputs``.
diction = gr.HTML(diction_text)
# Inference is pinned to the CPU. To use a GPU instead, set ``device`` to
# "cuda:0" when ``torch.cuda.is_available()`` is true.
device = "cpu"

# Pick the precision from the device that is actually used, not from whether
# CUDA merely happens to be installed: half precision is only wanted on a GPU,
# and the model weights and the pipeline must agree on the dtype.
torch_dtype = torch.float16 if device.startswith("cuda") else torch.float32

model_id = "openai/whisper-large-v3"

# Load the checkpoint directly in the chosen dtype so the pipeline's
# ``torch_dtype`` below matches the weights.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
model.to(device)

# The processor bundles the tokenizer and the audio feature extractor that
# belong to the same checkpoint.
processor = AutoProcessor.from_pretrained(model_id)

# Speech-recognition pipeline shared by every request.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    chunk_length_s=30,
    batch_size=8,
    return_timestamps=True,
    torch_dtype=torch_dtype,
    device=device,
)
def transcribe_audio(audio):
    """Transcribe a recording and return the recognised text.

    Args:
        audio: The value supplied by the audio input; it is forwarded
            unchanged to the speech-recognition pipeline. ``None`` means
            nothing was recorded.

    Returns:
        str: The transcription, or an empty string when no audio was given.
    """
    # Submitting without a recording yields None; there is nothing to
    # transcribe, so return an empty result instead of failing in the pipeline.
    if audio is None:
        return ""

    result = pipe(audio)
    text = result["text"]
    print(f"TRANSCRIPTION {text}")

    # Log the timestamped segments when the pipeline provides them.
    for chunk in result.get("chunks", ()):
        print(chunk)

    # The interface output is a text box, so hand back the plain string
    # rather than the whole result dictionary.
    return text
# Microphone-only recorder. With ``type="filepath"`` Gradio is documented to
# pass the recording to the callback as a path to an audio file.
input_audio = gr.Audio(
    sources=["microphone"],
    type="filepath",
    waveform_options=gr.WaveformOptions(
        waveform_color="#01C6FF",
        waveform_progress_color="#0066B4",
        skip_length=2,
        show_controls=False,
    ),
)


def _transcribe_recording(diction_prompt, audio):
    """Adapt the two-input interface to the one-argument transcriber.

    The interface passes one value per input component, i.e. the practice
    phrase markup and the recording. Only the recording is needed for
    transcription, so the phrase value is ignored.

    Args:
        diction_prompt: Value of the HTML prompt component (unused).
        audio: Value of the audio component.

    Returns:
        Whatever ``transcribe_audio`` returns for ``audio``.
    """
    return transcribe_audio(audio)


# The callback must accept as many positional arguments as there are input
# components, hence the adapter instead of ``transcribe_audio`` itself.
demo = gr.Interface(
    fn=_transcribe_recording,
    inputs=[diction, input_audio],
    outputs="text",
    title="Test your diction",
    description=description,
    theme="abidlabs/Lime",
)

if __name__ == "__main__":
    demo.launch()
|