# Casper / app.py
# AlexK-PL's picture
# Update app.py
# 1150183
from transformers import pipeline
import gradio as gr
import torch
from examples import *
# Select the inference device: "cuda" when PyTorch reports an available GPU,
# otherwise "cpu".
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Speech-recognition pipeline built from the "CLiC-UB/Casper" checkpoint.
# It is created once at import time and reused by every transcription request.
# chunk_length_s=30 is forwarded to the pipeline (presumably so long recordings
# are processed in 30-second chunks -- confirm against the transformers docs).
asr = pipeline(
    task="automatic-speech-recognition",
    model="CLiC-UB/Casper",
    device=device,
    chunk_length_s=30,
)
def transcribe_audio(file=None, mic=None):
    """Transcribe one audio input with the module-level ``asr`` pipeline.

    Args:
        file: Uploaded audio, or ``None`` when nothing was uploaded.
        mic: Microphone recording, or ``None`` when nothing was recorded.

    Returns:
        The ``"text"`` field of the pipeline result, or a fixed help message
        when both inputs are ``None``.
    """
    # The microphone recording takes precedence whenever it is present.
    source = mic if mic is not None else file
    if source is None:
        return "You must either provide a mic recording or a file"

    result = asr(source)
    return result["text"]
# css=".gradio-container {background: url('file=background_images/wallpaper_test_mod_2.jpg')}"
# Build the demo page.
# NOTE(review): the row/column nesting below was reconstructed from a copy of
# this file whose indentation had been stripped -- confirm it against the
# intended layout.
with gr.Blocks() as demo:
    # Page title and subtitle.
    gr.Markdown(
        "<center><h1>CASPER</h1> "
        "<h2>Catalan Automatic Speech Recognition using Fine-Tuned Whisper</h2></center>"
    )

    with gr.Row():
        # Input side: the two audio sources plus the action buttons.
        with gr.Column():
            # type="filepath" is requested for both inputs, so the callback is
            # expected to receive a path (or None when the input is empty).
            audio_from_microphone = gr.Audio(
                source="microphone",
                label="Mic",
                type="filepath",
            )
            audio_from_file = gr.Audio(
                source="upload",
                label="File",
                type="filepath",
            )
            with gr.Row():
                with gr.Column(scale=2):
                    asr_btn = gr.Button("Transcribe!")
                with gr.Column(scale=0):
                    # Resets both audio inputs.
                    cln_btn = gr.ClearButton(
                        value="Clear",
                        components=[audio_from_microphone, audio_from_file],
                    )

        # Output side: the transcription and a button to wipe it.
        with gr.Column():
            output_text = gr.Textbox(label="Generated Transcription")
            del_text = gr.ClearButton(
                value="Delete Text",
                components=output_text,
            )
            gr.Markdown("<sub>NOTE: This model does not generate punctuation and casing</sub>")

    # Input order matches transcribe_audio(file, mic).
    asr_btn.click(
        fn=transcribe_audio,
        inputs=[audio_from_file, audio_from_microphone],
        outputs=output_text,
    )

    with gr.Row():
        with gr.Column():
            gr.Markdown("### Audio Examples")
            # Examples come from the star-imported `examples` module; they are
            # wired to the same inputs/outputs as the Transcribe button.
            gr.Examples(
                examples=infer_from_audio_examples,
                label="From Catalan Google TTS dataset",
                fn=transcribe_audio,
                inputs=[audio_from_file, audio_from_microphone],
                outputs=output_text,
                cache_examples=True,
            )

    gr.Markdown("### More Details")
    # NOTE(review): the pipeline loads "CLiC-UB/Casper" while the last link
    # below points at "MaximilianChen/Casper" -- confirm which one is current.
    gr.Markdown(
        "The model used is a small version of the Whisper architecture. "
        "Please, find more details about it in this [link](https://huggingface.co/openai/whisper-small) <br>"
        "Whisper has been fine-tuned using the catalan CommonVoice v.11 and the ParlamentParla datasets. "
        "More information about results and evaluation can be found in "
        "[here](https://huggingface.co/MaximilianChen/Casper)"
    )

# Start the Gradio server.
demo.launch()