Spaces:

eloi-goncalves
/

ai-interface

Sleeping

File size: 570 Bytes

9fd0422
 
 
80b26db
4ca77dc
a439774
 
 
 
 
 
 
80b26db
dc9df02
 
9fd0422
 
6fc805b
dc9df02
5c2f8ce
6fc805b
9fd0422
 
dc9df02
 
9fd0422
6fc805b
dc9df02

import gradio as gr
from transformers import pipeline
import numpy as np

# Speech-recognition pipeline built on the "openai/whisper-medium" checkpoint.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-medium",
)

# Store the decoder prompt ids for language="pt" / task="transcribe" on the
# model config so generation is forced to start from that prompt.
_prompt_lookup = transcriber.tokenizer.get_decoder_prompt_ids
transcriber.model.config.forced_decoder_ids = _prompt_lookup(
    task="transcribe",
    language="pt",
)
del _prompt_lookup  # keep the module namespace identical to the original

def transcribe(audio):
    """Transcribe one recorded audio clip to text.

    Args:
        audio: ``(sampling_rate, samples)`` pair, or ``None`` when no audio
            was provided. ``samples`` is an array of audio samples; a 2-D
            array is assumed to be ``(n_samples, n_channels)`` -- TODO
            confirm against the audio component in use.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string when
        there is nothing to transcribe (``None`` or zero samples).
    """
    # Nothing recorded: unpacking None would raise TypeError.
    if audio is None:
        return ""

    sr, y = audio
    # astype always copies, so the in-place division below never touches
    # the caller's buffer.
    y = np.asarray(y).astype(np.float32)

    # np.max raises ValueError on an empty array.
    if y.size == 0:
        return ""

    # Down-mix multi-channel audio; the pipeline is fed a single channel.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Peak-normalise to [-1, 1]. Skip for an all-zero (silent) clip, where
    # dividing by the peak would be 0/0 and fill the array with NaN.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return transcriber({"sampling_rate": sr, "raw": y})["text"]


# Web UI: audio captured from the microphone goes in, transcribed text
# comes out.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source="microphone"),
    outputs="text",
)

# Start serving the interface.
demo.launch()