Spaces:

acmc
/

grammASRian

Sleeping

File size: 2,722 Bytes

b99bb69
 
 
 
7e7acc6
 
 
 
 
b99bb69
 
7e7acc6
b99bb69
 
7e7acc6
 
 
 
 
b99bb69
 
 
7e7acc6
 
 
b99bb69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7e7acc6
b99bb69
7e7acc6
b99bb69
 
7e7acc6
b99bb69
 
 
 
 
 
 
 
 
 
 
 
 
7e7acc6

import re

import gradio as gr
import numpy as np
from transformers import pipeline

# Speech-to-text pipeline, constructed once at import time and reused by every
# streaming callback. English-only Whisper "base" checkpoint; timestamped
# output is requested, although only the "text" field is read further down.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
    return_timestamps=True,
)


def _parse_filler_words(words_list):
    """Split a comma-separated string into a list of non-empty phrases."""
    if not isinstance(words_list, str):
        gr.Warning("Please enter a valid list of words to check for")
        return []
    # Blank entries (empty textbox, trailing comma) are dropped: counting the
    # empty string would report a hit for every character of the transcript.
    return [part.strip() for part in words_list.split(",") if part.strip()]


def _normalize_chunk(y):
    """Return *y* as mono float32 samples scaled to a peak amplitude of 1."""
    # Multi-dimensional input is averaged over axis 1 (assumed to be the
    # channel axis, i.e. samples laid out as (n_samples, n_channels)).
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # A silent (all-zero) chunk has peak 0; dividing by it would fill the
    # stream with NaNs, and np.max raises on an empty array.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak
    return y


def _count_phrases(text, phrases):
    """Count whole-word, case-insensitive occurrences of each phrase in *text*."""
    lowered_text = text.lower()
    counts = {}
    for phrase in phrases:
        # Escape each token and allow any run of whitespace between tokens so
        # multi-word phrases such as "you know" still match.
        body = r"\s+".join(re.escape(token) for token in phrase.lower().split())
        # Look-arounds instead of \b so that "so" does not match inside
        # "also" and phrases that start/end with punctuation still work.
        pattern = rf"(?<!\w){body}(?!\w)"
        counts[phrase] = len(re.findall(pattern, lowered_text))
    return counts


def transcribe_live(state, words_list, new_chunk):
    """Transcribe the audio recorded so far and count filler phrases in it.

    Each call appends the new chunk to the audio accumulated in ``state``,
    re-transcribes the whole accumulated stream and recounts every phrase in
    the resulting text.

    Args:
        state: Dict carrying ``"stream"`` (accumulated samples or ``None``),
            ``"full_transcription"`` (str) and ``"counts_of_words"`` (dict).
        words_list: Comma-separated phrases to look for, e.g.
            ``"like, so, you know"``.
        new_chunk: ``(sampling_rate, samples)`` tuple with ``samples`` a NumPy
            array, or ``None`` when no audio is available yet.

    Returns:
        A ``(state, counts, transcription)`` tuple. When there is no chunk or
        the transcription fails, the incoming ``state`` is returned unchanged
        together with the previously stored counts and transcription.
    """
    print(f"state: {state}")

    words_to_check_for = _parse_filler_words(words_list)

    stream = state.get("stream", None)
    previous_transcription = state.get("full_transcription", "")
    previous_counts_of_words = state.get(
        "counts_of_words", {word: 0 for word in words_to_check_for}
    )

    if new_chunk is None:
        gr.Info("You can start transcribing by clicking on the Record button")
        print("new chunk is None")
        return state, previous_counts_of_words, previous_transcription

    sr, y = new_chunk
    y = _normalize_chunk(y)

    stream = y if stream is None else np.concatenate([stream, y])

    try:
        new_transcription = transcriber({"sampling_rate": sr, "raw": stream})
    except Exception as e:
        # gr.Error is an exception type and only reaches the UI when raised;
        # merely instantiating it shows nothing. Raising would abort this
        # callback, so report the failure as a non-fatal warning and keep the
        # previous results on screen.
        gr.Warning(f"Transcription failed. Error: {e}")
        print(f"Transcription failed. Error: {e}")
        return state, previous_counts_of_words, previous_transcription

    print(f"new transcription: {new_transcription}")
    full_transcription_text = new_transcription["text"]

    new_counts_of_words = _count_phrases(full_transcription_text, words_to_check_for)

    new_state = {
        "stream": stream,
        "full_transcription": full_transcription_text,
        "counts_of_words": new_counts_of_words,
    }

    print(f"new state: {new_state}")

    return new_state, new_counts_of_words, full_transcription_text


# UI wiring: a streaming microphone input feeds transcribe_live, whose three
# return values update the state, the counts panel and the transcript box.
with gr.Blocks() as demo:
    # Accumulator passed into and returned from transcribe_live on every
    # stream event; its keys mirror the dict that function builds.
    state = gr.State(
        value={
            "stream": None,
            "full_transcription": "",
            "counts_of_words": {},
        }
    )
    filler_words = gr.Textbox(label="List of filler words", value="like, so, you know")
    recording = gr.Audio(streaming=True, label="Recording")

    word_counts = gr.JSON(label="Filler words count", value={})
    transcription = gr.Textbox(label="Transcription", value="")

    # The order of inputs/outputs must match transcribe_live's parameter and
    # return order. stream_every / time_limit are presumably in seconds —
    # confirm against the installed Gradio version.
    recording.stream(
        transcribe_live,
        inputs=[state, filler_words, recording],
        outputs=[state, word_counts, transcription],
        stream_every=5,
        time_limit=60,
    )

demo.launch(show_error=True)