File size: 2,722 Bytes
b99bb69
 
 
 
7e7acc6
 
 
 
 
b99bb69
 
7e7acc6
b99bb69
 
7e7acc6
 
 
 
 
b99bb69
 
 
7e7acc6
 
 
b99bb69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7e7acc6
b99bb69
7e7acc6
b99bb69
 
7e7acc6
b99bb69
 
 
 
 
 
 
 
 
 
 
 
 
7e7acc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
import re

import gradio as gr
import numpy as np
from transformers import pipeline

# Speech-to-text pipeline, built once at import time and reused by every call
# to transcribe_live. It loads the "openai/whisper-base.en" checkpoint.
transcriber = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base.en",
    # Timestamps are requested even though transcribe_live only reads the
    # "text" field of the result.
    # NOTE(review): presumably enabled so that audio longer than Whisper's
    # native window can be transcribed in one call -- confirm.
    return_timestamps=True,
)


def _parse_filler_words(words_list):
    """Return the stripped, non-empty entries of a comma-separated string."""
    stripped = (word.strip() for word in words_list.split(","))
    # Drop empty entries (e.g. from a trailing comma): counting "" would
    # otherwise report a bogus number of matches.
    return [word for word in stripped if word]


def _normalize_chunk(samples):
    """Return *samples* as mono float32 audio scaled to a peak of 1.0."""
    # Convert to mono if stereo
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # astype() returns a fresh array, so the in-place division below never
    # touches the caller's buffer.
    samples = samples.astype(np.float32)

    # Silent or empty chunks have no peak to scale by; dividing anyway would
    # fill the stream with NaNs (or raise on an empty array).
    peak = np.max(np.abs(samples)) if samples.size else 0.0
    if peak > 0:
        samples /= peak
    return samples


def _count_words(text, words):
    """Count whole-word, case-insensitive occurrences of each word in *text*."""
    lowered_text = text.lower()
    counts = {}
    for word in words:
        # Lookarounds instead of plain substring counting so that "so" does
        # not match inside "also"; they also work for multi-word phrases and
        # for entries that start or end with punctuation.
        pattern = rf"(?<!\w){re.escape(word.lower())}(?!\w)"
        counts[word] = len(re.findall(pattern, lowered_text))
    return counts


def transcribe_live(state, words_list, new_chunk):
    """Transcribe the audio streamed so far and count the requested words.

    Each call appends the new audio chunk to the audio accumulated in
    ``state``, re-transcribes the whole accumulated stream, and counts how
    often each requested word or phrase occurs in the transcription.

    Args:
        state: Dict carried between calls. The keys read here are
            ``"stream"`` (accumulated audio or ``None``),
            ``"full_transcription"`` and ``"counts_of_words"``.
        words_list: Comma-separated words/phrases to count.
        new_chunk: ``(sampling_rate, samples)`` pair for the newest audio
            chunk, or ``None`` when no audio has been recorded yet.

    Returns:
        A ``(state, counts, transcription)`` tuple. When there is no new
        chunk, or transcription fails, the incoming state and its previous
        counts and transcription are returned unchanged.
    """
    print(f"state: {state}")

    try:
        words_to_check_for = _parse_filler_words(words_list)
    except AttributeError:
        # words_list was not a string (e.g. None).
        gr.Warning("Please enter a valid list of words to check for")
        words_to_check_for = []

    stream = state.get("stream", None)
    previous_transcription = state.get("full_transcription", "")
    previous_counts_of_words = state.get(
        "counts_of_words", {word: 0 for word in words_to_check_for}
    )

    if new_chunk is None:
        gr.Info("You can start transcribing by clicking on the Record button")
        print("new chunk is None")
        return state, previous_counts_of_words, previous_transcription

    sr, y = new_chunk
    y = _normalize_chunk(y)

    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    try:
        new_transcription = transcriber({"sampling_rate": sr, "raw": stream})
    except Exception as e:
        # gr.Error is an exception and only reaches the user when raised;
        # gr.Warning shows the message while still letting us return the
        # previous state so the session can continue.
        gr.Warning(f"Transcription failed. Error: {e}")
        print(f"Transcription failed. Error: {e}")
        return state, previous_counts_of_words, previous_transcription

    print(f"new transcription: {new_transcription}")
    full_transcription_text = new_transcription["text"]

    new_counts_of_words = _count_words(full_transcription_text, words_to_check_for)

    new_state = {
        "stream": stream,
        "full_transcription": full_transcription_text,
        "counts_of_words": new_counts_of_words,
    }

    print(f"new state: {new_state}")

    return new_state, new_counts_of_words, full_transcription_text


# Build the UI: a text box for the words to count, a streaming audio recorder,
# and two outputs (per-word counts and the running transcription).
with gr.Blocks() as demo:
    # State carried between streaming callbacks; the keys are the ones
    # transcribe_live reads and writes.
    state = gr.State(
        value={
            "stream": None,
            "full_transcription": "",
            "counts_of_words": {},
        }
    )
    filler_words = gr.Textbox(label="List of filler words", value="like, so, you know")
    recording = gr.Audio(streaming=True, label="Recording")

    word_counts = gr.JSON(label="Filler words count", value={})
    transcription = gr.Textbox(label="Transcription", value="")

    # Feed each streamed audio chunk, together with the current state and the
    # word list, to transcribe_live and route its three return values back to
    # the state, the counts view and the transcription box.
    # See the Gradio streaming docs for the exact semantics of stream_every
    # and time_limit (both are expressed in seconds).
    recording.stream(
        transcribe_live,
        inputs=[state, filler_words, recording],
        outputs=[state, word_counts, transcription],
        stream_every=5,
        time_limit=60,
    )

demo.launch(show_error=True)