aldan.creo committed on
Commit
7e7acc6
1 Parent(s): b99bb69
Files changed (1) hide show
  1. app.py +40 -32
app.py CHANGED
@@ -2,18 +2,27 @@ import gradio as gr
2
  from transformers import pipeline
3
  import numpy as np
4
 
5
- transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
 
 
 
 
6
 
7
 
8
- def transcribe(state, words_list, new_chunk):
9
  print(f"state: {state}")
10
 
11
- if state is None:
12
- state = {}
 
 
 
13
 
14
  stream = state.get("stream", None)
15
  previous_transcription = state.get("full_transcription", "")
16
- previous_counts_of_words = state.get("counts_of_words", {})
 
 
17
 
18
  if new_chunk is None:
19
  gr.Info("You can start transcribing by clicking on the Record button")
@@ -22,12 +31,6 @@ def transcribe(state, words_list, new_chunk):
22
 
23
  sr, y = new_chunk
24
 
25
- try:
26
- words_to_check_for = [word.strip() for word in words_list.split(",")]
27
- except:
28
- gr.Warning("Please enter a valid list of words to check for")
29
- words_to_check_for = []
30
-
31
  # Convert to mono if stereo
32
  if y.ndim > 1:
33
  y = y.mean(axis=1)
@@ -48,18 +51,12 @@ def transcribe(state, words_list, new_chunk):
48
  return state, previous_counts_of_words, previous_transcription
49
 
50
  print(f"new transcription: {new_transcription}")
51
- new_transcription_text = new_transcription["text"]
52
- full_transcription_text = f"{previous_transcription} {new_transcription_text}"
53
-
54
- new_transcription_text_lower = new_transcription_text.lower()
55
 
56
- new_counts_of_words = {
57
- word: new_transcription_text_lower.count(word) for word in words_to_check_for
58
- }
59
 
60
  new_counts_of_words = {
61
- word: new_counts_of_words.get(word, 0) + previous_counts_of_words.get(word, 0)
62
- for word in words_to_check_for
63
  }
64
 
65
  new_state = {
@@ -73,15 +70,26 @@ def transcribe(state, words_list, new_chunk):
73
  return new_state, new_counts_of_words, full_transcription_text
74
 
75
 
76
- demo = gr.Interface(
77
- transcribe,
78
- [
79
- "state",
80
- gr.Textbox(label="List of filer words"),
81
- gr.Audio(sources=["microphone"], streaming=True),
82
- ],
83
- ["state", gr.JSON(label="Filler words count"), gr.Text(label="Transcription")],
84
- live=True,
85
- )
86
-
87
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
2
  from transformers import pipeline
3
  import numpy as np
4
 
5
+ transcriber = pipeline(
6
+ "automatic-speech-recognition",
7
+ model="openai/whisper-base.en",
8
+ return_timestamps=True,
9
+ )
10
 
11
 
12
+ def transcribe_live(state, words_list, new_chunk):
13
  print(f"state: {state}")
14
 
15
+ try:
16
+ words_to_check_for = [word.strip() for word in words_list.split(",")]
17
+ except:
18
+ gr.Warning("Please enter a valid list of words to check for")
19
+ words_to_check_for = []
20
 
21
  stream = state.get("stream", None)
22
  previous_transcription = state.get("full_transcription", "")
23
+ previous_counts_of_words = state.get(
24
+ "counts_of_words", {word: 0 for word in words_to_check_for}
25
+ )
26
 
27
  if new_chunk is None:
28
  gr.Info("You can start transcribing by clicking on the Record button")
 
31
 
32
  sr, y = new_chunk
33
 
 
 
 
 
 
 
34
  # Convert to mono if stereo
35
  if y.ndim > 1:
36
  y = y.mean(axis=1)
 
51
  return state, previous_counts_of_words, previous_transcription
52
 
53
  print(f"new transcription: {new_transcription}")
54
+ full_transcription_text = new_transcription["text"]
 
 
 
55
 
56
+ full_transcription_text_lower = full_transcription_text.lower()
 
 
57
 
58
  new_counts_of_words = {
59
+ word: full_transcription_text_lower.count(word) for word in words_to_check_for
 
60
  }
61
 
62
  new_state = {
 
70
  return new_state, new_counts_of_words, full_transcription_text
71
 
72
 
73
+ with gr.Blocks() as demo:
74
+ state = gr.State(
75
+ value={
76
+ "stream": None,
77
+ "full_transcription": "",
78
+ "counts_of_words": {},
79
+ }
80
+ )
81
+ filler_words = gr.Textbox(label="List of filer words", value="like, so, you know")
82
+ recording = gr.Audio(streaming=True, label="Recording")
83
+
84
+ word_counts = gr.JSON(label="Filler words count", value={})
85
+ transcription = gr.Textbox(label="Transcription", value="")
86
+
87
+ recording.stream(
88
+ transcribe_live,
89
+ inputs=[state, filler_words, recording],
90
+ outputs=[state, word_counts, transcription],
91
+ stream_every=5,
92
+ time_limit=60,
93
+ )
94
+
95
+ demo.launch(show_error=True)