aldan.creo committed on
Commit
b99bb69
1 Parent(s): 3da5b44

First version

Browse files
Files changed (4) hide show
  1. .gitignore +5 -0
  2. README.md +2 -4
  3. app.py +87 -0
  4. requirements.txt +3 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ .conda
2
+ __pycache__
3
+ *.pyc
4
+ .vscode
5
+ .DS_Store
README.md CHANGED
@@ -1,13 +1,11 @@
1
  ---
2
  title: GrAImmarian
3
- emoji: 🐠
4
  colorFrom: purple
5
  colorTo: blue
6
  sdk: gradio
7
  sdk_version: 5.3.0
8
  app_file: app.py
9
- pinned: false
10
  short_description: Utilizes ASR to check for filler words when public speaking
11
  ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: GrAImmarian
3
+ emoji: 🗣️
4
  colorFrom: purple
5
  colorTo: blue
6
  sdk: gradio
7
  sdk_version: 5.3.0
8
  app_file: app.py
9
+ pinned: true
10
  short_description: Utilizes ASR to check for filler words when public speaking
11
  ---
 
 
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import numpy as np
4
+
5
# Build the speech-recognition pipeline once, at import time, so that every
# streaming callback reuses the same loaded model instead of reloading it.
# The checkpoint name suggests the English-only "base" Whisper variant.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
)
6
+
7
+
8
def _parse_filler_words(words_list):
    """Split a comma-separated string into stripped, non-empty entries."""
    if not isinstance(words_list, str):
        gr.Warning("Please enter a valid list of words to check for")
        return []
    # Drop empty entries: counting "" in a string yields len(text) + 1.
    return [word.strip() for word in words_list.split(",") if word.strip()]


def _to_normalized_mono(y):
    """Return *y* as a mono float32 array scaled to a peak amplitude of 1."""
    # Convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)
    # A silent (all-zero) or empty chunk has no peak to scale by; dividing
    # would fill the stream with NaNs or raise, so leave it untouched.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak
    return y


def _count_filler_words(text, words):
    """Count case-insensitive whole-word occurrences of each entry in *text*."""
    import re

    lowered = text.lower()
    counts = {}
    for word in words:
        # Lookarounds instead of \b so entries that start or end with
        # punctuation still match, while "um" does not match inside "number".
        pattern = rf"(?<!\w){re.escape(word.lower())}(?!\w)"
        counts[word] = len(re.findall(pattern, lowered))
    return counts


def transcribe(state, words_list, new_chunk):
    """Transcribe the audio received so far and count the requested filler words.

    Args:
        state: Dict carried between calls, or ``None`` on the first call.
            Holds ``"stream"`` (all audio received so far), ``"full_transcription"``
            and ``"counts_of_words"``.
        words_list: Comma-separated words or phrases to look for.
        new_chunk: ``(sample_rate, samples)`` tuple for the newest audio chunk,
            or ``None`` when no audio has been recorded yet.

    Returns:
        A ``(state, counts, transcription)`` tuple. When there is no new audio
        or transcription fails, the previous state, counts and transcription
        are returned unchanged.
    """
    print(f"state: {state}")

    if state is None:
        state = {}

    stream = state.get("stream")
    previous_transcription = state.get("full_transcription", "")
    previous_counts_of_words = state.get("counts_of_words", {})

    if new_chunk is None:
        gr.Info("You can start transcribing by clicking on the Record button")
        print("new chunk is None")
        return state, previous_counts_of_words, previous_transcription

    sr, y = new_chunk

    words_to_check_for = _parse_filler_words(words_list)

    y = _to_normalized_mono(y)
    stream = y if stream is None else np.concatenate([stream, y])

    try:
        new_transcription = transcriber({"sampling_rate": sr, "raw": stream})
    except Exception as e:
        # gr.Error is an exception class and only shows up when raised; use a
        # warning toast so the user is told while the previous results are kept.
        gr.Warning(f"Transcription failed. Error: {e}")
        print(f"Transcription failed. Error: {e}")
        return state, previous_counts_of_words, previous_transcription

    print(f"new transcription: {new_transcription}")

    # The model was given the whole accumulated stream, so its output already
    # is the full transcription. Appending it to the previous text (and adding
    # to the previous counts) would repeat everything on every chunk.
    full_transcription_text = new_transcription["text"].strip()
    new_counts_of_words = _count_filler_words(
        full_transcription_text, words_to_check_for
    )

    new_state = {
        "stream": stream,
        "full_transcription": full_transcription_text,
        "counts_of_words": new_counts_of_words,
    }

    print(f"new state: {new_state}")

    return new_state, new_counts_of_words, full_transcription_text
74
+
75
+
76
# Live interface: the microphone streams audio chunks into `transcribe`, and
# the "state" input/output pair carries the accumulated data between calls.
demo = gr.Interface(
    transcribe,
    [
        "state",
        gr.Textbox(
            label="List of filler words",
            info="Comma-separated words or phrases, e.g. um, uh, you know",
        ),
        gr.Audio(sources=["microphone"], streaming=True),
    ],
    ["state", gr.JSON(label="Filler words count"), gr.Text(label="Transcription")],
    live=True,
)

# Only start the server when run as a script, so importing this module
# (for example from a test or a reloader) does not launch the app.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio==5.3.0
2
+ transformers==4.46.0
3
+ torchaudio==2.5.0