Spaces:
Sleeping
Sleeping
aldan.creo
committed on
Commit
•
b99bb69
1
Parent(s):
3da5b44
First version
Browse files- .gitignore +5 -0
- README.md +2 -4
- app.py +87 -0
- requirements.txt +3 -0
.gitignore
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.conda
|
2 |
+
__pycache__
|
3 |
+
*.pyc
|
4 |
+
.vscode
|
5 |
+
.DS_Store
|
README.md
CHANGED
@@ -1,13 +1,11 @@
|
|
1 |
---
|
2 |
title: GrAImmarian
|
3 |
-
emoji:
|
4 |
colorFrom: purple
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.3.0
|
8 |
app_file: app.py
|
9 |
-
pinned:
|
10 |
short_description: Utilizes ASR to check for filler words when public speaking
|
11 |
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
title: GrAImmarian
|
3 |
+
emoji: 🗣️
|
4 |
colorFrom: purple
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.3.0
|
8 |
app_file: app.py
|
9 |
+
pinned: true
|
10 |
short_description: Utilizes ASR to check for filler words when public speaking
|
11 |
---
|
|
|
|
app.py
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import pipeline
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
# Shared speech-recognition pipeline, built once when the module is imported
# and reused by every call that needs a transcription.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
)
|
6 |
+
|
7 |
+
|
8 |
+
def transcribe(state, words_list, new_chunk):
    """Transcribe the audio streamed so far and count the requested words.

    Every call appends ``new_chunk`` to the audio kept in ``state`` and runs
    the module-level ``transcriber`` over the *whole* accumulated stream.  The
    resulting text therefore already covers everything recorded so far, so it
    replaces (rather than extends) the stored transcription, and the word
    counts are recomputed from it.

    Args:
        state: Session dict carried between calls, or ``None`` on the first
            call.  Keys used: ``"stream"`` (audio accumulated so far),
            ``"full_transcription"`` and ``"counts_of_words"``.
        words_list: Comma-separated words or phrases to count.
        new_chunk: ``(sampling_rate, samples)`` for the newest audio chunk, or
            ``None`` when no audio was provided.

    Returns:
        A ``(state, counts_of_words, transcription)`` tuple.  When there is no
        new audio, or transcription fails, the previous values are returned
        unchanged.
    """
    import re

    print(f"state: {state}")

    if state is None:
        state = {}

    stream = state.get("stream", None)
    previous_transcription = state.get("full_transcription", "")
    previous_counts_of_words = state.get("counts_of_words", {})

    if new_chunk is None:
        gr.Info("You can start transcribing by clicking on the Record button")
        print("new chunk is None")
        return state, previous_counts_of_words, previous_transcription

    sr, y = new_chunk

    if isinstance(words_list, str):
        # Drop empty entries: an empty textbox (or a trailing comma) would
        # otherwise yield the word "", which "matches" everywhere.
        words_to_check_for = [
            word.strip() for word in words_list.split(",") if word.strip()
        ]
    else:
        gr.Warning("Please enter a valid list of words to check for")
        words_to_check_for = []

    # Convert to mono if stereo
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)
    # Peak-normalise the chunk, but never divide by zero: a silent chunk would
    # otherwise fill the persistent stream with NaNs.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak

    if stream is not None:
        stream = np.concatenate([stream, y])
    else:
        stream = y

    try:
        new_transcription = transcriber({"sampling_rate": sr, "raw": stream})
    except Exception as e:
        # gr.Error only surfaces when raised; gr.Warning shows the message
        # while still letting us hand back the previous results.
        gr.Warning(f"Transcription failed. Error: {e}")
        print(f"Transcription failed. Error: {e}")
        return state, previous_counts_of_words, previous_transcription

    print(f"new transcription: {new_transcription}")

    # The transcriber saw the entire stream, so its text is the full
    # transcription; appending it to the previous one would duplicate it.
    full_transcription_text = new_transcription["text"].strip()
    full_transcription_text_lower = full_transcription_text.lower()

    # Count whole-word (or whole-phrase) occurrences, case-insensitively, so
    # that e.g. "um" is not counted inside "umbrella".
    new_counts_of_words = {
        word: len(
            re.findall(
                rf"(?<!\w){re.escape(word.lower())}(?!\w)",
                full_transcription_text_lower,
            )
        )
        for word in words_to_check_for
    }

    new_state = {
        "stream": stream,
        "full_transcription": full_transcription_text,
        "counts_of_words": new_counts_of_words,
    }

    print(f"new state: {new_state}")

    return new_state, new_counts_of_words, full_transcription_text
|
74 |
+
|
75 |
+
|
76 |
+
# Live interface around `transcribe`.
# Inputs: session state, the comma-separated word list, streaming microphone audio.
# Outputs: updated session state, the per-word counts as JSON, the transcription text.
demo = gr.Interface(
    transcribe,
    [
        "state",
        gr.Textbox(label="List of filler words"),
        gr.Audio(sources=["microphone"], streaming=True),
    ],
    ["state", gr.JSON(label="Filler words count"), gr.Text(label="Transcription")],
    live=True,
)

demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
gradio==5.3.0
|
2 |
+
transformers==4.46.0
|
3 |
+
torchaudio==2.5.0
|