aliabd HF staff committed on
Commit
d3c14fb
·
1 Parent(s): ac59356

Upload with huggingface_hub

Browse files
Files changed (5) hide show
  1. .gitignore +2 -0
  2. README.md +6 -6
  3. app.py +57 -0
  4. requirements.txt +1 -0
  5. setup.sh +3 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ *.pbmm
2
+ *.scorer
README.md CHANGED
@@ -1,12 +1,12 @@
 
1
  ---
2
- title: Streaming Stt
3
- emoji: 🐢
4
- colorFrom: blue
5
- colorTo: pink
6
  sdk: gradio
7
  sdk_version: 3.3.1
 
8
  app_file: app.py
9
  pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+
2
  ---
3
+ title: streaming_stt
4
+ emoji: 🔥
5
+ colorFrom: indigo
6
+ colorTo: indigo
7
  sdk: gradio
8
  sdk_version: 3.3.1
9
+
10
  app_file: app.py
11
  pinned: false
12
  ---
 
 
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import urllib.request

import gradio as gr
import numpy as np
from deepspeech import Model
5
+
6
# File names of the DeepSpeech 0.9.3 acoustic model and external scorer; the
# same names are used for the remote release asset and the local copy.
model_file_path = "deepspeech-0.9.3-models.pbmm"
lm_file_path = "deepspeech-0.9.3-models.scorer"
url = "https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/"


def _download_if_missing(file_name):
    """Download ``file_name`` from ``url`` unless a local copy already exists.

    The transfer goes to a temporary ``.part`` file that is renamed only after
    it completes, so an interrupted download is never mistaken for a finished
    model file on the next start.
    """
    if os.path.exists(file_name):
        return
    partial_path = file_name + ".part"
    urllib.request.urlretrieve(url + file_name, filename=partial_path)
    os.replace(partial_path, file_name)


# Avoid re-fetching the (large) model files on every application start.
_download_if_missing(model_file_path)
_download_if_missing(lm_file_path)

# Decoder settings passed to the model below.
beam_width = 100
lm_alpha = 0.93
lm_beta = 1.18

# Load the acoustic model, attach the external scorer and apply the settings.
model = Model(model_file_path)
model.enableExternalScorer(lm_file_path)
model.setScorerAlphaBeta(lm_alpha, lm_beta)
model.setBeamWidth(beam_width)
21
+
22
+
23
def reformat_freq(sr, y):
    """Bring an audio chunk to the 16 kHz sample rate expected by DeepSpeech.

    Args:
        sr: Sample rate of ``y`` in Hz. Only 16000 and 48000 are accepted.
        y: NumPy array of audio samples (assumed mono; multi-dimensional
            input is flattened before processing).

    Returns:
        A ``(sample_rate, samples)`` tuple. 16 kHz input is returned
        unchanged. 48 kHz input is scaled so that its absolute peak maps to
        32767, downsampled by averaging each group of three consecutive
        samples, and cast to ``int16``; the returned rate is then 16000.

    Raises:
        ValueError: If ``sr`` is neither 16000 nor 48000.
    """
    if sr not in (48000, 16000):
        # Deepspeech only supports 16k, (we convert 48k -> 16k)
        raise ValueError("Unsupported rate", sr)
    if sr == 16000:
        return sr, y

    # Work in float64 so that abs() of the most negative int16 cannot overflow.
    flat = np.asarray(y, dtype=np.float64).reshape(-1)
    if flat.size == 0:
        return 16000, np.zeros(0, dtype="int16")

    # Normalise by the absolute peak: using only the largest positive sample
    # lets a dominant negative excursion overflow the int16 range. The floor
    # of 1 avoids division by zero on silent chunks.
    peak = max(float(np.abs(flat).max()), 1.0)
    scaled = flat / peak * 32767

    # Streaming chunks are not guaranteed to hold a multiple of three samples;
    # drop the (at most two) trailing samples that cannot form a full group.
    usable = flat.size - flat.size % 3
    downsampled = scaled[:usable].reshape((-1, 3)).mean(axis=1).astype("int16")
    return 16000, downsampled
38
+
39
+
40
def transcribe(speech, stream):
    """Feed one audio chunk into a DeepSpeech stream and decode the text so far.

    Args:
        speech: ``(sample_rate, samples)`` pair, unpacked into
            ``reformat_freq``.
        stream: Stream carried over from the previous call, or ``None`` on
            the first call, in which case a new one is created from ``model``.

    Returns:
        A ``(text, stream)`` tuple: the intermediate decoding result and the
        stream to pass back in on the next call.
    """
    samples = reformat_freq(*speech)[1]
    # Reuse the caller's stream when there is one; otherwise start a new one.
    decoder = model.createStream() if stream is None else stream
    decoder.feedAudioContent(samples)
    return decoder.intermediateDecode(), decoder
47
+
48
+
49
# Live interface: streamed microphone audio plus a per-session state slot go
# into ``transcribe``; it returns the text to display and the updated state.
demo = gr.Interface(
    fn=transcribe,
    inputs=[gr.Audio(source="microphone", streaming=True), "state"],
    outputs=["text", "state"],
    live=True,
)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ deepspeech==0.9.3
setup.sh ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# Fetch the DeepSpeech model and scorer. The release must match the
# deepspeech==0.9.3 package pinned in requirements.txt and the file names
# that app.py loads.
wget https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/deepspeech-0.9.3-models.pbmm
wget https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/deepspeech-0.9.3-models.scorer
# System audio libraries; -y keeps the install from blocking on a
# confirmation prompt in non-interactive builds.
apt install -y libasound2-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg