Spaces: Build error
samarthsrivastava committed
Commit • 787a546
1 Parent(s): 074e7d9
Upload folder using huggingface_hub
Browse files
- .gcloudignore +19 -0
- .gitattributes +1 -0
- .ipynb_checkpoints/voice_to_text_systemdev -checkpoint-checkpoint-checkpoint.ipynb +265 -0
- README.md +2 -8
- app.yaml +7 -0
- main.py +18 -0
- recasepunc/README +7 -0
- recasepunc/checkpoint +3 -0
- recasepunc/example.py +26 -0
- recasepunc/recasepunc.py +744 -0
- recasepunc/vosk-adapted.txt +17 -0
- recasepunc/vosk-adapted.txt.punc +1 -0
- requirements.txt +5 -0
- temp_audio.wav +0 -0
- voice_to_text_systemdev -checkpoint-checkpoint.ipynb +424 -0
- voice_to_text_systemdev -checkpoint-checkpoint.py +195 -0
.gcloudignore
ADDED
@@ -0,0 +1,19 @@
# This file specifies files that are *not* uploaded to Google Cloud
# using gcloud. It follows the same syntax as .gitignore, with the addition of
# "#!include" directives (which insert the entries of the given .gitignore-style
# file at that point).
#
# For more information, run:
#   $ gcloud topic gcloudignore
#
.gcloudignore
# If you would like to upload your .git directory, .gitignore file or files
# from your .gitignore file, remove the corresponding line
# below:
.git
.gitignore

# Python pycache:
__pycache__/
# Ignored by the build system
/setup.cfg
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+recasepunc/checkpoint filter=lfs diff=lfs merge=lfs -text
.ipynb_checkpoints/voice_to_text_systemdev -checkpoint-checkpoint-checkpoint.ipynb
ADDED
@@ -0,0 +1,265 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 29,
   "id": "5c7d8fe6-69ca-4f29-9046-0b0bc9f31911",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "99ee6b03c5154644998c23c837444e83",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "HBox(children=(Button(button_style='success', description='Record', icon='microphone', style=ButtonStyle()), B…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "2b3e4f24da8d4c198b5d15f0f3f7399d",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Output()"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import ipywidgets as widgets\n",
    "from IPython.display import display, clear_output\n",
    "from threading import Thread\n",
    "from queue import Queue\n",
    "import time\n",
    "\n",
    "messages = Queue()\n",
    "recordings = Queue()\n",
    "\n",
    "record_button = widgets.Button(\n",
    "    description=\"Record\",\n",
    "    disabled=False,\n",
    "    button_style=\"success\",\n",
    "    icon=\"microphone\"\n",
    ")\n",
    "\n",
    "stop_button = widgets.Button(\n",
    "    description=\"Stop\",\n",
    "    disabled=False,\n",
    "    button_style=\"warning\",\n",
    "    icon=\"stop\"\n",
    ")\n",
    "\n",
    "output = widgets.Output()\n",
    "\n",
    "def record_microphone():\n",
    "    while not messages.empty():\n",
    "        time.sleep(1)  # Simulate recording\n",
    "        recordings.put(\"Audio recorded.\")  # Simulated recorded audio data\n",
    "\n",
    "def speech_recognition(output_widget):\n",
    "    while not messages.empty():\n",
    "        time.sleep(2)  # Simulate transcription\n",
    "        with output_widget:\n",
    "            clear_output(wait=True)\n",
    "            display(\"Transcription: Hello, how are you?\")  # Simulated transcription result\n",
    "\n",
    "def start_recording(data):\n",
    "    if not messages.empty():\n",
    "        return  # Recording already in progress\n",
    "\n",
    "    messages.put(True)\n",
    "    with output:\n",
    "        clear_output(wait=True)\n",
    "        display(\"Starting...\")\n",
    "\n",
    "    record = Thread(target=record_microphone)\n",
    "    record.start()\n",
    "\n",
    "    transcribe = Thread(target=speech_recognition, args=(output,))\n",
    "    transcribe.start()\n",
    "\n",
    "def stop_recording(data):\n",
    "    if messages.empty():\n",
    "        return  # No recording in progress\n",
    "\n",
    "    messages.get()\n",
    "    with output:\n",
    "        clear_output(wait=True)\n",
    "        display(\"Stopped.\")\n",
    "\n",
    "record_button.on_click(start_recording)\n",
    "stop_button.on_click(stop_recording)\n",
    "\n",
    "display(widgets.HBox([record_button, stop_button]), output)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "bdcb9097-ab31-4dcc-9e2a-4e0818fceb3f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: pyaudio in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (0.2.14)\n"
     ]
    }
   ],
   "source": [
    "!python -m pip install pyaudio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "34112777-1845-4aff-80de-099ceed52f01",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'index': 0, 'structVersion': 2, 'name': 'Microsoft Sound Mapper - Input', 'hostApi': 0, 'maxInputChannels': 2, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.09, 'defaultLowOutputLatency': 0.09, 'defaultHighInputLatency': 0.18, 'defaultHighOutputLatency': 0.18, 'defaultSampleRate': 44100.0}\n",
      "{'index': 1, 'structVersion': 2, 'name': 'Microphone Array (Intel® Smart ', 'hostApi': 0, 'maxInputChannels': 4, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.09, 'defaultLowOutputLatency': 0.09, 'defaultHighInputLatency': 0.18, 'defaultHighOutputLatency': 0.18, 'defaultSampleRate': 44100.0}\n",
      "{'index': 2, 'structVersion': 2, 'name': 'Microsoft Sound Mapper - Output', 'hostApi': 0, 'maxInputChannels': 0, 'maxOutputChannels': 2, 'defaultLowInputLatency': 0.09, 'defaultLowOutputLatency': 0.09, 'defaultHighInputLatency': 0.18, 'defaultHighOutputLatency': 0.18, 'defaultSampleRate': 44100.0}\n",
      "{'index': 3, 'structVersion': 2, 'name': 'Speakers (Realtek(R) Audio)', 'hostApi': 0, 'maxInputChannels': 0, 'maxOutputChannels': 2, 'defaultLowInputLatency': 0.09, 'defaultLowOutputLatency': 0.09, 'defaultHighInputLatency': 0.18, 'defaultHighOutputLatency': 0.18, 'defaultSampleRate': 44100.0}\n",
      "{'index': 4, 'structVersion': 2, 'name': 'Primary Sound Capture Driver', 'hostApi': 1, 'maxInputChannels': 2, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.12, 'defaultLowOutputLatency': 0.0, 'defaultHighInputLatency': 0.24, 'defaultHighOutputLatency': 0.0, 'defaultSampleRate': 44100.0}\n",
      "{'index': 5, 'structVersion': 2, 'name': 'Microphone Array (Intel® Smart Sound Technology (Intel® SST))', 'hostApi': 1, 'maxInputChannels': 4, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.12, 'defaultLowOutputLatency': 0.0, 'defaultHighInputLatency': 0.24, 'defaultHighOutputLatency': 0.0, 'defaultSampleRate': 44100.0}\n",
      "{'index': 6, 'structVersion': 2, 'name': 'Primary Sound Driver', 'hostApi': 1, 'maxInputChannels': 0, 'maxOutputChannels': 8, 'defaultLowInputLatency': 0.0, 'defaultLowOutputLatency': 0.12, 'defaultHighInputLatency': 0.0, 'defaultHighOutputLatency': 0.24, 'defaultSampleRate': 44100.0}\n",
      "{'index': 7, 'structVersion': 2, 'name': 'Speakers (Realtek(R) Audio)', 'hostApi': 1, 'maxInputChannels': 0, 'maxOutputChannels': 2, 'defaultLowInputLatency': 0.0, 'defaultLowOutputLatency': 0.12, 'defaultHighInputLatency': 0.0, 'defaultHighOutputLatency': 0.24, 'defaultSampleRate': 44100.0}\n",
      "{'index': 8, 'structVersion': 2, 'name': 'Speakers (Realtek(R) Audio)', 'hostApi': 2, 'maxInputChannels': 0, 'maxOutputChannels': 8, 'defaultLowInputLatency': 0.0, 'defaultLowOutputLatency': 0.003, 'defaultHighInputLatency': 0.0, 'defaultHighOutputLatency': 0.01, 'defaultSampleRate': 48000.0}\n",
      "{'index': 9, 'structVersion': 2, 'name': 'Microphone Array (Intel® Smart Sound Technology (Intel® SST))', 'hostApi': 2, 'maxInputChannels': 2, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.002, 'defaultLowOutputLatency': 0.0, 'defaultHighInputLatency': 0.01, 'defaultHighOutputLatency': 0.0, 'defaultSampleRate': 48000.0}\n",
      "{'index': 10, 'structVersion': 2, 'name': 'Microphone Array 1 (Intel® Smart Sound Technology (Intel® SST) Microphone)', 'hostApi': 3, 'maxInputChannels': 2, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.01, 'defaultLowOutputLatency': 0.01, 'defaultHighInputLatency': 0.04, 'defaultHighOutputLatency': 0.04, 'defaultSampleRate': 48000.0}\n",
      "{'index': 11, 'structVersion': 2, 'name': 'Microphone Array 2 (Intel® Smart Sound Technology (Intel® SST) Microphone)', 'hostApi': 3, 'maxInputChannels': 4, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.01, 'defaultLowOutputLatency': 0.01, 'defaultHighInputLatency': 0.04, 'defaultHighOutputLatency': 0.04, 'defaultSampleRate': 16000.0}\n",
      "{'index': 12, 'structVersion': 2, 'name': 'Microphone Array 3 (Intel® Smart Sound Technology (Intel® SST) Microphone)', 'hostApi': 3, 'maxInputChannels': 4, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.01, 'defaultLowOutputLatency': 0.01, 'defaultHighInputLatency': 0.04, 'defaultHighOutputLatency': 0.04, 'defaultSampleRate': 16000.0}\n",
      "{'index': 13, 'structVersion': 2, 'name': 'Stereo Mix (Realtek HD Audio Stereo input)', 'hostApi': 3, 'maxInputChannels': 2, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.01, 'defaultLowOutputLatency': 0.01, 'defaultHighInputLatency': 0.04, 'defaultHighOutputLatency': 0.04, 'defaultSampleRate': 48000.0}\n",
      "{'index': 14, 'structVersion': 2, 'name': 'Headphones (Realtek HD Audio 2nd output with SST)', 'hostApi': 3, 'maxInputChannels': 0, 'maxOutputChannels': 2, 'defaultLowInputLatency': 0.01, 'defaultLowOutputLatency': 0.01, 'defaultHighInputLatency': 0.04, 'defaultHighOutputLatency': 0.04, 'defaultSampleRate': 48000.0}\n",
      "{'index': 15, 'structVersion': 2, 'name': 'Speakers (Realtek HD Audio output with SST)', 'hostApi': 3, 'maxInputChannels': 0, 'maxOutputChannels': 2, 'defaultLowInputLatency': 0.01, 'defaultLowOutputLatency': 0.01, 'defaultHighInputLatency': 0.04, 'defaultHighOutputLatency': 0.04, 'defaultSampleRate': 48000.0}\n",
      "{'index': 16, 'structVersion': 2, 'name': 'Microphone (Realtek HD Audio Mic input)', 'hostApi': 3, 'maxInputChannels': 2, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.01, 'defaultLowOutputLatency': 0.01, 'defaultHighInputLatency': 0.04, 'defaultHighOutputLatency': 0.04, 'defaultSampleRate': 44100.0}\n"
     ]
    }
   ],
   "source": [
    "import pyaudio\n",
    "\n",
    "p = pyaudio.PyAudio()\n",
    "for i in range(p.get_device_count()):\n",
    "    print(p.get_device_info_by_index(i))\n",
    "\n",
    "p.terminate()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "2e74dacf-1a91-4dfa-bf91-c64c72755d75",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pyaudio\n",
    "from queue import Queue\n",
    "\n",
    "CHANNELS = 1\n",
    "FRAME_RATE = 16000\n",
    "RECORD_SECONDS = 20\n",
    "AUDIO_FORMAT = pyaudio.paInt16\n",
    "SAMPLE_SIZE = 2\n",
    "\n",
    "messages = Queue()\n",
    "recordings = Queue()\n",
    "\n",
    "def record_microphone(chunk=1024):\n",
    "    p = pyaudio.PyAudio()\n",
    "\n",
    "    stream = p.open(format=AUDIO_FORMAT,\n",
    "                    channels=CHANNELS,\n",
    "                    rate=FRAME_RATE,\n",
    "                    input=True,\n",
    "                    input_device_index=1,\n",
    "                    frames_per_buffer=chunk)\n",
    "\n",
    "    frames = []\n",
    "\n",
    "    while not messages.empty():\n",
    "        data = stream.read(chunk)\n",
    "        frames.append(data)\n",
    "\n",
    "        if len(frames) >= int(FRAME_RATE * RECORD_SECONDS / chunk):\n",
    "            recordings.put(frames.copy())\n",
    "            frames = []\n",
    "\n",
    "    stream.stop_stream()\n",
    "    stream.close()\n",
    "    p.terminate()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "id": "931dc754-e034-45e7-981b-a9210c1fe6e9",
   "metadata": {},
   "outputs": [],
   "source": [
    "import subprocess\n",
    "import json\n",
    "from vosk import Model, KaldiRecognizer\n",
    "\n",
    "model = Model(model_name=\"vosk-model-en-us-0.42-gigaspeech\")\n",
    "rec = KaldiRecognizer(model, FRAME_RATE)\n",
    "rec.SetWords(True)\n",
    "\n",
    "def speech_recognition(output):\n",
    "    while not messages.empty():\n",
    "        frames = recordings.get()\n",
    "\n",
    "        rec.AcceptWaveform(b''.join(frames))\n",
    "        result = rec.Result()\n",
    "        text = json.loads(result)[\"text\"]\n",
    "\n",
    "        cased = subprocess.check_output(\"python recasepunc/recasepunc.py predict recasepunc/checkpoint\", shell=True, text=True, input=text)\n",
    "        output.append_stdout(cased)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a27fb138-d3a9-4e04-83fe-23aca2921d92",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
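
Note: the last two cells define the real PyAudio recorder and Vosk recognizer, but only the simulated versions at the top are wired to the Record/Stop buttons. A minimal driver sketch for the real pipeline, assuming the cells above were run in order (so messages, recordings, output, record_microphone and speech_recognition exist) and that input device index 1 is a valid microphone:

# Hypothetical driver cell: runs the PyAudio recorder and Vosk recognizer
# defined above for ~25 seconds, then signals them to stop.
import time
from threading import Thread

messages.put(True)  # a non-empty queue means "recording in progress"

# daemon threads: speech_recognition blocks in recordings.get() when no
# chunk is pending, so it cannot be joined reliably after stopping
record = Thread(target=record_microphone, daemon=True)
transcribe = Thread(target=speech_recognition, args=(output,), daemon=True)
record.start()
transcribe.start()

time.sleep(25)      # long enough to fill at least one 20-second chunk
messages.get()      # emptying the queue makes both while-loops exit
record.join()       # the recorder re-checks the queue every chunk and exits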
README.md
CHANGED
@@ -1,12 +1,6 @@
 ---
-title:
-
-colorFrom: blue
-colorTo: blue
+title: voice_to_text_system
+app_file: voice_to_text_systemdev -checkpoint-checkpoint.py
 sdk: gradio
 sdk_version: 4.36.1
-app_file: app.py
-pinned: false
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.yaml
ADDED
@@ -0,0 +1,7 @@
runtime: python38

entrypoint: gunicorn -b :$PORT main:app

handlers:
- url: /.*
  script: auto
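
(This pairs with main.py below: a Google App Engine standard-environment config on the python38 runtime, where App Engine supplies $PORT and gunicorn serves the Flask app object exported by main.py.)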
main.py
ADDED
@@ -0,0 +1,18 @@
import os
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/')
def index():
    return 'Hello, World!'

@app.route('/input', methods=['POST'])
def get_input():
    data = request.json
    # Process the input data as needed
    return jsonify(data)

if __name__ == '__main__':
    port = int(os.environ.get('PORT', 8080))
    app.run(host='0.0.0.0', port=port)
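
A quick way to exercise the /input endpoint once the app is running locally (a hypothetical smoke test; assumes the server is listening on localhost:8080 as in the __main__ block and that the requests package is installed):

# Hypothetical client-side smoke test for main.py; run "python main.py" first.
import requests

resp = requests.post(
    "http://localhost:8080/input",
    json={"text": "hello world"},  # any JSON body; the handler echoes it back
    timeout=5,
)
print(resp.status_code)  # expected: 200
print(resp.json())       # expected: {'text': 'hello world'}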
recasepunc/README
ADDED
@@ -0,0 +1,7 @@
1. Install pytorch and transformers:

   pip3 install transformers

2. Run python3 example.py de-test.txt

3. Compare with de-test.txt.orig
recasepunc/checkpoint
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9782ccd13a130feffb13609834778421ebd39e26910d25ddcf2185a0eea75935
size 1310193349
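
(Note: this is a Git LFS pointer, not the model itself. The actual checkpoint, about 1.3 GB per the size field, lives in LFS storage, which is why the .gitattributes change above adds recasepunc/checkpoint to the lfs filter; a clone without git-lfs leaves only this three-line stub.)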
recasepunc/example.py
ADDED
@@ -0,0 +1,26 @@
import sys
import time
from transformers import logging
from recasepunc import CasePuncPredictor
from recasepunc import WordpieceTokenizer
from recasepunc import Config

logging.set_verbosity_error()

predictor = CasePuncPredictor('checkpoint', lang="en")

text = " ".join(open(sys.argv[1]).readlines())
tokens = list(enumerate(predictor.tokenize(text)))

results = ""
for token, case_label, punc_label in predictor.predict(tokens, lambda x: x[1]):
    prediction = predictor.map_punc_label(predictor.map_case_label(token[1], case_label), punc_label)

    if token[1][0] == '\'' or (len(results) > 0 and results[-1] == '\''):
        results = results + prediction
    elif token[1][0] != '#':
        results = results + ' ' + prediction
    else:
        results = results + prediction

print(results.strip())
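
With the files shipped in this folder, the README's pattern can be pointed at the bundled Vosk transcript, e.g. python3 example.py vosk-adapted.txt run from inside recasepunc/ (so the relative 'checkpoint' path resolves); vosk-adapted.txt.punc below shows the style of recased, punctuated output to expect.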
recasepunc/recasepunc.py
ADDED
@@ -0,0 +1,744 @@
import sys
import collections
import os
import regex as re
#from mosestokenizer import *
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
import unicodedata
import numpy as np
import argparse
from torch.utils.data import TensorDataset, DataLoader

from transformers import AutoModel, AutoTokenizer, BertTokenizer


default_config = argparse.Namespace(
    seed=871253,
    lang='en',
    #flavor='flaubert/flaubert_base_uncased',
    flavor=None,
    max_length=256,
    batch_size=16,
    updates=24000,
    period=1000,
    lr=1e-5,
    dab_rate=0.1,
    device='cuda',
    debug=False
)

default_flavors = {
    'fr': 'flaubert/flaubert_base_uncased',
    'en': 'bert-base-uncased',
    'zh': 'ckiplab/bert-base-chinese',
    'tr': 'dbmdz/bert-base-turkish-uncased',
    'de': 'dbmdz/bert-base-german-uncased',
    'pt': 'neuralmind/bert-base-portuguese-cased'
}

class Config(argparse.Namespace):
    def __init__(self, **kwargs):
        for key, value in default_config.__dict__.items():
            setattr(self, key, value)
        for key, value in kwargs.items():
            setattr(self, key, value)

        assert self.lang in ['fr', 'en', 'zh', 'tr', 'pt', 'de']

        if 'lang' in kwargs and ('flavor' not in kwargs or kwargs['flavor'] is None):
            self.flavor = default_flavors[self.lang]

        #print(self.lang, self.flavor)


def init_random(seed):
    # make sure everything is deterministic
    os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'
    #torch.use_deterministic_algorithms(True)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    random.seed(seed)
    np.random.seed(seed)

# NOTE: it is assumed in the implementation that y[:,0] is the punctuation label, and y[:,1] is the case label!

punctuation = {
    'O': 0,
    'COMMA': 1,
    'PERIOD': 2,
    'QUESTION': 3,
    'EXCLAMATION': 4,
}

punctuation_syms = ['', ',', '.', ' ?', ' !']

case = {
    'LOWER': 0,
    'UPPER': 1,
    'CAPITALIZE': 2,
    'OTHER': 3,
}


class Model(nn.Module):
    def __init__(self, flavor, device):
        super().__init__()
        self.bert = AutoModel.from_pretrained(flavor)
        # need a proper way of determining representation size
        size = self.bert.dim if hasattr(self.bert, 'dim') else self.bert.config.pooler_fc_size if hasattr(self.bert.config, 'pooler_fc_size') else self.bert.config.emb_dim if hasattr(self.bert.config, 'emb_dim') else self.bert.config.hidden_size
        self.punc = nn.Linear(size, 5)
        self.case = nn.Linear(size, 4)
        self.dropout = nn.Dropout(0.3)
        self.to(device)

    def forward(self, x):
        output = self.bert(x)
        representations = self.dropout(F.gelu(output['last_hidden_state']))
        punc = self.punc(representations)
        case = self.case(representations)
        return punc, case


# randomly create sequences that align to punctuation boundaries
def drop_at_boundaries(rate, x, y, cls_token_id, sep_token_id, pad_token_id):
    for i, dropped in enumerate(torch.rand((len(x),)) < rate):
        if dropped:
            # select all indices that are sentence endings
            indices = (y[i,:,0] > 1).nonzero(as_tuple=True)[0]
            if len(indices) < 2:
                continue
            start = indices[0] + 1
            end = indices[random.randint(1, len(indices) - 1)] + 1
            length = end - start
            if length + 2 > len(x[i]):
                continue
            x[i, 0] = cls_token_id
            x[i, 1: length + 1] = x[i, start: end].clone()
            x[i, length + 1] = sep_token_id
            x[i, length + 2:] = pad_token_id
            y[i, 0] = 0
            y[i, 1: length + 1] = y[i, start: end].clone()
            y[i, length + 1:] = 0


def compute_performance(config, model, loader):
    device = config.device
    criterion = nn.CrossEntropyLoss()
    model.eval()
    total_loss = all_correct1 = all_correct2 = num_loss = num_perf = 0
    num_ref = collections.defaultdict(float)
    num_hyp = collections.defaultdict(float)
    num_correct = collections.defaultdict(float)
    for x, y in loader:
        x = x.long().to(device)
        y = y.long().to(device)
        y1 = y[:,:,0]
        y2 = y[:,:,1]
        with torch.no_grad():
            y_scores1, y_scores2 = model(x.to(device))
            loss1 = criterion(y_scores1.view(y1.size(0) * y1.size(1), -1), y1.view(y1.size(0) * y1.size(1)))
            loss2 = criterion(y_scores2.view(y2.size(0) * y2.size(1), -1), y2.view(y2.size(0) * y2.size(1)))
            loss = loss1 + loss2
            y_pred1 = torch.max(y_scores1, 2)[1]
            y_pred2 = torch.max(y_scores2, 2)[1]
            for label in range(1, 5):
                ref = (y1 == label)
                hyp = (y_pred1 == label)
                correct = (ref * hyp == 1)
                num_ref[label] += ref.sum()
                num_hyp[label] += hyp.sum()
                num_correct[label] += correct.sum()
                num_ref[0] += ref.sum()
                num_hyp[0] += hyp.sum()
                num_correct[0] += correct.sum()
            all_correct1 += (y_pred1 == y1).sum()
            all_correct2 += (y_pred2 == y2).sum()
            total_loss += loss.item()
            num_loss += len(y)
            num_perf += len(y) * config.max_length
    recall = {}
    precision = {}
    fscore = {}
    for label in range(0, 5):
        recall[label] = num_correct[label] / num_ref[label] if num_ref[label] > 0 else 0
        precision[label] = num_correct[label] / num_hyp[label] if num_hyp[label] > 0 else 0
        fscore[label] = (2 * recall[label] * precision[label] / (recall[label] + precision[label])).item() if recall[label] + precision[label] > 0 else 0
    return total_loss / num_loss, all_correct2.item() / num_perf, all_correct1.item() / num_perf, fscore


def fit(config, model, checkpoint_path, train_loader, valid_loader, iterations, valid_period=200, lr=1e-5):
    device = config.device
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(filter(lambda param: param.requires_grad, model.parameters()), lr=lr)
    iteration = 0
    while True:
        model.train()
        total_loss = num = 0
        for x, y in tqdm(train_loader):
            x = x.long().to(device)
            y = y.long().to(device)
            drop_at_boundaries(config.dab_rate, x, y, config.cls_token_id, config.sep_token_id, config.pad_token_id)
            y1 = y[:,:,0]
            y2 = y[:,:,1]
            optimizer.zero_grad()
            y_scores1, y_scores2 = model(x)
            loss1 = criterion(y_scores1.view(y1.size(0) * y1.size(1), -1), y1.view(y1.size(0) * y1.size(1)))
            loss2 = criterion(y_scores2.view(y2.size(0) * y2.size(1), -1), y2.view(y2.size(0) * y2.size(1)))
            loss = loss1 + loss2
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            num += len(y)
            if iteration % valid_period == valid_period - 1:
                train_loss = total_loss / num
                valid_loss, valid_accuracy_case, valid_accuracy_punc, valid_fscore = compute_performance(config, model, valid_loader)
                torch.save({
                    'iteration': iteration + 1,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'train_loss': train_loss,
                    'valid_loss': valid_loss,
                    'valid_accuracy_case': valid_accuracy_case,
                    'valid_accuracy_punc': valid_accuracy_punc,
                    'valid_fscore': valid_fscore,
                    'config': config.__dict__,
                }, '%s.%d' % (checkpoint_path, iteration + 1))
                print(iteration + 1, train_loss, valid_loss, valid_accuracy_case, valid_accuracy_punc, valid_fscore)
                total_loss = num = 0

            iteration += 1
            if iteration > iterations:
                return

        sys.stderr.flush()
        sys.stdout.flush()


def batchify(max_length, x, y):
    print(x.shape)
    print(y.shape)
    x = x[:(len(x) // max_length) * max_length].reshape(-1, max_length)
    y = y[:(len(y) // max_length) * max_length, :].reshape(-1, max_length, 2)
    return x, y


def train(config, train_x_fn, train_y_fn, valid_x_fn, valid_y_fn, checkpoint_path):
    X_train, Y_train = batchify(config.max_length, torch.load(train_x_fn), torch.load(train_y_fn))
    X_valid, Y_valid = batchify(config.max_length, torch.load(valid_x_fn), torch.load(valid_y_fn))

    train_set = TensorDataset(X_train, Y_train)
    valid_set = TensorDataset(X_valid, Y_valid)

    train_loader = DataLoader(train_set, batch_size=config.batch_size, shuffle=True)
    valid_loader = DataLoader(valid_set, batch_size=config.batch_size)

    model = Model(config.flavor, config.device)

    fit(config, model, checkpoint_path, train_loader, valid_loader, config.updates, config.period, config.lr)


def run_eval(config, test_x_fn, test_y_fn, checkpoint_path):
    X_test, Y_test = batchify(config.max_length, torch.load(test_x_fn), torch.load(test_y_fn))
    test_set = TensorDataset(X_test, Y_test)
    test_loader = DataLoader(test_set, batch_size=config.batch_size)

    loaded = torch.load(checkpoint_path, map_location=config.device)
    if 'config' in loaded:
        config = Config(**loaded['config'])
    init(config)

    model = Model(config.flavor, config.device)
    model.load_state_dict(loaded['model_state_dict'], strict=False)

    print(*compute_performance(config, model, test_loader))


def recase(token, label):
    if label == case['LOWER']:
        return token.lower()
    elif label == case['CAPITALIZE']:
        return token.lower().capitalize()
    elif label == case['UPPER']:
        return token.upper()
    else:
        return token


class CasePuncPredictor:
    def __init__(self, checkpoint_path, lang=default_config.lang, flavor=default_config.flavor, device=default_config.device):
        loaded = torch.load(checkpoint_path, map_location=device if torch.cuda.is_available() else 'cpu')
        if 'config' in loaded:
            self.config = Config(**loaded['config'])
        else:
            self.config = Config(lang=lang, flavor=flavor, device=device)
        init(self.config)

        self.model = Model(self.config.flavor, self.config.device)
        self.model.load_state_dict(loaded['model_state_dict'])
        self.model.eval()
        self.model.to(self.config.device)

        self.rev_case = {b: a for a, b in case.items()}
        self.rev_punc = {b: a for a, b in punctuation.items()}

    def tokenize(self, text):
        return [self.config.cls_token] + self.config.tokenizer.tokenize(text) + [self.config.sep_token]

    def predict(self, tokens, getter=lambda x: x):
        max_length = self.config.max_length
        device = self.config.device
        if type(tokens) == str:
            tokens = self.tokenize(tokens)
        previous_label = punctuation['PERIOD']
        for start in range(0, len(tokens), max_length):
            instance = tokens[start: start + max_length]
            if type(getter(instance[0])) == str:
                ids = self.config.tokenizer.convert_tokens_to_ids(getter(token) for token in instance)
            else:
                ids = [getter(token) for token in instance]
            if len(ids) < max_length:
                ids += [0] * (max_length - len(ids))
            x = torch.tensor([ids]).long().to(device)
            y_scores1, y_scores2 = self.model(x)
            y_pred1 = torch.max(y_scores1, 2)[1]
            y_pred2 = torch.max(y_scores2, 2)[1]
            for i, id, token, punc_label, case_label in zip(range(len(instance)), ids, instance, y_pred1[0].tolist()[:len(instance)], y_pred2[0].tolist()[:len(instance)]):
                if id == self.config.cls_token_id or id == self.config.sep_token_id:
                    continue
                if previous_label is not None and previous_label > 1:
                    if case_label in [case['LOWER'], case['OTHER']]:  # LOWER, OTHER
                        case_label = case['CAPITALIZE']
                if i + start == len(tokens) - 2 and punc_label == punctuation['O']:
                    punc_label = punctuation['PERIOD']
                yield (token, self.rev_case[case_label], self.rev_punc[punc_label])
                previous_label = punc_label

    def map_case_label(self, token, case_label):
        if token.endswith('</w>'):
            token = token[:-4]
        if token.startswith('##'):
            token = token[2:]
        return recase(token, case[case_label])

    def map_punc_label(self, token, punc_label):
        if token.endswith('</w>'):
            token = token[:-4]
        if token.startswith('##'):
            token = token[2:]
        return token + punctuation_syms[punctuation[punc_label]]


def generate_predictions(config, checkpoint_path):
    loaded = torch.load(checkpoint_path, map_location=config.device if torch.cuda.is_available() else 'cpu')
    if 'config' in loaded:
        config = Config(**loaded['config'])
    init(config)

    model = Model(config.flavor, config.device)
    model.load_state_dict(loaded['model_state_dict'], strict=False)

    rev_case = {b: a for a, b in case.items()}
    rev_punc = {b: a for a, b in punctuation.items()}

    for line in sys.stdin:
        # also drop punctuation that we may generate
        line = ''.join([c for c in line if c not in mapped_punctuation])
        if config.debug:
            print(line)
        tokens = [config.cls_token] + config.tokenizer.tokenize(line) + [config.sep_token]
        if config.debug:
            print(tokens)
        previous_label = punctuation['PERIOD']
        first_time = True
        was_word = False
        for start in range(0, len(tokens), config.max_length):
            instance = tokens[start: start + config.max_length]
            ids = config.tokenizer.convert_tokens_to_ids(instance)
            #print(len(ids), file=sys.stderr)
            if len(ids) < config.max_length:
                ids += [config.pad_token_id] * (config.max_length - len(ids))
            x = torch.tensor([ids]).long().to(config.device)
            y_scores1, y_scores2 = model(x)
            y_pred1 = torch.max(y_scores1, 2)[1]
            y_pred2 = torch.max(y_scores2, 2)[1]
            for id, token, punc_label, case_label in zip(ids, instance, y_pred1[0].tolist()[:len(instance)], y_pred2[0].tolist()[:len(instance)]):
                if config.debug:
                    print(id, token, punc_label, case_label, file=sys.stderr)
                if id == config.cls_token_id or id == config.sep_token_id:
                    continue
                if previous_label is not None and previous_label > 1:
                    if case_label in [case['LOWER'], case['OTHER']]:
                        case_label = case['CAPITALIZE']
                previous_label = punc_label
                # different strategy due to sub-lexical token encoding in Flaubert
                if config.lang == 'fr':
                    if token.endswith('</w>'):
                        cased_token = recase(token[:-4], case_label)
                        if was_word:
                            print(' ', end='')
                        print(cased_token + punctuation_syms[punc_label], end='')
                        was_word = True
                    else:
                        cased_token = recase(token, case_label)
                        if was_word:
                            print(' ', end='')
                        print(cased_token, end='')
                        was_word = False
                else:
                    if token.startswith('##'):
                        cased_token = recase(token[2:], case_label)
                        print(cased_token, end='')
                    else:
                        cased_token = recase(token, case_label)
                        if not first_time:
                            print(' ', end='')
                        first_time = False
                        print(cased_token + punctuation_syms[punc_label], end='')
        if previous_label == 0:
            print('.', end='')
        print()


def label_for_case(token):
    token = re.sub(r'[^\p{Han}\p{Ll}\p{Lu}]', '', token)
    if token == token.lower():
        return 'LOWER'
    elif token == token.lower().capitalize():
        return 'CAPITALIZE'
    elif token == token.upper():
        return 'UPPER'
    else:
        return 'OTHER'


def make_tensors(config, input_fn, output_x_fn, output_y_fn):
    # count file lines without loading them
    size = 0
    with open(input_fn) as fp:
        for line in fp:
            size += 1

    with open(input_fn) as fp:
        X = torch.IntTensor(size)
        Y = torch.ByteTensor(size, 2)

        offset = 0
        for n, line in enumerate(fp):
            word, case_label, punc_label = line.strip().split('\t')
            id = config.tokenizer.convert_tokens_to_ids(word)
            if config.debug:
                assert word.lower() == config.tokenizer.convert_ids_to_tokens(id)
            X[offset] = id
            Y[offset, 0] = punctuation[punc_label]
            Y[offset, 1] = case[case_label]
            offset += 1

    torch.save(X, output_x_fn)
    torch.save(Y, output_y_fn)


mapped_punctuation = {
    '.': 'PERIOD',
    '...': 'PERIOD',
    ',': 'COMMA',
    ';': 'COMMA',
    ':': 'COMMA',
    '(': 'COMMA',
    ')': 'COMMA',
    '?': 'QUESTION',
    '!': 'EXCLAMATION',
    # full-width and CJK variants
    ',': 'COMMA',
    '!': 'EXCLAMATION',
    '?': 'QUESTION',
    ';': 'COMMA',
    ':': 'COMMA',
    '(': 'COMMA',
    ')': 'COMMA',
    '[': 'COMMA',
    ']': 'COMMA',
    '【': 'COMMA',
    '】': 'COMMA',
    '└': 'COMMA',
    '└ ': 'COMMA',
    '_': 'O',
    '。': 'PERIOD',
    '、': 'COMMA',  # enumeration comma
    '…': 'PERIOD',
    '—': 'COMMA',
    '「': 'COMMA',
    '」': 'COMMA',
    '.': 'PERIOD',
    '《': 'O',
    '》': 'O',
    '“': 'O',
    '”': 'O',
    '"': 'O',
    '-': 'O',
    '-': 'O',
    '〉': 'COMMA',
    '〈': 'COMMA',
    '↑': 'O',
    '〔': 'COMMA',
    '〕': 'COMMA',
}

def preprocess_text(config, max_token_count=-1):
    global num_tokens_output
    max_token_count = int(max_token_count)
    num_tokens_output = 0
    def process_segment(text, punctuation):
        global num_tokens_output
        text = text.replace('\t', ' ')
        tokens = config.tokenizer.tokenize(text)
        for i, token in enumerate(tokens):
            case_label = label_for_case(token)
            if i == len(tokens) - 1:
                print(token.lower(), case_label, punctuation, sep='\t')
            else:
                print(token.lower(), case_label, 'O', sep='\t')
            num_tokens_output += 1
            # a bit too ugly, but alternative is to throw an exception
            if max_token_count > 0 and num_tokens_output >= max_token_count:
                sys.exit(0)

    for line in sys.stdin:
        line = line.strip()
        if line != '':
            line = unicodedata.normalize("NFC", line)
            if config.debug:
                print(line)
            start = 0
            for i, char in enumerate(line):
                if char in mapped_punctuation:
                    if i > start and line[start: i].strip() != '':
                        process_segment(line[start: i], mapped_punctuation[char])
                    start = i + 1
            if start < len(line):
                process_segment(line[start:], 'PERIOD')


def preprocess_text_old_fr(config):
    assert config.lang == 'fr'
    splitsents = MosesSentenceSplitter(config.lang)
    tokenize = MosesTokenizer(config.lang, extra=['-no-escape'])
    normalize = MosesPunctuationNormalizer(config.lang)

    for line in sys.stdin:
        if line.strip() != '':
            for sentence in splitsents([normalize(line)]):
                tokens = tokenize(sentence)
                previous_token = None
                for token in tokens:
                    if token in mapped_punctuation:
                        if previous_token is not None:
                            print(previous_token, mapped_punctuation[token], sep='\t')
                        previous_token = None
                    elif not re.search(r'[\p{Han}\p{Ll}\p{Lu}\d]', token):  # remove non-alphanumeric tokens
                        continue
                    else:
                        if previous_token is not None:
                            print(previous_token, 'O', sep='\t')
                        previous_token = token
                if previous_token is not None:
                    print(previous_token, 'PERIOD', sep='\t')


# modification of the wordpiece tokenizer to keep case information even if vocab is lower cased
# forked from https://github.com/huggingface/transformers/blob/master/src/transformers/models/bert/tokenization_bert.py

class WordpieceTokenizer(object):
    """Runs WordPiece tokenization."""

    def __init__(self, vocab, unk_token, max_input_chars_per_word=100, keep_case=True):
        self.vocab = vocab
        self.unk_token = unk_token
        self.max_input_chars_per_word = max_input_chars_per_word
        self.keep_case = keep_case

    def tokenize(self, text):
        """
        Tokenizes a piece of text into its word pieces. This uses a greedy longest-match-first algorithm to perform
        tokenization using the given vocabulary.
        For example, :obj:`input = "unaffable"` will return as output :obj:`["un", "##aff", "##able"]`.
        Args:
            text: A single token or whitespace separated tokens. This should have
                already been passed through `BasicTokenizer`.
        Returns:
            A list of wordpiece tokens.
        """

        output_tokens = []
        for token in text.strip().split():
            chars = list(token)
            if len(chars) > self.max_input_chars_per_word:
                output_tokens.append(self.unk_token)
                continue

            is_bad = False
            start = 0
            sub_tokens = []
            while start < len(chars):
                end = len(chars)
                cur_substr = None
                while start < end:
                    substr = "".join(chars[start:end])
                    if start > 0:
                        substr = "##" + substr
                    # optionally lowercase substring before checking for inclusion in vocab
                    if (self.keep_case and substr.lower() in self.vocab) or (substr in self.vocab):
                        cur_substr = substr
                        break
                    end -= 1
                if cur_substr is None:
                    is_bad = True
                    break
                sub_tokens.append(cur_substr)
                start = end

            if is_bad:
                output_tokens.append(self.unk_token)
            else:
                output_tokens.extend(sub_tokens)
        return output_tokens


# modification of XLM bpe tokenizer for keeping case information when vocab is lowercase
# forked from https://github.com/huggingface/transformers/blob/cd56f3fe7eae4a53a9880e3f5e8f91877a78271c/src/transformers/models/xlm/tokenization_xlm.py
def bpe(self, token):
    def to_lower(pair):
        #print('  ', pair)
        return (pair[0].lower(), pair[1].lower())

    from transformers.models.xlm.tokenization_xlm import get_pairs

    word = tuple(token[:-1]) + (token[-1] + "</w>",)
    if token in self.cache:
        return self.cache[token]
    pairs = get_pairs(word)

    if not pairs:
        return token + "</w>"

    while True:
        bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(to_lower(pair), float("inf")))
        #print(bigram)
        if to_lower(bigram) not in self.bpe_ranks:
            break
        first, second = bigram
        new_word = []
        i = 0
        while i < len(word):
            try:
                j = word.index(first, i)
            except ValueError:
                new_word.extend(word[i:])
                break
            else:
                new_word.extend(word[i:j])
                i = j

            if word[i] == first and i < len(word) - 1 and word[i + 1] == second:
                new_word.append(first + second)
                i += 2
            else:
                new_word.append(word[i])
                i += 1
        new_word = tuple(new_word)
        word = new_word
        if len(word) == 1:
            break
        else:
            pairs = get_pairs(word)
    word = " ".join(word)
    if word == "\n </w>":
        word = "\n</w>"
    self.cache[token] = word
    return word


def init(config):
    init_random(config.seed)

    if config.lang == 'fr':
        config.tokenizer = tokenizer = AutoTokenizer.from_pretrained(config.flavor, do_lower_case=False)

        from transformers.models.xlm.tokenization_xlm import XLMTokenizer
        assert isinstance(tokenizer, XLMTokenizer)

        # monkey patch XLM tokenizer
        import types
        tokenizer.bpe = types.MethodType(bpe, tokenizer)
    else:
        # warning: needs to be BertTokenizer for monkey patching to work
        config.tokenizer = tokenizer = BertTokenizer.from_pretrained(config.flavor, do_lower_case=False)

        # warning: monkey patch tokenizer to keep case information
        #from recasing_tokenizer import WordpieceTokenizer
        config.tokenizer.wordpiece_tokenizer = WordpieceTokenizer(vocab=tokenizer.vocab, unk_token=tokenizer.unk_token)

    if config.lang == 'fr':
        config.pad_token_id = tokenizer.pad_token_id
        config.cls_token_id = tokenizer.bos_token_id
        config.cls_token = tokenizer.bos_token
        config.sep_token_id = tokenizer.sep_token_id
        config.sep_token = tokenizer.sep_token
    else:
        config.pad_token_id = tokenizer.pad_token_id
        config.cls_token_id = tokenizer.cls_token_id
        config.cls_token = tokenizer.cls_token
        config.sep_token_id = tokenizer.sep_token_id
        config.sep_token = tokenizer.sep_token

    if not torch.cuda.is_available() and config.device == 'cuda':
        print('WARNING: reverting to cpu as cuda is not available', file=sys.stderr)
    config.device = torch.device(config.device if torch.cuda.is_available() else 'cpu')


def main(config, action, args):
    init(config)

    if action == 'train':
        train(config, *args)
    elif action == 'eval':
        run_eval(config, *args)
    elif action == 'predict':
        generate_predictions(config, *args)
    elif action == 'tensorize':
        make_tensors(config, *args)
    elif action == 'preprocess':
        preprocess_text(config, *args)
    else:
        print('invalid action "%s"' % action)
        sys.exit(1)

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("action", help="train|eval|predict|tensorize|preprocess", type=str)
    parser.add_argument("action_args", help="arguments for selected action", type=str, nargs='*')
    parser.add_argument("--seed", help="random seed", default=default_config.seed, type=int)
    parser.add_argument("--lang", help="language (fr, en, zh)", default=default_config.lang, type=str)
    parser.add_argument("--flavor", help="bert flavor in transformers model zoo", default=default_config.flavor, type=str)
    parser.add_argument("--max-length", help="maximum input length", default=default_config.max_length, type=int)
    parser.add_argument("--batch-size", help="size of batches", default=default_config.batch_size, type=int)
    parser.add_argument("--device", help="computation device (cuda, cpu)", default=default_config.device, type=str)
    parser.add_argument("--debug", help="whether to output more debug info", default=default_config.debug, type=bool)
    parser.add_argument("--updates", help="number of training updates to perform", default=default_config.updates, type=int)
    parser.add_argument("--period", help="validation period in updates", default=default_config.period, type=int)
    parser.add_argument("--lr", help="learning rate", default=default_config.lr, type=float)
    parser.add_argument("--dab-rate", help="drop at boundaries rate", default=default_config.dab_rate, type=float)
    config = Config(**parser.parse_args().__dict__)

    main(config, config.action, config.action_args)
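
For reference, predict is the mode the notebook exercises: it reads plain lowercase text on stdin and writes recased, punctuated text on stdout. A minimal standalone sketch of that call, assuming it is run from the repository root and that recasepunc/checkpoint has been pulled from Git LFS (the printed result is illustrative, not guaranteed):

# Hypothetical driver mirroring the notebook's subprocess call: raw ASR text
# in via stdin, cased and punctuated text out via stdout.
import subprocess

raw = "hello how are you"
cased = subprocess.check_output(
    "python recasepunc/recasepunc.py predict recasepunc/checkpoint",
    shell=True, text=True, input=raw,
)
print(cased)  # e.g. "Hello, how are you?"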
recasepunc/vosk-adapted.txt
ADDED
@@ -0,0 +1,17 @@
the
the
the beijing and shanghai welcome to the market strata open i'm yvonne good morning and i'm david ingles counting down of course the diablo trade on the chinese
mainland here in hong kong let's get your top stories today taper and a timetable dominating the latest fed minutes as official debates the exit path meanwhile i got beijing heading the other way hinting at the first triple r cut in more than a year and after the didi debacle here china may move to close a loophole long used
by companies to take their listings abroad all to enhance that was a horrible mistake council yesterday from china as a maybe it's time to cut the triple r to help them with small businesses they are struggling from the rise of raw material costs the key question is how likely is this yeah what they say it chances are likely it's probably going to be up yet
the fact that they're saying it might actually already mean we're getting some sentiment coming through in terms of an improved material tracker ten year yield we'll get to that in just a moment in china we're now flirting with the three percent level equity markets futures are pointing up as you can see here in china though broadly speaking though we're down for a seven day across asia seventh day in the last excuse me
in the last eight sessions here have little commodity markets we're stabilising across your oil or oil prices we're still down five six per cent from highs though as far as that is concerned fx markets your story is guys can we change the police are we're looking at generally speaking the dollar that's very much in focus here so you look at that against the euro you look at that
against the chinese currency twenty four hours ago who would have thought we were talking about this sort of more divergence and starker labour discord between where you are in a pboc to easily in the fed and very quickly we alluded to this of course if one three percent on your chinese ten year yield and we're not one point three percent lower and lower
yields there is a charge for you china's top us ten year yield is at the bottom yeah the chinatown area lowest since we saw last year of september yup
yeah it is a really big major shift in china's central bank policy that's the key question could it be coming of course let's flash out that into what we heard from the cabinet there raising the possibility of a cut to the reserve requirement ratio to both the economy at the same time we also from a former pboc official sheng songcheng said the central bank should actually
cut rates he's not just talking about a triple r and either the second half is an important window when china's monetary policy can tilt towards loosening while remaining stable and the interest rates can be lowered in a reasonable and moderate manner let's get the take from also be as well whether daisy i'm david chiu here the short of it is
so i guess one point if we still haven't gotten that if in the event that we do their take is they it might be a little bit too aggressive to address some of the softness in the economy in other words what they're saying is it needs some help the economy maybe not this much yeah there preferring perhaps perhaps liquidity injections here and there but this might signal a bit too much
for when it comes to reflating the economy joining us out of the dice all this let's bring in wang tao ubi as head of asia economics and the chief china economists as well wang tao thanks much for joining us first off do you think this is actually a real possibility now
or well will shrink or fade contro as a frequently called using triple r cut as a tool so i think yes indeed it is a real possibility that they could do this however in the past whenever the state council called for this a few days to a couple of weeks later we were
would have we would see a triple r cut if they called for it and but it's worth noting that last year in june shoot at the chicago auto quote for it and by the pbc did not hold onto with any market so i i would say at this moment it's probably a relatively high likelihood but anything
the wording is really you know about mitigating the higher cost of commodity prices they impact on at an ease and make their effective conquered funding a bit lower so it's possible that it's going to be a targeted not a overall triple cut and i i don't think this really reflects a
wholesale shift in monetary policy i think very very much in the same state concrete statement also talked about
recasepunc/vosk-adapted.txt.punc
ADDED
@@ -0,0 +1 @@
1 |
+
The. The. The Beijing and Shanghai. Welcome to the market strata open. I'm Yvonne, good morning, and I'm David Ingles, counting down, of course, the Diablo trade on the Chinese mainland here in Hong Kong. Let's get your top stories today, taper and a timetable dominating the latest Fed minutes as official debates. The exit path. Meanwhile, I got Beijing heading the other way, hinting at the first triple R cut in more than a year. And after the Didi debacle here, China may move to close a loophole. Long used by companies to take their listings abroad, all to enhance. That was a horrible mistake. Council yesterday from China as a. Maybe it's time to cut the triple R to help them with small businesses they are struggling from the rise of raw material costs. The key question is, how likely is this ? Yeah, what they say it. Chances are likely it's probably going to be up yet. The fact that they're saying it might actually already mean we're getting some sentiment coming through in terms of an improved material tracker. Ten year yield. We'll get to that in just a moment. In China. We're now flirting with the three percent level equity markets futures are pointing up. As you can see here in China, though. Broadly speaking, though, we're down for a seven day across Asia. Seventh day in the last. Excuse me, in the last eight sessions here have little commodity markets. We're stabilising across your oil or oil prices. We're still down five, six per cent from highs, though as far as that is concerned FX markets. Your story is, guys, can we change the police are we're looking at, generally speaking, the dollar. That's very much in focus here. So you look at that against the euro. You look at that against the Chinese currency Twenty four hours ago. Who would have thought we were talking about this sort of more divergence and starker labour discord between where you are in a PBOC to easily in the Fed and very quickly. We alluded to this, Of course, if one three percent on your Chinese ten year yield and we're not one point three percent lower and lower yields, there is a charge for you. China's top US ten year yield is at the bottom. Yeah, the Chinatown area lowest since we saw last year of September. Yup. Yeah, it is a really big major shift in China's central bank policy. That's the key question. Could it be coming ? Of course. Let's flash out that into what we heard from the cabinet there, raising the possibility of a cut to the reserve requirement ratio to both the economy at the same time. We also from a former PBOC official, Sheng Songcheng said the central bank should actually cut rates. He's not just talking about a triple R. And either the second half is an important window when China's monetary policy can tilt towards loosening while remaining stable and the interest rates can be lowered in a reasonable and moderate manner. Let's get the take from also be as well, whether Daisy, I'm David Chiu here, the short of it is so I guess one point, if we still haven't gotten that if in the event that we do their take is they, it might be a little bit too aggressive to address some of the softness in the economy. In other words, what they're saying is it needs some help. The economy, maybe not this much. Yeah, there, preferring perhaps perhaps liquidity injections here and there. But this might signal a bit too much for when it comes to reflating the economy. Joining us out of the dice. All this, Let's bring in Wang Tao Ubi as head of Asia Economics, and the chief China economists as well. 
Wang Tao, thanks much for joining us. First off, do you think this is actually a real possibility now or well will shrink or fade ? Contro as a frequently called using triple R cut as a tool. So I think yes, indeed, it is a real possibility. That they could do this. However, in the past, whenever the State Council called for this a few days to a couple of weeks later, we were. Would have we would see a triple R cut if they called for it. And. But it's worth noting that last year in June, shoot at the Chicago auto quote for it and by the PBC did not hold onto with any market so I. I would say at this moment it's probably a relatively high likelihood, but anything. The wording is really, you know about mitigating the higher cost of commodity prices they impact on at an ease and make their effective conquered funding a bit lower. So it's possible that it's going to be a targeted, not a overall triple cut and I. I don't think this really reflects a wholesale shift in monetary policy. I think very, very much in the same state. Concrete statement also talked about.
|
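recasepunc/vosk-adapted.txt.punc is the recased, punctuated version of recasepunc/vosk-adapted.txt, produced by the bundled checkpoint. A minimal sketch of how it can be regenerated, assuming the same CLI invocation the notebook below uses (recasepunc.py predict reads raw text on stdin and writes cased text to stdout):

# Hedged sketch: regenerate vosk-adapted.txt.punc from the raw transcript.
# Assumes recasepunc/recasepunc.py and recasepunc/checkpoint from this repo,
# and the same subprocess invocation used in the notebook below.
import subprocess

with open("recasepunc/vosk-adapted.txt", encoding="utf-8") as f:
    raw_text = f.read()

cased = subprocess.check_output(
    "python recasepunc/recasepunc.py predict recasepunc/checkpoint",
    shell=True, text=True, input=raw_text,
)

with open("recasepunc/vosk-adapted.txt.punc", "w", encoding="utf-8") as f:
    f.write(cased)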
requirements.txt
ADDED
@@ -0,0 +1,5 @@
1 |
+
Flask==2.0.1
|
2 |
+
gunicorn==20.1.0
|
3 |
+
Werkzeug==2.0.1
|
4 |
+
|
5 |
+
|
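requirements.txt pins a minimal Flask + gunicorn stack. A sketch of the smallest app those pins could serve; the module name, route, and handler are illustrative assumptions, not the repository's actual entry point:

# Hypothetical minimal app for the pinned Flask==2.0.1 / gunicorn==20.1.0 stack.
# The module name and route here are assumptions for illustration only.
from flask import Flask

app = Flask(__name__)

@app.route("/")
def index():
    return "voice-to-text service is up"

# Typically served with: gunicorn -b 0.0.0.0:8080 main:app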
temp_audio.wav
ADDED
Binary file (639 kB).
|
|
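temp_audio.wav is a sample clip that can be transcribed offline with the same Vosk model and recasepunc checkpoint the notebook below wires together. A hedged sketch, assuming the file is 16-bit mono PCM (the input Vosk expects):

# Hedged sketch: offline transcription of temp_audio.wav.
# Assumes a 16-bit mono PCM WAV; model and checkpoint match the notebook below.
import json
import subprocess
import wave

from vosk import Model, KaldiRecognizer

wf = wave.open("temp_audio.wav", "rb")
model = Model(model_name="vosk-model-en-us-0.42-gigaspeech")
rec = KaldiRecognizer(model, wf.getframerate())
rec.SetWords(True)

while True:
    data = wf.readframes(4096)
    if not data:
        break
    rec.AcceptWaveform(data)

text = json.loads(rec.FinalResult())["text"]
cased = subprocess.check_output(
    "python recasepunc/recasepunc.py predict recasepunc/checkpoint",
    shell=True, text=True, input=text,
)
print(cased)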
voice_to_text_systemdev -checkpoint-checkpoint.ipynb
ADDED
@@ -0,0 +1,424 @@
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"execution_count": 29,
|
6 |
+
"id": "5c7d8fe6-69ca-4f29-9046-0b0bc9f31911",
|
7 |
+
"metadata": {},
|
8 |
+
"outputs": [
|
9 |
+
{
|
10 |
+
"data": {
|
11 |
+
"application/vnd.jupyter.widget-view+json": {
|
12 |
+
"model_id": "99ee6b03c5154644998c23c837444e83",
|
13 |
+
"version_major": 2,
|
14 |
+
"version_minor": 0
|
15 |
+
},
|
16 |
+
"text/plain": [
|
17 |
+
"HBox(children=(Button(button_style='success', description='Record', icon='microphone', style=ButtonStyle()), B…"
|
18 |
+
]
|
19 |
+
},
|
20 |
+
"metadata": {},
|
21 |
+
"output_type": "display_data"
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"data": {
|
25 |
+
"application/vnd.jupyter.widget-view+json": {
|
26 |
+
"model_id": "2b3e4f24da8d4c198b5d15f0f3f7399d",
|
27 |
+
"version_major": 2,
|
28 |
+
"version_minor": 0
|
29 |
+
},
|
30 |
+
"text/plain": [
|
31 |
+
"Output()"
|
32 |
+
]
|
33 |
+
},
|
34 |
+
"metadata": {},
|
35 |
+
"output_type": "display_data"
|
36 |
+
}
|
37 |
+
],
|
38 |
+
"source": [
|
39 |
+
"import ipywidgets as widgets\n",
|
40 |
+
"from IPython.display import display, clear_output\n",
|
41 |
+
"from threading import Thread\n",
|
42 |
+
"from queue import Queue\n",
|
43 |
+
"import time\n",
|
44 |
+
"\n",
|
45 |
+
"messages = Queue()\n",
|
46 |
+
"recordings = Queue()\n",
|
47 |
+
"\n",
|
48 |
+
"record_button = widgets.Button(\n",
|
49 |
+
" description=\"Record\",\n",
|
50 |
+
" disabled=False,\n",
|
51 |
+
" button_style=\"success\",\n",
|
52 |
+
" icon=\"microphone\"\n",
|
53 |
+
")\n",
|
54 |
+
"\n",
|
55 |
+
"stop_button = widgets.Button(\n",
|
56 |
+
" description=\"Stop\",\n",
|
57 |
+
" disabled=False,\n",
|
58 |
+
" button_style=\"warning\",\n",
|
59 |
+
" icon=\"stop\"\n",
|
60 |
+
")\n",
|
61 |
+
"\n",
|
62 |
+
"output = widgets.Output()\n",
|
63 |
+
"\n",
|
64 |
+
"def record_microphone():\n",
|
65 |
+
" while not messages.empty():\n",
|
66 |
+
" time.sleep(1) # Simulate recording\n",
|
67 |
+
" recordings.put(\"Audio recorded.\") # Simulated recorded audio data\n",
|
68 |
+
"\n",
|
69 |
+
"def speech_recognition(output_widget):\n",
|
70 |
+
" while not messages.empty():\n",
|
71 |
+
" time.sleep(2) # Simulate transcription\n",
|
72 |
+
" with output_widget:\n",
|
73 |
+
" clear_output(wait=True)\n",
|
74 |
+
" display(\"Transcription: Hello, how are you?\") # Simulated transcription result\n",
|
75 |
+
"\n",
|
76 |
+
"def start_recording(data):\n",
|
77 |
+
" if not messages.empty():\n",
|
78 |
+
" return # Recording already in progress\n",
|
79 |
+
"\n",
|
80 |
+
" messages.put(True)\n",
|
81 |
+
" with output:\n",
|
82 |
+
" clear_output(wait=True)\n",
|
83 |
+
" display(\"Starting...\")\n",
|
84 |
+
"\n",
|
85 |
+
" record = Thread(target=record_microphone)\n",
|
86 |
+
" record.start()\n",
|
87 |
+
"\n",
|
88 |
+
" transcribe = Thread(target=speech_recognition, args=(output,))\n",
|
89 |
+
" transcribe.start()\n",
|
90 |
+
"\n",
|
91 |
+
"def stop_recording(data):\n",
|
92 |
+
" if messages.empty():\n",
|
93 |
+
" return # No recording in progress\n",
|
94 |
+
"\n",
|
95 |
+
" messages.get()\n",
|
96 |
+
" with output:\n",
|
97 |
+
" clear_output(wait=True)\n",
|
98 |
+
" display(\"Stopped.\")\n",
|
99 |
+
"\n",
|
100 |
+
"record_button.on_click(start_recording)\n",
|
101 |
+
"stop_button.on_click(stop_recording)\n",
|
102 |
+
"\n",
|
103 |
+
"display(widgets.HBox([record_button, stop_button]), output)\n"
|
104 |
+
]
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"cell_type": "code",
|
108 |
+
"execution_count": 30,
|
109 |
+
"id": "bdcb9097-ab31-4dcc-9e2a-4e0818fceb3f",
|
110 |
+
"metadata": {},
|
111 |
+
"outputs": [
|
112 |
+
{
|
113 |
+
"name": "stdout",
|
114 |
+
"output_type": "stream",
|
115 |
+
"text": [
|
116 |
+
"Requirement already satisfied: pyaudio in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (0.2.14)\n"
|
117 |
+
]
|
118 |
+
}
|
119 |
+
],
|
120 |
+
"source": [
|
121 |
+
"!python -m pip install pyaudio"
|
122 |
+
]
|
123 |
+
},
|
124 |
+
{
|
125 |
+
"cell_type": "code",
|
126 |
+
"execution_count": 31,
|
127 |
+
"id": "34112777-1845-4aff-80de-099ceed52f01",
|
128 |
+
"metadata": {},
|
129 |
+
"outputs": [
|
130 |
+
{
|
131 |
+
"name": "stdout",
|
132 |
+
"output_type": "stream",
|
133 |
+
"text": [
|
134 |
+
"{'index': 0, 'structVersion': 2, 'name': 'Microsoft Sound Mapper - Input', 'hostApi': 0, 'maxInputChannels': 2, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.09, 'defaultLowOutputLatency': 0.09, 'defaultHighInputLatency': 0.18, 'defaultHighOutputLatency': 0.18, 'defaultSampleRate': 44100.0}\n",
|
135 |
+
"{'index': 1, 'structVersion': 2, 'name': 'Microphone Array (Intel® Smart ', 'hostApi': 0, 'maxInputChannels': 4, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.09, 'defaultLowOutputLatency': 0.09, 'defaultHighInputLatency': 0.18, 'defaultHighOutputLatency': 0.18, 'defaultSampleRate': 44100.0}\n",
|
136 |
+
"{'index': 2, 'structVersion': 2, 'name': 'Microsoft Sound Mapper - Output', 'hostApi': 0, 'maxInputChannels': 0, 'maxOutputChannels': 2, 'defaultLowInputLatency': 0.09, 'defaultLowOutputLatency': 0.09, 'defaultHighInputLatency': 0.18, 'defaultHighOutputLatency': 0.18, 'defaultSampleRate': 44100.0}\n",
|
137 |
+
"{'index': 3, 'structVersion': 2, 'name': 'Speakers (Realtek(R) Audio)', 'hostApi': 0, 'maxInputChannels': 0, 'maxOutputChannels': 2, 'defaultLowInputLatency': 0.09, 'defaultLowOutputLatency': 0.09, 'defaultHighInputLatency': 0.18, 'defaultHighOutputLatency': 0.18, 'defaultSampleRate': 44100.0}\n",
|
138 |
+
"{'index': 4, 'structVersion': 2, 'name': 'Primary Sound Capture Driver', 'hostApi': 1, 'maxInputChannels': 2, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.12, 'defaultLowOutputLatency': 0.0, 'defaultHighInputLatency': 0.24, 'defaultHighOutputLatency': 0.0, 'defaultSampleRate': 44100.0}\n",
|
139 |
+
"{'index': 5, 'structVersion': 2, 'name': 'Microphone Array (Intel® Smart Sound Technology (Intel® SST))', 'hostApi': 1, 'maxInputChannels': 4, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.12, 'defaultLowOutputLatency': 0.0, 'defaultHighInputLatency': 0.24, 'defaultHighOutputLatency': 0.0, 'defaultSampleRate': 44100.0}\n",
|
140 |
+
"{'index': 6, 'structVersion': 2, 'name': 'Primary Sound Driver', 'hostApi': 1, 'maxInputChannels': 0, 'maxOutputChannels': 8, 'defaultLowInputLatency': 0.0, 'defaultLowOutputLatency': 0.12, 'defaultHighInputLatency': 0.0, 'defaultHighOutputLatency': 0.24, 'defaultSampleRate': 44100.0}\n",
|
141 |
+
"{'index': 7, 'structVersion': 2, 'name': 'Speakers (Realtek(R) Audio)', 'hostApi': 1, 'maxInputChannels': 0, 'maxOutputChannels': 2, 'defaultLowInputLatency': 0.0, 'defaultLowOutputLatency': 0.12, 'defaultHighInputLatency': 0.0, 'defaultHighOutputLatency': 0.24, 'defaultSampleRate': 44100.0}\n",
|
142 |
+
"{'index': 8, 'structVersion': 2, 'name': 'Speakers (Realtek(R) Audio)', 'hostApi': 2, 'maxInputChannels': 0, 'maxOutputChannels': 8, 'defaultLowInputLatency': 0.0, 'defaultLowOutputLatency': 0.003, 'defaultHighInputLatency': 0.0, 'defaultHighOutputLatency': 0.01, 'defaultSampleRate': 48000.0}\n",
|
143 |
+
"{'index': 9, 'structVersion': 2, 'name': 'Microphone Array (Intel® Smart Sound Technology (Intel® SST))', 'hostApi': 2, 'maxInputChannels': 2, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.002, 'defaultLowOutputLatency': 0.0, 'defaultHighInputLatency': 0.01, 'defaultHighOutputLatency': 0.0, 'defaultSampleRate': 48000.0}\n",
|
144 |
+
"{'index': 10, 'structVersion': 2, 'name': 'Microphone Array 1 (Intel® Smart Sound Technology (Intel® SST) Microphone)', 'hostApi': 3, 'maxInputChannels': 2, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.01, 'defaultLowOutputLatency': 0.01, 'defaultHighInputLatency': 0.04, 'defaultHighOutputLatency': 0.04, 'defaultSampleRate': 48000.0}\n",
|
145 |
+
"{'index': 11, 'structVersion': 2, 'name': 'Microphone Array 2 (Intel® Smart Sound Technology (Intel® SST) Microphone)', 'hostApi': 3, 'maxInputChannels': 4, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.01, 'defaultLowOutputLatency': 0.01, 'defaultHighInputLatency': 0.04, 'defaultHighOutputLatency': 0.04, 'defaultSampleRate': 16000.0}\n",
|
146 |
+
"{'index': 12, 'structVersion': 2, 'name': 'Microphone Array 3 (Intel® Smart Sound Technology (Intel® SST) Microphone)', 'hostApi': 3, 'maxInputChannels': 4, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.01, 'defaultLowOutputLatency': 0.01, 'defaultHighInputLatency': 0.04, 'defaultHighOutputLatency': 0.04, 'defaultSampleRate': 16000.0}\n",
|
147 |
+
"{'index': 13, 'structVersion': 2, 'name': 'Stereo Mix (Realtek HD Audio Stereo input)', 'hostApi': 3, 'maxInputChannels': 2, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.01, 'defaultLowOutputLatency': 0.01, 'defaultHighInputLatency': 0.04, 'defaultHighOutputLatency': 0.04, 'defaultSampleRate': 48000.0}\n",
|
148 |
+
"{'index': 14, 'structVersion': 2, 'name': 'Headphones (Realtek HD Audio 2nd output with SST)', 'hostApi': 3, 'maxInputChannels': 0, 'maxOutputChannels': 2, 'defaultLowInputLatency': 0.01, 'defaultLowOutputLatency': 0.01, 'defaultHighInputLatency': 0.04, 'defaultHighOutputLatency': 0.04, 'defaultSampleRate': 48000.0}\n",
|
149 |
+
"{'index': 15, 'structVersion': 2, 'name': 'Speakers (Realtek HD Audio output with SST)', 'hostApi': 3, 'maxInputChannels': 0, 'maxOutputChannels': 2, 'defaultLowInputLatency': 0.01, 'defaultLowOutputLatency': 0.01, 'defaultHighInputLatency': 0.04, 'defaultHighOutputLatency': 0.04, 'defaultSampleRate': 48000.0}\n",
|
150 |
+
"{'index': 16, 'structVersion': 2, 'name': 'Microphone (Realtek HD Audio Mic input)', 'hostApi': 3, 'maxInputChannels': 2, 'maxOutputChannels': 0, 'defaultLowInputLatency': 0.01, 'defaultLowOutputLatency': 0.01, 'defaultHighInputLatency': 0.04, 'defaultHighOutputLatency': 0.04, 'defaultSampleRate': 44100.0}\n"
|
151 |
+
]
|
152 |
+
}
|
153 |
+
],
|
154 |
+
"source": [
|
155 |
+
"import pyaudio\n",
|
156 |
+
"\n",
|
157 |
+
"p = pyaudio.PyAudio()\n",
|
158 |
+
"for i in range(p.get_device_count()):\n",
|
159 |
+
" print(p.get_device_info_by_index(i))\n",
|
160 |
+
"\n",
|
161 |
+
"p.terminate()"
|
162 |
+
]
|
163 |
+
},
|
164 |
+
{
|
165 |
+
"cell_type": "code",
|
166 |
+
"execution_count": 32,
|
167 |
+
"id": "2e74dacf-1a91-4dfa-bf91-c64c72755d75",
|
168 |
+
"metadata": {},
|
169 |
+
"outputs": [],
|
170 |
+
"source": [
|
171 |
+
"import pyaudio\n",
|
172 |
+
"from queue import Queue\n",
|
173 |
+
"\n",
|
174 |
+
"CHANNELS = 1\n",
|
175 |
+
"FRAME_RATE = 16000\n",
|
176 |
+
"RECORD_SECONDS = 20\n",
|
177 |
+
"AUDIO_FORMAT = pyaudio.paInt16\n",
|
178 |
+
"SAMPLE_SIZE = 2\n",
|
179 |
+
"\n",
|
180 |
+
"messages = Queue()\n",
|
181 |
+
"recordings = Queue()\n",
|
182 |
+
"\n",
|
183 |
+
"def record_microphone(chunk=1024):\n",
|
184 |
+
" p = pyaudio.PyAudio()\n",
|
185 |
+
"\n",
|
186 |
+
" stream = p.open(format=AUDIO_FORMAT,\n",
|
187 |
+
" channels=CHANNELS,\n",
|
188 |
+
" rate=FRAME_RATE,\n",
|
189 |
+
" input=True,\n",
|
190 |
+
" input_device_index=1,\n",
|
191 |
+
" frames_per_buffer=chunk)\n",
|
192 |
+
"\n",
|
193 |
+
" frames = []\n",
|
194 |
+
"\n",
|
195 |
+
" while not messages.empty():\n",
|
196 |
+
" data = stream.read(chunk)\n",
|
197 |
+
" frames.append(data)\n",
|
198 |
+
"\n",
|
199 |
+
" if len(frames) >= int(FRAME_RATE * RECORD_SECONDS / chunk):\n",
|
200 |
+
" recordings.put(frames.copy())\n",
|
201 |
+
" frames = []\n",
|
202 |
+
"\n",
|
203 |
+
" stream.stop_stream()\n",
|
204 |
+
" stream.close()\n",
|
205 |
+
" p.terminate()\n"
|
206 |
+
]
|
207 |
+
},
|
208 |
+
{
|
209 |
+
"cell_type": "code",
|
210 |
+
"execution_count": 33,
|
211 |
+
"id": "931dc754-e034-45e7-981b-a9210c1fe6e9",
|
212 |
+
"metadata": {},
|
213 |
+
"outputs": [],
|
214 |
+
"source": [
|
215 |
+
"import subprocess\n",
|
216 |
+
"import json\n",
|
217 |
+
"from vosk import Model, KaldiRecognizer\n",
|
218 |
+
"\n",
|
219 |
+
"model = Model(model_name=\"vosk-model-en-us-0.42-gigaspeech\")\n",
|
220 |
+
"rec = KaldiRecognizer(model, FRAME_RATE)\n",
|
221 |
+
"rec.SetWords(True)\n",
|
222 |
+
"\n",
|
223 |
+
"def speech_recognition(output):\n",
|
224 |
+
" while not messages.empty():\n",
|
225 |
+
" frames = recordings.get()\n",
|
226 |
+
"\n",
|
227 |
+
" rec.AcceptWaveform(b''.join(frames))\n",
|
228 |
+
" result = rec.Result()\n",
|
229 |
+
" text = json.loads(result)[\"text\"]\n",
|
230 |
+
"\n",
|
231 |
+
" cased = subprocess.check_output(\"python recasepunc/recasepunc.py predict recasepunc/checkpoint\", shell=True, text=True, input=text)\n",
|
232 |
+
" output.append_stdout(cased)"
|
233 |
+
]
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"cell_type": "code",
|
237 |
+
"execution_count": 1,
|
238 |
+
"id": "a27fb138-d3a9-4e04-83fe-23aca2921d92",
|
239 |
+
"metadata": {},
|
240 |
+
"outputs": [
|
241 |
+
{
|
242 |
+
"name": "stdout",
|
243 |
+
"output_type": "stream",
|
244 |
+
"text": [
|
245 |
+
"Requirement already satisfied: gradio in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (4.36.1)\n",
|
246 |
+
"Requirement already satisfied: aiofiles<24.0,>=22.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (23.2.1)\n",
|
247 |
+
"Requirement already satisfied: altair<6.0,>=4.2.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (5.3.0)\n",
|
248 |
+
"Requirement already satisfied: fastapi in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (0.111.0)\n",
|
249 |
+
"Requirement already satisfied: ffmpy in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (0.3.2)\n",
|
250 |
+
"Requirement already satisfied: gradio-client==1.0.1 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (1.0.1)\n",
|
251 |
+
"Requirement already satisfied: httpx>=0.24.1 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (0.27.0)\n",
|
252 |
+
"Requirement already satisfied: huggingface-hub>=0.19.3 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (0.23.4)\n",
|
253 |
+
"Requirement already satisfied: importlib-resources<7.0,>=1.3 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (6.4.0)\n",
|
254 |
+
"Requirement already satisfied: jinja2<4.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (3.1.4)\n",
|
255 |
+
"Requirement already satisfied: markupsafe~=2.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (2.1.5)\n",
|
256 |
+
"Requirement already satisfied: matplotlib~=3.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (3.9.0)\n",
|
257 |
+
"Requirement already satisfied: numpy<3.0,>=1.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (1.26.4)\n",
|
258 |
+
"Requirement already satisfied: orjson~=3.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (3.10.5)\n",
|
259 |
+
"Requirement already satisfied: packaging in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (24.1)\n",
|
260 |
+
"Requirement already satisfied: pandas<3.0,>=1.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (2.2.2)\n",
|
261 |
+
"Requirement already satisfied: pillow<11.0,>=8.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (10.3.0)\n",
|
262 |
+
"Requirement already satisfied: pydantic>=2.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (2.7.4)\n",
|
263 |
+
"Requirement already satisfied: pydub in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (0.25.1)\n",
|
264 |
+
"Requirement already satisfied: python-multipart>=0.0.9 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (0.0.9)\n",
|
265 |
+
"Requirement already satisfied: pyyaml<7.0,>=5.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (6.0.1)\n",
|
266 |
+
"Requirement already satisfied: ruff>=0.2.2 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (0.4.10)\n",
|
267 |
+
"Requirement already satisfied: semantic-version~=2.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (2.10.0)\n",
|
268 |
+
"Requirement already satisfied: tomlkit==0.12.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (0.12.0)\n",
|
269 |
+
"Requirement already satisfied: typer<1.0,>=0.12 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (0.12.3)\n",
|
270 |
+
"Requirement already satisfied: typing-extensions~=4.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (4.12.2)\n",
|
271 |
+
"Requirement already satisfied: urllib3~=2.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (2.2.1)\n",
|
272 |
+
"Requirement already satisfied: uvicorn>=0.14.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio) (0.30.1)\n",
|
273 |
+
"Requirement already satisfied: fsspec in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio-client==1.0.1->gradio) (2024.6.0)\n",
|
274 |
+
"Requirement already satisfied: websockets<12.0,>=10.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from gradio-client==1.0.1->gradio) (11.0.3)\n",
|
275 |
+
"Requirement already satisfied: jsonschema>=3.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from altair<6.0,>=4.2.0->gradio) (4.22.0)\n",
|
276 |
+
"Requirement already satisfied: toolz in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from altair<6.0,>=4.2.0->gradio) (0.12.1)\n",
|
277 |
+
"Requirement already satisfied: anyio in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from httpx>=0.24.1->gradio) (4.4.0)\n",
|
278 |
+
"Requirement already satisfied: certifi in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from httpx>=0.24.1->gradio) (2024.6.2)\n",
|
279 |
+
"Requirement already satisfied: httpcore==1.* in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from httpx>=0.24.1->gradio) (1.0.5)\n",
|
280 |
+
"Requirement already satisfied: idna in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from httpx>=0.24.1->gradio) (3.7)\n",
|
281 |
+
"Requirement already satisfied: sniffio in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from httpx>=0.24.1->gradio) (1.3.1)\n",
|
282 |
+
"Requirement already satisfied: h11<0.15,>=0.13 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from httpcore==1.*->httpx>=0.24.1->gradio) (0.14.0)\n",
|
283 |
+
"Requirement already satisfied: filelock in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from huggingface-hub>=0.19.3->gradio) (3.15.1)\n",
|
284 |
+
"Requirement already satisfied: requests in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from huggingface-hub>=0.19.3->gradio) (2.32.3)\n",
|
285 |
+
"Requirement already satisfied: tqdm>=4.42.1 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from huggingface-hub>=0.19.3->gradio) (4.66.4)\n",
|
286 |
+
"Requirement already satisfied: contourpy>=1.0.1 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from matplotlib~=3.0->gradio) (1.2.1)\n",
|
287 |
+
"Requirement already satisfied: cycler>=0.10 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from matplotlib~=3.0->gradio) (0.12.1)\n",
|
288 |
+
"Requirement already satisfied: fonttools>=4.22.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from matplotlib~=3.0->gradio) (4.53.0)\n",
|
289 |
+
"Requirement already satisfied: kiwisolver>=1.3.1 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from matplotlib~=3.0->gradio) (1.4.5)\n",
|
290 |
+
"Requirement already satisfied: pyparsing>=2.3.1 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from matplotlib~=3.0->gradio) (3.1.2)\n",
|
291 |
+
"Requirement already satisfied: python-dateutil>=2.7 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from matplotlib~=3.0->gradio) (2.9.0.post0)\n",
|
292 |
+
"Requirement already satisfied: pytz>=2020.1 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas<3.0,>=1.0->gradio) (2024.1)\n",
|
293 |
+
"Requirement already satisfied: tzdata>=2022.7 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pandas<3.0,>=1.0->gradio) (2024.1)\n",
|
294 |
+
"Requirement already satisfied: annotated-types>=0.4.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pydantic>=2.0->gradio) (0.7.0)\n",
|
295 |
+
"Requirement already satisfied: pydantic-core==2.18.4 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from pydantic>=2.0->gradio) (2.18.4)\n",
|
296 |
+
"Requirement already satisfied: click>=8.0.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from typer<1.0,>=0.12->gradio) (8.1.7)\n",
|
297 |
+
"Requirement already satisfied: shellingham>=1.3.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from typer<1.0,>=0.12->gradio) (1.5.4)\n",
|
298 |
+
"Requirement already satisfied: rich>=10.11.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from typer<1.0,>=0.12->gradio) (13.7.1)\n",
|
299 |
+
"Requirement already satisfied: starlette<0.38.0,>=0.37.2 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from fastapi->gradio) (0.37.2)\n",
|
300 |
+
"Requirement already satisfied: fastapi-cli>=0.0.2 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from fastapi->gradio) (0.0.4)\n",
|
301 |
+
"Requirement already satisfied: ujson!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,>=4.0.1 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from fastapi->gradio) (5.10.0)\n",
|
302 |
+
"Requirement already satisfied: email_validator>=2.0.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from fastapi->gradio) (2.2.0)\n",
|
303 |
+
"Requirement already satisfied: colorama in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from click>=8.0.0->typer<1.0,>=0.12->gradio) (0.4.6)\n",
|
304 |
+
"Requirement already satisfied: dnspython>=2.0.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from email_validator>=2.0.0->fastapi->gradio) (2.6.1)\n",
|
305 |
+
"Requirement already satisfied: attrs>=22.2.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (23.2.0)\n",
|
306 |
+
"Requirement already satisfied: jsonschema-specifications>=2023.03.6 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (2023.12.1)\n",
|
307 |
+
"Requirement already satisfied: referencing>=0.28.4 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (0.35.1)\n",
|
308 |
+
"Requirement already satisfied: rpds-py>=0.7.1 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from jsonschema>=3.0->altair<6.0,>=4.2.0->gradio) (0.18.1)\n",
|
309 |
+
"Requirement already satisfied: six>=1.5 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from python-dateutil>=2.7->matplotlib~=3.0->gradio) (1.16.0)\n",
|
310 |
+
"Requirement already satisfied: markdown-it-py>=2.2.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (3.0.0)\n",
|
311 |
+
"Requirement already satisfied: pygments<3.0.0,>=2.13.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from rich>=10.11.0->typer<1.0,>=0.12->gradio) (2.18.0)\n",
|
312 |
+
"Requirement already satisfied: httptools>=0.5.0 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from uvicorn[standard]>=0.12.0->fastapi->gradio) (0.6.1)\n",
|
313 |
+
"Requirement already satisfied: python-dotenv>=0.13 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from uvicorn[standard]>=0.12.0->fastapi->gradio) (1.0.1)\n",
|
314 |
+
"Requirement already satisfied: watchfiles>=0.13 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from uvicorn[standard]>=0.12.0->fastapi->gradio) (0.22.0)\n",
|
315 |
+
"Requirement already satisfied: charset-normalizer<4,>=2 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from requests->huggingface-hub>=0.19.3->gradio) (3.3.2)\n",
|
316 |
+
"Requirement already satisfied: mdurl~=0.1 in c:\\users\\samarth srivastava\\appdata\\local\\programs\\python\\python312\\lib\\site-packages (from markdown-it-py>=2.2.0->rich>=10.11.0->typer<1.0,>=0.12->gradio) (0.1.2)\n",
|
317 |
+
"Note: you may need to restart the kernel to use updated packages.\n"
|
318 |
+
]
|
319 |
+
}
|
320 |
+
],
|
321 |
+
"source": [
|
322 |
+
"pip install gradio\n"
|
323 |
+
]
|
324 |
+
},
|
325 |
+
{
|
326 |
+
"cell_type": "code",
|
327 |
+
"execution_count": 2,
|
328 |
+
"id": "6d7852a7-88e5-4e39-afae-da0bad2f72e5",
|
329 |
+
"metadata": {},
|
330 |
+
"outputs": [],
|
331 |
+
"source": [
|
332 |
+
"def my_function(input1, input2):\n",
|
333 |
+
" # Process the inputs and generate the output\n",
|
334 |
+
" output = f\"Processed {input1} and {input2}\"\n",
|
335 |
+
" return output\n"
|
336 |
+
]
|
337 |
+
},
|
338 |
+
{
|
339 |
+
"cell_type": "code",
|
340 |
+
"execution_count": 5,
|
341 |
+
"id": "c57fb014-6562-4909-b3d2-52a048c9af18",
|
342 |
+
"metadata": {},
|
343 |
+
"outputs": [
|
344 |
+
{
|
345 |
+
"name": "stdout",
|
346 |
+
"output_type": "stream",
|
347 |
+
"text": [
|
348 |
+
"Running on local URL: http://127.0.0.1:7861\n",
|
349 |
+
"Running on public URL: https://4e26f42d95143ec249.gradio.live\n",
|
350 |
+
"\n",
|
351 |
+
"This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)\n"
|
352 |
+
]
|
353 |
+
},
|
354 |
+
{
|
355 |
+
"data": {
|
356 |
+
"text/html": [
|
357 |
+
"<div><iframe src=\"https://4e26f42d95143ec249.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
358 |
+
],
|
359 |
+
"text/plain": [
|
360 |
+
"<IPython.core.display.HTML object>"
|
361 |
+
]
|
362 |
+
},
|
363 |
+
"metadata": {},
|
364 |
+
"output_type": "display_data"
|
365 |
+
},
|
366 |
+
{
|
367 |
+
"data": {
|
368 |
+
"text/plain": []
|
369 |
+
},
|
370 |
+
"execution_count": 5,
|
371 |
+
"metadata": {},
|
372 |
+
"output_type": "execute_result"
|
373 |
+
}
|
374 |
+
],
|
375 |
+
"source": [
|
376 |
+
"import gradio as gr\n",
|
377 |
+
"\n",
|
378 |
+
"# Define the function you want to expose through Gradio\n",
|
379 |
+
"def my_function(input1, input2):\n",
|
380 |
+
" output = f\"Processed {input1} and {input2}\"\n",
|
381 |
+
" return output\n",
|
382 |
+
"\n",
|
383 |
+
"# Create the Gradio interface\n",
|
384 |
+
"iface = gr.Interface(\n",
|
385 |
+
" fn=my_function,\n",
|
386 |
+
" inputs=[gr.Textbox(label=\"Input 1\"), gr.Textbox(label=\"Input 2\")],\n",
|
387 |
+
" outputs=gr.Textbox(label=\"Output\")\n",
|
388 |
+
")\n",
|
389 |
+
"\n",
|
390 |
+
"# Launch the interface with a public link\n",
|
391 |
+
"iface.launch(share=True)\n"
|
392 |
+
]
|
393 |
+
},
|
394 |
+
{
|
395 |
+
"cell_type": "code",
|
396 |
+
"execution_count": null,
|
397 |
+
"id": "bc4e1d90-6688-4205-a0d2-7933fcdc5874",
|
398 |
+
"metadata": {},
|
399 |
+
"outputs": [],
|
400 |
+
"source": []
|
401 |
+
}
|
402 |
+
],
|
403 |
+
"metadata": {
|
404 |
+
"kernelspec": {
|
405 |
+
"display_name": "Python 3 (ipykernel)",
|
406 |
+
"language": "python",
|
407 |
+
"name": "python3"
|
408 |
+
},
|
409 |
+
"language_info": {
|
410 |
+
"codemirror_mode": {
|
411 |
+
"name": "ipython",
|
412 |
+
"version": 3
|
413 |
+
},
|
414 |
+
"file_extension": ".py",
|
415 |
+
"mimetype": "text/x-python",
|
416 |
+
"name": "python",
|
417 |
+
"nbconvert_exporter": "python",
|
418 |
+
"pygments_lexer": "ipython3",
|
419 |
+
"version": "3.12.4"
|
420 |
+
}
|
421 |
+
},
|
422 |
+
"nbformat": 4,
|
423 |
+
"nbformat_minor": 5
|
424 |
+
}
|
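The recording cell above defines SAMPLE_SIZE but never writes captured audio to disk. A hedged sketch of how one recorded chunk could be persisted as temp_audio.wav with those constants; this is an assumption about how that file was produced, not code from the notebook:

# Hedged sketch: persist one recorded chunk as a WAV file.
# Uses CHANNELS / FRAME_RATE / SAMPLE_SIZE from the notebook cell above;
# SAMPLE_SIZE (2 bytes = 16-bit paInt16) is otherwise unused there.
import wave

frames = recordings.get()              # one RECORD_SECONDS chunk of raw PCM
with wave.open("temp_audio.wav", "wb") as wf:
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(SAMPLE_SIZE)
    wf.setframerate(FRAME_RATE)
    wf.writeframes(b"".join(frames))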
voice_to_text_systemdev -checkpoint-checkpoint.py
ADDED
@@ -0,0 +1,195 @@
1 |
+
#!/usr/bin/env python
|
2 |
+
# coding: utf-8
|
3 |
+
|
4 |
+
# In[29]:
|
5 |
+
|
6 |
+
|
7 |
+
import ipywidgets as widgets
|
8 |
+
from IPython.display import display, clear_output
|
9 |
+
from threading import Thread
|
10 |
+
from queue import Queue
|
11 |
+
import time
|
12 |
+
|
13 |
+
messages = Queue()
|
14 |
+
recordings = Queue()
|
15 |
+
|
16 |
+
record_button = widgets.Button(
|
17 |
+
description="Record",
|
18 |
+
disabled=False,
|
19 |
+
button_style="success",
|
20 |
+
icon="microphone"
|
21 |
+
)
|
22 |
+
|
23 |
+
stop_button = widgets.Button(
|
24 |
+
description="Stop",
|
25 |
+
disabled=False,
|
26 |
+
button_style="warning",
|
27 |
+
icon="stop"
|
28 |
+
)
|
29 |
+
|
30 |
+
output = widgets.Output()
|
31 |
+
|
32 |
+
def record_microphone():
|
33 |
+
while not messages.empty():
|
34 |
+
time.sleep(1) # Simulate recording
|
35 |
+
recordings.put("Audio recorded.") # Simulated recorded audio data
|
36 |
+
|
37 |
+
def speech_recognition(output_widget):
|
38 |
+
while not messages.empty():
|
39 |
+
time.sleep(2) # Simulate transcription
|
40 |
+
with output_widget:
|
41 |
+
clear_output(wait=True)
|
42 |
+
display("Transcription: Hello, how are you?") # Simulated transcription result
|
43 |
+
|
44 |
+
def start_recording(data):
|
45 |
+
if not messages.empty():
|
46 |
+
return # Recording already in progress
|
47 |
+
|
48 |
+
messages.put(True)
|
49 |
+
with output:
|
50 |
+
clear_output(wait=True)
|
51 |
+
display("Starting...")
|
52 |
+
|
53 |
+
record = Thread(target=record_microphone)
|
54 |
+
record.start()
|
55 |
+
|
56 |
+
transcribe = Thread(target=speech_recognition, args=(output,))
|
57 |
+
transcribe.start()
|
58 |
+
|
59 |
+
def stop_recording(data):
|
60 |
+
if messages.empty():
|
61 |
+
return # No recording in progress
|
62 |
+
|
63 |
+
messages.get()
|
64 |
+
with output:
|
65 |
+
clear_output(wait=True)
|
66 |
+
display("Stopped.")
|
67 |
+
|
68 |
+
record_button.on_click(start_recording)
|
69 |
+
stop_button.on_click(stop_recording)
|
70 |
+
|
71 |
+
display(widgets.HBox([record_button, stop_button]), output)
|
72 |
+
|
73 |
+
|
74 |
+
# In[30]:
|
75 |
+
|
76 |
+
|
77 |
+
get_ipython().system('python -m pip install pyaudio')
|
78 |
+
|
79 |
+
|
80 |
+
# In[31]:
|
81 |
+
|
82 |
+
|
83 |
+
import pyaudio
|
84 |
+
|
85 |
+
p = pyaudio.PyAudio()
|
86 |
+
for i in range(p.get_device_count()):
|
87 |
+
print(p.get_device_info_by_index(i))
|
88 |
+
|
89 |
+
p.terminate()
|
90 |
+
|
91 |
+
|
92 |
+
# In[32]:
|
93 |
+
|
94 |
+
|
95 |
+
import pyaudio
|
96 |
+
from queue import Queue
|
97 |
+
|
98 |
+
CHANNELS = 1
|
99 |
+
FRAME_RATE = 16000
|
100 |
+
RECORD_SECONDS = 20
|
101 |
+
AUDIO_FORMAT = pyaudio.paInt16
|
102 |
+
SAMPLE_SIZE = 2
|
103 |
+
|
104 |
+
messages = Queue()
|
105 |
+
recordings = Queue()
|
106 |
+
|
107 |
+
def record_microphone(chunk=1024):
|
108 |
+
p = pyaudio.PyAudio()
|
109 |
+
|
110 |
+
stream = p.open(format=AUDIO_FORMAT,
|
111 |
+
channels=CHANNELS,
|
112 |
+
rate=FRAME_RATE,
|
113 |
+
input=True,
|
114 |
+
input_device_index=1,
|
115 |
+
frames_per_buffer=chunk)
|
116 |
+
|
117 |
+
frames = []
|
118 |
+
|
119 |
+
while not messages.empty():
|
120 |
+
data = stream.read(chunk)
|
121 |
+
frames.append(data)
|
122 |
+
|
123 |
+
if len(frames) >= int(FRAME_RATE * RECORD_SECONDS / chunk):
|
124 |
+
recordings.put(frames.copy())
|
125 |
+
frames = []
|
126 |
+
|
127 |
+
stream.stop_stream()
|
128 |
+
stream.close()
|
129 |
+
p.terminate()
|
130 |
+
|
131 |
+
|
132 |
+
# In[33]:
|
133 |
+
|
134 |
+
|
135 |
+
import subprocess
|
136 |
+
import json
|
137 |
+
from vosk import Model, KaldiRecognizer
|
138 |
+
|
139 |
+
model = Model(model_name="vosk-model-en-us-0.42-gigaspeech")
|
140 |
+
rec = KaldiRecognizer(model, FRAME_RATE)
|
141 |
+
rec.SetWords(True)
|
142 |
+
|
143 |
+
def speech_recognition(output):
|
144 |
+
while not messages.empty():
|
145 |
+
frames = recordings.get()
|
146 |
+
|
147 |
+
rec.AcceptWaveform(b''.join(frames))
|
148 |
+
result = rec.Result()
|
149 |
+
text = json.loads(result)["text"]
|
150 |
+
|
151 |
+
cased = subprocess.check_output("python recasepunc/recasepunc.py predict recasepunc/checkpoint", shell=True, text=True, input=text)
|
152 |
+
output.append_stdout(cased)
|
153 |
+
|
154 |
+
|
155 |
+
# In[1]:
|
156 |
+
|
157 |
+
|
158 |
+
get_ipython().system('pip install gradio')
|
159 |
+
|
160 |
+
|
161 |
+
# In[2]:
|
162 |
+
|
163 |
+
|
164 |
+
def my_function(input1, input2):
|
165 |
+
# Process the inputs and generate the output
|
166 |
+
output = f"Processed {input1} and {input2}"
|
167 |
+
return output
|
168 |
+
|
169 |
+
|
170 |
+
# In[5]:
|
171 |
+
|
172 |
+
|
173 |
+
import gradio as gr
|
174 |
+
|
175 |
+
# Define the function you want to expose through Gradio
|
176 |
+
def my_function(input1, input2):
|
177 |
+
output = f"Processed {input1} and {input2}"
|
178 |
+
return output
|
179 |
+
|
180 |
+
# Create the Gradio interface
|
181 |
+
iface = gr.Interface(
|
182 |
+
fn=my_function,
|
183 |
+
inputs=[gr.Textbox(label="Input 1"), gr.Textbox(label="Input 2")],
|
184 |
+
outputs=gr.Textbox(label="Output")
|
185 |
+
)
|
186 |
+
|
187 |
+
# Launch the interface with a public link
|
188 |
+
iface.launch(share=True)
|
189 |
+
|
190 |
+
|
191 |
+
# In[ ]:
|
192 |
+
|
193 |
+
|
194 |
+
|
195 |
+
|
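The exported script above defines the recording and transcription threads but, unlike the notebook, has no widget buttons to start or stop them. A hedged driver sketch that could be appended after the definitions; the SimpleOutput shim and the timing are assumptions, and the recasepunc subprocess may need a longer grace period while its model loads:

# Hedged driver sketch for running the script headless (no ipywidgets).
import time
from threading import Thread

class SimpleOutput:
    # Stand-in for the notebook's ipywidgets Output (assumption).
    def append_stdout(self, text):
        print(text, end="")

if __name__ == "__main__":
    messages.put(True)                        # non-empty queue = recording on
    rec_thread = Thread(target=record_microphone)
    asr_thread = Thread(target=speech_recognition,
                        args=(SimpleOutput(),), daemon=True)
    rec_thread.start()
    asr_thread.start()

    time.sleep(RECORD_SECONDS + 2)            # capture one full chunk
    messages.get()                            # draining the queue stops the loops
    rec_thread.join()
    time.sleep(10)                            # grace period for transcription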