daihui.zhang committed
Commit · c00c06f
Parent(s): b008abb

add logging format
Files changed:
- config.py +10 -0
- run_client.py +1 -0
- transcribe/transcription.py +4 -2
- transcribe/utils.py +7 -0
- transcribe/whisper_llm_serve.py +22 -12
config.py
CHANGED
@@ -1,5 +1,15 @@
 import pathlib
 
+import logging
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+    datefmt="%H:%M:%S"
+)
+
+logging.getLogger("pywhispercpp").setLevel(logging.WARNING)
+
 
 BASE_DIR = pathlib.Path(__file__).parent
 MODEL_DIR = BASE_DIR / "moyoyo_asr_models"
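For reference, a minimal standalone sketch of what this configuration produces (timestamps illustrative): root-level records at INFO and above get a time/level/message prefix, while pywhispercpp's own logger is raised to WARNING so its per-chunk INFO chatter is suppressed. Note that logging.basicConfig configures the root logger only on its first call, so the bare logging.basicConfig(level=logging.INFO) further down in transcribe/transcription.py is a no-op once config.py has been imported.

    import logging

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        datefmt="%H:%M:%S",
    )
    logging.getLogger("pywhispercpp").setLevel(logging.WARNING)

    logging.info("server started")                     # -> "14:23:05 - INFO - server started"
    logging.getLogger("pywhispercpp").info("chatter")  # suppressed: below WARNING for this logger
    logging.getLogger("pywhispercpp").warning("oops")  # -> "14:23:05 - WARNING - oops"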
run_client.py
CHANGED
@@ -1,3 +1,4 @@
+
 from transcribe.client import TranscriptionClient
 
 client = TranscriptionClient(
transcribe/transcription.py
CHANGED
@@ -14,6 +14,8 @@ from urllib.parse import urlparse, parse_qsl
 from websockets.exceptions import ConnectionClosed
 from websockets.sync.server import serve
 from uuid import uuid1
+
+
 logging.basicConfig(level=logging.INFO)
 
 
@@ -174,8 +176,8 @@ class TranscriptionServer:
         frame_data = websocket.recv()
         if frame_data == b"END_OF_AUDIO":
             return False
-
-        return np.frombuffer(frame_data, dtype=np.float32)
+        return np.frombuffer(frame_data, dtype=np.int16).astype(np.float32) / 32768.0
+        # return np.frombuffer(frame_data, dtype=np.float32)
 
 
     def handle_new_connection(self, websocket):
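The dtype change above means the server now expects clients to stream raw 16-bit PCM rather than float32, and normalizes it to float32 in [-1.0, 1.0) by dividing by 32768. A small sketch of the conversion (pcm16_to_float32 is a hypothetical helper, not part of the repo):

    import numpy as np

    def pcm16_to_float32(frame_data: bytes) -> np.ndarray:
        """Decode little-endian 16-bit PCM bytes to float32 in [-1.0, 1.0)."""
        return np.frombuffer(frame_data, dtype=np.int16).astype(np.float32) / 32768.0

    samples = np.array([-32768, 0, 16384, 32767], dtype=np.int16)
    print(pcm16_to_float32(samples.tobytes()))  # ~[-1.0, 0.0, 0.5, 0.99997]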
transcribe/utils.py
CHANGED
@@ -1,8 +1,15 @@
 import os
 import textwrap
 from pathlib import Path
+import logging
+
 
 import av
+def log_block(key: str, value, unit=''):
+    """Format a key/value pair for log output."""
+    key_fmt = f"[{key.ljust(25)}]"  # left-align and pad the key
+    val_fmt = f"{value} {unit}".strip()
+    logging.info(f"{key_fmt}: {val_fmt}")
 
 
 def clear_screen():
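With the root format configured in config.py, calls to log_block line up in a fixed 25-character key column. A usage sketch (output illustrative):

    from transcribe.utils import log_block

    log_block("Audio buffer length", "2.35", "s")
    log_block("LLM translate input", "hello world")
    # 14:23:05 - INFO - [Audio buffer length      ]: 2.35 s
    # 14:23:05 - INFO - [LLM translate input      ]: hello world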
transcribe/whisper_llm_serve.py
CHANGED
@@ -19,9 +19,9 @@ from pywhispercpp.model import Model
 from queue import Queue
 from scipy.io.wavfile import write
 from api_model import TransResult, Message
+from .utils import log_block
 
-
-logger = getLogger(__name__)
+logger = getLogger("TranslatorApp")
 
 def save_to_wave(filename, data:np.ndarray, sample_rate=16000):
     write(filename, sample_rate, data)
@@ -133,7 +133,6 @@ class PywhisperInference:
         # init llamacpp
         cls.llm_model = QwenTranslator(config.LLM_MODEL_PATH, config.LLM_SYS_PROMPT)
         # cls.vad_model = VoiceActivityDetector()
-
         event.set()
 
 
@@ -157,7 +156,7 @@ class PywhisperInference:
         raise ValueError(f"Unsupported language : {language}")
 
     @classmethod
-    def
+    def transcribe(cls, audio_buffer, language):
         max_len, prompt = cls.config_language(language)
         audio_buffer = np.frombuffer(audio_buffer, dtype=np.float32)
         return cls.whisper_model.transcribe(
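The worker-side transcribe above receives the audio as raw bytes (the caller passes audio_buffer.tobytes(); see the @@ -256 hunk below), so the chunk crosses the process boundary as a plain bytes object and np.frombuffer reconstructs the float32 view on the other side. A minimal sketch of that round trip:

    import numpy as np

    audio = np.linspace(-1.0, 1.0, 16000, dtype=np.float32)  # 1 s of audio at 16 kHz
    payload = audio.tobytes()                                # what the parent submits
    restored = np.frombuffer(payload, dtype=np.float32)      # worker-side view, no copy
    assert np.array_equal(audio, restored)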
@@ -197,7 +196,10 @@ class PyWhiperCppServe(ServeClientBase):
         # Start receiving audio only after the worker process has initialized
         self._ready_state = mp.Event()
         self._pool = PPool(
-            max_workers=1,
+            max_workers=1,
+            initializer=PywhisperInference.initializer,
+            initargs=(self._ready_state,)
+        )
         self._pool.submit(PywhisperInference.init)
         logger.info('Create a process to process audio.')
         self.send_ready_state()
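The initializer/initargs arguments follow the standard concurrent.futures.ProcessPoolExecutor pattern (PPool is presumably an alias for it): the initializer runs once inside the worker process, the submitted init call does the heavy model loading, and the shared Event lets the parent block until the worker is ready. A self-contained sketch of the pattern, with a sleep standing in for model loading:

    import multiprocessing as mp
    import time
    from concurrent.futures import ProcessPoolExecutor

    _event = None  # worker-process global, stashed by the initializer

    def initializer(ready_event):
        """Runs once in the worker at startup (cf. PywhisperInference.initializer)."""
        global _event
        _event = ready_event

    def init():
        """Heavy one-time setup (cf. PywhisperInference.init)."""
        time.sleep(1.0)  # stand-in for loading Whisper/LLM weights
        _event.set()     # parent may now start streaming audio

    def transcribe(chunk: bytes) -> str:
        return f"transcribed {len(chunk)} bytes"  # placeholder work

    if __name__ == "__main__":
        ready = mp.Event()
        pool = ProcessPoolExecutor(max_workers=1, initializer=initializer,
                                   initargs=(ready,))
        pool.submit(init)  # loads models in the worker, then sets the event
        ready.wait()       # what send_ready_state() blocks on
        print(pool.submit(transcribe, b"\x00" * 3200).result())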
@@ -211,7 +213,6 @@ class PyWhiperCppServe(ServeClientBase):
 
     def send_ready_state(self):
         self._ready_state.wait()
-
         self.websocket.send(json.dumps({
             "uid": self.client_uid,
             "message": self.SERVER_READY,
@@ -256,8 +257,14 @@ class PyWhiperCppServe(ServeClientBase):
         Args:
             audio_buffer (np.array): The audio chunk to transcribe.
         """
+
+        log_block("Audio buffer length", f"{audio_buffer.shape[0]/self.sample_rate:.2f}", "s")
+        start_time = time.perf_counter()
+
         transcribe_fut = self._pool.submit(
-            PywhisperInference.
+            PywhisperInference.transcribe, audio_buffer.tobytes(), self.language)
+
+        log_block("Whisper transcribe time", f"{(time.perf_counter() - start_time):.3f}", "s")
         segments = transcribe_fut.result()
         return segments
 
@@ -265,8 +272,12 @@ class PyWhiperCppServe(ServeClientBase):
         """
         translate the text to dst lang"""
         # return "sample english"
+        log_block("LLM translate input", f"{text}")
+        start_time = time.perf_counter()
         translate_fut = self._pool.submit(
             PywhisperInference.translate, text, self.language, self.dst_lang)
+
+        log_block("LLM translate time", f"{(time.perf_counter() - start_time):.3f}", "s")
         return translate_fut.result()
 
     def _segments_split(self, segments, audio_buffer: np.ndarray):
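Worth noting in both hunks above: the elapsed time is logged right after submit() returns, before result() is awaited, so with a process pool the logged durations mostly measure submission overhead rather than the transcription or translation itself. A sketch of a helper that times the full round trip (timed_submit is hypothetical, not part of the commit):

    import time
    from transcribe.utils import log_block

    def timed_submit(pool, label, fn, *args):
        """Submit fn to the pool and log wall-clock time for the full round trip."""
        start = time.perf_counter()
        fut = pool.submit(fn, *args)
        result = fut.result()  # block until the worker has actually finished
        log_block(f"{label} time", f"{time.perf_counter() - start:.3f}", "s")
        return result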
@@ -330,11 +341,10 @@ class PyWhiperCppServe(ServeClientBase):
                 # name = f"dev-{c}.wav"
                 # save_to_wave(name, audio_buffer)
                 try:
-                    logger.info(f"Audio buffer length: {len(audio_buffer) / self.sample_rate:.2f}s")
+                    # logger.info(f"Audio buffer length: {len(audio_buffer) / self.sample_rate:.2f}s")
                     segments = self.transcribe_audio(audio_buffer)
                     for tran_result in self.handle_transcription_output(segments, audio_buffer):
                         self.send_to_client(tran_result)
-
                 except KeyboardInterrupt:
                     break
                 except Exception as e:
@@ -359,7 +369,7 @@ class PyWhiperCppServe(ServeClientBase):
         if is_end_sentence and last_cut_index:
             message = self._segment_manager.segment
             seg_id = self._segment_manager.get_seg_id() - 1
-            logger.info(f"{seg_id}, {message}")
+            # logger.info(f"{seg_id}, {message}")
             yield TransResult(
                 seg_id=seg_id,
                 context=message,
@@ -370,7 +380,7 @@ class PyWhiperCppServe(ServeClientBase):
             )
         if self._segment_manager.string.strip():
             message = self._segment_manager.string.strip()
-            logger.info(f"{seg_id + 1}, {message}")
+            # logger.info(f"{seg_id + 1}, {message}")
             yield TransResult(
                 seg_id=seg_id+1,
                 context=self._segment_manager.string,
@@ -382,7 +392,7 @@ class PyWhiperCppServe(ServeClientBase):
         else:
             seg_id = self._segment_manager.get_seg_id()
             message = self._segment_manager.short_sentence + self._segment_manager.string
-            logger.info(f"{seg_id}, {message}")
+            # logger.info(f"{seg_id}, {message}")
             yield TransResult(
                 seg_id=seg_id,
                 context=message,