daihui.zhang committed on
Commit c00c06f · 1 Parent(s): b008abb

add logging format
config.py CHANGED
@@ -1,5 +1,15 @@
 import pathlib
 
+import logging
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+    datefmt="%H:%M:%S"
+)
+
+logging.getLogger("pywhispercpp").setLevel(logging.WARNING)
+
 
 BASE_DIR = pathlib.Path(__file__).parent
 MODEL_DIR = BASE_DIR / "moyoyo_asr_models"
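
With this in config.py, every module that logs through the stdlib root logger inherits the same handler and format, while pywhispercpp's own INFO-level chatter is suppressed. A minimal standalone sketch of the resulting output (message text and timestamp are illustrative):

    import logging

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        datefmt="%H:%M:%S",
    )
    logging.info("server started")
    # 12:34:56 - INFO - server started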
run_client.py CHANGED
@@ -1,3 +1,4 @@
+
 from transcribe.client import TranscriptionClient
 
 client = TranscriptionClient(
transcribe/transcription.py CHANGED
@@ -14,6 +14,8 @@ from urllib.parse import urlparse, parse_qsl
 from websockets.exceptions import ConnectionClosed
 from websockets.sync.server import serve
 from uuid import uuid1
+
+
 logging.basicConfig(level=logging.INFO)
 
 
@@ -174,8 +176,8 @@ class TranscriptionServer:
         frame_data = websocket.recv()
         if frame_data == b"END_OF_AUDIO":
             return False
-        # return np.frombuffer(frame_data, dtype=np.int16).astype(np.float32) / 32768.0
-        return np.frombuffer(frame_data, dtype=np.float32)
+        return np.frombuffer(frame_data, dtype=np.int16).astype(np.float32) / 32768.0
+        # return np.frombuffer(frame_data, dtype=np.float32)
 
 
     def handle_new_connection(self, websocket):
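
The second hunk flips the decode path: the server now treats incoming frames as raw 16-bit PCM and normalizes them to float32 in [-1.0, 1.0), instead of expecting the client to send float32. A standalone sketch of the round-trip (sample values are illustrative):

    import numpy as np

    frame_data = np.array([0, 16384, -32768], dtype=np.int16).tobytes()  # bytes off the wire
    audio = np.frombuffer(frame_data, dtype=np.int16).astype(np.float32) / 32768.0
    print(audio)  # [ 0.   0.5 -1. ]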
transcribe/utils.py CHANGED
@@ -1,8 +1,15 @@
 import os
 import textwrap
 from pathlib import Path
+import logging
+
 
 import av
+def log_block(key: str, value, unit=''):
+    """Format a key/value pair for log output."""
+    key_fmt = f"[{key.ljust(25)}]"  # left-align and pad the key
+    val_fmt = f"{value} {unit}".strip()
+    logging.info(f"{key_fmt}: {val_fmt}")
 
 
 def clear_screen():
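
Under the basicConfig format from config.py, log_block aligns keys in a fixed 25-character column, e.g. (timestamps illustrative):

    log_block("Audio buffer length", "2.50", "s")
    # 12:34:56 - INFO - [Audio buffer length      ]: 2.50 s
    log_block("LLM translate input", "hello world")
    # 12:34:56 - INFO - [LLM translate input      ]: hello world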
transcribe/whisper_llm_serve.py CHANGED
@@ -19,9 +19,9 @@ from pywhispercpp.model import Model
 from queue import Queue
 from scipy.io.wavfile import write
 from api_model import TransResult, Message
+from .utils import log_block
 
-
-logger = getLogger(__name__)
+logger = getLogger("TranslatorApp")
 
 def save_to_wave(filename, data:np.ndarray, sample_rate=16000):
     write(filename, sample_rate, data)
@@ -133,7 +133,6 @@ class PywhisperInference:
         # init llamacpp
         cls.llm_model = QwenTranslator(config.LLM_MODEL_PATH, config.LLM_SYS_PROMPT)
         # cls.vad_model = VoiceActivityDetector()
-
         event.set()
 
 
@@ -157,7 +156,7 @@ class PywhisperInference:
         raise ValueError(f"Unsupported language : {language}")
 
     @classmethod
-    def inference(cls, audio_buffer, language):
+    def transcribe(cls, audio_buffer, language):
         max_len, prompt = cls.config_language(language)
         audio_buffer = np.frombuffer(audio_buffer, dtype=np.float32)
         return cls.whisper_model.transcribe(
@@ -197,7 +196,10 @@ class PyWhiperCppServe(ServeClientBase):
         # start receiving audio only after the worker process is initialized
         self._ready_state = mp.Event()
         self._pool = PPool(
-            max_workers=1, initializer=PywhisperInference.initializer, initargs=(self._ready_state, ))
+            max_workers=1,
+            initializer=PywhisperInference.initializer,
+            initargs=(self._ready_state,)
+        )
         self._pool.submit(PywhisperInference.init)
         logger.info('Create a process to process audio.')
         self.send_ready_state()
@@ -211,7 +213,6 @@ class PyWhiperCppServe(ServeClientBase):
 
     def send_ready_state(self):
         self._ready_state.wait()
-
         self.websocket.send(json.dumps({
             "uid": self.client_uid,
             "message": self.SERVER_READY,
@@ -256,8 +257,14 @@ class PyWhiperCppServe(ServeClientBase):
         Args:
             audio_buffer (np.array): The audio chunk to transcribe.
         """
+
+        log_block("Audio buffer length", f"{audio_buffer.shape[0]/self.sample_rate:.2f}", "s")
+        start_time = time.perf_counter()
+
         transcribe_fut = self._pool.submit(
-            PywhisperInference.inference, audio_buffer.tobytes(), self.language)
+            PywhisperInference.transcribe, audio_buffer.tobytes(), self.language)
+
         segments = transcribe_fut.result()
+        log_block("Whisper transcribe time", f"{(time.perf_counter() - start_time):.3f}", "s")
         return segments
 
@@ -265,8 +272,12 @@ class PyWhiperCppServe(ServeClientBase):
         """
         translate the text to dst lang"""
         # return "sample english"
+        log_block("LLM translate input", f"{text}")
+        start_time = time.perf_counter()
         translate_fut = self._pool.submit(
             PywhisperInference.translate, text, self.language, self.dst_lang)
-        return translate_fut.result()
+        result = translate_fut.result()
+        log_block("LLM translate time", f"{(time.perf_counter() - start_time):.3f}", "s")
+        return result
 
     def _segments_split(self, segments, audio_buffer: np.ndarray):
@@ -330,11 +341,10 @@ class PyWhiperCppServe(ServeClientBase):
             # name = f"dev-{c}.wav"
             # save_to_wave(name, audio_buffer)
             try:
-                logger.info(f"Processing audio with duration: {len(audio_buffer) / self.sample_rate:.2f}s")
+                # logger.info(f"Audio buffer length: {len(audio_buffer) / self.sample_rate:.2f}s")
                 segments = self.transcribe_audio(audio_buffer)
                 for tran_result in self.handle_transcription_output(segments, audio_buffer):
                     self.send_to_client(tran_result)
-
             except KeyboardInterrupt:
                 break
             except Exception as e:
@@ -359,7 +369,7 @@ class PyWhiperCppServe(ServeClientBase):
         if is_end_sentence and last_cut_index:
             message = self._segment_manager.segment
             seg_id = self._segment_manager.get_seg_id() - 1
-            logger.info(f"{seg_id}, {message}")
+            # logger.info(f"{seg_id}, {message}")
             yield TransResult(
                 seg_id=seg_id,
                 context=message,
@@ -370,7 +380,7 @@ class PyWhiperCppServe(ServeClientBase):
             )
             if self._segment_manager.string.strip():
                 message = self._segment_manager.string.strip()
-                logger.info(f"{seg_id + 1}, {message}")
+                # logger.info(f"{seg_id + 1}, {message}")
                 yield TransResult(
                     seg_id=seg_id+1,
                     context=self._segment_manager.string,
@@ -382,7 +392,7 @@ class PyWhiperCppServe(ServeClientBase):
         else:
             seg_id = self._segment_manager.get_seg_id()
             message = self._segment_manager.short_sentence + self._segment_manager.string
-            logger.info(f"{seg_id}, {message}")
+            # logger.info(f"{seg_id}, {message}")
             yield TransResult(
                 seg_id=seg_id,
                 context=message,
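
A note on the timing added around the pool calls: submit() returns a Future immediately, so a perf_counter delta taken right after submit measures almost nothing; the delta is only meaningful once .result() has blocked until the worker finished, which is why the hunks above log after the result is fetched. A standalone sketch of the pattern, using only the stdlib:

    import time
    from concurrent.futures import ProcessPoolExecutor

    def slow_work(x):
        time.sleep(0.5)
        return x * 2

    if __name__ == "__main__":
        with ProcessPoolExecutor(max_workers=1) as pool:
            start = time.perf_counter()
            fut = pool.submit(slow_work, 21)            # returns at once
            after_submit = time.perf_counter() - start  # ~0.000 s: nothing measured yet
            result = fut.result()                       # blocks until the worker finishes
            after_result = time.perf_counter() - start  # ~0.5 s: actual work time
            print(f"{after_submit:.3f}s vs {after_result:.3f}s, result={result}")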