File size: 1,172 Bytes
3b9863d
 
 
 
 
208141a
3b9863d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208141a
3b9863d
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from pywhispercpp.model import Model
import config
import soundfile
from pywhispercpp.utils import to_timestamp

# Load the test clip. soundfile.read returns a (data, samplerate) pair; only
# the sample data is kept and the sample rate is discarded.
# NOTE(review): despite its name, `mel` holds whatever soundfile.read returns
# as audio data, not a precomputed spectrogram.
# Alternative input: f"{config.ASSERT_DIR}/jfk.flac"
mel, _ = soundfile.read("test/6_before_cut_56640.wav")

# Build the Whisper model named in the project config, looking for model
# files in the configured model directory (passed as a POSIX-style string).
models_dir = config.MODEL_DIR.as_posix()

# Extra keyword options handed to the Model constructor: four threads, all
# print_* output switched off, no translation, temperature 0 and no context.
model_options = {
    "n_threads": 4,
    "print_realtime": False,
    "print_progress": False,
    "print_timestamps": False,
    "translate": False,
    "temperature": 0.0,
    "no_context": True,
}
model = Model(
    model=config.WHISPER_MODEL,
    models_dir=models_dir,
    **model_options,
)
# Show what was loaded; the original author noted e.g. "(160000,) float64".
print(mel.shape, mel.dtype)

# Transcribe as English with token timestamps enabled and max_len=1.
# Alternative kept from the original (disabled): language='zh' together with
# an initial_prompt written in Simplified Chinese; an English initial_prompt
# ('The following is an English sentence.') was also tried.
transcribe_options = {
    "language": "en",
    "token_timestamps": True,
    "max_len": 1,
}
segments = model.transcribe(mel, **transcribe_options)

# Print one line per returned segment: start time, end time, text.
for segment in segments:
    start = to_timestamp(segment.t0)
    end = to_timestamp(segment.t1)
    print(start, end, segment.text)