HoneyTian committed on
Commit
b17fe2a
1 Parent(s): 61bc48a
Files changed (3) hide show
  1. install.sh +2 -2
  2. main.py +14 -6
  3. test.py +27 -0
install.sh CHANGED
@@ -53,10 +53,10 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
53
  cd "${thirdparty_dir}"
54
 
55
  # install onnxruntime
 
 
56
  wget https://github.com/microsoft/onnxruntime/releases/download/v1.20.1/onnxruntime-linux-x64-1.20.1.tgz
57
  tar -zxvf onnxruntime-linux-x64-1.20.1.tgz
58
- # export C_INCLUDE_PATH=$C_INCLUDE_PATH:$(pwd)/onnxruntime-linux-x64-1.20.1/include
59
- # export LIBRARY_PATH=$LIBRARY_PATH:$(pwd)/onnxruntime-linux-x64-1.20.1/lib
60
 
61
  fi
62
 
 
53
  cd "${thirdparty_dir}"
54
 
55
  # install onnxruntime
56
+ # https://onnxruntime.ai/docs/install/
57
+ # https://github.com/microsoft/onnxruntime/releases
58
  wget https://github.com/microsoft/onnxruntime/releases/download/v1.20.1/onnxruntime-linux-x64-1.20.1.tgz
59
  tar -zxvf onnxruntime-linux-x64-1.20.1.tgz
 
 
60
 
61
  fi
62
 
main.py CHANGED
@@ -46,15 +46,16 @@ def run_nx_vad(filename: str, silence_time: float = 0.3, longest_activate: float
46
  cmd = "vad_bins/nx_vad --filename {} --silence_time {} --longest_activate {}".format(
47
  filename, silence_time, longest_activate
48
  )
49
- vad_result = Command.popen(cmd)
50
 
51
- pattern = "(\\d+)[\r\n]VadFlagPrepare[\r\n](\\d+)[\r\n]VadFlagSpeaking(?:[\r\n](?:\\d+)[\r\n]VadFlagPause[\r\n](?:\\d+)[\r\n]VadFlagSpeaking)?[\r\n](\\d+)[\r\n]VadFlagNoSpeech"
52
 
53
- vad_timestamps = re.findall(pattern, vad_result, flags=re.DOTALL)
 
54
 
55
  vad_timestamps: str = json.dumps(vad_timestamps, ensure_ascii=False, indent=2)
56
 
57
- return vad_timestamps
58
 
59
 
60
  def run_silero_vad(filename: str, silence_time: float = 0.3, longest_activate: float = 3.0) -> str:
@@ -65,9 +66,16 @@ def run_silero_vad(filename: str, silence_time: float = 0.3, longest_activate: f
65
  cmd = "vad_bins/silero {}".format(
66
  filename
67
  )
68
- vad_result = Command.popen(cmd)
69
 
70
- return vad_result
 
 
 
 
 
 
 
71
 
72
 
73
  def shell(cmd: str):
 
46
  cmd = "vad_bins/nx_vad --filename {} --silence_time {} --longest_activate {}".format(
47
  filename, silence_time, longest_activate
48
  )
49
+ raw_vad_result = Command.popen(cmd)
50
 
51
+ pattern = "(\\d+)[\r\n]VadFlagPrepare[\r\n](?:\\d+)[\r\n]VadFlagSpeaking(?:[\r\n](?:\\d+)[\r\n]VadFlagPause[\r\n](?:\\d+)[\r\n]VadFlagSpeaking)?[\r\n](\\d+)[\r\n]VadFlagNoSpeech"
52
 
53
+ vad_timestamps = re.findall(pattern, raw_vad_result, flags=re.DOTALL)
54
+ vad_timestamps = [(float(start), float(end)) for start, end in vad_timestamps]
55
 
56
  vad_timestamps: str = json.dumps(vad_timestamps, ensure_ascii=False, indent=2)
57
 
58
+ return raw_vad_result, vad_timestamps
59
 
60
 
61
  def run_silero_vad(filename: str, silence_time: float = 0.3, longest_activate: float = 3.0) -> str:
 
66
  cmd = "vad_bins/silero {}".format(
67
  filename
68
  )
69
+ raw_vad_result = Command.popen(cmd)
70
 
71
+ pattern = ".*?speech starts at (.+?)s[\r\n].*?speech ends at (.+?)s"
72
+
73
+ vad_timestamps = re.findall(pattern, raw_vad_result, flags=re.DOTALL)
74
+ vad_timestamps = [(float(start), float(end)) for start, end in vad_timestamps]
75
+
76
+ vad_timestamps: str = json.dumps(vad_timestamps, ensure_ascii=False, indent=2)
77
+
78
+ return raw_vad_result, vad_timestamps
79
 
80
 
81
  def shell(cmd: str):
test.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import re
4
+
5
+ string = """
6
+ 2024/12/11 06:10:03 speech starts at 4.45s
7
+ 2024/12/11 06:10:03 speech ends at 4.96s
8
+ 2024/12/11 06:10:03 speech starts at 9.63s
9
+ 2024/12/11 06:10:03 speech ends at 9.92s
10
+ 2024/12/11 06:10:03 speech starts at 20.58s
11
+ 2024/12/11 06:10:03 speech ends at 21.02s
12
+ 2024/12/11 06:10:03 speech starts at 27.36s
13
+ 2024/12/11 06:10:03 speech ends at 27.61s
14
+ 2024/12/11 06:10:03 speech starts at 27.97s
15
+ 2024/12/11 06:10:03 speech ends at 31.20s
16
+ 2024/12/11 06:10:03 speech starts at 31.55s
17
+ 2024/12/11 06:10:03 speech ends at 33.21s
18
+ """
19
+
20
+ pattern = ".*?speech starts at (.+?)s[\r\n].*?speech ends at (.+?)s"
21
+
22
+ result = re.findall(pattern, string, flags=re.DOTALL)
23
+ print(result)
24
+
25
+
26
+ if __name__ == "__main__":
27
+ pass