File size: 1,275 Bytes
165d1b6
 
 
 
 
 
 
 
 
 
 
 
 
cc48e0c
2b2f7f6
165d1b6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
package main

import (
	"log"
	"os"

	"github.com/streamer45/silero-vad-go/speech"

	"github.com/go-audio/wav"
)

func main() {
	sd, err := speech.NewDetector(speech.DetectorConfig{
		ModelPath:            "./pretrained_models/silero_vad/silero_vad.onnx",
		SampleRate:           8000,
		Threshold:            0.5,
		MinSilenceDurationMs: 100,
		SpeechPadMs:          30,
	})
	if err != nil {
		log.Fatalf("failed to create speech detector: %s", err)
	}

	if len(os.Args) != 2 {
		log.Fatalf("invalid arguments provided: expecting one file path")
	}

	f, err := os.Open(os.Args[1])
	if err != nil {
		log.Fatalf("failed to open sample audio file: %s", err)
	}
	defer f.Close()

	dec := wav.NewDecoder(f)

	if ok := dec.IsValidFile(); !ok {
		log.Fatalf("invalid WAV file")
	}

	buf, err := dec.FullPCMBuffer()
	if err != nil {
		log.Fatalf("failed to get PCM buffer")
	}

	pcmBuf := buf.AsFloat32Buffer()

	segments, err := sd.Detect(pcmBuf.Data)
	if err != nil {
		log.Fatalf("Detect failed: %s", err)
	}

	for _, s := range segments {
		log.Printf("speech starts at %0.2fs", s.SpeechStartAt)
		if s.SpeechEndAt > 0 {
			log.Printf("speech ends at %0.2fs", s.SpeechEndAt)
		}
	}

	err = sd.Destroy()
	if err != nil {
		log.Fatalf("failed to destroy detector: %s", err)
	}
}