Spaces:
Running
Running
File size: 901 Bytes
c56c253 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# Audio-processing parameters: mel-spectrogram framing, utterance lengths,
# voice activity detection (VAD) and volume normalization.

# ---------------------------------------------------------------------------
# Mel filterbank
# ---------------------------------------------------------------------------
# Length of each analysis window, in milliseconds.
mel_window_length = 25
# Step between the starts of consecutive windows, in milliseconds.
mel_window_step = 10
# Number of mel channels.
mel_n_channels = 40

# ---------------------------------------------------------------------------
# Audio
# ---------------------------------------------------------------------------
# Audio sampling rate (presumably in Hz; the unit is not stated in this file).
sampling_rate = 16000
# Spectrogram frames in one partial utterance: 160 frames = 1600 ms.
partials_n_frames = 160
# Spectrogram frames used at inference time: 80 frames = 800 ms.
inference_n_frames = 80

# ---------------------------------------------------------------------------
# Voice activity detection (VAD)
# ---------------------------------------------------------------------------
# VAD window size, in milliseconds. Only 10, 20 or 30 are valid. This value
# fixes the granularity of the VAD and should not need to be changed.
vad_window_length = 30
# How many frames are averaged together by the moving-average smoothing.
# A larger width means VAD variations must be larger to survive the smoothing.
vad_moving_average_width = 8
# Upper bound on the number of consecutive silent frames within one segment.
vad_max_silence_length = 6

# ---------------------------------------------------------------------------
# Audio volume normalization
# ---------------------------------------------------------------------------
# Target level for volume normalization, in dBFS.
audio_norm_target_dBFS = -30
|