Upload wsola.py
Browse files
wsola.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
+
from scipy.signal import correlate2d
|
5 |
+
from skimage.util import view_as_windows
|
6 |
+
class WSOLA(object):
    """Change the duration of a waveform with WSOLA.

    WSOLA (waveform similarity overlap-add) builds the output from
    Hanning-windowed frames of the input.  Output frames are laid down
    every ``sl`` samples (synthesis hop, Hs) while the read position in
    the input advances by ``epstep`` samples (analysis hop, Ha); each new
    frame is shifted by up to ``+/- sl`` samples so that it lines up with
    the natural continuation of the previously copied frame.

    Attributes:
        fs: Sampling frequency passed to the constructor.
        speech_rate: Speed factor; the output has
            ``int(len(x) / speech_rate)`` samples.
        shiftms: Duration of the synthesis hop Hs in milliseconds.
        sl: Synthesis hop Hs in samples, ``int(fs * shiftms / 1000)``.
        fl: Frame length in samples, twice ``sl``.
        epstep: Analysis hop Ha in samples, ``int(sl * speech_rate)``.
        win: Hanning window of length ``fl``.
    """

    def __init__(self, fs, speech_rate, shiftms=10):
        """Precompute the hop sizes and the analysis window.

        Args:
            fs: Sampling frequency of the waveform.
            speech_rate: Speed factor; must be positive.
            shiftms: Duration of the synthesis hop Hs in milliseconds.

        Raises:
            ValueError: If ``speech_rate`` is not positive, or if the
                parameters yield an analysis hop of less than one sample
                (``duration_modification`` could then never advance
                through the input).
        """
        if speech_rate <= 0:
            raise ValueError(
                'speech_rate must be positive, got {}'.format(speech_rate))

        self.fs = fs
        self.speech_rate = speech_rate

        self.shiftms = shiftms  # duration of Hs in milliseconds
        self.sl = int(self.fs * self.shiftms / 1000)  # length of Hs
        self.fl = self.sl * 2  # frame length: twice the length of Hs
        self.epstep = int(self.sl * self.speech_rate)  # length of Ha
        if self.epstep < 1:
            raise ValueError(
                'fs, shiftms and speech_rate give an analysis hop of {} '
                'samples; it must be at least 1'.format(self.epstep))
        self.win = np.hanning(self.fl)  # window function

    def duration_modification(self, x):
        """Return a time-scaled copy of the waveform ``x``.

        Args:
            x: One-dimensional waveform (anything ``np.asarray`` accepts).

        Returns:
            A float64 array of ``int(len(x) / speech_rate)`` samples.
            Samples that are never reached by the overlap-add loop keep
            their initial value of zero.
        """
        x = np.asarray(x)
        wlen = len(x)  # Lin
        # Lout = Lin / rate
        wsolaed = np.zeros(int(wlen / self.speech_rate), dtype='d')

        # Initialisation
        sp = self.sl * 2  # centre of the frame x'_m copied last
        rp = sp + self.sl  # centre of its natural continuation x~_m
        ep = sp + self.epstep  # centre of the search region for x+_(m+1)
        outp = self.sl

        # Copy the first hop of the input unchanged.  The copy is clipped
        # so that inputs or outputs shorter than one hop do not raise.
        head = min(outp, wlen, len(wsolaed))
        wsolaed[:head] = x[:head]

        while wlen > ep + self.fl:
            ref = x[rp - self.sl:rp + self.sl]  # x~_m
            buff = x[ep - self.fl:ep + self.fl]  # search region

            # Offset of the frame in the search region that is most
            # similar to the natural continuation.
            delta = self._search_minimum_distance(ref, buff)
            epd = ep + delta

            # Overlap-add: fading-out right half of the previous frame
            # plus fading-in left half of the newly selected frame.
            spdata = x[sp:sp + self.sl] * self.win[self.sl:]
            epdata = x[epd - self.sl:epd] * self.win[:self.sl]
            # The output slice is shorter than one hop (possibly empty)
            # once the end of the output buffer is reached.
            room = len(wsolaed[outp:outp + self.sl])
            wsolaed[outp:outp + self.sl] = spdata[:room] + epdata[:room]

            outp += self.sl

            # Positions for the next frame
            sp = epd
            rp = sp + self.sl
            ep += self.epstep

        return wsolaed

    def _search_minimum_distance(self, ref, buff):
        """Find the shift that best aligns ``buff`` with ``ref``.

        Every length-``fl`` window of ``buff`` is compared with ``ref``
        through the cross-correlation of their Hanning-windowed samples,
        and the window with the largest score wins.

        Args:
            ref: Reference frame; zero-padded to ``fl`` samples when
                shorter.
            buff: Search region of at least ``fl`` samples
                (``duration_modification`` passes ``2 * fl``).

        Returns:
            Start index of the best window in ``buff`` minus ``sl``; for
            a ``2 * fl`` search region this lies in ``[-sl, sl]``.
        """
        if len(ref) < self.fl:
            ref = np.r_[ref, np.zeros(self.fl - len(ref))]

        # The score of the window starting at i is
        #   sum_j (buff[i + j] * win[j]) * (ref[j] * win[j]),
        # i.e. the 1-D correlation of buff with ref * win ** 2, so no
        # matrix of windowed frames has to be materialised.
        weighted_ref = ref * self.win * self.win
        corr = np.correlate(buff, weighted_ref, mode='valid')

        return np.argmax(corr) - self.sl
|