File size: 2,436 Bytes
ce21f7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# -*- coding: utf-8 -*-

import numpy as np
from scipy.signal import correlate2d
from skimage.util import view_as_windows
class WSOLA(object):
    """Change the duration of a waveform with WSOLA.

    WSOLA (waveform similarity overlap-add) reads frames from the input at
    an analysis hop Ha, nudges each read position to the best-matching
    nearby frame, and overlap-adds the frames at a fixed synthesis hop Hs.
    """

    def __init__(self, fs, speech_rate, shiftms=10):
        """Precompute the hop sizes and the frame window.

        Args:
            fs: Sampling frequency in Hz.
            speech_rate: Speed factor. The output holds
                ``int(len(x) / speech_rate)`` samples, so values above 1
                shorten the signal and values below 1 lengthen it.
            shiftms: Synthesis hop Hs in milliseconds.

        Raises:
            ValueError: If ``speech_rate`` is not positive, or if the
                synthesis or analysis hop would be shorter than one sample.
        """
        if not speech_rate > 0:
            raise ValueError(
                "speech_rate must be positive, got {!r}".format(speech_rate))

        self.fs = fs
        self.speech_rate = speech_rate

        self.shiftms = shiftms  # synthesis hop Hs, in milliseconds
        self.sl = int(self.fs * self.shiftms / 1000)  # Hs, in samples
        if self.sl < 1:
            raise ValueError(
                "fs * shiftms / 1000 must be at least one sample")
        self.fl = self.sl * 2  # frame length: twice Hs
        self.epstep = int(self.sl * self.speech_rate)  # analysis hop Ha
        if self.epstep < 1:
            # A zero analysis hop would never advance the read position and
            # the main loop of duration_modification() would spin forever.
            raise ValueError(
                "speech_rate is too small: the analysis hop is below one "
                "sample")
        self.win = np.hanning(self.fl)  # frame window

    def duration_modification(self, x):
        """Return a time-scaled copy of ``x``.

        Args:
            x: One-dimensional sequence of samples.

        Returns:
            A float64 array of ``int(len(x) / speech_rate)`` samples. The
            first ``sl`` samples are copied from ``x``; the rest is built
            by overlap-adding half frames. Output samples that the loop
            does not reach before the input runs out stay zero.
        """
        x = np.asarray(x)
        wlen = len(x)  # Lin
        # Lout = Lin / rate
        wsolaed = np.zeros(int(wlen / self.speech_rate), dtype='d')

        # initialization
        sp = self.sl * 2  # centre of x'_m, the frame already written
        rp = sp + self.sl  # centre of x~_m, the natural continuation
        ep = sp + self.epstep  # centre of x+_(m+1), the next analysis frame
        outp = self.sl

        # Copy the first frame to the output. Clamp to what both arrays
        # actually hold so very short inputs/outputs do not fail to
        # broadcast.
        head = min(outp, wlen, wsolaed.size)
        wsolaed[:head] = x[:head]

        while wlen > ep + self.fl:
            room = wsolaed.size - outp
            if room <= 0:
                # Output is full; further searching could not change it.
                break

            ref = x[rp - self.sl:rp + self.sl]  # x~_m
            buff = x[ep - self.fl:ep + self.fl]  # search region

            # find the shift of the analysis frame that best matches ref
            delta = self._search_minimum_distance(ref, buff)
            epd = ep + delta

            # overlap-add: right half of x'_m plus left half of the
            # matched frame centred at epd
            spdata = x[sp:sp + self.sl] * self.win[self.sl:]
            epdata = x[epd - self.sl:epd] * self.win[:self.sl]
            count = min(self.sl, room)  # last write may be partial
            wsolaed[outp:outp + count] = spdata[:count] + epdata[:count]

            outp += self.sl

            # positions for the next frame
            sp = epd
            rp = sp + self.sl
            ep += self.epstep

        return wsolaed

    def _search_minimum_distance(self, ref, buff):
        """Locate the frame inside ``buff`` most similar to ``ref``.

        Similarity is the cross-correlation between the windowed reference
        and every windowed length-``fl`` slice of ``buff``; despite the
        method name, the highest correlation wins.

        Args:
            ref: Reference frame; zero-padded to ``fl`` samples if shorter.
            buff: Search region of at least ``fl`` samples.

        Returns:
            Start index of the best slice within ``buff`` minus ``sl``.

        Raises:
            ValueError: If ``buff`` is shorter than one frame.
        """
        if len(buff) < self.fl:
            raise ValueError("search region is shorter than one frame")
        if len(ref) < self.fl:
            ref = np.r_[ref, np.zeros(self.fl - len(ref))]

        # sum_j (buff[i+j] * win[j]) * (ref[j] * win[j]) equals
        # sum_j buff[i+j] * (ref[j] * win[j]**2), so folding the window
        # into the reference gives the same scores from a single 1-D
        # correlation, without materialising every windowed slice.
        kernel = ref * self.win * self.win
        corr = np.correlate(buff, kernel, mode='valid')

        return np.argmax(corr) - self.sl