guetLzy committed on
Commit
8437ecb
·
verified ·
1 Parent(s): f1d3214

Upload stft.py

Browse files
Files changed (1) hide show
  1. stft.py +209 -0
stft.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ BSD 3-Clause License
3
+ Copyright (c) 2017, Prem Seetharaman
4
+ All rights reserved.
5
+ * Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+ * Redistributions of source code must retain the above copyright notice,
8
+ this list of conditions and the following disclaimer.
9
+ * Redistributions in binary form must reproduce the above copyright notice, this
10
+ list of conditions and the following disclaimer in the
11
+ documentation and/or other materials provided with the distribution.
12
+ * Neither the name of the copyright holder nor the names of its
13
+ contributors may be used to endorse or promote products derived from this
14
+ software without specific prior written permission.
15
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
19
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
22
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25
+ """
26
+
27
+ import torch
28
+ import numpy as np
29
+ import torch.nn.functional as F
30
+ from torch.autograd import Variable
31
+ from scipy.signal import get_window
32
+ from librosa.util import pad_center, tiny
33
+ import librosa.util as librosa_util
34
+
35
def window_sumsquare(window, n_frames, hop_length=200, win_length=800,
                     n_fft=800, dtype=np.float32, norm=None):
    """
    # from librosa 0.6
    Compute the sum-square envelope of a window function at a given hop length.

    This is used to estimate modulation effects induced by windowing
    observations in short-time fourier transforms.

    Parameters
    ----------
    window : string, tuple, number, callable, or list-like
        Window specification, as in `get_window`
    n_frames : int > 0
        The number of analysis frames
    hop_length : int > 0
        The number of samples to advance between frames
    win_length : int > 0 or None
        The length of the window function (default 800). When `None` is
        passed, `n_fft` is used instead.
    n_fft : int > 0
        The length of each analysis frame.
    dtype : np.dtype
        The data type of the output
    norm : optional
        Forwarded unchanged as the `norm` argument of
        `librosa.util.normalize`, applied to the window before squaring.

    Returns
    -------
    wss : np.ndarray, shape=`(n_fft + hop_length * (n_frames - 1))`
        The sum-squared envelope of the window function
    """
    if win_length is None:
        win_length = n_fft

    n = n_fft + hop_length * (n_frames - 1)
    x = np.zeros(n, dtype=dtype)

    # Compute the squared window at the desired length
    win_sq = get_window(window, win_length, fftbins=True)
    win_sq = librosa_util.normalize(win_sq, norm=norm) ** 2
    # `size` is passed by keyword: it is keyword-only in recent librosa
    # releases and the keyword form is also accepted by older ones.
    win_sq = librosa_util.pad_center(win_sq, size=n_fft)

    # Fill the envelope by overlap-adding the squared window at every hop
    for i in range(n_frames):
        sample = i * hop_length
        x[sample:min(n, sample + n_fft)] += win_sq[:max(0, min(n_fft, n - sample))]
    return x
77
+
78
+
79
class STFT(torch.nn.Module):
    """Convolution-based STFT / inverse STFT.

    adapted from Prem Seetharaman's https://github.com/pseeth/pytorch-stft

    The forward transform is a strided ``conv1d`` against a (windowed)
    Fourier basis; the inverse is a ``conv_transpose1d`` against the
    pseudo-inverse of that basis followed by window-envelope compensation.

    Parameters
    ----------
    filter_length : int
        Size of the Fourier basis (FFT size).
    hop_length : int
        Stride, in samples, between consecutive frames.
    win_length : int
        Length of the analysis window; must not exceed ``filter_length``.
    window : str or None
        Window specification handed to ``scipy.signal.get_window``. When
        ``None`` no window is applied to the bases and no envelope
        compensation is done in ``inverse``.
    """

    def __init__(self, filter_length=800, hop_length=200, win_length=800,
                 window='hann'):
        super(STFT, self).__init__()
        self.filter_length = filter_length
        self.hop_length = hop_length
        self.win_length = win_length
        self.window = window
        self.forward_transform = None
        scale = self.filter_length / self.hop_length
        fourier_basis = np.fft.fft(np.eye(self.filter_length))

        # Keep only the non-redundant bins and stack real over imaginary rows.
        cutoff = int((self.filter_length / 2 + 1))
        fourier_basis = np.vstack([np.real(fourier_basis[:cutoff, :]),
                                   np.imag(fourier_basis[:cutoff, :])])

        forward_basis = torch.tensor(
            fourier_basis[:, None, :], dtype=torch.float32)
        inverse_basis = torch.tensor(
            np.linalg.pinv(scale * fourier_basis).T[:, None, :],
            dtype=torch.float32)

        if window is not None:
            assert filter_length >= win_length, \
                "filter_length must be >= win_length"
            # get window and zero center pad it to filter_length
            fft_window = get_window(window, win_length, fftbins=True)
            # `size` by keyword: keyword-only in recent librosa releases.
            fft_window = pad_center(fft_window, size=filter_length)
            fft_window = torch.from_numpy(fft_window).float()

            # window the bases
            forward_basis *= fft_window
            inverse_basis *= fft_window

        self.register_buffer('forward_basis', forward_basis.float())
        self.register_buffer('inverse_basis', inverse_basis.float())

    def transform(self, input_data):
        """Return ``(magnitude, phase)`` of ``input_data``.

        ``input_data`` is read as ``(batch, samples)``. Both outputs have
        ``filter_length // 2 + 1`` channels on dim 1 and one column per
        frame on dim 2. The phase is computed on detached tensors, so no
        gradient flows through it.
        """
        num_batches = input_data.size(0)
        num_samples = input_data.size(1)

        self.num_samples = num_samples

        # similar to librosa, reflect-pad the input by half a frame per side
        half_filter = int(self.filter_length / 2)
        input_data = input_data.reshape(num_batches, 1, num_samples)
        input_data = F.pad(
            input_data, (half_filter, half_filter), mode='reflect')

        forward_transform = F.conv1d(
            input_data,
            self.forward_basis,
            stride=self.hop_length,
            padding=0)

        cutoff = int((self.filter_length / 2) + 1)
        real_part = forward_transform[:, :cutoff, :]
        imag_part = forward_transform[:, cutoff:, :]

        magnitude = torch.sqrt(real_part**2 + imag_part**2)
        phase = torch.atan2(imag_part.detach(), real_part.detach())

        return magnitude, phase

    def inverse(self, magnitude, phase):
        """Rebuild a waveform of shape ``(batch, 1, samples)``.

        ``magnitude`` and ``phase`` are expected in the layout produced by
        ``transform``.
        """
        recombine_magnitude_phase = torch.cat(
            [magnitude * torch.cos(phase), magnitude * torch.sin(phase)],
            dim=1)

        inverse_transform = F.conv_transpose1d(
            recombine_magnitude_phase,
            self.inverse_basis,
            stride=self.hop_length,
            padding=0)

        if self.window is not None:
            window_sum = window_sumsquare(
                self.window, magnitude.size(-1), hop_length=self.hop_length,
                win_length=self.win_length, n_fft=self.filter_length,
                dtype=np.float32)
            # remove modulation effects
            # `device` is an attribute (not a method); move both the
            # envelope and the index tensor next to the signal.
            device = inverse_transform.device
            approx_nonzero_indices = torch.from_numpy(
                np.where(window_sum > tiny(window_sum))[0]).to(device)
            window_sum = torch.from_numpy(window_sum).to(device)
            inverse_transform[:, :, approx_nonzero_indices] /= \
                window_sum[approx_nonzero_indices]

            # scale by hop ratio
            inverse_transform *= float(self.filter_length) / self.hop_length

        # drop the half-frame of reflect padding added by `transform`
        half_filter = int(self.filter_length / 2)
        inverse_transform = inverse_transform[:, :, half_filter:]
        inverse_transform = inverse_transform[:, :, :-half_filter]

        return inverse_transform

    def forward(self, input_data):
        """Analyse then resynthesise ``input_data``.

        The intermediate spectrogram is kept on ``self.magnitude`` and
        ``self.phase``.
        """
        self.magnitude, self.phase = self.transform(input_data)
        reconstruction = self.inverse(self.magnitude, self.phase)
        return reconstruction
179
+
180
+
181
class TorchSTFT(torch.nn.Module):
    """STFT / inverse STFT built on ``torch.stft`` and ``torch.istft``.

    Parameters
    ----------
    filter_length : int
        FFT size passed as ``n_fft``.
    hop_length : int
        Stride, in samples, between consecutive frames.
    win_length : int
        Length of the analysis window.
    window : str
        Window specification handed to ``scipy.signal.get_window``.
    """

    def __init__(self, filter_length=800, hop_length=200, win_length=800, window='hann'):
        super().__init__()
        self.filter_length = filter_length
        self.hop_length = hop_length
        self.win_length = win_length
        # Plain tensor attribute (not a registered buffer), so it does not
        # follow `module.to(...)`; it is moved per call instead.
        self.window = torch.from_numpy(
            get_window(window, win_length, fftbins=True).astype(np.float32))

    def transform(self, input_data):
        """Return ``(magnitude, phase)`` of the complex STFT of ``input_data``."""
        forward_transform = torch.stft(
            input_data,
            self.filter_length, self.hop_length, self.win_length,
            # Keep the window on the input's device, as `inverse` does.
            window=self.window.to(input_data.device),
            return_complex=True)

        return torch.abs(forward_transform), torch.angle(forward_transform)

    def inverse(self, magnitude, phase):
        """Rebuild a waveform from ``magnitude`` and ``phase``."""
        inverse_transform = torch.istft(
            magnitude * torch.exp(phase * 1j),
            self.filter_length, self.hop_length, self.win_length,
            window=self.window.to(magnitude.device))

        # unsqueeze to stay consistent with conv_transpose1d implementation
        return inverse_transform.unsqueeze(-2)

    def forward(self, input_data):
        """Analyse then resynthesise ``input_data``.

        The intermediate spectrogram is kept on ``self.magnitude`` and
        ``self.phase``.
        """
        self.magnitude, self.phase = self.transform(input_data)
        reconstruction = self.inverse(self.magnitude, self.phase)
        return reconstruction
208
+
209
+