hieupt committed · Commit 41ed787 · verified · 1 Parent(s): 52a0f2f

Upload utils.py

Files changed (1): data/utils.py (+68 -0)
data/utils.py ADDED
@@ -0,0 +1,68 @@
+import librosa
+import numpy as np
+import soundfile
+import torch
+
+
+def random_amplify(mix, targets, shapes, min, max):
+    '''
+    Data augmentation by randomly amplifying sources before adding them to form a new mixture
+    :param mix: Original mixture
+    :param targets: Source targets
+    :param shapes: Shape dict from model
+    :param min: Minimum possible amplification
+    :param max: Maximum possible amplification
+    :return: New data point as tuple (mix, targets)
+    '''
+    residual = mix  # start with original mix
+    for key in targets.keys():
+        if key != "mix":
+            residual -= targets[key]  # subtract all instruments (output is zero if all instruments add to mix)
+    mix = residual * np.random.uniform(min, max)  # also apply gain data augmentation to residual
+    for key in targets.keys():
+        if key != "mix":
+            targets[key] = targets[key] * np.random.uniform(min, max)
+            mix += targets[key]  # add instrument with gain data augmentation to mix
+    mix = np.clip(mix, -1.0, 1.0)
+    return crop_targets(mix, targets, shapes)
+
+
+def crop_targets(mix, targets, shapes):
+    '''
+    Crops target audio to the output shape required by the model given in "shapes"
+    '''
+    for key in targets.keys():
+        if key != "mix":
+            targets[key] = targets[key][:, shapes["output_start_frame"]:shapes["output_end_frame"]]
+    return mix, targets
+
+
+def load(path, sr=22050, mono=True, mode="numpy", offset=0.0, duration=None):
+    y, curr_sr = librosa.load(path, sr=sr, mono=mono, res_type='kaiser_fast', offset=offset, duration=duration)
+
+    if len(y.shape) == 1:
+        # Expand channel dimension
+        y = y[np.newaxis, :]
+
+    if mode == "pytorch":
+        y = torch.tensor(y)
+
+    return y, curr_sr
+
+
+def write_wav(path, audio, sr):
+    soundfile.write(path, audio.T, sr, "PCM_16")
+
+
+def resample(audio, orig_sr, new_sr, mode="numpy"):
+    if orig_sr == new_sr:
+        return audio
+
+    if isinstance(audio, torch.Tensor):
+        audio = audio.detach().cpu().numpy()
+
+    out = librosa.resample(audio, orig_sr, new_sr, res_type='kaiser_fast')
+
+    if mode == "pytorch":
+        out = torch.tensor(out)
+    return out
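
For orientation, below is a minimal usage sketch (not part of this commit) tying the helpers together: load() returns audio as a (channels, samples) float array, random_amplify() re-weights the stems and rebuilds the mixture before cropping the targets, and write_wav() stores the result as 16-bit PCM. The file paths, stem names, gain range, "shapes" window and the data.utils import path are illustrative assumptions. Note that the positional librosa.resample(audio, orig_sr, new_sr) call in utils.py matches older librosa releases; newer versions (0.10+) expect the sample rates as keyword arguments.

# Usage sketch only -- paths, stem names, gain range and "shapes" values are assumptions
from data.utils import load, random_amplify, resample, write_wav  # assumed import path

# Load a mixture as a (1, samples) float array at 22.05 kHz
mix, sr = load("example_mix.wav", sr=22050, mono=True, mode="numpy")

# Fabricate two stems that sum exactly to the mixture (stand-ins for real source targets)
vocals = 0.6 * mix
accompaniment = mix - vocals
targets = {"vocals": vocals, "accompaniment": accompaniment}

# Output window expected by the model; here the full length is kept (assumed values)
shapes = {"output_start_frame": 0, "output_end_frame": mix.shape[1]}

# Randomly re-weight each stem, rebuild the mixture, and crop targets to the output window
aug_mix, aug_targets = random_amplify(mix, targets, shapes, min=0.7, max=1.0)

# Resample the augmented mixture and store it as 16-bit PCM
aug_mix_44k = resample(aug_mix, sr, 44100)
write_wav("augmented_mix.wav", aug_mix_44k, 44100)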