Spaces:
Runtime error
Runtime error
File size: 3,142 Bytes
b6d5990 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
import cv2
import librosa
import numpy as np
import albumentations
from albumentations import (Compose, ImageCompression, GaussNoise, HorizontalFlip,
PadIfNeeded, OneOf,ToGray, ShiftScaleRotate, GaussianBlur,
RandomBrightnessContrast, FancyPCA, HueSaturationValue, BasicTransform)
class AudioTransform(BasicTransform):
    """Common base for the audio augmentations defined in this file.

    It overrides two hooks of ``BasicTransform``: ``targets`` and
    ``update_params``. Subclasses are expected to provide ``apply``.
    """

    @property
    def targets(self):
        # The only target key is "data"; it is handled by this transform's
        # own ``apply`` method.
        return {"data": self.apply}

    def update_params(self, params, **kwargs):
        """Copy optional instance settings into ``params`` and return it.

        ``interpolation`` and ``fill_value`` are added only when the instance
        actually has such an attribute. ``kwargs`` is accepted but not used.
        """
        for attr_name in ("interpolation", "fill_value"):
            if hasattr(self, attr_name):
                params[attr_name] = getattr(self, attr_name)
        return params
class TimeShifting(AudioTransform):
    """Shift an audio signal in time by a random number of samples.

    The samples pushed past one end are dropped and the gap opened at the
    other end is filled with low-amplitude uniform noise, so the output has
    the same length as the input.
    """

    def __init__(self, always_apply=False, p=0.5, max_shift=80000):
        """
        always_apply, p : forwarded unchanged to the base transform
        max_shift : largest shift, in samples, in either direction
                    (defaults to 80000, the value previously hard-coded)
        """
        super(TimeShifting, self).__init__(always_apply, p)
        self.max_shift = max_shift

    def apply(self, data, **params):
        """
        data : ndarray of audio timeseries

        Returns a new array of the same length as ``data``.
        """
        shift = int(np.random.uniform(-self.max_shift, self.max_shift))

        # A shift larger than the clip would otherwise discard every sample
        # and return ``abs(shift)`` noise values, i.e. an array whose length
        # no longer matches the input. Clamp to the clip length instead.
        n_samples = len(data)
        shift = max(-n_samples, min(n_samples, shift))

        if shift >= 0:
            # Drop the first ``shift`` samples and pad the tail with noise.
            padding = np.random.uniform(-0.001, 0.001, shift)
            return np.r_[data[shift:], padding]

        # Negative shift: pad the head with noise and drop the last samples.
        padding = np.random.uniform(-0.001, 0.001, -shift)
        return np.r_[padding, data[:shift]]
class PitchShift(AudioTransform):
    """Shift the pitch of an audio signal by a fixed number of semitones."""

    def __init__(self, always_apply=False, p=0.5, n_steps=None, sr=16000):
        """
        always_apply, p : forwarded unchanged to the base transform
        n_steps : number of semitones to shift by. The default ``None`` is
                  passed through to librosa as-is, so callers are expected
                  to supply a number.
        sr : sampling rate handed to librosa (defaults to 16000, the value
             previously hard-coded)
        """
        super(PitchShift, self).__init__(always_apply, p)
        self.n_steps = n_steps
        self.sr = sr

    def apply(self, data, **params):
        """
        data : ndarray of audio timeseries

        Returns the result of ``librosa.effects.pitch_shift``.
        """
        return librosa.effects.pitch_shift(data, sr=self.sr, n_steps=self.n_steps)
class AddGaussianNoise(AudioTransform):
    """Add scaled standard-normal (Gaussian) noise to an audio signal."""

    def __init__(self, always_apply=False, p=0.5, noise_level=0.005):
        """
        always_apply, p : forwarded unchanged to the base transform
        noise_level : factor the standard-normal noise is multiplied by
                      before it is added (defaults to 0.005, the value
                      previously hard-coded)
        """
        super(AddGaussianNoise, self).__init__(always_apply, p)
        self.noise_level = noise_level

    def apply(self, data, **params):
        """
        data : ndarray of audio timeseries

        Returns a new array ``data + noise_level * noise``; ``data`` itself
        is not modified.
        """
        # One noise value per element along the first axis of ``data``.
        noise = np.random.randn(len(data))
        return data + self.noise_level * noise
# Augmentation pipeline for image frames. The transforms are listed in the
# order they are handed to Compose.
create_frame_transforms = Compose(
    [
        ImageCompression(quality_lower=60, quality_upper=100, p=0.5),
        GaussNoise(p=0.1),
        GaussianBlur(blur_limit=3, p=0.05),
        HorizontalFlip(),
        PadIfNeeded(
            min_height=256,
            min_width=256,
            border_mode=cv2.BORDER_CONSTANT,
        ),
        # Exactly one of the three colour transforms is a candidate each time.
        OneOf(
            [
                RandomBrightnessContrast(),
                FancyPCA(),
                HueSaturationValue(),
            ],
            p=0.7,
        ),
        ToGray(p=0.2),
        ShiftScaleRotate(
            shift_limit=0.1,
            scale_limit=0.2,
            rotate_limit=10,
            border_mode=cv2.BORDER_CONSTANT,
            p=0.5,
        ),
    ]
)
# Augmentation pipeline for raw audio, built from the AudioTransform
# subclasses defined in this file.
create_spec_transforms = Compose(
    [
        # Kept below p=1.0 on purpose: per the original author's note, the
        # nets "should get some difficulties".
        TimeShifting(p=0.9),
        AddGaussianNoise(p=0.8),
        PitchShift(n_steps=4, p=0.5),
    ]
)
|