# Source: DeepFakes_Audio_Video_Image / data / augmentation_utils.py
# Uploaded by SivaResearch, commit b6d5990 ("demo"), 3.14 kB
import cv2
import librosa
import numpy as np
import albumentations
from albumentations import (Compose, ImageCompression, GaussNoise, HorizontalFlip,
PadIfNeeded, OneOf,ToGray, ShiftScaleRotate, GaussianBlur,
RandomBrightnessContrast, FancyPCA, HueSaturationValue, BasicTransform)
class AudioTransform(BasicTransform):
    """Base class for the audio augmentations defined in this module.

    It overrides the target mapping so that the ``data`` keyword argument is
    routed to the subclass's ``apply`` method, and overrides ``update_params``
    so that only the optional ``interpolation`` / ``fill_value`` attributes
    are forwarded.
    """

    @property
    def targets(self):
        """Return the mapping from target name to the function applied to it."""
        return dict(data=self.apply)

    def update_params(self, params, **kwargs):
        """Copy optional instance attributes into ``params`` and return it.

        ``interpolation`` and ``fill_value`` are added only when the instance
        actually defines them; ``params`` is modified in place.
        """
        for attr_name in ("interpolation", "fill_value"):
            if hasattr(self, attr_name):
                params[attr_name] = getattr(self, attr_name)
        return params
class TimeShifting(AudioTransform):
    """Randomly shift an audio signal in time, keeping its length.

    A shift (in samples) is drawn uniformly from ``[-max_shift, max_shift)``.
    The samples pushed past one end are dropped and the gap opened at the
    other end is filled with low-amplitude uniform noise in
    ``[-0.001, 0.001)``.

    Args:
        always_apply: forwarded to the base transform.
        p: probability of applying the transform, forwarded to the base.
        max_shift: largest shift magnitude in samples (default 80000, the
            value that was previously hard-coded).
    """

    def __init__(self, always_apply=False, p=0.5, max_shift=80000):
        # Keyword arguments keep this call independent of the base class's
        # positional parameter order.
        super(TimeShifting, self).__init__(always_apply=always_apply, p=p)
        self.max_shift = max_shift

    def apply(self, data, **params):
        """Return a time-shifted copy of ``data``.

        Args:
            data: ndarray of audio timeseries (assumed 1-D — the pieces are
                concatenated along the first axis with ``np.r_``).

        Returns:
            ndarray with the same number of samples as ``data``.
        """
        n_samples = len(data)
        shift = int(np.random.uniform(-self.max_shift, self.max_shift))
        # Clamp to the clip length: without this, a shift larger than the
        # clip yields an output of |shift| samples instead of len(data).
        shift = max(-n_samples, min(n_samples, shift))

        if shift >= 0:
            # Shift left: drop the first `shift` samples, pad the tail.
            padding = np.random.uniform(-0.001, 0.001, shift)
            return np.r_[data[shift:], padding]

        # Shift right: pad the head, drop the last `-shift` samples.
        padding = np.random.uniform(-0.001, 0.001, -shift)
        return np.r_[padding, data[:shift]]
class PitchShift(AudioTransform):
    """Shift the pitch of an audio signal by a fixed number of semitones.

    Args:
        always_apply: forwarded to the base transform.
        p: probability of applying the transform, forwarded to the base.
        n_steps: number of semitones to shift by. Must be set before the
            transform is applied; the ``None`` default is kept only for
            backward compatibility of the signature.
        sr: sampling rate of the audio in Hz passed to librosa (default
            16000, the value that was previously hard-coded).
    """

    def __init__(self, always_apply=False, p=0.5, n_steps=None, sr=16000):
        # Keyword arguments keep this call independent of the base class's
        # positional parameter order.
        super(PitchShift, self).__init__(always_apply=always_apply, p=p)
        self.n_steps = n_steps
        self.sr = sr

    def apply(self, data, **params):
        """Return ``data`` pitch-shifted by ``self.n_steps`` semitones.

        Args:
            data: ndarray of audio timeseries.

        Raises:
            TypeError: if ``n_steps`` was left as ``None``.
        """
        if self.n_steps is None:
            # Fail with an explicit message instead of letting None reach
            # librosa's arithmetic.
            raise TypeError(
                "PitchShift requires n_steps (number of semitones), got None"
            )
        return librosa.effects.pitch_shift(
            data, sr=self.sr, n_steps=self.n_steps
        )
class AddGaussianNoise(AudioTransform):
    """Add zero-mean Gaussian noise to an audio signal.

    Args:
        always_apply: forwarded to the base transform.
        p: probability of applying the transform, forwarded to the base.
        noise_std: standard deviation of the added noise (default 0.005, the
            scale that was previously hard-coded).
    """

    def __init__(self, always_apply=False, p=0.5, noise_std=0.005):
        # Keyword arguments keep this call independent of the base class's
        # positional parameter order.
        super(AddGaussianNoise, self).__init__(always_apply=always_apply, p=p)
        self.noise_std = noise_std

    def apply(self, data, **params):
        """Return ``data`` with Gaussian noise added element-wise.

        Args:
            data: ndarray of audio timeseries.

        Returns:
            ndarray of the same shape as ``data``.
        """
        # Draw noise with the full shape of the input rather than len(data),
        # so multi-channel (2-D) arrays receive one noise value per sample.
        noise = np.random.standard_normal(np.shape(data))
        return data + self.noise_std * noise
# Augmentation pipeline for image frames. Despite the "create_" prefix this
# is a ready-built Compose object, not a factory function.
create_frame_transforms = Compose(
    [
        # Compression, noise and blur artefacts.
        ImageCompression(quality_lower=60, quality_upper=100, p=0.5),
        GaussNoise(p=0.1),
        GaussianBlur(blur_limit=3, p=0.05),
        # Geometry: mirror, then pad up to at least 256x256.
        HorizontalFlip(),
        PadIfNeeded(
            min_height=256,
            min_width=256,
            border_mode=cv2.BORDER_CONSTANT,
        ),
        # One randomly chosen colour perturbation.
        OneOf(
            [
                RandomBrightnessContrast(),
                FancyPCA(),
                HueSaturationValue(),
            ],
            p=0.7,
        ),
        ToGray(p=0.2),
        # Small affine jitter; exposed borders use a constant fill.
        ShiftScaleRotate(
            shift_limit=0.1,
            scale_limit=0.2,
            rotate_limit=10,
            border_mode=cv2.BORDER_CONSTANT,
            p=0.5,
        ),
    ]
)
# Augmentation pipeline for raw audio: time shift, additive noise, then a
# 4-semitone pitch shift. Like the frame pipeline above, this is a
# ready-built Compose object rather than a factory function.
create_spec_transforms = Compose(
    [
        # p is deliberately below 1.0 so that some clips pass through
        # without a time shift.
        TimeShifting(p=0.9),
        AddGaussianNoise(p=0.8),
        PitchShift(p=0.5, n_steps=4),
    ]
)