import os
import pickle

import librosa
import numpy as np
import soundfile

from convert_wavs import convert_audio


AVAILABLE_EMOTIONS = {
    "neutral",
    "calm",
    "happy",
    "sad",
    "angry",
    "fear",
    "disgust",
    "ps",  # pleasant surprised
    "boredom",
}


def get_label(audio_config):
    """Return the dash-joined names of the features enabled in `audio_config`.

    The names always appear in the fixed order mfcc, chroma, mel, contrast,
    tonnetz, whatever the key order of `audio_config` is.

    e.g:
        audio_config = {'mfcc': True, 'chroma': True, 'contrast': False,
                        'tonnetz': False, 'mel': False}
        get_label(audio_config): 'mfcc-chroma'

    Raises:
        KeyError: if one of the five feature names is missing from
            `audio_config`.
    """
    features = ("mfcc", "chroma", "mel", "contrast", "tonnetz")
    return "-".join(feature for feature in features if audio_config[feature])


def get_dropout_str(dropout, n_layers=3):
    """Return the dropout rate(s) as a single underscore-separated string.

    - A list/tuple is rendered element by element: [0.3, 0.4] -> "0.3_0.4"
      (`n_layers` is ignored in this case).
    - A single number is repeated `n_layers` times: 0.3 -> "0.3_0.3_0.3".
    - Any other type yields None (kept for backward compatibility with
      callers that relied on the previous implicit None).
    """
    if isinstance(dropout, (list, tuple)):
        return "_".join(str(d) for d in dropout)
    # bool is a subclass of int; it is not a meaningful dropout rate
    if isinstance(dropout, (int, float)) and not isinstance(dropout, bool):
        return "_".join([str(dropout)] * n_layers)
    return None


def get_first_letters(emotions):
    """Return the upper-cased first letters of `emotions`, sorted, as one string.

    e.g: get_first_letters(["sad", "neutral", "happy"]) -> "HNS"
    """
    return "".join(sorted(e[0].upper() for e in emotions))


def extract_feature(file_name, **kwargs):
    """
    Extract features from already-decoded audio.

    Despite its name, `file_name` is NOT a path: it is an indexable pair
    where `file_name[0]` is the sample rate and `file_name[1]` is an array
    of samples exposing `.astype` (e.g. a numpy array).

    Features supported (enabled through truthy keyword arguments):
        - MFCC (mfcc), 40 coefficients
        - Chroma (chroma)
        - MEL Spectrogram Frequency (mel)
        - Contrast (contrast)
        - Tonnetz (tonnetz)

    Each enabled feature matrix is averaged along its second axis and the
    resulting vectors are concatenated, always in the order listed above.
    With no feature enabled an empty array is returned.

    e.g:
        `features = extract_feature((sample_rate, samples), mel=True, mfcc=True)`
    """
    mfcc = kwargs.get("mfcc")
    chroma = kwargs.get("chroma")
    mel = kwargs.get("mel")
    contrast = kwargs.get("contrast")
    tonnetz = kwargs.get("tonnetz")

    # NOTE: an earlier version read the audio from a path with `soundfile`
    # (converting it through `convert_audio`/ffmpeg when it could not be
    # opened); that path was disabled in favour of receiving decoded audio.
    # NOTE(review): samples are cast to float32 without rescaling; if callers
    # pass integer PCM (e.g. int16) the values are not normalised to [-1, 1]
    # -- confirm this matches how the models were trained.
    X = file_name[1].astype("float32")
    sample_rate = file_name[0]

    # the magnitude spectrogram is shared by chroma and contrast, so it is
    # computed once and only when at least one of them is requested
    if chroma or contrast:
        stft = np.abs(librosa.stft(X))

    # start from an empty float array so the output stays a flat vector even
    # when no feature is enabled
    parts = [np.array([])]
    if mfcc:
        parts.append(
            np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
        )
    if chroma:
        parts.append(
            np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
        )
    if mel:
        # `y` must be passed by keyword: recent librosa releases no longer
        # accept the signal as a positional argument
        parts.append(
            np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
        )
    if contrast:
        parts.append(
            np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
        )
    if tonnetz:
        parts.append(
            np.mean(
                librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,
                axis=0,
            )
        )
    return np.hstack(parts)


def get_best_estimators(classification):
    """
    Loads the estimators that are pickled in `grid` folder.

    Reads `grid/best_classifiers.pickle` when `classification` is truthy and
    `grid/best_regressors.pickle` otherwise; both paths are relative to the
    current working directory.

    Note that if you want to use different or more estimators,
    you can fine tune the parameters in `grid_search.py` script
    and run it again ( may take hours )
    """
    if classification:
        path = "grid/best_classifiers.pickle"
    else:
        path = "grid/best_regressors.pickle"
    # SECURITY: unpickling executes arbitrary code; only load files that
    # were produced locally / come from a trusted source.
    with open(path, "rb") as pickle_file:
        return pickle.load(pickle_file)


def get_audio_config(features_list):
    """
    Converts a list of features into a dictionary understandable by
    `data_extractor.AudioExtractor` class.

    Every known feature starts as False and is switched to True when it
    appears in `features_list`.

    e.g:
        get_audio_config(["mfcc", "mel"]) ->
        {'mfcc': True, 'chroma': False, 'mel': True,
         'contrast': False, 'tonnetz': False}

    Raises:
        TypeError: if `features_list` contains a name that is not one of
            the known features.
    """
    audio_config = {'mfcc': False, 'chroma': False, 'mel': False, 'contrast': False, 'tonnetz': False}
    for feature in features_list:
        if feature not in audio_config:
            raise TypeError(f"Feature passed: {feature} is not recognized.")
        audio_config[feature] = True
    return audio_config