Spaces:

chaitanya9
/

emotion_recognizer

Runtime error

App Files Files Community

chaitanya9 committed on Nov 30, 2021

Commit

9af4f2c

1 Parent(s): 27bf1d6

Upload utils.py

Browse files

Files changed (1) hide show

utils.py +130 -0

utils.py ADDED Viewed

	@@ -0,0 +1,130 @@

+import soundfile
+import librosa
+import numpy as np
+import pickle
+import os
+from convert_wavs import convert_audio
+AVAILABLE_EMOTIONS = {
+    "neutral",
+    "calm",
+    "happy",
+    "sad",
+    "angry",
+    "fear",
+    "disgust",
+    "ps", # pleasant surprised
+    "boredom"
+}
+def get_label(audio_config):
+    """Returns label corresponding to which features are to be extracted
+        e.g:
+    audio_config = {'mfcc': True, 'chroma': True, 'contrast': False, 'tonnetz': False, 'mel': False}
+    get_label(audio_config): 'mfcc-chroma'
+    """
+    features = ["mfcc", "chroma", "mel", "contrast", "tonnetz"]
+    label = ""
+    for feature in features:
+        if audio_config[feature]:
+            label += f"{feature}-"
+    return label.rstrip("-")
+def get_dropout_str(dropout, n_layers=3):
+    if isinstance(dropout, list):
+        return "_".join([ str(d) for d in dropout])
+    elif isinstance(dropout, float):
+        return "_".join([ str(dropout) for i in range(n_layers) ])
+def get_first_letters(emotions):
+    return "".join(sorted([ e[0].upper() for e in emotions ]))
+def extract_feature(file_name, **kwargs):
+    """
+    Extract feature from audio file `file_name`
+        Features supported:
+            - MFCC (mfcc)
+            - Chroma (chroma)
+            - MEL Spectrogram Frequency (mel)
+            - Contrast (contrast)
+            - Tonnetz (tonnetz)
+        e.g:
+        `features = extract_feature(path, mel=True, mfcc=True)`
+    """
+    mfcc = kwargs.get("mfcc")
+    chroma = kwargs.get("chroma")
+    mel = kwargs.get("mel")
+    contrast = kwargs.get("contrast")
+    tonnetz = kwargs.get("tonnetz")
+    # try:
+    #     with soundfile.SoundFile(file_name) as sound_file:
+    #         pass
+    # except RuntimeError:
+    #     # not properly formated, convert to 16000 sample rate & mono channel using ffmpeg
+    #     # get the basename
+    #     basename = os.path.basename(file_name)
+    #     dirname  = os.path.dirname(file_name)
+    #     name, ext = os.path.splitext(basename)
+    #     new_basename = f"{name}_c.wav"
+    #     new_filename = os.path.join(dirname, new_basename)
+    #     v = convert_audio(file_name, new_filename)
+    #     if v:
+    #         raise NotImplementedError("Converting the audio files failed, make sure `ffmpeg` is installed in your machine and added to PATH.")
+    # else:
+    #     new_filename = file_name
+    # with soundfile.SoundFile(new_filename) as sound_file:
+    X = file_name[1].astype("float32")
+        #X = sound_file.read(dtype="float32")
+    sample_rate = file_name[0] #sound_file.samplerate
+    #sample_rate = sound_file.samplerate
+    if chroma or contrast:
+        stft = np.abs(librosa.stft(X))
+    result = np.array([])
+    if mfcc:
+        mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
+        result = np.hstack((result, mfccs))
+    if chroma:
+        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
+        result = np.hstack((result, chroma))
+    if mel:
+        mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
+        result = np.hstack((result, mel))
+    if contrast:
+        contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
+        result = np.hstack((result, contrast))
+    if tonnetz:
+        tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)
+        result = np.hstack((result, tonnetz))
+    return result
+def get_best_estimators(classification):
+    """
+    Loads the estimators that are pickled in `grid` folder
+    Note that if you want to use different or more estimators,
+    you can fine tune the parameters in `grid_search.py` script
+    and run it again ( may take hours )
+    """
+    if classification:
+        return pickle.load(open("grid/best_classifiers.pickle", "rb"))
+    else:
+        return pickle.load(open("grid/best_regressors.pickle", "rb"))
+def get_audio_config(features_list):
+    """
+    Converts a list of features into a dictionary understandable by
+    `data_extractor.AudioExtractor` class
+    """
+    audio_config = {'mfcc': False, 'chroma': False, 'mel': False, 'contrast': False, 'tonnetz': False}
+    for feature in features_list:
+        if feature not in audio_config:
+            raise TypeError(f"Feature passed: {feature} is not recognized.")
+        audio_config[feature] = True
+    return audio_config