chaitanya9 committed on
Commit 9af4f2c · 1 Parent(s): 27bf1d6

Upload utils.py

Files changed (1)
  1. utils.py +130 -0
utils.py ADDED
@@ -0,0 +1,130 @@
+ import soundfile
+ import librosa
+ import numpy as np
+ import pickle
+ import os
+ from convert_wavs import convert_audio
+
+
+ AVAILABLE_EMOTIONS = {
+     "neutral",
+     "calm",
+     "happy",
+     "sad",
+     "angry",
+     "fear",
+     "disgust",
+     "ps",  # pleasant surprise
+     "boredom"
+ }
+
+
+ def get_label(audio_config):
+     """Returns the label corresponding to which features are to be extracted
+     e.g.:
+     audio_config = {'mfcc': True, 'chroma': True, 'contrast': False, 'tonnetz': False, 'mel': False}
+     get_label(audio_config) -> 'mfcc-chroma'
+     """
+     features = ["mfcc", "chroma", "mel", "contrast", "tonnetz"]
+     label = ""
+     for feature in features:
+         if audio_config[feature]:
+             label += f"{feature}-"
+     return label.rstrip("-")
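For illustration, a minimal usage sketch of get_label (not part of the uploaded file): it simply concatenates the enabled feature names in a fixed order:

    audio_config = {'mfcc': True, 'chroma': True, 'mel': False, 'contrast': False, 'tonnetz': False}
    print(get_label(audio_config))  # "mfcc-chroma"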
+
+
+ def get_dropout_str(dropout, n_layers=3):
+     if isinstance(dropout, list):
+         return "_".join([str(d) for d in dropout])
+     elif isinstance(dropout, float):
+         return "_".join([str(dropout) for _ in range(n_layers)])
+
+
+ def get_first_letters(emotions):
+     return "".join(sorted([e[0].upper() for e in emotions]))
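A small usage sketch for the two helpers above (illustrative only; judging by their names, the resulting tags are presumably used when naming saved models):

    print(get_dropout_str(0.3, n_layers=3))              # "0.3_0.3_0.3"
    print(get_dropout_str([0.2, 0.3, 0.4]))              # "0.2_0.3_0.4"
    print(get_first_letters(["sad", "angry", "happy"]))  # "AHS"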
+
+
+ def extract_feature(file_name, **kwargs):
+     """
+     Extract feature from audio data `file_name`.
+     Note: in this version `file_name` is a (sample_rate, samples) tuple
+     rather than a path; the original file-based loading is commented out below.
+     Features supported:
+         - MFCC (mfcc)
+         - Chroma (chroma)
+         - MEL Spectrogram Frequency (mel)
+         - Contrast (contrast)
+         - Tonnetz (tonnetz)
+     e.g.:
+     `features = extract_feature(audio, mel=True, mfcc=True)`
+     """
+     mfcc = kwargs.get("mfcc")
+     chroma = kwargs.get("chroma")
+     mel = kwargs.get("mel")
+     contrast = kwargs.get("contrast")
+     tonnetz = kwargs.get("tonnetz")
+     # try:
+     #     with soundfile.SoundFile(file_name) as sound_file:
+     #         pass
+     # except RuntimeError:
+     #     # not properly formatted, convert to 16000 sample rate & mono channel using ffmpeg
+     #     # get the basename
+     #     basename = os.path.basename(file_name)
+     #     dirname = os.path.dirname(file_name)
+     #     name, ext = os.path.splitext(basename)
+     #     new_basename = f"{name}_c.wav"
+     #     new_filename = os.path.join(dirname, new_basename)
+     #     v = convert_audio(file_name, new_filename)
+     #     if v:
+     #         raise NotImplementedError("Converting the audio files failed, make sure `ffmpeg` is installed on your machine and added to PATH.")
+     # else:
+     #     new_filename = file_name
+     # with soundfile.SoundFile(new_filename) as sound_file:
+     #     X = sound_file.read(dtype="float32")
+     #     sample_rate = sound_file.samplerate
+     # unpack the (sample_rate, samples) tuple; note the samples are cast,
+     # not rescaled, to float32
+     sample_rate = file_name[0]
+     X = file_name[1].astype("float32")
+     if chroma or contrast:
+         stft = np.abs(librosa.stft(X))
+     result = np.array([])
+     if mfcc:
+         mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
+         result = np.hstack((result, mfccs))
+     if chroma:
+         chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
+         result = np.hstack((result, chroma))
+     if mel:
+         mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
+         result = np.hstack((result, mel))
+     if contrast:
+         contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
+         result = np.hstack((result, contrast))
+     if tonnetz:
+         tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)
+         result = np.hstack((result, tonnetz))
+     return result
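An illustrative call to extract_feature, assuming the (sample_rate, samples) tuple input that the active code expects (the format a Gradio audio component produces); the synthetic sine tone here is purely a stand-in for real audio:

    sr = 16000
    tone = (np.sin(2 * np.pi * 440 * np.arange(sr) / sr) * 32767).astype("int16")
    features = extract_feature((sr, tone), mfcc=True, chroma=True, mel=True)
    print(features.shape)  # (180,): 40 MFCCs + 12 chroma + 128 mel bands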
+
+
+ def get_best_estimators(classification):
+     """
+     Loads the estimators that are pickled in the `grid` folder.
+     Note that if you want to use different or more estimators,
+     you can fine-tune the parameters in the `grid_search.py` script
+     and run it again (this may take hours).
+     """
+     if classification:
+         with open("grid/best_classifiers.pickle", "rb") as f:
+             return pickle.load(f)
+     else:
+         with open("grid/best_regressors.pickle", "rb") as f:
+             return pickle.load(f)
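A hypothetical usage sketch for get_best_estimators; it assumes each pickle stores a list of (estimator, params, cv_score) tuples as produced by grid_search.py, which should be verified against the actual pickles:

    estimators = get_best_estimators(classification=True)
    for estimator, params, cv_score in estimators:  # assumed tuple layout
        print(type(estimator).__name__, f"{cv_score:.4f}")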
+
+
+ def get_audio_config(features_list):
+     """
+     Converts a list of features into a dictionary understandable by
+     the `data_extractor.AudioExtractor` class
+     """
+     audio_config = {'mfcc': False, 'chroma': False, 'mel': False, 'contrast': False, 'tonnetz': False}
+     for feature in features_list:
+         if feature not in audio_config:
+             raise TypeError(f"Feature passed: {feature} is not recognized.")
+         audio_config[feature] = True
+     return audio_config
+
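Finally, a short sketch of get_audio_config, the inverse of get_label above:

    audio_config = get_audio_config(["mfcc", "mel"])
    # {'mfcc': True, 'chroma': False, 'mel': True, 'contrast': False, 'tonnetz': False}
    get_audio_config(["mfcc", "spectrogram"])  # raises TypeError: not recognized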