Hguimaraes committed
Commit 379f06e · 1 Parent(s): 0ae53cb

simple version for the challenge

requirements.txt CHANGED
@@ -7,4 +7,6 @@ pydantic>=1.10.0
 python-dotenv>=1.0.0
 gradio>=4.0.0
 requests>=2.31.0
-librosa==0.10.2.post1
+librosa==0.10.2.post1
+tqdm==4.67.1
+lightgbm==4.5.0
tasks/assets/chainsaw_model.joblib ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c93aa663b8b332dafb175145ed71ce37edaff6a0a919c780bd33c8ea45df4fa8
+size 734390
tasks/assets/selected_features.joblib ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f252d9a5b0ecd44fc3e273103fa29859010817a29dbf2d0286395f8ca1d9a87b
+size 3380
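
Both .joblib files are committed as Git LFS pointers, so the repository itself stores only the version/oid/size stanzas above; the actual binaries (734390 and 3380 bytes) live in LFS. As a quick smoke test (not part of this commit), after a git lfs pull they should deserialize directly with joblib; the printed class name is an assumption based on the lightgbm pin added to requirements.txt:

import joblib

# Hypothetical sanity check: confirm the LFS-tracked artifacts load.
model = joblib.load("tasks/assets/chainsaw_model.joblib")
selected_features = joblib.load("tasks/assets/selected_features.joblib")

# Expected (assumed): an LGBMClassifier plus the list of feature columns
# used to subset the feature DataFrame at inference time.
print(type(model).__name__)    # e.g. "LGBMClassifier" (inferred, not confirmed)
print(len(selected_features))  # number of selected feature columns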
tasks/audio.py CHANGED
@@ -1,9 +1,14 @@
+import os
+import random
+import joblib
+import librosa
+import numpy as np
+import pandas as pd
+from tqdm import tqdm
 from fastapi import APIRouter
 from datetime import datetime
-from datasets import load_dataset
+from datasets import load_dataset, Audio
 from sklearn.metrics import accuracy_score
-import random
-import os
 
 from .utils.evaluation import AudioEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
@@ -17,6 +22,116 @@ DESCRIPTION = "Random Baseline"
 ROUTE = "/audio"
 
 
+def is_valid_duration(example):
+    """
+    Filter function to remove samples with decoding errors.
+    To be used with datasets.filter()
+    """
+    return len(example["audio"]["array"]) > 0
+
+
+def enhanced_dsp_pipeline(y, sr, n_fft=80, hop_length=40):
+    """Extract enhanced audio features."""
+    features = {}
+
+    # Normalize audio with a larger maximum value
+    y = librosa.util.normalize(y, norm=np.inf)
+
+    # Apply pre-emphasis to enhance high frequencies
+    y_pre = librosa.effects.preemphasis(y, coef=0.97)
+
+    # Compute spectrograms for both original and pre-emphasized signals
+    D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length)
+    D_pre = librosa.stft(y_pre, n_fft=n_fft, hop_length=hop_length)
+    S = np.abs(D)
+    S_pre = np.abs(D_pre)
+
+    # Core spectral features from original signal
+    features['centroid'] = librosa.feature.spectral_centroid(S=S, sr=sr).ravel()
+    features['roloff'] = librosa.feature.spectral_rolloff(S=S, sr=sr, roll_percent=0.85).ravel()
+    features['zcr'] = librosa.feature.zero_crossing_rate(y, frame_length=n_fft, hop_length=hop_length).ravel()
+    features['rmse'] = librosa.feature.rms(S=S, frame_length=n_fft).ravel()
+    features['flux'] = librosa.onset.onset_strength(y=y, sr=sr).ravel()
+
+    # Additional features from pre-emphasized signal
+    features['pre_centroid'] = librosa.feature.spectral_centroid(S=S_pre, sr=sr).ravel()
+    features['pre_roloff'] = librosa.feature.spectral_rolloff(S=S_pre, sr=sr, roll_percent=0.85).ravel()
+    features['pre_contrast'] = librosa.feature.spectral_contrast(S=S_pre, sr=sr, n_bands=2).ravel()
+
+    # Bandwidth at different frequency cutoffs
+    features['bandwidth_80'] = librosa.feature.spectral_bandwidth(S=S, sr=sr, p=0.8).ravel()
+    features['bandwidth_90'] = librosa.feature.spectral_bandwidth(S=S, sr=sr, p=0.9).ravel()
+
+    # MFCC with focused frequency range
+    mfcc = librosa.feature.mfcc(
+        y=y, sr=sr, n_fft=n_fft, hop_length=hop_length,
+        n_mfcc=13, fmin=20, fmax=sr/2
+    )
+    mfcc_delta = librosa.feature.delta(mfcc)
+
+    for idx, (v_mfcc, v_delta) in enumerate(zip(mfcc, mfcc_delta)):
+        features[f'mfcc_{idx}'] = v_mfcc.ravel()
+        features[f'mfcc_delta_{idx}'] = v_delta.ravel()
+
+    # Calculate statistics
+    stats_dict = {}
+    for k, v in features.items():
+        stats_dict[f'{k}_max'] = np.max(v)
+        stats_dict[f'{k}_min'] = np.min(v)
+        stats_dict[f'{k}_mean'] = np.mean(v)
+        stats_dict[f'{k}_std'] = np.std(v)
+
+    return stats_dict
+
+
+def segment_features(y, sr, segment_duration=0.5):
+    """Extract features from audio segments."""
+    segment_length = int(segment_duration * sr)
+    segments = [y[i:i + segment_length] for i in range(0, len(y), segment_length)]
+
+    all_features = []
+    for segment in segments:
+        if len(segment) >= segment_length // 2:
+            features = enhanced_dsp_pipeline(segment, sr)
+            all_features.append(features)
+
+    if not all_features:
+        return enhanced_dsp_pipeline(y, sr)
+
+    # Aggregate features across segments
+    aggregated_features = {}
+    for key in all_features[0].keys():
+        values = [f[key] for f in all_features]
+        aggregated_features[key] = np.mean(values)
+        aggregated_features[f"{key}_var"] = np.var(values)
+
+    return aggregated_features
+
+
+def process_dataset(dataset):
+    """Process the dataset and prepare features."""
+    features = []
+    labels = []
+
+    for d in tqdm(dataset):
+        y = d["audio"]["array"]
+        label = d["label"]
+
+        # Process original audio
+        segment_feats = segment_features(y, sr=4000)
+        features.append(segment_feats)
+        labels.append(label)
+
+    X = pd.DataFrame(features)
+    y = np.array(labels)
+
+    return X, y
+
+def evaluate_model(model, X_test, selected_features):
+    """Evaluate model on test set."""
+    X_test_selected = X_test[selected_features]
+    return model.predict(X_test_selected)
+
 
 @router.post(ROUTE, tags=["Audio Task"],
              description=DESCRIPTION)
@@ -52,11 +167,16 @@ async def evaluate_audio(request: AudioEvaluationRequest):
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
-
+
     # Make random predictions (placeholder for actual model inference)
-    true_labels = test_dataset["label"]
-    predictions = [random.randint(0, 1) for _ in range(len(true_labels))]
-
+    test_dataset = test_dataset.filter(is_valid_duration)
+    test_dataset = test_dataset.cast_column("audio", Audio(sampling_rate=4000))
+
+    X_test, true_labels = process_dataset(test_dataset)
+    model = joblib.load('tasks/assets/chainsaw_model.joblib')
+    selected_features = joblib.load('tasks/assets/selected_features.joblib')
+    predictions = evaluate_model(model, X_test, selected_features)
+
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
     #--------------------------------------------------------------------------------------------
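
The training side that produced chainsaw_model.joblib and selected_features.joblib is not part of this commit. For context, a minimal sketch of how the artifacts could be rebuilt with the helpers added above; the dataset identifier, split, feature-selection step, and LGBMClassifier hyperparameters are placeholders, not the author's actual pipeline:

import joblib
from datasets import load_dataset, Audio
from lightgbm import LGBMClassifier

from tasks.audio import is_valid_duration, process_dataset

# Placeholder dataset id; the real training source is not in this commit.
train = load_dataset("<dataset-id>", split="train")
train = train.filter(is_valid_duration)
train = train.cast_column("audio", Audio(sampling_rate=4000))

# Reuse the committed feature pipeline: per-segment DSP features
# aggregated into one row per clip.
X_train, y_train = process_dataset(train)

# Placeholder selection: keep every extracted column; the committed
# selected_features.joblib presumably stores a reduced subset.
selected_features = list(X_train.columns)

model = LGBMClassifier(n_estimators=200, learning_rate=0.05)  # assumed params
model.fit(X_train[selected_features], y_train)

joblib.dump(model, "tasks/assets/chainsaw_model.joblib")
joblib.dump(selected_features, "tasks/assets/selected_features.joblib")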