Hguimaraes committed · Commit 379f06e · 1 parent: 0ae53cb

simple version for the challenge

Files changed:
- requirements.txt (+3, -1)
- tasks/assets/chainsaw_model.joblib (+3, -0)
- tasks/assets/selected_features.joblib (+3, -0)
- tasks/audio.py (+127, -7)
requirements.txt CHANGED

@@ -7,4 +7,6 @@ pydantic>=1.10.0
 python-dotenv>=1.0.0
 gradio>=4.0.0
 requests>=2.31.0
-librosa==0.10.2.post1
+librosa==0.10.2.post1
+tqdm==4.67.1
+lightgbm==4.5.0
tasks/assets/chainsaw_model.joblib ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c93aa663b8b332dafb175145ed71ce37edaff6a0a919c780bd33c8ea45df4fa8
+size 734390
tasks/assets/selected_features.joblib ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f252d9a5b0ecd44fc3e273103fa29859010817a29dbf2d0286395f8ca1d9a87b
+size 3380
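Both .joblib files above are Git LFS pointers, so only their hashes and sizes are versioned in the diff. For context, here is a minimal sketch of how such artifacts could be produced; this is an assumption, not the author's actual training code (which is not part of this commit). What is grounded: lightgbm==4.5.0 is pinned in requirements.txt, tasks/audio.py loads both files with joblib, and selected_features is used to index DataFrame columns, so it must be a list of feature names. The LGBMClassifier settings and feature names below are illustrative only.

# Hypothetical training/export sketch; the real training script is not in this commit.
import joblib
import numpy as np
import pandas as pd
from lightgbm import LGBMClassifier

# Stand-in feature table; in practice this would come from process_dataset()
# in tasks/audio.py, whose columns are stats like 'centroid_mean' or 'zcr_std'.
X = pd.DataFrame(
    np.random.rand(200, 4),
    columns=["centroid_mean", "zcr_mean", "rmse_std", "flux_max"],
)
y = np.random.randint(0, 2, size=200)  # binary chainsaw / no-chainsaw labels

selected_features = ["centroid_mean", "zcr_mean", "rmse_std"]  # assumed subset
model = LGBMClassifier()
model.fit(X[selected_features], y)

# Dump both artifacts where tasks/audio.py expects to load them.
joblib.dump(model, "tasks/assets/chainsaw_model.joblib")
joblib.dump(selected_features, "tasks/assets/selected_features.joblib")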
tasks/audio.py CHANGED

@@ -1,9 +1,14 @@
+import os
+import random
+import joblib
+import librosa
+import numpy as np
+import pandas as pd
+from tqdm import tqdm
 from fastapi import APIRouter
 from datetime import datetime
-from datasets import load_dataset
+from datasets import load_dataset, Audio
 from sklearn.metrics import accuracy_score
-import random
-import os
 
 from .utils.evaluation import AudioEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
@@ -17,6 +22,116 @@ DESCRIPTION = "Random Baseline"
 ROUTE = "/audio"
 
 
+def is_valid_duration(example):
+    """
+    Filter function to remove samples with decoding errors.
+    To be used with datasets.filter()
+    """
+    return len(example["audio"]["array"]) > 0
+
+
+def enhanced_dsp_pipeline(y, sr, n_fft=80, hop_length=40):
+    """Extract enhanced audio features."""
+    features = {}
+
+    # Normalize audio with a larger maximum value
+    y = librosa.util.normalize(y, norm=np.inf)
+
+    # Apply pre-emphasis to enhance high frequencies
+    y_pre = librosa.effects.preemphasis(y, coef=0.97)
+
+    # Compute spectrograms for both original and pre-emphasized signals
+    D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length)
+    D_pre = librosa.stft(y_pre, n_fft=n_fft, hop_length=hop_length)
+    S = np.abs(D)
+    S_pre = np.abs(D_pre)
+
+    # Core spectral features from original signal
+    features['centroid'] = librosa.feature.spectral_centroid(S=S, sr=sr).ravel()
+    features['roloff'] = librosa.feature.spectral_rolloff(S=S, sr=sr, roll_percent=0.85).ravel()
+    features['zcr'] = librosa.feature.zero_crossing_rate(y, frame_length=n_fft, hop_length=hop_length).ravel()
+    features['rmse'] = librosa.feature.rms(S=S, frame_length=n_fft).ravel()
+    features['flux'] = librosa.onset.onset_strength(y=y, sr=sr).ravel()
+
+    # Additional features from pre-emphasized signal
+    features['pre_centroid'] = librosa.feature.spectral_centroid(S=S_pre, sr=sr).ravel()
+    features['pre_roloff'] = librosa.feature.spectral_rolloff(S=S_pre, sr=sr, roll_percent=0.85).ravel()
+    features['pre_contrast'] = librosa.feature.spectral_contrast(S=S_pre, sr=sr, n_bands=2).ravel()
+
+    # Bandwidth at different frequency cutoffs
+    features['bandwidth_80'] = librosa.feature.spectral_bandwidth(S=S, sr=sr, p=0.8).ravel()
+    features['bandwidth_90'] = librosa.feature.spectral_bandwidth(S=S, sr=sr, p=0.9).ravel()
+
+    # MFCC with focused frequency range
+    mfcc = librosa.feature.mfcc(
+        y=y, sr=sr, n_fft=n_fft, hop_length=hop_length,
+        n_mfcc=13, fmin=20, fmax=sr/2
+    )
+    mfcc_delta = librosa.feature.delta(mfcc)
+
+    for idx, (v_mfcc, v_delta) in enumerate(zip(mfcc, mfcc_delta)):
+        features[f'mfcc_{idx}'] = v_mfcc.ravel()
+        features[f'mfcc_delta_{idx}'] = v_delta.ravel()
+
+    # Calculate statistics
+    stats_dict = {}
+    for k, v in features.items():
+        stats_dict[f'{k}_max'] = np.max(v)
+        stats_dict[f'{k}_min'] = np.min(v)
+        stats_dict[f'{k}_mean'] = np.mean(v)
+        stats_dict[f'{k}_std'] = np.std(v)
+
+    return stats_dict
+
+
+def segment_features(y, sr, segment_duration=0.5):
+    """Extract features from audio segments."""
+    segment_length = int(segment_duration * sr)
+    segments = [y[i:i + segment_length] for i in range(0, len(y), segment_length)]
+
+    all_features = []
+    for segment in segments:
+        if len(segment) >= segment_length // 2:
+            features = enhanced_dsp_pipeline(segment, sr)
+            all_features.append(features)
+
+    if not all_features:
+        return enhanced_dsp_pipeline(y, sr)
+
+    # Aggregate features across segments
+    aggregated_features = {}
+    for key in all_features[0].keys():
+        values = [f[key] for f in all_features]
+        aggregated_features[key] = np.mean(values)
+        aggregated_features[f"{key}_var"] = np.var(values)
+
+    return aggregated_features
+
+
+def process_dataset(dataset):
+    """Process the dataset and prepare features."""
+    features = []
+    labels = []
+
+    for d in tqdm(dataset):
+        y = d["audio"]["array"]
+        label = d["label"]
+
+        # Process original audio
+        segment_feats = segment_features(y, sr=4000)
+        features.append(segment_feats)
+        labels.append(label)
+
+    X = pd.DataFrame(features)
+    y = np.array(labels)
+
+    return X, y
+
+def evaluate_model(model, X_test, selected_features):
+    """Evaluate model on test set."""
+    X_test_selected = X_test[selected_features]
+    return model.predict(X_test_selected)
+
 
 @router.post(ROUTE, tags=["Audio Task"],
              description=DESCRIPTION)
@@ -52,11 +167,16 @@ async def evaluate_audio(request: AudioEvaluationRequest):
     # YOUR MODEL INFERENCE CODE HERE
     # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
     #--------------------------------------------------------------------------------------------
-
+
     # Make random predictions (placeholder for actual model inference)
-
-
-
+    test_dataset = test_dataset.filter(is_valid_duration)
+    test_dataset = test_dataset.cast_column("audio", Audio(sampling_rate=4000))
+
+    X_test, true_labels = process_dataset(test_dataset)
+    model = joblib.load('tasks/assets/chainsaw_model.joblib')
+    selected_features = joblib.load('tasks/assets/selected_features.joblib')
+    predictions = evaluate_model(model, X_test, selected_features)
+
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
    #--------------------------------------------------------------------------------------------
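For reference, a minimal smoke test of the pipeline this commit adds: filter, resample to 4 kHz, extract segment features, load the LFS artifacts, and predict. This sketch is not part of the commit; the synthetic 3-second noise clip stands in for a real sample, and it assumes the script runs from the repository root so that `tasks` is importable as a package.

# Hypothetical end-to-end check of the helpers added in tasks/audio.py.
import joblib
import numpy as np
import pandas as pd

from tasks.audio import segment_features, evaluate_model

sr = 4000                            # matches Audio(sampling_rate=4000) in the endpoint
y = np.random.randn(3 * sr)          # placeholder waveform, 3 s of white noise

feats = segment_features(y, sr=sr)   # dict of aggregated per-segment statistics
X_test = pd.DataFrame([feats])       # one row, same shape process_dataset() produces

model = joblib.load("tasks/assets/chainsaw_model.joblib")
selected_features = joblib.load("tasks/assets/selected_features.joblib")
print(evaluate_model(model, X_test, selected_features))  # one 0/1 prediction

On white noise the prediction itself is meaningless; the point is only that feature extraction, the saved feature subset, and the LightGBM model plug together end to end.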