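# NOTE: the six lines below are Hugging Face Spaces page residue captured
# together with the file (status banner, file size, commit hashes and the
# line-number gutter); they are not part of the program.
# Spaces:
# Runtime error
# Runtime error
# File size: 2,163 Bytes
# c165076 f21fcbc c165076 f21fcbc c165076 f21fcbc 17c370b c165076 d2ef383 c165076 |
# 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |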
import gradio
import torchaudio
from fastai.vision.all import *
from fastai.learner import load_learner
from torchvision.utils import save_image
from huggingface_hub import hf_hub_download
# Fetch the exported learner file from the Hugging Face Hub, then load it.
_MODEL_FILE = hf_hub_download(
    "kurianbenoy/music_genre_classification_baseline", "model.pkl"
)
model = load_learner(_MODEL_FILE)

# Directory whose entries are listed as examples in the interface.
EXAMPLES_PATH = Path("./examples")

# Class names, taken from the vocabulary stored on the learner's `dls`.
labels = model.dls.vocab

# Keyword arguments that are unpacked into `gradio.Interface(...)`.
interface_options = dict(
    title="Music Genre Classification",
    description="A simple baseline model for classifying music genres with fast.ai on [Kaggle competition data](https://www.kaggle.com/competitions/kaggle-pog-series-s01e02/data)",
    examples=[f"{EXAMPLES_PATH}/{entry.name}" for entry in EXAMPLES_PATH.iterdir()],
    interpretation="default",
    layout="horizontal",
    theme="default",
)

# Spectrogram framing: N_FFT is used as both FFT size and window length,
# HOP_LEN as the hop between consecutive frames.
N_FFT = 2048
HOP_LEN = 1024
def create_spectrogram(filename):
    """Load an audio file and return its min-max normalised mel spectrogram.

    The waveform is read with ``torchaudio.load``, turned into a 224-band
    mel power spectrogram (FFT size and window ``N_FFT``, hop ``HOP_LEN``),
    averaged over axis 0, converted to decibels and rescaled so that the
    smallest value becomes 0 and the largest becomes 1.

    Args:
        filename: Path of the audio file; passed straight to
            ``torchaudio.load``.

    Returns:
        The normalised spectrogram tensor. When the decibel spectrogram is
        constant (for example digital silence) an all-zero tensor is
        returned instead of NaNs.
    """
    audio, sr = torchaudio.load(filename)

    to_mel = torchaudio.transforms.MelSpectrogram(
        sample_rate=sr,
        n_fft=N_FFT,
        win_length=N_FFT,
        hop_length=HOP_LEN,
        center=True,
        pad_mode="reflect",
        power=2.0,
        norm="slaney",
        onesided=True,
        n_mels=224,
        mel_scale="htk",
    )
    # Axis 0 is the channel axis for torchaudio.load's default channels-first
    # output, so this mixes all channels down to one spectrogram.
    specgram = to_mel(audio).mean(axis=0)
    specgram = torchaudio.transforms.AmplitudeToDB()(specgram)

    # Min-max scale. After the shift the minimum is exactly 0.
    specgram = specgram - specgram.min()
    peak = specgram.max()
    # A constant spectrogram has peak == 0 here; dividing would yield 0/0 = NaN
    # everywhere, so leave the (all-zero) tensor untouched in that case.
    if peak > 0:
        specgram = specgram / peak
    return specgram
def create_image(filename, dest="temp.png"):
    """Render the spectrogram of an audio file to an image file.

    Args:
        filename: Path of the audio file; forwarded to
            ``create_spectrogram``.
        dest: Where the image is written. Defaults to ``"temp.png"``, the
            fixed location this function wrote to before the parameter
            existed, so existing callers are unaffected.

    Returns:
        The destination as a ``Path``.
    """
    # Previously `dest` was computed but never used and the literal path was
    # repeated in the save call; use the single variable for both.
    dest = Path(dest)
    specgram = create_spectrogram(filename)
    # Pass a plain string so the call does not depend on Path support in
    # the installed torchvision version.
    save_image(specgram, str(dest))
    return dest
def predict(img):
    """Classify an image and return a probability for every label.

    Args:
        img: Anything accepted by ``PILImage.create`` (the caller in this
            file passes an image file path).

    Returns:
        A dict mapping each entry of ``labels`` to the model's probability
        for it, as a Python float.
    """
    image = PILImage.create(img)
    # model.predict returns a triple; only the third item, the per-class
    # probabilities, is needed here.
    _, _, class_probs = model.predict(image)
    return {label: float(class_probs[idx]) for idx, label in enumerate(labels)}
def end2endpipeline(filename):
    """Run the whole audio-file-to-genre pipeline.

    First renders the audio's spectrogram via ``create_image`` (which
    writes it to ``temp.png``), then classifies that image with
    ``predict``.

    Args:
        filename: Path of the audio file to classify.

    Returns:
        The label-to-probability dict produced by ``predict``.
    """
    spectrogram_png = "temp.png"
    create_image(filename)
    genre_probs = predict(spectrogram_png)
    return genre_probs
# Input widget: audio from the microphone, handed to the pipeline as a file path.
audio_input = gradio.inputs.Audio(
    source="microphone",
    type="filepath",
    label="Record/ Drop audio",
)
# Output widget: label display limited to the five top classes.
label_output = gradio.outputs.Label(num_top_classes=5)

# Wire the end-to-end pipeline between the two widgets; the remaining
# keyword arguments come from interface_options.
demo = gradio.Interface(
    fn=end2endpipeline,
    inputs=audio_input,
    outputs=label_output,
    **interface_options,
)

# Keyword arguments for demo.launch: queueing on, no public share link.
launch_options = dict(enable_queue=True, share=False)
demo.launch(**launch_options)
|