# Spaces: Runtime error
# (Status text captured from the hosting page together with this file; it is
# not part of the program.)
"""App to demonstrate the fish sound classifier.

Includes spectrogram-generation code from
https://github.com/axiom-data-science/project-classify-fish-sounds,
which was copied into this directory and slightly modified to write to an
in-memory buffer, because the archived repository is not pip-installable.
"""
import io | |
import fastai.vision.all as fai_vision | |
import gradio as gr | |
import numpy as np | |
from huggingface_hub import from_pretrained_fastai | |
from PIL import Image | |
from create_spectrograms import ( | |
FFTConfig, | |
load_wav, | |
calc_stft, | |
plot_spec, | |
fish_filter | |
) | |
# Pretrained model, fetched from the Hugging Face Hub when this module loads.
MODEL = from_pretrained_fastai('axds/classify-fish-sounds')

# Class index -> display label. The position of each entry in the tuple is
# its class index, so the order here must not change.
LABELS = dict(enumerate((
    'No call',
    'Black grouper call 1',
    'Black grouper call 2',
    'Black grouper grunt',
    'Unidentified sound',
    'Red grouper 1',
    'Red grouper 2',
    'Red hind 1',
    'Red hind 2',
    'Red hind 3',
    'Goliath grouper',
    'Goliath grouper multi-phase',
)))

# FFT settings built with no arguments; used as the default configuration
# for every spectrogram.
FFT_CONFIG = FFTConfig()
def classify_audio(inp, model=MODEL, labels=LABELS):
    """Classify an audio clip from its spectrogram.

    Args:
        inp: Audio input, passed unchanged to ``Spectrogram``.
        model: Object exposing ``predict(array)``. Element ``[2]`` of the
            prediction is indexed by class index to get per-class scores
            (presumably the probability tensor of a fastai ``Learner`` --
            confirm against the loaded model).
        labels: Mapping of class index to label, or a sequence of labels
            whose positions are the class indices.

    Returns:
        tuple: ``(image, confidences)`` -- the spectrogram as a PIL image and
        a dict mapping every label to its score as a ``float``.
    """
    with Spectrogram(inp) as spec_buffer:
        # Image.open is lazy: decode the pixels now, because the buffer is
        # closed as soon as this block exits.
        image = Image.open(spec_buffer)
        image.load()

    # Drop any alpha channel. Converting (instead of slicing ``[:, :, :3]``)
    # also copes with grayscale/palette images, whose arrays are 2-D.
    image_arr = np.array(image.convert("RGB"))

    # Predict!
    results = model.predict(image_arr)
    scores = results[2]

    # Walk the mapping's own keys rather than assuming they are 0..n-1;
    # plain sequences of labels are still accepted.
    if hasattr(labels, "items"):
        indexed_labels = labels.items()
    else:
        indexed_labels = enumerate(labels)
    confidences = {label: float(scores[idx]) for idx, label in indexed_labels}

    return image, confidences
class Spectrogram:
    """Context manager yielding an in-memory spectrogram of an audio input.

    On entry, ``plot_spec`` is called with the input, an ``io.BytesIO``
    buffer and the FFT configuration; the buffer is then rewound and
    returned. On exit the buffer is closed, so its contents must be fully
    consumed inside the ``with`` block.

    Args:
        inp: Audio input, passed unchanged to ``plot_spec``.
        fft_config: FFT configuration passed to ``plot_spec``.
    """

    def __init__(self, inp, fft_config=FFT_CONFIG):
        self.inp = inp
        self.buffer = io.BytesIO()
        self.fft_config = fft_config

    def __enter__(self):
        # A previous ``with`` block leaves the buffer closed; start from a
        # fresh one so the same instance can be entered again.
        if self.buffer.closed:
            self.buffer = io.BytesIO()
        try:
            plot_spec(self.inp, self.buffer, self.fft_config)
        except BaseException:
            # __exit__ is not invoked when __enter__ raises, so release the
            # buffer here before propagating the error.
            self.buffer.close()
            raise
        # Hand the buffer back positioned at the start, ready for reading.
        self.buffer.seek(0)
        return self.buffer

    def __exit__(self, exc_typ, exc_value, exc_traceback):
        # Returns None, so exceptions raised in the ``with`` body propagate.
        self.buffer.close()
# NOTE(review): ``gr.inputs`` / ``gr.outputs`` look like Gradio's legacy
# component namespaces -- confirm they exist in the Gradio version this app
# is pinned to.

# Uploaded audio is handed to the callback in Gradio's "numpy" form.
audio_input = gr.inputs.Audio(source="upload", type="numpy")

# Matches the (image, confidences) pair returned by ``classify_audio``;
# the label widget shows the three highest-scoring classes.
result_outputs = [
    gr.outputs.Image(),
    gr.outputs.Label(num_top_classes=3),
]

# Sample clips offered as one-click examples in the UI.
example_files = ["sample-0002.wav", "sample-20088.wav", "sample-2990.wav"]

iface = gr.Interface(
    fn=classify_audio,
    inputs=audio_input,
    outputs=result_outputs,
    examples=example_files,
    title="Classify fish sounds from audio files",
)

# Start the web app as soon as the module is executed.
iface.launch()