TheStinger's picture
Added Mel spectrogram (#1)
ccd4978 verified
raw
history blame
3.88 kB
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import os
import soundfile as sf
import requests
import librosa.display
def download_file(url):
    """Download an audio file from a Google Drive share link.

    The file id is read from the second-to-last ``/``-separated segment of
    *url* (i.e. a URL shaped like ``.../<file_id>/<last_segment>``) and the
    file is fetched through the ``docs.google.com/uc?export=download``
    endpoint. The response body is written into the current working
    directory under a name made of the last URL segment plus ``.wav``.

    Args:
        url: Google Drive URL that contains the file id as its
            second-to-last path segment.

    Returns:
        The local filename the downloaded bytes were written to.

    Raises:
        ValueError: If *url* is empty or does not contain a file id segment.
        requests.HTTPError: If the download endpoint answers with an HTTP
            error status.
    """
    segments = url.split('/') if url else []
    if len(segments) < 2 or not segments[-2]:
        raise ValueError(
            'Expected a Google Drive URL of the form .../<file_id>/<name>, '
            f'got {url!r}'
        )

    file_id = segments[-2]
    download_url = f'https://docs.google.com/uc?export=download&id={file_id}'

    # A timeout keeps a stalled connection from blocking a queue worker
    # forever; raise_for_status() stops an HTTP error page from being saved
    # as if it were audio data.
    response = requests.get(download_url, allow_redirects=True, timeout=30)
    response.raise_for_status()

    local_filename = segments[-1] + '.wav'
    # Context manager guarantees the handle is flushed and closed.
    with open(local_filename, 'wb') as out_file:
        out_file.write(response.content)
    return local_filename
# Header shown at the top of the page (Markdown with inline HTML).
_HEADER_MARKDOWN = """
<h1><center>Audio Analyzer by Ilaria</center></h1>\n
<h3><center>Help me on <a href="https://ko-fi.com/ilariaowo/shop">Ko-Fi</a>!</center></h3>\n
## Special thanks to Alex Murkoff for helping me code it!
#### Need help with AI? Join [AI Hub](https://discord.gg/aihub)!\n
**Note**: Try to keep the audio length under **2 minutes**,
since long audio files dont work well with a static spectrogram
"""


def main():
    """Build the Audio Analyzer Gradio UI and launch it.

    The page consists of a header, an image slot for the spectrogram, an
    audio input with an "analyze" button next to a Markdown info panel, and
    a collapsible Google Drive downloader that fills the audio input.
    The app is queued (``max_size=1022``) and launched with ``share=True``.
    """
    # NOTE(review): the nesting of the layout containers below was
    # reconstructed; confirm the Accordion placement against the deployed UI.
    with gr.Blocks() as app:
        gr.Markdown(_HEADER_MARKDOWN)

        with gr.Row():
            image_output = gr.Image(type='filepath', interactive=False)

        with gr.Row():
            with gr.Column():
                audio_input = gr.Audio(type='filepath')
                create_spec_butt = gr.Button(
                    value='Create Spectrogram And Get Info',
                    variant='primary',
                )
            with gr.Column():
                output_markdown = gr.Markdown(value="", visible=True)

        with gr.Accordion('Audio Downloader', open=False):
            url_input = gr.Textbox(value='', label='Google Drive Audio URL')
            download_butt = gr.Button(value='Download audio', variant='primary')

        # Each handler is registered exactly once: every extra .click() call
        # adds another listener, which would run the download / analysis
        # again for the same button press.
        download_butt.click(
            fn=download_file,
            inputs=[url_input],
            outputs=[audio_input],
        )
        create_spec_butt.click(
            fn=create_spectrogram_and_get_info,
            inputs=[audio_input],
            outputs=[output_markdown, image_output],
        )

    app.queue(max_size=1022).launch(share=True)
# Bits per sample for the soundfile subtypes with a fixed sample width.
# Subtypes not listed here are reported with a bit depth of 0.
_BITS_PER_SAMPLE = {
    'PCM_S8': 8,
    'PCM_U8': 8,
    'PCM_16': 16,
    'PCM_24': 24,
    'PCM_32': 32,
    'FLOAT': 32,
    'DOUBLE': 64,
}


def _build_info_table(audio_file, audio_info):
    """Format file name, duration, channels and bit rates as a Markdown table."""
    bit_depth = _BITS_PER_SAMPLE.get(audio_info.subtype, 0)

    # Work in whole milliseconds so float rounding cannot drop a millisecond
    # when the duration is split into minutes / seconds / milliseconds.
    total_ms = int(round(audio_info.duration * 1000))
    minutes, remainder_ms = divmod(total_ms, 60_000)
    seconds, milliseconds = divmod(remainder_ms, 1000)

    # Both bit-rate rows are derived from the same figure (all channels
    # included) so they always agree with each other.
    bits_per_second = audio_info.samplerate * audio_info.channels * bit_depth
    speed_in_kbps = bits_per_second / 1000

    filename_without_extension, _ = os.path.splitext(os.path.basename(audio_file))

    rows = [
        "| Information | Value |",
        "| :---: | :---: |",
        f"| File Name | {filename_without_extension} |",
        f"| Duration | {minutes} minutes - {seconds} seconds - {milliseconds} milliseconds |",
        f"| Bitrate | {speed_in_kbps} kbp/s |",
        f"| Audio Channels | {audio_info.channels} |",
        f"| Samples per second | {audio_info.samplerate} Hz |",
        f"| Bit per second | {bits_per_second} bit/s |",
    ]
    return "\n" + "\n".join(rows) + "\n"


def create_spectrogram_and_get_info(audio_file):
    """Render a mel spectrogram of an audio file and summarize its properties.

    The audio is loaded at its native sample rate, converted to a 256-band
    mel power spectrogram in dB (relative to the peak), and saved as
    ``spectrogram.png`` in the current working directory. File metadata is
    read with ``soundfile.info`` and formatted as a Markdown table.

    Args:
        audio_file: Path of the audio file to analyze.

    Returns:
        A ``(info_table, image_path)`` tuple: the Markdown table and the
        path of the saved spectrogram PNG.

    Raises:
        ValueError: If *audio_file* is ``None`` or empty (no audio supplied).
    """
    if not audio_file:
        raise ValueError('No audio file was provided.')

    y, sr = librosa.load(audio_file, sr=None)
    # `y` is passed by keyword: newer librosa releases reject it positionally.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=256)
    # melspectrogram yields a power spectrogram by default, so the matching
    # dB conversion is power_to_db (amplitude_to_db would square it again).
    log_S = librosa.power_to_db(S, ref=np.max, top_db=256)

    # Use an explicit figure and always close it; otherwise every request
    # leaves another open figure behind in pyplot's global registry.
    fig, ax = plt.subplots(figsize=(12, 5.5))
    try:
        mesh = librosa.display.specshow(log_S, sr=sr, x_axis='time', ax=ax)
        fig.colorbar(mesh, ax=ax, format='%+2.0f dB', pad=0.01)
        fig.tight_layout(pad=0.5)
        fig.savefig('spectrogram.png', dpi=500)
    finally:
        plt.close(fig)

    info_table = _build_info_table(audio_file, sf.info(audio_file))

    # Markdown table first, then the spectrogram PNG path.
    return info_table, 'spectrogram.png'
# Build the Gradio interface and start serving it. This call is unguarded,
# so it also runs whenever the module is imported.
main()