|
import os |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
from scipy.io import wavfile |
|
from scipy import signal |
|
from PIL import Image |
|
|
|
def create_spectrogram(wav_file, output_folder):
    """Render the spectrogram of a WAV file and save it as a PNG image.

    The audio is read with ``scipy.io.wavfile``; two-dimensional (multi-channel)
    data is averaged down to a single channel. The power spectrogram is drawn
    in dB with the ``inferno`` colormap, with the axes hidden and no padding,
    and written to ``output_folder`` under the WAV file's base name with a
    ``.png`` extension. The saved image is then passed to
    ``convert_white_to_black``.

    Args:
        wav_file: Path of the WAV file to read.
        output_folder: Directory that receives the PNG. It is not created
            here, so it must already exist.
    """
    sample_rate, data = wavfile.read(wav_file)

    # Down-mix multi-channel audio by averaging across the channel axis.
    if data.ndim == 2:
        data = np.mean(data, axis=1)

    frequencies, times, spectrogram = signal.spectrogram(data, sample_rate)

    # Bins with zero power become -inf dB. The values are left as they are;
    # only the divide-by-zero RuntimeWarning is silenced.
    with np.errstate(divide='ignore'):
        power_db = 10 * np.log10(spectrogram)

    nyquist = sample_rate / 2
    filename = os.path.splitext(os.path.basename(wav_file))[0] + '.png'
    image_path = os.path.join(output_folder, filename)

    fig = plt.figure(figsize=(10, 4))
    try:
        ax = fig.gca()
        ax.pcolormesh(times, frequencies, power_db, shading='gouraud', cmap='inferno')
        ax.set_ylim(0, nyquist)
        ax.axis('off')
        ax.set_xlim(0, times[-1])
        # Black patch starting at the end of the time axis (kept from the
        # original drawing sequence).
        ax.add_patch(plt.Rectangle((times[-1], 0), 10, nyquist, facecolor='black', edgecolor='none'))
        fig.savefig(image_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even if plotting or saving fails, so
        # figures do not pile up when many files are processed.
        plt.close(fig)

    convert_white_to_black(image_path)
|
|
|
def convert_white_to_black(image_path):
    """Replace every pure-white pixel of an image with black, in place.

    The image is opened, converted to RGB, and each pixel whose three
    channels all equal 255 is set to ``(0, 0, 0)``. The result is saved back
    to the same path.

    Args:
        image_path: Path of the image file to rewrite.
    """
    # Close the source file before the same path is overwritten below.
    with Image.open(image_path) as img:
        pixels = np.array(img.convert("RGB"))

    # A pixel is pure white only when all three channels equal 255.
    white_pixels = (pixels == 255).all(axis=-1)
    pixels[white_pixels] = [0, 0, 0]

    Image.fromarray(pixels).save(image_path)
|
|
|
def convert_wav_to_spectrograms(input_folder, output_folder):
    """Create a spectrogram PNG for every ``.wav`` file in a folder.

    Entries of ``input_folder`` whose name ends with ``.wav`` are passed to
    ``create_spectrogram`` one by one, and a confirmation line is printed
    for each. Sub-folders are not traversed.

    Args:
        input_folder: Directory that is scanned for ``.wav`` files.
        output_folder: Directory that receives the PNG images; created
            (including missing parents) when it does not exist yet.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    # Sort the listing so files are processed in a deterministic order.
    for file in sorted(os.listdir(input_folder)):
        if not file.endswith('.wav'):
            continue
        wav_file_path = os.path.join(input_folder, file)
        create_spectrogram(wav_file_path, output_folder)
        print(f"Converted {file} to spectrogram.")
|
|
|
if __name__ == "__main__":
    # Script entry point: convert every WAV file in ./dataset and write the
    # resulting images to ./spectrograms.
    convert_wav_to_spectrograms(input_folder='dataset', output_folder='spectrograms')
|
|