# Code Author: Jonathan Whitaker
import librosa
import numpy as np
import soundfile as sf
import torch
from scipy.signal import savgol_filter
from tqdm import tqdm
# The driving audio file
audio_file = './sounds/bensound-cute.wav' #@param
# How many points in the base latent walk loop
n_points = 6 #@param
# Smooths the animation; smaller = jerkier. Must be odd
filter_window_size = 301 #@param
# How much to scale position based on the music vs. the base path
chr_scale = 0.5 #@param
base_scale = 0.3 #@param
# Load the file
X, sample_rate = sf.read(audio_file, dtype='float32')
X = X[:int(len(X) * 0.5)]  # Keep only the first half of the clip
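# Assumption, not in the original: the clip is stereo. If the file is mono,
# X comes back 1-D and the X[:, 0] below would fail, so add a channel axis:
if X.ndim == 1:
    X = X[:, None]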
# Remove percussive elements (keep the harmonic part of one channel)
harmonic = librosa.effects.harmonic(y=X[:, 0])
# Get chroma_stft (power in each of the 12 pitch classes over time)
chroma = librosa.feature.chroma_stft(y=harmonic, sr=sample_rate)
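# A small guard (assumption, not in the original): savgol_filter needs an odd
# window no longer than the number of chroma frames, so clamp it for short clips:
if filter_window_size >= chroma.shape[1]:
    filter_window_size = chroma.shape[1] - 1
if filter_window_size % 2 == 0:
    filter_window_size -= 1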
# Smooth these out
chroma = savgol_filter(chroma, filter_window_size, 3)
# Calculate how many frames we want
fps = 25
duration = X.shape[0] / sample_rate
print('Duration:', duration)
n_steps = int(fps * duration)
print('N frames:', n_steps, fps * duration)
# Anchor latents for the base loop, plus one latent direction per pitch class
latents = torch.randn(n_points, 256) * base_scale
chroma_latents = torch.randn(12, 256) * chr_scale
frames = []
for i in tqdm(range(n_steps)):
    # Which two anchor latents are we between, and how far between them?
    p1 = max(0, int(n_points * i / n_steps))
    p2 = min(n_points, int(n_points * i / n_steps) + 1) % n_points  # wraps back to 0
    frac = (i - (p1 * (n_steps / n_points))) / (n_steps / n_points)
    l = latents[p1] * (1 - frac) + latents[p2] * frac
    # Add the music's influence: nudge the latent along each pitch class's
    # direction, scaled by that note's smoothed energy at this point in the
    # track (note chr_scale applies both here and when chroma_latents was made)
    for c in range(12):
        scale_factor = chroma[c, int(i * chroma.shape[1] / n_steps)]
        l += chroma_latents[c] * chr_scale * scale_factor
    im = model.G(l.unsqueeze(0)).clamp_(0., 1.)  # model.G: the generator defined earlier in the notebook
    frame = (im[0].permute(1, 2, 0).detach().cpu().numpy() * 255).astype(np.uint8)
    frames.append(frame)
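# Not part of the original snippet: one possible way to turn the collected
# frames into a video with the driving audio muxed back in. A minimal sketch,
# assuming imageio (with the imageio-ffmpeg backend) and an ffmpeg binary are
# available; the filenames are placeholders.
import subprocess
import imageio

imageio.mimwrite('frames.mp4', frames, fps=fps)
sf.write('trimmed.wav', X, sample_rate)  # the half-length clip actually used
subprocess.run(['ffmpeg', '-y', '-i', 'frames.mp4', '-i', 'trimmed.wav',
                '-c:v', 'copy', '-c:a', 'aac', '-shortest', 'output.mp4'])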