# Page-scrape residue (not part of the module): "Spaces: Build error".
from typing import Dict | |
import torch | |
from ddsp.core import tf_float32 | |
import tensorflow as tf | |
import ddsp | |
import numpy as np | |
from torch import tensor | |
from melody_synth.complex_torch_synth import SinSawSynth, DoubleSawSynth, TriangleSawSynth, SinTriangleSynth | |
from torchsynth.config import SynthConfig | |
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
class MelodyGenerator:
    """This is the only external interface of the melody_synth package.

    Each note of a melody is rendered with one of four two-oscillator
    voices, the notes are summed into one track at their onset positions,
    and the mix is low-pass filtered before being returned.
    """

    def __init__(self,
                 sample_rate: int,
                 n_note_samples: int,
                 n_melody_samples: int):
        """Create the four synth voices that share one synth configuration.

        Parameters
        ----------
        sample_rate: int
            Audio sample rate, forwarded to the synth configuration.
        n_note_samples: int
            Used together with ``sample_rate`` to derive the synth buffer
            length in seconds (``n_note_samples / sample_rate``).
        n_melody_samples: int
            Length, in samples, of the melody track built by ``get_melody``.
        """
        self.sample_rate = sample_rate
        self.n_note_samples = n_note_samples
        self.n_melody_samples = n_melody_samples

        synthconfig = SynthConfig(
            batch_size=1,
            reproducible=False,
            sample_rate=sample_rate,
            # float64 division keeps the buffer length exact for large counts.
            buffer_size_seconds=np.float64(n_note_samples) / np.float64(sample_rate),
        )
        self.Saw_Square_Voice = DoubleSawSynth(synthconfig)
        self.SinSawVoice = SinSawSynth(synthconfig)
        self.SinTriVoice = SinTriangleSynth(synthconfig)
        self.TriSawVoice = TriangleSawSynth(synthconfig)

    def _select_voice(self, osc_types):
        """Return ``(synth, shape_parameters)`` for an oscillator-type code.

        Codes 0-4 map to fixed voice/shape combinations; any other value
        falls through to the last combination (TriSawVoice with shape 0).
        """
        if osc_types == 0:
            return self.SinSawVoice, {("vco_2", "shape"): tensor([1])}
        if osc_types == 1:
            return self.SinSawVoice, {("vco_2", "shape"): tensor([0])}
        if osc_types == 2:
            return self.Saw_Square_Voice, {
                ("vco_1", "shape"): tensor([1]),
                ("vco_2", "shape"): tensor([0]),
            }
        if osc_types == 3:
            # This voice gets no shape parameter.
            return self.SinTriVoice, {}
        if osc_types == 4:
            return self.TriSawVoice, {("vco_2", "shape"): tensor([1])}
        return self.TriSawVoice, {("vco_2", "shape"): tensor([0])}

    def get_melody(self, params: Dict[str, float], midi) -> tf.Tensor:
        """Generates a melody audio from synth settings and a note list.

        Parameters
        ----------
        params: Dict[str, float]
            Dictionary of specifications (see Readme). Keys read here:
            "osc1_amp", "osc2_amp", "attack", "decay", "sustain", "release"
            (each defaulting to 0), "cutoff_freq" (default 4000) and
            "osc_types" (default 0).
        midi: sequence of (location, pitch, duration) triples
            Melody midi (see Readme). ``location`` is multiplied by the
            sample rate to obtain the note's onset in samples; ``pitch`` and
            ``duration`` are handed to the synth keyboard unchanged.

        Returns
        -------
        audio: tf.Tensor
            The low-pass filtered melody track (first batch row of the
            convolution output).
        """
        # np.float was a plain alias of the builtin float and was removed in
        # NumPy 1.24; the builtin gives the same values.
        osc1_amp = float(params.get("osc1_amp", 0))
        osc2_amp = float(params.get("osc2_amp", 0))
        attack = float(params.get("attack", 0))
        decay = float(params.get("decay", 0))
        sustain = float(params.get("sustain", 0))
        release = float(params.get("release", 0))
        cutoff_freq = params.get("cutoff_freq", 4000)

        syn_parameters = {
            ("adsr", "attack"): tensor([attack]),  # [0.0, 2.0]
            ("adsr", "decay"): tensor([decay]),  # [0.0, 2.0]
            ("adsr", "sustain"): tensor([sustain]),  # [0.0, 2.0]
            ("adsr", "release"): tensor([release]),  # [0.0, 2.0]
            ("adsr", "alpha"): tensor([3]),  # [0.1, 6.0]
            # Mixer parameter
            ("mixer", "vco_1"): tensor([osc1_amp]),  # [0, 1]
            ("mixer", "vco_2"): tensor([osc2_amp]),  # [0, 1]
            # Constant parameters:
            ("vco_1", "mod_depth"): tensor([0.0]),  # [-96, 96]
            ("vco_1", "tuning"): tensor([0.0]),  # [-24.0, 24]
            ("vco_2", "mod_depth"): tensor([0.0]),  # [-96, 96]
            ("vco_2", "tuning"): tensor([0.0]),  # [-24.0, 24]
        }

        synth, shape_parameters = self._select_voice(params.get("osc_types", 0))
        syn_parameters.update(shape_parameters)

        track = np.zeros(self.n_melody_samples)
        for location, pitch, duration in midi:
            syn_parameters[("keyboard", "midi_f0")] = tensor([pitch])
            syn_parameters[("keyboard", "duration")] = tensor([duration])
            synth.set_parameters(syn_parameters)
            # Only the audio is needed; the other two return values are unused.
            audio_out, _, _ = synth()
            single_note = audio_out[0]  # batch_size is 1, take the only row

            # Left-pad with silence up to the onset, right-pad generously,
            # then crop so every note lines up with the full-length track.
            onset_padding = np.zeros(int(location * self.sample_rate))
            tail_padding = np.zeros(self.n_melody_samples)
            placed_note = np.hstack(
                [onset_padding, single_note, tail_padding])[:self.n_melody_samples]
            track = track + placed_note

        # Low-pass the mix by convolving with a sinc impulse response
        # (2048 is passed as the second positional argument, presumably the
        # window size -- confirm against the ddsp API).
        cutoff_freq = tf_float32(cutoff_freq)
        impulse_response = ddsp.core.sinc_impulse_response(cutoff_freq,
                                                           2048,
                                                           self.sample_rate)
        track = tf_float32(track)
        # fft_convolve is given a leading batch axis; strip it again on return.
        return ddsp.core.fft_convolve(track[tf.newaxis, :], impulse_response)[0, :]