Spaces:

WeixuanYuan
/

Sound_VAE

Build error

App Files Files Community

Sound_VAE / melody_synth /melody_generator.py

WeixuanYuan

Upload 31 files

b88cc47 about 2 years ago

raw

history blame contribute delete

4.67 kB

	from typing import Dict
	import torch
	from ddsp.core import tf_float32
	import tensorflow as tf
	import ddsp
	import numpy as np
	from torch import tensor
	from melody_synth.complex_torch_synth import SinSawSynth, DoubleSawSynth, TriangleSawSynth, SinTriangleSynth
	from torchsynth.config import SynthConfig

	# Torch device name for this module: the GPU when CUDA is usable, else the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"


	class MelodyGenerator:
	    """Render note sequences to audio with torchsynth voices and a DDSP filter.

	    This is the only external interface of the melody_synth package.
	    """

	    def __init__(self,
	                 sample_rate: int,
	                 n_note_samples: int,
	                 n_melody_samples: int):
	        """Create the four two-oscillator voices that share one SynthConfig.

	        Parameters
	        ----------
	        sample_rate: int
	            Audio sample rate; also used to turn note onsets into sample offsets.
	        n_note_samples: int
	            Samples per rendered note. The synth buffer length in seconds is
	            ``n_note_samples / sample_rate``.
	        n_melody_samples: int
	            Length, in samples, of the track the notes are mixed into.
	        """
	        self.sample_rate = sample_rate
	        self.n_note_samples = n_note_samples
	        self.n_melody_samples = n_melody_samples
	        synthconfig = SynthConfig(
	            batch_size=1, reproducible=False, sample_rate=sample_rate,
	            buffer_size_seconds=np.float64(n_note_samples) / np.float64(sample_rate)
	        )
	        self.Saw_Square_Voice = DoubleSawSynth(synthconfig)
	        self.SinSawVoice = SinSawSynth(synthconfig)
	        self.SinTriVoice = SinTriangleSynth(synthconfig)
	        self.TriSawVoice = TriangleSawSynth(synthconfig)

	    def get_melody(self, params: Dict[str, float], midi) -> "tf.Tensor":
	        """Synthesize the melody described by ``midi`` and filter it.

	        Parameters
	        ----------
	        params: Dict[str, float]
	            Dictionary of specifications (see Readme). Keys read here:
	            "osc1_amp", "osc2_amp", "attack", "decay", "sustain", "release"
	            (each defaults to 0), "cutoff_freq" (defaults to 4000) and
	            "osc_types" (defaults to 0).
	        midi:
	            Sequence of ``(location, pitch, duration)`` triples (see Readme).
	            ``location`` is multiplied by the sample rate to get the onset
	            sample; ``pitch`` and ``duration`` are passed to the synth
	            keyboard as "midi_f0" and "duration".

	        Returns
	        -------
	        tf.Tensor
	            The mixed track after convolution with the sinc impulse response
	            built from "cutoff_freq" (batch axis removed).

	        Raises
	        ------
	        ValueError
	            If a note's onset is negative.
	        """
	        # np.float was only an alias of the builtin float and was removed in
	        # NumPy 1.24, so the builtin is used directly.
	        osc1_amp = float(params.get("osc1_amp", 0))
	        osc2_amp = float(params.get("osc2_amp", 0))
	        attack = float(params.get("attack", 0))
	        decay = float(params.get("decay", 0))
	        sustain = float(params.get("sustain", 0))
	        release = float(params.get("release", 0))
	        cutoff_freq = params.get("cutoff_freq", 4000)

	        # Trailing comments are the value ranges noted by the original author.
	        syn_parameters = {
	            ("adsr", "attack"): tensor([attack]),  # [0.0, 2.0]
	            ("adsr", "decay"): tensor([decay]),  # [0.0, 2.0]
	            ("adsr", "sustain"): tensor([sustain]),  # [0.0, 2.0]
	            ("adsr", "release"): tensor([release]),  # [0.0, 2.0]
	            ("adsr", "alpha"): tensor([3]),  # [0.1, 6.0]

	            # Mixer parameter
	            ("mixer", "vco_1"): tensor([osc1_amp]),  # [0, 1]
	            ("mixer", "vco_2"): tensor([osc2_amp]),  # [0, 1]

	            # Constant parameters:
	            ("vco_1", "mod_depth"): tensor([0.0]),  # [-96, 96]
	            ("vco_1", "tuning"): tensor([0.0]),  # [-24.0, 24]
	            ("vco_2", "mod_depth"): tensor([0.0]),  # [-96, 96]
	            ("vco_2", "tuning"): tensor([0.0]),  # [-24.0, 24]
	        }

	        synth = self._select_voice(params.get("osc_types", 0), syn_parameters)
	        track = self._render_track(synth, syn_parameters, midi)

	        cutoff_freq = tf_float32(cutoff_freq)
	        impulse_response = ddsp.core.sinc_impulse_response(cutoff_freq,
	                                                           2048,
	                                                           self.sample_rate)
	        track = tf_float32(track)
	        # fft_convolve works on batched audio: add a batch axis, then drop it.
	        return ddsp.core.fft_convolve(track[tf.newaxis, :], impulse_response)[0, :]

	    def _select_voice(self, osc_types, syn_parameters):
	        """Return the voice for ``osc_types`` and add its "shape" entries to ``syn_parameters``."""
	        if osc_types == 0:
	            synth = self.SinSawVoice
	            syn_parameters[("vco_2", "shape")] = tensor([1])
	        elif osc_types == 1:
	            synth = self.SinSawVoice
	            syn_parameters[("vco_2", "shape")] = tensor([0])
	        elif osc_types == 2:
	            synth = self.Saw_Square_Voice
	            syn_parameters[("vco_1", "shape")] = tensor([1])
	            syn_parameters[("vco_2", "shape")] = tensor([0])
	        elif osc_types == 3:
	            synth = self.SinTriVoice
	        elif osc_types == 4:
	            synth = self.TriSawVoice
	            syn_parameters[("vco_2", "shape")] = tensor([1])
	        else:
	            # Any other value falls back to the triangle/saw voice with shape 0.
	            synth = self.TriSawVoice
	            syn_parameters[("vco_2", "shape")] = tensor([0])
	        return synth

	    def _render_track(self, synth, syn_parameters, midi) -> np.ndarray:
	        """Render every note with ``synth`` and sum them into one track of ``n_melody_samples``."""
	        track = np.zeros(self.n_melody_samples)
	        for location, pitch, duration in midi:
	            syn_parameters[("keyboard", "midi_f0")] = tensor([pitch])
	            syn_parameters[("keyboard", "duration")] = tensor([duration])
	            synth.set_parameters(syn_parameters)

	            audio_out, _parameters, _is_train = synth()
	            single_note = audio_out[0]
	            # NumPy can only read CPU tensors that do not require grad, so
	            # convert explicitly before mixing.
	            if isinstance(single_note, torch.Tensor):
	                single_note = single_note.detach().cpu().numpy()
	            single_note = np.asarray(single_note)

	            start = int(location * self.sample_rate)
	            if start < 0:
	                raise ValueError("negative note onsets are not allowed")
	            # Add the note in place, clipped at the end of the track; a note
	            # that starts past the end contributes nothing.
	            end = min(start + single_note.shape[0], self.n_melody_samples)
	            if end > start:
	                track[start:end] += single_note[:end - start]
	        return track