Spaces:

WeixuanYuan
/

Sound_VAE

Build error

App Files Files Community

Sound_VAE / melody_synth /random_midi.py

WeixuanYuan

Upload 31 files

b88cc47 about 2 years ago

raw

history blame contribute delete

3.39 kB

	import numpy as np

	from configurations.read_configuration import get_conf_sample_rate, get_conf_stft_hyperparameter,\
	midi_parameter_range, get_conf_time_resolution, get_conf_max_n_notes
	from melody_synth.random_duration import RandomDuration
	from melody_synth.random_pitch import RandomPitch
	from melody_synth.random_rhythm import RandomRhythm


	class RandomMidi:
	    """Pipeline generating random midi together with its binary label.

	    A call chains three generators -- rhythm, pitch and duration -- and then
	    encodes the resulting note list with :meth:`get_encode`.
	    """

	    def __init__(self):
	        self.randomRhythm = RandomRhythm()
	        self.randomPitch = RandomPitch()
	        self.randomDuration = RandomDuration()
	        # Number of note slots in the label; fixes the label length in get_encode.
	        self.max_n_notes = get_conf_max_n_notes()

	    def __call__(self, strategy=None, *args, **kwargs):
	        """Assemble the pipeline for the given strategies and return random midi.

	        Parameters
	        ----------
	        strategy: Dict[str, str], optional
	            Strategy names under the keys "rhythm_strategy", "pitch_strategy"
	            and "duration_strategy" (see Readme). When None, a default set of
	            strategies is used.
	        *args, **kwargs
	            Accepted but not used by this method.

	        Returns
	        -------
	        encode, midi: numpy.ndarray, List[(float, float, float)]
	            encode -- Midi's label as a flat array of 0s and 1s
	            midi -- A list of (onset, pitch, duration) tuples, each tuple refers to a note
	        """
	        if strategy is None:
	            strategy = {
	                "rhythm_strategy": "non-test",
	                "pitch_strategy": "random_major",
	                "duration_strategy": "limited_random",
	            }

	        # Each stage receives the output of the previous one and extends it.
	        midi = self.randomRhythm(strategy["rhythm_strategy"])
	        midi = self.randomPitch(strategy["pitch_strategy"], midi)
	        midi = self.randomDuration(strategy["duration_strategy"], midi)

	        return self.get_encode(midi), midi

	    def get_encode(self, midi):
	        """Generate the label for a midi note list.

	        Parameters
	        ----------
	        midi: List[(onset, pitch, duration)]
	            A list of (onset, pitch, duration) tuples, each tuple refers to a note

	        Returns
	        -------
	        encode: numpy.ndarray
	            Flat float array of 0s and 1s whose length is always
	            ``(time_resolution + len(pitch_range) + len(duration_range)) * max_n_notes``.

	        Encoding method
	        ---------------
	        Each note becomes the concatenation of three one-hot segments: onset
	        position, pitch and duration. The per-note vectors are stacked in
	        order, then zero-padded or truncated to the fixed label length, so
	        notes beyond ``max_n_notes`` do not appear in the label.
	        """
	        duration_range = midi_parameter_range("duration")
	        pitch_range = midi_parameter_range("pitch")
	        time_resolution = get_conf_time_resolution()

	        # Time covered by one STFT frame (frame_step samples / sample rate);
	        # presumably seconds, matching the unit of the note onsets -- TODO confirm.
	        stft_conf = get_conf_stft_hyperparameter()
	        pixel_duration = stft_conf["frame_step"] / get_conf_sample_rate()

	        n_pitches = len(pitch_range)
	        single_note_encode_length = time_resolution + n_pitches + len(duration_range)
	        encode_length = single_note_encode_length * self.max_n_notes

	        # Loop invariants: candidate durations as an array and segment offsets.
	        duration_values = np.array(duration_range)
	        pitch_offset = time_resolution
	        duration_offset = time_resolution + n_pitches

	        note_encodes = []
	        for location, pitch, duration in midi:
	            location_index = int(float(location) / pixel_duration)
	            if location_index >= time_resolution:
	                # Stops at the first onset outside the time grid; all later
	                # notes are dropped too, which presumably relies on midi being
	                # ordered by onset -- TODO confirm.
	                break
	            pitch_index = pitch - pitch_range[0]
	            # Snap the duration to the closest allowed value.
	            duration_index = np.argmin(np.abs(duration_values - duration))

	            single_note_encode = np.zeros(single_note_encode_length)
	            single_note_encode[location_index] = 1
	            single_note_encode[pitch_offset + pitch_index] = 1
	            single_note_encode[duration_offset + duration_index] = 1
	            note_encodes.append(single_note_encode)

	        # Stack once (instead of re-copying inside the loop); the trailing zeros
	        # guarantee at least encode_length entries before the final truncation.
	        return np.hstack([*note_encodes, np.zeros(encode_length)])[:encode_length]