TristanBehrens committed on
Commit 87ae0b7
1 Parent(s): 2d7a385

Initial commit

Files changed (5)
  1. app.py +245 -0
  2. assets/asciilogo.txt +11 -0
  3. requirements.txt +11 -0
  4. source/languagemodel.py +288 -0
  5. source/utilities.py +331 -0
app.py ADDED
@@ -0,0 +1,245 @@
+import streamlit as st
+from source.languagemodel import LanguageModel
+from source.utilities import (
+    convert_tokens_to_songdata,
+    convert_songdata_to_notesequence,
+    convert_songdata_to_pianoroll,
+    convert_notesequence_to_wave,
+    convert_notesequence_to_midi
+)
+
+# Define the MIDI instruments.
+midi_instruments = {
+    "Harpsichord": 6,
+    "Church Organ": 19,
+    "Piano": 0,
+}
+
+# Load the model once and cache it.
+@st.cache_resource
+def load_model():
+    model = LanguageModel("TristanBehrens/bach-garland-mambaplus")
+    return model
+model = load_model()
+
+
+# Initialize the session state if it doesn't exist yet.
+if "token_sequence" not in st.session_state:
+    st.session_state.token_sequence = "GARLAND_START"
+    st.session_state.song_data = None
+    st.session_state.piano_roll = None
+    st.session_state.wave = None
+    st.session_state.note_sequence = None
+    st.session_state.midi_file_content = None
+    st.session_state.temperature = 0.1
+    st.session_state.bpm = 100
+    st.session_state.instrument = "Piano"
+
+
+# Define the main function.
+def main():
+
+    columns = st.columns([0.7, 0.3])
+
+    # Set up the Streamlit application.
+    column = columns.pop(0)
+    with column:
+
+        # Change the color of the a-tag to (255, 75, 75).
+        st.markdown("<style>a:link { color: #FF4B4B; } a:visited { color: #FF4B4B; }</style>", unsafe_allow_html=True)
+
+        # Add a title.
+        st.title("Garland Composer")
+        linkedin_url = "https://huggingface.co/TristanBehrens/bach-garland-mambaplus/"
+        x_url = "https://huggingface.co/TristanBehrens/bach-garland-mambaplus/"
+        st.write(f"By Dr. Tristan Behrens. Find me on [LinkedIn]({linkedin_url}) and [X]({x_url}).")
+        hf_url = "https://huggingface.co/TristanBehrens/bach-garland-mambaplus/"
+        st.write(f"Model available on [Hugging Face]({hf_url}).")
+
+    # Add a picture.
+    column = columns.pop(0)
+    with column:
+        st.write(" ")
+        st.write(" ")
+        st.write(" ")
+        st.image("garland.jpg", use_column_width=True)
+
+    # Add a horizontal line.
+    st.markdown("---")
+
+    # Create three columns for the settings.
+    columns = st.columns(3)
+
+    # Add a slider to control the temperature.
+    state_temperature = st.session_state.temperature
+    with columns.pop(0):
+        temperature = st.slider("Temperature", 0.0, 1.0, state_temperature)
+        st.session_state.temperature = temperature
+
+    # Add a slider to control the BPM.
+    state_bpm = st.session_state.bpm
+    with columns.pop(0):
+        bpm = st.slider("BPM", 80, 120, state_bpm, 5)
+        st.session_state.bpm = bpm
+
+    # Dropdown for the instrument.
+    state_instrument = st.session_state.instrument
+    with columns.pop(0):
+        instrument = st.selectbox("Instrument", list(midi_instruments.keys()), index=list(midi_instruments.keys()).index(state_instrument))
+        st.session_state.instrument = instrument
+
+    # Get the token sequence from the session state.
+    token_sequence = st.session_state.token_sequence
+
+    # Columns for the buttons.
+    columns = st.columns(5)
+
+    # Add a button to generate the next bar.
+    column = columns.pop(0)
+    with column:
+        if st.button("Add a bar", use_container_width=True):
+            token_sequence = extend_sequence(model, token_sequence, temperature)
+            refresh(token_sequence, bpm, instrument)
+
+    # Add a button to compose a longer piece automatically.
+    column = columns.pop(0)
+    with column:
+        if st.button("Auto compose", use_container_width=True):
+            token_sequence = auto_compose(model, token_sequence, temperature)
+            refresh(token_sequence, bpm, instrument)
+
+    # Add a button to remove the last bar.
+    column = columns.pop(0)
+    with column:
+        if st.button("Remove last", use_container_width=True):
+            token_sequence = shortened_sequence(token_sequence)
+            refresh(token_sequence, bpm, instrument)
+
+    # Add a button to reset the sequence.
+    column = columns.pop(0)
+    if token_sequence != "GARLAND_START":
+        with column:
+            if st.button("Reset", use_container_width=True):
+                token_sequence = "GARLAND_START"
+                refresh(token_sequence, bpm, instrument)
+
+    # Provide a download button for the MIDI file.
+    column = columns.pop(0)
+    if "midi_file_content" in st.session_state and st.session_state.midi_file_content is not None:
+        with column:
+            midi_file_content = st.session_state.midi_file_content
+            st.download_button(
+                label="Download MIDI",
+                data=midi_file_content,
+                file_name="music.mid",
+                mime="audio/midi",
+                use_container_width=True
+            )
+
+    # Add a horizontal line.
+    st.markdown("---")
+
+    # Display the piano roll.
+    if "piano_roll" in st.session_state and st.session_state.piano_roll is not None:
+        st.image(st.session_state.piano_roll)
+
+    # Display an audio player.
+    if "wave" in st.session_state and st.session_state.wave is not None:
+        st.audio(st.session_state.wave, format="audio/wav", sample_rate=44100, autoplay=True)
+
+    # Add a horizontal line.
+    st.markdown("---")
+
+    # Tell the user whether the model considers the piece finished.
+    if token_sequence.endswith("GARLAND_END"):
+        st.write("The AI believes that the music is finished.")
+    else:
+        st.write("The AI believes that the music is not finished.")
+
+
+def auto_compose(model, token_sequence, temperature):
+
+    # Keep adding bars until the model emits GARLAND_END or the iteration limit is reached.
+    max_iterations = 100
+    for _ in range(max_iterations):
+        token_sequence = extend_sequence(model, token_sequence, temperature)
+        if token_sequence.endswith("GARLAND_END"):
+            break
+    return token_sequence
+
+
+def extend_sequence(model, token_sequence, temperature):
+
+    # Replace the last GARLAND_END token with NEXT.
+    if token_sequence.endswith("GARLAND_END"):
+        token_sequence = token_sequence.replace("GARLAND_END", "NEXT")
+
+    # The maximum length of the generated music.
+    max_length = 16_384
+
+    # When to stop the generation.
+    end_tokens = ["NEXT", "GARLAND_END"]
+
+    # Compose the music iteratively, bar by bar.
+    output_dict = model.generate(
+        prompt=token_sequence,
+        temperature=temperature,
+        max_length=max_length,
+        end_tokens=end_tokens,
+        forbidden_tokens=["[PAD]", "[EOS]"],
+        return_structured_output=True
+    )
+    output = output_dict["output"]
+    return output
+
+
+def shortened_sequence(token_sequence):
+
+    # Find the position of the next-to-last NEXT (or GARLAND_END) token and cut the sequence there.
+    next_tokens = token_sequence.split()
+    next_positions = [i for i, x in enumerate(next_tokens) if x == "NEXT" or x == "GARLAND_END"]
+    if len(next_positions) <= 1:
+        token_sequence = "GARLAND_START"
+    else:
+        next_position = next_positions[-2]
+        token_sequence = " ".join(next_tokens[:next_position + 1])
+    return token_sequence
+
+
+def refresh(token_sequence="GARLAND_START", bpm=120, instrument="Piano"):
+
+    # Put the token sequence into the session state.
+    st.session_state.token_sequence = token_sequence
+
+    # Convert to song data.
+    song_data = convert_tokens_to_songdata(token_sequence)
+    song_data["bpm"] = bpm
+    st.session_state.song_data = song_data
+
+    # Set the instrument.
+    for track in song_data["tracks"]:
+        track["instrument"] = midi_instruments[instrument]
+
+    # Convert to piano roll.
+    piano_roll = convert_songdata_to_pianoroll(song_data)
+    st.session_state.piano_roll = piano_roll
+
+    # Convert to note sequence.
+    note_sequence = convert_songdata_to_notesequence(song_data)
+    st.session_state.note_sequence = note_sequence
+
+    # Render the note sequence to audio.
+    wave = convert_notesequence_to_wave(note_sequence)
+    st.session_state.wave = wave
+
+    # Get the MIDI file content.
+    midi_file_content = convert_notesequence_to_midi(note_sequence)
+    st.session_state.midi_file_content = midi_file_content
+
+    # Rerun the app.
+    st.rerun()
+
+
+if __name__ == "__main__":
+    main()
assets/asciilogo.txt ADDED
@@ -0,0 +1,11 @@
+▄█ █▄ ▄████████ ▄█ ▄█ ▀█████████▄ ▄████████ ███ █▄ ███▄▄▄▄ ███▄▄▄▄ ▄████████
+███ ███ ███ ███ ███ ███ ███ ███ ███ ███ ███ ███ ███▀▀▀██▄ ███▀▀▀██▄ ███ ███
+███ ███ ███ █▀ ███ ███▌ ███ ███ ███ ███ ███ ███ ███ ███ ███ ███ ███ ███
+▄███▄▄▄▄███▄▄ ▄███▄▄▄ ███ ███▌ ▄███▄▄▄██▀ ▄███▄▄▄▄██▀ ███ ███ ███ ███ ███ ███ ███ ███
+▀▀███▀▀▀▀███▀ ▀▀███▀▀▀ ███ ███▌ ▀▀███▀▀▀██▄ ▀▀███▀▀▀▀▀ ███ ███ ███ ███ ███ ███ ▀███████████
+███ ███ ███ █▄ ███ ███ ███ ██▄ ▀███████████ ███ ███ ███ ███ ███ ███ ███ ███
+███ ███ ███ ███ ███▌ ▄ ███ ███ ███ ███ ███ ███ ███ ███ ███ ███ ███ ███ ███
+███ █▀ ██████████ █████▄▄██ █▀ ▄█████████▀ ███ ███ ████████▀ ▀█ █▀ ▀█ █▀ ███ █▀
+▀ ███ ███
+
+By Dr. Tristan Behrens
requirements.txt ADDED
@@ -0,0 +1,11 @@
+dacite==1.8.1
+colorama==0.4.6
+omegaconf==2.3.0
+streamlit==1.38.0
+note_seq==0.0.5
+pyfluidsynth==1.3.2
+torch==2.2.0
+transformers==4.44.0
+mamba-ssm==2.2.2
+einops==0.8.0
+mambapy==1.2.0
source/languagemodel.py ADDED
@@ -0,0 +1,288 @@
+# Helibrunna - A HuggingFace compatible xLSTM trainer.
+# Copyright (c) 2024 Dr. Tristan Behrens
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import os
+import glob
+from omegaconf import OmegaConf
+from transformers import PreTrainedTokenizerFast
+import torch
+from safetensors.torch import load_file
+import time
+from .utilities import display_logo, model_from_config
+
+
+class LanguageModel:
+
+    def __init__(self, model_path_or_repo, config_overrides={}, mask_special_tokens=True, device="auto"):
+        """
+        Initializes the LanguageModel object.
+
+        Args:
+            model_path_or_repo (str): The path to the model or the repository ID.
+
+        Raises:
+            ValueError: If the model checkpoint, tokenizer, config, or weights are not found,
+                or if the model could not be downloaded.
+
+        Returns:
+            None
+        """
+
+        # Set the mask_special_tokens flag.
+        self.mask_special_tokens = mask_special_tokens
+
+        # Set the device. CPU is the default.
+        if device != "auto":
+
+            # Check if CUDA is available.
+            if not torch.cuda.is_available() and device == "cuda":
+                raise ValueError("CUDA is not available on this system.")
+
+            # Check if MPS is available.
+            if not torch.backends.mps.is_available() and device == "mps":
+                raise ValueError("MPS is not available on this system.")
+
+            # Set the device.
+            self.device = device
+
+        # Select the device automatically.
+        else:
+
+            # Fall back to CPU.
+            self.device = "cpu"
+
+            # Use CUDA if it is available.
+            if torch.cuda.is_available():
+                self.device = "cuda"
+
+            # See if MPS is available.
+            # Note: This is disabled for now. It's not working as expected. It is very slow.
+            #if torch.backends.mps.is_available():
+            #    self.device = "mps"
+
+        # Display the logo.
+        display_logo()
+
+        # Download the model if it doesn't exist locally. Or at least try to.
+        if not os.path.exists(model_path_or_repo):
+            from huggingface_hub import snapshot_download
+            try:
+                model_path = snapshot_download(repo_id=model_path_or_repo)
+                tokenizer_path = model_path
+            except Exception as e:
+                raise ValueError(f"Failed to download the model: {e}")
+
+        # Use a local model.
+        else:
+            # Set the model path and tokenizer path.
+            model_path = None
+            tokenizer_path = model_path_or_repo
+
+            # Find all the checkpoint folders, i.e. folders that start with "checkpoint-". Then pick the last one.
+            checkpoint_folders = glob.glob(os.path.join(model_path_or_repo, "checkpoint-*"))
+            for checkpoint_folder in checkpoint_folders:
+                if checkpoint_folder.endswith("-last"):
+                    model_path = checkpoint_folder
+                    break
+            if model_path is None:
+                raise ValueError("No model checkpoint found.")
+
+            # Find the tokenizer folder.
+            if os.path.exists(os.path.join(model_path_or_repo, "tokenizer.json")):
+                tokenizer_path = model_path_or_repo
+            if not os.path.exists(tokenizer_path):
+                raise ValueError("Tokenizer not found.")
+
+        # Load the config.
+        config_path = os.path.join(model_path, "config.yaml")
+        if not os.path.exists(config_path):
+            raise ValueError(f"Config not found at {config_path}")
+        model_config = OmegaConf.load(config_path)
+
+        # Override the config.
+        if config_overrides != {} and config_overrides is not None:
+            model_config = OmegaConf.merge(model_config, config_overrides)
+            import json
+            print(json.dumps(OmegaConf.to_container(model_config), indent=4))
+
+        # Create the model from the config.
+        model = model_from_config(model_config, device=self.device)
+        model.to(self.device)
+        self.config = model_config
+
+        # Load the weights from the checkpoint.
+        weights_path = os.path.join(model_path, "model.safetensors")
+        if not os.path.exists(weights_path):
+            raise ValueError(f"Weights not found at {weights_path}")
+        state_dict = load_file(weights_path)
+
+        # TODO: Permute the last two dimensions of these parameters: xlstm_block_stack.blocks.2.xlstm.slstm_cell._recurrent_kernel_.
+        # Check if we have an xLSTM model and if CUDA is not available.
+        if not torch.cuda.is_available() and model_config.get("type", "xLSTMLMModel") == "xLSTMLMModel":
+            print(state_dict.keys())
+            endings = ["xlstm.slstm_cell._recurrent_kernel_"]
+            for key, values in state_dict.items():
+                for ending in endings:
+                    if key.endswith(ending):
+                        print(key)
+                        print(values.shape)
+
+                        # Option: Permute the last two dimensions.
+                        values = values.permute(0, 2, 1)
+
+                        # Option: View the tensor.
+                        #new_shape = (values.shape[0], values.shape[2], values.shape[1])
+                        #values = values.view(new_shape)
+
+                        print(values.shape)
+                        state_dict[key] = values
+                        break
+
+        # Load the weights into the model.
+        model.load_state_dict(state_dict)
+        self.model = model
+
+        # Load the tokenizer.
+        tokenizer_path = os.path.join(tokenizer_path, "tokenizer.json")
+        if not os.path.exists(tokenizer_path):
+            raise ValueError(f"Tokenizer not found at {tokenizer_path}")
+        tokenizer = PreTrainedTokenizerFast.from_pretrained(tokenizer_path)
+        self.tokenizer = tokenizer
+
+    def generate(
+        self,
+        prompt: str,
+        temperature: float = 1.0,
+        max_length: int = 100,
+        end_tokens: list[str] = [],
+        forbidden_tokens: list[str] = [],
+        return_structured_output: bool = False
+    ):
+        """
+        Generates a continuation for a given prompt using the language model.
+
+        Args:
+            prompt (str): The prompt to generate a continuation for.
+            temperature (float, optional): The temperature value for controlling the randomness of the generated output.
+                Higher values (e.g., 1.0) make the output more random, while lower values (e.g., 0.5) make it more deterministic.
+                Defaults to 1.0.
+            max_length (int, optional): The maximum length of the generated output. Defaults to 100.
+            end_tokens (list[str], optional): A list of end tokens that, if encountered, will stop the generation process.
+                Defaults to an empty list.
+            forbidden_tokens (list[str], optional): A list of tokens that must not be sampled. Defaults to an empty list.
+            return_structured_output (bool, optional): If True, returns a dictionary with the generated output, elapsed time,
+                and tokens per second. If False, returns only the generated output as a string. Defaults to False.
+
+        Returns:
+            str or dict: The generated output as a string if return_structured_output is False.
+                A dictionary with the generated output, elapsed time, and tokens per second if return_structured_output is True.
+        """
+
+        # Tokenize the prompt.
+        inputs = self.tokenizer(prompt, return_tensors="pt").input_ids.to(self.device)
+        assert inputs.shape[0] == 1
+
+        # Determine the end token ids.
+        end_token_ids = []
+        for end_token in end_tokens:
+            assert end_token in self.tokenizer.vocab
+            end_token_ids.append(self.tokenizer(end_token).input_ids[0])
+
+        # Initialize the ids to mask.
+        ids_to_mask = []
+
+        # Collect the ids of the forbidden tokens.
+        for forbidden_token in forbidden_tokens:
+            assert forbidden_token in self.tokenizer.vocab
+            ids_to_mask.extend(self.tokenizer(forbidden_token).input_ids)
+
+        # Generate the continuation.
+        start_time = time.time()
+        tokens_count = 0
+        while inputs.shape[1] < max_length:
+
+            # Stop if the maximum context length is reached.
+            if inputs.shape[1] >= self.config.context_length:
+                print("Warning: The maximum context length has been reached.")
+                break
+
+            # Generate the continuation.
+            outputs = self.model(inputs.to(device=self.device))
+            assert outputs.shape[0] == 1
+
+            # Mask the special and forbidden tokens.
+            outputs[:, :, self.tokenizer.all_special_ids] = float("-inf")
+            if ids_to_mask:
+                outputs[:, :, ids_to_mask] = float("-inf")
+
+            # Use the temperature to sample from the distribution.
+            outputs = outputs / temperature
+            outputs = torch.nn.functional.softmax(outputs, dim=-1)
+            outputs = torch.multinomial(outputs[0, -1], num_samples=1)
+
+            # Add to the inputs.
+            inputs = torch.cat([inputs, outputs.unsqueeze(0)], dim=1)
+
+            # Increment the tokens count.
+            tokens_count += 1
+
+            # Check if an end token is reached.
+            if outputs[0] in end_token_ids:
+                break
+
+        # Compute the elapsed time and tokens per second.
+        elapsed_time = time.time() - start_time
+        tokens_per_second = tokens_count / elapsed_time
+
+        # Decode the output.
+        output = self.tokenizer.decode(inputs[0].tolist())
+
+        # Return the output.
+        if not return_structured_output:
+            return output
+
+        # Return the structured output.
+        else:
+            return {
+                "output": output,
+                "elapsed_time": elapsed_time,
+                "tokens_per_second": tokens_per_second
+            }
+
+    def summary(self):
+        """
+        Prints a summary of the model. Makes the model architecture readable. Includes the number of parameters.
+        """
+
+        # Print the model.
+        print(self.model)
+
+        # Get the number of parameters.
+        number_of_parameters = sum(p.numel() for p in self.model.parameters())
+        print(f"Number of parameters: {number_of_parameters:_}")
+        sizes = ["", "K", "M", "B", "T"]
+        size_index = 0
+        while number_of_parameters > 1000:
+            number_of_parameters /= 1000
+            size_index += 1
+        print(f"Number of parameters: {number_of_parameters:.2f}{sizes[size_index]}")
+
+        # Estimate the size of the model, assuming 32-bit floats, and make it human readable.
+        number_of_parameters = sum(p.numel() for p in self.model.parameters())
+        total_size = number_of_parameters * 4
+        sizes = ["B", "KB", "MB", "GB", "TB"]
+        size_index = 0
+        while total_size > 1024:
+            total_size /= 1024
+            size_index += 1
+        print(f"Total size of the model: {total_size:.2f}{sizes[size_index]} for 32-bit float precision.")
+
+        # Print on which device the model is running.
+        print(f"Device: {self.device}")
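Note: the following is a minimal usage sketch of the LanguageModel API above, not part of the commit. The repository ID and the token names are taken from app.py; the exact sampling output is illustrative only.

    from source.languagemodel import LanguageModel

    # Load the model from the Hugging Face Hub (or a local checkpoint folder).
    model = LanguageModel("TristanBehrens/bach-garland-mambaplus")

    # Generate one bar, stopping at the separator tokens used by the Garland format.
    result = model.generate(
        prompt="GARLAND_START",
        temperature=0.5,
        max_length=16_384,
        end_tokens=["NEXT", "GARLAND_END"],
        forbidden_tokens=["[PAD]", "[EOS]"],
        return_structured_output=True,
    )
    print(result["output"])
    print(f"{result['tokens_per_second']:.1f} tokens/s")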
source/utilities.py ADDED
@@ -0,0 +1,331 @@
+import copy
+import note_seq
+from PIL import Image
+import tempfile
+import os
+import colorama
+import colorsys
+from omegaconf import DictConfig, OmegaConf
+import torch
+from typing import List, Tuple, Dict
+from dacite import from_dict
+from collections.abc import MutableMapping
+import sys
+
+
+# NOTE: Imported from helibrunna.
+def display_logo():
+    """
+    Display the logo by printing it line by line with a cyberpunk color scheme.
+
+    Raises:
+        FileNotFoundError: If the logo file is missing.
+    """
+
+    # Get the path of this script and use it to find the logo.
+    script_path = os.path.dirname(os.path.realpath(__file__))
+    search_path = os.path.dirname(script_path)
+
+    # Load the logo.
+    logo_path = os.path.join(search_path, "assets", "asciilogo.txt")
+    if not os.path.exists(logo_path):
+        raise FileNotFoundError("The logo file is missing.")
+    with open(logo_path, "r") as f:
+        logo = f.read()
+
+    # Print the logo line by line. Use colorama to colorize the output. Use a cyberpunk color scheme.
+    for line_index, line in enumerate(logo.split("\n")):
+        color = colorama.Fore.GREEN
+        style = colorama.Style.BRIGHT if line_index % 2 == 0 else colorama.Style.NORMAL
+        print(color + style + line)
+    print(colorama.Style.RESET_ALL)
+
+
+# NOTE: Imported from helibrunna.
+def model_from_config(model_config: DictConfig, device: str) -> torch.nn.Module:
+    """
+    Create a model based on the provided model configuration.
+
+    Args:
+        model_config (DictConfig): The configuration for the model.
+        device (str): The device to move the model to.
+
+    Returns:
+        The created model.
+
+    Raises:
+        ValueError: If the model type is unknown.
+    """
+
+    # Get the model type from the configuration.
+    model_type = model_config.get("type", "xLSTMLMModel")
+
+    # Create the xLSTMLMModel.
+    if model_type == "xLSTMLMModel":
+        print("Creating xLSTMLMModel...")
+        from xlstm.xlstm_lm_model import xLSTMLMModel, xLSTMLMModelConfig
+
+        # If there is no GPU, use the vanilla backend.
+        if not torch.cuda.is_available():
+            #model_config.backend = "vanilla"
+            model_config.slstm_block.slstm.backend = "vanilla"
+            model_config.mlstm_block.mlstm.backend = "vanilla"
+        model_config_object = from_dict(xLSTMLMModelConfig, OmegaConf.to_container(model_config))
+
+        # Create the model.
+        model = xLSTMLMModel(model_config_object)
+        model.reset_parameters()
+
+    # Create the GPT2LMModel.
+    elif model_type == "gpt2":
+        print("Creating GPT2LMModel...")
+        from .models.gpttwo import GPT2LMModel, GPT2LMModelConfig
+        model_config_object = from_dict(GPT2LMModelConfig, OmegaConf.to_container(model_config))
+        model = GPT2LMModel(model_config_object)
+
+    # Create the Mamba LM.
+    elif model_type == "mamba":
+        print("Creating Mamba LM...")
+        from mambapy.lm import LM, MambaConfig
+        model_config_object = from_dict(MambaConfig, OmegaConf.to_container(model_config))
+        model = LM(model_config_object, model_config.vocab_size)
+
+    # Create the Transformer.
+    elif model_type == "transformer":
+        from .models.transformer import TransformerConfig, Transformer
+        model_config_object = from_dict(TransformerConfig, OmegaConf.to_container(model_config))
+        model = Transformer(model_config_object)
+
+    # Create a Pharia instance.
+    elif model_type == "pharia":
+        from .models.pharia import PhariaConfig, PhariaModel
+        model_config_object = from_dict(PhariaConfig, OmegaConf.to_container(model_config))
+        model = PhariaModel(model_config_object)
+
+    # Unknown model type.
+    else:
+        raise ValueError(f"Unknown model type: {model_type}")
+
+    # Move the model to the device.
+    model.to(device)
+    return model
+
+
+def convert_tokens_to_songdata(tokens):
+
+    # Accept both a token string and a list of tokens.
+    if isinstance(tokens, str):
+        tokens = tokens.split()
+
+    song_data = {}
+    song_data["tracks"] = []
+
+    # Walk through the tokens and build up tracks, bars, and notes.
+    current_track_index = 0
+    current_timestep = 0
+    for token in tokens:
+        if token == "GARLAND_START":
+            pass
+        elif token == "BAR_START":
+            if current_track_index == len(song_data["tracks"]):
+                song_data["tracks"] += [{"bars": [], "instrument": "0"}]
+            bar_data = {"notes": []}
+            song_data["tracks"][current_track_index]["bars"] += [bar_data]
+            current_timestep = 0
+        elif token.startswith("INST="):
+            instrument = token.split("=")[1]
+            song_data["tracks"][current_track_index]["instrument"] = instrument
+        elif token.startswith("DENSITY="):
+            pass
+        elif token.startswith("NOTE_ON="):
+            note_pitch = int(token.split("=")[1])
+            note_data = {
+                "note": note_pitch,
+                "start": current_timestep,
+                "end": current_timestep,
+                "velocity": 80
+            }
+            song_data["tracks"][current_track_index]["bars"][-1]["notes"] += [note_data]
+        elif token.startswith("TIME_DELTA="):
+            current_timestep += int(token.split("=")[1])
+        elif token.startswith("NOTE_OFF="):
+            note_pitch = int(token.split("=")[1])
+            for note_data in song_data["tracks"][current_track_index]["bars"][-1]["notes"]:
+                if note_data["note"] == note_pitch and note_data["start"] == note_data["end"]:
+                    note_data["end"] = current_timestep
+                    break
+        elif token == "BAR_END":
+            current_track_index += 1
+        elif token == "NEXT":
+            current_track_index = 0
+        elif token == "GARLAND_END":
+            pass
+        elif token == "[PAD]":
+            pass
+        elif token == "[EOS]":
+            pass
+        else:
+            raise Exception(f"Unknown token: {token}")
+
+    assert isinstance(song_data, dict)
+    return song_data
+
+
+def convert_songdata_to_notesequence(song_data: dict, quantize_steps_per_quarter=8, remove_disabled_tracks=True):
+
+    assert isinstance(song_data, dict), f"Invalid song data type: {type(song_data)}"
+
+    # Clone the song data.
+    song_data = copy.deepcopy(song_data)
+
+    # Sort the tracks by instrument.
+    assert "tracks" in song_data, f"Invalid song data: {song_data.keys()}"
+    tracks = sorted(song_data["tracks"], key=lambda t: t["instrument"])
+    song_data["tracks"] = tracks
+
+    # Remove tracks that are not enabled.
+    if remove_disabled_tracks:
+        song_data["tracks"] = [t for t in song_data["tracks"] if t.get("enabled", True)]
+
+    # Create an empty note sequence.
+    note_sequence = note_seq.protobuf.music_pb2.NoteSequence()
+
+    # Add the tempo.
+    bpm = song_data["bpm"] if "bpm" in song_data else 120
+    note_sequence.tempos.add().qpm = bpm
+
+    # Compute some lengths.
+    step_length_seconds = 60.0 / bpm / quantize_steps_per_quarter
+    bar_length_seconds = 4 * step_length_seconds * quantize_steps_per_quarter
+
+    # Get the instruments.
+    instruments = list(set([t["instrument"] for t in song_data["tracks"]]))
+
+    # Add the tracks.
+    for track_index, track_data in enumerate(song_data["tracks"]):
+        instrument = track_data["instrument"]
+        for bar_index, bar_data in enumerate(track_data["bars"]):
+            bar_start_time = bar_index * bar_length_seconds
+            for note_data in bar_data["notes"]:
+                assert "note" in note_data
+                assert "start" in note_data
+                assert "end" in note_data
+                note = note_sequence.notes.add()
+                #note.instrument = instrument TODO
+                note.pitch = note_data["note"]
+                note.start_time = note_data["start"] * step_length_seconds + bar_start_time
+                note.end_time = note_data["end"] * step_length_seconds + bar_start_time
+                if "velocity" in note_data:
+                    note.velocity = note_data["velocity"]
+                else:
+                    note.velocity = 80
+                note.instrument = track_index
+                if instrument == "drums":
+                    note.is_drum = True
+                else:
+                    note.is_drum = False
+                    note.program = int(instrument)
+
+    return note_sequence
+
+
+def convert_songdata_to_pianoroll(song_data):
+
+    # The bars are 4/4 and the quantization is 8 steps per quarter, i.e. 32 steps per bar.
+    # We render a grid: the width is 32 pixels per bar, the height covers the used pitch range.
+
+    # Determine the number of bars.
+    lengths = [len(track["bars"]) for track in song_data["tracks"]]
+    if lengths == []:
+        return None
+    assert len(set(lengths)) == 1, f"Unequal number of bars: {lengths}"
+    num_bars = lengths[0]
+
+    # Get the note extremes.
+    min_note = 128
+    max_note = 0
+    for track_data in song_data["tracks"]:
+        for bar_data in track_data["bars"]:
+            for note_data in bar_data["notes"]:
+                min_note = min(min_note, note_data["note"])
+                max_note = max(max_note, note_data["note"])
+
+    # The width depends on the bars.
+    width = 32 * num_bars
+
+    # The height depends on the notes.
+    height = 1 + max_note - min_note
+
+    # Create the image.
+    image = Image.new("RGB", (width, height), (14, 17, 23))
+
+    # Define some colors, one per track, by rotating the hue of the base color.
+    base_color = (255, 75, 75)
+    adjustments = [1.2, 1.0, 0.8, 0.6]
+    colors = []
+    for adjustment in adjustments:
+        rgb = base_color
+        rgb = [float(c) / 255.0 for c in rgb]
+        hsv = colorsys.rgb_to_hsv(*rgb)
+        # Rotate the hue.
+        offset = (adjustment - 1.0) * 0.1
+        hsv = (hsv[0] + offset, hsv[1], hsv[2])
+        rgb = colorsys.hsv_to_rgb(*hsv)
+        rgb = tuple([int(255.0 * c) for c in rgb])
+        colors += [rgb]
+
+    # Draw the notes.
+    for track_index, track_data in enumerate(song_data["tracks"]):
+        color = colors[track_index % len(colors)]
+        for bar_index, bar_data in enumerate(track_data["bars"]):
+            x = bar_index * 32
+
+            for note_data in bar_data["notes"]:
+                y = max_note - note_data["note"]
+                assert y >= 0 and y < height, f"Invalid y: {y}, note {note_data['note']}, min_note: {min_note}, max_note: {max_note}, difference: {max_note - min_note}, height: {height}"
+                for i in range(note_data["start"], note_data["end"]):
+                    image.putpixel((x + i, y), color)
+
+    # Resize the image. Use nearest neighbor for pixel art.
+    factor = 4
+    image = image.resize((width * factor, height * factor), Image.NEAREST)
+
+    return image
+
+
+def convert_notesequence_to_wave(note_sequence):
+
+    # Nothing to synthesize for an empty sequence.
+    if len(note_sequence.notes) == 0:
+        return None
+
+    # Prefer fluidsynth; fall back to the simple synthesizer if it is not available.
+    try:
+        synthesizer = note_seq.fluidsynth
+        wave = synthesizer(note_sequence, sample_rate=44100)
+        return wave
+    except Exception as e:
+        synthesizer = note_seq.synthesize
+        wave = synthesizer(note_sequence)
+        return wave
+
+
+def convert_notesequence_to_midi(note_sequence, filename="output.mid"):
+
+    # Nothing to export for an empty sequence.
+    if len(note_sequence.notes) == 0:
+        return None
+
+    # Write the sequence to a temporary MIDI file and return the file content.
+    with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+        filename = temp_file.name
+        note_seq.sequence_proto_to_midi_file(note_sequence, filename)
+        with open(filename, "rb") as file:
+            content = file.read()
+        return content
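Note: a minimal end-to-end sketch of the conversion helpers in source/utilities.py, not part of the commit. The hand-written token string is a hypothetical example in the Garland format consumed by app.py.

    from source.utilities import (
        convert_tokens_to_songdata,
        convert_songdata_to_notesequence,
        convert_notesequence_to_midi,
    )

    # A tiny hand-written token sequence: one track, one bar, a single middle C held for 8 steps.
    tokens = "GARLAND_START BAR_START INST=0 NOTE_ON=60 TIME_DELTA=8 NOTE_OFF=60 BAR_END GARLAND_END"

    song_data = convert_tokens_to_songdata(tokens)                # tokens -> nested dict of tracks/bars/notes
    song_data["bpm"] = 100                                        # the tempo is attached by the caller, as in app.py
    note_sequence = convert_songdata_to_notesequence(song_data)   # dict -> note_seq NoteSequence
    midi_bytes = convert_notesequence_to_midi(note_sequence)      # NoteSequence -> MIDI file bytes

    with open("example.mid", "wb") as f:
        f.write(midi_bytes)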