skytnt committed
Commit 183a87e
Parent: 94ac77e
Files changed (2)
  1. app.py +32 -12
  2. midi_model.py +5 -1
app.py CHANGED
@@ -142,7 +142,12 @@ def get_duration(model_name, tab, mid_seq, continuation_state, continuation_sele
 def run(model_name, tab, mid_seq, continuation_state, continuation_select, instruments, drum_kit, bpm, time_sig,
         key_sig, mid, midi_events, reduce_cc_st, remap_track_channel, add_default_instr, remove_empty_channels,
         seed, seed_rand, gen_events, temp, top_p, top_k, allow_cc):
-    model = models[model_name]
+    model, lora_name = models[model_name]
+    if lora_name is None and model.peft_loaded():
+        model.disable_adapters()
+    elif lora_name is not None:
+        model.enable_adapters()
+        model.set_adapter(lora_name)
     model.to(device=opt.device)
     tokenizer = model.tokenizer
     bpm = int(bpm)
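Worth noting for reviewers: several registry entries can share one underlying MIDIModel instance (see the loading loop further down), so run() must reset adapter state on every request. A small sketch of the invariant, with keys taken from the registry built below (the asserts are illustrative, not part of the app):

```python
# Both keys map to the same MIDIModel object; only lora_name differs, so an
# adapter enabled by a previous request stays active until run() resets it.
base_model, base_lora = models["generic pretrain model (tv2o-medium) by skytnt"]
jpop_model, jpop_lora = models["generic pretrain model (tv2o-medium) by skytnt with jpop lora"]
assert base_model is jpop_model                    # shared weights
assert base_lora is None and jpop_lora == "jpop"   # entry selects the adapter
```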
@@ -253,7 +258,7 @@ def finish_run(model_name, mid_seq):
     if mid_seq is None:
         outputs = [None] * OUTPUT_BATCH_SIZE
         return *outputs, []
-    tokenizer = models[model_name].tokenizer
+    tokenizer = models[model_name][0].tokenizer
     outputs = []
     end_msgs = [create_msg("progress", [0, 0])]
     if not os.path.exists("outputs"):
@@ -277,7 +282,7 @@ def render_audio(model_name, mid_seq, should_render_audio):
     if (not should_render_audio) or mid_seq is None:
         outputs = [None] * OUTPUT_BATCH_SIZE
         return tuple(outputs)
-    tokenizer = models[model_name].tokenizer
+    tokenizer = models[model_name][0].tokenizer
     outputs = []
     if not os.path.exists("outputs"):
         os.mkdir("outputs")
@@ -294,7 +299,7 @@ def render_audio(model_name, mid_seq, should_render_audio):
 def undo_continuation(model_name, mid_seq, continuation_state):
     if mid_seq is None or len(continuation_state) < 2:
         return mid_seq, continuation_state, send_msgs([])
-    tokenizer = models[model_name].tokenizer
+    tokenizer = models[model_name][0].tokenizer
     if isinstance(continuation_state[-1], list):
         mid_seq = continuation_state[-1]
     else:
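The three `[0]` changes above are mechanical: registry values are now `(model, lora_name)` tuples, and these helpers only need the tokenizer, which is the same for a base entry and its LoRA entries. Equivalent unpacking, shown only for clarity:

```python
# models[model_name] is now a (model, lora_name) tuple; the tokenizer lives on
# the model half, so indexing [0] is enough here.
model, _lora_name = models[model_name]
tokenizer = model.tokenizer  # same object as models[model_name][0].tokenizer
```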
@@ -364,12 +369,21 @@ if __name__ == "__main__":
     thread_pool = ThreadPoolExecutor(max_workers=OUTPUT_BATCH_SIZE)
     synthesizer = MidiSynthesizer(soundfont_path)
     models_info = {
-        "generic pretrain model (tv2o-medium) by skytnt": ["skytnt/midi-model-tv2o-medium", "", "tv2o-medium"],
-        "generic pretrain model (tv2o-large) by asigalov61": ["asigalov61/Music-Llama", "", "tv2o-large"],
-        "generic pretrain model (tv2o-medium) by asigalov61": ["asigalov61/Music-Llama-Medium", "", "tv2o-medium"],
-        "generic pretrain model (tv1-medium) by skytnt": ["skytnt/midi-model", "", "tv1-medium"],
-        "j-pop finetune model (tv2o-medium) by skytnt": ["skytnt/midi-model-ft", "jpop-tv2o-medium/", "tv2o-medium"],
-        "touhou finetune model (tv2o-medium) by skytnt": ["skytnt/midi-model-ft", "touhou-tv2o-medium/", "tv2o-medium"],
+        "generic pretrain model (tv2o-medium) by skytnt": [
+            "skytnt/midi-model-tv2o-medium", "", "tv2o-medium", {
+                "jpop": "skytnt/midi-model-tv2om-jpop-lora",
+                "touhou": "skytnt/midi-model-tv2om-touhou-lora"
+            }
+        ],
+        "generic pretrain model (tv2o-large) by asigalov61": [
+            "asigalov61/Music-Llama", "", "tv2o-large", {}
+        ],
+        "generic pretrain model (tv2o-medium) by asigalov61": [
+            "asigalov61/Music-Llama-Medium", "", "tv2o-medium", {}
+        ],
+        "generic pretrain model (tv1-medium) by skytnt": [
+            "skytnt/midi-model", "", "tv1-medium", {}
+        ]
     }
     models = {}
     if opt.device == "cuda":
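Each `models_info` value now has four fields rather than three: `[repo_id, checkpoint path prefix, config name, {adapter_name: adapter_repo}]`, and the two standalone j-pop and touhou fine-tune checkpoints are replaced by LoRA adapters on the medium base model. A hypothetical extra entry under the new schema (the repo ids below are invented for illustration):

```python
# Hypothetical entry; the adapter repo id is a placeholder, not a real repository.
models_info["my finetune model (tv1-medium)"] = [
    "skytnt/midi-model",   # base checkpoint repo
    "",                    # path prefix of model.ckpt inside the repo
    "tv1-medium",          # MIDIModelConfig name
    {"mystyle": "your-name/midi-model-tv1m-mystyle-lora"},  # LoRA adapters
]
```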
@@ -379,14 +393,20 @@ if __name__ == "__main__":
     torch.backends.cudnn.allow_tf32 = True
     torch.backends.cuda.enable_mem_efficient_sdp(True)
     torch.backends.cuda.enable_flash_sdp(True)
-    for name, (repo_id, path, config) in models_info.items():
+    for name, (repo_id, path, config, loras) in models_info.items():
         model_path = hf_hub_download_retry(repo_id=repo_id, filename=f"{path}model.ckpt")
         model = MIDIModel(config=MIDIModelConfig.from_name(config))
         ckpt = torch.load(model_path, map_location="cpu", weights_only=True)
         state_dict = ckpt.get("state_dict", ckpt)
         model.load_state_dict(state_dict, strict=False)
+        for lora_name, lora_repo in loras.items():
+            model.load_adapter(lora_repo, lora_name)
+        if loras:
+            model.disable_adapters()
         model.to(device="cpu", dtype=torch.float32).eval()
-        models[name] = model
+        models[name] = model, None
+        for lora_name, lora_repo in loras.items():
+            models[f"{name} with {lora_name} lora"] = model, lora_name
 
     load_javascript()
     app = gr.Blocks()
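After this loop the registry exposes one entry per base model plus one per adapter, all backed by the same instance. The keys produced for the tv2o-medium base, derived from the loop and `models_info` above:

```python
# models now contains, among others:
#   "generic pretrain model (tv2o-medium) by skytnt"                  -> (model, None)
#   "generic pretrain model (tv2o-medium) by skytnt with jpop lora"   -> (model, "jpop")
#   "generic pretrain model (tv2o-medium) by skytnt with touhou lora" -> (model, "touhou")
for key, (_m, lora) in models.items():
    print(f"{key!r} -> {lora or 'base'}")
```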
 
midi_model.py CHANGED
@@ -6,6 +6,7 @@ import torch.nn as nn
 import torch.nn.functional as F
 import tqdm
 from transformers import LlamaModel, LlamaConfig
+from transformers.integrations import PeftAdapterMixin
 
 from midi_tokenizer import MIDITokenizerV1, MIDITokenizerV2, MIDITokenizer
 
@@ -55,7 +56,7 @@ class MIDIModelConfig:
         raise ValueError(f"Unknown model size {size}")
 
 
-class MIDIModel(nn.Module):
+class MIDIModel(nn.Module, PeftAdapterMixin):
     def __init__(self, config: MIDIModelConfig, *args, **kwargs):
         super(MIDIModel, self).__init__()
         self.tokenizer = config.tokenizer
@@ -69,6 +70,9 @@ class MIDIModel(nn.Module):
             self.device = kwargs["device"]
         return super(MIDIModel, self).to(*args, **kwargs)
 
+    def peft_loaded(self):
+        return self._hf_peft_config_loaded
+
     def forward_token(self, hidden_state, x=None):
         """
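`PeftAdapterMixin` is the transformers-side PEFT integration; it provides `load_adapter`, `set_adapter`, `enable_adapters`, and `disable_adapters`, and `_hf_peft_config_loaded` is the flag that `load_adapter` flips. That flag is why app.py guards `disable_adapters()` behind `peft_loaded()`: calling it before any adapter has been loaded raises. A minimal lifecycle sketch under those assumptions (the `peft` package must be installed):

```python
model = MIDIModel(config=MIDIModelConfig.from_name("tv2o-medium"))
assert not model.peft_loaded()   # no adapter config attached yet
model.load_adapter("skytnt/midi-model-tv2om-jpop-lora", "jpop")
assert model.peft_loaded()       # flipped by load_adapter()
model.set_adapter("jpop")        # subsequent forwards route through the LoRA
model.disable_adapters()         # back to base weights (safe once loaded)
model.enable_adapters()          # re-activates the previously set adapter
```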
 
 
78