Spaces: Build error
juancopi81 committed · Commit d13787f · Parent(s): 197a713
Initial commit
README.md
CHANGED
@@ -1,6 +1,6 @@
 ---
 title: Mutopia Guitar Composer
-emoji:
+emoji: 🎸
 colorFrom: blue
 colorTo: green
 sdk: gradio
app.py
ADDED
@@ -0,0 +1,104 @@
import gradio as gr
from transformers import AutoTokenizer, TFGPT2LMHeadModel
from transformers import pipeline
import note_seq

from utils import token_sequence_to_note_sequence, create_image_from_note_sequence

SAMPLE_RATE = 44100

# Feel free to change this; I am using only three notes here because the model
# works better this way.
notes = ["D3", "G3", "E4"]
notes_to_midi = {"D3": 50, "G3": 55, "E4": 64}
time_signatures = ["4/4", "3/4", "2/4", "6/8"]
time_signature_to_tokens = {"4/4": "4_4", "3/4": "3_4", "2/4": "2_4", "6/8": "6_8"}

# Mutopia model for music generation
mutopia_model = TFGPT2LMHeadModel.from_pretrained("juancopi81/mutopia_guitar_mmm")
mutopia_tokenizer = AutoTokenizer.from_pretrained("juancopi81/mutopia_guitar_mmm")
pipe = pipeline(
    "text-generation", model=mutopia_model, tokenizer=mutopia_tokenizer, device=0
)
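(Aside, not part of app.py.) device=0 pins the pipeline to the first GPU, so it fails on CPU-only hardware. A hedged alternative, assuming TensorFlow is installed in the runtime, is to pick the device dynamically:

import tensorflow as tf

# Fall back to CPU (device=-1 in transformers pipelines) when no GPU is visible.
device = 0 if tf.config.list_physical_devices("GPU") else -1
pipe = pipeline(
    "text-generation", model=mutopia_model, tokenizer=mutopia_tokenizer, device=device
)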

# Content for your demo:
title = "Mutopia Guitar Composer"
# I am adding here an image that I generated using DALL-E
description = """
The bot was trained to compose guitar music using the
[Mutopia Guitar Dataset](https://huggingface.co/datasets/juancopi81/mutopia_guitar_dataset).
Change the controls and receive a new guitar piece!
<figure>
<center>
<img src="https://drive.google.com/uc?export=view&id=1F22ofTCeJAHqVag4lJvBZugAE1OyabVA"
width=200px alt="Robot playing the guitar">
<figcaption>Image generated using DALL-E</figcaption>
</center>
</figure>
"""
article = """
For a complete tutorial on how to create this demo from scratch, check out this
[GitHub Repo](https://github.com/juancopi81/MMM_Mutopia_Guitar).
"""

# Helper function to create the string seed
def create_seed(time_signature: str,
                note: str,
                bpm: int,
                density: int) -> str:
    seed = (f"PIECE_START TIME_SIGNATURE={time_signature_to_tokens[time_signature]} "
            f"BPM={bpm} TRACK_START INST=0 DENSITY={density} "
            f"BAR_START NOTE_ON={notes_to_midi[note]} ")
    return seed
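(Aside, not part of app.py.) For reference, the seed built from the default UI values is the token prefix the model continues from:

seed = create_seed("4/4", "G3", 90, 2)
print(seed)
# PIECE_START TIME_SIGNATURE=4_4 BPM=90 TRACK_START INST=0 DENSITY=2 BAR_START NOTE_ON=55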

def generate_guitar_piece(time_signature: str,
                          note: str,
                          bpm: int,
                          density: int):
    seed = create_seed(time_signature, note, bpm, density)
    piece = pipe(seed, max_length=250)[0]["generated_text"]

    # Convert the text of notes to audio
    note_sequence = token_sequence_to_note_sequence(piece)
    synth = note_seq.midi_synth.fluidsynth
    array_of_floats = synth(note_sequence, sample_rate=SAMPLE_RATE)
    int16_data = note_seq.audio_io.float_samples_to_int16(array_of_floats)
    piano_roll = create_image_from_note_sequence(note_sequence)
    return (SAMPLE_RATE, int16_data), piano_roll

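(Aside, not part of app.py.) A hypothetical local smoke test of the generation function; it needs the model download, note-seq, and a working FluidSynth install:

(rate, audio), roll = generate_guitar_piece("4/4", "G3", 90, 2)
print(rate, audio.shape)     # 44100 and a 1-D int16 waveform
roll.save("piano_roll.png")  # the PIL image built by create_image_from_note_sequence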
# Create a Blocks object
demo = gr.Blocks()

# Use your Blocks object as a context
with demo:
    gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>"
                + title
                + "</h1>")
    gr.Markdown(description)

    # UI for the inputs of the model
    gr.Markdown("Select the generation parameters.")
    with gr.Row():
        time_signature = gr.Dropdown(time_signatures, value="4/4", label="Time signature")
        note = gr.Dropdown(notes, value="G3", label="First note")
        bpm = gr.Slider(minimum=60, maximum=140, step=10, value=90, label="Tempo")
        density = gr.Slider(minimum=0, maximum=4, step=1, value=2, label="Density")
    with gr.Row():
        btn = gr.Button("Compose")
    with gr.Row():
        audio_output = gr.Audio()
        image_output = gr.Image()
    btn.click(generate_guitar_piece,
              inputs=[
                  time_signature,
                  note,
                  bpm,
                  density
              ],
              outputs=[audio_output, image_output])

    gr.Markdown(article)

# Launch your demo
demo.launch(debug=False)
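(Aside, not part of app.py.) Generation plus FluidSynth rendering can take several seconds per request; depending on the Gradio version, a hedged tweak would be to enable the request queue before launching:

demo.queue().launch(debug=False)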
requirements.txt
ADDED
@@ -0,0 +1,6 @@
transformers
note-seq
protobuf==3.20.1
pyfluidsynth==1.3.0
pandas
matplotlib
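(Aside, not part of the commit.) The Space reports a build error. Two likely gaps, stated as assumptions rather than a confirmed diagnosis: TFGPT2LMHeadModel needs TensorFlow, which is not listed above, and pyfluidsynth needs the native FluidSynth library, which a Space declares through a packages.txt of apt packages. A possible fix:

requirements.txt (addition): tensorflow
packages.txt (new file, one apt package per line): fluidsynth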
utils.py
ADDED
@@ -0,0 +1,182 @@
import collections
import io

import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from PIL import Image

import note_seq
import copy

# Seconds per minute, used to convert BPM into the duration of one pulse
BPM_1_SECOND = 60

# Variables to change based on the time signature
numerator = ""
denominator = ""

def token_sequence_to_note_sequence(token_sequence,
                                    use_program=True,
                                    use_drums=False,
                                    instrument_mapper=None,
                                    only_guitar=True):

    if isinstance(token_sequence, str):
        token_sequence = token_sequence.split()

    note_sequence = empty_note_sequence()

    # Render all notes.
    current_program = 1
    current_is_drum = False
    current_instrument = 0
    track_count = 0
    for token_index, token in enumerate(token_sequence):

        if token == "PIECE_START":
            pass
        elif token == "PIECE_END":
            print("The end.")
            break
        elif token.startswith("TIME_SIGNATURE="):
            time_signature_str = token.split("=")[-1]
            numerator = int(time_signature_str.split("_")[0])
            denominator = int(time_signature_str.split("_")[-1])
            time_signature = note_sequence.time_signatures.add()
            time_signature.numerator = numerator
            time_signature.denominator = denominator
        elif token.startswith("BPM="):
            bpm_str = token.split("=")[-1]
            bpm = int(bpm_str)
            note_sequence.tempos[0].qpm = bpm
            pulse_duration, bar_duration = duration_in_sec(
                bpm, numerator, denominator
            )
        elif token == "TRACK_START":
            current_bar_index = 0
            track_count += 1
            pass
        elif token == "TRACK_END":
            pass
        elif token == "KEYS_START":
            pass
        elif token == "KEYS_END":
            pass
        elif token.startswith("KEY="):
            pass
        elif token.startswith("INST"):
            instrument = token.split("=")[-1]
            if instrument != "DRUMS" and use_program:
                if instrument_mapper is not None:
                    if instrument in instrument_mapper:
                        instrument = instrument_mapper[instrument]
                current_program = int(instrument)
                current_instrument = track_count
                current_is_drum = False
            if instrument == "DRUMS" and use_drums:
                current_instrument = 0
                current_program = 0
                current_is_drum = True
        elif token == "BAR_START":
            current_time = (current_bar_index * bar_duration)
            current_notes = {}
        elif token == "BAR_END":
            current_bar_index += 1
            pass
        elif token.startswith("NOTE_ON"):
            pitch = int(token.split("=")[-1])
            note = note_sequence.notes.add()
            note.start_time = current_time
            note.end_time = current_time + denominator * pulse_duration
            note.pitch = pitch
            note.instrument = current_instrument
            note.program = current_program
            note.velocity = 80
            note.is_drum = current_is_drum
            current_notes[pitch] = note
        elif token.startswith("NOTE_OFF"):
            pitch = int(token.split("=")[-1])
            if pitch in current_notes:
                note = current_notes[pitch]
                note.end_time = current_time
        elif token.startswith("TIME_DELTA"):
            # TIME_DELTA is treated as 16th-note steps (0.25 of a pulse)
            delta = float(token.split("=")[-1]) * (0.25) * pulse_duration
            current_time += delta
        elif token.startswith("DENSITY="):
            pass
        elif token == "[PAD]":
            pass
        else:
            # print(f"Ignored token {token}.")
            pass

    # Make the instruments right.
    instruments_drums = []
    for note in note_sequence.notes:
        pair = [note.program, note.is_drum]
        if pair not in instruments_drums:
            instruments_drums += [pair]
        note.instrument = instruments_drums.index(pair)

    if only_guitar:
        for note in note_sequence.notes:
            if not note.is_drum:
                # MIDI program 24 (zero-indexed) is acoustic guitar (nylon)
                note.instrument = 24
                note.program = 24

    return note_sequence
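(Aside, not part of utils.py.) A hypothetical quick check of the conversion, feeding a hand-written token string in the same format the model emits:

from utils import token_sequence_to_note_sequence

tokens = ("PIECE_START TIME_SIGNATURE=4_4 BPM=90 TRACK_START INST=0 DENSITY=2 "
          "BAR_START NOTE_ON=55 TIME_DELTA=4 NOTE_OFF=55 BAR_END TRACK_END")
ns = token_sequence_to_note_sequence(tokens)
print(len(ns.notes), ns.notes[0].pitch, round(ns.notes[0].end_time, 2))
# 1 55 0.67  (TIME_DELTA=4 is four 16ths, i.e. one quarter note: 60/90 ≈ 0.67 s at 90 BPM)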

# Calculate the duration in seconds of a pulse (quarter note) and of a bar
def duration_in_sec(bpm, numerator, denominator):
    pulse_duration = BPM_1_SECOND / bpm
    number_of_quarters_per_bar = (4 / denominator) * numerator
    bar_duration = pulse_duration * number_of_quarters_per_bar
    return pulse_duration, bar_duration
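(Aside, not part of utils.py.) For example, at 90 BPM in 4/4 a pulse lasts 60/90 ≈ 0.667 s and a bar of four quarters lasts about 2.667 s:

pulse, bar = duration_in_sec(90, 4, 4)
print(round(pulse, 3), round(bar, 3))  # 0.667 2.667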

def empty_note_sequence(qpm=120, total_time=0.0):
    note_sequence = note_seq.protobuf.music_pb2.NoteSequence()
    note_sequence.tempos.add().qpm = qpm
    # note_sequence.ticks_per_quarter = note_seq.constants.STANDARD_PPQ
    note_sequence.total_time = total_time
    return note_sequence

# Generate piano_roll
def sequence_to_pandas_dataframe(sequence):
    pd_dict = collections.defaultdict(list)
    for note in sequence.notes:
        pd_dict["start_time"].append(note.start_time)
        pd_dict["end_time"].append(note.end_time)
        pd_dict["duration"].append(note.end_time - note.start_time)
        pd_dict["pitch"].append(note.pitch)

    return pd.DataFrame(pd_dict)

def dataframe_to_pianoroll_img(df):
    fig = plt.figure(figsize=(8, 5))
    ax = fig.add_subplot(111)
    ax.scatter(df.start_time, df.pitch, c="white")
    for _, row in df.iterrows():
        ax.add_patch(Rectangle(
            (row["start_time"], row["pitch"] - 0.4),
            row["duration"], 0.4, color="black"
        ))
    plt.xlabel('Seconds', fontsize=18)
    plt.ylabel('MIDI pitch', fontsize=16)
    return fig

def fig2img(fig):
    """Convert a Matplotlib figure to a PIL Image and return it."""
    buf = io.BytesIO()
    fig.savefig(buf, format="png")
    buf.seek(0)
    img = Image.open(buf)
    return img

def create_image_from_note_sequence(sequence):
    df_sequence = sequence_to_pandas_dataframe(sequence)
    fig = dataframe_to_pianoroll_img(df_sequence)
    img = fig2img(fig)
    return img