juancopi81 committed on
Commit
d13787f
·
1 Parent(s): 197a713

Initial commit

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. app.py +104 -0
  3. requirements.txt +6 -0
  4. utils.py +182 -0
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: Mutopia Guitar Composer
3
- emoji: 🏃
4
  colorFrom: blue
5
  colorTo: green
6
  sdk: gradio
 
1
  ---
2
  title: Mutopia Guitar Composer
3
+ emoji: 🎸
4
  colorFrom: blue
5
  colorTo: green
6
  sdk: gradio
app.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, TFGPT2LMHeadModel
3
+ from transformers import pipeline
4
+ import note_seq
5
+
6
+ from utils import token_sequence_to_note_sequence, create_image_from_note_sequence
7
+
8
# Audio sample rate used when rendering and returning the generated audio.
SAMPLE_RATE = 44100

# Starting notes offered in the UI, mapped to their MIDI pitch numbers.
# Only three notes are offered because the model works better this way;
# feel free to change the mapping.
notes_to_midi = {"D3": 50, "G3": 55, "E4": 64}
notes = list(notes_to_midi)

# Time signatures offered in the UI and the spelling used for them inside the
# seed tokens ("4/4" becomes "4_4").
time_signatures = ["4/4", "3/4", "2/4", "6/8"]
time_signature_to_tokens = {
    signature: signature.replace("/", "_") for signature in time_signatures
}
16
+
17
# Mutopia model for music generation. The model and its tokenizer are loaded
# from the same Hugging Face repository and wrapped in a text-generation
# pipeline placed on device 0.
_MODEL_ID = "juancopi81/mutopia_guitar_mmm"
mutopia_model = TFGPT2LMHeadModel.from_pretrained(_MODEL_ID)
mutopia_tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
pipe = pipeline(
    task="text-generation",
    model=mutopia_model,
    tokenizer=mutopia_tokenizer,
    device=0,
)
23
+
24
# Static text shown in the demo page.
title = "Mutopia Guitar Composer"

# Markdown/HTML shown under the title; it embeds an image generated with DALL-E.
description = """
The bot was trained to compose guitar music using the
[Mutopia Guitar Dataset](https://huggingface.co/datasets/juancopi81/mutopia_guitar_dataset).
Change the controllers and receive a new guitar piece!
<figure>
<center>
<img src="https://drive.google.com/uc?export=view&id=1F22ofTCeJAHqVag4lJvBZugAE1OyabVA"
width=200px alt="Robot playing the guitar">
<figcaption>Image generated using DALL-E</figcaption>
</center>
</figure>
"""

# Markdown shown at the bottom of the page.
article = """
For a complete tutorial on how to create this demo from scratch, check out this
[GitHub Repo](https://github.com/juancopi81/MMM_Mutopia_Guitar).
"""
43
+
44
+ # Helper function to create the string seed
45
def create_seed(time_signature: str,
                note: str,
                bpm: int,
                density: int) -> str:
    """Build the token prompt that the generation pipeline continues.

    Args:
        time_signature: A key of ``time_signature_to_tokens`` (e.g. ``"4/4"``).
        note: A key of ``notes_to_midi`` (e.g. ``"G3"``); becomes the first
            ``NOTE_ON`` token.
        bpm: Value written into the ``BPM=`` token.
        density: Value written into the ``DENSITY=`` token.

    Returns:
        The space-separated seed tokens, ending with a trailing space.

    Raises:
        KeyError: If ``time_signature`` or ``note`` is not a known key.
    """
    tokens = [
        "PIECE_START",
        f"TIME_SIGNATURE={time_signature_to_tokens[time_signature]}",
        f"BPM={bpm}",
        "TRACK_START",
        "INST=0",
        f"DENSITY={density}",
        "BAR_START",
        f"NOTE_ON={notes_to_midi[note]}",
    ]
    # The trailing space is part of the seed format.
    return " ".join(tokens) + " "
54
+
55
def generate_guitar_piece(time_signature: str,
                          note: str,
                          bpm: int,
                          density: int):
    """Generate a piece from the UI parameters and render it as audio and image.

    The parameters are turned into a seed with ``create_seed``, the
    text-generation pipeline continues it (``max_length=250``), and the
    resulting tokens are converted to a note sequence.

    Returns:
        A pair ``((SAMPLE_RATE, int16_samples), piano_roll_image)``.
    """
    prompt = create_seed(time_signature, note, bpm, density)
    generations = pipe(prompt, max_length=250)
    token_text = generations[0]["generated_text"]

    # Convert the generated token text to notes, then to int16 audio samples.
    sequence = token_sequence_to_note_sequence(token_text)
    float_samples = note_seq.midi_synth.fluidsynth(sequence, sample_rate=SAMPLE_RATE)
    pcm_samples = note_seq.audio_io.float_samples_to_int16(float_samples)

    roll_image = create_image_from_note_sequence(sequence)
    return (SAMPLE_RATE, pcm_samples), roll_image
69
+
70
+ # Create a block object
71
# Build the page inside a Blocks context.
with gr.Blocks() as demo:
    gr.Markdown(
        f"<h1 style='text-align: center; margin-bottom: 1rem'>{title}</h1>"
    )
    gr.Markdown(description)

    # Controls for the generation parameters.
    gr.Markdown("Select the generation parameters.")
    with gr.Row():
        time_signature = gr.Dropdown(time_signatures, value="4/4", label="Time signature")
        note = gr.Dropdown(notes, value="G3", label="First note")
        bpm = gr.Slider(minimum=60, maximum=140, step=10, value=90, label="Tempo")
        density = gr.Slider(minimum=0, maximum=4, step=1, value=2, label="Density")
    with gr.Row():
        btn = gr.Button("Compose")
    with gr.Row():
        audio_output = gr.Audio()
        image_output = gr.Image()

    # Clicking the button runs the generator and fills both outputs.
    generation_inputs = [time_signature, note, bpm, density]
    generation_outputs = [audio_output, image_output]
    btn.click(generate_guitar_piece,
              inputs=generation_inputs,
              outputs=generation_outputs)

    gr.Markdown(article)

# Launch your demo
demo.launch(debug=False)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ transformers
2
+ note-seq
3
+ protobuf==3.20.1
4
+ pyfluidsynth==1.3.0
5
+ pandas
6
+ matplotlib
utils.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import collections
2
+ import io
3
+
4
+ import pandas as pd
5
+ import matplotlib.pyplot as plt
6
+ from matplotlib.patches import Rectangle
7
+ from PIL import Image
8
+
9
+ import note_seq
10
+ import copy
11
+
12
# Tempo, in beats per minute, at which one beat lasts one second.
BPM_1_SECOND = 60

# Module-level placeholders for the parts of the time signature.
# NOTE(review): token_sequence_to_note_sequence assigns same-named local
# variables without a `global` statement, so these stay empty strings.
numerator = denominator = ""
18
+
19
def token_sequence_to_note_sequence(token_sequence,
                                    use_program=True,
                                    use_drums=False,
                                    instrument_mapper=None,
                                    only_guitar=True):
    """Convert a sequence of music tokens into a note sequence.

    Args:
        token_sequence: Either a string of whitespace-separated tokens or an
            iterable of token strings.
        use_program: When true, a non-drum ``INST=<n>`` token sets the program
            of the following notes to ``int(<n>)``.
        use_drums: When true, ``INST=DRUMS`` marks the following notes as drums.
        instrument_mapper: Optional mapping applied to the instrument string of
            a non-drum ``INST`` token before it is converted to ``int``.
        only_guitar: When true, every non-drum note is finally assigned
            instrument and program 24.

    Returns:
        The note sequence created by ``empty_note_sequence`` with the parsed
        time signature(s), tempo and notes filled in.

    Raises:
        ValueError: If a numeric token value cannot be parsed.
    """
    if isinstance(token_sequence, str):
        token_sequence = token_sequence.split()

    note_sequence = empty_note_sequence()

    # Timing state. The defaults (4/4 at the sequence's initial tempo, bar 0,
    # time 0) only matter when the token stream omits or reorders its header
    # tokens; without them such input raised UnboundLocalError. Well-formed
    # input overwrites every one of them before it is read.
    numerator = 4
    denominator = 4
    pulse_duration, bar_duration = duration_in_sec(
        note_sequence.tempos[0].qpm, numerator, denominator
    )
    current_bar_index = 0
    current_time = 0.0
    current_notes = {}

    # Instrument state applied to every note that is created.
    current_program = 1
    current_is_drum = False
    current_instrument = 0
    track_count = 0

    for token in token_sequence:
        if token == "PIECE_END":
            print("The end.")
            break
        elif token.startswith("TIME_SIGNATURE="):
            time_signature_str = token.split("=")[-1]
            numerator = int(time_signature_str.split("_")[0])
            denominator = int(time_signature_str.split("_")[-1])
            time_signature = note_sequence.time_signatures.add()
            time_signature.numerator = numerator
            time_signature.denominator = denominator
        elif token.startswith("BPM="):
            bpm = int(token.split("=")[-1])
            note_sequence.tempos[0].qpm = bpm
            pulse_duration, bar_duration = duration_in_sec(
                bpm, numerator, denominator
            )
        elif token == "TRACK_START":
            current_bar_index = 0
            track_count += 1
        elif token.startswith("INST"):
            instrument = token.split("=")[-1]
            if instrument != "DRUMS" and use_program:
                if instrument_mapper is not None and instrument in instrument_mapper:
                    instrument = instrument_mapper[instrument]
                current_program = int(instrument)
                current_instrument = track_count
                current_is_drum = False
            # Checked after the mapping above, exactly like the first test.
            if instrument == "DRUMS" and use_drums:
                current_instrument = 0
                current_program = 0
                current_is_drum = True
        elif token == "BAR_START":
            current_time = current_bar_index * bar_duration
            current_notes = {}
        elif token == "BAR_END":
            current_bar_index += 1
        elif token.startswith("NOTE_ON"):
            pitch = int(token.split("=")[-1])
            note = note_sequence.notes.add()
            note.start_time = current_time
            # Provisional end; a matching NOTE_OFF in the same bar overrides it.
            note.end_time = current_time + denominator * pulse_duration
            note.pitch = pitch
            note.instrument = current_instrument
            note.program = current_program
            note.velocity = 80
            note.is_drum = current_is_drum
            current_notes[pitch] = note
        elif token.startswith("NOTE_OFF"):
            pitch = int(token.split("=")[-1])
            if pitch in current_notes:
                current_notes[pitch].end_time = current_time
        elif token.startswith("TIME_DELTA"):
            # One TIME_DELTA unit advances the clock by a quarter of a pulse.
            current_time += float(token.split("=")[-1]) * 0.25 * pulse_duration
        # Every other token (PIECE_START, TRACK_END, KEYS_START, KEYS_END,
        # KEY=..., DENSITY=..., [PAD], anything unknown) is ignored.

    # Give each distinct (program, is_drum) combination its own instrument
    # index, numbered in order of first appearance.
    instruments_drums = []
    for note in note_sequence.notes:
        pair = [note.program, note.is_drum]
        if pair not in instruments_drums:
            instruments_drums.append(pair)
        note.instrument = instruments_drums.index(pair)

    if only_guitar:
        for note in note_sequence.notes:
            if not note.is_drum:
                # Program 24 in zero-based General MIDI numbering is
                # "Acoustic Guitar (nylon)".
                note.instrument = 24
                note.program = 24

    return note_sequence
130
+
131
+ # Calculate the duration in seconds of pulse and bar
132
def duration_in_sec(bpm, numerator, denominator):
    """Return the duration in seconds of one pulse and of one bar.

    Args:
        bpm: Tempo in beats per minute.
        numerator: Upper number of the time signature.
        denominator: Lower number of the time signature.

    Returns:
        A tuple ``(pulse_duration, bar_duration)``, where ``pulse_duration`` is
        ``BPM_1_SECOND / bpm`` and ``bar_duration`` is that value multiplied by
        the number of quarter notes in a bar.
    """
    seconds_per_pulse = BPM_1_SECOND / bpm
    quarters_in_bar = (4 / denominator) * numerator
    return seconds_per_pulse, seconds_per_pulse * quarters_in_bar
137
+
138
def empty_note_sequence(qpm=120, total_time=0.0):
    """Create a NoteSequence holding a single tempo entry and no notes.

    Args:
        qpm: Value stored in the ``qpm`` field of the tempo entry.
        total_time: Value stored in the sequence's ``total_time`` field.

    Returns:
        The newly created ``NoteSequence``.
    """
    sequence = note_seq.protobuf.music_pb2.NoteSequence()
    tempo = sequence.tempos.add()
    tempo.qpm = qpm
    sequence.total_time = total_time
    return sequence
144
+
145
+ # Generate piano_roll
146
def sequence_to_pandas_dataframe(sequence):
    """Tabulate the notes of a sequence as a pandas DataFrame.

    Args:
        sequence: Object whose ``notes`` attribute iterates over notes that
            expose ``start_time``, ``end_time`` and ``pitch``.

    Returns:
        A DataFrame with one row per note and the columns ``start_time``,
        ``end_time``, ``duration`` (end minus start) and ``pitch``. The columns
        are present even when the sequence has no notes, so callers can always
        access them.
    """
    # Pre-create every column: a sequence without notes would otherwise give a
    # DataFrame with no columns at all.
    columns = {"start_time": [], "end_time": [], "duration": [], "pitch": []}
    for note in sequence.notes:
        columns["start_time"].append(note.start_time)
        columns["end_time"].append(note.end_time)
        columns["duration"].append(note.end_time - note.start_time)
        columns["pitch"].append(note.pitch)

    return pd.DataFrame(columns)
155
+
156
def dataframe_to_pianoroll_img(df):
    """Draw the notes of a DataFrame as a piano-roll figure.

    Args:
        df: DataFrame with ``start_time``, ``pitch`` and ``duration`` columns.

    Returns:
        The Matplotlib figure (8 x 5 inches) containing one black rectangle
        per note.
    """
    figure = plt.figure(figsize=(8, 5))
    axes = figure.add_subplot(111)

    # White points at every note start; presumably there so the axes scale to
    # the notes, since the rectangles are added as patches.
    axes.scatter(df.start_time, df.pitch, c="white")

    # Each note is a bar 0.4 high whose top edge sits on its pitch.
    for start, pitch, length in zip(df["start_time"], df["pitch"], df["duration"]):
        bar = Rectangle((start, pitch - 0.4), length, 0.4, color="black")
        axes.add_patch(bar)

    axes.set_xlabel('Seconds', fontsize=18)
    axes.set_ylabel('MIDI pitch', fontsize=16)
    return figure
168
+
169
def fig2img(fig):
    """Render a Matplotlib figure as PNG in memory and open it as a PIL Image.

    Args:
        fig: Figure whose ``savefig`` method is used for rendering.

    Returns:
        The ``PIL.Image`` opened from the in-memory PNG data.
    """
    png_buffer = io.BytesIO()
    fig.savefig(png_buffer, format="png")
    # Rewind so the image is read from the start of the PNG data.
    png_buffer.seek(0)
    return Image.open(png_buffer)
177
+
178
def create_image_from_note_sequence(sequence):
    """Render the piano roll of a note sequence as a PIL Image.

    Args:
        sequence: Note sequence whose notes are plotted.

    Returns:
        The PIL Image produced from the piano-roll figure.
    """
    df_sequence = sequence_to_pandas_dataframe(sequence)
    fig = dataframe_to_pianoroll_img(df_sequence)
    try:
        return fig2img(fig)
    finally:
        # Figures created through pyplot stay registered until closed; without
        # this every call would leave one more figure in memory. The image
        # only depends on the PNG bytes already written to its buffer.
        plt.close(fig)