Spaces:
Build error
Build error
Commit
·
e586364
1
Parent(s):
e9d627c
Add midi option
Browse files
app.py
CHANGED
@@ -29,8 +29,10 @@ from demucs import pretrained
|
|
29 |
from demucs.apply import apply_model
|
30 |
from demucs.audio import convert_audio
|
31 |
from gradio_client import Client
|
|
|
32 |
|
33 |
LOCAL = False
|
|
|
34 |
|
35 |
|
36 |
MODEL = None # Last used model
|
@@ -44,9 +46,15 @@ _old_call = sp.call
|
|
44 |
stem2idx = {'drums': 0, 'bass': 1, 'other': 2, 'vocal': 3}
|
45 |
stem_idx = torch.LongTensor([stem2idx['vocal'], stem2idx['other'], stem2idx['bass']])
|
46 |
|
47 |
-
melody_files = glob.glob('clips/**/*.
|
|
|
|
|
48 |
|
49 |
|
|
|
|
|
|
|
|
|
50 |
def _call_nostderr(*args, **kwargs):
|
51 |
# Avoid ffmpeg vomitting on the logs.
|
52 |
kwargs['stderr'] = sp.DEVNULL
|
@@ -183,6 +191,7 @@ def _do_predictions(texts, melodies, duration, progress=False, **gen_kwargs):
|
|
183 |
|
184 |
|
185 |
def predict_full(text, melody, progress=gr.Progress()):
|
|
|
186 |
global INTERRUPTING
|
187 |
INTERRUPTING = False
|
188 |
print("Running local model")
|
@@ -194,17 +203,45 @@ def predict_full(text, melody, progress=gr.Progress()):
|
|
194 |
|
195 |
outs = _do_predictions(
|
196 |
[text], [melody], duration=10, progress=True)
|
197 |
-
|
198 |
return outs[0], gr.File.update(value=outs[0], visible=True)
|
199 |
|
200 |
|
201 |
|
202 |
def select_new_melody():
|
203 |
-
|
204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
205 |
|
206 |
def run_remote_model(text, melody):
|
207 |
-
|
|
|
208 |
result = client.predict(
|
209 |
text, # str in 'Describe your music' Textbox component
|
210 |
melody, # str (filepath or URL to file) in 'File' Audio component
|
@@ -223,6 +260,8 @@ def run_remote_model(text, melody):
|
|
223 |
sp.run(["ffmpeg", "-i", result, "-vn", "-acodec", "pcm_s16le", "-ar", "32000", "-ac", "1", d_filename])
|
224 |
# Load wav file
|
225 |
output, sr = audio_read(d_filename)
|
|
|
|
|
226 |
# Demucs
|
227 |
print("Running demucs")
|
228 |
wav = convert_audio(output, sr, DEMUCS_MODEL.samplerate, DEMUCS_MODEL.audio_channels)
|
@@ -241,9 +280,14 @@ def run_remote_model(text, melody):
|
|
241 |
d_filename, demucs_output, 32000, strategy="loudness",
|
242 |
loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
|
243 |
file_cleaner.add(d_filename)
|
|
|
|
|
244 |
print("Finished", text)
|
245 |
print("Tempfiles currently stored: ", len(file_cleaner.files))
|
246 |
-
return d_filename
|
|
|
|
|
|
|
247 |
|
248 |
def ui_full(launch_kwargs):
|
249 |
with gr.Blocks() as interface:
|
@@ -262,8 +306,8 @@ def ui_full(launch_kwargs):
|
|
262 |
audio_type="numpy"
|
263 |
else:
|
264 |
audio_type="filepath"
|
265 |
-
melody = gr.Audio(type=audio_type, label="File",
|
266 |
-
interactive=True, elem_id="melody-input", value=
|
267 |
new_melody = gr.Button("New Melody", interactive=True)
|
268 |
with gr.Row():
|
269 |
submit = gr.Button("Submit")
|
@@ -272,26 +316,33 @@ def ui_full(launch_kwargs):
|
|
272 |
|
273 |
with gr.Column():
|
274 |
output_without_drum = gr.Audio(label="Output")
|
275 |
-
|
276 |
-
|
277 |
-
""
|
278 |
-
|
279 |
-
|
280 |
-
|
|
|
|
|
|
|
|
|
281 |
if LOCAL:
|
282 |
submit.click(predict_full,
|
283 |
inputs=[text, melody],
|
284 |
-
outputs=[output_without_drum
|
285 |
else:
|
286 |
-
submit.click(run_remote_model, inputs=[text, melody], outputs=[output_without_drum
|
287 |
new_melody.click(select_new_melody, outputs=[melody])
|
|
|
|
|
|
|
|
|
288 |
gr.Examples(
|
289 |
fn=predict_full,
|
290 |
examples=[
|
291 |
["Enchanting Flute Trills amidst Misty String Section"],
|
292 |
["Gliding Mellotron Strings over Vibrant Phrases"],
|
293 |
["Synth Brass Melody Floating over Airy Wind Chimes"],
|
294 |
-
["Echoing Electric Guitar Licks with Ethereal Vocal Chops"],
|
295 |
["Rhythmic Acoustic Guitar Licks with Echoing Layers"],
|
296 |
["Whimsical Flute Flourishes in a Mystical Forest Glade"],
|
297 |
["Airy Piccolo Trills accompanied by Floating Harp Arpeggios"],
|
@@ -300,7 +351,7 @@ def ui_full(launch_kwargs):
|
|
300 |
["Enchanting Kalimba Melodies atop Mystical Atmosphere"],
|
301 |
],
|
302 |
inputs=[text],
|
303 |
-
outputs=[output_without_drum
|
304 |
)
|
305 |
|
306 |
interface.queue().launch(**launch_kwargs)
|
@@ -315,6 +366,7 @@ if __name__ == "__main__":
|
|
315 |
help='IP to listen on for connections to Gradio',
|
316 |
)
|
317 |
parser.add_argument("--local", action="store_true", help="Run locally instead of using API")
|
|
|
318 |
|
319 |
args = parser.parse_args()
|
320 |
|
@@ -322,6 +374,9 @@ if __name__ == "__main__":
|
|
322 |
launch_kwargs['server_name'] = args.listen
|
323 |
|
324 |
LOCAL = args.local
|
|
|
|
|
|
|
325 |
# Load melody model
|
326 |
load_model()
|
327 |
if not LOCAL:
|
|
|
29 |
from demucs.apply import apply_model
|
30 |
from demucs.audio import convert_audio
|
31 |
from gradio_client import Client
|
32 |
+
import pretty_midi
|
33 |
|
34 |
LOCAL = False
|
35 |
+
USE_MIDI = True
|
36 |
|
37 |
|
38 |
MODEL = None # Last used model
|
|
|
46 |
# Demucs stem indices as they appear in the separated model output.
stem2idx = {'drums': 0, 'bass': 1, 'other': 2, 'vocal': 3}
# Keep every stem except drums (vocal, other, bass).
stem_idx = torch.LongTensor([stem2idx['vocal'], stem2idx['other'], stem2idx['bass']])

# Candidate melody sources: pre-rendered audio clips and raw MIDI files.
melody_files = list(glob.glob('clips/**/*.wav', recursive=True))
midi_files = list(glob.glob('clips/**/*.mid', recursive=True))
# (start, end) windows, in seconds, that a melody may be cropped to.
crops = [(0, 5), (0, 10), (0, 15)]

# Most recent selections, recorded so rating_callback can log them.
selected_melody = ""
selected_crop = None
selected_text = ""
|
57 |
+
|
58 |
def _call_nostderr(*args, **kwargs):
|
59 |
# Avoid ffmpeg vomitting on the logs.
|
60 |
kwargs['stderr'] = sp.DEVNULL
|
|
|
191 |
|
192 |
|
193 |
def predict_full(text, melody, progress=gr.Progress()):
|
194 |
+
global selected_text
|
195 |
global INTERRUPTING
|
196 |
INTERRUPTING = False
|
197 |
print("Running local model")
|
|
|
203 |
|
204 |
outs = _do_predictions(
|
205 |
[text], [melody], duration=10, progress=True)
|
206 |
+
selected_text = text
|
207 |
return outs[0], gr.File.update(value=outs[0], visible=True)
|
208 |
|
209 |
|
210 |
|
211 |
def select_new_melody():
    """Pick a random melody source, crop it, and return the temp wav path.

    Records the chosen source file in the module-level ``selected_melody``
    so it can be reported later by the rating callback.
    """
    global selected_melody
    with NamedTemporaryFile("wb", suffix=".wav", delete=False) as tmp:
        if USE_MIDI:
            source = np.random.choice(midi_files)
            selected_melody = source
            # A MIDI file must first be sonified into the temp wav
            # before it can be cropped like regular audio.
            source = render_midi(source, fname=tmp.name)
        else:
            source = np.random.choice(melody_files)
            selected_melody = source

        crop_melody(source, fname=tmp.name)
        # Schedule the temp file for eventual deletion.
        file_cleaner.add(tmp.name)
        return tmp.name
|
225 |
+
|
226 |
+
def render_midi(midi_file, fname):
    """Sonify *midi_file* as sine waves at 32 kHz and write the result to *fname*.

    Returns *fname* so callers can chain on the rendered path.
    """
    midi = pretty_midi.PrettyMIDI(midi_file)
    waveform = torch.from_numpy(midi.synthesize(fs=32000))
    audio_write(
        fname, waveform, 32000, strategy="loudness",
        loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
    return fname
|
232 |
+
|
233 |
+
def crop_melody(melody_file, fname):
    """Crop *melody_file* to a randomly chosen window and write it to *fname*.

    The chosen (start, end) window is stored in the module-level
    ``selected_crop`` for later logging.
    """
    global selected_crop
    window = crops[np.random.choice(len(crops))]
    selected_crop = window
    audio, sr = audio_read(melody_file)
    start, end = window
    # audio is (channels, samples); slice the sample axis by seconds * rate.
    cropped = audio[:, start * sr:end * sr]
    audio_write(
        fname, cropped, sr, strategy="loudness",
        loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
|
241 |
|
242 |
def run_remote_model(text, melody):
|
243 |
+
global selected_text
|
244 |
+
print("Running Audiocraft API model with text", text, "and melody", melody.split("/")[-1])
|
245 |
result = client.predict(
|
246 |
text, # str in 'Describe your music' Textbox component
|
247 |
melody, # str (filepath or URL to file) in 'File' Audio component
|
|
|
260 |
sp.run(["ffmpeg", "-i", result, "-vn", "-acodec", "pcm_s16le", "-ar", "32000", "-ac", "1", d_filename])
|
261 |
# Load wav file
|
262 |
output, sr = audio_read(d_filename)
|
263 |
+
# Crop to 10 seconds
|
264 |
+
output = output[:, :10*sr]
|
265 |
# Demucs
|
266 |
print("Running demucs")
|
267 |
wav = convert_audio(output, sr, DEMUCS_MODEL.samplerate, DEMUCS_MODEL.audio_channels)
|
|
|
280 |
d_filename, demucs_output, 32000, strategy="loudness",
|
281 |
loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
|
282 |
file_cleaner.add(d_filename)
|
283 |
+
selected_text = text
|
284 |
+
|
285 |
print("Finished", text)
|
286 |
print("Tempfiles currently stored: ", len(file_cleaner.files))
|
287 |
+
return d_filename#, gr.File.update(value=d_filename, visible=True)
|
288 |
+
|
289 |
+
def rating_callback(rating):
    """Log the submitted rating together with the text, melody, and crop it rates."""
    print(
        "TEXT:", selected_text,
        "MELODY:", selected_melody,
        "CROP:", selected_crop,
        "RATING:", rating,
    )
|
291 |
|
292 |
def ui_full(launch_kwargs):
|
293 |
with gr.Blocks() as interface:
|
|
|
306 |
audio_type="numpy"
|
307 |
else:
|
308 |
audio_type="filepath"
|
309 |
+
melody = gr.Audio(type=audio_type, label="File", source="upload",
|
310 |
+
interactive=True, elem_id="melody-input", value=select_new_melody())
|
311 |
new_melody = gr.Button("New Melody", interactive=True)
|
312 |
with gr.Row():
|
313 |
submit = gr.Button("Submit")
|
|
|
316 |
|
317 |
with gr.Column():
|
318 |
output_without_drum = gr.Audio(label="Output")
|
319 |
+
with gr.Row():
|
320 |
+
slider = gr.Slider(label="Rating", minimum=0, maximum=10, step=1, value=0, scale=2)
|
321 |
+
submit_button = gr.Button("Submit Rating", scale=1)
|
322 |
+
|
323 |
+
# file_download_no_drum = gr.File(label="Download", visible=False)
|
324 |
+
# gr.Markdown(
|
325 |
+
# """
|
326 |
+
# Note that the files will be deleted after 10 minutes, so make sure to download!
|
327 |
+
# """
|
328 |
+
# )
|
329 |
if LOCAL:
|
330 |
submit.click(predict_full,
|
331 |
inputs=[text, melody],
|
332 |
+
outputs=[output_without_drum])#, file_download_no_drum])
|
333 |
else:
|
334 |
+
submit.click(run_remote_model, inputs=[text, melody], outputs=[output_without_drum])#, file_download_no_drum])
|
335 |
new_melody.click(select_new_melody, outputs=[melody])
|
336 |
+
|
337 |
+
# Button callbacks
|
338 |
+
submit_button.click(rating_callback, inputs=[slider])
|
339 |
+
|
340 |
gr.Examples(
|
341 |
fn=predict_full,
|
342 |
examples=[
|
343 |
["Enchanting Flute Trills amidst Misty String Section"],
|
344 |
["Gliding Mellotron Strings over Vibrant Phrases"],
|
345 |
["Synth Brass Melody Floating over Airy Wind Chimes"],
|
|
|
346 |
["Rhythmic Acoustic Guitar Licks with Echoing Layers"],
|
347 |
["Whimsical Flute Flourishes in a Mystical Forest Glade"],
|
348 |
["Airy Piccolo Trills accompanied by Floating Harp Arpeggios"],
|
|
|
351 |
["Enchanting Kalimba Melodies atop Mystical Atmosphere"],
|
352 |
],
|
353 |
inputs=[text],
|
354 |
+
outputs=[output_without_drum]#, file_download_no_drum]
|
355 |
)
|
356 |
|
357 |
interface.queue().launch(**launch_kwargs)
|
|
|
366 |
help='IP to listen on for connections to Gradio',
|
367 |
)
|
368 |
parser.add_argument("--local", action="store_true", help="Run locally instead of using API")
|
369 |
+
parser.add_argument("--midi", action="store_true", help="Render midi instead of wav")
|
370 |
|
371 |
args = parser.parse_args()
|
372 |
|
|
|
374 |
launch_kwargs['server_name'] = args.listen
|
375 |
|
376 |
LOCAL = args.local
|
377 |
+
USE_MIDI = args.midi
|
378 |
+
|
379 |
+
print("Using midi:", USE_MIDI)
|
380 |
# Load melody model
|
381 |
load_model()
|
382 |
if not LOCAL:
|
setup.py
CHANGED
@@ -35,7 +35,9 @@ setup(
|
|
35 |
"flask",
|
36 |
"flask-socketio",
|
37 |
"audiocraft@git+https://github.com/facebookresearch/audiocraft",
|
38 |
-
"gradio"
|
|
|
|
|
39 |
],
|
40 |
include_package_data=True,
|
41 |
)
|
|
|
35 |
"flask",
|
36 |
"flask-socketio",
|
37 |
"audiocraft@git+https://github.com/facebookresearch/audiocraft",
|
38 |
+
"gradio",
|
39 |
+
"gradio_client",
|
40 |
+
"pretty_midi"
|
41 |
],
|
42 |
include_package_data=True,
|
43 |
)
|