Spaces:
Build error
Build error
Commit
·
e586364
1
Parent(s):
e9d627c
Add midi option
Browse files
app.py
CHANGED
@@ -29,8 +29,10 @@ from demucs import pretrained
|
|
29 |
from demucs.apply import apply_model
|
30 |
from demucs.audio import convert_audio
|
31 |
from gradio_client import Client
|
|
|
32 |
|
33 |
LOCAL = False
|
|
|
34 |
|
35 |
|
36 |
MODEL = None # Last used model
|
@@ -44,9 +46,15 @@ _old_call = sp.call
|
|
44 |
stem2idx = {'drums': 0, 'bass': 1, 'other': 2, 'vocal': 3}
|
45 |
stem_idx = torch.LongTensor([stem2idx['vocal'], stem2idx['other'], stem2idx['bass']])
|
46 |
|
47 |
-
melody_files = glob.glob('clips/**/*.
|
|
|
|
|
48 |
|
49 |
|
|
|
|
|
|
|
|
|
50 |
def _call_nostderr(*args, **kwargs):
|
51 |
# Avoid ffmpeg vomitting on the logs.
|
52 |
kwargs['stderr'] = sp.DEVNULL
|
@@ -183,6 +191,7 @@ def _do_predictions(texts, melodies, duration, progress=False, **gen_kwargs):
|
|
183 |
|
184 |
|
185 |
def predict_full(text, melody, progress=gr.Progress()):
|
|
|
186 |
global INTERRUPTING
|
187 |
INTERRUPTING = False
|
188 |
print("Running local model")
|
@@ -194,17 +203,45 @@ def predict_full(text, melody, progress=gr.Progress()):
|
|
194 |
|
195 |
outs = _do_predictions(
|
196 |
[text], [melody], duration=10, progress=True)
|
197 |
-
|
198 |
return outs[0], gr.File.update(value=outs[0], visible=True)
|
199 |
|
200 |
|
201 |
|
202 |
def select_new_melody():
|
203 |
-
|
204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
205 |
|
206 |
def run_remote_model(text, melody):
|
207 |
-
|
|
|
208 |
result = client.predict(
|
209 |
text, # str in 'Describe your music' Textbox component
|
210 |
melody, # str (filepath or URL to file) in 'File' Audio component
|
@@ -223,6 +260,8 @@ def run_remote_model(text, melody):
|
|
223 |
sp.run(["ffmpeg", "-i", result, "-vn", "-acodec", "pcm_s16le", "-ar", "32000", "-ac", "1", d_filename])
|
224 |
# Load wav file
|
225 |
output, sr = audio_read(d_filename)
|
|
|
|
|
226 |
# Demucs
|
227 |
print("Running demucs")
|
228 |
wav = convert_audio(output, sr, DEMUCS_MODEL.samplerate, DEMUCS_MODEL.audio_channels)
|
@@ -241,9 +280,14 @@ def run_remote_model(text, melody):
|
|
241 |
d_filename, demucs_output, 32000, strategy="loudness",
|
242 |
loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
|
243 |
file_cleaner.add(d_filename)
|
|
|
|
|
244 |
print("Finished", text)
|
245 |
print("Tempfiles currently stored: ", len(file_cleaner.files))
|
246 |
-
return d_filename
|
|
|
|
|
|
|
247 |
|
248 |
def ui_full(launch_kwargs):
|
249 |
with gr.Blocks() as interface:
|
@@ -262,8 +306,8 @@ def ui_full(launch_kwargs):
|
|
262 |
audio_type="numpy"
|
263 |
else:
|
264 |
audio_type="filepath"
|
265 |
-
melody = gr.Audio(type=audio_type, label="File",
|
266 |
-
interactive=True, elem_id="melody-input", value=
|
267 |
new_melody = gr.Button("New Melody", interactive=True)
|
268 |
with gr.Row():
|
269 |
submit = gr.Button("Submit")
|
@@ -272,26 +316,33 @@ def ui_full(launch_kwargs):
|
|
272 |
|
273 |
with gr.Column():
|
274 |
output_without_drum = gr.Audio(label="Output")
|
275 |
-
|
276 |
-
|
277 |
-
""
|
278 |
-
|
279 |
-
|
280 |
-
|
|
|
|
|
|
|
|
|
281 |
if LOCAL:
|
282 |
submit.click(predict_full,
|
283 |
inputs=[text, melody],
|
284 |
-
outputs=[output_without_drum
|
285 |
else:
|
286 |
-
submit.click(run_remote_model, inputs=[text, melody], outputs=[output_without_drum
|
287 |
new_melody.click(select_new_melody, outputs=[melody])
|
|
|
|
|
|
|
|
|
288 |
gr.Examples(
|
289 |
fn=predict_full,
|
290 |
examples=[
|
291 |
["Enchanting Flute Trills amidst Misty String Section"],
|
292 |
["Gliding Mellotron Strings over Vibrant Phrases"],
|
293 |
["Synth Brass Melody Floating over Airy Wind Chimes"],
|
294 |
-
["Echoing Electric Guitar Licks with Ethereal Vocal Chops"],
|
295 |
["Rhythmic Acoustic Guitar Licks with Echoing Layers"],
|
296 |
["Whimsical Flute Flourishes in a Mystical Forest Glade"],
|
297 |
["Airy Piccolo Trills accompanied by Floating Harp Arpeggios"],
|
@@ -300,7 +351,7 @@ def ui_full(launch_kwargs):
|
|
300 |
["Enchanting Kalimba Melodies atop Mystical Atmosphere"],
|
301 |
],
|
302 |
inputs=[text],
|
303 |
-
outputs=[output_without_drum
|
304 |
)
|
305 |
|
306 |
interface.queue().launch(**launch_kwargs)
|
@@ -315,6 +366,7 @@ if __name__ == "__main__":
|
|
315 |
help='IP to listen on for connections to Gradio',
|
316 |
)
|
317 |
parser.add_argument("--local", action="store_true", help="Run locally instead of using API")
|
|
|
318 |
|
319 |
args = parser.parse_args()
|
320 |
|
@@ -322,6 +374,9 @@ if __name__ == "__main__":
|
|
322 |
launch_kwargs['server_name'] = args.listen
|
323 |
|
324 |
LOCAL = args.local
|
|
|
|
|
|
|
325 |
# Load melody model
|
326 |
load_model()
|
327 |
if not LOCAL:
|
|
|
29 |
from demucs.apply import apply_model
|
30 |
from demucs.audio import convert_audio
|
31 |
from gradio_client import Client
|
32 |
+
import pretty_midi
|
33 |
|
34 |
LOCAL = False
|
35 |
+
USE_MIDI = True
|
36 |
|
37 |
|
38 |
MODEL = None # Last used model
|
|
|
46 |
# Demucs stem indices as they appear in the separated model output.
stem2idx = {'drums': 0, 'bass': 1, 'other': 2, 'vocal': 3}
# Keep every stem except drums (vocal, other, bass).
stem_idx = torch.LongTensor([stem2idx['vocal'], stem2idx['other'], stem2idx['bass']])

# Candidate melody sources: pre-rendered audio clips and raw MIDI files.
melody_files = list(glob.glob('clips/**/*.wav', recursive=True))
midi_files = list(glob.glob('clips/**/*.mid', recursive=True))
# (start, end) windows, in seconds, that a melody may be cropped to.
crops = [(0, 5), (0, 10), (0, 15)]

# Most recent selections, recorded so rating_callback can log them.
selected_melody = ""
selected_crop = None
selected_text = ""
|
57 |
+
|
58 |
def _call_nostderr(*args, **kwargs):
|
59 |
# Avoid ffmpeg vomitting on the logs.
|
60 |
kwargs['stderr'] = sp.DEVNULL
|
|
|
191 |
|
192 |
|
193 |
def predict_full(text, melody, progress=gr.Progress()):
|
194 |
+
global selected_text
|
195 |
global INTERRUPTING
|
196 |
INTERRUPTING = False
|
197 |
print("Running local model")
|
|
|
203 |
|
204 |
outs = _do_predictions(
|
205 |
[text], [melody], duration=10, progress=True)
|
206 |
+
selected_text = text
|
207 |
return outs[0], gr.File.update(value=outs[0], visible=True)
|
208 |
|
209 |
|
210 |
|
211 |
def select_new_melody():
    """Pick a random melody source, crop it, and return the temp wav path.

    Records the chosen source file in the module-level ``selected_melody``
    so it can be reported later by the rating callback.
    """
    global selected_melody
    with NamedTemporaryFile("wb", suffix=".wav", delete=False) as tmp:
        if USE_MIDI:
            source = np.random.choice(midi_files)
            selected_melody = source
            # A MIDI file must first be sonified into the temp wav
            # before it can be cropped like regular audio.
            source = render_midi(source, fname=tmp.name)
        else:
            source = np.random.choice(melody_files)
            selected_melody = source

        crop_melody(source, fname=tmp.name)
        # Schedule the temp file for eventual deletion.
        file_cleaner.add(tmp.name)
        return tmp.name
|
225 |
+
|
226 |
+
def render_midi(midi_file, fname):
    """Sonify *midi_file* as sine waves at 32 kHz and write the result to *fname*.

    Returns *fname* so callers can chain on the rendered path.
    """
    midi = pretty_midi.PrettyMIDI(midi_file)
    waveform = torch.from_numpy(midi.synthesize(fs=32000))
    audio_write(
        fname, waveform, 32000, strategy="loudness",
        loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
    return fname
|
232 |
+
|
233 |
+
def crop_melody(melody_file, fname):
    """Crop *melody_file* to a randomly chosen window and write it to *fname*.

    The chosen (start, end) window is stored in the module-level
    ``selected_crop`` for later logging.
    """
    global selected_crop
    window = crops[np.random.choice(len(crops))]
    selected_crop = window
    audio, sr = audio_read(melody_file)
    start, end = window
    # audio is (channels, samples); slice the sample axis by seconds * rate.
    cropped = audio[:, start * sr:end * sr]
    audio_write(
        fname, cropped, sr, strategy="loudness",
        loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
|
241 |
|
242 |
def run_remote_model(text, melody):
|
243 |
+
global selected_text
|
244 |
+
print("Running Audiocraft API model with text", text, "and melody", melody.split("/")[-1])
|
245 |
result = client.predict(
|
246 |
text, # str in 'Describe your music' Textbox component
|
247 |
melody, # str (filepath or URL to file) in 'File' Audio component
|
|
|
260 |
sp.run(["ffmpeg", "-i", result, "-vn", "-acodec", "pcm_s16le", "-ar", "32000", "-ac", "1", d_filename])
|
261 |
# Load wav file
|
262 |
output, sr = audio_read(d_filename)
|
263 |
+
# Crop to 10 seconds
|
264 |
+
output = output[:, :10*sr]
|
265 |
# Demucs
|
266 |
print("Running demucs")
|
267 |
wav = convert_audio(output, sr, DEMUCS_MODEL.samplerate, DEMUCS_MODEL.audio_channels)
|
|
|
280 |
d_filename, demucs_output, 32000, strategy="loudness",
|
281 |
loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
|
282 |
file_cleaner.add(d_filename)
|
283 |
+
selected_text = text
|
284 |
+
|
285 |
print("Finished", text)
|
286 |
print("Tempfiles currently stored: ", len(file_cleaner.files))
|
287 |
+
return d_filename#, gr.File.update(value=d_filename, visible=True)
|
288 |
+
|
289 |
+
def rating_callback(rating):
    """Log the submitted rating together with the text, melody, and crop it rates."""
    print(
        "TEXT:", selected_text,
        "MELODY:", selected_melody,
        "CROP:", selected_crop,
        "RATING:", rating,
    )
|
291 |
|
292 |
def ui_full(launch_kwargs):
|
293 |
with gr.Blocks() as interface:
|
|
|
306 |
audio_type="numpy"
|
307 |
else:
|
308 |
audio_type="filepath"
|
309 |
+
melody = gr.Audio(type=audio_type, label="File", source="upload",
|
310 |
+
interactive=True, elem_id="melody-input", value=select_new_melody())
|
311 |
new_melody = gr.Button("New Melody", interactive=True)
|
312 |
with gr.Row():
|
313 |
submit = gr.Button("Submit")
|
|
|
316 |
|
317 |
with gr.Column():
|
318 |
output_without_drum = gr.Audio(label="Output")
|
319 |
+
with gr.Row():
|
320 |
+
slider = gr.Slider(label="Rating", minimum=0, maximum=10, step=1, value=0, scale=2)
|
321 |
+
submit_button = gr.Button("Submit Rating", scale=1)
|
322 |
+
|
323 |
+
# file_download_no_drum = gr.File(label="Download", visible=False)
|
324 |
+
# gr.Markdown(
|
325 |
+
# """
|
326 |
+
# Note that the files will be deleted after 10 minutes, so make sure to download!
|
327 |
+
# """
|
328 |
+
# )
|
329 |
if LOCAL:
|
330 |
submit.click(predict_full,
|
331 |
inputs=[text, melody],
|
332 |
+
outputs=[output_without_drum])#, file_download_no_drum])
|
333 |
else:
|
334 |
+
submit.click(run_remote_model, inputs=[text, melody], outputs=[output_without_drum])#, file_download_no_drum])
|
335 |
new_melody.click(select_new_melody, outputs=[melody])
|
336 |
+
|
337 |
+
# Button callbacks
|
338 |
+
submit_button.click(rating_callback, inputs=[slider])
|
339 |
+
|
340 |
gr.Examples(
|
341 |
fn=predict_full,
|
342 |
examples=[
|
343 |
["Enchanting Flute Trills amidst Misty String Section"],
|
344 |
["Gliding Mellotron Strings over Vibrant Phrases"],
|
345 |
["Synth Brass Melody Floating over Airy Wind Chimes"],
|
|
|
346 |
["Rhythmic Acoustic Guitar Licks with Echoing Layers"],
|
347 |
["Whimsical Flute Flourishes in a Mystical Forest Glade"],
|
348 |
["Airy Piccolo Trills accompanied by Floating Harp Arpeggios"],
|
|
|
351 |
["Enchanting Kalimba Melodies atop Mystical Atmosphere"],
|
352 |
],
|
353 |
inputs=[text],
|
354 |
+
outputs=[output_without_drum]#, file_download_no_drum]
|
355 |
)
|
356 |
|
357 |
interface.queue().launch(**launch_kwargs)
|
|
|
366 |
help='IP to listen on for connections to Gradio',
|
367 |
)
|
368 |
parser.add_argument("--local", action="store_true", help="Run locally instead of using API")
|
369 |
+
parser.add_argument("--midi", action="store_true", help="Render midi instead of wav")
|
370 |
|
371 |
args = parser.parse_args()
|
372 |
|
|
|
374 |
launch_kwargs['server_name'] = args.listen
|
375 |
|
376 |
LOCAL = args.local
|
377 |
+
USE_MIDI = args.midi
|
378 |
+
|
379 |
+
print("Using midi:", USE_MIDI)
|
380 |
# Load melody model
|
381 |
load_model()
|
382 |
if not LOCAL:
|
setup.py
CHANGED
@@ -35,7 +35,9 @@ setup(
|
|
35 |
"flask",
|
36 |
"flask-socketio",
|
37 |
"audiocraft@git+https://github.com/facebookresearch/audiocraft",
|
38 |
-
"gradio"
|
|
|
|
|
39 |
],
|
40 |
include_package_data=True,
|
41 |
)
|
|
|
35 |
"flask",
|
36 |
"flask-socketio",
|
37 |
"audiocraft@git+https://github.com/facebookresearch/audiocraft",
|
38 |
+
"gradio",
|
39 |
+
"gradio_client",
|
40 |
+
"pretty_midi"
|
41 |
],
|
42 |
include_package_data=True,
|
43 |
)
|