Spaces:

soundsauce
/

soundsauce-old

Build error

App Files Community

mattricesound committed on Jul 12, 2023

Commit

d84fb5f

1 Parent(s): 6ab4004

Change video to audio component. Rename filepaths

Browse files

Files changed (2) hide show

.gitignore +2 -1
app.py +42 -82

.gitignore CHANGED Viewed

@@ -159,4 +159,5 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
-data/

 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+data/
+temp/

app.py CHANGED Viewed

@@ -31,9 +31,7 @@ from demucs.audio import convert_audio
 MODEL = None  # Last used model
 DEMUCS_MODEL = None
-IS_BATCHED = False
 MAX_BATCH_SIZE = 12
-BATCHED_DURATION = 15
 INTERRUPTING = False
 # We have to wrap subprocess call to clean a bit the log when using gr.make_waveform
 _old_call = sp.call
@@ -80,8 +78,8 @@ class FileCleaner:
             else:
                 break
-file_cleaner = FileCleaner()
 def make_waveform(*args, **kwargs):
@@ -149,19 +147,34 @@ def _do_predictions(texts, melodies, duration, progress=False, **gen_kwargs):
         output = output.cpu()
         demucs_output = demucs_output.cpu()
-        with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
-            audio_write(
-                file.name, output, MODEL.sample_rate, strategy="loudness",
-                loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
-            out_files.append(pool.submit(make_waveform, file.name))
-            file_cleaner.add(file.name)
-        with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
-            audio_write(
-                file.name, demucs_output, MODEL.sample_rate, strategy="loudness",
-                loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
-            out_files.append(pool.submit(make_waveform, file.name))
-            file_cleaner.add(file.name)
-    res = [out_file.result() for out_file in out_files]
     for file in res:
         file_cleaner.add(file)
     print("batch finished", len(texts), time.time() - be)
@@ -169,13 +182,6 @@ def _do_predictions(texts, melodies, duration, progress=False, **gen_kwargs):
     return res
-def predict_batched(texts, melodies):
-    max_text_length = 512
-    texts = [text[:max_text_length] for text in texts]
-    load_model('melody')
-    res = _do_predictions(texts, melodies, BATCHED_DURATION)
-    return [res]
 def predict_full(text, melody, duration, topk, topp, temperature, cfg_coef, progress=gr.Progress()):
     global INTERRUPTING
@@ -188,7 +194,6 @@ def predict_full(text, melody, duration, topk, topp, temperature, cfg_coef, prog
         raise gr.Error("Topp must be non-negative.")
     topk = int(topk)
-    # load_model(model)
     def _progress(generated, to_generate):
         progress((generated, to_generate))
@@ -234,10 +239,17 @@ def ui_full(launch_kwargs):
                     cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
             with gr.Column():
                 with gr.Row():
-                    output_normal = gr.Video(label="Generated Music")
                 with gr.Row():
-                    output_without_drum = gr.Video(label="Removed drums")
         submit.click(predict_full,
                      inputs=[text, melody, duration, topk, topp, temperature, cfg_coef],
                      outputs=[output_normal, output_without_drum])
@@ -262,56 +274,6 @@ def ui_full(launch_kwargs):
         interface.queue().launch(**launch_kwargs)
-def ui_batched(launch_kwargs):
-    with gr.Blocks() as demo:
-        gr.Markdown(
-            """
-            # MusicGen
-            This is the demo for [MusicGen](https://github.com/facebookresearch/audiocraft),
-            a simple and controllable model for music generation
-            presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284).
-            <br/>
-            <a href="https://huggingface.co/spaces/facebook/MusicGen?duplicate=true"
-                style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
-            <img style="margin-bottom: 0em;display: inline;margin-top: -.25em;"
-                src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
-            for longer sequences, more control and no queue.</p>
-            """
-        )
-        with gr.Row():
-            with gr.Column():
-                with gr.Row():
-                    text = gr.Text(label="Describe your music", lines=2, interactive=True)
-                    with gr.Column():
-                        radio = gr.Radio(["file", "mic"], value="file",
-                                         label="Condition on a melody (optional) File or Mic")
-                        melody = gr.Audio(source="upload", type="numpy", label="File",
-                                          interactive=True, elem_id="melody-input")
-                with gr.Row():
-                    submit = gr.Button("Generate")
-            with gr.Column():
-                output = gr.Video(label="Generated Music")
-        submit.click(predict_batched, inputs=[text, melody],
-                     outputs=[output], batch=True, max_batch_size=MAX_BATCH_SIZE)
-        radio.change(toggle_audio_src, radio, [melody], queue=False, show_progress=False)
-        gr.Markdown("""
-        ### More details
-        The model will generate 12 seconds of audio based on the description you provided.
-        You can optionaly provide a reference audio from which a broad melody will be extracted.
-        The model will then try to follow both the description and melody provided.
-        All samples are generated with the `melody` model.
-        You can also use your own GPU or a Google Colab by following the instructions on our repo.
-        See [github.com/facebookresearch/audiocraft](https://github.com/facebookresearch/audiocraft)
-        for more details.
-        """)
-        demo.queue(max_size=8 * 4).launch(**launch_kwargs)
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument(
@@ -355,8 +317,6 @@ if __name__ == "__main__":
     # Load melody model
     load_model()
     # Show the interface
-    if IS_BATCHED:
-        ui_batched(launch_kwargs)
-    else:
-        ui_full(launch_kwargs)

 MODEL = None  # Last used model
 DEMUCS_MODEL = None
 MAX_BATCH_SIZE = 12
 INTERRUPTING = False
 # We have to wrap subprocess call to clean a bit the log when using gr.make_waveform
 _old_call = sp.call
             else:
                 break
+# 10 minutes
+file_cleaner = FileCleaner(600)
 def make_waveform(*args, **kwargs):
         output = output.cpu()
         demucs_output = demucs_output.cpu()
+        # Naming
+        filename = f"temp/{texts[0][:10]}.wav"
+        d_filename = f"temp/{texts[0][:10]}_demucs.wav"
+        # If path exists, add number. If number exists, update number.
+        i = 1
+        while Path(filename).exists():
+            filename = f"{texts[0][:10]}_{i}.wav"
+            d_filename = f"{texts[0][:10]}_{i}_demucs.wav"
+            i += 1
+        # with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
+        audio_write(
+            filename, output, MODEL.sample_rate, strategy="loudness",
+            loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
+        # out_files.append(pool.submit(make_waveform, filename))
+        out_files.append(filename)
+        file_cleaner.add(filename)
+    # with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
+        audio_write(
+            d_filename, demucs_output, MODEL.sample_rate, strategy="loudness",
+            loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
+        out_files.append(d_filename)
+        # out_files.append(pool.submit(make_waveform, d_filename))
+        file_cleaner.add(d_filename)
+    # res = [out_file.result() for out_file in out_files]
+    res = [out_file for out_file in out_files]
     for file in res:
         file_cleaner.add(file)
     print("batch finished", len(texts), time.time() - be)
     return res
 def predict_full(text, melody, duration, topk, topp, temperature, cfg_coef, progress=gr.Progress()):
     global INTERRUPTING
         raise gr.Error("Topp must be non-negative.")
     topk = int(topk)
     def _progress(generated, to_generate):
         progress((generated, to_generate))
                     cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
             with gr.Column():
                 with gr.Row():
+                    # output_normal = gr.Video(label="Generated Music")
+                    output_normal = gr.Audio(label="Generated Music")
                 with gr.Row():
+                    # output_without_drum = gr.Video(label="Removed drums")
+                    output_without_drum = gr.Audio(label="Removed drums")
+                with gr.Row():
+                    gr.Markdown(
+                        """
+                        Note that the files will be deleted after 10 minutes, so make sure to download!
+                        """
+                    )
         submit.click(predict_full,
                      inputs=[text, melody, duration, topk, topp, temperature, cfg_coef],
                      outputs=[output_normal, output_without_drum])
         interface.queue().launch(**launch_kwargs)
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument(
     # Load melody model
     load_model()
+    os.mkdir("temp")
     # Show the interface
+    ui_full(launch_kwargs)