Spaces:
Build error
Build error
Commit
·
d84fb5f
1
Parent(s):
6ab4004
Change video to audio component. Rename filepaths
Browse files
- .gitignore +2 -1
- app.py +42 -82
.gitignore
CHANGED
@@ -159,4 +159,5 @@ cython_debug/
|
|
159 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
#.idea/
|
161 |
|
162 |
-
data/
|
|
|
|
159 |
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
160 |
#.idea/
|
161 |
|
162 |
+
data/
|
163 |
+
temp/
|
app.py
CHANGED
@@ -31,9 +31,7 @@ from demucs.audio import convert_audio
|
|
31 |
|
32 |
MODEL = None # Last used model
|
33 |
DEMUCS_MODEL = None
|
34 |
-
IS_BATCHED = False
|
35 |
MAX_BATCH_SIZE = 12
|
36 |
-
BATCHED_DURATION = 15
|
37 |
INTERRUPTING = False
|
38 |
# We have to wrap subprocess call to clean a bit the log when using gr.make_waveform
|
39 |
_old_call = sp.call
|
@@ -80,8 +78,8 @@ class FileCleaner:
|
|
80 |
else:
|
81 |
break
|
82 |
|
83 |
-
|
84 |
-
file_cleaner = FileCleaner()
|
85 |
|
86 |
|
87 |
def make_waveform(*args, **kwargs):
|
@@ -149,19 +147,34 @@ def _do_predictions(texts, melodies, duration, progress=False, **gen_kwargs):
|
|
149 |
|
150 |
output = output.cpu()
|
151 |
demucs_output = demucs_output.cpu()
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
for file in res:
|
166 |
file_cleaner.add(file)
|
167 |
print("batch finished", len(texts), time.time() - be)
|
@@ -169,13 +182,6 @@ def _do_predictions(texts, melodies, duration, progress=False, **gen_kwargs):
|
|
169 |
return res
|
170 |
|
171 |
|
172 |
-
def predict_batched(texts, melodies):
|
173 |
-
max_text_length = 512
|
174 |
-
texts = [text[:max_text_length] for text in texts]
|
175 |
-
load_model('melody')
|
176 |
-
res = _do_predictions(texts, melodies, BATCHED_DURATION)
|
177 |
-
return [res]
|
178 |
-
|
179 |
|
180 |
def predict_full(text, melody, duration, topk, topp, temperature, cfg_coef, progress=gr.Progress()):
|
181 |
global INTERRUPTING
|
@@ -188,7 +194,6 @@ def predict_full(text, melody, duration, topk, topp, temperature, cfg_coef, prog
|
|
188 |
raise gr.Error("Topp must be non-negative.")
|
189 |
|
190 |
topk = int(topk)
|
191 |
-
# load_model(model)
|
192 |
|
193 |
def _progress(generated, to_generate):
|
194 |
progress((generated, to_generate))
|
@@ -234,10 +239,17 @@ def ui_full(launch_kwargs):
|
|
234 |
cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
|
235 |
with gr.Column():
|
236 |
with gr.Row():
|
237 |
-
output_normal = gr.Video(label="Generated Music")
|
|
|
238 |
with gr.Row():
|
239 |
-
output_without_drum = gr.Video(label="Removed drums")
|
240 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
submit.click(predict_full,
|
242 |
inputs=[text, melody, duration, topk, topp, temperature, cfg_coef],
|
243 |
outputs=[output_normal, output_without_drum])
|
@@ -262,56 +274,6 @@ def ui_full(launch_kwargs):
|
|
262 |
interface.queue().launch(**launch_kwargs)
|
263 |
|
264 |
|
265 |
-
def ui_batched(launch_kwargs):
|
266 |
-
with gr.Blocks() as demo:
|
267 |
-
gr.Markdown(
|
268 |
-
"""
|
269 |
-
# MusicGen
|
270 |
-
|
271 |
-
This is the demo for [MusicGen](https://github.com/facebookresearch/audiocraft),
|
272 |
-
a simple and controllable model for music generation
|
273 |
-
presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284).
|
274 |
-
<br/>
|
275 |
-
<a href="https://huggingface.co/spaces/facebook/MusicGen?duplicate=true"
|
276 |
-
style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
|
277 |
-
<img style="margin-bottom: 0em;display: inline;margin-top: -.25em;"
|
278 |
-
src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
|
279 |
-
for longer sequences, more control and no queue.</p>
|
280 |
-
"""
|
281 |
-
)
|
282 |
-
with gr.Row():
|
283 |
-
with gr.Column():
|
284 |
-
with gr.Row():
|
285 |
-
text = gr.Text(label="Describe your music", lines=2, interactive=True)
|
286 |
-
with gr.Column():
|
287 |
-
radio = gr.Radio(["file", "mic"], value="file",
|
288 |
-
label="Condition on a melody (optional) File or Mic")
|
289 |
-
melody = gr.Audio(source="upload", type="numpy", label="File",
|
290 |
-
interactive=True, elem_id="melody-input")
|
291 |
-
with gr.Row():
|
292 |
-
submit = gr.Button("Generate")
|
293 |
-
with gr.Column():
|
294 |
-
output = gr.Video(label="Generated Music")
|
295 |
-
submit.click(predict_batched, inputs=[text, melody],
|
296 |
-
outputs=[output], batch=True, max_batch_size=MAX_BATCH_SIZE)
|
297 |
-
radio.change(toggle_audio_src, radio, [melody], queue=False, show_progress=False)
|
298 |
-
gr.Markdown("""
|
299 |
-
### More details
|
300 |
-
|
301 |
-
The model will generate 12 seconds of audio based on the description you provided.
|
302 |
-
You can optionaly provide a reference audio from which a broad melody will be extracted.
|
303 |
-
The model will then try to follow both the description and melody provided.
|
304 |
-
All samples are generated with the `melody` model.
|
305 |
-
|
306 |
-
You can also use your own GPU or a Google Colab by following the instructions on our repo.
|
307 |
-
|
308 |
-
See [github.com/facebookresearch/audiocraft](https://github.com/facebookresearch/audiocraft)
|
309 |
-
for more details.
|
310 |
-
""")
|
311 |
-
|
312 |
-
demo.queue(max_size=8 * 4).launch(**launch_kwargs)
|
313 |
-
|
314 |
-
|
315 |
if __name__ == "__main__":
|
316 |
parser = argparse.ArgumentParser()
|
317 |
parser.add_argument(
|
@@ -355,8 +317,6 @@ if __name__ == "__main__":
|
|
355 |
|
356 |
# Load melody model
|
357 |
load_model()
|
|
|
358 |
# Show the interface
|
359 |
-
|
360 |
-
ui_batched(launch_kwargs)
|
361 |
-
else:
|
362 |
-
ui_full(launch_kwargs)
|
|
|
31 |
|
32 |
MODEL = None # Last used model
|
33 |
DEMUCS_MODEL = None
|
|
|
34 |
MAX_BATCH_SIZE = 12
|
|
|
35 |
INTERRUPTING = False
|
36 |
# We have to wrap subprocess call to clean a bit the log when using gr.make_waveform
|
37 |
_old_call = sp.call
|
|
|
78 |
else:
|
79 |
break
|
80 |
|
81 |
+
# 10 minutes
|
82 |
+
file_cleaner = FileCleaner(600)
|
83 |
|
84 |
|
85 |
def make_waveform(*args, **kwargs):
|
|
|
147 |
|
148 |
output = output.cpu()
|
149 |
demucs_output = demucs_output.cpu()
|
150 |
+
|
151 |
+
# Naming
|
152 |
+
filename = f"temp/{texts[0][:10]}.wav"
|
153 |
+
d_filename = f"temp/{texts[0][:10]}_demucs.wav"
|
154 |
+
|
155 |
+
# If path exists, add number. If number exists, update number.
|
156 |
+
i = 1
|
157 |
+
while Path(filename).exists():
|
158 |
+
filename = f"{texts[0][:10]}_{i}.wav"
|
159 |
+
d_filename = f"{texts[0][:10]}_{i}_demucs.wav"
|
160 |
+
i += 1
|
161 |
+
|
162 |
+
# with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
|
163 |
+
audio_write(
|
164 |
+
filename, output, MODEL.sample_rate, strategy="loudness",
|
165 |
+
loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
|
166 |
+
# out_files.append(pool.submit(make_waveform, filename))
|
167 |
+
out_files.append(filename)
|
168 |
+
file_cleaner.add(filename)
|
169 |
+
# with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
|
170 |
+
audio_write(
|
171 |
+
d_filename, demucs_output, MODEL.sample_rate, strategy="loudness",
|
172 |
+
loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
|
173 |
+
out_files.append(d_filename)
|
174 |
+
# out_files.append(pool.submit(make_waveform, d_filename))
|
175 |
+
file_cleaner.add(d_filename)
|
176 |
+
# res = [out_file.result() for out_file in out_files]
|
177 |
+
res = [out_file for out_file in out_files]
|
178 |
for file in res:
|
179 |
file_cleaner.add(file)
|
180 |
print("batch finished", len(texts), time.time() - be)
|
|
|
182 |
return res
|
183 |
|
184 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
185 |
|
186 |
def predict_full(text, melody, duration, topk, topp, temperature, cfg_coef, progress=gr.Progress()):
|
187 |
global INTERRUPTING
|
|
|
194 |
raise gr.Error("Topp must be non-negative.")
|
195 |
|
196 |
topk = int(topk)
|
|
|
197 |
|
198 |
def _progress(generated, to_generate):
|
199 |
progress((generated, to_generate))
|
|
|
239 |
cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
|
240 |
with gr.Column():
|
241 |
with gr.Row():
|
242 |
+
# output_normal = gr.Video(label="Generated Music")
|
243 |
+
output_normal = gr.Audio(label="Generated Music")
|
244 |
with gr.Row():
|
245 |
+
# output_without_drum = gr.Video(label="Removed drums")
|
246 |
+
output_without_drum = gr.Audio(label="Removed drums")
|
247 |
+
with gr.Row():
|
248 |
+
gr.Markdown(
|
249 |
+
"""
|
250 |
+
Note that the files will be deleted after 10 minutes, so make sure to download!
|
251 |
+
"""
|
252 |
+
)
|
253 |
submit.click(predict_full,
|
254 |
inputs=[text, melody, duration, topk, topp, temperature, cfg_coef],
|
255 |
outputs=[output_normal, output_without_drum])
|
|
|
274 |
interface.queue().launch(**launch_kwargs)
|
275 |
|
276 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
277 |
if __name__ == "__main__":
|
278 |
parser = argparse.ArgumentParser()
|
279 |
parser.add_argument(
|
|
|
317 |
|
318 |
# Load melody model
|
319 |
load_model()
|
320 |
+
os.mkdir("temp")
|
321 |
# Show the interface
|
322 |
+
ui_full(launch_kwargs)
|
|
|
|
|
|