Spaces:
Runtime error
Runtime error
clarification
Browse files- notebooks/test_model.ipynb +15 -15
notebooks/test_model.ipynb
CHANGED
@@ -492,7 +492,7 @@
|
|
492 |
},
|
493 |
{
|
494 |
"cell_type": "markdown",
|
495 |
-
"id": "
|
496 |
"metadata": {},
|
497 |
"source": [
|
498 |
"## Latent Audio Diffusion\n",
|
@@ -502,7 +502,7 @@
|
|
502 |
{
|
503 |
"cell_type": "code",
|
504 |
"execution_count": null,
|
505 |
-
"id": "
|
506 |
"metadata": {},
|
507 |
"outputs": [],
|
508 |
"source": [
|
@@ -512,7 +512,7 @@
|
|
512 |
{
|
513 |
"cell_type": "code",
|
514 |
"execution_count": null,
|
515 |
-
"id": "
|
516 |
"metadata": {},
|
517 |
"outputs": [],
|
518 |
"source": [
|
@@ -522,7 +522,7 @@
|
|
522 |
{
|
523 |
"cell_type": "code",
|
524 |
"execution_count": null,
|
525 |
-
"id": "
|
526 |
"metadata": {},
|
527 |
"outputs": [],
|
528 |
"source": [
|
@@ -537,7 +537,7 @@
|
|
537 |
{
|
538 |
"cell_type": "code",
|
539 |
"execution_count": null,
|
540 |
-
"id": "
|
541 |
"metadata": {},
|
542 |
"outputs": [],
|
543 |
"source": [
|
@@ -551,7 +551,7 @@
|
|
551 |
},
|
552 |
{
|
553 |
"cell_type": "markdown",
|
554 |
-
"id": "
|
555 |
"metadata": {},
|
556 |
"source": [
|
557 |
"### Interpolation in latent space\n",
|
@@ -561,43 +561,43 @@
|
|
561 |
{
|
562 |
"cell_type": "code",
|
563 |
"execution_count": null,
|
564 |
-
"id": "
|
565 |
"metadata": {},
|
566 |
"outputs": [],
|
567 |
"source": [
|
568 |
"generator.manual_seed(seed)\n",
|
569 |
-
"
|
570 |
" audio_diffusion.pipe.unet.sample_size[0],\n",
|
571 |
" audio_diffusion.pipe.unet.sample_size[1]),\n",
|
572 |
" generator=generator)\n",
|
573 |
-
"
|
574 |
]
|
575 |
},
|
576 |
{
|
577 |
"cell_type": "code",
|
578 |
"execution_count": null,
|
579 |
-
"id": "
|
580 |
"metadata": {},
|
581 |
"outputs": [],
|
582 |
"source": [
|
583 |
"generator.manual_seed(seed2)\n",
|
584 |
-
"
|
585 |
" audio_diffusion.pipe.unet.sample_size[0],\n",
|
586 |
" audio_diffusion.pipe.unet.sample_size[1]),\n",
|
587 |
" generator=generator)\n",
|
588 |
-
"
|
589 |
]
|
590 |
},
|
591 |
{
|
592 |
"cell_type": "code",
|
593 |
"execution_count": null,
|
594 |
-
"id": "
|
595 |
"metadata": {},
|
596 |
"outputs": [],
|
597 |
"source": [
|
598 |
"alpha = 0.5 #@param {type:\"slider\", min:0, max:1, step:0.1}\n",
|
599 |
"_, (sample_rate, audio3) = audio_diffusion.generate_spectrogram_and_audio(\n",
|
600 |
-
" noise=audio_diffusion.pipe.slerp(
|
601 |
" generator=generator)\n",
|
602 |
"display(Audio(audio, rate=mel.get_sample_rate()))\n",
|
603 |
"display(Audio(audio2, rate=mel.get_sample_rate()))\n",
|
@@ -607,7 +607,7 @@
|
|
607 |
{
|
608 |
"cell_type": "code",
|
609 |
"execution_count": null,
|
610 |
-
"id": "
|
611 |
"metadata": {},
|
612 |
"outputs": [],
|
613 |
"source": []
|
|
|
492 |
},
|
493 |
{
|
494 |
"cell_type": "markdown",
|
495 |
+
"id": "9b244547",
|
496 |
"metadata": {},
|
497 |
"source": [
|
498 |
"## Latent Audio Diffusion\n",
|
|
|
502 |
{
|
503 |
"cell_type": "code",
|
504 |
"execution_count": null,
|
505 |
+
"id": "a88b3fbb",
|
506 |
"metadata": {},
|
507 |
"outputs": [],
|
508 |
"source": [
|
|
|
512 |
{
|
513 |
"cell_type": "code",
|
514 |
"execution_count": null,
|
515 |
+
"id": "15e353ee",
|
516 |
"metadata": {},
|
517 |
"outputs": [],
|
518 |
"source": [
|
|
|
522 |
{
|
523 |
"cell_type": "code",
|
524 |
"execution_count": null,
|
525 |
+
"id": "fa0f0c8c",
|
526 |
"metadata": {},
|
527 |
"outputs": [],
|
528 |
"source": [
|
|
|
537 |
{
|
538 |
"cell_type": "code",
|
539 |
"execution_count": null,
|
540 |
+
"id": "73dc575d",
|
541 |
"metadata": {},
|
542 |
"outputs": [],
|
543 |
"source": [
|
|
|
551 |
},
|
552 |
{
|
553 |
"cell_type": "markdown",
|
554 |
+
"id": "428d2d67",
|
555 |
"metadata": {},
|
556 |
"source": [
|
557 |
"### Interpolation in latent space\n",
|
|
|
561 |
{
|
562 |
"cell_type": "code",
|
563 |
"execution_count": null,
|
564 |
+
"id": "72211c2b",
|
565 |
"metadata": {},
|
566 |
"outputs": [],
|
567 |
"source": [
|
568 |
"generator.manual_seed(seed)\n",
|
569 |
+
"latents = torch.randn((1, audio_diffusion.pipe.unet.in_channels,\n",
|
570 |
" audio_diffusion.pipe.unet.sample_size[0],\n",
|
571 |
" audio_diffusion.pipe.unet.sample_size[1]),\n",
|
572 |
" generator=generator)\n",
|
573 |
+
"latents.shape"
|
574 |
]
|
575 |
},
|
576 |
{
|
577 |
"cell_type": "code",
|
578 |
"execution_count": null,
|
579 |
+
"id": "6c732dbe",
|
580 |
"metadata": {},
|
581 |
"outputs": [],
|
582 |
"source": [
|
583 |
"generator.manual_seed(seed2)\n",
|
584 |
+
"latents2 = torch.randn((1, audio_diffusion.pipe.unet.in_channels,\n",
|
585 |
" audio_diffusion.pipe.unet.sample_size[0],\n",
|
586 |
" audio_diffusion.pipe.unet.sample_size[1]),\n",
|
587 |
" generator=generator)\n",
|
588 |
+
"latents2.shape"
|
589 |
]
|
590 |
},
|
591 |
{
|
592 |
"cell_type": "code",
|
593 |
"execution_count": null,
|
594 |
+
"id": "159bcfc4",
|
595 |
"metadata": {},
|
596 |
"outputs": [],
|
597 |
"source": [
|
598 |
"alpha = 0.5 #@param {type:\"slider\", min:0, max:1, step:0.1}\n",
|
599 |
"_, (sample_rate, audio3) = audio_diffusion.generate_spectrogram_and_audio(\n",
|
600 |
+
" noise=audio_diffusion.pipe.slerp(latents, latents2, alpha),\n",
|
601 |
" generator=generator)\n",
|
602 |
"display(Audio(audio, rate=mel.get_sample_rate()))\n",
|
603 |
"display(Audio(audio2, rate=mel.get_sample_rate()))\n",
|
|
|
607 |
{
|
608 |
"cell_type": "code",
|
609 |
"execution_count": null,
|
610 |
+
"id": "ce6c9cc1",
|
611 |
"metadata": {},
|
612 |
"outputs": [],
|
613 |
"source": []
|