Spaces:

teticio
/

audio-diffusion

Runtime error

App Files Files Community

teticio committed on Aug 10, 2022

Commit

6dff871

1 Parent(s): f4441f8

work with grayscale images

Browse files

Files changed (4) hide show

notebooks/test-model.ipynb +0 -0
requirements-lock.txt +198 -0
src/audio_to_images.py +5 -0
src/train_unconditional.py +4 -5

notebooks/test-model.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

requirements-lock.txt ADDED Viewed

	@@ -0,0 +1,198 @@

+absl-py==1.2.0
+accelerate==0.12.0
+aiobotocore==2.3.4
+aiohttp==3.8.1
+aioitertools==0.10.0
+aiosignal==1.2.0
+analytics-python==1.4.0
+anyio==3.6.1
+appdirs==1.4.4
+argon2-cffi==21.3.0
+argon2-cffi-bindings==21.2.0
+async-timeout==4.0.2
+attrs==21.4.0
+audioread==2.1.9
+backcall==0.2.0
+backoff==1.10.0
+bcrypt==3.2.2
+beautifulsoup4==4.11.1
+bertviz==1.4.0
+black==22.6.0
+bleach==5.0.1
+boto3==1.21.21
+botocore==1.24.21
+cachetools==5.2.0
+captum==0.5.0
+certifi==2022.6.15
+cffi==1.15.1
+charset-normalizer==2.1.0
+click==8.1.3
+cloudpickle==2.1.0
+cryptography==37.0.4
+cycler==0.11.0
+datasets==2.4.0
+debugpy==1.6.2
+decorator==5.1.1
+deepspeed==0.7.0
+defusedxml==0.7.1
+diffusers==0.1.3
+dill==0.3.5.1
+entrypoints==0.4
+fastapi==0.79.0
+fastjsonschema==2.16.1
+ffmpy==0.3.0
+filelock==3.8.0
+fonttools==4.34.4
+frozenlist==1.3.1
+fsspec==2022.7.1
+google-auth==2.10.0
+google-auth-oauthlib==0.4.6
+google-pasta==0.2.0
+gradio==3.1.4
+grpcio==1.47.0
+h11==0.12.0
+hjson==3.0.2
+httpcore==0.15.0
+httpx==0.23.0
+huggingface-hub==0.8.1
+idna==3.3
+importlib-metadata==4.12.0
+ipykernel==6.15.1
+ipython==7.34.0
+ipython-genutils==0.2.0
+ipywidgets==7.7.1
+jedi==0.18.1
+Jinja2==3.1.2
+jmespath==1.0.1
+joblib==1.1.0
+jsonschema==4.9.1
+jupyter-client==7.3.4
+jupyter-core==4.11.1
+jupyterlab-pygments==0.2.2
+jupyterlab-widgets==1.1.1
+kiwisolver==1.4.4
+librosa==0.9.2
+linkify-it-py==1.0.3
+llvmlite==0.39.0
+lxml==4.9.1
+Markdown==3.4.1
+markdown-it-py==2.1.0
+MarkupSafe==2.1.1
+matplotlib==3.5.2
+matplotlib-inline==0.1.3
+mdit-py-plugins==0.3.0
+mdurl==0.1.1
+mistune==0.8.4
+monotonic==1.6
+more-itertools==8.14.0
+multidict==6.0.2
+multiprocess==0.70.13
+munkres==1.1.4
+mypy-extensions==0.4.3
+nbclient==0.6.6
+nbconvert==6.5.1
+nbformat==5.4.0
+nest-asyncio==1.5.5
+networkx==2.8.5
+ninja==1.10.2.3
+nlp==0.4.0
+nltk==3.7
+notebook==6.4.12
+numba==0.56.0
+numpy==1.22.4
+oauthlib==3.2.0
+orjson==3.7.11
+packaging==21.3
+pandas==1.4.3
+pandocfilters==1.5.0
+paramiko==2.11.0
+parso==0.8.3
+pathos==0.2.9
+pathspec==0.9.0
+pexpect==4.8.0
+pickleshare==0.7.5
+Pillow==9.2.0
+platformdirs==2.5.2
+pluggy==0.13.1
+pooch==1.6.0
+pox==0.3.1
+ppft==1.7.6.5
+prometheus-client==0.14.1
+prompt-toolkit==3.0.30
+protobuf==3.19.4
+protobuf3-to-dict==0.1.5
+psutil==5.9.1
+ptyprocess==0.7.0
+py==1.11.0
+py-cpuinfo==8.0.0
+pyarrow==9.0.0
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pycparser==2.21
+pycryptodome==3.15.0
+pydantic==1.9.1
+pydub==0.25.1
+Pygments==2.12.0
+PyNaCl==1.5.0
+pyparsing==3.0.9
+pyrsistent==0.18.1
+pytest==5.4.3
+python-dateutil==2.8.2
+python-dotenv==0.20.0
+python-multipart==0.0.5
+pytz==2022.1
+PyYAML==6.0
+pyzmq==23.2.0
+regex==2022.7.25
+requests==2.28.1
+requests-oauthlib==1.3.1
+resampy==0.4.0
+responses==0.18.0
+rfc3986==1.5.0
+rsa==4.9
+s3fs==2022.7.1
+s3transfer==0.5.2
+sagemaker==2.103.0
+scikit-learn==1.1.2
+scipy==1.9.0
+seaborn==0.11.2
+Send2Trash==1.8.0
+sentencepiece==0.1.97
+shap==0.41.0
+six==1.16.0
+slicer==0.0.7
+smdebug-rulesconfig==1.0.1
+sniffio==1.2.0
+snorkel==0.9.9
+SoundFile==0.10.3.post1
+soupsieve==2.3.2.post1
+starlette==0.19.1
+tensorboard==2.9.1
+tensorboard-data-server==0.6.1
+tensorboard-plugin-wit==1.8.1
+terminado==0.15.0
+threadpoolctl==3.1.0
+tinycss2==1.1.1
+tokenizers==0.12.1
+toml==0.10.2
+tomli==2.0.1
+torch==1.12.1
+torchvision==0.13.1
+tornado==6.2
+tqdm==4.64.0
+traitlets==5.3.0
+transformers==4.21.1
+transformers-interpret==0.7.5
+typing_extensions==4.3.0
+uc-micro-py==1.0.1
+urllib3==1.26.11
+uvicorn==0.18.2
+wcwidth==0.2.5
+webencodings==0.5.1
+Werkzeug==2.2.2
+widgetsnbextension==3.6.1
+wrapt==1.14.1
+xxhash==3.0.0
+yapf==0.32.0
+yarl==1.8.1
+zipp==3.8.1

src/audio_to_images.py CHANGED Viewed

@@ -1,3 +1,8 @@
 import os
 import re
 import io

+# TODO
+# run on sagemaker
+# run with deepspeed
 import os
 import re
 import io

src/train_unconditional.py CHANGED Viewed

@@ -39,8 +39,8 @@ def main(args):
     model = UNet2DModel(
         sample_size=args.resolution,
-        in_channels=3,
-        out_channels=3,
         layers_per_block=2,
         block_out_channels=(128, 128, 256, 256, 512, 512),
         down_block_types=(
@@ -101,7 +101,7 @@ def main(args):
         )
     def transforms(examples):
-        images = [augmentations(image.convert("RGB")) for image in examples["image"]]
         return {"input": images}
     dataset.set_transform(transforms)
@@ -215,8 +215,7 @@ def main(args):
                     "test_samples", images_processed, epoch
                 )
                 for _, image in enumerate(images_processed):
-                    image = Image.fromarray(np.mean(image, axis=0).astype("uint8"))
-                    audio = mel.image_to_audio(image)
                     accelerator.trackers[0].writer.add_audio(
                         f"test_audio_{_}",
                         audio,

     model = UNet2DModel(
         sample_size=args.resolution,
+        in_channels=1,
+        out_channels=1,
         layers_per_block=2,
         block_out_channels=(128, 128, 256, 256, 512, 512),
         down_block_types=(
         )
     def transforms(examples):
+        images = [augmentations(image) for image in examples["image"]]
         return {"input": images}
     dataset.set_transform(transforms)
                     "test_samples", images_processed, epoch
                 )
                 for _, image in enumerate(images_processed):
+                    audio = mel.image_to_audio(Image.fromarray(image[0]))
                     accelerator.trackers[0].writer.add_audio(
                         f"test_audio_{_}",
                         audio,