PsiPi commited on
Commit
6b7f5b5
1 Parent(s): 130b71a

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -1,35 +1,37 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ freesound_dataset_attribution2.csv filter=lfs diff=lfs merge=lfs -text
37
+ Stable_audio_open.jpg filter=lfs diff=lfs merge=lfs -text
LICENSE.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ STABILITY AI COMMUNITY LICENSE AGREEMENT
2
+
3
+ Last Updated: July 5, 2024
4
+
5
+ 1. INTRODUCTION
6
+
7
+ This Agreement applies to any individual person or entity (“You”, “Your” or “Licensee”) that uses or distributes any portion or element of the Stability AI Materials or Derivative Works thereof for any Research & Non-Commercial or Commercial purpose. Capitalized terms not otherwise defined herein are defined in Section V below.
8
+
9
+ This Agreement is intended to allow research, non-commercial, and limited commercial uses of the Models free of charge. In order to ensure that certain limited commercial uses of the Models continue to be allowed, this Agreement preserves free access to the Models for people or organizations generating annual revenue of less than US $1,000,000 (or local currency equivalent).
10
+
11
+ By clicking “I Accept” or by using or distributing or using any portion or element of the Stability Materials or Derivative Works, You agree that You have read, understood and are bound by the terms of this Agreement. If You are acting on behalf of a company, organization or other entity, then “You” includes you and that entity, and You agree that You: (i) are an authorized representative of such entity with the authority to bind such entity to this Agreement, and (ii) You agree to the terms of this Agreement on that entity’s behalf.
12
+
13
+ 2. RESEARCH & NON-COMMERCIAL USE LICENSE
14
+
15
+ Subject to the terms of this Agreement, Stability AI grants You a non-exclusive, worldwide, non-transferable, non-sublicensable, revocable and royalty-free limited license under Stability AI’s intellectual property or other rights owned by Stability AI embodied in the Stability AI Materials to use, reproduce, distribute, and create Derivative Works of, and make modifications to, the Stability AI Materials for any Research or Non-Commercial Purpose. “Research Purpose” means academic or scientific advancement, and in each case, is not primarily intended for commercial advantage or monetary compensation to You or others. “Non-Commercial Purpose” means any purpose other than a Research Purpose that is not primarily intended for commercial advantage or monetary compensation to You or others, such as personal use (i.e., hobbyist) or evaluation and testing.
16
+
17
+ 3. COMMERCIAL USE LICENSE
18
+
19
+ Subject to the terms of this Agreement (including the remainder of this Section III), Stability AI grants You a non-exclusive, worldwide, non-transferable, non-sublicensable, revocable and royalty-free limited license under Stability AI’s intellectual property or other rights owned by Stability AI embodied in the Stability AI Materials to use, reproduce, distribute, and create Derivative Works of, and make modifications to, the Stability AI Materials for any Commercial Purpose. “Commercial Purpose” means any purpose other than a Research Purpose or Non-Commercial Purpose that is primarily intended for commercial advantage or monetary compensation to You or others, including but not limited to, (i) creating, modifying, or distributing Your product or service, including via a hosted service or application programming interface, and (ii) for Your business’s or organization’s internal operations.
20
+ If You are using or distributing the Stability AI Materials for a Commercial Purpose, You must register with Stability AI at (https://stability.ai/community-license). If at any time You or Your Affiliate(s), either individually or in aggregate, generate more than USD $1,000,000 in annual revenue (or the equivalent thereof in Your local currency), regardless of whether that revenue is generated directly or indirectly from the Stability AI Materials or Derivative Works, any licenses granted to You under this Agreement shall terminate as of such date. You must request a license from Stability AI at (https://stability.ai/enterprise) , which Stability AI may grant to You in its sole discretion. If you receive Stability AI Materials, or any Derivative Works thereof, from a Licensee as part of an integrated end user product, then Section III of this Agreement will not apply to you.
21
+
22
+ 4. GENERAL TERMS
23
+
24
+ Your Research, Non-Commercial, and Commercial License(s) under this Agreement are subject to the following terms.
25
+ a. Distribution & Attribution. If You distribute or make available the Stability AI Materials or a Derivative Work to a third party, or a product or service that uses any portion of them, You shall: (i) provide a copy of this Agreement to that third party, (ii) retain the following attribution notice within a "Notice" text file distributed as a part of such copies: "This Stability AI Model is licensed under the Stability AI Community License, Copyright © Stability AI Ltd. All Rights Reserved”, and (iii) prominently display “Powered by Stability AI” on a related website, user interface, blogpost, about page, or product documentation. If You create a Derivative Work, You may add your own attribution notice(s) to the “Notice” text file included with that Derivative Work, provided that You clearly indicate which attributions apply to the Stability AI Materials and state in the “Notice” text file that You changed the Stability AI Materials and how it was modified.
26
+ b. Use Restrictions. Your use of the Stability AI Materials and Derivative Works, including any output or results of the Stability AI Materials or Derivative Works, must comply with applicable laws and regulations (including Trade Control Laws and equivalent regulations) and adhere to the Documentation and Stability AI’s AUP, which is hereby incorporated by reference. Furthermore, You will not use the Stability AI Materials or Derivative Works, or any output or results of the Stability AI Materials or Derivative Works, to create or improve any foundational generative AI model (excluding the Models or Derivative Works).
27
+ c. Intellectual Property.
28
+ (i) Trademark License. No trademark licenses are granted under this Agreement, and in connection with the Stability AI Materials or Derivative Works, You may not use any name or mark owned by or associated with Stability AI or any of its Affiliates, except as required under Section IV(a) herein.
29
+ (ii) Ownership of Derivative Works. As between You and Stability AI, You are the owner of Derivative Works You create, subject to Stability AI’s ownership of the Stability AI Materials and any Derivative Works made by or for Stability AI.
30
+ (iii) Ownership of Outputs. As between You and Stability AI, You own any outputs generated from the Models or Derivative Works to the extent permitted by applicable law.
31
+ (iv) Disputes. If You or Your Affiliate(s) institute litigation or other proceedings against Stability AI (including a cross-claim or counterclaim in a lawsuit) alleging that the Stability AI Materials, Derivative Works or associated outputs or results, or any portion of any of the foregoing, constitutes infringement of intellectual property or other rights owned or licensable by You, then any licenses granted to You under this Agreement shall terminate as of the date such litigation or claim is filed or instituted. You will indemnify and hold harmless Stability AI from and against any claim by any third party arising out of or related to Your use or distribution of the Stability AI Materials or Derivative Works in violation of this Agreement.
32
+ (v) Feedback. From time to time, You may provide Stability AI with verbal and/or written suggestions, comments or other feedback related to Stability AI’s existing or prospective technology, products or services (collectively, “Feedback”). You are not obligated to provide Stability AI with Feedback, but to the extent that You do, You hereby grant Stability AI a perpetual, irrevocable, royalty-free, fully-paid, sub-licensable, transferable, non-exclusive, worldwide right and license to exploit the Feedback in any manner without restriction. Your Feedback is provided “AS IS” and You make no warranties whatsoever about any Feedback.
33
+ d. Disclaimer Of Warranty. UNLESS REQUIRED BY APPLICABLE LAW, THE STABILITY AI MATERIALS AND ANY OUTPUT AND RESULTS THEREFROM ARE PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY, OR FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE FOR DETERMINING THE APPROPRIATENESS OR LAWFULNESS OF USING OR REDISTRIBUTING THE STABILITY AI MATERIALS, DERIVATIVE WORKS OR ANY OUTPUT OR RESULTS AND ASSUME ANY RISKS ASSOCIATED WITH YOUR USE OF THE STABILITY AI MATERIALS, DERIVATIVE WORKS AND ANY OUTPUT AND RESULTS.
34
+ e. Limitation Of Liability. IN NO EVENT WILL STABILITY AI OR ITS AFFILIATES BE LIABLE UNDER ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, TORT, NEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, ARISING OUT OF THIS AGREEMENT, FOR ANY LOST PROFITS OR ANY DIRECT, INDIRECT, SPECIAL, CONSEQUENTIAL, INCIDENTAL, EXEMPLARY OR PUNITIVE DAMAGES, EVEN IF STABILITY AI OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF ANY OF THE FOREGOING.
35
+ f. Term And Termination. The term of this Agreement will commence upon Your acceptance of this Agreement or access to the Stability AI Materials and will continue in full force and effect until terminated in accordance with the terms and conditions herein. Stability AI may terminate this Agreement if You are in breach of any term or condition of this Agreement. Upon termination of this Agreement, You shall delete and cease use of any Stability AI Materials or Derivative Works. Section IV(d), (e), and (g) shall survive the termination of this Agreement.
36
+ g. Governing Law. This Agreement will be governed by and constructed in accordance with the laws of the United States and the State of California without regard to choice of law principles, and the UN Convention on Contracts for International Sale of Goods does not apply to this Agreement.
37
+
38
+ 5. DEFINITIONS
39
+
40
+ “Affiliate(s)” means any entity that directly or indirectly controls, is controlled by, or is under common control with the subject entity; for purposes of this definition, “control” means direct or indirect ownership or control of more than 50% of the voting interests of the subject entity.
41
+
42
+ "Agreement" means this Stability AI Community License Agreement.
43
+
44
+ “AUP” means the Stability AI Acceptable Use Policy available at (https://stability.ai/use-policy), as may be updated from time to time.
45
+
46
+ "Derivative Work(s)” means (a) any derivative work of the Stability AI Materials as recognized by U.S. copyright laws and (b) any modifications to a Model, and any other model created which is based on or derived from the Model or the Model’s output, including “fine tune” and “low-rank adaptation” models derived from a Model or a Model’s output, but do not include the output of any Model.
47
+
48
+ “Documentation” means any specifications, manuals, documentation, and other written information provided by Stability AI related to the Software or Models.
49
+
50
+ “Model(s)" means, collectively, Stability AI’s proprietary models and algorithms, including machine-learning models, trained model weights and other elements of the foregoing listed on Stability’s Core Models Webpage available at (https://stability.ai/core-models), as may be updated from time to time.
51
+
52
+ "Stability AI" or "we" means Stability AI Ltd. and its Affiliates.
53
+
54
+ "Software" means Stability AI’s proprietary software made available under this Agreement now or in the future.
55
+
56
+ “Stability AI Materials” means, collectively, Stability’s proprietary Models, Software and Documentation (and any portion or combination thereof) made available under this Agreement.
57
+
58
+ “Trade Control Laws” means any applicable U.S. and non-U.S. export control and trade sanctions laws and regulations.
README.md ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ library_name: stable-audio-tools
5
+ license: other
6
+ license_name: stable-audio-community
7
+ license_link: LICENSE
8
+ pipeline_tag: text-to-audio
9
+ extra_gated_prompt: By clicking "Agree", you agree to the [License Agreement](https://huggingface.co/stabilityai/stable-audio-open-1.0/blob/main/LICENSE.md)
10
+ and acknowledge Stability AI's [Privacy Policy](https://stability.ai/privacy-policy).
11
+ extra_gated_fields:
12
+ Name: text
13
+ Email: text
14
+ Country: country
15
+ Organization or Affiliation: text
16
+ Receive email updates and promotions on Stability AI products, services, and research?:
17
+ type: select
18
+ options:
19
+ - 'Yes'
20
+ - 'No'
21
+ ---
22
+
23
+ # Stable Audio Open 1.0
24
+
25
+ ![Stable Audio Open logo](./stable_audio_light.png)
26
+
27
+ Please note: For commercial use, please refer to [https://stability.ai/license](https://stability.ai/license)
28
+
29
+ ## Model Description
30
+ `Stable Audio Open 1.0` generates variable-length (up to 47s) stereo audio at 44.1kHz from text prompts. It comprises three components: an autoencoder that compresses waveforms into a manageable sequence length, a T5-based text embedding for text conditioning, and a transformer-based diffusion (DiT) model that operates in the latent space of the autoencoder.
31
+
32
+ ## Usage
33
+
34
+ This model can be used with:
35
+ 1. the [`stable-audio-tools`](https://github.com/Stability-AI/stable-audio-tools) library
36
+ 2. the [`diffusers`](https://huggingface.co/docs/diffusers/main/en/index) library
37
+
38
+
39
+ ### Using with `stable-audio-tools`
40
+
41
+ This model is made to be used with the [`stable-audio-tools`](https://github.com/Stability-AI/stable-audio-tools) library for inference, for example:
42
+
43
+ ```python
44
+ import torch
45
+ import torchaudio
46
+ from einops import rearrange
47
+ from stable_audio_tools import get_pretrained_model
48
+ from stable_audio_tools.inference.generation import generate_diffusion_cond
49
+
50
+ device = "cuda" if torch.cuda.is_available() else "cpu"
51
+
52
+ # Download model
53
+ model, model_config = get_pretrained_model("stabilityai/stable-audio-open-1.0")
54
+ sample_rate = model_config["sample_rate"]
55
+ sample_size = model_config["sample_size"]
56
+
57
+ model = model.to(device)
58
+
59
+ # Set up text and timing conditioning
60
+ conditioning = [{
61
+ "prompt": "128 BPM tech house drum loop",
62
+ "seconds_start": 0,
63
+ "seconds_total": 30
64
+ }]
65
+
66
+ # Generate stereo audio
67
+ output = generate_diffusion_cond(
68
+ model,
69
+ steps=100,
70
+ cfg_scale=7,
71
+ conditioning=conditioning,
72
+ sample_size=sample_size,
73
+ sigma_min=0.3,
74
+ sigma_max=500,
75
+ sampler_type="dpmpp-3m-sde",
76
+ device=device
77
+ )
78
+
79
+ # Rearrange audio batch to a single sequence
80
+ output = rearrange(output, "b d n -> d (b n)")
81
+
82
+ # Peak normalize, clip, convert to int16, and save to file
83
+ output = output.to(torch.float32).div(torch.max(torch.abs(output))).clamp(-1, 1).mul(32767).to(torch.int16).cpu()
84
+ torchaudio.save("output.wav", output, sample_rate)
85
+ ```
86
+
87
+ ## Using with `diffusers`
88
+
89
+ Make sure you upgrade to the latest version of diffusers: `pip install -U diffusers`. And then you can run:
90
+
91
+ ```py
92
+ import torch
93
+ import soundfile as sf
94
+ from diffusers import StableAudioPipeline
95
+
96
+ pipe = StableAudioPipeline.from_pretrained("stabilityai/stable-audio-open-1.0", torch_dtype=torch.float16)
97
+ pipe = pipe.to("cuda")
98
+
99
+ # define the prompts
100
+ prompt = "The sound of a hammer hitting a wooden surface."
101
+ negative_prompt = "Low quality."
102
+
103
+ # set the seed for generator
104
+ generator = torch.Generator("cuda").manual_seed(0)
105
+
106
+ # run the generation
107
+ audio = pipe(
108
+ prompt,
109
+ negative_prompt=negative_prompt,
110
+ num_inference_steps=200,
111
+ audio_end_in_s=10.0,
112
+ num_waveforms_per_prompt=3,
113
+ generator=generator,
114
+ ).audios
115
+
116
+ output = audio[0].T.float().cpu().numpy()
117
+ sf.write("hammer.wav", output, pipe.vae.sampling_rate)
118
+
119
+ ```
120
+ Refer to the [documentation](https://huggingface.co/docs/diffusers/main/en/index) for more details on optimization and usage.
121
+
122
+
123
+
124
+
125
+ ## Model Details
126
+ * **Model type**: `Stable Audio Open 1.0` is a latent diffusion model based on a transformer architecture.
127
+ * **Language(s)**: English
128
+ * **License**: [Stability AI Community License](https://huggingface.co/stabilityai/stable-audio-open-1.0/blob/main/LICENSE.md).
129
+ * **Commercial License**: to use this model commercially, please refer to [https://stability.ai/license](https://stability.ai/license)
130
+ * **Research Paper**: [https://arxiv.org/abs/2407.14358](https://arxiv.org/abs/2407.14358)
131
+
132
+ ## Training dataset
133
+
134
+ ### Datasets Used
135
+ Our dataset consists of 486492 audio recordings, where 472618 are from Freesound and 13874 are from the Free Music Archive (FMA). All audio files are licensed under CC0, CC BY, or CC Sampling+. This data is used to train our autoencoder and DiT. We use a publicly available pre-trained T5 model ([t5-base](https://huggingface.co/google-t5/t5-base)) for text conditioning.
136
+
137
+ ### Attribution
138
+ Attribution for all audio recordings used to train Stable Audio Open 1.0 can be found in this repository.
139
+ - Freesound attribution [[csv](https://huggingface.co/stabilityai/stable-audio-open-1.0/blob/main/freesound_dataset_attribution2.csv)]
140
+ - FMA attribution [[csv](https://huggingface.co/stabilityai/stable-audio-open-1.0/blob/main/fma_dataset_attribution2.csv)]
141
+
142
+ ### Mitigations
143
+ We conducted an in-depth analysis to ensure no unauthorized copyrighted music was present in our training data before we began training.
144
+
145
+ To that end, we first identified music samples in Freesound using the [PANNs](https://github.com/qiuqiangkong/audioset_tagging_cnn) music classifier based on AudioSet classes. The identified music samples had at least 30 seconds of music that was predicted to belong to a music-related class with a threshold of 0.15 (PANNs output probabilities range from 0 to 1). This threshold was determined by classifying known music examples from FMA and ensuring no false negatives were present.
146
+
147
+ The identified music samples were sent to Audible Magic’s identification services, a trusted content detection company, to ensure the absence of copyrighted music. Audible Magic flagged suspected copyrighted music, which we subsequently removed before training on the dataset. The majority of the removed content was field recordings in which copyrighted music was playing in the background. Following this procedure, we were left with 266324 CC0, 194840 CC-BY, and 11454 CC Sampling+ audio recordings.
148
+
149
+ We also conducted an in-depth analysis to ensure no copyrighted content was present in FMA's subset. In this case, the procedure was slightly different because the FMA subset consists of music signals. We did a metadata search against a large database of copyrighted music (https://www.kaggle.com/datasets/maharshipandya/-spotify-tracks-dataset) and flagged any potential match. The flagged content was reviewed individually by humans. After this process, we ended up with 8967 CC-BY and 4907 CC0 tracks.
150
+
151
+
152
+ ## Use and Limitations
153
+
154
+
155
+ ### Intended Use
156
+ The primary use of Stable Audio Open is research and experimentation on AI-based music and audio generation, including:
157
+
158
+ - Research efforts to better understand the limitations of generative models and further improve the state of science.
159
+ - Generation of music and audio guided by text to explore current abilities of generative AI models by machine learning practitioners and artists.
160
+
161
+
162
+ ### Out-of-Scope Use Cases
163
+ The model should not be used on downstream applications without further risk evaluation and mitigation. The model should not be used to intentionally create or disseminate audio or music pieces that create hostile or alienating environments for people.
164
+
165
+
166
+ ### Limitations
167
+ - The model is not able to generate realistic vocals.
168
+ - The model has been trained with English descriptions and will not perform as well in other languages.
169
+ - The model does not perform equally well for all music styles and cultures.
170
+ - The model is better at generating sound effects and field recordings than music.
171
+ - It is sometimes difficult to assess what types of text descriptions provide the best generations. Prompt engineering may be required to obtain satisfying results.
172
+
173
+
174
+ ### Biases
175
+ The source of data is potentially lacking diversity and all cultures are not equally represented in the dataset. The model may not perform equally well on the wide variety of music genres and sound effects that exist. The generated samples from the model will reflect the biases from the training data.
fma_dataset_attribution2.csv ADDED
The diff for this file is too large to render. See raw diff
 
freesound_dataset_attribution2.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8819b131646d3e48aa9e4571f7484c7a269bb9b639f2ce78a86e2cdefa5bb8eb
3
+ size 50433223
model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73bf8bb7b6975483418813a954bca66471acac7a74e5f87d37774057bd82b504
3
+ size 4164082314
model_config.json ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "diffusion_cond",
3
+ "sample_size": 1048576,
4
+ "sample_rate": 44100,
5
+ "audio_channels": 2,
6
+ "model": {
7
+ "pretransform": {
8
+ "type": "autoencoder",
9
+ "iterate_batch": true,
10
+ "config": {
11
+ "encoder": {
12
+ "type": "oobleck",
13
+ "requires_grad": false,
14
+ "config": {
15
+ "in_channels": 2,
16
+ "channels": 128,
17
+ "c_mults": [1, 2, 4, 8, 16],
18
+ "strides": [2, 4, 4, 8, 8],
19
+ "latent_dim": 128,
20
+ "use_snake": true
21
+ }
22
+ },
23
+ "decoder": {
24
+ "type": "oobleck",
25
+ "config": {
26
+ "out_channels": 2,
27
+ "channels": 128,
28
+ "c_mults": [1, 2, 4, 8, 16],
29
+ "strides": [2, 4, 4, 8, 8],
30
+ "latent_dim": 64,
31
+ "use_snake": true,
32
+ "final_tanh": false
33
+ }
34
+ },
35
+ "bottleneck": {
36
+ "type": "vae"
37
+ },
38
+ "latent_dim": 64,
39
+ "downsampling_ratio": 2048,
40
+ "io_channels": 2
41
+ }
42
+ },
43
+ "conditioning": {
44
+ "configs": [
45
+ {
46
+ "id": "prompt",
47
+ "type": "t5",
48
+ "config": {
49
+ "t5_model_name": "t5-base",
50
+ "max_length": 128
51
+ }
52
+ },
53
+ {
54
+ "id": "seconds_start",
55
+ "type": "number",
56
+ "config": {
57
+ "min_val": 0,
58
+ "max_val": 512
59
+ }
60
+ },
61
+ {
62
+ "id": "seconds_total",
63
+ "type": "number",
64
+ "config": {
65
+ "min_val": 0,
66
+ "max_val": 512
67
+ }
68
+ }
69
+ ],
70
+ "cond_dim": 768
71
+ },
72
+ "diffusion": {
73
+ "cross_attention_cond_ids": ["prompt", "seconds_start", "seconds_total"],
74
+ "global_cond_ids": ["seconds_start", "seconds_total"],
75
+ "type": "dit",
76
+ "config": {
77
+ "io_channels": 64,
78
+ "embed_dim": 1536,
79
+ "depth": 24,
80
+ "num_heads": 24,
81
+ "cond_token_dim": 768,
82
+ "global_cond_dim": 1536,
83
+ "project_cond_tokens": false,
84
+ "transformer_type": "continuous_transformer"
85
+ }
86
+ },
87
+ "io_channels": 64
88
+ },
89
+ "training": {
90
+ "use_ema": true,
91
+ "log_loss_info": false,
92
+ "optimizer_configs": {
93
+ "diffusion": {
94
+ "optimizer": {
95
+ "type": "AdamW",
96
+ "config": {
97
+ "lr": 5e-5,
98
+ "betas": [0.9, 0.999],
99
+ "weight_decay": 1e-3
100
+ }
101
+ },
102
+ "scheduler": {
103
+ "type": "InverseLR",
104
+ "config": {
105
+ "inv_gamma": 1000000,
106
+ "power": 0.5,
107
+ "warmup": 0.99
108
+ }
109
+ }
110
+ }
111
+ },
112
+ "demo": {
113
+ "demo_every": 2000,
114
+ "demo_steps": 250,
115
+ "num_demos": 4,
116
+ "demo_cond": [
117
+ {"prompt": "Amen break 174 BPM", "seconds_start": 0, "seconds_total": 12},
118
+ {"prompt": "A beautiful orchestral symphony, classical music", "seconds_start": 0, "seconds_total": 160},
119
+ {"prompt": "Chill hip-hop beat, chillhop", "seconds_start": 0, "seconds_total": 190},
120
+ {"prompt": "A pop song about love and loss", "seconds_start": 0, "seconds_total": 180}
121
+ ],
122
+ "demo_cfg_scales": [3, 6, 9]
123
+ }
124
+ }
125
+ }
model_index.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "StableAudioPipeline",
3
+ "_diffusers_version": "0.30.0.dev0",
4
+ "projection_model": [
5
+ "stable_audio",
6
+ "StableAudioProjectionModel"
7
+ ],
8
+ "scheduler": [
9
+ "diffusers",
10
+ "CosineDPMSolverMultistepScheduler"
11
+ ],
12
+ "text_encoder": [
13
+ "transformers",
14
+ "T5EncoderModel"
15
+ ],
16
+ "tokenizer": [
17
+ "transformers",
18
+ "T5TokenizerFast"
19
+ ],
20
+ "transformer": [
21
+ "diffusers",
22
+ "StableAudioDiTModel"
23
+ ],
24
+ "vae": [
25
+ "diffusers",
26
+ "AutoencoderOobleck"
27
+ ]
28
+ }
model_org.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6049ae92ec8362804cb4cb8a2845be93071439da2daff9997c285f8119d7ea40
3
+ size 4853997287
projection_model/config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "StableAudioProjectionModel",
3
+ "_diffusers_version": "0.30.0.dev0",
4
+ "conditioning_dim": 768,
5
+ "max_value": 512,
6
+ "min_value": 0,
7
+ "text_encoder_dim": 768
8
+ }
projection_model/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:032102e2b0e4b9b9e19c648dc476345157b97906752a7b4b7a0133900f340926
3
+ size 1586944
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "CosineDPMSolverMultistepScheduler",
3
+ "_diffusers_version": "0.30.0.dev0",
4
+ "euler_at_final": false,
5
+ "final_sigmas_type": "zero",
6
+ "lower_order_final": true,
7
+ "num_train_timesteps": 1000,
8
+ "prediction_type": "v_prediction",
9
+ "rho": 7.0,
10
+ "sigma_data": 1.0,
11
+ "sigma_max": 500,
12
+ "sigma_min": 0.3,
13
+ "sigma_schedule": "exponential",
14
+ "solver_order": 2,
15
+ "solver_type": "midpoint"
16
+ }
stable_audio_light.png ADDED
text_encoder/config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "t5-base",
3
+ "architectures": [
4
+ "T5EncoderModel"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 3072,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 12,
22
+ "num_heads": 12,
23
+ "num_layers": 12,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "relative_attention_max_distance": 128,
27
+ "relative_attention_num_buckets": 32,
28
+ "task_specific_params": {
29
+ "summarization": {
30
+ "early_stopping": true,
31
+ "length_penalty": 2.0,
32
+ "max_length": 200,
33
+ "min_length": 30,
34
+ "no_repeat_ngram_size": 3,
35
+ "num_beams": 4,
36
+ "prefix": "summarize: "
37
+ },
38
+ "translation_en_to_de": {
39
+ "early_stopping": true,
40
+ "max_length": 300,
41
+ "num_beams": 4,
42
+ "prefix": "translate English to German: "
43
+ },
44
+ "translation_en_to_fr": {
45
+ "early_stopping": true,
46
+ "max_length": 300,
47
+ "num_beams": 4,
48
+ "prefix": "translate English to French: "
49
+ },
50
+ "translation_en_to_ro": {
51
+ "early_stopping": true,
52
+ "max_length": 300,
53
+ "num_beams": 4,
54
+ "prefix": "translate English to Romanian: "
55
+ }
56
+ },
57
+ "torch_dtype": "float32",
58
+ "transformers_version": "4.42.0.dev0",
59
+ "use_cache": true,
60
+ "vocab_size": 32128
61
+ }
text_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:095878c81d21f17d235e1b4aa20e9595289177e2658ef4dcec69fa5aa3b07ca2
3
+ size 438525864
tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "eos_token": "</s>",
105
+ "pad_token": "<pad>",
106
+ "unk_token": "<unk>"
107
+ }
tokenizer/spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
3
+ size 791656
tokenizer/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,938 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<unk>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "32000": {
28
+ "content": "<extra_id_99>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "32001": {
36
+ "content": "<extra_id_98>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "32002": {
44
+ "content": "<extra_id_97>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "32003": {
52
+ "content": "<extra_id_96>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "32004": {
60
+ "content": "<extra_id_95>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "32005": {
68
+ "content": "<extra_id_94>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "32006": {
76
+ "content": "<extra_id_93>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "32007": {
84
+ "content": "<extra_id_92>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "32008": {
92
+ "content": "<extra_id_91>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "32009": {
100
+ "content": "<extra_id_90>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "32010": {
108
+ "content": "<extra_id_89>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "32011": {
116
+ "content": "<extra_id_88>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "32012": {
124
+ "content": "<extra_id_87>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "32013": {
132
+ "content": "<extra_id_86>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "32014": {
140
+ "content": "<extra_id_85>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "32015": {
148
+ "content": "<extra_id_84>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "32016": {
156
+ "content": "<extra_id_83>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "32017": {
164
+ "content": "<extra_id_82>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "32018": {
172
+ "content": "<extra_id_81>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "32019": {
180
+ "content": "<extra_id_80>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "32020": {
188
+ "content": "<extra_id_79>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "32021": {
196
+ "content": "<extra_id_78>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "32022": {
204
+ "content": "<extra_id_77>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "32023": {
212
+ "content": "<extra_id_76>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "32024": {
220
+ "content": "<extra_id_75>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "32025": {
228
+ "content": "<extra_id_74>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "32026": {
236
+ "content": "<extra_id_73>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "32027": {
244
+ "content": "<extra_id_72>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "32028": {
252
+ "content": "<extra_id_71>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "32029": {
260
+ "content": "<extra_id_70>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "32030": {
268
+ "content": "<extra_id_69>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "32031": {
276
+ "content": "<extra_id_68>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "32032": {
284
+ "content": "<extra_id_67>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "32033": {
292
+ "content": "<extra_id_66>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "32034": {
300
+ "content": "<extra_id_65>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "32035": {
308
+ "content": "<extra_id_64>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "32036": {
316
+ "content": "<extra_id_63>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "32037": {
324
+ "content": "<extra_id_62>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "32038": {
332
+ "content": "<extra_id_61>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "32039": {
340
+ "content": "<extra_id_60>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "32040": {
348
+ "content": "<extra_id_59>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "32041": {
356
+ "content": "<extra_id_58>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "32042": {
364
+ "content": "<extra_id_57>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "32043": {
372
+ "content": "<extra_id_56>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "32044": {
380
+ "content": "<extra_id_55>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "32045": {
388
+ "content": "<extra_id_54>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "32046": {
396
+ "content": "<extra_id_53>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "32047": {
404
+ "content": "<extra_id_52>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "32048": {
412
+ "content": "<extra_id_51>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "32049": {
420
+ "content": "<extra_id_50>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "32050": {
428
+ "content": "<extra_id_49>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "32051": {
436
+ "content": "<extra_id_48>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "32052": {
444
+ "content": "<extra_id_47>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "32053": {
452
+ "content": "<extra_id_46>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "32054": {
460
+ "content": "<extra_id_45>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "32055": {
468
+ "content": "<extra_id_44>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "32056": {
476
+ "content": "<extra_id_43>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "32057": {
484
+ "content": "<extra_id_42>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "32058": {
492
+ "content": "<extra_id_41>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "32059": {
500
+ "content": "<extra_id_40>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "32060": {
508
+ "content": "<extra_id_39>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "32061": {
516
+ "content": "<extra_id_38>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "32062": {
524
+ "content": "<extra_id_37>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "32063": {
532
+ "content": "<extra_id_36>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "32064": {
540
+ "content": "<extra_id_35>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "32065": {
548
+ "content": "<extra_id_34>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "32066": {
556
+ "content": "<extra_id_33>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "32067": {
564
+ "content": "<extra_id_32>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "32068": {
572
+ "content": "<extra_id_31>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "32069": {
580
+ "content": "<extra_id_30>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "32070": {
588
+ "content": "<extra_id_29>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "32071": {
596
+ "content": "<extra_id_28>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "32072": {
604
+ "content": "<extra_id_27>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "32073": {
612
+ "content": "<extra_id_26>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "32074": {
620
+ "content": "<extra_id_25>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "32075": {
628
+ "content": "<extra_id_24>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "32076": {
636
+ "content": "<extra_id_23>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "32077": {
644
+ "content": "<extra_id_22>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "32078": {
652
+ "content": "<extra_id_21>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "32079": {
660
+ "content": "<extra_id_20>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "32080": {
668
+ "content": "<extra_id_19>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "32081": {
676
+ "content": "<extra_id_18>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "32082": {
684
+ "content": "<extra_id_17>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "32083": {
692
+ "content": "<extra_id_16>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "32084": {
700
+ "content": "<extra_id_15>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "32085": {
708
+ "content": "<extra_id_14>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "32086": {
716
+ "content": "<extra_id_13>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "32087": {
724
+ "content": "<extra_id_12>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "32088": {
732
+ "content": "<extra_id_11>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "32089": {
740
+ "content": "<extra_id_10>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "32090": {
748
+ "content": "<extra_id_9>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "32091": {
756
+ "content": "<extra_id_8>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "32092": {
764
+ "content": "<extra_id_7>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "32093": {
772
+ "content": "<extra_id_6>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "32094": {
780
+ "content": "<extra_id_5>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "32095": {
788
+ "content": "<extra_id_4>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "32096": {
796
+ "content": "<extra_id_3>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "32097": {
804
+ "content": "<extra_id_2>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "32098": {
812
+ "content": "<extra_id_1>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "32099": {
820
+ "content": "<extra_id_0>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ }
827
+ },
828
+ "additional_special_tokens": [
829
+ "<extra_id_0>",
830
+ "<extra_id_1>",
831
+ "<extra_id_2>",
832
+ "<extra_id_3>",
833
+ "<extra_id_4>",
834
+ "<extra_id_5>",
835
+ "<extra_id_6>",
836
+ "<extra_id_7>",
837
+ "<extra_id_8>",
838
+ "<extra_id_9>",
839
+ "<extra_id_10>",
840
+ "<extra_id_11>",
841
+ "<extra_id_12>",
842
+ "<extra_id_13>",
843
+ "<extra_id_14>",
844
+ "<extra_id_15>",
845
+ "<extra_id_16>",
846
+ "<extra_id_17>",
847
+ "<extra_id_18>",
848
+ "<extra_id_19>",
849
+ "<extra_id_20>",
850
+ "<extra_id_21>",
851
+ "<extra_id_22>",
852
+ "<extra_id_23>",
853
+ "<extra_id_24>",
854
+ "<extra_id_25>",
855
+ "<extra_id_26>",
856
+ "<extra_id_27>",
857
+ "<extra_id_28>",
858
+ "<extra_id_29>",
859
+ "<extra_id_30>",
860
+ "<extra_id_31>",
861
+ "<extra_id_32>",
862
+ "<extra_id_33>",
863
+ "<extra_id_34>",
864
+ "<extra_id_35>",
865
+ "<extra_id_36>",
866
+ "<extra_id_37>",
867
+ "<extra_id_38>",
868
+ "<extra_id_39>",
869
+ "<extra_id_40>",
870
+ "<extra_id_41>",
871
+ "<extra_id_42>",
872
+ "<extra_id_43>",
873
+ "<extra_id_44>",
874
+ "<extra_id_45>",
875
+ "<extra_id_46>",
876
+ "<extra_id_47>",
877
+ "<extra_id_48>",
878
+ "<extra_id_49>",
879
+ "<extra_id_50>",
880
+ "<extra_id_51>",
881
+ "<extra_id_52>",
882
+ "<extra_id_53>",
883
+ "<extra_id_54>",
884
+ "<extra_id_55>",
885
+ "<extra_id_56>",
886
+ "<extra_id_57>",
887
+ "<extra_id_58>",
888
+ "<extra_id_59>",
889
+ "<extra_id_60>",
890
+ "<extra_id_61>",
891
+ "<extra_id_62>",
892
+ "<extra_id_63>",
893
+ "<extra_id_64>",
894
+ "<extra_id_65>",
895
+ "<extra_id_66>",
896
+ "<extra_id_67>",
897
+ "<extra_id_68>",
898
+ "<extra_id_69>",
899
+ "<extra_id_70>",
900
+ "<extra_id_71>",
901
+ "<extra_id_72>",
902
+ "<extra_id_73>",
903
+ "<extra_id_74>",
904
+ "<extra_id_75>",
905
+ "<extra_id_76>",
906
+ "<extra_id_77>",
907
+ "<extra_id_78>",
908
+ "<extra_id_79>",
909
+ "<extra_id_80>",
910
+ "<extra_id_81>",
911
+ "<extra_id_82>",
912
+ "<extra_id_83>",
913
+ "<extra_id_84>",
914
+ "<extra_id_85>",
915
+ "<extra_id_86>",
916
+ "<extra_id_87>",
917
+ "<extra_id_88>",
918
+ "<extra_id_89>",
919
+ "<extra_id_90>",
920
+ "<extra_id_91>",
921
+ "<extra_id_92>",
922
+ "<extra_id_93>",
923
+ "<extra_id_94>",
924
+ "<extra_id_95>",
925
+ "<extra_id_96>",
926
+ "<extra_id_97>",
927
+ "<extra_id_98>",
928
+ "<extra_id_99>"
929
+ ],
930
+ "clean_up_tokenization_spaces": true,
931
+ "eos_token": "</s>",
932
+ "extra_ids": 100,
933
+ "model_max_length": 128,
934
+ "pad_token": "<pad>",
935
+ "tokenizer_class": "T5Tokenizer",
936
+ "truncation": true,
937
+ "unk_token": "<unk>"
938
+ }
transformer/config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "StableAudioDiTModel",
3
+ "_diffusers_version": "0.30.0.dev0",
4
+ "attention_head_dim": 64,
5
+ "cross_attention_dim": 768,
6
+ "cross_attention_input_dim": 768,
7
+ "global_states_input_dim": 1536,
8
+ "in_channels": 64,
9
+ "num_attention_heads": 24,
10
+ "num_key_value_attention_heads": 12,
11
+ "num_layers": 24,
12
+ "out_channels": 64,
13
+ "sample_size": 1024.0,
14
+ "time_proj_dim": 256
15
+ }
transformer/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65ae9715febfeb2dc9f33aa506c2bda69fd2ddf8ebc483fb2df090df4ca506fd
3
+ size 4227806200
vae/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "AutoencoderOobleck",
3
+ "_diffusers_version": "0.30.0.dev0",
4
+ "audio_channels": 2,
5
+ "channel_multiples": [
6
+ 1,
7
+ 2,
8
+ 4,
9
+ 8,
10
+ 16
11
+ ],
12
+ "decoder_channels": 128,
13
+ "decoder_input_channels": 64,
14
+ "downsampling_ratios": [
15
+ 2,
16
+ 4,
17
+ 4,
18
+ 8,
19
+ 8
20
+ ],
21
+ "encoder_hidden_size": 128,
22
+ "sampling_rate": 44100
23
+ }
vae/diffusion_pytorch_model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d4d46ad90feafc966011f7fd90a4a538b1f39c7d62b740fa99e95a2c1373aaa
3
+ size 533824514
vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2131cdb52020b2473707465449d8bdb4f6cca61c93150a947baca02bc58ffd7b
3
+ size 624490240