toilaluan committed on
Commit
d186c97
1 Parent(s): f9cb034

Trained for 0 epochs and 500 steps.

Browse files

Trained with datasets ['text-embeds', 'mj-v6']
Learning rate 8e-06, batch size 2, and 1 gradient accumulation steps.
Used DDPM noise scheduler for training with epsilon prediction type and rescaled_betas_zero_snr=False
Using 'trailing' timestep spacing.
Base model: PixArt-alpha/PixArt-Sigma-XL-2-1024-MS
VAE: madebyollin/sdxl-vae-fp16-fix

README.md ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: creativeml-openrail-m
3
+ base_model: "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS"
4
+ tags:
5
+ - stable-diffusion
6
+ - stable-diffusion-diffusers
7
+ - text-to-image
8
+ - diffusers
9
+ - full
10
+
11
+ inference: true
12
+ widget:
13
+ - text: 'unconditional (blank prompt)'
14
+ parameters:
15
+ negative_prompt: 'blurry, cropped, ugly'
16
+ output:
17
+ url: ./assets/image_0_0.png
18
+ - text: 'a woman sitting on the grass'
19
+ parameters:
20
+ negative_prompt: 'blurry, cropped, ugly'
21
+ output:
22
+ url: ./assets/image_1_0.png
23
+ - text: 'a professional photo headshot of a man in studio lighting'
24
+ parameters:
25
+ negative_prompt: 'blurry, cropped, ugly'
26
+ output:
27
+ url: ./assets/image_2_0.png
28
+ - text: 'a person holding a sign that reads ''SOON'''
29
+ parameters:
30
+ negative_prompt: 'blurry, cropped, ugly'
31
+ output:
32
+ url: ./assets/image_3_0.png
33
+ - text: 'Alien marketplace, bizarre creatures, exotic goods, vibrant colors, otherworldly atmosphere'
34
+ parameters:
35
+ negative_prompt: 'blurry, cropped, ugly'
36
+ output:
37
+ url: ./assets/image_4_0.png
38
+ - text: 'Child holding a balloon, happy expression, colorful balloons, sunny day, high detail'
39
+ parameters:
40
+ negative_prompt: 'blurry, cropped, ugly'
41
+ output:
42
+ url: ./assets/image_5_0.png
43
+ - text: 'a 4-panel comic strip showing an orange cat saying the words ''HELP'' and ''LASAGNA'''
44
+ parameters:
45
+ negative_prompt: 'blurry, cropped, ugly'
46
+ output:
47
+ url: ./assets/image_6_0.png
48
+ - text: 'a hand is holding a comic book with a cover that reads ''The Adventures of Superhero'''
49
+ parameters:
50
+ negative_prompt: 'blurry, cropped, ugly'
51
+ output:
52
+ url: ./assets/image_7_0.png
53
+ - text: 'Underground cave filled with crystals, glowing lights, reflective surfaces, fantasy environment, high detail'
54
+ parameters:
55
+ negative_prompt: 'blurry, cropped, ugly'
56
+ output:
57
+ url: ./assets/image_8_0.png
58
+ - text: 'Bustling cyberpunk bazaar, vendors, neon signs, advanced tech, crowded, high detail'
59
+ parameters:
60
+ negative_prompt: 'blurry, cropped, ugly'
61
+ output:
62
+ url: ./assets/image_9_0.png
63
+ - text: 'Cyberpunk hacker in a dark room, neon glow, multiple screens, intense focus, high detail'
64
+ parameters:
65
+ negative_prompt: 'blurry, cropped, ugly'
66
+ output:
67
+ url: ./assets/image_10_0.png
68
+ - text: 'a cybernetic anne of green gables with neural implant and bio mech augmentations'
69
+ parameters:
70
+ negative_prompt: 'blurry, cropped, ugly'
71
+ output:
72
+ url: ./assets/image_11_0.png
73
+ - text: 'Post-apocalyptic cityscape, ruined buildings, overgrown vegetation, dark and gritty, high detail'
74
+ parameters:
75
+ negative_prompt: 'blurry, cropped, ugly'
76
+ output:
77
+ url: ./assets/image_12_0.png
78
+ - text: 'Magical castle in a lush forest, glowing windows, fantasy architecture, high resolution, detailed textures'
79
+ parameters:
80
+ negative_prompt: 'blurry, cropped, ugly'
81
+ output:
82
+ url: ./assets/image_13_0.png
83
+ - text: 'Ruins of an ancient temple in an enchanted forest, glowing runes, mystical creatures, high detail'
84
+ parameters:
85
+ negative_prompt: 'blurry, cropped, ugly'
86
+ output:
87
+ url: ./assets/image_14_0.png
88
+ - text: 'Mystical forest, glowing plants, fairies, magical creatures, fantasy art, high detail'
89
+ parameters:
90
+ negative_prompt: 'blurry, cropped, ugly'
91
+ output:
92
+ url: ./assets/image_15_0.png
93
+ - text: 'Magical garden with glowing flowers, fairies, serene atmosphere, detailed plants, high resolution'
94
+ parameters:
95
+ negative_prompt: 'blurry, cropped, ugly'
96
+ output:
97
+ url: ./assets/image_16_0.png
98
+ - text: 'Whimsical garden filled with fairies, magical plants, sparkling lights, serene atmosphere, high detail'
99
+ parameters:
100
+ negative_prompt: 'blurry, cropped, ugly'
101
+ output:
102
+ url: ./assets/image_17_0.png
103
+ - text: 'Majestic dragon soaring through the sky, detailed scales, dynamic pose, fantasy art, high resolution'
104
+ parameters:
105
+ negative_prompt: 'blurry, cropped, ugly'
106
+ output:
107
+ url: ./assets/image_18_0.png
108
+ - text: 'Fantasy world, floating islands in the sky, waterfalls, lush vegetation, detailed landscape, high resolution'
109
+ parameters:
110
+ negative_prompt: 'blurry, cropped, ugly'
111
+ output:
112
+ url: ./assets/image_19_0.png
113
+ - text: 'Futuristic city skyline at night, neon lights, cyberpunk style, high contrast, sharp focus'
114
+ parameters:
115
+ negative_prompt: 'blurry, cropped, ugly'
116
+ output:
117
+ url: ./assets/image_20_0.png
118
+ - text: 'Space battle scene, starships fighting, laser beams, explosions, cosmic background'
119
+ parameters:
120
+ negative_prompt: 'blurry, cropped, ugly'
121
+ output:
122
+ url: ./assets/image_21_0.png
123
+ - text: 'Abandoned fairground at night, eerie rides, ghostly figures, fog, dark atmosphere, high detail'
124
+ parameters:
125
+ negative_prompt: 'blurry, cropped, ugly'
126
+ output:
127
+ url: ./assets/image_22_0.png
128
+ - text: 'Spooky haunted mansion on a hill, dark and eerie, glowing windows, ghostly atmosphere, high detail'
129
+ parameters:
130
+ negative_prompt: 'blurry, cropped, ugly'
131
+ output:
132
+ url: ./assets/image_23_0.png
133
+ - text: 'a hardcover physics textbook that is called PHYSICS FOR DUMMIES'
134
+ parameters:
135
+ negative_prompt: 'blurry, cropped, ugly'
136
+ output:
137
+ url: ./assets/image_24_0.png
138
+ - text: 'Epic medieval battle, knights in armor, dynamic action, detailed landscape, high resolution'
139
+ parameters:
140
+ negative_prompt: 'blurry, cropped, ugly'
141
+ output:
142
+ url: ./assets/image_25_0.png
143
+ - text: 'Bustling medieval market with merchants, knights, and jesters, vibrant colors, detailed'
144
+ parameters:
145
+ negative_prompt: 'blurry, cropped, ugly'
146
+ output:
147
+ url: ./assets/image_26_0.png
148
+ - text: 'Cozy medieval tavern, warm firelight, adventurers drinking, detailed interior, rustic atmosphere'
149
+ parameters:
150
+ negative_prompt: 'blurry, cropped, ugly'
151
+ output:
152
+ url: ./assets/image_27_0.png
153
+ - text: 'Futuristic city skyline at night, neon lights, cyberpunk style, high contrast, sharp focus'
154
+ parameters:
155
+ negative_prompt: 'blurry, cropped, ugly'
156
+ output:
157
+ url: ./assets/image_28_0.png
158
+ - text: 'Forest with neon-lit trees, glowing plants, bioluminescence, surreal atmosphere, high detail'
159
+ parameters:
160
+ negative_prompt: 'blurry, cropped, ugly'
161
+ output:
162
+ url: ./assets/image_29_0.png
163
+ - text: 'Bright neon sign in a busy city street, ''Open 24 Hours'', bold typography, glowing lights'
164
+ parameters:
165
+ negative_prompt: 'blurry, cropped, ugly'
166
+ output:
167
+ url: ./assets/image_30_0.png
168
+ - text: 'Retro diner sign, ''Joe''s Diner'', classic 1950s design, neon lights, weathered look'
169
+ parameters:
170
+ negative_prompt: 'blurry, cropped, ugly'
171
+ output:
172
+ url: ./assets/image_31_0.png
173
+ - text: 'Vintage store sign with elaborate typography, ''Antique Shop'', hand-painted, weathered look'
174
+ parameters:
175
+ negative_prompt: 'blurry, cropped, ugly'
176
+ output:
177
+ url: ./assets/image_32_0.png
178
+ ---
179
+
180
+ # sigmajourney-v2
181
+
182
+ This is a full-rank fine-tune derived from [PixArt-alpha/PixArt-Sigma-XL-2-1024-MS](https://huggingface.co/PixArt-alpha/PixArt-Sigma-XL-2-1024-MS).
183
+
184
+
185
+
186
+ No validation prompt was used during training.
187
+
188
+
189
+ None
190
+
191
+
192
+ ## Validation settings
193
+ - CFG: `7.5`
194
+ - CFG Rescale: `0.0`
195
+ - Steps: `30`
196
+ - Sampler: `euler`
197
+ - Seed: `42`
198
+ - Resolution: `1024`
199
+
200
+ Note: The validation settings are not necessarily the same as the [training settings](#training-settings).
201
+
202
+ You can find some example images in the following gallery:
203
+
204
+
205
+ <Gallery />
206
+
207
+ The text encoder **was not** trained.
208
+ You may reuse the base model text encoder for inference.
209
+
210
+
211
+ ## Training settings
212
+
213
+ - Training epochs: 0
214
+ - Training steps: 500
215
+ - Learning rate: 8e-06
216
+ - Effective batch size: 2
217
+ - Micro-batch size: 2
218
+ - Gradient accumulation steps: 1
219
+ - Number of GPUs: 1
220
+ - Prediction type: epsilon
221
+ - Rescaled betas zero SNR: False
222
+ - Optimizer: AdamW, stochastic bf16
223
+ - Precision: Pure BF16
224
+ - Xformers: Enabled
225
+
226
+
227
+ ## Datasets
228
+
229
+ ### mj-v6
230
+ - Repeats: 0
231
+ - Total number of images: 2180
232
+ - Total number of aspect buckets: 1
233
+ - Resolution: 1.0 megapixels
234
+ - Cropped: False
235
+ - Crop style: None
236
+ - Crop aspect: None
237
+
238
+
239
+ ## Inference
240
+
241
+
242
+ ```python
243
+ import torch
244
+ from diffusers import DiffusionPipeline
245
+
246
+
247
+
248
+ model_id = "sigmajourney-v2"
249
+ prompt = "An astronaut is riding a horse through the jungles of Thailand."
250
+ negative_prompt = "malformed, disgusting, overexposed, washed-out"
251
+
252
+ pipeline = DiffusionPipeline.from_pretrained(model_id)
253
+ pipeline.to('cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')
254
+ image = pipeline(
255
+ prompt=prompt,
256
+ negative_prompt='blurry, cropped, ugly',
257
+ num_inference_steps=30,
258
+ generator=torch.Generator(device='cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu').manual_seed(1641421826),
259
+ width=1152,
260
+ height=768,
261
+ guidance_scale=7.5,
262
+ guidance_rescale=0.0,
263
+ ).images[0]
264
+ image.save("output.png", format="PNG")
265
+ ```
266
+
optimizer.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8426b07be8a37f46230b7b56bb0cf8e17842217a7589914202a6881b7537e8c4
3
+ size 3665677155
random_states_0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be9c604f0a8dd8b7f0ec43deee5d8ee84042dbfc56dabc9da4bd2ec7d15ff9bc
3
+ size 14344
scheduler.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57feaeea732a8232dc14923ac8e8cff564f2d6d11728d1405a7f3cfc02efb7ed
3
+ size 1000
training_state-mj-v6.json ADDED
The diff for this file is too large to render. See raw diff
 
training_state.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"global_step": 500, "epoch_step": 500, "epoch": 1, "exhausted_backends": [], "repeats": {}}
transformer/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "PixArtTransformer2DModel",
3
+ "_diffusers_version": "0.29.0",
4
+ "_name_or_path": "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS",
5
+ "activation_fn": "gelu-approximate",
6
+ "attention_bias": true,
7
+ "attention_head_dim": 72,
8
+ "attention_type": "default",
9
+ "caption_channels": 4096,
10
+ "cross_attention_dim": 1152,
11
+ "double_self_attention": false,
12
+ "dropout": 0.0,
13
+ "in_channels": 4,
14
+ "interpolation_scale": 2,
15
+ "norm_elementwise_affine": false,
16
+ "norm_eps": 1e-06,
17
+ "norm_num_groups": 32,
18
+ "norm_type": "ada_norm_single",
19
+ "num_attention_heads": 16,
20
+ "num_embeds_ada_norm": 1000,
21
+ "num_layers": 28,
22
+ "num_vector_embeds": null,
23
+ "only_cross_attention": false,
24
+ "out_channels": 8,
25
+ "patch_size": 2,
26
+ "sample_size": 128,
27
+ "upcast_attention": false,
28
+ "use_additional_conditions": false,
29
+ "use_linear_projection": false
30
+ }
transformer/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1a2a9da8bfa0f08ca9fb435e6dbaec4afdc4e0497afeccb7c3bd9623bfda7ed
3
+ size 1221780352