Trained for 0 epochs and 500 steps.

Trained with datasets ['text-embeds', 'mj-v6']
Learning rate 8e-06, batch size 2, and 1 gradient accumulation steps.
Used DDPM noise scheduler for training with epsilon prediction type and rescaled_betas_zero_snr=False
Using 'trailing' timestep spacing.
Base model: PixArt-alpha/PixArt-Sigma-XL-2-1024-MS
VAE: madebyollin/sdxl-vae-fp16-fix

Files changed (8) hide show

README.md +266 -0
optimizer.bin +3 -0
random_states_0.pkl +3 -0
scheduler.bin +3 -0
training_state-mj-v6.json +0 -0
training_state.json +1 -0
transformer/config.json +30 -0
transformer/diffusion_pytorch_model.safetensors +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,266 @@

+---
+license: creativeml-openrail-m
+base_model: "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS"
+tags:
+ - stable-diffusion
+ - stable-diffusion-diffusers
+ - text-to-image
+ - diffusers
+ - full
+inference: true
+widget:
+- text: 'unconditional (blank prompt)'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_0_0.png
+- text: 'a woman sitting on the grass'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_1_0.png
+- text: 'a professional photo headshot of a man in studio lighting'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_2_0.png
+- text: 'a person holding a sign that reads ''SOON'''
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_3_0.png
+- text: 'Alien marketplace, bizarre creatures, exotic goods, vibrant colors, otherworldly atmosphere'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_4_0.png
+- text: 'Child holding a balloon, happy expression, colorful balloons, sunny day, high detail'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_5_0.png
+- text: 'a 4-panel comic strip showing an orange cat saying the words ''HELP'' and ''LASAGNA'''
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_6_0.png
+- text: 'a hand is holding a comic book with a cover that reads ''The Adventures of Superhero'''
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_7_0.png
+- text: 'Underground cave filled with crystals, glowing lights, reflective surfaces, fantasy environment, high detail'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_8_0.png
+- text: 'Bustling cyberpunk bazaar, vendors, neon signs, advanced tech, crowded, high detail'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_9_0.png
+- text: 'Cyberpunk hacker in a dark room, neon glow, multiple screens, intense focus, high detail'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_10_0.png
+- text: 'a cybernetic anne of green gables with neural implant and bio mech augmentations'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_11_0.png
+- text: 'Post-apocalyptic cityscape, ruined buildings, overgrown vegetation, dark and gritty, high detail'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_12_0.png
+- text: 'Magical castle in a lush forest, glowing windows, fantasy architecture, high resolution, detailed textures'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_13_0.png
+- text: 'Ruins of an ancient temple in an enchanted forest, glowing runes, mystical creatures, high detail'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_14_0.png
+- text: 'Mystical forest, glowing plants, fairies, magical creatures, fantasy art, high detail'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_15_0.png
+- text: 'Magical garden with glowing flowers, fairies, serene atmosphere, detailed plants, high resolution'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_16_0.png
+- text: 'Whimsical garden filled with fairies, magical plants, sparkling lights, serene atmosphere, high detail'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_17_0.png
+- text: 'Majestic dragon soaring through the sky, detailed scales, dynamic pose, fantasy art, high resolution'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_18_0.png
+- text: 'Fantasy world, floating islands in the sky, waterfalls, lush vegetation, detailed landscape, high resolution'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_19_0.png
+- text: 'Futuristic city skyline at night, neon lights, cyberpunk style, high contrast, sharp focus'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_20_0.png
+- text: 'Space battle scene, starships fighting, laser beams, explosions, cosmic background'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_21_0.png
+- text: 'Abandoned fairground at night, eerie rides, ghostly figures, fog, dark atmosphere, high detail'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_22_0.png
+- text: 'Spooky haunted mansion on a hill, dark and eerie, glowing windows, ghostly atmosphere, high detail'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_23_0.png
+- text: 'a hardcover physics textbook that is called PHYSICS FOR DUMMIES'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_24_0.png
+- text: 'Epic medieval battle, knights in armor, dynamic action, detailed landscape, high resolution'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_25_0.png
+- text: 'Bustling medieval market with merchants, knights, and jesters, vibrant colors, detailed'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_26_0.png
+- text: 'Cozy medieval tavern, warm firelight, adventurers drinking, detailed interior, rustic atmosphere'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_27_0.png
+- text: 'Futuristic city skyline at night, neon lights, cyberpunk style, high contrast, sharp focus'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_28_0.png
+- text: 'Forest with neon-lit trees, glowing plants, bioluminescence, surreal atmosphere, high detail'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_29_0.png
+- text: 'Bright neon sign in a busy city street, ''Open 24 Hours'', bold typography, glowing lights'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_30_0.png
+- text: 'Retro diner sign, ''Joe''s Diner'', classic 1950s design, neon lights, weathered look'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_31_0.png
+- text: 'Vintage store sign with elaborate typography, ''Antique Shop'', hand-painted, weathered look'
+ parameters:
+ negative_prompt: 'blurry, cropped, ugly'
+ output:
+ url: ./assets/image_32_0.png
+---
+# sigmajourney-v2
+This is a full-rank finetune derived from [PixArt-alpha/PixArt-Sigma-XL-2-1024-MS](https://huggingface.co/PixArt-alpha/PixArt-Sigma-XL-2-1024-MS).
+No validation prompt was used during training.
+## Validation settings
+- CFG: `7.5`
+- CFG Rescale: `0.0`
+- Steps: `30`
+- Sampler: `euler`
+- Seed: `42`
+- Resolution: `1024`
+Note: The validation settings are not necessarily the same as the [training settings](#training-settings).
+You can find some example images in the following gallery:
+<Gallery />
+The text encoder **was not** trained.
+You may reuse the base model text encoder for inference.
+## Training settings
+- Training epochs: 0
+- Training steps: 500
+- Learning rate: 8e-06
+- Effective batch size: 2
+ - Micro-batch size: 2
+ - Gradient accumulation steps: 1
+ - Number of GPUs: 1
+- Prediction type: epsilon
+- Rescaled betas zero SNR: False
+- Optimizer: AdamW, stochastic bf16
+- Precision: Pure BF16
+- Xformers: Enabled
+## Datasets
+### mj-v6
+- Repeats: 0
+- Total number of images: 2180
+- Total number of aspect buckets: 1
+- Resolution: 1.0 megapixels
+- Cropped: False
+- Crop style: None
+- Crop aspect: None
+## Inference
+```python
+import torch
+from diffusers import DiffusionPipeline
+model_id = "sigmajourney-v2"
+prompt = "An astronaut is riding a horse through the jungles of Thailand."
+negative_prompt = "blurry, cropped, ugly"
+pipeline = DiffusionPipeline.from_pretrained(model_id)
+pipeline.to('cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')
+image = pipeline(
+ prompt=prompt,
+ negative_prompt=negative_prompt,
+ num_inference_steps=30,
+ generator=torch.Generator(device='cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu').manual_seed(1641421826),
+ width=1152,
+ height=768,
+ guidance_scale=7.5,
+ guidance_rescale=0.0,
+).images[0]
+image.save("output.png", format="PNG")
+```

optimizer.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8426b07be8a37f46230b7b56bb0cf8e17842217a7589914202a6881b7537e8c4
+size 3665677155

random_states_0.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:be9c604f0a8dd8b7f0ec43deee5d8ee84042dbfc56dabc9da4bd2ec7d15ff9bc
+size 14344

scheduler.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:57feaeea732a8232dc14923ac8e8cff564f2d6d11728d1405a7f3cfc02efb7ed
+size 1000

training_state-mj-v6.json ADDED Viewed

The diff for this file is too large to render. See raw diff

training_state.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"global_step": 500, "epoch_step": 500, "epoch": 1, "exhausted_backends": [], "repeats": {}}

transformer/config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+ "_class_name": "PixArtTransformer2DModel",
+ "_diffusers_version": "0.29.0",
+ "_name_or_path": "PixArt-alpha/PixArt-Sigma-XL-2-1024-MS",
+ "activation_fn": "gelu-approximate",
+ "attention_bias": true,
+ "attention_head_dim": 72,
+ "attention_type": "default",
+ "caption_channels": 4096,
+ "cross_attention_dim": 1152,
+ "double_self_attention": false,
+ "dropout": 0.0,
+ "in_channels": 4,
+ "interpolation_scale": 2,
+ "norm_elementwise_affine": false,
+ "norm_eps": 1e-06,
+ "norm_num_groups": 32,
+ "norm_type": "ada_norm_single",
+ "num_attention_heads": 16,
+ "num_embeds_ada_norm": 1000,
+ "num_layers": 28,
+ "num_vector_embeds": null,
+ "only_cross_attention": false,
+ "out_channels": 8,
+ "patch_size": 2,
+ "sample_size": 128,
+ "upcast_attention": false,
+ "use_additional_conditions": false,
+ "use_linear_projection": false
+}

transformer/diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e1a2a9da8bfa0f08ca9fb435e6dbaec4afdc4e0497afeccb7c3bd9623bfda7ed
+size 1221780352