Add WD 1.3 float32 weights

Files changed:
- .gitattributes +4 -0
- README.md +11 -33
- model_index.json +5 -5
- safety_checker/config.json +7 -3
- scheduler/scheduler_config.json +3 -7
- text_encoder/config.json +2 -1
- unet/config.json +1 -1
- unet/diffusion_pytorch_model.bin +1 -1
- vae/config.json +3 -2
.gitattributes CHANGED
@@ -29,3 +29,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+safety_checker/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+text_encoder/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+unet/diffusion_pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+vae/diffusion_pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
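These four rules route the new float32 weight files through Git LFS, so a plain clone fetches only pointer stubs for them. As a sketch of pulling one such blob without cloning (the `huggingface_hub` package and the `hakurei/waifu-diffusion` repo id are assumptions; neither appears in the diff itself):

```python
# Sketch: fetch one of the newly LFS-tracked weight files directly from the Hub.
from huggingface_hub import hf_hub_download

local_path = hf_hub_download(
    repo_id="hakurei/waifu-diffusion",          # assumed repo id
    filename="unet/diffusion_pytorch_model.bin",  # one of the files added above
)
print(local_path)
```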
README.md CHANGED
@@ -17,24 +17,15 @@ waifu-diffusion is a latent text-to-image diffusion model that has been conditio
 
 [Original Weights](https://huggingface.co/hakurei/waifu-diffusion-v1-3)
 
-# Gradio
+# Gradio & Colab
 
-We also support a [Gradio](https://github.com/gradio-app/gradio)
+We also support a [Gradio](https://github.com/gradio-app/gradio) Web UI and Colab with Diffusers to run Waifu Diffusion:
 [![Open In Spaces](https://camo.githubusercontent.com/00380c35e60d6b04be65d3d94a58332be5cc93779f630bcdfc18ab9a3a7d3388/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f25463025394625413425393725323048756767696e67253230466163652d5370616365732d626c7565)](https://huggingface.co/spaces/hakurei/waifu-diffusion-demo)
-
 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1_8wPN7dJO746QXsFnB09Uq2VGgSRFuYE#scrollTo=1HaCauSq546O)
 
 ## Model Description
 
-The current model has been fine-tuned with a learning rate of 5.0e-6 for 4 epochs on 56k text-image pairs obtained through Danbooru which all have an aesthetic rating greater than `6.0`.
-
-**Note:** This project has **no affiliation with Danbooru.**
-
-## Training Data & Annotative Prompting
-
-The data used for fine-tuning has come from a random sample of 56k Danbooru images, which were filtered based on [CLIP Aesthetic Scoring](https://github.com/christophschuhmann/improved-aesthetic-predictor) where only images with an aesthetic score greater than `6.0` were used.
+[See here for a full model overview.](https://gist.github.com/harubaru/f727cedacae336d1f7877c4bbe2196e1)
 
 ## License
 
@@ -55,31 +46,18 @@ This model can be used for entertainment purposes and as a generative art assist
 ```python
 import torch
 from torch import autocast
-from diffusers import StableDiffusionPipeline, DDIMScheduler
-
-model_id = "hakurei/waifu-diffusion"
-device = "cuda"
+from diffusers import StableDiffusionPipeline
 
 pipe = StableDiffusionPipeline.from_pretrained(
-    model_id,
-    torch_dtype=torch.
-    scheduler=DDIMScheduler(
-        beta_start=0.00085,
-        beta_end=0.012,
-        beta_schedule="scaled_linear",
-        clip_sample=False,
-        set_alpha_to_one=False,
-    ),
-)
-pipe = pipe.to(device)
-
-prompt = "touhou hakurei_reimu 1girl solo portrait"
+    'waifu-diffusion',
+    torch_dtype=torch.float32
+).to('cuda')
+
+prompt = "1girl, aqua eyes, baseball cap, blonde hair, closed mouth, earrings, green background, hat, hoop earrings, jewelry, looking at viewer, shirt, short hair, simple background, solo, upper body, yellow shirt"
 with autocast("cuda"):
-    image = pipe(prompt, guidance_scale=
+    image = pipe(prompt, guidance_scale=6)["sample"][0]
 
-image.save("
+image.save("test.png")
 ```
 
 ## Team Members and Acknowledgements
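One note on the updated snippet (API drift in diffusers, not something this commit controls): newer diffusers releases return a `StableDiffusionPipelineOutput` whose images are exposed as `.images` rather than a `"sample"` key, so on recent versions the equivalent retrieval is:

```python
# Equivalent on newer diffusers versions, where the pipeline output
# exposes .images instead of the "sample" key used in the README above.
image = pipe(prompt, guidance_scale=6).images[0]
image.save("test.png")
```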
model_index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "_class_name": "StableDiffusionPipeline",
-  "_diffusers_version": "0.
+  "_diffusers_version": "0.4.1",
   "feature_extractor": [
     "transformers",
     "CLIPFeatureExtractor"
@@ -9,14 +9,14 @@
     "stable_diffusion",
     "StableDiffusionSafetyChecker"
   ],
+  "scheduler": [
+    "diffusers",
+    "LMSDiscreteScheduler"
+  ],
   "text_encoder": [
     "transformers",
     "CLIPTextModel"
   ],
-  "scheduler": [
-    "diffusers",
-    "DDIMScheduler"
-  ],
   "tokenizer": [
     "transformers",
     "CLIPTokenizer"
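Since `model_index.json` tells `from_pretrained` which class to build for each component, the swap above means a freshly loaded pipeline now carries an LMSDiscreteScheduler in place of the old DDIMScheduler. A minimal sketch of verifying that (assuming diffusers 0.4.1, the version pinned above; the repo id is the Hub location of this repository):

```python
# model_index.json drives which classes from_pretrained instantiates,
# so the loaded pipeline should now carry the LMS scheduler.
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "hakurei/waifu-diffusion",   # or a local checkout of this repo
    torch_dtype=torch.float32,
)
print(type(pipe.scheduler).__name__)  # expected: LMSDiscreteScheduler
```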
safety_checker/config.json CHANGED
@@ -1,5 +1,6 @@
 {
-  "
+  "_commit_hash": null,
+  "_name_or_path": "CompVis/stable-diffusion-safety-checker",
   "architectures": [
     "StableDiffusionSafetyChecker"
   ],
@@ -68,6 +69,7 @@
   "sep_token_id": null,
   "task_specific_params": null,
   "temperature": 1.0,
+  "tf_legacy_loss": false,
   "tie_encoder_decoder": false,
   "tie_word_embeddings": true,
   "tokenizer_class": null,
@@ -75,7 +77,7 @@
   "top_p": 1.0,
   "torch_dtype": null,
   "torchscript": false,
-  "transformers_version": "4.
+  "transformers_version": "4.22.2",
   "typical_p": 1.0,
   "use_bfloat16": false,
   "vocab_size": 49408
@@ -133,6 +135,7 @@
   "num_attention_heads": 16,
   "num_beam_groups": 1,
   "num_beams": 1,
+  "num_channels": 3,
   "num_hidden_layers": 24,
   "num_return_sequences": 1,
   "output_attentions": false,
@@ -150,6 +153,7 @@
   "sep_token_id": null,
   "task_specific_params": null,
   "temperature": 1.0,
+  "tf_legacy_loss": false,
   "tie_encoder_decoder": false,
   "tie_word_embeddings": true,
   "tokenizer_class": null,
@@ -157,7 +161,7 @@
   "top_p": 1.0,
   "torch_dtype": null,
   "torchscript": false,
-  "transformers_version": "4.
+  "transformers_version": "4.22.2",
   "typical_p": 1.0,
   "use_bfloat16": false
 },
scheduler/scheduler_config.json CHANGED
@@ -1,13 +1,9 @@
 {
-  "_class_name": "DDIMScheduler",
-  "_diffusers_version": "0.
+  "_class_name": "LMSDiscreteScheduler",
+  "_diffusers_version": "0.4.1",
   "beta_end": 0.012,
   "beta_schedule": "scaled_linear",
   "beta_start": 0.00085,
-  "clip_sample": false,
   "num_train_timesteps": 1000,
-  "set_alpha_to_one": false,
-  "timestep_values": null,
-  "trained_betas": null,
-  "steps_offset": 1
+  "trained_betas": null
 }
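For reference, the committed config corresponds to constructing the scheduler directly with the same values, as in this sketch (kwargs mirror the JSON above):

```python
from diffusers import LMSDiscreteScheduler

# Mirrors scheduler_config.json as committed here.
scheduler = LMSDiscreteScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    num_train_timesteps=1000,
    trained_betas=None,
)
```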
text_encoder/config.json CHANGED
@@ -18,7 +18,8 @@
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
   "pad_token_id": 1,
+  "projection_dim": 768,
   "torch_dtype": "float32",
-  "transformers_version": "4.
+  "transformers_version": "4.22.2",
   "vocab_size": 49408
 }
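A quick sanity check of the added `projection_dim` field, as a sketch (it assumes a transformers release new enough to accept the `subfolder` argument when loading from this repo):

```python
from transformers import CLIPTextConfig

# Load just the text-encoder config and confirm the newly added field.
cfg = CLIPTextConfig.from_pretrained("hakurei/waifu-diffusion", subfolder="text_encoder")
print(cfg.projection_dim)  # expected: 768
```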
unet/config.json CHANGED
@@ -1,6 +1,6 @@
 {
   "_class_name": "UNet2DConditionModel",
-  "_diffusers_version": "0.
+  "_diffusers_version": "0.4.1",
   "act_fn": "silu",
   "attention_head_dim": 8,
   "block_out_channels": [
unet/diffusion_pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f47e5665f0e85155a5f6f58683b04940c6b132023d584396226bf54419a78831
 size 3438354725
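The pointer's `oid` is simply the SHA-256 digest of the actual weight file, so a downloaded copy can be checked against it. A small sketch (the local path is an assumption):

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file so multi-GB weights don't need to fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# oid taken from the LFS pointer above; the local path is assumed.
expected = "f47e5665f0e85155a5f6f58683b04940c6b132023d584396226bf54419a78831"
assert sha256_of("unet/diffusion_pytorch_model.bin") == expected
```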
vae/config.json CHANGED
@@ -1,6 +1,6 @@
 {
   "_class_name": "AutoencoderKL",
-  "_diffusers_version": "0.
+  "_diffusers_version": "0.4.1",
   "act_fn": "silu",
   "block_out_channels": [
     128,
@@ -17,8 +17,9 @@
   "in_channels": 3,
   "latent_channels": 4,
   "layers_per_block": 2,
+  "norm_num_groups": 32,
   "out_channels": 3,
-  "sample_size": 
+  "sample_size": 256,
   "up_block_types": [
     "UpDecoderBlock2D",
     "UpDecoderBlock2D",
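The VAE can also be loaded on its own to confirm the fields added above, as in this sketch (assuming a diffusers version that accepts the `subfolder` argument):

```python
from diffusers import AutoencoderKL

# Load only the autoencoder from this repo and check the committed config.
vae = AutoencoderKL.from_pretrained("hakurei/waifu-diffusion", subfolder="vae")
print(vae.config.sample_size)      # expected: 256
print(vae.config.norm_num_groups)  # expected: 32
```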