Add WD 1.3 float32 weights

Files changed:
- .gitattributes +4 -0
- README.md +11 -33
- model_index.json +5 -5
- safety_checker/config.json +7 -3
- scheduler/scheduler_config.json +3 -7
- text_encoder/config.json +2 -1
- unet/config.json +1 -1
- unet/diffusion_pytorch_model.bin +1 -1
- vae/config.json +3 -2
.gitattributes CHANGED
@@ -29,3 +29,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+safety_checker/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+text_encoder/pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+unet/diffusion_pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
+vae/diffusion_pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
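These four rules route the new float32 weight files through Git LFS, so a plain clone fetches only pointer stubs for them. As a sketch of pulling one such blob without cloning (the `huggingface_hub` package and the `hakurei/waifu-diffusion` repo id are assumptions; neither appears in the diff itself):

```python
# Sketch: fetch one of the newly LFS-tracked weight files directly from the Hub.
from huggingface_hub import hf_hub_download

local_path = hf_hub_download(
    repo_id="hakurei/waifu-diffusion",          # assumed repo id
    filename="unet/diffusion_pytorch_model.bin",  # one of the files added above
)
print(local_path)
```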
README.md CHANGED
@@ -17,24 +17,15 @@ waifu-diffusion is a latent text-to-image diffusion model that has been conditio
 
 [Original Weights](https://huggingface.co/hakurei/waifu-diffusion-v1-3)
 
-# Gradio
+# Gradio & Colab
 
-We also support a [Gradio](https://github.com/gradio-app/gradio)
+We also support a [Gradio](https://github.com/gradio-app/gradio) Web UI and Colab with Diffusers to run Waifu Diffusion:
 [![Open In Spaces](https://camo.githubusercontent.com/00380c35e60d6b04be65d3d94a58332be5cc93779f630bcdfc18ab9a3a7d3388/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f25463025394625413425393725323048756767696e67253230466163652d5370616365732d626c7565)](https://huggingface.co/spaces/hakurei/waifu-diffusion-demo)
-
 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1_8wPN7dJO746QXsFnB09Uq2VGgSRFuYE#scrollTo=1HaCauSq546O)
 
 ## Model Description
 
-The current model has been fine-tuned with a learning rate of 5.0e-6 for 4 epochs on 56k text-image pairs obtained through Danbooru which all have an aesthetic rating greater than `6.0`.
-
-**Note:** This project has **no affiliation with Danbooru.**
-
-## Training Data & Annotative Prompting
-
-The data used for fine-tuning has come from a random sample of 56k Danbooru images, which were filtered based on [CLIP Aesthetic Scoring](https://github.com/christophschuhmann/improved-aesthetic-predictor) where only images with an aesthetic score greater than `6.0` were used.
+[See here for a full model overview.](https://gist.github.com/harubaru/f727cedacae336d1f7877c4bbe2196e1)
 
 ## License
 
@@ -55,31 +46,18 @@ This model can be used for entertainment purposes and as a generative art assist
 ```python
 import torch
 from torch import autocast
-from diffusers import StableDiffusionPipeline, DDIMScheduler
-
-model_id = "hakurei/waifu-diffusion"
-device = "cuda"
+from diffusers import StableDiffusionPipeline
 
 pipe = StableDiffusionPipeline.from_pretrained(
-    model_id,
-    torch_dtype=torch.
-    scheduler=DDIMScheduler(
-        beta_start=0.00085,
-        beta_end=0.012,
-        beta_schedule="scaled_linear",
-        clip_sample=False,
-        set_alpha_to_one=False,
-    ),
-)
-pipe = pipe.to(device)
-
-prompt = "touhou hakurei_reimu 1girl solo portrait"
+    'waifu-diffusion',
+    torch_dtype=torch.float32
+).to('cuda')
+
+prompt = "1girl, aqua eyes, baseball cap, blonde hair, closed mouth, earrings, green background, hat, hoop earrings, jewelry, looking at viewer, shirt, short hair, simple background, solo, upper body, yellow shirt"
 with autocast("cuda"):
-    image = pipe(prompt, guidance_scale=
+    image = pipe(prompt, guidance_scale=6)["sample"][0]
 
-image.save("
+image.save("test.png")
 ```
 
 ## Team Members and Acknowledgements
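One note on the updated snippet (API drift in diffusers, not something this commit controls): newer diffusers releases return a `StableDiffusionPipelineOutput` whose images are exposed as `.images` rather than a `"sample"` key, so on recent versions the equivalent retrieval is:

```python
# Equivalent on newer diffusers versions, where the pipeline output
# exposes .images instead of the "sample" key used in the README above.
image = pipe(prompt, guidance_scale=6).images[0]
image.save("test.png")
```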
model_index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "_class_name": "StableDiffusionPipeline",
-  "_diffusers_version": "0.
+  "_diffusers_version": "0.4.1",
   "feature_extractor": [
     "transformers",
     "CLIPFeatureExtractor"
@@ -9,14 +9,14 @@
     "stable_diffusion",
     "StableDiffusionSafetyChecker"
   ],
+  "scheduler": [
+    "diffusers",
+    "LMSDiscreteScheduler"
+  ],
   "text_encoder": [
     "transformers",
     "CLIPTextModel"
   ],
-  "scheduler": [
-    "diffusers",
-    "DDIMScheduler"
-  ],
   "tokenizer": [
     "transformers",
     "CLIPTokenizer"
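Since `model_index.json` tells `from_pretrained` which class to build for each component, the swap above means a freshly loaded pipeline now carries an LMSDiscreteScheduler in place of the old DDIMScheduler. A minimal sketch of verifying that (assuming diffusers 0.4.1, the version pinned above; the repo id is the Hub location of this repository):

```python
# model_index.json drives which classes from_pretrained instantiates,
# so the loaded pipeline should now carry the LMS scheduler.
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "hakurei/waifu-diffusion",   # or a local checkout of this repo
    torch_dtype=torch.float32,
)
print(type(pipe.scheduler).__name__)  # expected: LMSDiscreteScheduler
```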
safety_checker/config.json CHANGED
@@ -1,5 +1,6 @@
 {
-  "
+  "_commit_hash": null,
+  "_name_or_path": "CompVis/stable-diffusion-safety-checker",
   "architectures": [
     "StableDiffusionSafetyChecker"
   ],
@@ -68,6 +69,7 @@
   "sep_token_id": null,
   "task_specific_params": null,
   "temperature": 1.0,
+  "tf_legacy_loss": false,
   "tie_encoder_decoder": false,
   "tie_word_embeddings": true,
   "tokenizer_class": null,
@@ -75,7 +77,7 @@
   "top_p": 1.0,
   "torch_dtype": null,
   "torchscript": false,
-  "transformers_version": "4.
+  "transformers_version": "4.22.2",
   "typical_p": 1.0,
   "use_bfloat16": false,
   "vocab_size": 49408
@@ -133,6 +135,7 @@
   "num_attention_heads": 16,
   "num_beam_groups": 1,
   "num_beams": 1,
+  "num_channels": 3,
   "num_hidden_layers": 24,
   "num_return_sequences": 1,
   "output_attentions": false,
@@ -150,6 +153,7 @@
   "sep_token_id": null,
   "task_specific_params": null,
   "temperature": 1.0,
+  "tf_legacy_loss": false,
   "tie_encoder_decoder": false,
   "tie_word_embeddings": true,
   "tokenizer_class": null,
@@ -157,7 +161,7 @@
   "top_p": 1.0,
   "torch_dtype": null,
   "torchscript": false,
-  "transformers_version": "4.
+  "transformers_version": "4.22.2",
   "typical_p": 1.0,
   "use_bfloat16": false
 },
scheduler/scheduler_config.json CHANGED
@@ -1,13 +1,9 @@
 {
-  "_class_name": "DDIMScheduler",
-  "_diffusers_version": "0.
+  "_class_name": "LMSDiscreteScheduler",
+  "_diffusers_version": "0.4.1",
   "beta_end": 0.012,
   "beta_schedule": "scaled_linear",
   "beta_start": 0.00085,
-  "clip_sample": false,
   "num_train_timesteps": 1000,
-  "set_alpha_to_one": false,
-  "timestep_values": null,
-  "trained_betas": null,
-  "steps_offset": 1
+  "trained_betas": null
 }
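For reference, the committed config corresponds to constructing the scheduler directly with the same values, as in this sketch (kwargs mirror the JSON above):

```python
from diffusers import LMSDiscreteScheduler

# Mirrors scheduler_config.json as committed here.
scheduler = LMSDiscreteScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    num_train_timesteps=1000,
    trained_betas=None,
)
```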
text_encoder/config.json CHANGED
@@ -18,7 +18,8 @@
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
   "pad_token_id": 1,
+  "projection_dim": 768,
   "torch_dtype": "float32",
-  "transformers_version": "4.
+  "transformers_version": "4.22.2",
   "vocab_size": 49408
 }
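A quick sanity check of the added `projection_dim` field, as a sketch (it assumes a transformers release new enough to accept the `subfolder` argument when loading from this repo):

```python
from transformers import CLIPTextConfig

# Load just the text-encoder config and confirm the newly added field.
cfg = CLIPTextConfig.from_pretrained("hakurei/waifu-diffusion", subfolder="text_encoder")
print(cfg.projection_dim)  # expected: 768
```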
unet/config.json CHANGED
@@ -1,6 +1,6 @@
 {
   "_class_name": "UNet2DConditionModel",
-  "_diffusers_version": "0.
+  "_diffusers_version": "0.4.1",
   "act_fn": "silu",
   "attention_head_dim": 8,
   "block_out_channels": [
unet/diffusion_pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f47e5665f0e85155a5f6f58683b04940c6b132023d584396226bf54419a78831
 size 3438354725
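The pointer's `oid` is simply the SHA-256 digest of the actual weight file, so a downloaded copy can be checked against it. A small sketch (the local path is an assumption):

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file so multi-GB weights don't need to fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# oid taken from the LFS pointer above; the local path is assumed.
expected = "f47e5665f0e85155a5f6f58683b04940c6b132023d584396226bf54419a78831"
assert sha256_of("unet/diffusion_pytorch_model.bin") == expected
```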
vae/config.json CHANGED
@@ -1,6 +1,6 @@
 {
   "_class_name": "AutoencoderKL",
-  "_diffusers_version": "0.
+  "_diffusers_version": "0.4.1",
   "act_fn": "silu",
   "block_out_channels": [
     128,
@@ -17,8 +17,9 @@
   "in_channels": 3,
   "latent_channels": 4,
   "layers_per_block": 2,
+  "norm_num_groups": 32,
   "out_channels": 3,
-  "sample_size": 
+  "sample_size": 256,
   "up_block_types": [
     "UpDecoderBlock2D",
     "UpDecoderBlock2D",
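The VAE can also be loaded on its own to confirm the fields added above, as in this sketch (assuming a diffusers version that accepts the `subfolder` argument):

```python
from diffusers import AutoencoderKL

# Load only the autoencoder from this repo and check the committed config.
vae = AutoencoderKL.from_pretrained("hakurei/waifu-diffusion", subfolder="vae")
print(vae.config.sample_size)      # expected: 256
print(vae.config.norm_num_groups)  # expected: 32
```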