Duplicate from haor/Evt_V4-preview
Co-authored-by: haor233 <[email protected]>
- .gitattributes +38 -0
- Evt_V4_e02_ema.safetensors +3 -0
- Evt_V4_e04_ema.safetensors +3 -0
- Evt_V4_e06_ema.safetensors +3 -0
- Evt_V4_e08_ema.safetensors +3 -0
- Evt_V4_e10_ema.ckpt +3 -0
- Evt_V4_e10_ema.safetensors +3 -0
- Evt_V4_e10_noema.ckpt +3 -0
- README.md +119 -0
- feature_extractor/preprocessor_config.json +28 -0
- model_index.json +33 -0
- safety_checker/config.json +181 -0
- safety_checker/pytorch_model.bin +3 -0
- samples/image_2023-01-09_17-05-09.png +3 -0
- samples/image_2023-01-09_17-08-53.png +3 -0
- samples/image_2023-01-09_17-11-36.png +3 -0
- samples/image_2023-01-09_17-15-39.png +3 -0
- scheduler/scheduler_config.json +13 -0
- sd1.4.vae.pt +3 -0
- text_encoder/config.json +25 -0
- text_encoder/pytorch_model.bin +3 -0
- tokenizer/merges.txt +0 -0
- tokenizer/special_tokens_map.json +24 -0
- tokenizer/tokenizer_config.json +34 -0
- tokenizer/vocab.json +0 -0
- unet/config.json +44 -0
- unet/diffusion_pytorch_model.bin +3 -0
- vae/config.json +30 -0
- vae/diffusion_pytorch_model.bin +3 -0
.gitattributes
ADDED
@@ -0,0 +1,38 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
samples/image_2023-01-09_17-05-09.png filter=lfs diff=lfs merge=lfs -text
samples/image_2023-01-09_17-08-53.png filter=lfs diff=lfs merge=lfs -text
samples/image_2023-01-09_17-11-36.png filter=lfs diff=lfs merge=lfs -text
samples/image_2023-01-09_17-15-39.png filter=lfs diff=lfs merge=lfs -text
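All of the patterns above route large binaries through Git LFS, so a plain `git clone` without LFS installed fetches only pointer stubs. A minimal sketch, assuming the `huggingface_hub` client, that fetches one of the LFS-tracked checkpoints directly without cloning:

```python
# Minimal sketch: download one LFS-tracked file from the Hub without cloning.
# Assumes `pip install huggingface_hub`; the repo id comes from this page.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="haor/Evt_V4-preview",
    filename="Evt_V4_e10_ema.safetensors",
)
print(path)  # local cache path of the resolved (non-pointer) file
```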
Evt_V4_e02_ema.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:086397fe111f29487664b4c96bb777e27febf0f7ab48c96447aaba8b7b260171
size 4265096997
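Each weight file in this commit is stored as a Git LFS pointer like the one above: three `key value` lines giving the spec version, a SHA-256 object id, and the byte size. A sketch of parsing a pointer and verifying a downloaded blob against it (the helper names are hypothetical, not part of this repo):

```python
# Sketch: parse a Git LFS pointer file and verify a downloaded blob against it.
import hashlib

def parse_lfs_pointer(text: str) -> dict:
    # Pointer files are "key value" lines; the oid is prefixed with "sha256:".
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "version": fields["version"],
        "sha256": fields["oid"].removeprefix("sha256:"),
        "size": int(fields["size"]),
    }

def verify_blob(blob: bytes, pointer: dict) -> bool:
    # Both the byte size and the content hash must match the pointer.
    return (
        len(blob) == pointer["size"]
        and hashlib.sha256(blob).hexdigest() == pointer["sha256"]
    )
```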
Evt_V4_e04_ema.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:90a067aa5e9dc881759ae9e3c88f46ac93f4414f70251dbd0c64b64efa9aa9b7
size 4236743134
Evt_V4_e06_ema.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9d31ac032a924d03c11e0f8a0f0955021519030b03b07eb1bf673b3eec492322
size 4236743134
Evt_V4_e08_ema.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1b4d103f8178bd4c7e047b374eaf49917596b60082bc911a583193886b9290f0
size 4236743134
Evt_V4_e10_ema.ckpt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3c71d596fa591c2c515d658bbc3f98de2293706c22f6b1d9a795229c1b40ca0b
size 4236894509
Evt_V4_e10_ema.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:df38895c169fb86326ed2bf612c155a947660d6e0e7efe89299dc3486d112cdd
size 4236743134
Evt_V4_e10_noema.ckpt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:baeb6cb33c5be7bd602fbbfbdbdc9cefa8523687542742c70c34109e136ab62f
size 4236978953
README.md
ADDED
@@ -0,0 +1,119 @@
---
language:
- en
license: creativeml-openrail-m
tags:
- stable-diffusion
- stable-diffusion-diffusers
- text-to-image
- diffusers
inference: true
duplicated_from: haor/Evt_V4-preview
---

# Evt_V4-preview
The EVT series is an experimental project for fine-tuning animation-style models on large datasets.
Evt_V4 uses a larger dataset than its predecessors, and its cosine similarity with ACertainty reaches 85%.
It may behave differently from other models; we hope you enjoy it.
## 🧨 Diffusers

This model can be used just like any other Stable Diffusion model. For more information,
please have a look at the [Stable Diffusion documentation](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion).

You can also export the model to [ONNX](https://huggingface.co/docs/diffusers/optimization/onnx), [MPS](https://huggingface.co/docs/diffusers/optimization/mps), and/or FLAX/JAX.

```python
from diffusers import StableDiffusionPipeline
import torch

model_id = "haor/Evt_V4-preview"
branch_name = "main"

# Load the fp16 weights from the chosen branch and move the pipeline to the GPU.
pipe = StableDiffusionPipeline.from_pretrained(model_id, revision=branch_name, torch_dtype=torch.float16)
pipe = pipe.to("cuda")

prompt = "1girl"
image = pipe(prompt).images[0]

image.save("./1girl.png")
```
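The standalone `.ckpt`/`.safetensors` checkpoints in this commit can also be loaded directly, assuming a diffusers release recent enough to provide `from_single_file` (older versions need a conversion script instead); a sketch:

```python
# Sketch: load one of the single-file checkpoints from this repo directly.
# Assumes a diffusers version that provides `from_single_file`.
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_single_file(
    "https://huggingface.co/haor/Evt_V4-preview/blob/main/Evt_V4_e10_ema.safetensors",
    torch_dtype=torch.float16,
).to("cuda")
```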

## Examples

**Prompt1:**



```
1girl in black serafuku standing in a field solo, food, fruit, lemon, bubble, planet, moon, orange \(fruit\), lemon slice, leaf, fish, orange slice, by (tabi:1.25), spot color, looking at viewer, closeup cowboy shot
Negative prompt: (bad:0.81), (comic:0.81), (cropped:0.81), (error:0.81), (extra:0.81), (low:0.81), (lowres:0.81), (speech:0.81), (worst:0.81), (blush:0.9), 2koma, 3koma, 4koma, collage, lipstick
Steps: 20, Sampler: DPM++ SDE Karras, CFG scale: 7, Seed: 2285895007, Size: 512x1152, Denoising strength: 0.7, Clip skip: 2
```

**Prompt2:**



```
{Masterpiece, Kaname_Madoka, tall and long double tails, well rooted hair, (pink hair), pink eyes, crossed bangs, ojousama, jk, thigh bandages, wrist cuffs, (pink bow: 1.2)}, plain color, sketch, masterpiece, high detail, masterpiece portrait, best quality, ray tracing, {:<, look at the edge}
Negative prompt: ((((ugly)))), (((duplicate))), ((morbid)), ((mutilated)),extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((bad proportions))), ((extra limbs)), (((deformed))), (((disfigured))), cloned face, gross proportions, (malformed limbs), ((missing arms)), ((missing legs)), (((extra arms))), (((extra legs))), too many fingers, (((long neck))), (((low quality))), normal quality, blurry, bad feet, text font ui, ((((worst quality)))), anatomical nonsense, (((bad shadow))), unnatural body, liquid body, 3D, 3D game, 3D game scene, 3D character, bad hairs, poorly drawn hairs, fused hairs, big muscles, bad face, extra eyes, furry, pony, mosaic, disappearing calf, disappearing legs, extra digit, fewer digit, fused digit, missing digit, fused feet, poorly drawn eyes, big face, long face, bad eyes, thick lips, obesity, strong girl, beard,Excess legs
Steps: 20, Sampler: DPM++ SDE Karras, CFG scale: 7, Seed: 2468255263, Size: 512x1152, Denoising strength: 0.7, Clip skip: 2
```
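These settings come from the AUTOMATIC1111 WebUI. Roughly the same generation can be approximated in diffusers by pinning the seed, step count, guidance scale, and output size; a sketch, with the caveats that "DPM++ SDE Karras" only approximately corresponds to `DPMSolverSDEScheduler` with Karras sigmas in newer diffusers releases, and "Clip skip: 2" has no direct pipeline flag:

```python
# Sketch: approximate the Prompt1 settings above with diffusers.
import torch
from diffusers import StableDiffusionPipeline, DPMSolverSDEScheduler

pipe = StableDiffusionPipeline.from_pretrained(
    "haor/Evt_V4-preview", torch_dtype=torch.float16
).to("cuda")
# Roughly "DPM++ SDE Karras"; needs a diffusers version with this scheduler.
pipe.scheduler = DPMSolverSDEScheduler.from_config(
    pipe.scheduler.config, use_karras_sigmas=True
)

generator = torch.Generator("cuda").manual_seed(2285895007)
image = pipe(
    "1girl in black serafuku standing in a field solo, ...",  # full prompt above
    negative_prompt="(bad:0.81), (comic:0.81), ...",          # full negative prompt above
    num_inference_steps=20,
    guidance_scale=7,
    width=512,
    height=1152,
    generator=generator,
).images[0]
```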
## Training
Base model: [ACertainty](https://huggingface.co/JosephusCheung/ACertainty)
Trained for 10 epochs on around 550k anime-style images (Pixiv and Yandere).
Resolution: 512
UCG: 0.1
ARB (aspect-ratio bucketing): enabled
Trainer: [Mikubill/naifu-diffusion](https://github.com/Mikubill/naifu-diffusion)
```
arb:
  enabled: true
  debug: false
  base_res: [512, 512]
  max_size: [768, 512]
  divisible: 64
  max_ar_error: 4
  min_dim: 256
  dim_limit: 1024
```
```
scheduler:
  name: diffusers.DDIMScheduler
  params:
    beta_end: 0.012
    beta_schedule: "scaled_linear"
    beta_start: 0.00085
    clip_sample: false
    num_train_timesteps: 1000
    set_alpha_to_one: false
    steps_offset: 1
    trained_betas: null

optimizer:
  name: bitsandbytes.optim.AdamW8bit
  params:
    lr: 2e-6
    weight_decay: 5e-2
    eps: 1e-7

lr_scheduler:
  name: torch.optim.lr_scheduler.CosineAnnealingWarmRestarts
  warmup:
    enabled: true
    init_lr: 2e-8
    num_warmup: 50
    strategy: "cos"
  params:
    T_0: 5
    T_mult: 1
    eta_min: 6e-7
    last_epoch: -1
```
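For reference, the optimizer and LR-schedule blocks above map directly onto their `bitsandbytes` and PyTorch constructors. A minimal sketch; the `model` below is a stand-in for the fine-tuned UNet, and the trainer-specific warmup logic is omitted:

```python
# Sketch: instantiate the optimizer and LR scheduler from the config above.
import torch
import bitsandbytes as bnb

model = torch.nn.Linear(4, 4)  # stand-in for the UNet being fine-tuned

optimizer = bnb.optim.AdamW8bit(
    model.parameters(), lr=2e-6, weight_decay=5e-2, eps=1e-7
)
lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
    optimizer, T_0=5, T_mult=1, eta_min=6e-7, last_epoch=-1
)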
Training consumed about 300 V100 GPU-hours.

## License

This model is open access and available to all, with a CreativeML OpenRAIL-M license further specifying rights and usage.
The CreativeML OpenRAIL License specifies:

1. You can't use the model to deliberately produce or share illegal or harmful outputs or content.
2. The authors claim no rights on the outputs you generate; you are free to use them, and you are accountable for their use, which must not go against the provisions set in the license.
3. You may re-distribute the weights and use the model commercially and/or as a service. If you do, please be aware you have to include the same use restrictions as the ones in the license and share a copy of the CreativeML OpenRAIL-M with all your users (please read the license entirely and carefully).

[Please read the full license here](https://huggingface.co/spaces/CompVis/stable-diffusion-license)
feature_extractor/preprocessor_config.json
ADDED
@@ -0,0 +1,28 @@
{
  "crop_size": {
    "height": 224,
    "width": 224
  },
  "do_center_crop": true,
  "do_convert_rgb": true,
  "do_normalize": true,
  "do_rescale": true,
  "do_resize": true,
  "feature_extractor_type": "CLIPFeatureExtractor",
  "image_mean": [
    0.48145466,
    0.4578275,
    0.40821073
  ],
  "image_processor_type": "CLIPImageProcessor",
  "image_std": [
    0.26862954,
    0.26130258,
    0.27577711
  ],
  "resample": 3,
  "rescale_factor": 0.00392156862745098,
  "size": {
    "shortest_edge": 224
  }
}
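This config encodes standard CLIP preprocessing for the safety checker's image input: resize so the shortest edge is 224, center-crop to 224×224, rescale by 1/255, and normalize with the CLIP mean/std. A rough torchvision equivalent, as a sketch rather than the exact transformers code path:

```python
# Sketch of the preprocessing implied by preprocessor_config.json;
# transformers' CLIPImageProcessor performs the same steps internally.
from torchvision import transforms

clip_preprocess = transforms.Compose([
    transforms.Resize(224, interpolation=transforms.InterpolationMode.BICUBIC),  # resample=3 -> bicubic
    transforms.CenterCrop(224),
    transforms.ToTensor(),  # also rescales pixel values by 1/255
    transforms.Normalize(
        mean=[0.48145466, 0.4578275, 0.40821073],
        std=[0.26862954, 0.26130258, 0.27577711],
    ),
])
```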
model_index.json
ADDED
@@ -0,0 +1,33 @@
{
  "_class_name": "StableDiffusionPipeline",
  "_diffusers_version": "0.11.1",
  "feature_extractor": [
    "transformers",
    "CLIPImageProcessor"
  ],
  "requires_safety_checker": true,
  "safety_checker": [
    "stable_diffusion",
    "StableDiffusionSafetyChecker"
  ],
  "scheduler": [
    "diffusers",
    "PNDMScheduler"
  ],
  "text_encoder": [
    "transformers",
    "CLIPTextModel"
  ],
  "tokenizer": [
    "transformers",
    "CLIPTokenizer"
  ],
  "unet": [
    "diffusers",
    "UNet2DConditionModel"
  ],
  "vae": [
    "diffusers",
    "AutoencoderKL"
  ]
}
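model_index.json tells `DiffusionPipeline.from_pretrained` which library and class to instantiate for each subfolder. Components can also be loaded individually and recombined; for instance, swapping the default PNDMScheduler for a DDIM sampler (the swap itself is just an illustrative choice):

```python
# Sketch: load one component listed in model_index.json on its own,
# then pass it in to override the default PNDMScheduler.
from diffusers import StableDiffusionPipeline, DDIMScheduler

scheduler = DDIMScheduler.from_pretrained("haor/Evt_V4-preview", subfolder="scheduler")
pipe = StableDiffusionPipeline.from_pretrained("haor/Evt_V4-preview", scheduler=scheduler)
```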
safety_checker/config.json
ADDED
@@ -0,0 +1,181 @@
{
  "_commit_hash": "cb41f3a270d63d454d385fc2e4f571c487c253c5",
  "_name_or_path": "CompVis/stable-diffusion-safety-checker",
  "architectures": [
    "StableDiffusionSafetyChecker"
  ],
  "initializer_factor": 1.0,
  "logit_scale_init_value": 2.6592,
  "model_type": "clip",
  "projection_dim": 768,
  "text_config": {
    "_name_or_path": "",
    "add_cross_attention": false,
    "architectures": null,
    "attention_dropout": 0.0,
    "bad_words_ids": null,
    "begin_suppress_tokens": null,
    "bos_token_id": 0,
    "chunk_size_feed_forward": 0,
    "cross_attention_hidden_size": null,
    "decoder_start_token_id": null,
    "diversity_penalty": 0.0,
    "do_sample": false,
    "dropout": 0.0,
    "early_stopping": false,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": 2,
    "exponential_decay_length_penalty": null,
    "finetuning_task": null,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "hidden_act": "quick_gelu",
    "hidden_size": 768,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 3072,
    "is_decoder": false,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "layer_norm_eps": 1e-05,
    "length_penalty": 1.0,
    "max_length": 20,
    "max_position_embeddings": 77,
    "min_length": 0,
    "model_type": "clip_text_model",
    "no_repeat_ngram_size": 0,
    "num_attention_heads": 12,
    "num_beam_groups": 1,
    "num_beams": 1,
    "num_hidden_layers": 12,
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": 1,
    "prefix": null,
    "problem_type": null,
    "projection_dim": 512,
    "pruned_heads": {},
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "return_dict": true,
    "return_dict_in_generate": false,
    "sep_token_id": null,
    "suppress_tokens": null,
    "task_specific_params": null,
    "temperature": 1.0,
    "tf_legacy_loss": false,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": true,
    "tokenizer_class": null,
    "top_k": 50,
    "top_p": 1.0,
    "torch_dtype": null,
    "torchscript": false,
    "transformers_version": "4.25.1",
    "typical_p": 1.0,
    "use_bfloat16": false,
    "vocab_size": 49408
  },
  "text_config_dict": {
    "hidden_size": 768,
    "intermediate_size": 3072,
    "num_attention_heads": 12,
    "num_hidden_layers": 12
  },
  "torch_dtype": "float32",
  "transformers_version": null,
  "vision_config": {
    "_name_or_path": "",
    "add_cross_attention": false,
    "architectures": null,
    "attention_dropout": 0.0,
    "bad_words_ids": null,
    "begin_suppress_tokens": null,
    "bos_token_id": null,
    "chunk_size_feed_forward": 0,
    "cross_attention_hidden_size": null,
    "decoder_start_token_id": null,
    "diversity_penalty": 0.0,
    "do_sample": false,
    "dropout": 0.0,
    "early_stopping": false,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": null,
    "exponential_decay_length_penalty": null,
    "finetuning_task": null,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "hidden_act": "quick_gelu",
    "hidden_size": 1024,
    "id2label": {
      "0": "LABEL_0",
      "1": "LABEL_1"
    },
    "image_size": 224,
    "initializer_factor": 1.0,
    "initializer_range": 0.02,
    "intermediate_size": 4096,
    "is_decoder": false,
    "is_encoder_decoder": false,
    "label2id": {
      "LABEL_0": 0,
      "LABEL_1": 1
    },
    "layer_norm_eps": 1e-05,
    "length_penalty": 1.0,
    "max_length": 20,
    "min_length": 0,
    "model_type": "clip_vision_model",
    "no_repeat_ngram_size": 0,
    "num_attention_heads": 16,
    "num_beam_groups": 1,
    "num_beams": 1,
    "num_channels": 3,
    "num_hidden_layers": 24,
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": null,
    "patch_size": 14,
    "prefix": null,
    "problem_type": null,
    "projection_dim": 512,
    "pruned_heads": {},
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "return_dict": true,
    "return_dict_in_generate": false,
    "sep_token_id": null,
    "suppress_tokens": null,
    "task_specific_params": null,
    "temperature": 1.0,
    "tf_legacy_loss": false,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": true,
    "tokenizer_class": null,
    "top_k": 50,
    "top_p": 1.0,
    "torch_dtype": null,
    "torchscript": false,
    "transformers_version": "4.25.1",
    "typical_p": 1.0,
    "use_bfloat16": false
  },
  "vision_config_dict": {
    "hidden_size": 1024,
    "intermediate_size": 4096,
    "num_attention_heads": 16,
    "num_hidden_layers": 24,
    "patch_size": 14
  }
}
safety_checker/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:16d28f2b37109f222cdc33620fdd262102ac32112be0352a7f77e9614b35a394
size 1216064769
samples/image_2023-01-09_17-05-09.png
ADDED
Git LFS Details
samples/image_2023-01-09_17-08-53.png
ADDED
Git LFS Details
samples/image_2023-01-09_17-11-36.png
ADDED
Git LFS Details
samples/image_2023-01-09_17-15-39.png
ADDED
Git LFS Details
scheduler/scheduler_config.json
ADDED
@@ -0,0 +1,13 @@
{
  "_class_name": "PNDMScheduler",
  "_diffusers_version": "0.11.1",
  "beta_end": 0.012,
  "beta_schedule": "scaled_linear",
  "beta_start": 0.00085,
  "num_train_timesteps": 1000,
  "prediction_type": "epsilon",
  "set_alpha_to_one": false,
  "skip_prk_steps": true,
  "steps_offset": 1,
  "trained_betas": null
}
sd1.4.vae.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c4b5b9d3a6c21c9b702f99c4dd312b37d5fa4bdfc6483def50477e67c411bb4c
size 334692421
text_encoder/config.json
ADDED
@@ -0,0 +1,25 @@
{
  "_name_or_path": "openai/clip-vit-large-patch14",
  "architectures": [
    "CLIPTextModel"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 0,
  "dropout": 0.0,
  "eos_token_id": 2,
  "hidden_act": "quick_gelu",
  "hidden_size": 768,
  "initializer_factor": 1.0,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 77,
  "model_type": "clip_text_model",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "projection_dim": 768,
  "torch_dtype": "float32",
  "transformers_version": "4.25.1",
  "vocab_size": 49408
}
text_encoder/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:98124f3d5663b2f14ff08d4c29db93800622b4fcfa3d952bb6f9112f5d6dadd7
size 492307041
tokenizer/merges.txt
ADDED
The diff for this file is too large to render.
tokenizer/special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
{
  "bos_token": {
    "content": "<|startoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": "<|endoftext|>",
  "unk_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer/tokenizer_config.json
ADDED
@@ -0,0 +1,34 @@
{
  "add_prefix_space": false,
  "bos_token": {
    "__type": "AddedToken",
    "content": "<|startoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "do_lower_case": true,
  "eos_token": {
    "__type": "AddedToken",
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "errors": "replace",
  "model_max_length": 77,
  "name_or_path": "openai/clip-vit-large-patch14",
  "pad_token": "<|endoftext|>",
  "special_tokens_map_file": "./special_tokens_map.json",
  "tokenizer_class": "CLIPTokenizer",
  "unk_token": {
    "__type": "AddedToken",
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}
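With `model_max_length: 77`, prompts are padded or truncated to 77 tokens before reaching the text encoder. A short sketch of how the pipeline tokenizes a prompt internally:

```python
# Sketch: tokenize a prompt the way StableDiffusionPipeline does internally.
from transformers import CLIPTokenizer

tokenizer = CLIPTokenizer.from_pretrained("haor/Evt_V4-preview", subfolder="tokenizer")
inputs = tokenizer(
    "1girl",
    padding="max_length",
    max_length=tokenizer.model_max_length,  # 77
    truncation=True,
    return_tensors="pt",
)
print(inputs.input_ids.shape)  # torch.Size([1, 77])
```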
tokenizer/vocab.json
ADDED
The diff for this file is too large to render.
unet/config.json
ADDED
@@ -0,0 +1,44 @@
{
  "_class_name": "UNet2DConditionModel",
  "_diffusers_version": "0.11.1",
  "act_fn": "silu",
  "attention_head_dim": 8,
  "block_out_channels": [
    320,
    640,
    1280,
    1280
  ],
  "center_input_sample": false,
  "class_embed_type": null,
  "cross_attention_dim": 768,
  "down_block_types": [
    "CrossAttnDownBlock2D",
    "CrossAttnDownBlock2D",
    "CrossAttnDownBlock2D",
    "DownBlock2D"
  ],
  "downsample_padding": 1,
  "dual_cross_attention": false,
  "flip_sin_to_cos": true,
  "freq_shift": 0,
  "in_channels": 4,
  "layers_per_block": 2,
  "mid_block_scale_factor": 1,
  "mid_block_type": "UNetMidBlock2DCrossAttn",
  "norm_eps": 1e-05,
  "norm_num_groups": 32,
  "num_class_embeds": null,
  "only_cross_attention": false,
  "out_channels": 4,
  "resnet_time_scale_shift": "default",
  "sample_size": 64,
  "up_block_types": [
    "UpBlock2D",
    "CrossAttnUpBlock2D",
    "CrossAttnUpBlock2D",
    "CrossAttnUpBlock2D"
  ],
  "upcast_attention": false,
  "use_linear_projection": false
}
unet/diffusion_pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e385dc417927a93dc8ecedf90d4c5d2749ea999d46e9c9847f0069fc0369b771
size 3438366373
vae/config.json
ADDED
@@ -0,0 +1,30 @@
{
  "_class_name": "AutoencoderKL",
  "_diffusers_version": "0.11.1",
  "act_fn": "silu",
  "block_out_channels": [
    128,
    256,
    512,
    512
  ],
  "down_block_types": [
    "DownEncoderBlock2D",
    "DownEncoderBlock2D",
    "DownEncoderBlock2D",
    "DownEncoderBlock2D"
  ],
  "in_channels": 3,
  "latent_channels": 4,
  "layers_per_block": 2,
  "norm_num_groups": 32,
  "out_channels": 3,
  "sample_size": 256,
  "scaling_factor": 0.18215,
  "up_block_types": [
    "UpDecoderBlock2D",
    "UpDecoderBlock2D",
    "UpDecoderBlock2D",
    "UpDecoderBlock2D"
  ]
}
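The `scaling_factor` of 0.18215 is the constant Stable Diffusion uses to normalize VAE latents: encoding multiplies by it and decoding divides by it. A sketch of the round trip (the random tensor is a stand-in for a real image normalized to [-1, 1]):

```python
# Sketch: round-trip an image tensor through the VAE with latent scaling.
import torch
from diffusers import AutoencoderKL

vae = AutoencoderKL.from_pretrained("haor/Evt_V4-preview", subfolder="vae")
image = torch.randn(1, 3, 512, 512)  # stand-in for a real image in [-1, 1]

with torch.no_grad():
    latents = vae.encode(image).latent_dist.sample() * vae.config.scaling_factor
    decoded = vae.decode(latents / vae.config.scaling_factor).sample
print(latents.shape)  # torch.Size([1, 4, 64, 64])
```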
vae/diffusion_pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:876a906810a8b2470c3092f307742f6ad8b9dbf759fb7c0ff020d0c610c996da
size 334711857