AltLuv committed on
Commit
d5742ac
1 Parent(s): 206459a

End of training

Browse files
model_index.json CHANGED
@@ -1,21 +1,13 @@
1
  {
2
- "_class_name": "FlaxStableDiffusionPipeline",
3
  "_diffusers_version": "0.22.0.dev0",
4
- "feature_extractor": [
5
- "transformers",
6
- "CLIPImageProcessor"
7
- ],
8
- "safety_checker": [
9
- "stable_diffusion",
10
- "FlaxStableDiffusionSafetyChecker"
11
- ],
12
  "scheduler": [
13
- "diffusers",
14
- "FlaxPNDMScheduler"
15
  ],
16
  "text_encoder": [
17
  "transformers",
18
- "FlaxCLIPTextModel"
19
  ],
20
  "tokenizer": [
21
  "transformers",
@@ -23,10 +15,6 @@
23
  ],
24
  "unet": [
25
  "diffusers",
26
- "FlaxUNet2DConditionModel"
27
- ],
28
- "vae": [
29
- "diffusers",
30
- "FlaxAutoencoderKL"
31
  ]
32
  }
 
1
  {
2
+ "_class_name": "UTTIPipeline",
3
  "_diffusers_version": "0.22.0.dev0",
 
 
 
 
 
 
 
 
4
  "scheduler": [
5
+ null,
6
+ null
7
  ],
8
  "text_encoder": [
9
  "transformers",
10
+ "CLIPTextModel"
11
  ],
12
  "tokenizer": [
13
  "transformers",
 
15
  ],
16
  "unet": [
17
  "diffusers",
18
+ "UNet2DConditionModel"
 
 
 
 
19
  ]
20
  }
text_encoder/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "openai/clip-vit-large-patch14",
3
  "architectures": [
4
  "CLIPTextModel"
5
  ],
@@ -19,7 +19,7 @@
19
  "num_hidden_layers": 12,
20
  "pad_token_id": 1,
21
  "projection_dim": 512,
22
- "torch_dtype": "float32",
23
  "transformers_version": "4.33.1",
24
  "vocab_size": 49408
25
  }
 
1
  {
2
+ "_name_or_path": "CompVis/stable-diffusion-v1-4",
3
  "architectures": [
4
  "CLIPTextModel"
5
  ],
 
19
  "num_hidden_layers": 12,
20
  "pad_token_id": 1,
21
  "projection_dim": 512,
22
+ "torch_dtype": "float16",
23
  "transformers_version": "4.33.1",
24
  "vocab_size": 49408
25
  }
text_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:660c6f5b1abae9dc498ac2d21e1347d2abdb0cf6c0c0c8576cd796491d9a6cdd
3
+ size 246144152
unet/config.json CHANGED
@@ -1,34 +1,60 @@
1
  {
2
- "_class_name": "FlaxUNet2DConditionModel",
3
  "_diffusers_version": "0.22.0.dev0",
 
4
  "addition_embed_type": null,
5
  "addition_embed_type_num_heads": 64,
6
  "addition_time_embed_dim": null,
7
  "attention_head_dim": 8,
 
8
  "block_out_channels": [
9
  320,
10
  640,
11
  1280,
12
  1280
13
  ],
 
 
 
 
 
14
  "cross_attention_dim": 768,
 
15
  "down_block_types": [
16
  "CrossAttnDownBlock2D",
17
  "CrossAttnDownBlock2D",
18
  "CrossAttnDownBlock2D",
19
  "DownBlock2D"
20
  ],
 
21
  "dropout": 0.0,
 
 
 
22
  "flip_sin_to_cos": true,
23
  "freq_shift": 0,
24
- "in_channels": 4,
25
  "layers_per_block": 2,
 
 
 
 
 
26
  "num_attention_heads": null,
 
27
  "only_cross_attention": false,
28
- "out_channels": 4,
29
  "projection_class_embeddings_input_dim": null,
30
- "sample_size": 64,
31
- "split_head_dim": false,
 
 
 
 
 
 
 
 
32
  "transformer_layers_per_block": 1,
33
  "up_block_types": [
34
  "UpBlock2D",
@@ -36,6 +62,6 @@
36
  "CrossAttnUpBlock2D",
37
  "CrossAttnUpBlock2D"
38
  ],
39
- "use_linear_projection": false,
40
- "use_memory_efficient_attention": false
41
  }
 
1
  {
2
+ "_class_name": "UNet2DConditionModel",
3
  "_diffusers_version": "0.22.0.dev0",
4
+ "act_fn": "silu",
5
  "addition_embed_type": null,
6
  "addition_embed_type_num_heads": 64,
7
  "addition_time_embed_dim": null,
8
  "attention_head_dim": 8,
9
+ "attention_type": "default",
10
  "block_out_channels": [
11
  320,
12
  640,
13
  1280,
14
  1280
15
  ],
16
+ "center_input_sample": false,
17
+ "class_embed_type": null,
18
+ "class_embeddings_concat": false,
19
+ "conv_in_kernel": 3,
20
+ "conv_out_kernel": 3,
21
  "cross_attention_dim": 768,
22
+ "cross_attention_norm": null,
23
  "down_block_types": [
24
  "CrossAttnDownBlock2D",
25
  "CrossAttnDownBlock2D",
26
  "CrossAttnDownBlock2D",
27
  "DownBlock2D"
28
  ],
29
+ "downsample_padding": 1,
30
  "dropout": 0.0,
31
+ "dual_cross_attention": false,
32
+ "encoder_hid_dim": null,
33
+ "encoder_hid_dim_type": null,
34
  "flip_sin_to_cos": true,
35
  "freq_shift": 0,
36
+ "in_channels": 3,
37
  "layers_per_block": 2,
38
+ "mid_block_only_cross_attention": null,
39
+ "mid_block_scale_factor": 1,
40
+ "mid_block_type": "UNetMidBlock2DCrossAttn",
41
+ "norm_eps": 1e-05,
42
+ "norm_num_groups": 32,
43
  "num_attention_heads": null,
44
+ "num_class_embeds": null,
45
  "only_cross_attention": false,
46
+ "out_channels": 3,
47
  "projection_class_embeddings_input_dim": null,
48
+ "resnet_out_scale_factor": 1.0,
49
+ "resnet_skip_time_act": false,
50
+ "resnet_time_scale_shift": "default",
51
+ "reverse_transformer_layers_per_block": null,
52
+ "sample_size": 32,
53
+ "time_cond_proj_dim": null,
54
+ "time_embedding_act_fn": null,
55
+ "time_embedding_dim": null,
56
+ "time_embedding_type": "positional",
57
+ "timestep_post_act": null,
58
  "transformer_layers_per_block": 1,
59
  "up_block_types": [
60
  "UpBlock2D",
 
62
  "CrossAttnUpBlock2D",
63
  "CrossAttnUpBlock2D"
64
  ],
65
+ "upcast_attention": false,
66
+ "use_linear_projection": false
67
  }
unet/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38bbb9f6dce4605ddffcac9228979fc02d7fdcf8c8d8a40f594ecba1f6b8689b
3
+ size 3438144492