a-r-r-o-w HF staff committed on
Commit
7defb98
1 Parent(s): 43d0668
Files changed (2) hide show
  1. transformer/config.json +1 -13
  2. vae/config.json +1 -23
transformer/config.json CHANGED
@@ -4,29 +4,21 @@
4
  "activation_fn": "gelu-approximate",
5
  "attention_bias": true,
6
  "attention_head_dim": 96,
7
- "ca_attention_mode": "xformers",
8
  "caption_channels": 4096,
9
  "cross_attention_dim": 2304,
10
- "double_self_attention": false,
11
- "downsampler": null,
12
  "dropout": 0.0,
13
  "in_channels": 4,
14
  "interpolation_scale_h": 2.0,
15
  "interpolation_scale_t": 2.2,
16
  "interpolation_scale_w": 2.0,
17
- "model_max_length": 300,
18
  "norm_elementwise_affine": false,
19
  "norm_eps": 1e-06,
20
  "norm_type": "ada_norm_single",
21
  "num_attention_heads": 24,
22
- "num_embeds_ada_norm": 1000,
23
  "num_layers": 32,
24
- "only_cross_attention": false,
25
  "out_channels": 4,
26
  "patch_size": 2,
27
  "patch_size_t": 1,
28
- "patch_size_temporal": 1,
29
- "sa_attention_mode": "flash",
30
  "sample_frames": 22,
31
  "sample_height": 90,
32
  "sample_size": [
@@ -34,9 +26,5 @@
34
  160
35
  ],
36
  "sample_size_t": 22,
37
- "sample_width": 160,
38
- "upcast_attention": false,
39
- "use_additional_conditions": null,
40
- "use_linear_projection": false,
41
- "use_rope": true
42
  }
 
4
  "activation_fn": "gelu-approximate",
5
  "attention_bias": true,
6
  "attention_head_dim": 96,
 
7
  "caption_channels": 4096,
8
  "cross_attention_dim": 2304,
 
 
9
  "dropout": 0.0,
10
  "in_channels": 4,
11
  "interpolation_scale_h": 2.0,
12
  "interpolation_scale_t": 2.2,
13
  "interpolation_scale_w": 2.0,
 
14
  "norm_elementwise_affine": false,
15
  "norm_eps": 1e-06,
16
  "norm_type": "ada_norm_single",
17
  "num_attention_heads": 24,
 
18
  "num_layers": 32,
 
19
  "out_channels": 4,
20
  "patch_size": 2,
21
  "patch_size_t": 1,
 
 
22
  "sample_frames": 22,
23
  "sample_height": 90,
24
  "sample_size": [
 
26
  160
27
  ],
28
  "sample_size_t": 22,
29
+ "sample_width": 160
 
 
 
 
30
  }
vae/config.json CHANGED
@@ -8,20 +8,6 @@
8
  512,
9
  512
10
  ],
11
- "blocks_tempdown_li": [
12
- true,
13
- true,
14
- false,
15
- false
16
- ],
17
- "blocks_tempup_li": [
18
- false,
19
- true,
20
- true,
21
- false
22
- ],
23
- "chunk_len": 24,
24
- "down_block_num": 4,
25
  "down_block_types": [
26
  "AllegroDownBlock3D",
27
  "AllegroDownBlock3D",
@@ -32,13 +18,10 @@
32
  "in_channels": 3,
33
  "latent_channels": 4,
34
  "layers_per_block": 2,
35
- "load_mode": "full",
36
  "norm_num_groups": 32,
37
  "out_channels": 3,
38
  "sample_size": 320,
39
- "scale_factor": 0.13,
40
- "scaling_factor": 0.13235,
41
- "t_over": 8,
42
  "temporal_compression_ratio": 4,
43
  "temporal_downsample_blocks": [
44
  true,
@@ -52,11 +35,6 @@
52
  true,
53
  false
54
  ],
55
- "tile_overlap": [
56
- 120,
57
- 80
58
- ],
59
- "up_block_num": 4,
60
  "up_block_types": [
61
  "AllegroUpBlock3D",
62
  "AllegroUpBlock3D",
 
8
  512,
9
  512
10
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  "down_block_types": [
12
  "AllegroDownBlock3D",
13
  "AllegroDownBlock3D",
 
18
  "in_channels": 3,
19
  "latent_channels": 4,
20
  "layers_per_block": 2,
 
21
  "norm_num_groups": 32,
22
  "out_channels": 3,
23
  "sample_size": 320,
24
+ "scaling_factor": 0.13,
 
 
25
  "temporal_compression_ratio": 4,
26
  "temporal_downsample_blocks": [
27
  true,
 
35
  true,
36
  false
37
  ],
 
 
 
 
 
38
  "up_block_types": [
39
  "AllegroUpBlock3D",
40
  "AllegroUpBlock3D",