Update configs
Browse files
- diffusion_net/config.json +2 -24
- reference_net/config.json +0 -1
diffusion_net/config.json
CHANGED
|
@@ -2,7 +2,6 @@
|
|
| 2 |
"_center_input_sample": false,
|
| 3 |
"_class_name": "UNet3DConditionModel",
|
| 4 |
"_diffusers_version": "0.31.0",
|
| 5 |
-
"_landmark_net": false,
|
| 6 |
"_out_channels": 4,
|
| 7 |
"act_fn": "silu",
|
| 8 |
"addition_embed_type": null,
|
|
@@ -32,7 +31,6 @@
|
|
| 32 |
"dropout": 0.0,
|
| 33 |
"dual_cross_attention": false,
|
| 34 |
"emo_drop_rate": 0.05,
|
| 35 |
-
"emotion_signal": true,
|
| 36 |
"encoder_hid_dim": null,
|
| 37 |
"encoder_hid_dim_type": null,
|
| 38 |
"flip_sin_to_cos": true,
|
|
@@ -42,7 +40,6 @@
|
|
| 42 |
"mid_block_only_cross_attention": null,
|
| 43 |
"mid_block_scale_factor": 1,
|
| 44 |
"mid_block_type": "UNetMidBlock3DCrossAttn",
|
| 45 |
-
"motion_module_decoder_only": false,
|
| 46 |
"motion_module_kwargs": {
|
| 47 |
"attention_block_types": [
|
| 48 |
"Temporal_Self",
|
|
@@ -52,17 +49,14 @@
|
|
| 52 |
"num_transformer_block": 1,
|
| 53 |
"temporal_attention_dim_div": 1,
|
| 54 |
"temporal_position_encoding": true,
|
| 55 |
-
"temporal_position_encoding_max_len": 32,
|
| 56 |
-
"use_linear_attn": true
|
| 57 |
},
|
| 58 |
-
"motion_module_mid_block": true,
|
| 59 |
"motion_module_resolutions": [
|
| 60 |
1,
|
| 61 |
2,
|
| 62 |
4,
|
| 63 |
8
|
| 64 |
],
|
| 65 |
-
"motion_module_type": "MemoryLinearAttn",
|
| 66 |
"norm_eps": 1e-05,
|
| 67 |
"norm_num_groups": 32,
|
| 68 |
"num_attention_heads": null,
|
|
@@ -73,24 +67,12 @@
|
|
| 73 |
"resnet_time_scale_shift": "default",
|
| 74 |
"reverse_transformer_layers_per_block": null,
|
| 75 |
"sample_size": 64,
|
| 76 |
-
"stack_enable_blocks_depth": [
|
| 77 |
-
0,
|
| 78 |
-
1,
|
| 79 |
-
2,
|
| 80 |
-
3
|
| 81 |
-
],
|
| 82 |
-
"stack_enable_blocks_name": [
|
| 83 |
-
"up",
|
| 84 |
-
"down",
|
| 85 |
-
"mid"
|
| 86 |
-
],
|
| 87 |
"time_cond_proj_dim": null,
|
| 88 |
"time_embedding_act_fn": null,
|
| 89 |
"time_embedding_dim": null,
|
| 90 |
"time_embedding_type": "positional",
|
| 91 |
"timestep_post_act": null,
|
| 92 |
"transformer_layers_per_block": 1,
|
| 93 |
-
"two_branches_atten": true,
|
| 94 |
"unet_use_cross_frame_attention": false,
|
| 95 |
"unet_use_temporal_attention": false,
|
| 96 |
"up_block_types": [
|
|
@@ -100,10 +82,6 @@
|
|
| 100 |
"CrossAttnUpBlock3D"
|
| 101 |
],
|
| 102 |
"upcast_attention": false,
|
| 103 |
-
"use_audio_module": true,
|
| 104 |
-
"use_face_masks": false,
|
| 105 |
"use_inflated_groupnorm": true,
|
| 106 |
-
"use_linear_projection": false,
|
| 107 |
-
"use_motion_module": true,
|
| 108 |
-
"use_past_frames": true
|
| 109 |
}
|
|
|
|
| 2 |
"_center_input_sample": false,
|
| 3 |
"_class_name": "UNet3DConditionModel",
|
| 4 |
"_diffusers_version": "0.31.0",
|
|
|
|
| 5 |
"_out_channels": 4,
|
| 6 |
"act_fn": "silu",
|
| 7 |
"addition_embed_type": null,
|
|
|
|
| 31 |
"dropout": 0.0,
|
| 32 |
"dual_cross_attention": false,
|
| 33 |
"emo_drop_rate": 0.05,
|
|
|
|
| 34 |
"encoder_hid_dim": null,
|
| 35 |
"encoder_hid_dim_type": null,
|
| 36 |
"flip_sin_to_cos": true,
|
|
|
|
| 40 |
"mid_block_only_cross_attention": null,
|
| 41 |
"mid_block_scale_factor": 1,
|
| 42 |
"mid_block_type": "UNetMidBlock3DCrossAttn",
|
|
|
|
| 43 |
"motion_module_kwargs": {
|
| 44 |
"attention_block_types": [
|
| 45 |
"Temporal_Self",
|
|
|
|
| 49 |
"num_transformer_block": 1,
|
| 50 |
"temporal_attention_dim_div": 1,
|
| 51 |
"temporal_position_encoding": true,
|
| 52 |
+
"temporal_position_encoding_max_len": 32
|
|
|
|
| 53 |
},
|
|
|
|
| 54 |
"motion_module_resolutions": [
|
| 55 |
1,
|
| 56 |
2,
|
| 57 |
4,
|
| 58 |
8
|
| 59 |
],
|
|
|
|
| 60 |
"norm_eps": 1e-05,
|
| 61 |
"norm_num_groups": 32,
|
| 62 |
"num_attention_heads": null,
|
|
|
|
| 67 |
"resnet_time_scale_shift": "default",
|
| 68 |
"reverse_transformer_layers_per_block": null,
|
| 69 |
"sample_size": 64,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
"time_cond_proj_dim": null,
|
| 71 |
"time_embedding_act_fn": null,
|
| 72 |
"time_embedding_dim": null,
|
| 73 |
"time_embedding_type": "positional",
|
| 74 |
"timestep_post_act": null,
|
| 75 |
"transformer_layers_per_block": 1,
|
|
|
|
| 76 |
"unet_use_cross_frame_attention": false,
|
| 77 |
"unet_use_temporal_attention": false,
|
| 78 |
"up_block_types": [
|
|
|
|
| 82 |
"CrossAttnUpBlock3D"
|
| 83 |
],
|
| 84 |
"upcast_attention": false,
|
|
|
|
|
|
|
| 85 |
"use_inflated_groupnorm": true,
|
| 86 |
+
"use_linear_projection": false
|
|
|
|
|
|
|
| 87 |
}
|
reference_net/config.json
CHANGED
|
@@ -2,7 +2,6 @@
|
|
| 2 |
"_center_input_sample": false,
|
| 3 |
"_class_name": "UNet2DConditionModel",
|
| 4 |
"_diffusers_version": "0.31.0",
|
| 5 |
-
"_landmark_net": false,
|
| 6 |
"_out_channels": 4,
|
| 7 |
"act_fn": "silu",
|
| 8 |
"addition_embed_type": null,
|
|
|
|
| 2 |
"_center_input_sample": false,
|
| 3 |
"_class_name": "UNet2DConditionModel",
|
| 4 |
"_diffusers_version": "0.31.0",
|
|
|
|
| 5 |
"_out_channels": 4,
|
| 6 |
"act_fn": "silu",
|
| 7 |
"addition_embed_type": null,
|