MaykaGR commited on
Commit
078a1ac
·
verified ·
1 Parent(s): fb0e426

Upload 21 files

Browse files
GraphView-CUSGEqGS.js ADDED
The diff for this file is too large to render. See raw diff
 
causal_conv3d.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple, Union
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import comfy.ops
6
+ ops = comfy.ops.disable_weight_init
7
+
8
+
9
+ class CausalConv3d(nn.Module):
10
+ def __init__(
11
+ self,
12
+ in_channels,
13
+ out_channels,
14
+ kernel_size: int = 3,
15
+ stride: Union[int, Tuple[int]] = 1,
16
+ dilation: int = 1,
17
+ groups: int = 1,
18
+ **kwargs,
19
+ ):
20
+ super().__init__()
21
+
22
+ self.in_channels = in_channels
23
+ self.out_channels = out_channels
24
+
25
+ kernel_size = (kernel_size, kernel_size, kernel_size)
26
+ self.time_kernel_size = kernel_size[0]
27
+
28
+ dilation = (dilation, 1, 1)
29
+
30
+ height_pad = kernel_size[1] // 2
31
+ width_pad = kernel_size[2] // 2
32
+ padding = (0, height_pad, width_pad)
33
+
34
+ self.conv = ops.Conv3d(
35
+ in_channels,
36
+ out_channels,
37
+ kernel_size,
38
+ stride=stride,
39
+ dilation=dilation,
40
+ padding=padding,
41
+ padding_mode="zeros",
42
+ groups=groups,
43
+ )
44
+
45
+ def forward(self, x, causal: bool = True):
46
+ if causal:
47
+ first_frame_pad = x[:, :, :1, :, :].repeat(
48
+ (1, 1, self.time_kernel_size - 1, 1, 1)
49
+ )
50
+ x = torch.concatenate((first_frame_pad, x), dim=2)
51
+ else:
52
+ first_frame_pad = x[:, :, :1, :, :].repeat(
53
+ (1, 1, (self.time_kernel_size - 1) // 2, 1, 1)
54
+ )
55
+ last_frame_pad = x[:, :, -1:, :, :].repeat(
56
+ (1, 1, (self.time_kernel_size - 1) // 2, 1, 1)
57
+ )
58
+ x = torch.concatenate((first_frame_pad, x, last_frame_pad), dim=2)
59
+ x = self.conv(x)
60
+ return x
61
+
62
+ @property
63
+ def weight(self):
64
+ return self.conv.weight
causal_video_autoencoder.py ADDED
@@ -0,0 +1,907 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch import nn
3
+ from functools import partial
4
+ import math
5
+ from einops import rearrange
6
+ from typing import Optional, Tuple, Union
7
+ from .conv_nd_factory import make_conv_nd, make_linear_nd
8
+ from .pixel_norm import PixelNorm
9
+ from ..model import PixArtAlphaCombinedTimestepSizeEmbeddings
10
+ import comfy.ops
11
+ ops = comfy.ops.disable_weight_init
12
+
13
+ class Encoder(nn.Module):
14
+ r"""
15
+ The `Encoder` layer of a variational autoencoder that encodes its input into a latent representation.
16
+
17
+ Args:
18
+ dims (`int` or `Tuple[int, int]`, *optional*, defaults to 3):
19
+ The number of dimensions to use in convolutions.
20
+ in_channels (`int`, *optional*, defaults to 3):
21
+ The number of input channels.
22
+ out_channels (`int`, *optional*, defaults to 3):
23
+ The number of output channels.
24
+ blocks (`List[Tuple[str, int]]`, *optional*, defaults to `[("res_x", 1)]`):
25
+ The blocks to use. Each block is a tuple of the block name and the number of layers.
26
+ base_channels (`int`, *optional*, defaults to 128):
27
+ The number of output channels for the first convolutional layer.
28
+ norm_num_groups (`int`, *optional*, defaults to 32):
29
+ The number of groups for normalization.
30
+ patch_size (`int`, *optional*, defaults to 1):
31
+ The patch size to use. Should be a power of 2.
32
+ norm_layer (`str`, *optional*, defaults to `group_norm`):
33
+ The normalization layer to use. Can be either `group_norm` or `pixel_norm`.
34
+ latent_log_var (`str`, *optional*, defaults to `per_channel`):
35
+ The number of channels for the log variance. Can be either `per_channel`, `uniform`, or `none`.
36
+ """
37
+
38
+ def __init__(
39
+ self,
40
+ dims: Union[int, Tuple[int, int]] = 3,
41
+ in_channels: int = 3,
42
+ out_channels: int = 3,
43
+ blocks=[("res_x", 1)],
44
+ base_channels: int = 128,
45
+ norm_num_groups: int = 32,
46
+ patch_size: Union[int, Tuple[int]] = 1,
47
+ norm_layer: str = "group_norm", # group_norm, pixel_norm
48
+ latent_log_var: str = "per_channel",
49
+ ):
50
+ super().__init__()
51
+ self.patch_size = patch_size
52
+ self.norm_layer = norm_layer
53
+ self.latent_channels = out_channels
54
+ self.latent_log_var = latent_log_var
55
+ self.blocks_desc = blocks
56
+
57
+ in_channels = in_channels * patch_size**2
58
+ output_channel = base_channels
59
+
60
+ self.conv_in = make_conv_nd(
61
+ dims=dims,
62
+ in_channels=in_channels,
63
+ out_channels=output_channel,
64
+ kernel_size=3,
65
+ stride=1,
66
+ padding=1,
67
+ causal=True,
68
+ )
69
+
70
+ self.down_blocks = nn.ModuleList([])
71
+
72
+ for block_name, block_params in blocks:
73
+ input_channel = output_channel
74
+ if isinstance(block_params, int):
75
+ block_params = {"num_layers": block_params}
76
+
77
+ if block_name == "res_x":
78
+ block = UNetMidBlock3D(
79
+ dims=dims,
80
+ in_channels=input_channel,
81
+ num_layers=block_params["num_layers"],
82
+ resnet_eps=1e-6,
83
+ resnet_groups=norm_num_groups,
84
+ norm_layer=norm_layer,
85
+ )
86
+ elif block_name == "res_x_y":
87
+ output_channel = block_params.get("multiplier", 2) * output_channel
88
+ block = ResnetBlock3D(
89
+ dims=dims,
90
+ in_channels=input_channel,
91
+ out_channels=output_channel,
92
+ eps=1e-6,
93
+ groups=norm_num_groups,
94
+ norm_layer=norm_layer,
95
+ )
96
+ elif block_name == "compress_time":
97
+ block = make_conv_nd(
98
+ dims=dims,
99
+ in_channels=input_channel,
100
+ out_channels=output_channel,
101
+ kernel_size=3,
102
+ stride=(2, 1, 1),
103
+ causal=True,
104
+ )
105
+ elif block_name == "compress_space":
106
+ block = make_conv_nd(
107
+ dims=dims,
108
+ in_channels=input_channel,
109
+ out_channels=output_channel,
110
+ kernel_size=3,
111
+ stride=(1, 2, 2),
112
+ causal=True,
113
+ )
114
+ elif block_name == "compress_all":
115
+ block = make_conv_nd(
116
+ dims=dims,
117
+ in_channels=input_channel,
118
+ out_channels=output_channel,
119
+ kernel_size=3,
120
+ stride=(2, 2, 2),
121
+ causal=True,
122
+ )
123
+ elif block_name == "compress_all_x_y":
124
+ output_channel = block_params.get("multiplier", 2) * output_channel
125
+ block = make_conv_nd(
126
+ dims=dims,
127
+ in_channels=input_channel,
128
+ out_channels=output_channel,
129
+ kernel_size=3,
130
+ stride=(2, 2, 2),
131
+ causal=True,
132
+ )
133
+ else:
134
+ raise ValueError(f"unknown block: {block_name}")
135
+
136
+ self.down_blocks.append(block)
137
+
138
+ # out
139
+ if norm_layer == "group_norm":
140
+ self.conv_norm_out = nn.GroupNorm(
141
+ num_channels=output_channel, num_groups=norm_num_groups, eps=1e-6
142
+ )
143
+ elif norm_layer == "pixel_norm":
144
+ self.conv_norm_out = PixelNorm()
145
+ elif norm_layer == "layer_norm":
146
+ self.conv_norm_out = LayerNorm(output_channel, eps=1e-6)
147
+
148
+ self.conv_act = nn.SiLU()
149
+
150
+ conv_out_channels = out_channels
151
+ if latent_log_var == "per_channel":
152
+ conv_out_channels *= 2
153
+ elif latent_log_var == "uniform":
154
+ conv_out_channels += 1
155
+ elif latent_log_var != "none":
156
+ raise ValueError(f"Invalid latent_log_var: {latent_log_var}")
157
+ self.conv_out = make_conv_nd(
158
+ dims, output_channel, conv_out_channels, 3, padding=1, causal=True
159
+ )
160
+
161
+ self.gradient_checkpointing = False
162
+
163
+ def forward(self, sample: torch.FloatTensor) -> torch.FloatTensor:
164
+ r"""The forward method of the `Encoder` class."""
165
+
166
+ sample = patchify(sample, patch_size_hw=self.patch_size, patch_size_t=1)
167
+ sample = self.conv_in(sample)
168
+
169
+ checkpoint_fn = (
170
+ partial(torch.utils.checkpoint.checkpoint, use_reentrant=False)
171
+ if self.gradient_checkpointing and self.training
172
+ else lambda x: x
173
+ )
174
+
175
+ for down_block in self.down_blocks:
176
+ sample = checkpoint_fn(down_block)(sample)
177
+
178
+ sample = self.conv_norm_out(sample)
179
+ sample = self.conv_act(sample)
180
+ sample = self.conv_out(sample)
181
+
182
+ if self.latent_log_var == "uniform":
183
+ last_channel = sample[:, -1:, ...]
184
+ num_dims = sample.dim()
185
+
186
+ if num_dims == 4:
187
+ # For shape (B, C, H, W)
188
+ repeated_last_channel = last_channel.repeat(
189
+ 1, sample.shape[1] - 2, 1, 1
190
+ )
191
+ sample = torch.cat([sample, repeated_last_channel], dim=1)
192
+ elif num_dims == 5:
193
+ # For shape (B, C, F, H, W)
194
+ repeated_last_channel = last_channel.repeat(
195
+ 1, sample.shape[1] - 2, 1, 1, 1
196
+ )
197
+ sample = torch.cat([sample, repeated_last_channel], dim=1)
198
+ else:
199
+ raise ValueError(f"Invalid input shape: {sample.shape}")
200
+
201
+ return sample
202
+
203
+
204
+ class Decoder(nn.Module):
205
+ r"""
206
+ The `Decoder` layer of a variational autoencoder that decodes its latent representation into an output sample.
207
+
208
+ Args:
209
+ dims (`int` or `Tuple[int, int]`, *optional*, defaults to 3):
210
+ The number of dimensions to use in convolutions.
211
+ in_channels (`int`, *optional*, defaults to 3):
212
+ The number of input channels.
213
+ out_channels (`int`, *optional*, defaults to 3):
214
+ The number of output channels.
215
+ blocks (`List[Tuple[str, int]]`, *optional*, defaults to `[("res_x", 1)]`):
216
+ The blocks to use. Each block is a tuple of the block name and the number of layers.
217
+ base_channels (`int`, *optional*, defaults to 128):
218
+ The number of output channels for the first convolutional layer.
219
+ norm_num_groups (`int`, *optional*, defaults to 32):
220
+ The number of groups for normalization.
221
+ patch_size (`int`, *optional*, defaults to 1):
222
+ The patch size to use. Should be a power of 2.
223
+ norm_layer (`str`, *optional*, defaults to `group_norm`):
224
+ The normalization layer to use. Can be either `group_norm` or `pixel_norm`.
225
+ causal (`bool`, *optional*, defaults to `True`):
226
+ Whether to use causal convolutions or not.
227
+ """
228
+
229
+ def __init__(
230
+ self,
231
+ dims,
232
+ in_channels: int = 3,
233
+ out_channels: int = 3,
234
+ blocks=[("res_x", 1)],
235
+ base_channels: int = 128,
236
+ layers_per_block: int = 2,
237
+ norm_num_groups: int = 32,
238
+ patch_size: int = 1,
239
+ norm_layer: str = "group_norm",
240
+ causal: bool = True,
241
+ timestep_conditioning: bool = False,
242
+ ):
243
+ super().__init__()
244
+ self.patch_size = patch_size
245
+ self.layers_per_block = layers_per_block
246
+ out_channels = out_channels * patch_size**2
247
+ self.causal = causal
248
+ self.blocks_desc = blocks
249
+
250
+ # Compute output channel to be product of all channel-multiplier blocks
251
+ output_channel = base_channels
252
+ for block_name, block_params in list(reversed(blocks)):
253
+ block_params = block_params if isinstance(block_params, dict) else {}
254
+ if block_name == "res_x_y":
255
+ output_channel = output_channel * block_params.get("multiplier", 2)
256
+ if block_name == "compress_all":
257
+ output_channel = output_channel * block_params.get("multiplier", 1)
258
+
259
+ self.conv_in = make_conv_nd(
260
+ dims,
261
+ in_channels,
262
+ output_channel,
263
+ kernel_size=3,
264
+ stride=1,
265
+ padding=1,
266
+ causal=True,
267
+ )
268
+
269
+ self.up_blocks = nn.ModuleList([])
270
+
271
+ for block_name, block_params in list(reversed(blocks)):
272
+ input_channel = output_channel
273
+ if isinstance(block_params, int):
274
+ block_params = {"num_layers": block_params}
275
+
276
+ if block_name == "res_x":
277
+ block = UNetMidBlock3D(
278
+ dims=dims,
279
+ in_channels=input_channel,
280
+ num_layers=block_params["num_layers"],
281
+ resnet_eps=1e-6,
282
+ resnet_groups=norm_num_groups,
283
+ norm_layer=norm_layer,
284
+ inject_noise=block_params.get("inject_noise", False),
285
+ timestep_conditioning=timestep_conditioning,
286
+ )
287
+ elif block_name == "attn_res_x":
288
+ block = UNetMidBlock3D(
289
+ dims=dims,
290
+ in_channels=input_channel,
291
+ num_layers=block_params["num_layers"],
292
+ resnet_groups=norm_num_groups,
293
+ norm_layer=norm_layer,
294
+ inject_noise=block_params.get("inject_noise", False),
295
+ timestep_conditioning=timestep_conditioning,
296
+ attention_head_dim=block_params["attention_head_dim"],
297
+ )
298
+ elif block_name == "res_x_y":
299
+ output_channel = output_channel // block_params.get("multiplier", 2)
300
+ block = ResnetBlock3D(
301
+ dims=dims,
302
+ in_channels=input_channel,
303
+ out_channels=output_channel,
304
+ eps=1e-6,
305
+ groups=norm_num_groups,
306
+ norm_layer=norm_layer,
307
+ inject_noise=block_params.get("inject_noise", False),
308
+ timestep_conditioning=False,
309
+ )
310
+ elif block_name == "compress_time":
311
+ block = DepthToSpaceUpsample(
312
+ dims=dims, in_channels=input_channel, stride=(2, 1, 1)
313
+ )
314
+ elif block_name == "compress_space":
315
+ block = DepthToSpaceUpsample(
316
+ dims=dims, in_channels=input_channel, stride=(1, 2, 2)
317
+ )
318
+ elif block_name == "compress_all":
319
+ output_channel = output_channel // block_params.get("multiplier", 1)
320
+ block = DepthToSpaceUpsample(
321
+ dims=dims,
322
+ in_channels=input_channel,
323
+ stride=(2, 2, 2),
324
+ residual=block_params.get("residual", False),
325
+ out_channels_reduction_factor=block_params.get("multiplier", 1),
326
+ )
327
+ else:
328
+ raise ValueError(f"unknown layer: {block_name}")
329
+
330
+ self.up_blocks.append(block)
331
+
332
+ if norm_layer == "group_norm":
333
+ self.conv_norm_out = nn.GroupNorm(
334
+ num_channels=output_channel, num_groups=norm_num_groups, eps=1e-6
335
+ )
336
+ elif norm_layer == "pixel_norm":
337
+ self.conv_norm_out = PixelNorm()
338
+ elif norm_layer == "layer_norm":
339
+ self.conv_norm_out = LayerNorm(output_channel, eps=1e-6)
340
+
341
+ self.conv_act = nn.SiLU()
342
+ self.conv_out = make_conv_nd(
343
+ dims, output_channel, out_channels, 3, padding=1, causal=True
344
+ )
345
+
346
+ self.gradient_checkpointing = False
347
+
348
+ self.timestep_conditioning = timestep_conditioning
349
+
350
+ if timestep_conditioning:
351
+ self.timestep_scale_multiplier = nn.Parameter(
352
+ torch.tensor(1000.0, dtype=torch.float32)
353
+ )
354
+ self.last_time_embedder = PixArtAlphaCombinedTimestepSizeEmbeddings(
355
+ output_channel * 2, 0, operations=ops,
356
+ )
357
+ self.last_scale_shift_table = nn.Parameter(torch.empty(2, output_channel))
358
+
359
+ # def forward(self, sample: torch.FloatTensor, target_shape) -> torch.FloatTensor:
360
+ def forward(
361
+ self,
362
+ sample: torch.FloatTensor,
363
+ timestep: Optional[torch.Tensor] = None,
364
+ ) -> torch.FloatTensor:
365
+ r"""The forward method of the `Decoder` class."""
366
+ batch_size = sample.shape[0]
367
+
368
+ sample = self.conv_in(sample, causal=self.causal)
369
+
370
+ checkpoint_fn = (
371
+ partial(torch.utils.checkpoint.checkpoint, use_reentrant=False)
372
+ if self.gradient_checkpointing and self.training
373
+ else lambda x: x
374
+ )
375
+
376
+ scaled_timestep = None
377
+ if self.timestep_conditioning:
378
+ assert (
379
+ timestep is not None
380
+ ), "should pass timestep with timestep_conditioning=True"
381
+ scaled_timestep = timestep * self.timestep_scale_multiplier.to(dtype=sample.dtype, device=sample.device)
382
+
383
+ for up_block in self.up_blocks:
384
+ if self.timestep_conditioning and isinstance(up_block, UNetMidBlock3D):
385
+ sample = checkpoint_fn(up_block)(
386
+ sample, causal=self.causal, timestep=scaled_timestep
387
+ )
388
+ else:
389
+ sample = checkpoint_fn(up_block)(sample, causal=self.causal)
390
+
391
+ sample = self.conv_norm_out(sample)
392
+
393
+ if self.timestep_conditioning:
394
+ embedded_timestep = self.last_time_embedder(
395
+ timestep=scaled_timestep.flatten(),
396
+ resolution=None,
397
+ aspect_ratio=None,
398
+ batch_size=sample.shape[0],
399
+ hidden_dtype=sample.dtype,
400
+ )
401
+ embedded_timestep = embedded_timestep.view(
402
+ batch_size, embedded_timestep.shape[-1], 1, 1, 1
403
+ )
404
+ ada_values = self.last_scale_shift_table[
405
+ None, ..., None, None, None
406
+ ].to(device=sample.device, dtype=sample.dtype) + embedded_timestep.reshape(
407
+ batch_size,
408
+ 2,
409
+ -1,
410
+ embedded_timestep.shape[-3],
411
+ embedded_timestep.shape[-2],
412
+ embedded_timestep.shape[-1],
413
+ )
414
+ shift, scale = ada_values.unbind(dim=1)
415
+ sample = sample * (1 + scale) + shift
416
+
417
+ sample = self.conv_act(sample)
418
+ sample = self.conv_out(sample, causal=self.causal)
419
+
420
+ sample = unpatchify(sample, patch_size_hw=self.patch_size, patch_size_t=1)
421
+
422
+ return sample
423
+
424
+
425
+ class UNetMidBlock3D(nn.Module):
426
+ """
427
+ A 3D UNet mid-block [`UNetMidBlock3D`] with multiple residual blocks.
428
+
429
+ Args:
430
+ in_channels (`int`): The number of input channels.
431
+ dropout (`float`, *optional*, defaults to 0.0): The dropout rate.
432
+ num_layers (`int`, *optional*, defaults to 1): The number of residual blocks.
433
+ resnet_eps (`float`, *optional*, 1e-6 ): The epsilon value for the resnet blocks.
434
+ resnet_groups (`int`, *optional*, defaults to 32):
435
+ The number of groups to use in the group normalization layers of the resnet blocks.
436
+
437
+ Returns:
438
+ `torch.FloatTensor`: The output of the last residual block, which is a tensor of shape `(batch_size,
439
+ in_channels, height, width)`.
440
+
441
+ """
442
+
443
+ def __init__(
444
+ self,
445
+ dims: Union[int, Tuple[int, int]],
446
+ in_channels: int,
447
+ dropout: float = 0.0,
448
+ num_layers: int = 1,
449
+ resnet_eps: float = 1e-6,
450
+ resnet_groups: int = 32,
451
+ norm_layer: str = "group_norm",
452
+ inject_noise: bool = False,
453
+ timestep_conditioning: bool = False,
454
+ ):
455
+ super().__init__()
456
+ resnet_groups = (
457
+ resnet_groups if resnet_groups is not None else min(in_channels // 4, 32)
458
+ )
459
+
460
+ self.timestep_conditioning = timestep_conditioning
461
+
462
+ if timestep_conditioning:
463
+ self.time_embedder = PixArtAlphaCombinedTimestepSizeEmbeddings(
464
+ in_channels * 4, 0, operations=ops,
465
+ )
466
+
467
+ self.res_blocks = nn.ModuleList(
468
+ [
469
+ ResnetBlock3D(
470
+ dims=dims,
471
+ in_channels=in_channels,
472
+ out_channels=in_channels,
473
+ eps=resnet_eps,
474
+ groups=resnet_groups,
475
+ dropout=dropout,
476
+ norm_layer=norm_layer,
477
+ inject_noise=inject_noise,
478
+ timestep_conditioning=timestep_conditioning,
479
+ )
480
+ for _ in range(num_layers)
481
+ ]
482
+ )
483
+
484
+ def forward(
485
+ self, hidden_states: torch.FloatTensor, causal: bool = True, timestep: Optional[torch.Tensor] = None
486
+ ) -> torch.FloatTensor:
487
+ timestep_embed = None
488
+ if self.timestep_conditioning:
489
+ assert (
490
+ timestep is not None
491
+ ), "should pass timestep with timestep_conditioning=True"
492
+ batch_size = hidden_states.shape[0]
493
+ timestep_embed = self.time_embedder(
494
+ timestep=timestep.flatten(),
495
+ resolution=None,
496
+ aspect_ratio=None,
497
+ batch_size=batch_size,
498
+ hidden_dtype=hidden_states.dtype,
499
+ )
500
+ timestep_embed = timestep_embed.view(
501
+ batch_size, timestep_embed.shape[-1], 1, 1, 1
502
+ )
503
+
504
+ for resnet in self.res_blocks:
505
+ hidden_states = resnet(hidden_states, causal=causal, timestep=timestep_embed)
506
+
507
+ return hidden_states
508
+
509
+
510
+ class DepthToSpaceUpsample(nn.Module):
511
+ def __init__(
512
+ self, dims, in_channels, stride, residual=False, out_channels_reduction_factor=1
513
+ ):
514
+ super().__init__()
515
+ self.stride = stride
516
+ self.out_channels = (
517
+ math.prod(stride) * in_channels // out_channels_reduction_factor
518
+ )
519
+ self.conv = make_conv_nd(
520
+ dims=dims,
521
+ in_channels=in_channels,
522
+ out_channels=self.out_channels,
523
+ kernel_size=3,
524
+ stride=1,
525
+ causal=True,
526
+ )
527
+ self.residual = residual
528
+ self.out_channels_reduction_factor = out_channels_reduction_factor
529
+
530
+ def forward(self, x, causal: bool = True, timestep: Optional[torch.Tensor] = None):
531
+ if self.residual:
532
+ # Reshape and duplicate the input to match the output shape
533
+ x_in = rearrange(
534
+ x,
535
+ "b (c p1 p2 p3) d h w -> b c (d p1) (h p2) (w p3)",
536
+ p1=self.stride[0],
537
+ p2=self.stride[1],
538
+ p3=self.stride[2],
539
+ )
540
+ num_repeat = math.prod(self.stride) // self.out_channels_reduction_factor
541
+ x_in = x_in.repeat(1, num_repeat, 1, 1, 1)
542
+ if self.stride[0] == 2:
543
+ x_in = x_in[:, :, 1:, :, :]
544
+ x = self.conv(x, causal=causal)
545
+ x = rearrange(
546
+ x,
547
+ "b (c p1 p2 p3) d h w -> b c (d p1) (h p2) (w p3)",
548
+ p1=self.stride[0],
549
+ p2=self.stride[1],
550
+ p3=self.stride[2],
551
+ )
552
+ if self.stride[0] == 2:
553
+ x = x[:, :, 1:, :, :]
554
+ if self.residual:
555
+ x = x + x_in
556
+ return x
557
+
558
+ class LayerNorm(nn.Module):
559
+ def __init__(self, dim, eps, elementwise_affine=True) -> None:
560
+ super().__init__()
561
+ self.norm = nn.LayerNorm(dim, eps=eps, elementwise_affine=elementwise_affine)
562
+
563
+ def forward(self, x):
564
+ x = rearrange(x, "b c d h w -> b d h w c")
565
+ x = self.norm(x)
566
+ x = rearrange(x, "b d h w c -> b c d h w")
567
+ return x
568
+
569
+
570
+ class ResnetBlock3D(nn.Module):
571
+ r"""
572
+ A Resnet block.
573
+
574
+ Parameters:
575
+ in_channels (`int`): The number of channels in the input.
576
+ out_channels (`int`, *optional*, default to be `None`):
577
+ The number of output channels for the first conv layer. If None, same as `in_channels`.
578
+ dropout (`float`, *optional*, defaults to `0.0`): The dropout probability to use.
579
+ groups (`int`, *optional*, default to `32`): The number of groups to use for the first normalization layer.
580
+ eps (`float`, *optional*, defaults to `1e-6`): The epsilon to use for the normalization.
581
+ """
582
+
583
+ def __init__(
584
+ self,
585
+ dims: Union[int, Tuple[int, int]],
586
+ in_channels: int,
587
+ out_channels: Optional[int] = None,
588
+ dropout: float = 0.0,
589
+ groups: int = 32,
590
+ eps: float = 1e-6,
591
+ norm_layer: str = "group_norm",
592
+ inject_noise: bool = False,
593
+ timestep_conditioning: bool = False,
594
+ ):
595
+ super().__init__()
596
+ self.in_channels = in_channels
597
+ out_channels = in_channels if out_channels is None else out_channels
598
+ self.out_channels = out_channels
599
+ self.inject_noise = inject_noise
600
+
601
+ if norm_layer == "group_norm":
602
+ self.norm1 = nn.GroupNorm(
603
+ num_groups=groups, num_channels=in_channels, eps=eps, affine=True
604
+ )
605
+ elif norm_layer == "pixel_norm":
606
+ self.norm1 = PixelNorm()
607
+ elif norm_layer == "layer_norm":
608
+ self.norm1 = LayerNorm(in_channels, eps=eps, elementwise_affine=True)
609
+
610
+ self.non_linearity = nn.SiLU()
611
+
612
+ self.conv1 = make_conv_nd(
613
+ dims,
614
+ in_channels,
615
+ out_channels,
616
+ kernel_size=3,
617
+ stride=1,
618
+ padding=1,
619
+ causal=True,
620
+ )
621
+
622
+ if inject_noise:
623
+ self.per_channel_scale1 = nn.Parameter(torch.zeros((in_channels, 1, 1)))
624
+
625
+ if norm_layer == "group_norm":
626
+ self.norm2 = nn.GroupNorm(
627
+ num_groups=groups, num_channels=out_channels, eps=eps, affine=True
628
+ )
629
+ elif norm_layer == "pixel_norm":
630
+ self.norm2 = PixelNorm()
631
+ elif norm_layer == "layer_norm":
632
+ self.norm2 = LayerNorm(out_channels, eps=eps, elementwise_affine=True)
633
+
634
+ self.dropout = torch.nn.Dropout(dropout)
635
+
636
+ self.conv2 = make_conv_nd(
637
+ dims,
638
+ out_channels,
639
+ out_channels,
640
+ kernel_size=3,
641
+ stride=1,
642
+ padding=1,
643
+ causal=True,
644
+ )
645
+
646
+ if inject_noise:
647
+ self.per_channel_scale2 = nn.Parameter(torch.zeros((in_channels, 1, 1)))
648
+
649
+ self.conv_shortcut = (
650
+ make_linear_nd(
651
+ dims=dims, in_channels=in_channels, out_channels=out_channels
652
+ )
653
+ if in_channels != out_channels
654
+ else nn.Identity()
655
+ )
656
+
657
+ self.norm3 = (
658
+ LayerNorm(in_channels, eps=eps, elementwise_affine=True)
659
+ if in_channels != out_channels
660
+ else nn.Identity()
661
+ )
662
+
663
+ self.timestep_conditioning = timestep_conditioning
664
+
665
+ if timestep_conditioning:
666
+ self.scale_shift_table = nn.Parameter(
667
+ torch.randn(4, in_channels) / in_channels**0.5
668
+ )
669
+
670
+ def _feed_spatial_noise(
671
+ self, hidden_states: torch.FloatTensor, per_channel_scale: torch.FloatTensor
672
+ ) -> torch.FloatTensor:
673
+ spatial_shape = hidden_states.shape[-2:]
674
+ device = hidden_states.device
675
+ dtype = hidden_states.dtype
676
+
677
+ # similar to the "explicit noise inputs" method in style-gan
678
+ spatial_noise = torch.randn(spatial_shape, device=device, dtype=dtype)[None]
679
+ scaled_noise = (spatial_noise * per_channel_scale)[None, :, None, ...]
680
+ hidden_states = hidden_states + scaled_noise
681
+
682
+ return hidden_states
683
+
684
+ def forward(
685
+ self,
686
+ input_tensor: torch.FloatTensor,
687
+ causal: bool = True,
688
+ timestep: Optional[torch.Tensor] = None,
689
+ ) -> torch.FloatTensor:
690
+ hidden_states = input_tensor
691
+ batch_size = hidden_states.shape[0]
692
+
693
+ hidden_states = self.norm1(hidden_states)
694
+ if self.timestep_conditioning:
695
+ assert (
696
+ timestep is not None
697
+ ), "should pass timestep with timestep_conditioning=True"
698
+ ada_values = self.scale_shift_table[
699
+ None, ..., None, None, None
700
+ ].to(device=hidden_states.device, dtype=hidden_states.dtype) + timestep.reshape(
701
+ batch_size,
702
+ 4,
703
+ -1,
704
+ timestep.shape[-3],
705
+ timestep.shape[-2],
706
+ timestep.shape[-1],
707
+ )
708
+ shift1, scale1, shift2, scale2 = ada_values.unbind(dim=1)
709
+
710
+ hidden_states = hidden_states * (1 + scale1) + shift1
711
+
712
+ hidden_states = self.non_linearity(hidden_states)
713
+
714
+ hidden_states = self.conv1(hidden_states, causal=causal)
715
+
716
+ if self.inject_noise:
717
+ hidden_states = self._feed_spatial_noise(
718
+ hidden_states, self.per_channel_scale1.to(device=hidden_states.device, dtype=hidden_states.dtype)
719
+ )
720
+
721
+ hidden_states = self.norm2(hidden_states)
722
+
723
+ if self.timestep_conditioning:
724
+ hidden_states = hidden_states * (1 + scale2) + shift2
725
+
726
+ hidden_states = self.non_linearity(hidden_states)
727
+
728
+ hidden_states = self.dropout(hidden_states)
729
+
730
+ hidden_states = self.conv2(hidden_states, causal=causal)
731
+
732
+ if self.inject_noise:
733
+ hidden_states = self._feed_spatial_noise(
734
+ hidden_states, self.per_channel_scale2.to(device=hidden_states.device, dtype=hidden_states.dtype)
735
+ )
736
+
737
+ input_tensor = self.norm3(input_tensor)
738
+
739
+ batch_size = input_tensor.shape[0]
740
+
741
+ input_tensor = self.conv_shortcut(input_tensor)
742
+
743
+ output_tensor = input_tensor + hidden_states
744
+
745
+ return output_tensor
746
+
747
+
748
+ def patchify(x, patch_size_hw, patch_size_t=1):
749
+ if patch_size_hw == 1 and patch_size_t == 1:
750
+ return x
751
+ if x.dim() == 4:
752
+ x = rearrange(
753
+ x, "b c (h q) (w r) -> b (c r q) h w", q=patch_size_hw, r=patch_size_hw
754
+ )
755
+ elif x.dim() == 5:
756
+ x = rearrange(
757
+ x,
758
+ "b c (f p) (h q) (w r) -> b (c p r q) f h w",
759
+ p=patch_size_t,
760
+ q=patch_size_hw,
761
+ r=patch_size_hw,
762
+ )
763
+ else:
764
+ raise ValueError(f"Invalid input shape: {x.shape}")
765
+
766
+ return x
767
+
768
+
769
+ def unpatchify(x, patch_size_hw, patch_size_t=1):
770
+ if patch_size_hw == 1 and patch_size_t == 1:
771
+ return x
772
+
773
+ if x.dim() == 4:
774
+ x = rearrange(
775
+ x, "b (c r q) h w -> b c (h q) (w r)", q=patch_size_hw, r=patch_size_hw
776
+ )
777
+ elif x.dim() == 5:
778
+ x = rearrange(
779
+ x,
780
+ "b (c p r q) f h w -> b c (f p) (h q) (w r)",
781
+ p=patch_size_t,
782
+ q=patch_size_hw,
783
+ r=patch_size_hw,
784
+ )
785
+
786
+ return x
787
+
788
+ class processor(nn.Module):
789
+ def __init__(self):
790
+ super().__init__()
791
+ self.register_buffer("std-of-means", torch.empty(128))
792
+ self.register_buffer("mean-of-means", torch.empty(128))
793
+ self.register_buffer("mean-of-stds", torch.empty(128))
794
+ self.register_buffer("mean-of-stds_over_std-of-means", torch.empty(128))
795
+ self.register_buffer("channel", torch.empty(128))
796
+
797
+ def un_normalize(self, x):
798
+ return (x * self.get_buffer("std-of-means").view(1, -1, 1, 1, 1).to(x)) + self.get_buffer("mean-of-means").view(1, -1, 1, 1, 1).to(x)
799
+
800
+ def normalize(self, x):
801
+ return (x - self.get_buffer("mean-of-means").view(1, -1, 1, 1, 1).to(x)) / self.get_buffer("std-of-means").view(1, -1, 1, 1, 1).to(x)
802
+
803
+ class VideoVAE(nn.Module):
804
+ def __init__(self, version=0):
805
+ super().__init__()
806
+
807
+ if version == 0:
808
+ config = {
809
+ "_class_name": "CausalVideoAutoencoder",
810
+ "dims": 3,
811
+ "in_channels": 3,
812
+ "out_channels": 3,
813
+ "latent_channels": 128,
814
+ "blocks": [
815
+ ["res_x", 4],
816
+ ["compress_all", 1],
817
+ ["res_x_y", 1],
818
+ ["res_x", 3],
819
+ ["compress_all", 1],
820
+ ["res_x_y", 1],
821
+ ["res_x", 3],
822
+ ["compress_all", 1],
823
+ ["res_x", 3],
824
+ ["res_x", 4],
825
+ ],
826
+ "scaling_factor": 1.0,
827
+ "norm_layer": "pixel_norm",
828
+ "patch_size": 4,
829
+ "latent_log_var": "uniform",
830
+ "use_quant_conv": False,
831
+ "causal_decoder": False,
832
+ }
833
+ else:
834
+ config = {
835
+ "_class_name": "CausalVideoAutoencoder",
836
+ "dims": 3,
837
+ "in_channels": 3,
838
+ "out_channels": 3,
839
+ "latent_channels": 128,
840
+ "decoder_blocks": [
841
+ ["res_x", {"num_layers": 5, "inject_noise": True}],
842
+ ["compress_all", {"residual": True, "multiplier": 2}],
843
+ ["res_x", {"num_layers": 6, "inject_noise": True}],
844
+ ["compress_all", {"residual": True, "multiplier": 2}],
845
+ ["res_x", {"num_layers": 7, "inject_noise": True}],
846
+ ["compress_all", {"residual": True, "multiplier": 2}],
847
+ ["res_x", {"num_layers": 8, "inject_noise": False}]
848
+ ],
849
+ "encoder_blocks": [
850
+ ["res_x", {"num_layers": 4}],
851
+ ["compress_all", {}],
852
+ ["res_x_y", 1],
853
+ ["res_x", {"num_layers": 3}],
854
+ ["compress_all", {}],
855
+ ["res_x_y", 1],
856
+ ["res_x", {"num_layers": 3}],
857
+ ["compress_all", {}],
858
+ ["res_x", {"num_layers": 3}],
859
+ ["res_x", {"num_layers": 4}]
860
+ ],
861
+ "scaling_factor": 1.0,
862
+ "norm_layer": "pixel_norm",
863
+ "patch_size": 4,
864
+ "latent_log_var": "uniform",
865
+ "use_quant_conv": False,
866
+ "causal_decoder": False,
867
+ "timestep_conditioning": True,
868
+ }
869
+
870
+ double_z = config.get("double_z", True)
871
+ latent_log_var = config.get(
872
+ "latent_log_var", "per_channel" if double_z else "none"
873
+ )
874
+
875
+ self.encoder = Encoder(
876
+ dims=config["dims"],
877
+ in_channels=config.get("in_channels", 3),
878
+ out_channels=config["latent_channels"],
879
+ blocks=config.get("encoder_blocks", config.get("encoder_blocks", config.get("blocks"))),
880
+ patch_size=config.get("patch_size", 1),
881
+ latent_log_var=latent_log_var,
882
+ norm_layer=config.get("norm_layer", "group_norm"),
883
+ )
884
+
885
+ self.decoder = Decoder(
886
+ dims=config["dims"],
887
+ in_channels=config["latent_channels"],
888
+ out_channels=config.get("out_channels", 3),
889
+ blocks=config.get("decoder_blocks", config.get("decoder_blocks", config.get("blocks"))),
890
+ patch_size=config.get("patch_size", 1),
891
+ norm_layer=config.get("norm_layer", "group_norm"),
892
+ causal=config.get("causal_decoder", False),
893
+ timestep_conditioning=config.get("timestep_conditioning", False),
894
+ )
895
+
896
+ self.timestep_conditioning = config.get("timestep_conditioning", False)
897
+ self.per_channel_statistics = processor()
898
+
899
+ def encode(self, x):
900
+ means, logvar = torch.chunk(self.encoder(x), 2, dim=1)
901
+ return self.per_channel_statistics.normalize(means)
902
+
903
+ def decode(self, x, timestep=0.05, noise_scale=0.025):
904
+ if self.timestep_conditioning: #TODO: seed
905
+ x = torch.randn_like(x) * noise_scale + (1.0 - noise_scale) * x
906
+ return self.decoder(self.per_channel_statistics.un_normalize(x), timestep=timestep)
907
+
conv_nd_factory.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple, Union
2
+
3
+
4
+ from .dual_conv3d import DualConv3d
5
+ from .causal_conv3d import CausalConv3d
6
+ import comfy.ops
7
+ ops = comfy.ops.disable_weight_init
8
+
9
+ def make_conv_nd(
10
+ dims: Union[int, Tuple[int, int]],
11
+ in_channels: int,
12
+ out_channels: int,
13
+ kernel_size: int,
14
+ stride=1,
15
+ padding=0,
16
+ dilation=1,
17
+ groups=1,
18
+ bias=True,
19
+ causal=False,
20
+ ):
21
+ if dims == 2:
22
+ return ops.Conv2d(
23
+ in_channels=in_channels,
24
+ out_channels=out_channels,
25
+ kernel_size=kernel_size,
26
+ stride=stride,
27
+ padding=padding,
28
+ dilation=dilation,
29
+ groups=groups,
30
+ bias=bias,
31
+ )
32
+ elif dims == 3:
33
+ if causal:
34
+ return CausalConv3d(
35
+ in_channels=in_channels,
36
+ out_channels=out_channels,
37
+ kernel_size=kernel_size,
38
+ stride=stride,
39
+ padding=padding,
40
+ dilation=dilation,
41
+ groups=groups,
42
+ bias=bias,
43
+ )
44
+ return ops.Conv3d(
45
+ in_channels=in_channels,
46
+ out_channels=out_channels,
47
+ kernel_size=kernel_size,
48
+ stride=stride,
49
+ padding=padding,
50
+ dilation=dilation,
51
+ groups=groups,
52
+ bias=bias,
53
+ )
54
+ elif dims == (2, 1):
55
+ return DualConv3d(
56
+ in_channels=in_channels,
57
+ out_channels=out_channels,
58
+ kernel_size=kernel_size,
59
+ stride=stride,
60
+ padding=padding,
61
+ bias=bias,
62
+ )
63
+ else:
64
+ raise ValueError(f"unsupported dimensions: {dims}")
65
+
66
+
67
+ def make_linear_nd(
68
+ dims: int,
69
+ in_channels: int,
70
+ out_channels: int,
71
+ bias=True,
72
+ ):
73
+ if dims == 2:
74
+ return ops.Conv2d(
75
+ in_channels=in_channels, out_channels=out_channels, kernel_size=1, bias=bias
76
+ )
77
+ elif dims == 3 or dims == (2, 1):
78
+ return ops.Conv3d(
79
+ in_channels=in_channels, out_channels=out_channels, kernel_size=1, bias=bias
80
+ )
81
+ else:
82
+ raise ValueError(f"unsupported dimensions: {dims}")
dual_conv3d.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ from typing import Tuple, Union
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.nn.functional as F
7
+ from einops import rearrange
8
+
9
+
10
+ class DualConv3d(nn.Module):
11
+ def __init__(
12
+ self,
13
+ in_channels,
14
+ out_channels,
15
+ kernel_size,
16
+ stride: Union[int, Tuple[int, int, int]] = 1,
17
+ padding: Union[int, Tuple[int, int, int]] = 0,
18
+ dilation: Union[int, Tuple[int, int, int]] = 1,
19
+ groups=1,
20
+ bias=True,
21
+ ):
22
+ super(DualConv3d, self).__init__()
23
+
24
+ self.in_channels = in_channels
25
+ self.out_channels = out_channels
26
+ # Ensure kernel_size, stride, padding, and dilation are tuples of length 3
27
+ if isinstance(kernel_size, int):
28
+ kernel_size = (kernel_size, kernel_size, kernel_size)
29
+ if kernel_size == (1, 1, 1):
30
+ raise ValueError(
31
+ "kernel_size must be greater than 1. Use make_linear_nd instead."
32
+ )
33
+ if isinstance(stride, int):
34
+ stride = (stride, stride, stride)
35
+ if isinstance(padding, int):
36
+ padding = (padding, padding, padding)
37
+ if isinstance(dilation, int):
38
+ dilation = (dilation, dilation, dilation)
39
+
40
+ # Set parameters for convolutions
41
+ self.groups = groups
42
+ self.bias = bias
43
+
44
+ # Define the size of the channels after the first convolution
45
+ intermediate_channels = (
46
+ out_channels if in_channels < out_channels else in_channels
47
+ )
48
+
49
+ # Define parameters for the first convolution
50
+ self.weight1 = nn.Parameter(
51
+ torch.Tensor(
52
+ intermediate_channels,
53
+ in_channels // groups,
54
+ 1,
55
+ kernel_size[1],
56
+ kernel_size[2],
57
+ )
58
+ )
59
+ self.stride1 = (1, stride[1], stride[2])
60
+ self.padding1 = (0, padding[1], padding[2])
61
+ self.dilation1 = (1, dilation[1], dilation[2])
62
+ if bias:
63
+ self.bias1 = nn.Parameter(torch.Tensor(intermediate_channels))
64
+ else:
65
+ self.register_parameter("bias1", None)
66
+
67
+ # Define parameters for the second convolution
68
+ self.weight2 = nn.Parameter(
69
+ torch.Tensor(
70
+ out_channels, intermediate_channels // groups, kernel_size[0], 1, 1
71
+ )
72
+ )
73
+ self.stride2 = (stride[0], 1, 1)
74
+ self.padding2 = (padding[0], 0, 0)
75
+ self.dilation2 = (dilation[0], 1, 1)
76
+ if bias:
77
+ self.bias2 = nn.Parameter(torch.Tensor(out_channels))
78
+ else:
79
+ self.register_parameter("bias2", None)
80
+
81
+ # Initialize weights and biases
82
+ self.reset_parameters()
83
+
84
+ def reset_parameters(self):
85
+ nn.init.kaiming_uniform_(self.weight1, a=math.sqrt(5))
86
+ nn.init.kaiming_uniform_(self.weight2, a=math.sqrt(5))
87
+ if self.bias:
88
+ fan_in1, _ = nn.init._calculate_fan_in_and_fan_out(self.weight1)
89
+ bound1 = 1 / math.sqrt(fan_in1)
90
+ nn.init.uniform_(self.bias1, -bound1, bound1)
91
+ fan_in2, _ = nn.init._calculate_fan_in_and_fan_out(self.weight2)
92
+ bound2 = 1 / math.sqrt(fan_in2)
93
+ nn.init.uniform_(self.bias2, -bound2, bound2)
94
+
95
+ def forward(self, x, use_conv3d=False, skip_time_conv=False):
96
+ if use_conv3d:
97
+ return self.forward_with_3d(x=x, skip_time_conv=skip_time_conv)
98
+ else:
99
+ return self.forward_with_2d(x=x, skip_time_conv=skip_time_conv)
100
+
101
+ def forward_with_3d(self, x, skip_time_conv):
102
+ # First convolution
103
+ x = F.conv3d(
104
+ x,
105
+ self.weight1,
106
+ self.bias1,
107
+ self.stride1,
108
+ self.padding1,
109
+ self.dilation1,
110
+ self.groups,
111
+ )
112
+
113
+ if skip_time_conv:
114
+ return x
115
+
116
+ # Second convolution
117
+ x = F.conv3d(
118
+ x,
119
+ self.weight2,
120
+ self.bias2,
121
+ self.stride2,
122
+ self.padding2,
123
+ self.dilation2,
124
+ self.groups,
125
+ )
126
+
127
+ return x
128
+
129
+ def forward_with_2d(self, x, skip_time_conv):
130
+ b, c, d, h, w = x.shape
131
+
132
+ # First 2D convolution
133
+ x = rearrange(x, "b c d h w -> (b d) c h w")
134
+ # Squeeze the depth dimension out of weight1 since it's 1
135
+ weight1 = self.weight1.squeeze(2)
136
+ # Select stride, padding, and dilation for the 2D convolution
137
+ stride1 = (self.stride1[1], self.stride1[2])
138
+ padding1 = (self.padding1[1], self.padding1[2])
139
+ dilation1 = (self.dilation1[1], self.dilation1[2])
140
+ x = F.conv2d(x, weight1, self.bias1, stride1, padding1, dilation1, self.groups)
141
+
142
+ _, _, h, w = x.shape
143
+
144
+ if skip_time_conv:
145
+ x = rearrange(x, "(b d) c h w -> b c d h w", b=b)
146
+ return x
147
+
148
+ # Second convolution which is essentially treated as a 1D convolution across the 'd' dimension
149
+ x = rearrange(x, "(b d) c h w -> (b h w) c d", b=b)
150
+
151
+ # Reshape weight2 to match the expected dimensions for conv1d
152
+ weight2 = self.weight2.squeeze(-1).squeeze(-1)
153
+ # Use only the relevant dimension for stride, padding, and dilation for the 1D convolution
154
+ stride2 = self.stride2[0]
155
+ padding2 = self.padding2[0]
156
+ dilation2 = self.dilation2[0]
157
+ x = F.conv1d(x, weight2, self.bias2, stride2, padding2, dilation2, self.groups)
158
+ x = rearrange(x, "(b h w) c d -> b c d h w", b=b, h=h, w=w)
159
+
160
+ return x
161
+
162
+ @property
163
+ def weight(self):
164
+ return self.weight2
165
+
166
+
167
+ def test_dual_conv3d_consistency():
168
+ # Initialize parameters
169
+ in_channels = 3
170
+ out_channels = 5
171
+ kernel_size = (3, 3, 3)
172
+ stride = (2, 2, 2)
173
+ padding = (1, 1, 1)
174
+
175
+ # Create an instance of the DualConv3d class
176
+ dual_conv3d = DualConv3d(
177
+ in_channels=in_channels,
178
+ out_channels=out_channels,
179
+ kernel_size=kernel_size,
180
+ stride=stride,
181
+ padding=padding,
182
+ bias=True,
183
+ )
184
+
185
+ # Example input tensor
186
+ test_input = torch.randn(1, 3, 10, 10, 10)
187
+
188
+ # Perform forward passes with both 3D and 2D settings
189
+ output_conv3d = dual_conv3d(test_input, use_conv3d=True)
190
+ output_2d = dual_conv3d(test_input, use_conv3d=False)
191
+
192
+ # Assert that the outputs from both methods are sufficiently close
193
+ assert torch.allclose(
194
+ output_conv3d, output_2d, atol=1e-6
195
+ ), "Outputs are not consistent between 3D and 2D convolutions."
index-4Hb32CNk.js ADDED
The diff for this file is too large to render. See raw diff
 
index-C1Hb_Yo9.css ADDED
@@ -0,0 +1,5129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* this CSS contains only the basic CSS needed to run the app and use it */
2
+
3
+ .lgraphcanvas {
4
+ /*cursor: crosshair;*/
5
+ user-select: none;
6
+ -moz-user-select: none;
7
+ -webkit-user-select: none;
8
+ outline: none;
9
+ font-family: Tahoma, sans-serif;
10
+ }
11
+
12
+ .lgraphcanvas * {
13
+ box-sizing: border-box;
14
+ }
15
+
16
+ .litegraph.litecontextmenu {
17
+ font-family: Tahoma, sans-serif;
18
+ position: fixed;
19
+ top: 100px;
20
+ left: 100px;
21
+ min-width: 100px;
22
+ color: #aaf;
23
+ padding: 0;
24
+ box-shadow: 0 0 10px black !important;
25
+ background-color: #2e2e2e !important;
26
+ z-index: 10;
27
+ max-height: -webkit-fill-available;
28
+ overflow-y: auto;
29
+ }
30
+
31
+ /* Enable scrolling overflow in Firefox */
32
+ @supports not (max-height: -webkit-fill-available) {
33
+ .litegraph.litecontextmenu {
34
+ max-height: 80vh;
35
+ overflow-y: scroll;
36
+ }
37
+ }
38
+
39
+ .litegraph.litecontextmenu.dark {
40
+ background-color: #000 !important;
41
+ }
42
+
43
+ .litegraph.litecontextmenu .litemenu-title img {
44
+ margin-top: 2px;
45
+ margin-left: 2px;
46
+ margin-right: 4px;
47
+ }
48
+
49
+ .litegraph.litecontextmenu .litemenu-entry {
50
+ margin: 2px;
51
+ padding: 2px;
52
+ }
53
+
54
+ .litegraph.litecontextmenu .litemenu-entry.submenu {
55
+ background-color: #2e2e2e !important;
56
+ }
57
+
58
+ .litegraph.litecontextmenu.dark .litemenu-entry.submenu {
59
+ background-color: #000 !important;
60
+ }
61
+
62
+ .litegraph .litemenubar ul {
63
+ font-family: Tahoma, sans-serif;
64
+ margin: 0;
65
+ padding: 0;
66
+ }
67
+
68
+ .litegraph .litemenubar li {
69
+ font-size: 14px;
70
+ color: #999;
71
+ display: inline-block;
72
+ min-width: 50px;
73
+ padding-left: 10px;
74
+ padding-right: 10px;
75
+ user-select: none;
76
+ -moz-user-select: none;
77
+ -webkit-user-select: none;
78
+ cursor: pointer;
79
+ }
80
+
81
+ .litegraph .litemenubar li:hover {
82
+ background-color: #777;
83
+ color: #eee;
84
+ }
85
+
86
+ .litegraph .litegraph .litemenubar-panel {
87
+ position: absolute;
88
+ top: 5px;
89
+ left: 5px;
90
+ min-width: 100px;
91
+ background-color: #444;
92
+ box-shadow: 0 0 3px black;
93
+ padding: 4px;
94
+ border-bottom: 2px solid #aaf;
95
+ z-index: 10;
96
+ }
97
+
98
+ .litegraph .litemenu-entry,
99
+ .litemenu-title {
100
+ font-size: 12px;
101
+ color: #aaa;
102
+ padding: 0 0 0 4px;
103
+ margin: 2px;
104
+ padding-left: 2px;
105
+ -moz-user-select: none;
106
+ -webkit-user-select: none;
107
+ user-select: none;
108
+ cursor: pointer;
109
+ }
110
+
111
+ .litegraph .litemenu-entry .icon {
112
+ display: inline-block;
113
+ width: 12px;
114
+ height: 12px;
115
+ margin: 2px;
116
+ vertical-align: top;
117
+ }
118
+
119
+ .litegraph .litemenu-entry.checked .icon {
120
+ background-color: #aaf;
121
+ }
122
+
123
+ .litegraph .litemenu-entry .more {
124
+ float: right;
125
+ padding-right: 5px;
126
+ }
127
+
128
+ .litegraph .litemenu-entry.disabled {
129
+ opacity: 0.5;
130
+ cursor: default;
131
+ }
132
+
133
+ .litegraph .litemenu-entry.separator {
134
+ display: block;
135
+ border-top: 1px solid #333;
136
+ border-bottom: 1px solid #666;
137
+ width: 100%;
138
+ height: 0px;
139
+ margin: 3px 0 2px 0;
140
+ background-color: transparent;
141
+ padding: 0 !important;
142
+ cursor: default !important;
143
+ }
144
+
145
+ .litegraph .litemenu-entry.has_submenu {
146
+ border-right: 2px solid cyan;
147
+ }
148
+
149
+ .litegraph .litemenu-title {
150
+ color: #dde;
151
+ background-color: #111;
152
+ margin: 0;
153
+ padding: 2px;
154
+ cursor: default;
155
+ }
156
+
157
+ .litegraph .litemenu-entry:hover:not(.disabled):not(.separator) {
158
+ background-color: #444 !important;
159
+ color: #eee;
160
+ transition: all 0.2s;
161
+ }
162
+
163
+ .litegraph .litemenu-entry .property_name {
164
+ display: inline-block;
165
+ text-align: left;
166
+ min-width: 80px;
167
+ min-height: 1.2em;
168
+ }
169
+
170
+ .litegraph .litemenu-entry .property_value {
171
+ display: inline-block;
172
+ background-color: rgba(0, 0, 0, 0.5);
173
+ text-align: right;
174
+ min-width: 80px;
175
+ min-height: 1.2em;
176
+ vertical-align: middle;
177
+ padding-right: 10px;
178
+ }
179
+
180
+ .litegraph.litesearchbox {
181
+ font-family: Tahoma, sans-serif;
182
+ position: absolute;
183
+ background-color: rgba(0, 0, 0, 0.5);
184
+ padding-top: 4px;
185
+ }
186
+
187
+ .litegraph.litesearchbox input,
188
+ .litegraph.litesearchbox select {
189
+ margin-top: 3px;
190
+ min-width: 60px;
191
+ min-height: 1.5em;
192
+ background-color: black;
193
+ border: 0;
194
+ color: white;
195
+ padding-left: 10px;
196
+ margin-right: 5px;
197
+ max-width: 300px;
198
+ }
199
+
200
+ .litegraph.litesearchbox .name {
201
+ display: inline-block;
202
+ min-width: 60px;
203
+ min-height: 1.5em;
204
+ padding-left: 10px;
205
+ }
206
+
207
+ .litegraph.litesearchbox .helper {
208
+ overflow: auto;
209
+ max-height: 200px;
210
+ margin-top: 2px;
211
+ }
212
+
213
+ .litegraph.lite-search-item {
214
+ font-family: Tahoma, sans-serif;
215
+ background-color: rgba(0, 0, 0, 0.5);
216
+ color: white;
217
+ padding-top: 2px;
218
+ }
219
+
220
+ .litegraph.lite-search-item.not_in_filter {
221
+ /*background-color: rgba(50, 50, 50, 0.5);*/
222
+ /*color: #999;*/
223
+ color: #b99;
224
+ font-style: italic;
225
+ }
226
+
227
+ .litegraph.lite-search-item.generic_type {
228
+ /*background-color: rgba(50, 50, 50, 0.5);*/
229
+ /*color: #DD9;*/
230
+ color: #999;
231
+ font-style: italic;
232
+ }
233
+
234
+ .litegraph.lite-search-item:hover,
235
+ .litegraph.lite-search-item.selected {
236
+ cursor: pointer;
237
+ background-color: white;
238
+ color: black;
239
+ }
240
+
241
+ .litegraph.lite-search-item-type {
242
+ display: inline-block;
243
+ background: rgba(0, 0, 0, 0.2);
244
+ margin-left: 5px;
245
+ font-size: 14px;
246
+ padding: 2px 5px;
247
+ position: relative;
248
+ top: -2px;
249
+ opacity: 0.8;
250
+ border-radius: 4px;
251
+ }
252
+
253
+ /* DIALOGs ******/
254
+
255
+ .litegraph .dialog {
256
+ position: absolute;
257
+ top: 50%;
258
+ left: 50%;
259
+ margin-top: -150px;
260
+ margin-left: -200px;
261
+
262
+ background-color: #2a2a2a;
263
+
264
+ min-width: 400px;
265
+ min-height: 200px;
266
+ box-shadow: 0 0 4px #111;
267
+ border-radius: 6px;
268
+ }
269
+
270
+ .litegraph .dialog.settings {
271
+ left: 10px;
272
+ top: 10px;
273
+ height: calc(100% - 20px);
274
+ margin: auto;
275
+ max-width: 50%;
276
+ }
277
+
278
+ .litegraph .dialog.centered {
279
+ top: 50px;
280
+ left: 50%;
281
+ position: absolute;
282
+ transform: translateX(-50%);
283
+ min-width: 600px;
284
+ min-height: 300px;
285
+ height: calc(100% - 100px);
286
+ margin: auto;
287
+ }
288
+
289
+ .litegraph .dialog .close {
290
+ float: right;
291
+ margin: 4px;
292
+ margin-right: 10px;
293
+ cursor: pointer;
294
+ font-size: 1.4em;
295
+ }
296
+
297
+ .litegraph .dialog .close:hover {
298
+ color: white;
299
+ }
300
+
301
+ .litegraph .dialog .dialog-header {
302
+ color: #aaa;
303
+ border-bottom: 1px solid #161616;
304
+ height: 40px;
305
+ }
306
+ .litegraph .dialog .dialog-footer {
307
+ height: 50px;
308
+ padding: 10px;
309
+ border-top: 1px solid #1a1a1a;
310
+ }
311
+
312
+ .litegraph .dialog .dialog-header .dialog-title {
313
+ font: 20px "Arial";
314
+ margin: 4px;
315
+ padding: 4px 10px;
316
+ display: inline-block;
317
+ }
318
+
319
+ .litegraph .dialog .dialog-content,
320
+ .litegraph .dialog .dialog-alt-content {
321
+ height: calc(100% - 90px);
322
+ width: 100%;
323
+ min-height: 100px;
324
+ display: inline-block;
325
+ color: #aaa;
326
+ /*background-color: black;*/
327
+ overflow: auto;
328
+ }
329
+
330
+ .litegraph .dialog .dialog-content h3 {
331
+ margin: 10px;
332
+ }
333
+
334
+ .litegraph .dialog .dialog-content .connections {
335
+ flex-direction: row;
336
+ }
337
+
338
+ .litegraph .dialog .dialog-content .connections .connections_side {
339
+ width: calc(50% - 5px);
340
+ min-height: 100px;
341
+ background-color: black;
342
+ display: flex;
343
+ }
344
+
345
+ .litegraph .dialog .node_type {
346
+ font-size: 1.2em;
347
+ display: block;
348
+ margin: 10px;
349
+ }
350
+
351
+ .litegraph .dialog .node_desc {
352
+ opacity: 0.5;
353
+ display: block;
354
+ margin: 10px;
355
+ }
356
+
357
+ .litegraph .dialog .separator {
358
+ display: block;
359
+ width: calc(100% - 4px);
360
+ height: 1px;
361
+ border-top: 1px solid #000;
362
+ border-bottom: 1px solid #333;
363
+ margin: 10px 2px;
364
+ padding: 0;
365
+ }
366
+
367
+ .litegraph .dialog .property {
368
+ margin-bottom: 2px;
369
+ padding: 4px;
370
+ }
371
+
372
+ .litegraph .dialog .property:hover {
373
+ background: #545454;
374
+ }
375
+
376
+ .litegraph .dialog .property_name {
377
+ color: #737373;
378
+ display: inline-block;
379
+ text-align: left;
380
+ vertical-align: top;
381
+ width: 160px;
382
+ padding-left: 4px;
383
+ overflow: hidden;
384
+ margin-right: 6px;
385
+ }
386
+
387
+ .litegraph .dialog .property:hover .property_name {
388
+ color: white;
389
+ }
390
+
391
+ .litegraph .dialog .property_value {
392
+ display: inline-block;
393
+ text-align: right;
394
+ color: #aaa;
395
+ background-color: #1a1a1a;
396
+ /*width: calc( 100% - 122px );*/
397
+ max-width: calc(100% - 162px);
398
+ min-width: 200px;
399
+ max-height: 300px;
400
+ min-height: 20px;
401
+ padding: 4px;
402
+ padding-right: 12px;
403
+ overflow: hidden;
404
+ cursor: pointer;
405
+ border-radius: 3px;
406
+ }
407
+
408
+ .litegraph .dialog .property_value:hover {
409
+ color: white;
410
+ }
411
+
412
+ .litegraph .dialog .property.boolean .property_value {
413
+ padding-right: 30px;
414
+ color: #a88;
415
+ /*width: auto;
416
+ float: right;*/
417
+ }
418
+
419
+ .litegraph .dialog .property.boolean.bool-on .property_name {
420
+ color: #8a8;
421
+ }
422
+ .litegraph .dialog .property.boolean.bool-on .property_value {
423
+ color: #8a8;
424
+ }
425
+
426
+ .litegraph .dialog .btn {
427
+ border: 0;
428
+ border-radius: 4px;
429
+ padding: 4px 20px;
430
+ margin-left: 0px;
431
+ background-color: #060606;
432
+ color: #8e8e8e;
433
+ }
434
+
435
+ .litegraph .dialog .btn:hover {
436
+ background-color: #111;
437
+ color: #fff;
438
+ }
439
+
440
+ .litegraph .dialog .btn.delete:hover {
441
+ background-color: #f33;
442
+ color: black;
443
+ }
444
+
445
+ .litegraph .subgraph_property {
446
+ padding: 4px;
447
+ }
448
+
449
+ .litegraph .subgraph_property:hover {
450
+ background-color: #333;
451
+ }
452
+
453
+ .litegraph .subgraph_property.extra {
454
+ margin-top: 8px;
455
+ }
456
+
457
+ .litegraph .subgraph_property span.name {
458
+ font-size: 1.3em;
459
+ padding-left: 4px;
460
+ }
461
+
462
+ .litegraph .subgraph_property span.type {
463
+ opacity: 0.5;
464
+ margin-right: 20px;
465
+ padding-left: 4px;
466
+ }
467
+
468
+ .litegraph .subgraph_property span.label {
469
+ display: inline-block;
470
+ width: 60px;
471
+ padding: 0px 10px;
472
+ }
473
+
474
+ .litegraph .subgraph_property input {
475
+ width: 140px;
476
+ color: #999;
477
+ background-color: #1a1a1a;
478
+ border-radius: 4px;
479
+ border: 0;
480
+ margin-right: 10px;
481
+ padding: 4px;
482
+ padding-left: 10px;
483
+ }
484
+
485
+ .litegraph .subgraph_property button {
486
+ background-color: #1c1c1c;
487
+ color: #aaa;
488
+ border: 0;
489
+ border-radius: 2px;
490
+ padding: 4px 10px;
491
+ cursor: pointer;
492
+ }
493
+
494
+ .litegraph .subgraph_property.extra {
495
+ color: #ccc;
496
+ }
497
+
498
+ .litegraph .subgraph_property.extra input {
499
+ background-color: #111;
500
+ }
501
+
502
+ .litegraph .bullet_icon {
503
+ margin-left: 10px;
504
+ border-radius: 10px;
505
+ width: 12px;
506
+ height: 12px;
507
+ background-color: #666;
508
+ display: inline-block;
509
+ margin-top: 2px;
510
+ margin-right: 4px;
511
+ transition: background-color 0.1s ease 0s;
512
+ -moz-transition: background-color 0.1s ease 0s;
513
+ }
514
+
515
+ .litegraph .bullet_icon:hover {
516
+ background-color: #698;
517
+ cursor: pointer;
518
+ }
519
+
520
+ /* OLD */
521
+
522
+ .graphcontextmenu {
523
+ padding: 4px;
524
+ min-width: 100px;
525
+ }
526
+
527
+ .graphcontextmenu-title {
528
+ color: #dde;
529
+ background-color: #222;
530
+ margin: 0;
531
+ padding: 2px;
532
+ cursor: default;
533
+ }
534
+
535
+ .graphmenu-entry {
536
+ box-sizing: border-box;
537
+ margin: 2px;
538
+ padding-left: 20px;
539
+ user-select: none;
540
+ -moz-user-select: none;
541
+ -webkit-user-select: none;
542
+ transition: all linear 0.3s;
543
+ }
544
+
545
+ .graphmenu-entry.event,
546
+ .litemenu-entry.event {
547
+ border-left: 8px solid orange;
548
+ padding-left: 12px;
549
+ }
550
+
551
+ .graphmenu-entry.disabled {
552
+ opacity: 0.3;
553
+ }
554
+
555
+ .graphmenu-entry.submenu {
556
+ border-right: 2px solid #eee;
557
+ }
558
+
559
+ .graphmenu-entry:hover {
560
+ background-color: #555;
561
+ }
562
+
563
+ .graphmenu-entry.separator {
564
+ background-color: #111;
565
+ border-bottom: 1px solid #666;
566
+ height: 1px;
567
+ width: calc(100% - 20px);
568
+ -moz-width: calc(100% - 20px);
569
+ -webkit-width: calc(100% - 20px);
570
+ }
571
+
572
+ .graphmenu-entry .property_name {
573
+ display: inline-block;
574
+ text-align: left;
575
+ min-width: 80px;
576
+ min-height: 1.2em;
577
+ }
578
+
579
+ .graphmenu-entry .property_value,
580
+ .litemenu-entry .property_value {
581
+ display: inline-block;
582
+ background-color: rgba(0, 0, 0, 0.5);
583
+ text-align: right;
584
+ min-width: 80px;
585
+ min-height: 1.2em;
586
+ vertical-align: middle;
587
+ padding-right: 10px;
588
+ }
589
+
590
+ .graphdialog {
591
+ position: absolute;
592
+ top: 10px;
593
+ left: 10px;
594
+ min-height: 2em;
595
+ background-color: #333;
596
+ font-size: 1.2em;
597
+ box-shadow: 0 0 10px black !important;
598
+ z-index: 10;
599
+ }
600
+
601
+ .graphdialog.rounded {
602
+ border-radius: 12px;
603
+ padding-right: 2px;
604
+ }
605
+
606
+ .graphdialog .name {
607
+ display: inline-block;
608
+ min-width: 60px;
609
+ min-height: 1.5em;
610
+ padding-left: 10px;
611
+ }
612
+
613
+ .graphdialog input,
614
+ .graphdialog textarea,
615
+ .graphdialog select {
616
+ margin: 3px;
617
+ min-width: 60px;
618
+ min-height: 1.5em;
619
+ background-color: black;
620
+ border: 0;
621
+ color: white;
622
+ padding-left: 10px;
623
+ outline: none;
624
+ }
625
+
626
+ .graphdialog textarea {
627
+ min-height: 150px;
628
+ }
629
+
630
+ .graphdialog button {
631
+ margin-top: 3px;
632
+ vertical-align: top;
633
+ background-color: #999;
634
+ border: 0;
635
+ }
636
+
637
+ .graphdialog button.rounded,
638
+ .graphdialog input.rounded {
639
+ border-radius: 0 12px 12px 0;
640
+ }
641
+
642
+ .graphdialog .helper {
643
+ overflow: auto;
644
+ max-height: 200px;
645
+ }
646
+
647
+ .graphdialog .help-item {
648
+ padding-left: 10px;
649
+ }
650
+
651
+ .graphdialog .help-item:hover,
652
+ .graphdialog .help-item.selected {
653
+ cursor: pointer;
654
+ background-color: white;
655
+ color: black;
656
+ }
657
+
658
+ .litegraph .dialog {
659
+ min-height: 0;
660
+ }
661
+ .litegraph .dialog .dialog-content {
662
+ display: block;
663
+ }
664
+ .litegraph .dialog .dialog-content .subgraph_property {
665
+ padding: 5px;
666
+ }
667
+ .litegraph .dialog .dialog-footer {
668
+ margin: 0;
669
+ }
670
+ .litegraph .dialog .dialog-footer .subgraph_property {
671
+ margin-top: 0;
672
+ display: flex;
673
+ align-items: center;
674
+ padding: 5px;
675
+ }
676
+ .litegraph .dialog .dialog-footer .subgraph_property .name {
677
+ flex: 1;
678
+ }
679
+ .litegraph .graphdialog {
680
+ display: flex;
681
+ align-items: center;
682
+ border-radius: 20px;
683
+ padding: 4px 10px;
684
+ position: fixed;
685
+ }
686
+ .litegraph .graphdialog .name {
687
+ padding: 0;
688
+ min-height: 0;
689
+ font-size: 16px;
690
+ vertical-align: middle;
691
+ }
692
+ .litegraph .graphdialog .value {
693
+ font-size: 16px;
694
+ min-height: 0;
695
+ margin: 0 10px;
696
+ padding: 2px 5px;
697
+ }
698
+ .litegraph .graphdialog input[type="checkbox"] {
699
+ width: 16px;
700
+ height: 16px;
701
+ }
702
+ .litegraph .graphdialog button {
703
+ padding: 4px 18px;
704
+ border-radius: 20px;
705
+ cursor: pointer;
706
+ }
707
+ @font-face {
708
+ font-family: 'primeicons';
709
+ font-display: block;
710
+ src: url('./primeicons-DMOk5skT.eot');
711
+ src: url('./primeicons-DMOk5skT.eot?#iefix') format('embedded-opentype'), url('./primeicons-C6QP2o4f.woff2') format('woff2'), url('./primeicons-WjwUDZjB.woff') format('woff'), url('./primeicons-MpK4pl85.ttf') format('truetype'), url('./primeicons-Dr5RGzOO.svg?#primeicons') format('svg');
712
+ font-weight: normal;
713
+ font-style: normal;
714
+ }
715
+
716
+ .pi {
717
+ font-family: 'primeicons';
718
+ speak: none;
719
+ font-style: normal;
720
+ font-weight: normal;
721
+ font-variant: normal;
722
+ text-transform: none;
723
+ line-height: 1;
724
+ display: inline-block;
725
+ -webkit-font-smoothing: antialiased;
726
+ -moz-osx-font-smoothing: grayscale;
727
+ }
728
+
729
+ .pi:before {
730
+ -webkit-backface-visibility: hidden;
731
+ backface-visibility: hidden;
732
+ }
733
+
734
+ .pi-fw {
735
+ width: 1.28571429em;
736
+ text-align: center;
737
+ }
738
+
739
+ .pi-spin {
740
+ animation: fa-spin 2s infinite linear;
741
+ }
742
+
743
+ @media (prefers-reduced-motion: reduce) {
744
+ .pi-spin {
745
+ animation-delay: -1ms;
746
+ animation-duration: 1ms;
747
+ animation-iteration-count: 1;
748
+ transition-delay: 0s;
749
+ transition-duration: 0s;
750
+ }
751
+ }
752
+
753
+ @keyframes fa-spin {
754
+ 0% {
755
+ transform: rotate(0deg);
756
+ }
757
+ 100% {
758
+ transform: rotate(359deg);
759
+ }
760
+ }
761
+
762
+ .pi-folder-plus:before {
763
+ content: "\ea05";
764
+ }
765
+
766
+ .pi-receipt:before {
767
+ content: "\ea06";
768
+ }
769
+
770
+ .pi-asterisk:before {
771
+ content: "\ea07";
772
+ }
773
+
774
+ .pi-face-smile:before {
775
+ content: "\ea08";
776
+ }
777
+
778
+ .pi-pinterest:before {
779
+ content: "\ea09";
780
+ }
781
+
782
+ .pi-expand:before {
783
+ content: "\ea0a";
784
+ }
785
+
786
+ .pi-pen-to-square:before {
787
+ content: "\ea0b";
788
+ }
789
+
790
+ .pi-wave-pulse:before {
791
+ content: "\ea0c";
792
+ }
793
+
794
+ .pi-turkish-lira:before {
795
+ content: "\ea0d";
796
+ }
797
+
798
+ .pi-spinner-dotted:before {
799
+ content: "\ea0e";
800
+ }
801
+
802
+ .pi-crown:before {
803
+ content: "\ea0f";
804
+ }
805
+
806
+ .pi-pause-circle:before {
807
+ content: "\ea10";
808
+ }
809
+
810
+ .pi-warehouse:before {
811
+ content: "\ea11";
812
+ }
813
+
814
+ .pi-objects-column:before {
815
+ content: "\ea12";
816
+ }
817
+
818
+ .pi-clipboard:before {
819
+ content: "\ea13";
820
+ }
821
+
822
+ .pi-play-circle:before {
823
+ content: "\ea14";
824
+ }
825
+
826
+ .pi-venus:before {
827
+ content: "\ea15";
828
+ }
829
+
830
+ .pi-cart-minus:before {
831
+ content: "\ea16";
832
+ }
833
+
834
+ .pi-file-plus:before {
835
+ content: "\ea17";
836
+ }
837
+
838
+ .pi-microchip:before {
839
+ content: "\ea18";
840
+ }
841
+
842
+ .pi-twitch:before {
843
+ content: "\ea19";
844
+ }
845
+
846
+ .pi-building-columns:before {
847
+ content: "\ea1a";
848
+ }
849
+
850
+ .pi-file-check:before {
851
+ content: "\ea1b";
852
+ }
853
+
854
+ .pi-microchip-ai:before {
855
+ content: "\ea1c";
856
+ }
857
+
858
+ .pi-trophy:before {
859
+ content: "\ea1d";
860
+ }
861
+
862
+ .pi-barcode:before {
863
+ content: "\ea1e";
864
+ }
865
+
866
+ .pi-file-arrow-up:before {
867
+ content: "\ea1f";
868
+ }
869
+
870
+ .pi-mars:before {
871
+ content: "\ea20";
872
+ }
873
+
874
+ .pi-tiktok:before {
875
+ content: "\ea21";
876
+ }
877
+
878
+ .pi-arrow-up-right-and-arrow-down-left-from-center:before {
879
+ content: "\ea22";
880
+ }
881
+
882
+ .pi-ethereum:before {
883
+ content: "\ea23";
884
+ }
885
+
886
+ .pi-list-check:before {
887
+ content: "\ea24";
888
+ }
889
+
890
+ .pi-thumbtack:before {
891
+ content: "\ea25";
892
+ }
893
+
894
+ .pi-arrow-down-left-and-arrow-up-right-to-center:before {
895
+ content: "\ea26";
896
+ }
897
+
898
+ .pi-equals:before {
899
+ content: "\ea27";
900
+ }
901
+
902
+ .pi-lightbulb:before {
903
+ content: "\ea28";
904
+ }
905
+
906
+ .pi-star-half:before {
907
+ content: "\ea29";
908
+ }
909
+
910
+ .pi-address-book:before {
911
+ content: "\ea2a";
912
+ }
913
+
914
+ .pi-chart-scatter:before {
915
+ content: "\ea2b";
916
+ }
917
+
918
+ .pi-indian-rupee:before {
919
+ content: "\ea2c";
920
+ }
921
+
922
+ .pi-star-half-fill:before {
923
+ content: "\ea2d";
924
+ }
925
+
926
+ .pi-cart-arrow-down:before {
927
+ content: "\ea2e";
928
+ }
929
+
930
+ .pi-calendar-clock:before {
931
+ content: "\ea2f";
932
+ }
933
+
934
+ .pi-sort-up-fill:before {
935
+ content: "\ea30";
936
+ }
937
+
938
+ .pi-sparkles:before {
939
+ content: "\ea31";
940
+ }
941
+
942
+ .pi-bullseye:before {
943
+ content: "\ea32";
944
+ }
945
+
946
+ .pi-sort-down-fill:before {
947
+ content: "\ea33";
948
+ }
949
+
950
+ .pi-graduation-cap:before {
951
+ content: "\ea34";
952
+ }
953
+
954
+ .pi-hammer:before {
955
+ content: "\ea35";
956
+ }
957
+
958
+ .pi-bell-slash:before {
959
+ content: "\ea36";
960
+ }
961
+
962
+ .pi-gauge:before {
963
+ content: "\ea37";
964
+ }
965
+
966
+ .pi-shop:before {
967
+ content: "\ea38";
968
+ }
969
+
970
+ .pi-headphones:before {
971
+ content: "\ea39";
972
+ }
973
+
974
+ .pi-eraser:before {
975
+ content: "\ea04";
976
+ }
977
+
978
+ .pi-stopwatch:before {
979
+ content: "\ea01";
980
+ }
981
+
982
+ .pi-verified:before {
983
+ content: "\ea02";
984
+ }
985
+
986
+ .pi-delete-left:before {
987
+ content: "\ea03";
988
+ }
989
+
990
+ .pi-hourglass:before {
991
+ content: "\e9fe";
992
+ }
993
+
994
+ .pi-truck:before {
995
+ content: "\ea00";
996
+ }
997
+
998
+ .pi-wrench:before {
999
+ content: "\e9ff";
1000
+ }
1001
+
1002
+ .pi-microphone:before {
1003
+ content: "\e9fa";
1004
+ }
1005
+
1006
+ .pi-megaphone:before {
1007
+ content: "\e9fb";
1008
+ }
1009
+
1010
+ .pi-arrow-right-arrow-left:before {
1011
+ content: "\e9fc";
1012
+ }
1013
+
1014
+ .pi-bitcoin:before {
1015
+ content: "\e9fd";
1016
+ }
1017
+
1018
+ .pi-file-edit:before {
1019
+ content: "\e9f6";
1020
+ }
1021
+
1022
+ .pi-language:before {
1023
+ content: "\e9f7";
1024
+ }
1025
+
1026
+ .pi-file-export:before {
1027
+ content: "\e9f8";
1028
+ }
1029
+
1030
+ .pi-file-import:before {
1031
+ content: "\e9f9";
1032
+ }
1033
+
1034
+ .pi-file-word:before {
1035
+ content: "\e9f1";
1036
+ }
1037
+
1038
+ .pi-gift:before {
1039
+ content: "\e9f2";
1040
+ }
1041
+
1042
+ .pi-cart-plus:before {
1043
+ content: "\e9f3";
1044
+ }
1045
+
1046
+ .pi-thumbs-down-fill:before {
1047
+ content: "\e9f4";
1048
+ }
1049
+
1050
+ .pi-thumbs-up-fill:before {
1051
+ content: "\e9f5";
1052
+ }
1053
+
1054
+ .pi-arrows-alt:before {
1055
+ content: "\e9f0";
1056
+ }
1057
+
1058
+ .pi-calculator:before {
1059
+ content: "\e9ef";
1060
+ }
1061
+
1062
+ .pi-sort-alt-slash:before {
1063
+ content: "\e9ee";
1064
+ }
1065
+
1066
+ .pi-arrows-h:before {
1067
+ content: "\e9ec";
1068
+ }
1069
+
1070
+ .pi-arrows-v:before {
1071
+ content: "\e9ed";
1072
+ }
1073
+
1074
+ .pi-pound:before {
1075
+ content: "\e9eb";
1076
+ }
1077
+
1078
+ .pi-prime:before {
1079
+ content: "\e9ea";
1080
+ }
1081
+
1082
+ .pi-chart-pie:before {
1083
+ content: "\e9e9";
1084
+ }
1085
+
1086
+ .pi-reddit:before {
1087
+ content: "\e9e8";
1088
+ }
1089
+
1090
+ .pi-code:before {
1091
+ content: "\e9e7";
1092
+ }
1093
+
1094
+ .pi-sync:before {
1095
+ content: "\e9e6";
1096
+ }
1097
+
1098
+ .pi-shopping-bag:before {
1099
+ content: "\e9e5";
1100
+ }
1101
+
1102
+ .pi-server:before {
1103
+ content: "\e9e4";
1104
+ }
1105
+
1106
+ .pi-database:before {
1107
+ content: "\e9e3";
1108
+ }
1109
+
1110
+ .pi-hashtag:before {
1111
+ content: "\e9e2";
1112
+ }
1113
+
1114
+ .pi-bookmark-fill:before {
1115
+ content: "\e9df";
1116
+ }
1117
+
1118
+ .pi-filter-fill:before {
1119
+ content: "\e9e0";
1120
+ }
1121
+
1122
+ .pi-heart-fill:before {
1123
+ content: "\e9e1";
1124
+ }
1125
+
1126
+ .pi-flag-fill:before {
1127
+ content: "\e9de";
1128
+ }
1129
+
1130
+ .pi-circle:before {
1131
+ content: "\e9dc";
1132
+ }
1133
+
1134
+ .pi-circle-fill:before {
1135
+ content: "\e9dd";
1136
+ }
1137
+
1138
+ .pi-bolt:before {
1139
+ content: "\e9db";
1140
+ }
1141
+
1142
+ .pi-history:before {
1143
+ content: "\e9da";
1144
+ }
1145
+
1146
+ .pi-box:before {
1147
+ content: "\e9d9";
1148
+ }
1149
+
1150
+ .pi-at:before {
1151
+ content: "\e9d8";
1152
+ }
1153
+
1154
+ .pi-arrow-up-right:before {
1155
+ content: "\e9d4";
1156
+ }
1157
+
1158
+ .pi-arrow-up-left:before {
1159
+ content: "\e9d5";
1160
+ }
1161
+
1162
+ .pi-arrow-down-left:before {
1163
+ content: "\e9d6";
1164
+ }
1165
+
1166
+ .pi-arrow-down-right:before {
1167
+ content: "\e9d7";
1168
+ }
1169
+
1170
+ .pi-telegram:before {
1171
+ content: "\e9d3";
1172
+ }
1173
+
1174
+ .pi-stop-circle:before {
1175
+ content: "\e9d2";
1176
+ }
1177
+
1178
+ .pi-stop:before {
1179
+ content: "\e9d1";
1180
+ }
1181
+
1182
+ .pi-whatsapp:before {
1183
+ content: "\e9d0";
1184
+ }
1185
+
1186
+ .pi-building:before {
1187
+ content: "\e9cf";
1188
+ }
1189
+
1190
+ .pi-qrcode:before {
1191
+ content: "\e9ce";
1192
+ }
1193
+
1194
+ .pi-car:before {
1195
+ content: "\e9cd";
1196
+ }
1197
+
1198
+ .pi-instagram:before {
1199
+ content: "\e9cc";
1200
+ }
1201
+
1202
+ .pi-linkedin:before {
1203
+ content: "\e9cb";
1204
+ }
1205
+
1206
+ .pi-send:before {
1207
+ content: "\e9ca";
1208
+ }
1209
+
1210
+ .pi-slack:before {
1211
+ content: "\e9c9";
1212
+ }
1213
+
1214
+ .pi-sun:before {
1215
+ content: "\e9c8";
1216
+ }
1217
+
1218
+ .pi-moon:before {
1219
+ content: "\e9c7";
1220
+ }
1221
+
1222
+ .pi-vimeo:before {
1223
+ content: "\e9c6";
1224
+ }
1225
+
1226
+ .pi-youtube:before {
1227
+ content: "\e9c5";
1228
+ }
1229
+
1230
+ .pi-flag:before {
1231
+ content: "\e9c4";
1232
+ }
1233
+
1234
+ .pi-wallet:before {
1235
+ content: "\e9c3";
1236
+ }
1237
+
1238
+ .pi-map:before {
1239
+ content: "\e9c2";
1240
+ }
1241
+
1242
+ .pi-link:before {
1243
+ content: "\e9c1";
1244
+ }
1245
+
1246
+ .pi-credit-card:before {
1247
+ content: "\e9bf";
1248
+ }
1249
+
1250
+ .pi-discord:before {
1251
+ content: "\e9c0";
1252
+ }
1253
+
1254
+ .pi-percentage:before {
1255
+ content: "\e9be";
1256
+ }
1257
+
1258
+ .pi-euro:before {
1259
+ content: "\e9bd";
1260
+ }
1261
+
1262
+ .pi-book:before {
1263
+ content: "\e9ba";
1264
+ }
1265
+
1266
+ .pi-shield:before {
1267
+ content: "\e9b9";
1268
+ }
1269
+
1270
+ .pi-paypal:before {
1271
+ content: "\e9bb";
1272
+ }
1273
+
1274
+ .pi-amazon:before {
1275
+ content: "\e9bc";
1276
+ }
1277
+
1278
+ .pi-phone:before {
1279
+ content: "\e9b8";
1280
+ }
1281
+
1282
+ .pi-filter-slash:before {
1283
+ content: "\e9b7";
1284
+ }
1285
+
1286
+ .pi-facebook:before {
1287
+ content: "\e9b4";
1288
+ }
1289
+
1290
+ .pi-github:before {
1291
+ content: "\e9b5";
1292
+ }
1293
+
1294
+ .pi-twitter:before {
1295
+ content: "\e9b6";
1296
+ }
1297
+
1298
+ .pi-step-backward-alt:before {
1299
+ content: "\e9ac";
1300
+ }
1301
+
1302
+ .pi-step-forward-alt:before {
1303
+ content: "\e9ad";
1304
+ }
1305
+
1306
+ .pi-forward:before {
1307
+ content: "\e9ae";
1308
+ }
1309
+
1310
+ .pi-backward:before {
1311
+ content: "\e9af";
1312
+ }
1313
+
1314
+ .pi-fast-backward:before {
1315
+ content: "\e9b0";
1316
+ }
1317
+
1318
+ .pi-fast-forward:before {
1319
+ content: "\e9b1";
1320
+ }
1321
+
1322
+ .pi-pause:before {
1323
+ content: "\e9b2";
1324
+ }
1325
+
1326
+ .pi-play:before {
1327
+ content: "\e9b3";
1328
+ }
1329
+
1330
+ .pi-compass:before {
1331
+ content: "\e9ab";
1332
+ }
1333
+
1334
+ .pi-id-card:before {
1335
+ content: "\e9aa";
1336
+ }
1337
+
1338
+ .pi-ticket:before {
1339
+ content: "\e9a9";
1340
+ }
1341
+
1342
+ .pi-file-o:before {
1343
+ content: "\e9a8";
1344
+ }
1345
+
1346
+ .pi-reply:before {
1347
+ content: "\e9a7";
1348
+ }
1349
+
1350
+ .pi-directions-alt:before {
1351
+ content: "\e9a5";
1352
+ }
1353
+
1354
+ .pi-directions:before {
1355
+ content: "\e9a6";
1356
+ }
1357
+
1358
+ .pi-thumbs-up:before {
1359
+ content: "\e9a3";
1360
+ }
1361
+
1362
+ .pi-thumbs-down:before {
1363
+ content: "\e9a4";
1364
+ }
1365
+
1366
+ .pi-sort-numeric-down-alt:before {
1367
+ content: "\e996";
1368
+ }
1369
+
1370
+ .pi-sort-numeric-up-alt:before {
1371
+ content: "\e997";
1372
+ }
1373
+
1374
+ .pi-sort-alpha-down-alt:before {
1375
+ content: "\e998";
1376
+ }
1377
+
1378
+ .pi-sort-alpha-up-alt:before {
1379
+ content: "\e999";
1380
+ }
1381
+
1382
+ .pi-sort-numeric-down:before {
1383
+ content: "\e99a";
1384
+ }
1385
+
1386
+ .pi-sort-numeric-up:before {
1387
+ content: "\e99b";
1388
+ }
1389
+
1390
+ .pi-sort-alpha-down:before {
1391
+ content: "\e99c";
1392
+ }
1393
+
1394
+ .pi-sort-alpha-up:before {
1395
+ content: "\e99d";
1396
+ }
1397
+
1398
+ .pi-sort-alt:before {
1399
+ content: "\e99e";
1400
+ }
1401
+
1402
+ .pi-sort-amount-up:before {
1403
+ content: "\e99f";
1404
+ }
1405
+
1406
+ .pi-sort-amount-down:before {
1407
+ content: "\e9a0";
1408
+ }
1409
+
1410
+ .pi-sort-amount-down-alt:before {
1411
+ content: "\e9a1";
1412
+ }
1413
+
1414
+ .pi-sort-amount-up-alt:before {
1415
+ content: "\e9a2";
1416
+ }
1417
+
1418
+ .pi-palette:before {
1419
+ content: "\e995";
1420
+ }
1421
+
1422
+ .pi-undo:before {
1423
+ content: "\e994";
1424
+ }
1425
+
1426
+ .pi-desktop:before {
1427
+ content: "\e993";
1428
+ }
1429
+
1430
+ .pi-sliders-v:before {
1431
+ content: "\e991";
1432
+ }
1433
+
1434
+ .pi-sliders-h:before {
1435
+ content: "\e992";
1436
+ }
1437
+
1438
+ .pi-search-plus:before {
1439
+ content: "\e98f";
1440
+ }
1441
+
1442
+ .pi-search-minus:before {
1443
+ content: "\e990";
1444
+ }
1445
+
1446
+ .pi-file-excel:before {
1447
+ content: "\e98e";
1448
+ }
1449
+
1450
+ .pi-file-pdf:before {
1451
+ content: "\e98d";
1452
+ }
1453
+
1454
+ .pi-check-square:before {
1455
+ content: "\e98c";
1456
+ }
1457
+
1458
+ .pi-chart-line:before {
1459
+ content: "\e98b";
1460
+ }
1461
+
1462
+ .pi-user-edit:before {
1463
+ content: "\e98a";
1464
+ }
1465
+
1466
+ .pi-exclamation-circle:before {
1467
+ content: "\e989";
1468
+ }
1469
+
1470
+ .pi-android:before {
1471
+ content: "\e985";
1472
+ }
1473
+
1474
+ .pi-google:before {
1475
+ content: "\e986";
1476
+ }
1477
+
1478
+ .pi-apple:before {
1479
+ content: "\e987";
1480
+ }
1481
+
1482
+ .pi-microsoft:before {
1483
+ content: "\e988";
1484
+ }
1485
+
1486
+ .pi-heart:before {
1487
+ content: "\e984";
1488
+ }
1489
+
1490
+ .pi-mobile:before {
1491
+ content: "\e982";
1492
+ }
1493
+
1494
+ .pi-tablet:before {
1495
+ content: "\e983";
1496
+ }
1497
+
1498
+ .pi-key:before {
1499
+ content: "\e981";
1500
+ }
1501
+
1502
+ .pi-shopping-cart:before {
1503
+ content: "\e980";
1504
+ }
1505
+
1506
+ .pi-comments:before {
1507
+ content: "\e97e";
1508
+ }
1509
+
1510
+ .pi-comment:before {
1511
+ content: "\e97f";
1512
+ }
1513
+
1514
+ .pi-briefcase:before {
1515
+ content: "\e97d";
1516
+ }
1517
+
1518
+ .pi-bell:before {
1519
+ content: "\e97c";
1520
+ }
1521
+
1522
+ .pi-paperclip:before {
1523
+ content: "\e97b";
1524
+ }
1525
+
1526
+ .pi-share-alt:before {
1527
+ content: "\e97a";
1528
+ }
1529
+
1530
+ .pi-envelope:before {
1531
+ content: "\e979";
1532
+ }
1533
+
1534
+ .pi-volume-down:before {
1535
+ content: "\e976";
1536
+ }
1537
+
1538
+ .pi-volume-up:before {
1539
+ content: "\e977";
1540
+ }
1541
+
1542
+ .pi-volume-off:before {
1543
+ content: "\e978";
1544
+ }
1545
+
1546
+ .pi-eject:before {
1547
+ content: "\e975";
1548
+ }
1549
+
1550
+ .pi-money-bill:before {
1551
+ content: "\e974";
1552
+ }
1553
+
1554
+ .pi-images:before {
1555
+ content: "\e973";
1556
+ }
1557
+
1558
+ .pi-image:before {
1559
+ content: "\e972";
1560
+ }
1561
+
1562
+ .pi-sign-in:before {
1563
+ content: "\e970";
1564
+ }
1565
+
1566
+ .pi-sign-out:before {
1567
+ content: "\e971";
1568
+ }
1569
+
1570
+ .pi-wifi:before {
1571
+ content: "\e96f";
1572
+ }
1573
+
1574
+ .pi-sitemap:before {
1575
+ content: "\e96e";
1576
+ }
1577
+
1578
+ .pi-chart-bar:before {
1579
+ content: "\e96d";
1580
+ }
1581
+
1582
+ .pi-camera:before {
1583
+ content: "\e96c";
1584
+ }
1585
+
1586
+ .pi-dollar:before {
1587
+ content: "\e96b";
1588
+ }
1589
+
1590
+ .pi-lock-open:before {
1591
+ content: "\e96a";
1592
+ }
1593
+
1594
+ .pi-table:before {
1595
+ content: "\e969";
1596
+ }
1597
+
1598
+ .pi-map-marker:before {
1599
+ content: "\e968";
1600
+ }
1601
+
1602
+ .pi-list:before {
1603
+ content: "\e967";
1604
+ }
1605
+
1606
+ .pi-eye-slash:before {
1607
+ content: "\e965";
1608
+ }
1609
+
1610
+ .pi-eye:before {
1611
+ content: "\e966";
1612
+ }
1613
+
1614
+ .pi-folder-open:before {
1615
+ content: "\e964";
1616
+ }
1617
+
1618
+ .pi-folder:before {
1619
+ content: "\e963";
1620
+ }
1621
+
1622
+ .pi-video:before {
1623
+ content: "\e962";
1624
+ }
1625
+
1626
+ .pi-inbox:before {
1627
+ content: "\e961";
1628
+ }
1629
+
1630
+ .pi-lock:before {
1631
+ content: "\e95f";
1632
+ }
1633
+
1634
+ .pi-unlock:before {
1635
+ content: "\e960";
1636
+ }
1637
+
1638
+ .pi-tags:before {
1639
+ content: "\e95d";
1640
+ }
1641
+
1642
+ .pi-tag:before {
1643
+ content: "\e95e";
1644
+ }
1645
+
1646
+ .pi-power-off:before {
1647
+ content: "\e95c";
1648
+ }
1649
+
1650
+ .pi-save:before {
1651
+ content: "\e95b";
1652
+ }
1653
+
1654
+ .pi-question-circle:before {
1655
+ content: "\e959";
1656
+ }
1657
+
1658
+ .pi-question:before {
1659
+ content: "\e95a";
1660
+ }
1661
+
1662
+ .pi-copy:before {
1663
+ content: "\e957";
1664
+ }
1665
+
1666
+ .pi-file:before {
1667
+ content: "\e958";
1668
+ }
1669
+
1670
+ .pi-clone:before {
1671
+ content: "\e955";
1672
+ }
1673
+
1674
+ .pi-calendar-times:before {
1675
+ content: "\e952";
1676
+ }
1677
+
1678
+ .pi-calendar-minus:before {
1679
+ content: "\e953";
1680
+ }
1681
+
1682
+ .pi-calendar-plus:before {
1683
+ content: "\e954";
1684
+ }
1685
+
1686
+ .pi-ellipsis-v:before {
1687
+ content: "\e950";
1688
+ }
1689
+
1690
+ .pi-ellipsis-h:before {
1691
+ content: "\e951";
1692
+ }
1693
+
1694
+ .pi-bookmark:before {
1695
+ content: "\e94e";
1696
+ }
1697
+
1698
+ .pi-globe:before {
1699
+ content: "\e94f";
1700
+ }
1701
+
1702
+ .pi-replay:before {
1703
+ content: "\e94d";
1704
+ }
1705
+
1706
+ .pi-filter:before {
1707
+ content: "\e94c";
1708
+ }
1709
+
1710
+ .pi-print:before {
1711
+ content: "\e94b";
1712
+ }
1713
+
1714
+ .pi-align-right:before {
1715
+ content: "\e946";
1716
+ }
1717
+
1718
+ .pi-align-left:before {
1719
+ content: "\e947";
1720
+ }
1721
+
1722
+ .pi-align-center:before {
1723
+ content: "\e948";
1724
+ }
1725
+
1726
+ .pi-align-justify:before {
1727
+ content: "\e949";
1728
+ }
1729
+
1730
+ .pi-cog:before {
1731
+ content: "\e94a";
1732
+ }
1733
+
1734
+ .pi-cloud-download:before {
1735
+ content: "\e943";
1736
+ }
1737
+
1738
+ .pi-cloud-upload:before {
1739
+ content: "\e944";
1740
+ }
1741
+
1742
+ .pi-cloud:before {
1743
+ content: "\e945";
1744
+ }
1745
+
1746
+ .pi-pencil:before {
1747
+ content: "\e942";
1748
+ }
1749
+
1750
+ .pi-users:before {
1751
+ content: "\e941";
1752
+ }
1753
+
1754
+ .pi-clock:before {
1755
+ content: "\e940";
1756
+ }
1757
+
1758
+ .pi-user-minus:before {
1759
+ content: "\e93e";
1760
+ }
1761
+
1762
+ .pi-user-plus:before {
1763
+ content: "\e93f";
1764
+ }
1765
+
1766
+ .pi-trash:before {
1767
+ content: "\e93d";
1768
+ }
1769
+
1770
+ .pi-external-link:before {
1771
+ content: "\e93c";
1772
+ }
1773
+
1774
+ .pi-window-maximize:before {
1775
+ content: "\e93b";
1776
+ }
1777
+
1778
+ .pi-window-minimize:before {
1779
+ content: "\e93a";
1780
+ }
1781
+
1782
+ .pi-refresh:before {
1783
+ content: "\e938";
1784
+ }
1785
+
1786
+ .pi-user:before {
1787
+ content: "\e939";
1788
+ }
1789
+
1790
+ .pi-exclamation-triangle:before {
1791
+ content: "\e922";
1792
+ }
1793
+
1794
+ .pi-calendar:before {
1795
+ content: "\e927";
1796
+ }
1797
+
1798
+ .pi-chevron-circle-left:before {
1799
+ content: "\e928";
1800
+ }
1801
+
1802
+ .pi-chevron-circle-down:before {
1803
+ content: "\e929";
1804
+ }
1805
+
1806
+ .pi-chevron-circle-right:before {
1807
+ content: "\e92a";
1808
+ }
1809
+
1810
+ .pi-chevron-circle-up:before {
1811
+ content: "\e92b";
1812
+ }
1813
+
1814
+ .pi-angle-double-down:before {
1815
+ content: "\e92c";
1816
+ }
1817
+
1818
+ .pi-angle-double-left:before {
1819
+ content: "\e92d";
1820
+ }
1821
+
1822
+ .pi-angle-double-right:before {
1823
+ content: "\e92e";
1824
+ }
1825
+
1826
+ .pi-angle-double-up:before {
1827
+ content: "\e92f";
1828
+ }
1829
+
1830
+ .pi-angle-down:before {
1831
+ content: "\e930";
1832
+ }
1833
+
1834
+ .pi-angle-left:before {
1835
+ content: "\e931";
1836
+ }
1837
+
1838
+ .pi-angle-right:before {
1839
+ content: "\e932";
1840
+ }
1841
+
1842
+ .pi-angle-up:before {
1843
+ content: "\e933";
1844
+ }
1845
+
1846
+ .pi-upload:before {
1847
+ content: "\e934";
1848
+ }
1849
+
1850
+ .pi-download:before {
1851
+ content: "\e956";
1852
+ }
1853
+
1854
+ .pi-ban:before {
1855
+ content: "\e935";
1856
+ }
1857
+
1858
+ .pi-star-fill:before {
1859
+ content: "\e936";
1860
+ }
1861
+
1862
+ .pi-star:before {
1863
+ content: "\e937";
1864
+ }
1865
+
1866
+ .pi-chevron-left:before {
1867
+ content: "\e900";
1868
+ }
1869
+
1870
+ .pi-chevron-right:before {
1871
+ content: "\e901";
1872
+ }
1873
+
1874
+ .pi-chevron-down:before {
1875
+ content: "\e902";
1876
+ }
1877
+
1878
+ .pi-chevron-up:before {
1879
+ content: "\e903";
1880
+ }
1881
+
1882
+ .pi-caret-left:before {
1883
+ content: "\e904";
1884
+ }
1885
+
1886
+ .pi-caret-right:before {
1887
+ content: "\e905";
1888
+ }
1889
+
1890
+ .pi-caret-down:before {
1891
+ content: "\e906";
1892
+ }
1893
+
1894
+ .pi-caret-up:before {
1895
+ content: "\e907";
1896
+ }
1897
+
1898
+ .pi-search:before {
1899
+ content: "\e908";
1900
+ }
1901
+
1902
+ .pi-check:before {
1903
+ content: "\e909";
1904
+ }
1905
+
1906
+ .pi-check-circle:before {
1907
+ content: "\e90a";
1908
+ }
1909
+
1910
+ .pi-times:before {
1911
+ content: "\e90b";
1912
+ }
1913
+
1914
+ .pi-times-circle:before {
1915
+ content: "\e90c";
1916
+ }
1917
+
1918
+ .pi-plus:before {
1919
+ content: "\e90d";
1920
+ }
1921
+
1922
+ .pi-plus-circle:before {
1923
+ content: "\e90e";
1924
+ }
1925
+
1926
+ .pi-minus:before {
1927
+ content: "\e90f";
1928
+ }
1929
+
1930
+ .pi-minus-circle:before {
1931
+ content: "\e910";
1932
+ }
1933
+
1934
+ .pi-circle-on:before {
1935
+ content: "\e911";
1936
+ }
1937
+
1938
+ .pi-circle-off:before {
1939
+ content: "\e912";
1940
+ }
1941
+
1942
+ .pi-sort-down:before {
1943
+ content: "\e913";
1944
+ }
1945
+
1946
+ .pi-sort-up:before {
1947
+ content: "\e914";
1948
+ }
1949
+
1950
+ .pi-sort:before {
1951
+ content: "\e915";
1952
+ }
1953
+
1954
+ .pi-step-backward:before {
1955
+ content: "\e916";
1956
+ }
1957
+
1958
+ .pi-step-forward:before {
1959
+ content: "\e917";
1960
+ }
1961
+
1962
+ .pi-th-large:before {
1963
+ content: "\e918";
1964
+ }
1965
+
1966
+ .pi-arrow-down:before {
1967
+ content: "\e919";
1968
+ }
1969
+
1970
+ .pi-arrow-left:before {
1971
+ content: "\e91a";
1972
+ }
1973
+
1974
+ .pi-arrow-right:before {
1975
+ content: "\e91b";
1976
+ }
1977
+
1978
+ .pi-arrow-up:before {
1979
+ content: "\e91c";
1980
+ }
1981
+
1982
+ .pi-bars:before {
1983
+ content: "\e91d";
1984
+ }
1985
+
1986
+ .pi-arrow-circle-down:before {
1987
+ content: "\e91e";
1988
+ }
1989
+
1990
+ .pi-arrow-circle-left:before {
1991
+ content: "\e91f";
1992
+ }
1993
+
1994
+ .pi-arrow-circle-right:before {
1995
+ content: "\e920";
1996
+ }
1997
+
1998
+ .pi-arrow-circle-up:before {
1999
+ content: "\e921";
2000
+ }
2001
+
2002
+ .pi-info:before {
2003
+ content: "\e923";
2004
+ }
2005
+
2006
+ .pi-info-circle:before {
2007
+ content: "\e924";
2008
+ }
2009
+
2010
+ .pi-home:before {
2011
+ content: "\e925";
2012
+ }
2013
+
2014
+ .pi-spinner:before {
2015
+ content: "\e926";
2016
+ }
2017
+ @layer primevue, tailwind-utilities;
2018
+
2019
+ @layer tailwind-utilities {
2020
+ .container{
2021
+ width: 100%;
2022
+ }
2023
+ @media (min-width: 640px){
2024
+
2025
+ .container{
2026
+ max-width: 640px;
2027
+ }
2028
+ }
2029
+ @media (min-width: 768px){
2030
+
2031
+ .container{
2032
+ max-width: 768px;
2033
+ }
2034
+ }
2035
+ @media (min-width: 1024px){
2036
+
2037
+ .container{
2038
+ max-width: 1024px;
2039
+ }
2040
+ }
2041
+ @media (min-width: 1280px){
2042
+
2043
+ .container{
2044
+ max-width: 1280px;
2045
+ }
2046
+ }
2047
+ @media (min-width: 1536px){
2048
+
2049
+ .container{
2050
+ max-width: 1536px;
2051
+ }
2052
+ }
2053
+ @media (min-width: 1800px){
2054
+
2055
+ .container{
2056
+ max-width: 1800px;
2057
+ }
2058
+ }
2059
+ @media (min-width: 2500px){
2060
+
2061
+ .container{
2062
+ max-width: 2500px;
2063
+ }
2064
+ }
2065
+ @media (min-width: 3200px){
2066
+
2067
+ .container{
2068
+ max-width: 3200px;
2069
+ }
2070
+ }
2071
+ .pointer-events-none{
2072
+ pointer-events: none;
2073
+ }
2074
+ .pointer-events-auto{
2075
+ pointer-events: auto;
2076
+ }
2077
+ .\!visible{
2078
+ visibility: visible !important;
2079
+ }
2080
+ .visible{
2081
+ visibility: visible;
2082
+ }
2083
+ .invisible{
2084
+ visibility: hidden;
2085
+ }
2086
+ .collapse{
2087
+ visibility: collapse;
2088
+ }
2089
+ .static{
2090
+ position: static;
2091
+ }
2092
+ .fixed{
2093
+ position: fixed;
2094
+ }
2095
+ .absolute{
2096
+ position: absolute;
2097
+ }
2098
+ .relative{
2099
+ position: relative;
2100
+ }
2101
+ .inset-0{
2102
+ inset: 0px;
2103
+ }
2104
+ .-bottom-4{
2105
+ bottom: -1rem;
2106
+ }
2107
+ .-right-14{
2108
+ right: -3.5rem;
2109
+ }
2110
+ .-right-4{
2111
+ right: -1rem;
2112
+ }
2113
+ .bottom-\[10px\]{
2114
+ bottom: 10px;
2115
+ }
2116
+ .bottom-full{
2117
+ bottom: 100%;
2118
+ }
2119
+ .left-0{
2120
+ left: 0px;
2121
+ }
2122
+ .left-\[-350px\]{
2123
+ left: -350px;
2124
+ }
2125
+ .right-\[10px\]{
2126
+ right: 10px;
2127
+ }
2128
+ .top-0{
2129
+ top: 0px;
2130
+ }
2131
+ .top-\[50px\]{
2132
+ top: 50px;
2133
+ }
2134
+ .top-auto{
2135
+ top: auto;
2136
+ }
2137
+ .z-10{
2138
+ z-index: 10;
2139
+ }
2140
+ .z-\[1000\]{
2141
+ z-index: 1000;
2142
+ }
2143
+ .z-\[9999\]{
2144
+ z-index: 9999;
2145
+ }
2146
+ .col-span-full{
2147
+ grid-column: 1 / -1;
2148
+ }
2149
+ .row-span-full{
2150
+ grid-row: 1 / -1;
2151
+ }
2152
+ .m-0{
2153
+ margin: 0px;
2154
+ }
2155
+ .m-1{
2156
+ margin: 0.25rem;
2157
+ }
2158
+ .m-12{
2159
+ margin: 3rem;
2160
+ }
2161
+ .m-2{
2162
+ margin: 0.5rem;
2163
+ }
2164
+ .m-8{
2165
+ margin: 2rem;
2166
+ }
2167
+ .mx-1{
2168
+ margin-left: 0.25rem;
2169
+ margin-right: 0.25rem;
2170
+ }
2171
+ .mx-2{
2172
+ margin-left: 0.5rem;
2173
+ margin-right: 0.5rem;
2174
+ }
2175
+ .mx-6{
2176
+ margin-left: 1.5rem;
2177
+ margin-right: 1.5rem;
2178
+ }
2179
+ .my-0{
2180
+ margin-top: 0px;
2181
+ margin-bottom: 0px;
2182
+ }
2183
+ .my-1{
2184
+ margin-top: 0.25rem;
2185
+ margin-bottom: 0.25rem;
2186
+ }
2187
+ .my-2{
2188
+ margin-top: 0.5rem;
2189
+ margin-bottom: 0.5rem;
2190
+ }
2191
+ .my-2\.5{
2192
+ margin-top: 0.625rem;
2193
+ margin-bottom: 0.625rem;
2194
+ }
2195
+ .my-4{
2196
+ margin-top: 1rem;
2197
+ margin-bottom: 1rem;
2198
+ }
2199
+ .mb-2{
2200
+ margin-bottom: 0.5rem;
2201
+ }
2202
+ .mb-3{
2203
+ margin-bottom: 0.75rem;
2204
+ }
2205
+ .mb-4{
2206
+ margin-bottom: 1rem;
2207
+ }
2208
+ .mb-6{
2209
+ margin-bottom: 1.5rem;
2210
+ }
2211
+ .mb-7{
2212
+ margin-bottom: 1.75rem;
2213
+ }
2214
+ .ml-2{
2215
+ margin-left: 0.5rem;
2216
+ }
2217
+ .ml-\[-13px\]{
2218
+ margin-left: -13px;
2219
+ }
2220
+ .ml-auto{
2221
+ margin-left: auto;
2222
+ }
2223
+ .mr-1{
2224
+ margin-right: 0.25rem;
2225
+ }
2226
+ .mr-2{
2227
+ margin-right: 0.5rem;
2228
+ }
2229
+ .mt-0{
2230
+ margin-top: 0px;
2231
+ }
2232
+ .mt-1{
2233
+ margin-top: 0.25rem;
2234
+ }
2235
+ .mt-2{
2236
+ margin-top: 0.5rem;
2237
+ }
2238
+ .mt-24{
2239
+ margin-top: 6rem;
2240
+ }
2241
+ .mt-4{
2242
+ margin-top: 1rem;
2243
+ }
2244
+ .mt-5{
2245
+ margin-top: 1.25rem;
2246
+ }
2247
+ .mt-6{
2248
+ margin-top: 1.5rem;
2249
+ }
2250
+ .block{
2251
+ display: block;
2252
+ }
2253
+ .inline-block{
2254
+ display: inline-block;
2255
+ }
2256
+ .inline{
2257
+ display: inline;
2258
+ }
2259
+ .flex{
2260
+ display: flex;
2261
+ }
2262
+ .inline-flex{
2263
+ display: inline-flex;
2264
+ }
2265
+ .table{
2266
+ display: table;
2267
+ }
2268
+ .grid{
2269
+ display: grid;
2270
+ }
2271
+ .contents{
2272
+ display: contents;
2273
+ }
2274
+ .hidden{
2275
+ display: none;
2276
+ }
2277
+ .h-0{
2278
+ height: 0px;
2279
+ }
2280
+ .h-1{
2281
+ height: 0.25rem;
2282
+ }
2283
+ .h-1\/2{
2284
+ height: 50%;
2285
+ }
2286
+ .h-16{
2287
+ height: 4rem;
2288
+ }
2289
+ .h-6{
2290
+ height: 1.5rem;
2291
+ }
2292
+ .h-64{
2293
+ height: 16rem;
2294
+ }
2295
+ .h-8{
2296
+ height: 2rem;
2297
+ }
2298
+ .h-96{
2299
+ height: 26rem;
2300
+ }
2301
+ .h-\[22px\]{
2302
+ height: 22px;
2303
+ }
2304
+ .h-\[30rem\]{
2305
+ height: 30rem;
2306
+ }
2307
+ .h-\[var\(--comfy-topbar-height\)\]{
2308
+ height: var(--comfy-topbar-height);
2309
+ }
2310
+ .h-full{
2311
+ height: 100%;
2312
+ }
2313
+ .h-screen{
2314
+ height: 100vh;
2315
+ }
2316
+ .max-h-96{
2317
+ max-height: 26rem;
2318
+ }
2319
+ .max-h-full{
2320
+ max-height: 100%;
2321
+ }
2322
+ .min-h-52{
2323
+ min-height: 13rem;
2324
+ }
2325
+ .min-h-8{
2326
+ min-height: 2rem;
2327
+ }
2328
+ .min-h-full{
2329
+ min-height: 100%;
2330
+ }
2331
+ .min-h-screen{
2332
+ min-height: 100vh;
2333
+ }
2334
+ .w-1\/2{
2335
+ width: 50%;
2336
+ }
2337
+ .w-12{
2338
+ width: 3rem;
2339
+ }
2340
+ .w-14{
2341
+ width: 3.5rem;
2342
+ }
2343
+ .w-16{
2344
+ width: 4rem;
2345
+ }
2346
+ .w-28{
2347
+ width: 7rem;
2348
+ }
2349
+ .w-3\/12{
2350
+ width: 25%;
2351
+ }
2352
+ .w-44{
2353
+ width: 11rem;
2354
+ }
2355
+ .w-48{
2356
+ width: 12rem;
2357
+ }
2358
+ .w-6{
2359
+ width: 1.5rem;
2360
+ }
2361
+ .w-64{
2362
+ width: 16rem;
2363
+ }
2364
+ .w-8{
2365
+ width: 2rem;
2366
+ }
2367
+ .w-\[22px\]{
2368
+ width: 22px;
2369
+ }
2370
+ .w-\[600px\]{
2371
+ width: 600px;
2372
+ }
2373
+ .w-auto{
2374
+ width: auto;
2375
+ }
2376
+ .w-fit{
2377
+ width: -moz-fit-content;
2378
+ width: fit-content;
2379
+ }
2380
+ .w-full{
2381
+ width: 100%;
2382
+ }
2383
+ .w-screen{
2384
+ width: 100vw;
2385
+ }
2386
+ .min-w-0{
2387
+ min-width: 0px;
2388
+ }
2389
+ .min-w-110{
2390
+ min-width: 32rem;
2391
+ }
2392
+ .min-w-32{
2393
+ min-width: 8rem;
2394
+ }
2395
+ .min-w-84{
2396
+ min-width: 22rem;
2397
+ }
2398
+ .min-w-96{
2399
+ min-width: 26rem;
2400
+ }
2401
+ .min-w-full{
2402
+ min-width: 100%;
2403
+ }
2404
+ .max-w-110{
2405
+ max-width: 32rem;
2406
+ }
2407
+ .max-w-48{
2408
+ max-width: 12rem;
2409
+ }
2410
+ .max-w-64{
2411
+ max-width: 16rem;
2412
+ }
2413
+ .max-w-\[150px\]{
2414
+ max-width: 150px;
2415
+ }
2416
+ .max-w-\[600px\]{
2417
+ max-width: 600px;
2418
+ }
2419
+ .max-w-full{
2420
+ max-width: 100%;
2421
+ }
2422
+ .max-w-screen-sm{
2423
+ max-width: 640px;
2424
+ }
2425
+ .flex-1{
2426
+ flex: 1 1 0%;
2427
+ }
2428
+ .flex-shrink-0{
2429
+ flex-shrink: 0;
2430
+ }
2431
+ .shrink-0{
2432
+ flex-shrink: 0;
2433
+ }
2434
+ .flex-grow{
2435
+ flex-grow: 1;
2436
+ }
2437
+ .grow{
2438
+ flex-grow: 1;
2439
+ }
2440
+ .border-collapse{
2441
+ border-collapse: collapse;
2442
+ }
2443
+ .-translate-y-40{
2444
+ --tw-translate-y: -10rem;
2445
+ transform: translate(var(--tw-translate-x), var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y));
2446
+ }
2447
+ .scale-75{
2448
+ --tw-scale-x: .75;
2449
+ --tw-scale-y: .75;
2450
+ transform: translate(var(--tw-translate-x), var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y));
2451
+ }
2452
+ .transform{
2453
+ transform: translate(var(--tw-translate-x), var(--tw-translate-y)) rotate(var(--tw-rotate)) skewX(var(--tw-skew-x)) skewY(var(--tw-skew-y)) scaleX(var(--tw-scale-x)) scaleY(var(--tw-scale-y));
2454
+ }
2455
+ .cursor-move{
2456
+ cursor: move;
2457
+ }
2458
+ .cursor-pointer{
2459
+ cursor: pointer;
2460
+ }
2461
+ .select-none{
2462
+ -webkit-user-select: none;
2463
+ -moz-user-select: none;
2464
+ user-select: none;
2465
+ }
2466
+ .resize{
2467
+ resize: both;
2468
+ }
2469
+ .list-inside{
2470
+ list-style-position: inside;
2471
+ }
2472
+ .list-disc{
2473
+ list-style-type: disc;
2474
+ }
2475
+ .grid-cols-2{
2476
+ grid-template-columns: repeat(2, minmax(0, 1fr));
2477
+ }
2478
+ .flex-row{
2479
+ flex-direction: row;
2480
+ }
2481
+ .flex-row-reverse{
2482
+ flex-direction: row-reverse;
2483
+ }
2484
+ .flex-col{
2485
+ flex-direction: column;
2486
+ }
2487
+ .flex-wrap{
2488
+ flex-wrap: wrap;
2489
+ }
2490
+ .flex-nowrap{
2491
+ flex-wrap: nowrap;
2492
+ }
2493
+ .content-center{
2494
+ align-content: center;
2495
+ }
2496
+ .items-center{
2497
+ align-items: center;
2498
+ }
2499
+ .justify-end{
2500
+ justify-content: flex-end;
2501
+ }
2502
+ .justify-center{
2503
+ justify-content: center;
2504
+ }
2505
+ .justify-between{
2506
+ justify-content: space-between;
2507
+ }
2508
+ .justify-around{
2509
+ justify-content: space-around;
2510
+ }
2511
+ .justify-evenly{
2512
+ justify-content: space-evenly;
2513
+ }
2514
+ .gap-0{
2515
+ gap: 0px;
2516
+ }
2517
+ .gap-1{
2518
+ gap: 0.25rem;
2519
+ }
2520
+ .gap-2{
2521
+ gap: 0.5rem;
2522
+ }
2523
+ .gap-3{
2524
+ gap: 0.75rem;
2525
+ }
2526
+ .gap-4{
2527
+ gap: 1rem;
2528
+ }
2529
+ .gap-6{
2530
+ gap: 1.5rem;
2531
+ }
2532
+ .gap-8{
2533
+ gap: 2rem;
2534
+ }
2535
+ .space-x-1 > :not([hidden]) ~ :not([hidden]){
2536
+ --tw-space-x-reverse: 0;
2537
+ margin-right: calc(0.25rem * var(--tw-space-x-reverse));
2538
+ margin-left: calc(0.25rem * calc(1 - var(--tw-space-x-reverse)));
2539
+ }
2540
+ .space-y-1 > :not([hidden]) ~ :not([hidden]){
2541
+ --tw-space-y-reverse: 0;
2542
+ margin-top: calc(0.25rem * calc(1 - var(--tw-space-y-reverse)));
2543
+ margin-bottom: calc(0.25rem * var(--tw-space-y-reverse));
2544
+ }
2545
+ .space-y-2 > :not([hidden]) ~ :not([hidden]){
2546
+ --tw-space-y-reverse: 0;
2547
+ margin-top: calc(0.5rem * calc(1 - var(--tw-space-y-reverse)));
2548
+ margin-bottom: calc(0.5rem * var(--tw-space-y-reverse));
2549
+ }
2550
+ .space-y-4 > :not([hidden]) ~ :not([hidden]){
2551
+ --tw-space-y-reverse: 0;
2552
+ margin-top: calc(1rem * calc(1 - var(--tw-space-y-reverse)));
2553
+ margin-bottom: calc(1rem * var(--tw-space-y-reverse));
2554
+ }
2555
+ .place-self-end{
2556
+ place-self: end;
2557
+ }
2558
+ .justify-self-end{
2559
+ justify-self: end;
2560
+ }
2561
+ .overflow-auto{
2562
+ overflow: auto;
2563
+ }
2564
+ .overflow-hidden{
2565
+ overflow: hidden;
2566
+ }
2567
+ .overflow-y-auto{
2568
+ overflow-y: auto;
2569
+ }
2570
+ .overflow-x-hidden{
2571
+ overflow-x: hidden;
2572
+ }
2573
+ .truncate{
2574
+ overflow: hidden;
2575
+ text-overflow: ellipsis;
2576
+ white-space: nowrap;
2577
+ }
2578
+ .text-ellipsis{
2579
+ text-overflow: ellipsis;
2580
+ }
2581
+ .whitespace-nowrap{
2582
+ white-space: nowrap;
2583
+ }
2584
+ .whitespace-pre-line{
2585
+ white-space: pre-line;
2586
+ }
2587
+ .text-wrap{
2588
+ text-wrap: wrap;
2589
+ }
2590
+ .text-nowrap{
2591
+ text-wrap: nowrap;
2592
+ }
2593
+ .rounded{
2594
+ border-radius: 0.25rem;
2595
+ }
2596
+ .rounded-lg{
2597
+ border-radius: 0.5rem;
2598
+ }
2599
+ .rounded-none{
2600
+ border-radius: 0px;
2601
+ }
2602
+ .rounded-t-lg{
2603
+ border-top-left-radius: 0.5rem;
2604
+ border-top-right-radius: 0.5rem;
2605
+ }
2606
+ .border{
2607
+ border-width: 1px;
2608
+ }
2609
+ .border-0{
2610
+ border-width: 0px;
2611
+ }
2612
+ .border-x-0{
2613
+ border-left-width: 0px;
2614
+ border-right-width: 0px;
2615
+ }
2616
+ .border-y{
2617
+ border-top-width: 1px;
2618
+ border-bottom-width: 1px;
2619
+ }
2620
+ .border-b{
2621
+ border-bottom-width: 1px;
2622
+ }
2623
+ .border-l{
2624
+ border-left-width: 1px;
2625
+ }
2626
+ .border-r{
2627
+ border-right-width: 1px;
2628
+ }
2629
+ .border-t-0{
2630
+ border-top-width: 0px;
2631
+ }
2632
+ .border-solid{
2633
+ border-style: solid;
2634
+ }
2635
+ .border-hidden{
2636
+ border-style: hidden;
2637
+ }
2638
+ .border-none{
2639
+ border-style: none;
2640
+ }
2641
+ .border-neutral-700{
2642
+ --tw-border-opacity: 1;
2643
+ border-color: rgb(64 64 64 / var(--tw-border-opacity));
2644
+ }
2645
+ .bg-\[var\(--comfy-menu-bg\)\]{
2646
+ background-color: var(--comfy-menu-bg);
2647
+ }
2648
+ .bg-\[var\(--p-tree-background\)\]{
2649
+ background-color: var(--p-tree-background);
2650
+ }
2651
+ .bg-black{
2652
+ --tw-bg-opacity: 1;
2653
+ background-color: rgb(0 0 0 / var(--tw-bg-opacity));
2654
+ }
2655
+ .bg-blue-500{
2656
+ --tw-bg-opacity: 1;
2657
+ background-color: rgb(66 153 225 / var(--tw-bg-opacity));
2658
+ }
2659
+ .bg-gray-100{
2660
+ --tw-bg-opacity: 1;
2661
+ background-color: rgb(243 246 250 / var(--tw-bg-opacity));
2662
+ }
2663
+ .bg-gray-800{
2664
+ --tw-bg-opacity: 1;
2665
+ background-color: rgb(45 55 72 / var(--tw-bg-opacity));
2666
+ }
2667
+ .bg-green-500{
2668
+ --tw-bg-opacity: 1;
2669
+ background-color: rgb(150 206 76 / var(--tw-bg-opacity));
2670
+ }
2671
+ .bg-neutral-300{
2672
+ --tw-bg-opacity: 1;
2673
+ background-color: rgb(212 212 212 / var(--tw-bg-opacity));
2674
+ }
2675
+ .bg-neutral-700{
2676
+ --tw-bg-opacity: 1;
2677
+ background-color: rgb(64 64 64 / var(--tw-bg-opacity));
2678
+ }
2679
+ .bg-neutral-800{
2680
+ --tw-bg-opacity: 1;
2681
+ background-color: rgb(38 38 38 / var(--tw-bg-opacity));
2682
+ }
2683
+ .bg-neutral-900{
2684
+ --tw-bg-opacity: 1;
2685
+ background-color: rgb(23 23 23 / var(--tw-bg-opacity));
2686
+ }
2687
+ .bg-red-500{
2688
+ --tw-bg-opacity: 1;
2689
+ background-color: rgb(239 68 68 / var(--tw-bg-opacity));
2690
+ }
2691
+ .bg-red-700{
2692
+ --tw-bg-opacity: 1;
2693
+ background-color: rgb(185 28 28 / var(--tw-bg-opacity));
2694
+ }
2695
+ .bg-transparent{
2696
+ background-color: transparent;
2697
+ }
2698
+ .bg-opacity-50{
2699
+ --tw-bg-opacity: 0.5;
2700
+ }
2701
+ .bg-\[url\(\'\/assets\/images\/Git-Logo-White\.svg\'\)\]{
2702
+ background-image: url('../assets/images/Git-Logo-White.svg');
2703
+ }
2704
+ .bg-right-top{
2705
+ background-position: right top;
2706
+ }
2707
+ .bg-no-repeat{
2708
+ background-repeat: no-repeat;
2709
+ }
2710
+ .bg-origin-padding{
2711
+ background-origin: padding-box;
2712
+ }
2713
+ .object-contain{
2714
+ -o-object-fit: contain;
2715
+ object-fit: contain;
2716
+ }
2717
+ .object-cover{
2718
+ -o-object-fit: cover;
2719
+ object-fit: cover;
2720
+ }
2721
+ .p-0{
2722
+ padding: 0px;
2723
+ }
2724
+ .p-1{
2725
+ padding: 0.25rem;
2726
+ }
2727
+ .p-2{
2728
+ padding: 0.5rem;
2729
+ }
2730
+ .p-3{
2731
+ padding: 0.75rem;
2732
+ }
2733
+ .p-4{
2734
+ padding: 1rem;
2735
+ }
2736
+ .p-5{
2737
+ padding: 1.25rem;
2738
+ }
2739
+ .p-6{
2740
+ padding: 1.5rem;
2741
+ }
2742
+ .p-8{
2743
+ padding: 2rem;
2744
+ }
2745
+ .px-0{
2746
+ padding-left: 0px;
2747
+ padding-right: 0px;
2748
+ }
2749
+ .px-10{
2750
+ padding-left: 2.5rem;
2751
+ padding-right: 2.5rem;
2752
+ }
2753
+ .px-2{
2754
+ padding-left: 0.5rem;
2755
+ padding-right: 0.5rem;
2756
+ }
2757
+ .px-4{
2758
+ padding-left: 1rem;
2759
+ padding-right: 1rem;
2760
+ }
2761
+ .py-0{
2762
+ padding-top: 0px;
2763
+ padding-bottom: 0px;
2764
+ }
2765
+ .py-1{
2766
+ padding-top: 0.25rem;
2767
+ padding-bottom: 0.25rem;
2768
+ }
2769
+ .pb-0{
2770
+ padding-bottom: 0px;
2771
+ }
2772
+ .pl-4{
2773
+ padding-left: 1rem;
2774
+ }
2775
+ .pl-6{
2776
+ padding-left: 1.5rem;
2777
+ }
2778
+ .pr-0{
2779
+ padding-right: 0px;
2780
+ }
2781
+ .pr-2{
2782
+ padding-right: 0.5rem;
2783
+ }
2784
+ .pt-2{
2785
+ padding-top: 0.5rem;
2786
+ }
2787
+ .pt-4{
2788
+ padding-top: 1rem;
2789
+ }
2790
+ .pt-6{
2791
+ padding-top: 1.5rem;
2792
+ }
2793
+ .pt-8{
2794
+ padding-top: 2rem;
2795
+ }
2796
+ .text-center{
2797
+ text-align: center;
2798
+ }
2799
+ .text-right{
2800
+ text-align: right;
2801
+ }
2802
+ .font-mono{
2803
+ font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
2804
+ }
2805
+ .font-sans{
2806
+ font-family: ui-sans-serif, system-ui, sans-serif, "Apple Color Emoji", "Segoe UI Emoji", "Segoe UI Symbol", "Noto Color Emoji";
2807
+ }
2808
+ .text-2xl{
2809
+ font-size: 1.5rem;
2810
+ }
2811
+ .text-3xl{
2812
+ font-size: 1.875rem;
2813
+ }
2814
+ .text-4xl{
2815
+ font-size: 2.25rem;
2816
+ }
2817
+ .text-lg{
2818
+ font-size: 1.125rem;
2819
+ }
2820
+ .text-sm{
2821
+ font-size: 0.875rem;
2822
+ }
2823
+ .text-xl{
2824
+ font-size: 1.25rem;
2825
+ }
2826
+ .text-xs{
2827
+ font-size: 0.75rem;
2828
+ }
2829
+ .font-bold{
2830
+ font-weight: 700;
2831
+ }
2832
+ .font-light{
2833
+ font-weight: 300;
2834
+ }
2835
+ .font-medium{
2836
+ font-weight: 500;
2837
+ }
2838
+ .font-normal{
2839
+ font-weight: 400;
2840
+ }
2841
+ .font-semibold{
2842
+ font-weight: 600;
2843
+ }
2844
+ .uppercase{
2845
+ text-transform: uppercase;
2846
+ }
2847
+ .italic{
2848
+ font-style: italic;
2849
+ }
2850
+ .text-blue-400{
2851
+ --tw-text-opacity: 1;
2852
+ color: rgb(99 179 237 / var(--tw-text-opacity));
2853
+ }
2854
+ .text-gray-400{
2855
+ --tw-text-opacity: 1;
2856
+ color: rgb(203 213 224 / var(--tw-text-opacity));
2857
+ }
2858
+ .text-green-500{
2859
+ --tw-text-opacity: 1;
2860
+ color: rgb(150 206 76 / var(--tw-text-opacity));
2861
+ }
2862
+ .text-highlight{
2863
+ color: var(--p-primary-color);
2864
+ }
2865
+ .text-muted{
2866
+ color: var(--p-text-muted-color);
2867
+ }
2868
+ .text-neutral-100{
2869
+ --tw-text-opacity: 1;
2870
+ color: rgb(245 245 245 / var(--tw-text-opacity));
2871
+ }
2872
+ .text-neutral-200{
2873
+ --tw-text-opacity: 1;
2874
+ color: rgb(229 229 229 / var(--tw-text-opacity));
2875
+ }
2876
+ .text-neutral-300{
2877
+ --tw-text-opacity: 1;
2878
+ color: rgb(212 212 212 / var(--tw-text-opacity));
2879
+ }
2880
+ .text-neutral-400{
2881
+ --tw-text-opacity: 1;
2882
+ color: rgb(163 163 163 / var(--tw-text-opacity));
2883
+ }
2884
+ .text-neutral-800{
2885
+ --tw-text-opacity: 1;
2886
+ color: rgb(38 38 38 / var(--tw-text-opacity));
2887
+ }
2888
+ .text-neutral-900{
2889
+ --tw-text-opacity: 1;
2890
+ color: rgb(23 23 23 / var(--tw-text-opacity));
2891
+ }
2892
+ .text-red-500{
2893
+ --tw-text-opacity: 1;
2894
+ color: rgb(239 68 68 / var(--tw-text-opacity));
2895
+ }
2896
+ .underline{
2897
+ text-decoration-line: underline;
2898
+ }
2899
+ .no-underline{
2900
+ text-decoration-line: none;
2901
+ }
2902
+ .antialiased{
2903
+ -webkit-font-smoothing: antialiased;
2904
+ -moz-osx-font-smoothing: grayscale;
2905
+ }
2906
+ .opacity-0{
2907
+ opacity: 0;
2908
+ }
2909
+ .opacity-100{
2910
+ opacity: 1;
2911
+ }
2912
+ .opacity-15{
2913
+ opacity: 0.15;
2914
+ }
2915
+ .opacity-25{
2916
+ opacity: 0.25;
2917
+ }
2918
+ .opacity-40{
2919
+ opacity: 0.4;
2920
+ }
2921
+ .opacity-50{
2922
+ opacity: 0.5;
2923
+ }
2924
+ .opacity-65{
2925
+ opacity: 0.65;
2926
+ }
2927
+ .opacity-75{
2928
+ opacity: 0.75;
2929
+ }
2930
+ .shadow-lg{
2931
+ --tw-shadow: 0 10px 15px -3px rgb(0 0 0 / 0.1), 0 4px 6px -4px rgb(0 0 0 / 0.1);
2932
+ --tw-shadow-colored: 0 10px 15px -3px var(--tw-shadow-color), 0 4px 6px -4px var(--tw-shadow-color);
2933
+ box-shadow: var(--tw-ring-offset-shadow, 0 0 #0000), var(--tw-ring-shadow, 0 0 #0000), var(--tw-shadow);
2934
+ }
2935
+ .outline{
2936
+ outline-style: solid;
2937
+ }
2938
+ .blur{
2939
+ --tw-blur: blur(8px);
2940
+ filter: var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow);
2941
+ }
2942
+ .drop-shadow{
2943
+ --tw-drop-shadow: drop-shadow(0 1px 2px rgb(0 0 0 / 0.1)) drop-shadow(0 1px 1px rgb(0 0 0 / 0.06));
2944
+ filter: var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow);
2945
+ }
2946
+ .invert{
2947
+ --tw-invert: invert(100%);
2948
+ filter: var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow);
2949
+ }
2950
+ .filter{
2951
+ filter: var(--tw-blur) var(--tw-brightness) var(--tw-contrast) var(--tw-grayscale) var(--tw-hue-rotate) var(--tw-invert) var(--tw-saturate) var(--tw-sepia) var(--tw-drop-shadow);
2952
+ }
2953
+ .backdrop-filter{
2954
+ -webkit-backdrop-filter: var(--tw-backdrop-blur) var(--tw-backdrop-brightness) var(--tw-backdrop-contrast) var(--tw-backdrop-grayscale) var(--tw-backdrop-hue-rotate) var(--tw-backdrop-invert) var(--tw-backdrop-opacity) var(--tw-backdrop-saturate) var(--tw-backdrop-sepia);
2955
+ backdrop-filter: var(--tw-backdrop-blur) var(--tw-backdrop-brightness) var(--tw-backdrop-contrast) var(--tw-backdrop-grayscale) var(--tw-backdrop-hue-rotate) var(--tw-backdrop-invert) var(--tw-backdrop-opacity) var(--tw-backdrop-saturate) var(--tw-backdrop-sepia);
2956
+ }
2957
+ .transition{
2958
+ transition-property: color, background-color, border-color, text-decoration-color, fill, stroke, opacity, box-shadow, transform, filter, -webkit-backdrop-filter;
2959
+ transition-property: color, background-color, border-color, text-decoration-color, fill, stroke, opacity, box-shadow, transform, filter, backdrop-filter;
2960
+ transition-property: color, background-color, border-color, text-decoration-color, fill, stroke, opacity, box-shadow, transform, filter, backdrop-filter, -webkit-backdrop-filter;
2961
+ transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);
2962
+ transition-duration: 150ms;
2963
+ }
2964
+ .transition-all{
2965
+ transition-property: all;
2966
+ transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);
2967
+ transition-duration: 150ms;
2968
+ }
2969
+ .transition-opacity{
2970
+ transition-property: opacity;
2971
+ transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);
2972
+ transition-duration: 150ms;
2973
+ }
2974
+ .duration-100{
2975
+ transition-duration: 100ms;
2976
+ }
2977
+ .duration-200{
2978
+ transition-duration: 200ms;
2979
+ }
2980
+ .duration-300{
2981
+ transition-duration: 300ms;
2982
+ }
2983
+ .ease-in{
2984
+ transition-timing-function: cubic-bezier(0.4, 0, 1, 1);
2985
+ }
2986
+ .ease-in-out{
2987
+ transition-timing-function: cubic-bezier(0.4, 0, 0.2, 1);
2988
+ }
2989
+ .ease-out{
2990
+ transition-timing-function: cubic-bezier(0, 0, 0.2, 1);
2991
+ }
2992
+ .content-\[\'\'\]{
2993
+ --tw-content: '';
2994
+ content: var(--tw-content);
2995
+ }
2996
+ }
2997
+
2998
+ :root {
2999
+ --fg-color: #000;
3000
+ --bg-color: #fff;
3001
+ --comfy-menu-bg: #353535;
3002
+ --comfy-menu-secondary-bg: #292929;
3003
+ --comfy-topbar-height: 2.5rem;
3004
+ --comfy-input-bg: #222;
3005
+ --input-text: #ddd;
3006
+ --descrip-text: #999;
3007
+ --drag-text: #ccc;
3008
+ --error-text: #ff4444;
3009
+ --border-color: #4e4e4e;
3010
+ --tr-even-bg-color: #222;
3011
+ --tr-odd-bg-color: #353535;
3012
+ --primary-bg: #236692;
3013
+ --primary-fg: #ffffff;
3014
+ --primary-hover-bg: #3485bb;
3015
+ --primary-hover-fg: #ffffff;
3016
+ --content-bg: #e0e0e0;
3017
+ --content-fg: #000;
3018
+ --content-hover-bg: #adadad;
3019
+ --content-hover-fg: #000;
3020
+ }
3021
+
3022
+ @media (prefers-color-scheme: dark) {
3023
+ :root {
3024
+ --fg-color: #fff;
3025
+ --bg-color: #202020;
3026
+ --content-bg: #4e4e4e;
3027
+ --content-fg: #fff;
3028
+ --content-hover-bg: #222;
3029
+ --content-hover-fg: #fff;
3030
+ }
3031
+ }
3032
+
3033
+ body {
3034
+ width: 100vw;
3035
+ height: 100vh;
3036
+ margin: 0;
3037
+ overflow: hidden;
3038
+ grid-template-columns: auto 1fr auto;
3039
+ grid-template-rows: auto 1fr auto;
3040
+ background: var(--bg-color) var(--bg-img);
3041
+ color: var(--fg-color);
3042
+ min-height: -webkit-fill-available;
3043
+ max-height: -webkit-fill-available;
3044
+ min-width: -webkit-fill-available;
3045
+ max-width: -webkit-fill-available;
3046
+ font-family: Arial, sans-serif;
3047
+ }
3048
+
3049
+ /**
3050
+ +------------------+------------------+------------------+
3051
+ | |
3052
+ | .comfyui-body- |
3053
+ | top |
3054
+ | (spans all cols) |
3055
+ | |
3056
+ +------------------+------------------+------------------+
3057
+ | | | |
3058
+ | .comfyui-body- | #graph-canvas | .comfyui-body- |
3059
+ | left | | right |
3060
+ | | | |
3061
+ | | | |
3062
+ +------------------+------------------+------------------+
3063
+ | |
3064
+ | .comfyui-body- |
3065
+ | bottom |
3066
+ | (spans all cols) |
3067
+ | |
3068
+ +------------------+------------------+------------------+
3069
+ */
3070
+
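+ /* A minimal sketch (not part of the original stylesheet; element tags are
+    assumed for illustration) of the markup the grid diagram above maps onto.
+    Only the class names and the #graph-canvas id come from the rules below:
+
+    <body>
+      <div class="comfyui-body-top">…</div>
+      <div class="comfyui-body-left">…</div>
+      <div class="graph-canvas-container">
+        <canvas id="graph-canvas"></canvas>
+      </div>
+      <div class="comfyui-body-right">…</div>
+      <div class="comfyui-body-bottom">…</div>
+    </body>
+ */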
3071
+ .comfyui-body-top {
3072
+ order: -5;
3073
+ /* Span across all columns */
3074
+ grid-column: 1/-1;
3075
+ /* Position at the first row */
3076
+ grid-row: 1;
3077
+ /* Top menu bar dropdown needs to be above the graph canvas splitter overlay, which is z-index: 999 */
3078
+ /* Top menu bar z-index needs to be higher than bottom menu bar z-index as by default
3079
+ pysssss's image feed is located at body-bottom, and it can overlap with the queue button, which
3080
+ is located in body-top. */
3081
+ z-index: 1001;
3082
+ display: flex;
3083
+ flex-direction: column;
3084
+ }
3085
+
3086
+ .comfyui-body-left {
3087
+ order: -4;
3088
+ /* Position in the first column */
3089
+ grid-column: 1;
3090
+ /* Position below the top element */
3091
+ grid-row: 2;
3092
+ z-index: 10;
3093
+ display: flex;
3094
+ }
3095
+
3096
+ .graph-canvas-container {
3097
+ width: 100%;
3098
+ height: 100%;
3099
+ order: -3;
3100
+ grid-column: 2;
3101
+ grid-row: 2;
3102
+ position: relative;
3103
+ overflow: hidden;
3104
+ }
3105
+
3106
+ #graph-canvas {
3107
+ width: 100%;
3108
+ height: 100%;
3109
+ touch-action: none;
3110
+ }
3111
+
3112
+ .comfyui-body-right {
3113
+ order: -2;
3114
+ z-index: 10;
3115
+ grid-column: 3;
3116
+ grid-row: 2;
3117
+ }
3118
+
3119
+ .comfyui-body-bottom {
3120
+ order: 4;
3121
+ /* Span across all columns */
3122
+ grid-column: 1/-1;
3123
+ grid-row: 3;
3124
+ /* Bottom menu bar dropdown needs to be above the graph canvas splitter overlay, which is z-index: 999 */
3125
+ z-index: 1000;
3126
+ display: flex;
3127
+ flex-direction: column;
3128
+ }
3129
+
3130
+ .comfy-multiline-input {
3131
+ background-color: var(--comfy-input-bg);
3132
+ color: var(--input-text);
3133
+ overflow: hidden;
3134
+ overflow-y: auto;
3135
+ padding: 2px;
3136
+ resize: none;
3137
+ border: none;
3138
+ box-sizing: border-box;
3139
+ font-size: var(--comfy-textarea-font-size);
3140
+ }
3141
+
3142
+ .comfy-markdown {
3143
+ /* We assign the textarea and the Tiptap editor to the same CSS grid area to stack them on top of one another. */
3144
+ display: grid;
3145
+ }
3146
+
3147
+ .comfy-markdown > textarea {
3148
+ grid-area: 1 / 1 / 2 / 2;
3149
+ }
3150
+
3151
+ .comfy-markdown .tiptap {
3152
+ grid-area: 1 / 1 / 2 / 2;
3153
+ background-color: var(--comfy-input-bg);
3154
+ color: var(--input-text);
3155
+ overflow: hidden;
3156
+ overflow-y: auto;
3157
+ resize: none;
3158
+ border: none;
3159
+ box-sizing: border-box;
3160
+ font-size: var(--comfy-textarea-font-size);
3161
+ height: 100%;
3162
+ padding: 0.5em;
3163
+ }
3164
+
3165
+ .comfy-markdown.editing .tiptap {
3166
+ display: none;
3167
+ }
3168
+
3169
+ .comfy-markdown .tiptap :first-child {
3170
+ margin-top: 0;
3171
+ }
3172
+
3173
+ .comfy-markdown .tiptap :last-child {
3174
+ margin-bottom: 0;
3175
+ }
3176
+
3177
+ .comfy-markdown .tiptap blockquote {
3178
+ border-left: medium solid;
3179
+ margin-left: 1em;
3180
+ padding-left: 0.5em;
3181
+ }
3182
+
3183
+ .comfy-markdown .tiptap pre {
3184
+ border: thin dotted;
3185
+ border-radius: 0.5em;
3186
+ margin: 0.5em;
3187
+ padding: 0.5em;
3188
+ }
3189
+
3190
+ .comfy-markdown .tiptap table {
3191
+ border-collapse: collapse;
3192
+ }
3193
+
3194
+ .comfy-markdown .tiptap th {
3195
+ text-align: left;
3196
+ background: var(--comfy-menu-bg);
3197
+ }
3198
+
3199
+ .comfy-markdown .tiptap th,
3200
+ .comfy-markdown .tiptap td {
3201
+ padding: 0.5em;
3202
+ border: thin solid;
3203
+ }
3204
+
3205
+ .comfy-modal {
3206
+ display: none; /* Hidden by default */
3207
+ position: fixed; /* Stay in place */
3208
+ z-index: 100; /* Sit on top */
3209
+ padding: 30px 30px 10px 30px;
3210
+ background-color: var(--comfy-menu-bg); /* Modal background */
3211
+ color: var(--error-text);
3212
+ box-shadow: 0 0 20px #888888;
3213
+ border-radius: 10px;
3214
+ top: 50%;
3215
+ left: 50%;
3216
+ max-width: 80vw;
3217
+ max-height: 80vh;
3218
+ transform: translate(-50%, -50%);
3219
+ overflow: hidden;
3220
+ justify-content: center;
3221
+ font-family: monospace;
3222
+ font-size: 15px;
3223
+ }
3224
+
3225
+ .comfy-modal-content {
3226
+ display: flex;
3227
+ flex-direction: column;
3228
+ }
3229
+
3230
+ .comfy-modal p {
3231
+ overflow: auto;
3232
+ white-space: pre-line; /* This will respect line breaks */
3233
+ margin-bottom: 20px; /* Add some margin between the text and the close button*/
3234
+ }
3235
+
3236
+ .comfy-modal select,
3237
+ .comfy-modal input[type='button'],
3238
+ .comfy-modal input[type='checkbox'] {
3239
+ margin: 3px 3px 3px 4px;
3240
+ }
3241
+
3242
+ .comfy-menu {
3243
+ font-size: 15px;
3244
+ position: absolute;
3245
+ top: 50%;
3246
+ right: 0;
3247
+ text-align: center;
3248
+ z-index: 999;
3249
+ width: 190px;
3250
+ display: flex;
3251
+ flex-direction: column;
3252
+ align-items: center;
3253
+ color: var(--descrip-text);
3254
+ background-color: var(--comfy-menu-bg);
3255
+ font-family: sans-serif;
3256
+ padding: 10px;
3257
+ border-radius: 0 8px 8px 8px;
3258
+ box-shadow: 3px 3px 8px rgba(0, 0, 0, 0.4);
3259
+ }
3260
+
3261
+ .comfy-menu-header {
3262
+ display: flex;
3263
+ }
3264
+
3265
+ .comfy-menu-actions {
3266
+ display: flex;
3267
+ gap: 3px;
3268
+ align-items: center;
3269
+ height: 20px;
3270
+ position: relative;
3271
+ top: -1px;
3272
+ font-size: 22px;
3273
+ }
3274
+
3275
+ .comfy-menu .comfy-menu-actions button {
3276
+ background-color: rgba(0, 0, 0, 0);
3277
+ padding: 0;
3278
+ border: none;
3279
+ cursor: pointer;
3280
+ font-size: inherit;
3281
+ }
3282
+
3283
+ .comfy-menu .comfy-menu-actions .comfy-settings-btn {
3284
+ font-size: 0.6em;
3285
+ }
3286
+
3287
+ button.comfy-close-menu-btn {
3288
+ font-size: 1em;
3289
+ line-height: 12px;
3290
+ color: #ccc;
3291
+ position: relative;
3292
+ top: -1px;
3293
+ }
3294
+
3295
+ .comfy-menu-queue-size {
3296
+ flex: auto;
3297
+ }
3298
+
3299
+ .comfy-menu button,
3300
+ .comfy-modal button {
3301
+ font-size: 20px;
3302
+ }
3303
+
3304
+ .comfy-menu-btns {
3305
+ margin-bottom: 10px;
3306
+ width: 100%;
3307
+ }
3308
+
3309
+ .comfy-menu-btns button {
3310
+ font-size: 10px;
3311
+ width: 50%;
3312
+ color: var(--descrip-text) !important;
3313
+ }
3314
+
3315
+ .comfy-menu > button {
3316
+ width: 100%;
3317
+ }
3318
+
3319
+ .comfy-btn,
3320
+ .comfy-menu > button,
3321
+ .comfy-menu-btns button,
3322
+ .comfy-menu .comfy-list button,
3323
+ .comfy-modal button {
3324
+ color: var(--input-text);
3325
+ background-color: var(--comfy-input-bg);
3326
+ border-radius: 8px;
3327
+ border-color: var(--border-color);
3328
+ border-style: solid;
3329
+ margin-top: 2px;
3330
+ }
3331
+
3332
+ .comfy-btn:hover:not(:disabled),
3333
+ .comfy-menu > button:hover,
3334
+ .comfy-menu-btns button:hover,
3335
+ .comfy-menu .comfy-list button:hover,
3336
+ .comfy-modal button:hover,
3337
+ .comfy-menu-actions button:hover {
3338
+ filter: brightness(1.2);
3339
+ will-change: transform;
3340
+ cursor: pointer;
3341
+ }
3342
+
3343
+ span.drag-handle {
3344
+ width: 10px;
3345
+ height: 20px;
3346
+ display: inline-block;
3347
+ overflow: hidden;
3348
+ line-height: 5px;
3349
+ padding: 3px 4px;
3350
+ cursor: move;
3351
+ vertical-align: middle;
3352
+ margin-top: -0.4em;
3353
+ margin-left: -0.2em;
3354
+ font-size: 12px;
3355
+ font-family: sans-serif;
3356
+ letter-spacing: 2px;
3357
+ color: var(--drag-text);
3358
+ text-shadow: 1px 0 1px black;
3359
+ touch-action: none;
3360
+ }
3361
+
3362
+ span.drag-handle::after {
3363
+ content: '.. .. ..';
3364
+ }
3365
+
3366
+ .comfy-queue-btn {
3367
+ width: 100%;
3368
+ }
3369
+
3370
+ .comfy-list {
3371
+ color: var(--descrip-text);
3372
+ background-color: var(--comfy-menu-bg);
3373
+ margin-bottom: 10px;
3374
+ border-color: var(--border-color);
3375
+ border-style: solid;
3376
+ }
3377
+
3378
+ .comfy-list-items {
3379
+ overflow-y: scroll;
3380
+ max-height: 100px;
3381
+ min-height: 25px;
3382
+ background-color: var(--comfy-input-bg);
3383
+ padding: 5px;
3384
+ }
3385
+
3386
+ .comfy-list h4 {
3387
+ min-width: 160px;
3388
+ margin: 0;
3389
+ padding: 3px;
3390
+ font-weight: normal;
3391
+ }
3392
+
3393
+ .comfy-list-items button {
3394
+ font-size: 10px;
3395
+ }
3396
+
3397
+ .comfy-list-actions {
3398
+ margin: 5px;
3399
+ display: flex;
3400
+ gap: 5px;
3401
+ justify-content: center;
3402
+ }
3403
+
3404
+ .comfy-list-actions button {
3405
+ font-size: 12px;
3406
+ }
3407
+
3408
+ button.comfy-queue-btn {
3409
+ margin: 6px 0 !important;
3410
+ }
3411
+
3412
+ .comfy-modal.comfy-settings,
3413
+ .comfy-modal.comfy-manage-templates {
3414
+ text-align: center;
3415
+ font-family: sans-serif;
3416
+ color: var(--descrip-text);
3417
+ z-index: 99;
3418
+ }
3419
+
3420
+ .comfy-modal.comfy-settings input[type='range'] {
3421
+ vertical-align: middle;
3422
+ }
3423
+
3424
+ .comfy-modal.comfy-settings input[type='range'] + input[type='number'] {
3425
+ width: 3.5em;
3426
+ }
3427
+
3428
+ .comfy-modal input,
3429
+ .comfy-modal select {
3430
+ color: var(--input-text);
3431
+ background-color: var(--comfy-input-bg);
3432
+ border-radius: 8px;
3433
+ border-color: var(--border-color);
3434
+ border-style: solid;
3435
+ font-size: inherit;
3436
+ }
3437
+
3438
+ .comfy-tooltip-indicator {
3439
+ text-decoration: underline;
3440
+ text-decoration-style: dashed;
3441
+ }
3442
+
3443
+ @media only screen and (max-height: 850px) {
3444
+ .comfy-menu {
3445
+ top: 0 !important;
3446
+ bottom: 0 !important;
3447
+ left: auto !important;
3448
+ right: 0 !important;
3449
+ border-radius: 0;
3450
+ }
3451
+
3452
+ .comfy-menu span.drag-handle {
3453
+ display: none;
3454
+ }
3455
+
3456
+ .comfy-menu-queue-size {
3457
+ flex: unset;
3458
+ }
3459
+
3460
+ .comfy-menu-header {
3461
+ justify-content: space-between;
3462
+ }
3463
+ .comfy-menu-actions {
3464
+ gap: 10px;
3465
+ font-size: 28px;
3466
+ }
3467
+ }
3468
+
3469
+ /* Input popup */
3470
+
3471
+ .graphdialog {
3472
+ min-height: 1em;
3473
+ background-color: var(--comfy-menu-bg);
3474
+ }
3475
+
3476
+ .graphdialog .name {
3477
+ font-size: 14px;
3478
+ font-family: sans-serif;
3479
+ color: var(--descrip-text);
3480
+ }
3481
+
3482
+ .graphdialog button {
3483
+ margin-top: unset;
3484
+ vertical-align: unset;
3485
+ height: 1.6em;
3486
+ padding-right: 8px;
3487
+ }
3488
+
3489
+ .graphdialog input,
3490
+ .graphdialog textarea,
3491
+ .graphdialog select {
3492
+ background-color: var(--comfy-input-bg);
3493
+ border: 2px solid;
3494
+ border-color: var(--border-color);
3495
+ color: var(--input-text);
3496
+ border-radius: 12px 0 0 12px;
3497
+ }
3498
+
3499
+ /* Dialogs */
3500
+
3501
+ dialog {
3502
+ box-shadow: 0 0 20px #888888;
3503
+ }
3504
+
3505
+ dialog::backdrop {
3506
+ background: rgba(0, 0, 0, 0.5);
3507
+ }
3508
+
3509
+ .comfy-dialog.comfyui-dialog.comfy-modal {
3510
+ top: 0;
3511
+ left: 0;
3512
+ right: 0;
3513
+ bottom: 0;
3514
+ transform: none;
3515
+ }
3516
+
3517
+ .comfy-dialog.comfy-modal {
3518
+ font-family: Arial, sans-serif;
3519
+ border-color: var(--bg-color);
3520
+ box-shadow: none;
3521
+ border: 2px solid var(--border-color);
3522
+ }
3523
+
3524
+ .comfy-dialog .comfy-modal-content {
3525
+ flex-direction: row;
3526
+ flex-wrap: wrap;
3527
+ gap: 10px;
3528
+ color: var(--fg-color);
3529
+ }
3530
+
3531
+ .comfy-dialog .comfy-modal-content h3 {
3532
+ margin-top: 0;
3533
+ }
3534
+
3535
+ .comfy-dialog .comfy-modal-content > p {
3536
+ width: 100%;
3537
+ }
3538
+
3539
+ .comfy-dialog .comfy-modal-content > .comfyui-button {
3540
+ flex: 1;
3541
+ justify-content: center;
3542
+ }
3543
+
3544
+ #comfy-settings-dialog {
3545
+ padding: 0;
3546
+ width: 41rem;
3547
+ }
3548
+
3549
+ #comfy-settings-dialog tr > td:first-child {
3550
+ text-align: right;
3551
+ }
3552
+
3553
+ #comfy-settings-dialog tbody button,
3554
+ #comfy-settings-dialog table > button {
3555
+ background-color: var(--bg-color);
3556
+ border: 1px var(--border-color) solid;
3557
+ border-radius: 0;
3558
+ color: var(--input-text);
3559
+ font-size: 1rem;
3560
+ padding: 0.5rem;
3561
+ }
3562
+
3563
+ #comfy-settings-dialog button:hover {
3564
+ background-color: var(--tr-odd-bg-color);
3565
+ }
3566
+
3567
+ /* General CSS for tables */
3568
+
3569
+ .comfy-table {
3570
+ border-collapse: collapse;
3571
+ color: var(--input-text);
3572
+ font-family: Arial, sans-serif;
3573
+ width: 100%;
3574
+ }
3575
+
3576
+ .comfy-table caption {
3577
+ position: sticky;
3578
+ top: 0;
3579
+ background-color: var(--bg-color);
3580
+ color: var(--input-text);
3581
+ font-size: 1rem;
3582
+ font-weight: bold;
3583
+ padding: 8px;
3584
+ text-align: center;
3585
+ border-bottom: 1px solid var(--border-color);
3586
+ }
3587
+
3588
+ .comfy-table caption .comfy-btn {
3589
+ position: absolute;
3590
+ top: -2px;
3591
+ right: 0;
3592
+ bottom: 0;
3593
+ cursor: pointer;
3594
+ border: none;
3595
+ height: 100%;
3596
+ border-radius: 0;
3597
+ aspect-ratio: 1/1;
3598
+ -webkit-user-select: none;
3599
+ -moz-user-select: none;
3600
+ user-select: none;
3601
+ font-size: 20px;
3602
+ }
3603
+
3604
+ .comfy-table caption .comfy-btn:focus {
3605
+ outline: none;
3606
+ }
3607
+
3608
+ .comfy-table tr:nth-child(even) {
3609
+ background-color: var(--tr-even-bg-color);
3610
+ }
3611
+
3612
+ .comfy-table tr:nth-child(odd) {
3613
+ background-color: var(--tr-odd-bg-color);
3614
+ }
3615
+
3616
+ .comfy-table td,
3617
+ .comfy-table th {
3618
+ border: 1px solid var(--border-color);
3619
+ padding: 8px;
3620
+ }
3621
+
3622
+ /* Context menu */
3623
+
3624
+ .litegraph .dialog {
3625
+ z-index: 1;
3626
+ font-family: Arial, sans-serif;
3627
+ }
3628
+
3629
+ .litegraph .litemenu-entry.has_submenu {
3630
+ position: relative;
3631
+ padding-right: 20px;
3632
+ }
3633
+
3634
+ .litemenu-entry.has_submenu::after {
3635
+ content: '>';
3636
+ position: absolute;
3637
+ top: 0;
3638
+ right: 2px;
3639
+ }
3640
+
3641
+ .litegraph.litecontextmenu,
3642
+ .litegraph.litecontextmenu.dark {
3643
+ z-index: 9999 !important;
3644
+ background-color: var(--comfy-menu-bg) !important;
3645
+ }
3646
+
3647
+ .litegraph.litecontextmenu
3648
+ .litemenu-entry:hover:not(.disabled):not(.separator) {
3649
+ background-color: var(--comfy-menu-hover-bg, var(--border-color)) !important;
3650
+ color: var(--fg-color);
3651
+ }
3652
+
3653
+ .litegraph.litecontextmenu .litemenu-entry.submenu,
3654
+ .litegraph.litecontextmenu.dark .litemenu-entry.submenu {
3655
+ background-color: var(--comfy-menu-bg) !important;
3656
+ color: var(--input-text);
3657
+ }
3658
+
3659
+ .litegraph.litecontextmenu input {
3660
+ background-color: var(--comfy-input-bg) !important;
3661
+ color: var(--input-text) !important;
3662
+ }
3663
+
3664
+ .comfy-context-menu-filter {
3665
+ box-sizing: border-box;
3666
+ border: 1px solid #999;
3667
+ margin: 0 0 5px 5px;
3668
+ width: calc(100% - 10px);
3669
+ }
3670
+
3671
+ .comfy-img-preview {
3672
+ pointer-events: none;
3673
+ overflow: hidden;
3674
+ display: flex;
3675
+ flex-wrap: wrap;
3676
+ align-content: flex-start;
3677
+ justify-content: center;
3678
+ }
3679
+
3680
+ .comfy-img-preview img {
3681
+ -o-object-fit: contain;
3682
+ object-fit: contain;
3683
+ width: var(--comfy-img-preview-width);
3684
+ height: var(--comfy-img-preview-height);
3685
+ }
3686
+
3687
+ .comfy-missing-nodes li button {
3688
+ font-size: 12px;
3689
+ margin-left: 5px;
3690
+ }
3691
+
3692
+ /* Search box */
3693
+
3694
+ .litegraph.litesearchbox {
3695
+ z-index: 9999 !important;
3696
+ background-color: var(--comfy-menu-bg) !important;
3697
+ overflow: hidden;
3698
+ display: block;
3699
+ }
3700
+
3701
+ .litegraph.litesearchbox input,
3702
+ .litegraph.litesearchbox select {
3703
+ background-color: var(--comfy-input-bg) !important;
3704
+ color: var(--input-text);
3705
+ }
3706
+
3707
+ .litegraph.lite-search-item {
3708
+ color: var(--input-text);
3709
+ background-color: var(--comfy-input-bg);
3710
+ filter: brightness(80%);
3711
+ will-change: transform;
3712
+ padding-left: 0.2em;
3713
+ }
3714
+
3715
+ .litegraph.lite-search-item.generic_type {
3716
+ color: var(--input-text);
3717
+ filter: brightness(50%);
3718
+ will-change: transform;
3719
+ }
3720
+
3721
+ @media only screen and (max-width: 450px) {
3722
+ #comfy-settings-dialog .comfy-table tbody {
3723
+ display: grid;
3724
+ }
3725
+ #comfy-settings-dialog .comfy-table tr {
3726
+ display: grid;
3727
+ }
3728
+ #comfy-settings-dialog tr > td:first-child {
3729
+ text-align: center;
3730
+ border-bottom: none;
3731
+ padding-bottom: 0;
3732
+ }
3733
+ #comfy-settings-dialog tr > td:not(:first-child) {
3734
+ text-align: center;
3735
+ border-top: none;
3736
+ }
3737
+ }
3738
+
3739
+ audio.comfy-audio.empty-audio-widget {
3740
+ display: none;
3741
+ }
3742
+
3743
+ #vue-app {
3744
+ position: absolute;
3745
+ top: 0;
3746
+ left: 0;
3747
+ width: 100%;
3748
+ height: 100%;
3749
+ pointer-events: none;
3750
+ }
3751
+
3752
+ /* Set auto complete panel's width as it is not accessible within vue-root */
3753
+ .p-autocomplete-overlay {
3754
+ max-width: 25vw;
3755
+ }
3756
+
3757
+ .p-tree-node-content {
3758
+ padding: var(--comfy-tree-explorer-item-padding) !important;
3759
+ }
3760
+
3761
+ /* Load3d styles */
3762
+ .comfy-load-3d,
3763
+ .comfy-load-3d-animation,
3764
+ .comfy-preview-3d,
3765
+ .comfy-preview-3d-animation{
3766
+ display: flex;
3767
+ flex-direction: column;
3768
+ background: transparent;
3769
+ flex: 1;
3770
+ position: relative;
3771
+ overflow: hidden;
3772
+ }
3773
+
3774
+ .comfy-load-3d canvas,
3775
+ .comfy-load-3d-animation canvas,
3776
+ .comfy-preview-3d canvas,
3777
+ .comfy-preview-3d-animation canvas{
3778
+ display: flex;
3779
+ width: 100% !important;
3780
+ height: 100% !important;
3781
+ }
3782
+
3783
+ /* End of Load3d styles */
3784
+
3785
+ /* [Desktop] Electron window specific styles */
3786
+ .app-drag {
3787
+ app-region: drag;
3788
+ }
3789
+
3790
+ .no-drag {
3791
+ app-region: no-drag;
3792
+ }
3793
+
3794
+ .window-actions-spacer {
3795
+ width: calc(100vw - env(titlebar-area-width, 100vw));
3796
+ }
3797
+ /* End of [Desktop] Electron window specific styles */
3798
+ .hover\:bg-neutral-700:hover{
3799
+ --tw-bg-opacity: 1;
3800
+ background-color: rgb(64 64 64 / var(--tw-bg-opacity));
3801
+ }
3802
+ .hover\:bg-opacity-75:hover{
3803
+ --tw-bg-opacity: 0.75;
3804
+ }
3805
+ .hover\:text-blue-300:hover{
3806
+ --tw-text-opacity: 1;
3807
+ color: rgb(144 205 244 / var(--tw-text-opacity));
3808
+ }
3809
+ .hover\:opacity-100:hover{
3810
+ opacity: 1;
3811
+ }
3812
+ @media (prefers-reduced-motion: no-preference){
3813
+
3814
+ .motion-safe\:w-0{
3815
+ width: 0px;
3816
+ }
3817
+
3818
+ .motion-safe\:opacity-0{
3819
+ opacity: 0;
3820
+ }
3821
+
3822
+ .group\/sidebar-tab:focus-within .motion-safe\:group-focus-within\/sidebar-tab\:w-auto{
3823
+ width: auto;
3824
+ }
3825
+
3826
+ .group\/sidebar-tab:focus-within .motion-safe\:group-focus-within\/sidebar-tab\:opacity-100{
3827
+ opacity: 1;
3828
+ }
3829
+
3830
+ .group\/sidebar-tab:hover .motion-safe\:group-hover\/sidebar-tab\:w-auto{
3831
+ width: auto;
3832
+ }
3833
+
3834
+ .group\/sidebar-tab:hover .motion-safe\:group-hover\/sidebar-tab\:opacity-100{
3835
+ opacity: 1;
3836
+ }
3837
+
3838
+ .group\/tree-node:hover .motion-safe\:group-hover\/tree-node\:opacity-100{
3839
+ opacity: 1;
3840
+ }
3841
+ }
3842
+ @media not all and (min-width: 640px){
3843
+
3844
+ .max-sm\:hidden{
3845
+ display: none;
3846
+ }
3847
+ }
3848
+ @media (min-width: 768px){
3849
+
3850
+ .md\:flex{
3851
+ display: flex;
3852
+ }
3853
+
3854
+ .md\:hidden{
3855
+ display: none;
3856
+ }
3857
+ }
3858
+ @media (min-width: 1536px){
3859
+
3860
+ .\32xl\:mx-4{
3861
+ margin-left: 1rem;
3862
+ margin-right: 1rem;
3863
+ }
3864
+
3865
+ .\32xl\:w-64{
3866
+ width: 16rem;
3867
+ }
3868
+
3869
+ .\32xl\:max-w-full{
3870
+ max-width: 100%;
3871
+ }
3872
+
3873
+ .\32xl\:p-16{
3874
+ padding: 4rem;
3875
+ }
3876
+
3877
+ .\32xl\:p-4{
3878
+ padding: 1rem;
3879
+ }
3880
+
3881
+ .\32xl\:p-\[var\(--p-dialog-content-padding\)\]{
3882
+ padding: var(--p-dialog-content-padding);
3883
+ }
3884
+
3885
+ .\32xl\:p-\[var\(--p-dialog-header-padding\)\]{
3886
+ padding: var(--p-dialog-header-padding);
3887
+ }
3888
+
3889
+ .\32xl\:px-4{
3890
+ padding-left: 1rem;
3891
+ padding-right: 1rem;
3892
+ }
3893
+
3894
+ .\32xl\:text-sm{
3895
+ font-size: 0.875rem;
3896
+ }
3897
+ }
3898
+ @media (prefers-color-scheme: dark){
3899
+
3900
+ .dark\:bg-gray-800{
3901
+ --tw-bg-opacity: 1;
3902
+ background-color: rgb(45 55 72 / var(--tw-bg-opacity));
3903
+ }
3904
+ }
3905
+
3906
+ .global-dialog .p-dialog-header {
3907
+ padding: 0.5rem
3908
+ }
3909
+ @media (min-width: 1536px) {
3910
+ .global-dialog .p-dialog-header {
3911
+ padding: var(--p-dialog-header-padding)
3912
+ }
3913
+ }
3914
+ .global-dialog .p-dialog-header {
3915
+ padding-bottom: 0px
3916
+ }
3917
+ .global-dialog .p-dialog-content {
3918
+ padding: 0.5rem
3919
+ }
3920
+ @media (min-width: 1536px) {
3921
+ .global-dialog .p-dialog-content {
3922
+ padding: var(--p-dialog-content-padding)
3923
+ }
3924
+ }
3925
+ .global-dialog .p-dialog-content {
3926
+ padding-top: 0px
3927
+ }
3928
+
3929
+ .prompt-dialog-content[data-v-3df70997] {
3930
+ white-space: pre-wrap;
3931
+ }
3932
+
3933
+ .no-results-placeholder[data-v-f2b77816] .p-card {
3934
+ background-color: var(--surface-ground);
3935
+ text-align: center;
3936
+ box-shadow: unset;
3937
+ }
3938
+ .no-results-placeholder h3[data-v-f2b77816] {
3939
+ color: var(--text-color);
3940
+ margin-bottom: 0.5rem;
3941
+ }
3942
+ .no-results-placeholder p[data-v-f2b77816] {
3943
+ color: var(--text-color-secondary);
3944
+ margin-bottom: 1rem;
3945
+ }
3946
+
3947
+ .comfy-error-report[data-v-3faf7785] {
3948
+ display: flex;
3949
+ flex-direction: column;
3950
+ gap: 1rem;
3951
+ }
3952
+ .action-container[data-v-3faf7785] {
3953
+ display: flex;
3954
+ gap: 1rem;
3955
+ justify-content: flex-end;
3956
+ }
3957
+ .wrapper-pre[data-v-3faf7785] {
3958
+ white-space: pre-wrap;
3959
+ word-wrap: break-word;
3960
+ }
3961
+
3962
+ .comfy-missing-nodes[data-v-425cc3ac] {
3963
+ max-height: 300px;
3964
+ overflow-y: auto;
3965
+ }
3966
+ .node-hint[data-v-425cc3ac] {
3967
+ margin-left: 0.5rem;
3968
+ font-style: italic;
3969
+ color: var(--text-color-secondary);
3970
+ }
3971
+ [data-v-425cc3ac] .p-button {
3972
+ margin-left: auto;
3973
+ }
3974
+
3975
+ .comfy-missing-models[data-v-f8d63775] {
3976
+ max-height: 300px;
3977
+ overflow-y: auto;
3978
+ }
3979
+
3980
+ [data-v-53692f7e] .i-badge {
3981
+
3982
+ --tw-bg-opacity: 1;
3983
+
3984
+ background-color: rgb(150 206 76 / var(--tw-bg-opacity));
3985
+
3986
+ --tw-text-opacity: 1;
3987
+
3988
+ color: rgb(255 255 255 / var(--tw-text-opacity))
3989
+ }
3990
+ [data-v-53692f7e] .o-badge {
3991
+
3992
+ --tw-bg-opacity: 1;
3993
+
3994
+ background-color: rgb(239 68 68 / var(--tw-bg-opacity));
3995
+
3996
+ --tw-text-opacity: 1;
3997
+
3998
+ color: rgb(255 255 255 / var(--tw-text-opacity))
3999
+ }
4000
+ [data-v-53692f7e] .c-badge {
4001
+
4002
+ --tw-bg-opacity: 1;
4003
+
4004
+ background-color: rgb(66 153 225 / var(--tw-bg-opacity));
4005
+
4006
+ --tw-text-opacity: 1;
4007
+
4008
+ color: rgb(255 255 255 / var(--tw-text-opacity))
4009
+ }
4010
+ [data-v-53692f7e] .s-badge {
4011
+
4012
+ --tw-bg-opacity: 1;
4013
+
4014
+ background-color: rgb(234 179 8 / var(--tw-bg-opacity))
4015
+ }
4016
+
4017
+ [data-v-b3ab067d] .p-inputtext {
4018
+ --p-form-field-padding-x: 0.625rem;
4019
+ }
4020
+ .p-button.p-inputicon[data-v-b3ab067d] {
4021
+ width: auto;
4022
+ border-style: none;
4023
+ padding: 0px;
4024
+ }
4025
+
4026
+ .form-input[data-v-1451da7b] .input-slider .p-inputnumber input,
4027
+ .form-input[data-v-1451da7b] .input-slider .slider-part {
4028
+
4029
+ width: 5rem
4030
+ }
4031
+ .form-input[data-v-1451da7b] .p-inputtext,
4032
+ .form-input[data-v-1451da7b] .p-select {
4033
+
4034
+ width: 11rem
4035
+ }
4036
+
4037
+ .settings-tab-panels {
4038
+ padding-top: 0px !important;
4039
+ }
4040
+
4041
+ .settings-container[data-v-2e21278f] {
4042
+ display: flex;
4043
+ height: 70vh;
4044
+ width: 60vw;
4045
+ max-width: 1024px;
4046
+ overflow: hidden;
4047
+ }
4048
+ @media (max-width: 768px) {
4049
+ .settings-container[data-v-2e21278f] {
4050
+ flex-direction: column;
4051
+ height: auto;
4052
+ width: 80vw;
4053
+ }
4054
+ .settings-sidebar[data-v-2e21278f] {
4055
+ width: 100%;
4056
+ }
4057
+ .settings-content[data-v-2e21278f] {
4058
+ height: 350px;
4059
+ }
4060
+ }
4061
+
4062
+ /* Show a separator line above the Keybinding tab */
4063
+ /* This indicates the start of custom setting panels */
4064
+ .settings-sidebar[data-v-2e21278f] .p-listbox-option[aria-label='Keybinding'] {
4065
+ position: relative;
4066
+ }
4067
+ .settings-sidebar[data-v-2e21278f] .p-listbox-option[aria-label='Keybinding']::before {
4068
+ position: absolute;
4069
+ top: 0px;
4070
+ left: 0px;
4071
+ width: 100%;
4072
+ --tw-content: '';
4073
+ content: var(--tw-content);
4074
+ border-top: 1px solid var(--p-divider-border-color);
4075
+ }
4076
+
4077
+ .pi-cog[data-v-43089afc] {
4078
+ font-size: 1.25rem;
4079
+ margin-right: 0.5rem;
4080
+ }
4081
+ .version-tag[data-v-43089afc] {
4082
+ margin-left: 0.5rem;
4083
+ }
4084
+
4085
+ .p-card[data-v-ffc83afa] {
4086
+ --p-card-body-padding: 10px 0 0 0;
4087
+ overflow: hidden;
4088
+ }
4089
+ [data-v-ffc83afa] .p-card-subtitle {
4090
+ text-align: center;
4091
+ }
4092
+
4093
+ .carousel[data-v-d9962275] {
4094
+ width: 66vw;
4095
+ }
4096
+ /**
4097
+ * Copyright (c) 2014 The xterm.js authors. All rights reserved.
4098
+ * Copyright (c) 2012-2013, Christopher Jeffrey (MIT License)
4099
+ * https://github.com/chjj/term.js
4100
+ * @license MIT
4101
+ *
4102
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
4103
+ * of this software and associated documentation files (the "Software"), to deal
4104
+ * in the Software without restriction, including without limitation the rights
4105
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
4106
+ * copies of the Software, and to permit persons to whom the Software is
4107
+ * furnished to do so, subject to the following conditions:
4108
+ *
4109
+ * The above copyright notice and this permission notice shall be included in
4110
+ * all copies or substantial portions of the Software.
4111
+ *
4112
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
4113
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
4114
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
4115
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
4116
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
4117
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
4118
+ * THE SOFTWARE.
4119
+ *
4120
+ * Originally forked from (with the author's permission):
4121
+ * Fabrice Bellard's javascript vt100 for jslinux:
4122
+ * http://bellard.org/jslinux/
4123
+ * Copyright (c) 2011 Fabrice Bellard
4124
+ * The original design remains. The terminal itself
4125
+ * has been extended to include xterm CSI codes, among
4126
+ * other features.
4127
+ */
4128
+
4129
+ /**
4130
+ * Default styles for xterm.js
4131
+ */
4132
+
4133
+ .xterm {
4134
+ cursor: text;
4135
+ position: relative;
4136
+ -moz-user-select: none;
4137
+ user-select: none;
4138
+ -ms-user-select: none;
4139
+ -webkit-user-select: none;
4140
+ }
4141
+
4142
+ .xterm.focus,
4143
+ .xterm:focus {
4144
+ outline: none;
4145
+ }
4146
+
4147
+ .xterm .xterm-helpers {
4148
+ position: absolute;
4149
+ top: 0;
4150
+ /**
4151
+ * The z-index of the helpers must be higher than the canvases in order for
4152
+ * IMEs to appear on top.
4153
+ */
4154
+ z-index: 5;
4155
+ }
4156
+
4157
+ .xterm .xterm-helper-textarea {
4158
+ padding: 0;
4159
+ border: 0;
4160
+ margin: 0;
4161
+ /* Move textarea out of the screen to the far left, so that the cursor is not visible */
4162
+ position: absolute;
4163
+ opacity: 0;
4164
+ left: -9999em;
4165
+ top: 0;
4166
+ width: 0;
4167
+ height: 0;
4168
+ z-index: -5;
4169
+ /** Prevent wrapping so the IME appears against the textarea at the correct position */
4170
+ white-space: nowrap;
4171
+ overflow: hidden;
4172
+ resize: none;
4173
+ }
4174
+
4175
+ .xterm .composition-view {
4176
+ /* TODO: Composition position got messed up somewhere */
4177
+ background: #000;
4178
+ color: #FFF;
4179
+ display: none;
4180
+ position: absolute;
4181
+ white-space: nowrap;
4182
+ z-index: 1;
4183
+ }
4184
+
4185
+ .xterm .composition-view.active {
4186
+ display: block;
4187
+ }
4188
+
4189
+ .xterm .xterm-viewport {
4190
+ /* On OS X this is required in order for the scroll bar to appear fully opaque */
4191
+ background-color: #000;
4192
+ overflow-y: scroll;
4193
+ cursor: default;
4194
+ position: absolute;
4195
+ right: 0;
4196
+ left: 0;
4197
+ top: 0;
4198
+ bottom: 0;
4199
+ }
4200
+
4201
+ .xterm .xterm-screen {
4202
+ position: relative;
4203
+ }
4204
+
4205
+ .xterm .xterm-screen canvas {
4206
+ position: absolute;
4207
+ left: 0;
4208
+ top: 0;
4209
+ }
4210
+
4211
+ .xterm .xterm-scroll-area {
4212
+ visibility: hidden;
4213
+ }
4214
+
4215
+ .xterm-char-measure-element {
4216
+ display: inline-block;
4217
+ visibility: hidden;
4218
+ position: absolute;
4219
+ top: 0;
4220
+ left: -9999em;
4221
+ line-height: normal;
4222
+ }
4223
+
4224
+ .xterm.enable-mouse-events {
4225
+ /* When mouse events are enabled (e.g. tmux), revert to the standard pointer cursor */
4226
+ cursor: default;
4227
+ }
4228
+
4229
+ .xterm.xterm-cursor-pointer,
4230
+ .xterm .xterm-cursor-pointer {
4231
+ cursor: pointer;
4232
+ }
4233
+
4234
+ .xterm.column-select.focus {
4235
+ /* Column selection mode */
4236
+ cursor: crosshair;
4237
+ }
4238
+
4239
+ .xterm .xterm-accessibility:not(.debug),
4240
+ .xterm .xterm-message {
4241
+ position: absolute;
4242
+ left: 0;
4243
+ top: 0;
4244
+ bottom: 0;
4245
+ right: 0;
4246
+ z-index: 10;
4247
+ color: transparent;
4248
+ pointer-events: none;
4249
+ }
4250
+
4251
+ .xterm .xterm-accessibility-tree:not(.debug) *::-moz-selection {
4252
+ color: transparent;
4253
+ }
4254
+
4255
+ .xterm .xterm-accessibility-tree:not(.debug) *::selection {
4256
+ color: transparent;
4257
+ }
4258
+
4259
+ .xterm .xterm-accessibility-tree {
4260
+ -webkit-user-select: text;
4261
+ -moz-user-select: text;
4262
+ user-select: text;
4263
+ white-space: pre;
4264
+ }
4265
+
4266
+ .xterm .live-region {
4267
+ position: absolute;
4268
+ left: -9999px;
4269
+ width: 1px;
4270
+ height: 1px;
4271
+ overflow: hidden;
4272
+ }
4273
+
4274
+ .xterm-dim {
4275
+ /* Dim should not apply to background, so the opacity of the foreground color is applied
4276
+ * explicitly in the generated class and reset to 1 here */
4277
+ opacity: 1 !important;
4278
+ }
4279
+
4280
+ .xterm-underline-1 { text-decoration: underline; }
4281
+ .xterm-underline-2 { -webkit-text-decoration: double underline; text-decoration: double underline; }
4282
+ .xterm-underline-3 { -webkit-text-decoration: wavy underline; text-decoration: wavy underline; }
4283
+ .xterm-underline-4 { -webkit-text-decoration: dotted underline; text-decoration: dotted underline; }
4284
+ .xterm-underline-5 { -webkit-text-decoration: dashed underline; text-decoration: dashed underline; }
4285
+
4286
+ .xterm-overline {
4287
+ text-decoration: overline;
4288
+ }
4289
+
4290
+ .xterm-overline.xterm-underline-1 { text-decoration: overline underline; }
4291
+ .xterm-overline.xterm-underline-2 { -webkit-text-decoration: overline double underline; text-decoration: overline double underline; }
4292
+ .xterm-overline.xterm-underline-3 { -webkit-text-decoration: overline wavy underline; text-decoration: overline wavy underline; }
4293
+ .xterm-overline.xterm-underline-4 { -webkit-text-decoration: overline dotted underline; text-decoration: overline dotted underline; }
4294
+ .xterm-overline.xterm-underline-5 { -webkit-text-decoration: overline dashed underline; text-decoration: overline dashed underline; }
4295
+
4296
+ .xterm-strikethrough {
4297
+ text-decoration: line-through;
4298
+ }
4299
+
4300
+ .xterm-screen .xterm-decoration-container .xterm-decoration {
4301
+ z-index: 6;
4302
+ position: absolute;
4303
+ }
4304
+
4305
+ .xterm-screen .xterm-decoration-container .xterm-decoration.xterm-decoration-top-layer {
4306
+ z-index: 7;
4307
+ }
4308
+
4309
+ .xterm-decoration-overview-ruler {
4310
+ z-index: 8;
4311
+ position: absolute;
4312
+ top: 0;
4313
+ right: 0;
4314
+ pointer-events: none;
4315
+ }
4316
+
4317
+ .xterm-decoration-top {
4318
+ z-index: 2;
4319
+ position: relative;
4320
+ }
4321
+
4322
+ [data-v-250ab9af] .p-terminal .xterm {
4323
+ overflow-x: auto;
4324
+ }
4325
+ [data-v-250ab9af] .p-terminal .xterm-screen {
4326
+ background-color: black;
4327
+ overflow-y: hidden;
4328
+ }
4329
+
4330
+ [data-v-90a7f075] .p-terminal .xterm {
4331
+ overflow-x: auto;
4332
+ }
4333
+ [data-v-90a7f075] .p-terminal .xterm-screen {
4334
+ background-color: black;
4335
+ overflow-y: hidden;
4336
+ }
4337
+
4338
+ [data-v-03daf1c8] .p-terminal .xterm {
4339
+ overflow-x: auto;
4340
+ }
4341
+ [data-v-03daf1c8] .p-terminal .xterm-screen {
4342
+ background-color: black;
4343
+ overflow-y: hidden;
4344
+ }
4345
+ .mdi.rotate270::before {
4346
+ transform: rotate(270deg);
4347
+ }
4348
+
4349
+ /* Generic */
4350
+ .comfyui-button {
4351
+ display: flex;
4352
+ align-items: center;
4353
+ gap: 0.5em;
4354
+ cursor: pointer;
4355
+ border: none;
4356
+ border-radius: 4px;
4357
+ padding: 4px 8px;
4358
+ box-sizing: border-box;
4359
+ margin: 0;
4360
+ transition: box-shadow 0.1s;
4361
+ }
4362
+
4363
+ .comfyui-button:active {
4364
+ box-shadow: inset 1px 1px 10px rgba(0, 0, 0, 0.5);
4365
+ }
4366
+
4367
+ .comfyui-button:disabled {
4368
+ opacity: 0.5;
4369
+ cursor: not-allowed;
4370
+ }
4371
+ .primary .comfyui-button,
4372
+ .primary.comfyui-button {
4373
+ background-color: var(--primary-bg) !important;
4374
+ color: var(--primary-fg) !important;
4375
+ }
4376
+
4377
+ .primary .comfyui-button:not(:disabled):hover,
4378
+ .primary.comfyui-button:not(:disabled):hover {
4379
+ background-color: var(--primary-hover-bg) !important;
4380
+ color: var(--primary-hover-fg) !important;
4381
+ }
4382
+
4383
+ /* Popup */
4384
+ .comfyui-popup {
4385
+ position: absolute;
4386
+ left: var(--left);
4387
+ right: var(--right);
4388
+ top: var(--top);
4389
+ bottom: var(--bottom);
4390
+ z-index: 2000;
4391
+ max-height: calc(100vh - var(--limit) - 10px);
4392
+ box-shadow: 3px 3px 5px 0px rgba(0, 0, 0, 0.3);
4393
+ }
4394
+
4395
+ .comfyui-popup:not(.open) {
4396
+ display: none;
4397
+ }
4398
+
4399
+ .comfyui-popup.right.open {
4400
+ border-top-left-radius: 4px;
4401
+ border-bottom-right-radius: 4px;
4402
+ border-bottom-left-radius: 4px;
4403
+ overflow: hidden;
4404
+ }
4405
+ /* Split button */
4406
+ .comfyui-split-button {
4407
+ position: relative;
4408
+ display: flex;
4409
+ }
4410
+
4411
+ .comfyui-split-primary {
4412
+ flex: auto;
4413
+ }
4414
+
4415
+ .comfyui-split-primary .comfyui-button {
4416
+ border-top-right-radius: 0;
4417
+ border-bottom-right-radius: 0;
4418
+ border-right: 1px solid var(--comfy-menu-bg);
4419
+ width: 100%;
4420
+ }
4421
+
4422
+ .comfyui-split-arrow .comfyui-button {
4423
+ border-top-left-radius: 0;
4424
+ border-bottom-left-radius: 0;
4425
+ padding-left: 2px;
4426
+ padding-right: 2px;
4427
+ }
4428
+
4429
+ .comfyui-split-button-popup {
4430
+ white-space: nowrap;
4431
+ background-color: var(--content-bg);
4432
+ color: var(--content-fg);
4433
+ display: flex;
4434
+ flex-direction: column;
4435
+ overflow: auto;
4436
+ }
4437
+
4438
+ .comfyui-split-button-popup.hover {
4439
+ z-index: 2001;
4440
+ }
4441
+ .comfyui-split-button-popup > .comfyui-button {
4442
+ border: none;
4443
+ background-color: transparent;
4444
+ color: var(--fg-color);
4445
+ padding: 8px 12px 8px 8px;
4446
+ }
4447
+
4448
+ .comfyui-split-button-popup > .comfyui-button:not(:disabled):hover {
4449
+ background-color: var(--comfy-input-bg);
4450
+ }
4451
+
4452
+ /* Button group */
4453
+ .comfyui-button-group {
4454
+ display: flex;
4455
+ border-radius: 4px;
4456
+ overflow: hidden;
4457
+ }
4458
+
4459
+ .comfyui-button-group:empty {
4460
+ display: none;
4461
+ }
4462
+ .comfyui-button-group > .comfyui-button,
4463
+ .comfyui-button-group > .comfyui-button-wrapper > .comfyui-button {
4464
+ padding: 4px 10px;
4465
+ border-radius: 0;
4466
+ }
4467
+
4468
+ /* Menu */
4469
+ .comfyui-menu .mdi::before {
4470
+ font-size: 18px;
4471
+ }
4472
+
4473
+ .comfyui-menu .comfyui-button {
4474
+ background: var(--comfy-input-bg);
4475
+ color: var(--fg-color);
4476
+ white-space: nowrap;
4477
+ }
4478
+
4479
+ .comfyui-menu .comfyui-button:not(:disabled):hover {
4480
+ background: var(--border-color);
4481
+ color: var(--content-fg);
4482
+ }
4483
+
4484
+ .comfyui-menu .comfyui-split-button-popup > .comfyui-button {
4485
+ border-radius: 0;
4486
+ background-color: transparent;
4487
+ }
4488
+
4489
+ .comfyui-menu .comfyui-split-button-popup > .comfyui-button:not(:disabled):hover {
4490
+ background-color: var(--comfy-input-bg);
4491
+ }
4492
+
4493
+ .comfyui-menu .comfyui-split-button-popup.left {
4494
+ border-top-right-radius: 4px;
4495
+ border-bottom-left-radius: 4px;
4496
+ border-bottom-right-radius: 4px;
4497
+ }
4498
+
4499
+ .comfyui-menu .comfyui-button.popup-open {
4500
+ background-color: var(--content-bg);
4501
+ color: var(--content-fg);
4502
+ }
4503
+
4504
+ .comfyui-menu-push {
4505
+ margin-left: -0.8em;
4506
+ flex: auto;
4507
+ }
4508
+
4509
+ /** Send to workflow widget selection dialog */
4510
+ .comfy-widget-selection-dialog {
4511
+ border: none;
4512
+ }
4513
+
4514
+ .comfy-widget-selection-dialog div {
4515
+ color: var(--fg-color);
4516
+ font-family: Arial, Helvetica, sans-serif;
4517
+ }
4518
+
4519
+ .comfy-widget-selection-dialog h2 {
4520
+ margin-top: 0;
4521
+ }
4522
+
4523
+ .comfy-widget-selection-dialog section {
4524
+ width: -moz-fit-content;
4525
+ width: fit-content;
4526
+ display: flex;
4527
+ flex-direction: column;
4528
+ }
4529
+
4530
+ .comfy-widget-selection-item {
4531
+ display: flex;
4532
+ gap: 10px;
4533
+ align-items: center;
4534
+ }
4535
+
4536
+ .comfy-widget-selection-item span {
4537
+ margin-right: auto;
4538
+ }
4539
+
4540
+ .comfy-widget-selection-item span::before {
4541
+ content: '#' attr(data-id);
4542
+ opacity: 0.5;
4543
+ margin-right: 5px;
4544
+ }
4545
+
4546
+ .comfy-modal .comfy-widget-selection-item button {
4547
+ font-size: 1em;
4548
+ }
4549
+
4550
+ /***** Responsive *****/
4551
+ .lg.comfyui-menu .lt-lg-show {
4552
+ display: none !important;
4553
+ }
4554
+ .comfyui-menu:not(.lg) .nlg-hide {
4555
+ display: none !important;
4556
+ }
4557
+ /** Large screen */
4558
+ .lg.comfyui-menu>.comfyui-menu-mobile-collapse .comfyui-button span,
4559
+ .lg.comfyui-menu>.comfyui-menu-mobile-collapse.comfyui-button span {
4560
+ display: none;
4561
+ }
4562
+ .lg.comfyui-menu>.comfyui-menu-mobile-collapse .comfyui-popup .comfyui-button span {
4563
+ display: unset;
4564
+ }
4565
+
4566
+ /** Non large screen */
4567
+ .lt-lg.comfyui-menu {
4568
+ flex-wrap: wrap;
4569
+ }
4570
+
4571
+ .lt-lg.comfyui-menu > *:not(.comfyui-menu-mobile-collapse) {
4572
+ order: 1;
4573
+ }
4574
+
4575
+ .lt-lg.comfyui-menu > .comfyui-menu-mobile-collapse {
4576
+ order: 9999;
4577
+ width: 100%;
4578
+ }
4579
+
4580
+ .comfyui-body-bottom .lt-lg.comfyui-menu > .comfyui-menu-mobile-collapse {
4581
+ order: -1;
4582
+ }
4583
+
4584
+ .comfyui-body-bottom .lt-lg.comfyui-menu > .comfyui-menu-button {
4585
+ top: unset;
4586
+ bottom: 4px;
4587
+ }
4588
+
4589
+ .lt-lg.comfyui-menu > .comfyui-menu-mobile-collapse.comfyui-button-group {
4590
+ flex-wrap: wrap;
4591
+ }
4592
+
4593
+ .lt-lg.comfyui-menu > .comfyui-menu-mobile-collapse .comfyui-button,
4594
+ .lt-lg.comfyui-menu > .comfyui-menu-mobile-collapse.comfyui-button {
4595
+ padding: 10px;
4596
+ }
4597
+ .lt-lg.comfyui-menu > .comfyui-menu-mobile-collapse .comfyui-button,
4598
+ .lt-lg.comfyui-menu > .comfyui-menu-mobile-collapse .comfyui-button-wrapper {
4599
+ width: 100%;
4600
+ }
4601
+
4602
+ .lt-lg.comfyui-menu > .comfyui-menu-mobile-collapse .comfyui-popup {
4603
+ position: static;
4604
+ background-color: var(--comfy-input-bg);
4605
+ max-width: unset;
4606
+ max-height: 50vh;
4607
+ overflow: auto;
4608
+ }
4609
+
4610
+ .lt-lg.comfyui-menu:not(.expanded) > .comfyui-menu-mobile-collapse {
4611
+ display: none;
4612
+ }
4613
+
4614
+ .lt-lg .comfyui-menu-button {
4615
+ position: absolute;
4616
+ top: 4px;
4617
+ right: 8px;
4618
+ }
4619
+
4620
+ .lt-lg.comfyui-menu > .comfyui-menu-mobile-collapse .comfyui-view-list-popup {
4621
+ border-radius: 0;
4622
+ }
4623
+
4624
+ .lt-lg.comfyui-menu .comfyui-workflows-popup {
4625
+ width: 100vw;
4626
+ }
4627
+
4628
+ /** Small */
4629
+ .lt-md .comfyui-workflows-button-inner {
4630
+ width: unset !important;
4631
+ }
4632
+ .lt-md .comfyui-workflows-label {
4633
+ display: none;
4634
+ }
4635
+
4636
+ /** Extra small */
4637
+ .lt-sm .comfyui-interrupt-button {
4638
+ margin-right: 45px;
4639
+ }
4640
+ .comfyui-body-bottom .lt-sm.comfyui-menu > .comfyui-menu-button{
4641
+ bottom: 41px;
4642
+ }
4643
+
4644
+
4645
+ .editable-text[data-v-d670c40f] {
4646
+ display: inline;
4647
+ }
4648
+ .editable-text input[data-v-d670c40f] {
4649
+ width: 100%;
4650
+ box-sizing: border-box;
4651
+ }
4652
+
4653
+ .tree-node[data-v-654109c7] {
4654
+ width: 100%;
4655
+ display: flex;
4656
+ align-items: center;
4657
+ justify-content: space-between;
4658
+ }
4659
+ .leaf-count-badge[data-v-654109c7] {
4660
+ margin-left: 0.5rem;
4661
+ }
4662
+ .node-content[data-v-654109c7] {
4663
+ display: flex;
4664
+ align-items: center;
4665
+ flex-grow: 1;
4666
+ }
4667
+ .leaf-label[data-v-654109c7] {
4668
+ margin-left: 0.5rem;
4669
+ }
4670
+ [data-v-654109c7] .editable-text span {
4671
+ word-break: break-all;
4672
+ }
4673
+
4674
+ [data-v-976a6d58] .tree-explorer-node-label {
4675
+ width: 100%;
4676
+ display: flex;
4677
+ align-items: center;
4678
+ margin-left: var(--p-tree-node-gap);
4679
+ flex-grow: 1;
4680
+ }
4681
+
4682
+ /*
4683
+ * The following styles are necessary to avoid layout shift when dragging nodes over folders.
4684
+ * By setting the position to relative on the parent and using an absolutely positioned pseudo-element,
4685
+ * we can create a visual indicator for the drop target without affecting the layout of other elements.
4686
+ */
4687
+ [data-v-976a6d58] .p-tree-node-content:has(.tree-folder) {
4688
+ position: relative;
4689
+ }
4690
+ [data-v-976a6d58] .p-tree-node-content:has(.tree-folder.can-drop)::after {
4691
+ content: '';
4692
+ position: absolute;
4693
+ top: 0;
4694
+ left: 0;
4695
+ right: 0;
4696
+ bottom: 0;
4697
+ border: 1px solid var(--p-content-color);
4698
+ pointer-events: none;
4699
+ }
4700
+
4701
+ [data-v-0061c432] .p-toolbar-end .p-button {
4702
+
4703
+ padding-top: 0.25rem;
4704
+
4705
+ padding-bottom: 0.25rem
4706
+ }
4707
+ @media (min-width: 1536px) {
4708
+ [data-v-0061c432] .p-toolbar-end .p-button {
4709
+
4710
+ padding-top: 0.5rem;
4711
+
4712
+ padding-bottom: 0.5rem
4713
+ }
4714
+ }
4715
+ [data-v-0061c432] .p-toolbar-start {
4716
+
4717
+ min-width: 0px;
4718
+
4719
+ flex: 1 1 0%;
4720
+
4721
+ overflow: hidden
4722
+ }
4723
+
4724
+ .model_preview[data-v-32e6c4d9] {
4725
+ background-color: var(--comfy-menu-bg);
4726
+ font-family: 'Open Sans', sans-serif;
4727
+ color: var(--descrip-text);
4728
+ border: 1px solid var(--descrip-text);
4729
+ min-width: 300px;
4730
+ max-width: 500px;
4731
+ width: -moz-fit-content;
4732
+ width: fit-content;
4733
+ height: -moz-fit-content;
4734
+ height: fit-content;
4735
+ z-index: 9999;
4736
+ border-radius: 12px;
4737
+ overflow: hidden;
4738
+ font-size: 12px;
4739
+ padding: 10px;
4740
+ }
4741
+ .model_preview_image[data-v-32e6c4d9] {
4742
+ margin: auto;
4743
+ width: -moz-fit-content;
4744
+ width: fit-content;
4745
+ }
4746
+ .model_preview_image img[data-v-32e6c4d9] {
4747
+ max-width: 100%;
4748
+ max-height: 150px;
4749
+ -o-object-fit: contain;
4750
+ object-fit: contain;
4751
+ }
4752
+ .model_preview_title[data-v-32e6c4d9] {
4753
+ font-weight: bold;
4754
+ text-align: center;
4755
+ font-size: 14px;
4756
+ }
4757
+ .model_preview_top_container[data-v-32e6c4d9] {
4758
+ text-align: center;
4759
+ line-height: 0.5;
4760
+ }
4761
+ .model_preview_filename[data-v-32e6c4d9],
4762
+ .model_preview_author[data-v-32e6c4d9],
4763
+ .model_preview_architecture[data-v-32e6c4d9] {
4764
+ display: inline-block;
4765
+ text-align: center;
4766
+ margin: 5px;
4767
+ font-size: 10px;
4768
+ }
4769
+ .model_preview_prefix[data-v-32e6c4d9] {
4770
+ font-weight: bold;
4771
+ }
4772
+
4773
+ .model-lib-model-icon-container[data-v-b45ea43e] {
4774
+ display: inline-block;
4775
+ position: relative;
4776
+ left: 0;
4777
+ height: 1.5rem;
4778
+ vertical-align: top;
4779
+ width: 0px;
4780
+ }
4781
+ .model-lib-model-icon[data-v-b45ea43e] {
4782
+ background-size: cover;
4783
+ background-position: center;
4784
+ display: inline-block;
4785
+ position: relative;
4786
+ left: -2.2rem;
4787
+ top: -0.1rem;
4788
+ height: 1.7rem;
4789
+ width: 1.7rem;
4790
+ vertical-align: top;
4791
+ }
4792
+
4793
+ [data-v-0bb2ac55] .pi-fake-spacer {
4794
+ height: 1px;
4795
+ width: 16px;
4796
+ }
4797
+
4798
+ .slot_row[data-v-d9792337] {
4799
+ padding: 2px;
4800
+ }
4801
+
4802
+ /* Original N-Sidebar styles */
4803
+ ._sb_dot[data-v-d9792337] {
4804
+ width: 8px;
4805
+ height: 8px;
4806
+ border-radius: 50%;
4807
+ background-color: grey;
4808
+ }
4809
+ .node_header[data-v-d9792337] {
4810
+ line-height: 1;
4811
+ padding: 8px 13px 7px;
4812
+ margin-bottom: 5px;
4813
+ font-size: 15px;
4814
+ text-wrap: nowrap;
4815
+ overflow: hidden;
4816
+ display: flex;
4817
+ align-items: center;
4818
+ }
4819
+ .headdot[data-v-d9792337] {
4820
+ width: 10px;
4821
+ height: 10px;
4822
+ float: inline-start;
4823
+ margin-right: 8px;
4824
+ }
4825
+ .IMAGE[data-v-d9792337] {
4826
+ background-color: #64b5f6;
4827
+ }
4828
+ .VAE[data-v-d9792337] {
4829
+ background-color: #ff6e6e;
4830
+ }
4831
+ .LATENT[data-v-d9792337] {
4832
+ background-color: #ff9cf9;
4833
+ }
4834
+ .MASK[data-v-d9792337] {
4835
+ background-color: #81c784;
4836
+ }
4837
+ .CONDITIONING[data-v-d9792337] {
4838
+ background-color: #ffa931;
4839
+ }
4840
+ .CLIP[data-v-d9792337] {
4841
+ background-color: #ffd500;
4842
+ }
4843
+ .MODEL[data-v-d9792337] {
4844
+ background-color: #b39ddb;
4845
+ }
4846
+ .CONTROL_NET[data-v-d9792337] {
4847
+ background-color: #a5d6a7;
4848
+ }
4849
+ ._sb_node_preview[data-v-d9792337] {
4850
+ background-color: var(--comfy-menu-bg);
4851
+ font-family: 'Open Sans', sans-serif;
4852
+ font-size: small;
4853
+ color: var(--descrip-text);
4854
+ border: 1px solid var(--descrip-text);
4855
+ min-width: 300px;
4856
+ width: -moz-min-content;
4857
+ width: min-content;
4858
+ height: -moz-fit-content;
4859
+ height: fit-content;
4860
+ z-index: 9999;
4861
+ border-radius: 12px;
4862
+ overflow: hidden;
4863
+ font-size: 12px;
4864
+ padding-bottom: 10px;
4865
+ }
4866
+ ._sb_node_preview ._sb_description[data-v-d9792337] {
4867
+ margin: 10px;
4868
+ padding: 6px;
4869
+ background: var(--border-color);
4870
+ border-radius: 5px;
4871
+ font-style: italic;
4872
+ font-weight: 500;
4873
+ font-size: 0.9rem;
4874
+ word-break: break-word;
4875
+ }
4876
+ ._sb_table[data-v-d9792337] {
4877
+ display: grid;
4878
+
4879
+ grid-column-gap: 10px;
4880
+ /* Space between the columns */
4881
+ width: 100%;
4882
+ /* Set the table width to 100% of the container */
4883
+ }
4884
+ ._sb_row[data-v-d9792337] {
4885
+ display: grid;
4886
+ grid-template-columns: 10px 1fr 1fr 1fr 10px;
4887
+ grid-column-gap: 10px;
4888
+ align-items: center;
4889
+ padding-left: 9px;
4890
+ padding-right: 9px;
4891
+ }
4892
+ ._sb_row_string[data-v-d9792337] {
4893
+ grid-template-columns: 10px 1fr 1fr 10fr 1fr;
4894
+ }
4895
+ ._sb_col[data-v-d9792337] {
4896
+ border: 0px solid #000;
4897
+ display: flex;
4898
+ align-items: flex-end;
4899
+ flex-direction: row-reverse;
4900
+ flex-wrap: nowrap;
4901
+ align-content: flex-start;
4902
+ justify-content: flex-end;
4903
+ }
4904
+ ._sb_inherit[data-v-d9792337] {
4905
+ display: inherit;
4906
+ }
4907
+ ._long_field[data-v-d9792337] {
4908
+ background: var(--bg-color);
4909
+ border: 2px solid var(--border-color);
4910
+ margin: 5px 5px 0 5px;
4911
+ border-radius: 10px;
4912
+ line-height: 1.7;
4913
+ text-wrap: nowrap;
4914
+ }
4915
+ ._sb_arrow[data-v-d9792337] {
4916
+ color: var(--fg-color);
4917
+ }
4918
+ ._sb_preview_badge[data-v-d9792337] {
4919
+ text-align: center;
4920
+ background: var(--comfy-input-bg);
4921
+ font-weight: bold;
4922
+ color: var(--error-text);
4923
+ }
4924
+
4925
+ ._content[data-v-c4279e6b] {
4926
+
4927
+ display: flex;
4928
+
4929
+ flex-direction: column
4930
+ }
4931
+ ._content[data-v-c4279e6b] > :not([hidden]) ~ :not([hidden]) {
4932
+
4933
+ --tw-space-y-reverse: 0;
4934
+
4935
+ margin-top: calc(0.5rem * calc(1 - var(--tw-space-y-reverse)));
4936
+
4937
+ margin-bottom: calc(0.5rem * var(--tw-space-y-reverse))
4938
+ }
4939
+ ._footer[data-v-c4279e6b] {
4940
+
4941
+ display: flex;
4942
+
4943
+ flex-direction: column;
4944
+
4945
+ align-items: flex-end;
4946
+
4947
+ padding-top: 1rem
4948
+ }
4949
+
4950
+ .node-lib-node-container[data-v-da9a8962] {
4951
+ height: 100%;
4952
+ width: 100%
4953
+ }
4954
+
4955
+ .p-selectbutton .p-button[data-v-bd06e12b] {
4956
+ padding: 0.5rem;
4957
+ }
4958
+ .p-selectbutton .p-button .pi[data-v-bd06e12b] {
4959
+ font-size: 1.5rem;
4960
+ }
4961
+ .field[data-v-bd06e12b] {
4962
+ display: flex;
4963
+ flex-direction: column;
4964
+ gap: 0.5rem;
4965
+ }
4966
+ .color-picker-container[data-v-bd06e12b] {
4967
+ display: flex;
4968
+ align-items: center;
4969
+ gap: 0.5rem;
4970
+ }
4971
+
4972
+ .scroll-container {
4973
+ &[data-v-ad33a347] {
4974
+ height: 100%;
4975
+ overflow-y: auto;
4976
+
4977
+ /* Firefox */
4978
+ scrollbar-width: none;
4979
+ }
4980
+ &[data-v-ad33a347]::-webkit-scrollbar {
4981
+ width: 1px;
4982
+ }
4983
+ &[data-v-ad33a347]::-webkit-scrollbar-thumb {
4984
+ background-color: transparent;
4985
+ }
4986
+ }
4987
+
4988
+ .comfy-image-wrap[data-v-a748ccd8] {
4989
+ display: contents;
4990
+ }
4991
+ .comfy-image-blur[data-v-a748ccd8] {
4992
+ position: absolute;
4993
+ top: 0;
4994
+ left: 0;
4995
+ width: 100%;
4996
+ height: 100%;
4997
+ -o-object-fit: cover;
4998
+ object-fit: cover;
4999
+ }
5000
+ .comfy-image-main[data-v-a748ccd8] {
5001
+ width: 100%;
5002
+ height: 100%;
5003
+ -o-object-fit: cover;
5004
+ object-fit: cover;
5005
+ -o-object-position: center;
5006
+ object-position: center;
5007
+ z-index: 1;
5008
+ }
5009
+ .contain .comfy-image-wrap[data-v-a748ccd8] {
5010
+ position: relative;
5011
+ width: 100%;
5012
+ height: 100%;
5013
+ }
5014
+ .contain .comfy-image-main[data-v-a748ccd8] {
5015
+ -o-object-fit: contain;
5016
+ object-fit: contain;
5017
+ -webkit-backdrop-filter: blur(10px);
5018
+ backdrop-filter: blur(10px);
5019
+ position: absolute;
5020
+ }
5021
+ .broken-image-placeholder[data-v-a748ccd8] {
5022
+ display: flex;
5023
+ flex-direction: column;
5024
+ align-items: center;
5025
+ justify-content: center;
5026
+ width: 100%;
5027
+ height: 100%;
5028
+ margin: 2rem;
5029
+ }
5030
+ .broken-image-placeholder i[data-v-a748ccd8] {
5031
+ font-size: 3rem;
5032
+ margin-bottom: 0.5rem;
5033
+ }
5034
+
5035
+ /* PrimeVue's galleria teleports the fullscreen gallery out of subtree so we
5036
+ cannot use scoped style here. */
5037
+ img.galleria-image {
5038
+ max-width: 100vw;
5039
+ max-height: 100vh;
5040
+ -o-object-fit: contain;
5041
+ object-fit: contain;
5042
+ }
5043
+ .p-galleria-close-button {
5044
+ /* Set z-index so the close button doesn't get hidden behind the image when image is large */
5045
+ z-index: 1;
5046
+ }
5047
+
5048
+ .result-container[data-v-2403edc6] {
5049
+ width: 100%;
5050
+ height: 100%;
5051
+ aspect-ratio: 1 / 1;
5052
+ overflow: hidden;
5053
+ position: relative;
5054
+ display: flex;
5055
+ justify-content: center;
5056
+ align-items: center;
5057
+ }
5058
+ .preview-mask[data-v-2403edc6] {
5059
+ position: absolute;
5060
+ left: 50%;
5061
+ top: 50%;
5062
+ transform: translate(-50%, -50%);
5063
+ display: flex;
5064
+ align-items: center;
5065
+ justify-content: center;
5066
+ opacity: 0;
5067
+ transition: opacity 0.3s ease;
5068
+ z-index: 1;
5069
+ }
5070
+ .result-container:hover .preview-mask[data-v-2403edc6] {
5071
+ opacity: 1;
5072
+ }
5073
+
5074
+ .task-result-preview[data-v-b676a511] {
5075
+ aspect-ratio: 1 / 1;
5076
+ overflow: hidden;
5077
+ display: flex;
5078
+ justify-content: center;
5079
+ align-items: center;
5080
+ width: 100%;
5081
+ height: 100%;
5082
+ }
5083
+ .task-result-preview i[data-v-b676a511],
5084
+ .task-result-preview span[data-v-b676a511] {
5085
+ font-size: 2rem;
5086
+ }
5087
+ .task-item[data-v-b676a511] {
5088
+ display: flex;
5089
+ flex-direction: column;
5090
+ border-radius: 4px;
5091
+ overflow: hidden;
5092
+ position: relative;
5093
+ }
5094
+ .task-item-details[data-v-b676a511] {
5095
+ position: absolute;
5096
+ bottom: 0;
5097
+ padding: 0.6rem;
5098
+ display: flex;
5099
+ justify-content: space-between;
5100
+ align-items: center;
5101
+ width: 100%;
5102
+ z-index: 1;
5103
+ }
5104
+ .task-node-link[data-v-b676a511] {
5105
+ padding: 2px;
5106
+ }
5107
+
5108
+ /* In dark mode, transparent background color for tags is not ideal for tags that
5109
+ are floating on top of images. */
5110
+ .tag-wrapper[data-v-b676a511] {
5111
+ background-color: var(--p-primary-contrast-color);
5112
+ border-radius: 6px;
5113
+ display: inline-flex;
5114
+ }
5115
+ .node-name-tag[data-v-b676a511] {
5116
+ word-break: break-all;
5117
+ }
5118
+ .status-tag-group[data-v-b676a511] {
5119
+ display: flex;
5120
+ flex-direction: column;
5121
+ }
5122
+ .progress-preview-img[data-v-b676a511] {
5123
+ width: 100%;
5124
+ height: 100%;
5125
+ -o-object-fit: cover;
5126
+ object-fit: cover;
5127
+ -o-object-position: center;
5128
+ object-position: center;
5129
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.py ADDED
@@ -0,0 +1,711 @@
1
+ #original code from https://github.com/genmoai/models under apache 2.0 license
2
+ #adapted to ComfyUI
3
+
4
+ from typing import List, Optional, Tuple, Union
5
+ from functools import partial
6
+ import math
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ import torch.nn.functional as F
11
+ from einops import rearrange
12
+
13
+ from comfy.ldm.modules.attention import optimized_attention
14
+
15
+ import comfy.ops
16
+ ops = comfy.ops.disable_weight_init
17
+
18
+ # import mochi_preview.dit.joint_model.context_parallel as cp
19
+ # from mochi_preview.vae.cp_conv import cp_pass_frames, gather_all_frames
20
+
21
+
22
+ def cast_tuple(t, length=1):
23
+ return t if isinstance(t, tuple) else ((t,) * length)
24
+
25
+
26
+ class GroupNormSpatial(ops.GroupNorm):
27
+ """
28
+ GroupNorm applied per-frame.
29
+ """
30
+
31
+ def forward(self, x: torch.Tensor, *, chunk_size: int = 8):
32
+ B, C, T, H, W = x.shape
33
+ x = rearrange(x, "B C T H W -> (B T) C H W")
34
+ # Run group norm in chunks.
35
+ output = torch.empty_like(x)
36
+ for b in range(0, B * T, chunk_size):
37
+ output[b : b + chunk_size] = super().forward(x[b : b + chunk_size])
38
+ return rearrange(output, "(B T) C H W -> B C T H W", B=B, T=T)
39
+
40
+ class PConv3d(ops.Conv3d):
41
+ def __init__(
42
+ self,
43
+ in_channels,
44
+ out_channels,
45
+ kernel_size: Union[int, Tuple[int, int, int]],
46
+ stride: Union[int, Tuple[int, int, int]],
47
+ causal: bool = True,
48
+ context_parallel: bool = True,
49
+ **kwargs,
50
+ ):
51
+ self.causal = causal
52
+ self.context_parallel = context_parallel
53
+ kernel_size = cast_tuple(kernel_size, 3)
54
+ stride = cast_tuple(stride, 3)
55
+ height_pad = (kernel_size[1] - 1) // 2
56
+ width_pad = (kernel_size[2] - 1) // 2
57
+
58
+ super().__init__(
59
+ in_channels=in_channels,
60
+ out_channels=out_channels,
61
+ kernel_size=kernel_size,
62
+ stride=stride,
63
+ dilation=(1, 1, 1),
64
+ padding=(0, height_pad, width_pad),
65
+ **kwargs,
66
+ )
67
+
68
+ def forward(self, x: torch.Tensor):
69
+ # Compute padding amounts.
70
+ context_size = self.kernel_size[0] - 1
71
+ if self.causal:
72
+ pad_front = context_size
73
+ pad_back = 0
74
+ else:
75
+ pad_front = context_size // 2
76
+ pad_back = context_size - pad_front
77
+
78
+ # Apply padding.
79
+ assert self.padding_mode == "replicate" # DEBUG
80
+ mode = "constant" if self.padding_mode == "zeros" else self.padding_mode
81
+ x = F.pad(x, (0, 0, 0, 0, pad_front, pad_back), mode=mode)
82
+ return super().forward(x)
83
+
84
+
85
+ class Conv1x1(ops.Linear):
86
+ """*1x1 Conv implemented with a linear layer."""
87
+
88
+ def __init__(self, in_features: int, out_features: int, *args, **kwargs):
89
+ super().__init__(in_features, out_features, *args, **kwargs)
90
+
91
+ def forward(self, x: torch.Tensor):
92
+ """Forward pass.
93
+
94
+ Args:
95
+ x: Input tensor. Shape: [B, C, *] or [B, *, C].
96
+
97
+ Returns:
98
+ x: Output tensor. Shape: [B, C', *] or [B, *, C'].
99
+ """
100
+ x = x.movedim(1, -1)
101
+ x = super().forward(x)
102
+ x = x.movedim(-1, 1)
103
+ return x
104
+
105
+
106
+ class DepthToSpaceTime(nn.Module):
107
+ def __init__(
108
+ self,
109
+ temporal_expansion: int,
110
+ spatial_expansion: int,
111
+ ):
112
+ super().__init__()
113
+ self.temporal_expansion = temporal_expansion
114
+ self.spatial_expansion = spatial_expansion
115
+
116
+ # When printed, this module should show the temporal and spatial expansion factors.
117
+ def extra_repr(self):
118
+ return f"texp={self.temporal_expansion}, sexp={self.spatial_expansion}"
119
+
120
+ def forward(self, x: torch.Tensor):
121
+ """Forward pass.
122
+
123
+ Args:
124
+ x: Input tensor. Shape: [B, C, T, H, W].
125
+
126
+ Returns:
127
+ x: Rearranged tensor. Shape: [B, C/(st*s*s), T*st, H*s, W*s].
128
+ """
129
+ x = rearrange(
130
+ x,
131
+ "B (C st sh sw) T H W -> B C (T st) (H sh) (W sw)",
132
+ st=self.temporal_expansion,
133
+ sh=self.spatial_expansion,
134
+ sw=self.spatial_expansion,
135
+ )
136
+
137
+ # cp_rank, _ = cp.get_cp_rank_size()
138
+ if self.temporal_expansion > 1: # and cp_rank == 0:
139
+ # Drop the first self.temporal_expansion - 1 frames.
140
+ # This is because we always want the 3x3x3 conv filter to only apply
141
+ # to the first frame, and the first frame doesn't need to be repeated.
142
+ assert all(x.shape)
143
+ x = x[:, :, self.temporal_expansion - 1 :]
144
+ assert all(x.shape)
145
+
146
+ return x
147
+
148
+
149
+ def norm_fn(
150
+ in_channels: int,
151
+ affine: bool = True,
152
+ ):
153
+ return GroupNormSpatial(affine=affine, num_groups=32, num_channels=in_channels)
154
+
155
+
156
+ class ResBlock(nn.Module):
157
+ """Residual block that preserves the spatial dimensions."""
158
+
159
+ def __init__(
160
+ self,
161
+ channels: int,
162
+ *,
163
+ affine: bool = True,
164
+ attn_block: Optional[nn.Module] = None,
165
+ causal: bool = True,
166
+ prune_bottleneck: bool = False,
167
+ padding_mode: str,
168
+ bias: bool = True,
169
+ ):
170
+ super().__init__()
171
+ self.channels = channels
172
+
173
+ assert causal
174
+ self.stack = nn.Sequential(
175
+ norm_fn(channels, affine=affine),
176
+ nn.SiLU(inplace=True),
177
+ PConv3d(
178
+ in_channels=channels,
179
+ out_channels=channels // 2 if prune_bottleneck else channels,
180
+ kernel_size=(3, 3, 3),
181
+ stride=(1, 1, 1),
182
+ padding_mode=padding_mode,
183
+ bias=bias,
184
+ causal=causal,
185
+ ),
186
+ norm_fn(channels, affine=affine),
187
+ nn.SiLU(inplace=True),
188
+ PConv3d(
189
+ in_channels=channels // 2 if prune_bottleneck else channels,
190
+ out_channels=channels,
191
+ kernel_size=(3, 3, 3),
192
+ stride=(1, 1, 1),
193
+ padding_mode=padding_mode,
194
+ bias=bias,
195
+ causal=causal,
196
+ ),
197
+ )
198
+
199
+ self.attn_block = attn_block if attn_block else nn.Identity()
200
+
201
+ def forward(self, x: torch.Tensor):
202
+ """Forward pass.
203
+
204
+ Args:
205
+ x: Input tensor. Shape: [B, C, T, H, W].
206
+ """
207
+ residual = x
208
+ x = self.stack(x)
209
+ x = x + residual
210
+ del residual
211
+
212
+ return self.attn_block(x)
213
+
214
+
215
+ class Attention(nn.Module):
216
+ def __init__(
217
+ self,
218
+ dim: int,
219
+ head_dim: int = 32,
220
+ qkv_bias: bool = False,
221
+ out_bias: bool = True,
222
+ qk_norm: bool = True,
223
+ ) -> None:
224
+ super().__init__()
225
+ self.head_dim = head_dim
226
+ self.num_heads = dim // head_dim
227
+ self.qk_norm = qk_norm
228
+
229
+ self.qkv = nn.Linear(dim, 3 * dim, bias=qkv_bias)
230
+ self.out = nn.Linear(dim, dim, bias=out_bias)
231
+
232
+ def forward(
233
+ self,
234
+ x: torch.Tensor,
235
+ ) -> torch.Tensor:
236
+ """Compute temporal self-attention.
237
+
238
+ Args:
239
+ x: Input tensor. Shape: [B, C, T, H, W].
240
+ chunk_size: Chunk size for large tensors.
241
+
242
+ Returns:
243
+ x: Output tensor. Shape: [B, C, T, H, W].
244
+ """
245
+ B, _, T, H, W = x.shape
246
+
247
+ if T == 1:
248
+ # No attention for single frame.
249
+ x = x.movedim(1, -1) # [B, C, T, H, W] -> [B, T, H, W, C]
250
+ qkv = self.qkv(x)
251
+ _, _, x = qkv.chunk(3, dim=-1) # Throw away queries and keys.
252
+ x = self.out(x)
253
+ return x.movedim(-1, 1) # [B, T, H, W, C] -> [B, C, T, H, W]
254
+
255
+ # 1D temporal attention.
256
+ x = rearrange(x, "B C t h w -> (B h w) t C")
257
+ qkv = self.qkv(x)
258
+
259
+ # Input: qkv with shape [B, t, 3 * num_heads * head_dim]
260
+ # Output: x with shape [B, num_heads, t, head_dim]
261
+ q, k, v = qkv.view(qkv.shape[0], qkv.shape[1], 3, self.num_heads, self.head_dim).transpose(1, 3).unbind(2)
262
+
263
+ if self.qk_norm:
264
+ q = F.normalize(q, p=2, dim=-1)
265
+ k = F.normalize(k, p=2, dim=-1)
266
+
267
+ x = optimized_attention(q, k, v, self.num_heads, skip_reshape=True)
268
+
269
+ assert x.size(0) == q.size(0)
270
+
271
+ x = self.out(x)
272
+ x = rearrange(x, "(B h w) t C -> B C t h w", B=B, h=H, w=W)
273
+ return x
274
+
275
+
276
+ class AttentionBlock(nn.Module):
277
+ def __init__(
278
+ self,
279
+ dim: int,
280
+ **attn_kwargs,
281
+ ) -> None:
282
+ super().__init__()
283
+ self.norm = norm_fn(dim)
284
+ self.attn = Attention(dim, **attn_kwargs)
285
+
286
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
287
+ return x + self.attn(self.norm(x))
288
+
289
+
290
+ class CausalUpsampleBlock(nn.Module):
291
+ def __init__(
292
+ self,
293
+ in_channels: int,
294
+ out_channels: int,
295
+ num_res_blocks: int,
296
+ *,
297
+ temporal_expansion: int = 2,
298
+ spatial_expansion: int = 2,
299
+ **block_kwargs,
300
+ ):
301
+ super().__init__()
302
+
303
+ blocks = []
304
+ for _ in range(num_res_blocks):
305
+ blocks.append(block_fn(in_channels, **block_kwargs))
306
+ self.blocks = nn.Sequential(*blocks)
307
+
308
+ self.temporal_expansion = temporal_expansion
309
+ self.spatial_expansion = spatial_expansion
310
+
311
+ # Change channels in the final convolution layer.
312
+ self.proj = Conv1x1(
313
+ in_channels,
314
+ out_channels * temporal_expansion * (spatial_expansion**2),
315
+ )
316
+
317
+ self.d2st = DepthToSpaceTime(
318
+ temporal_expansion=temporal_expansion, spatial_expansion=spatial_expansion
319
+ )
320
+
321
+ def forward(self, x):
322
+ x = self.blocks(x)
323
+ x = self.proj(x)
324
+ x = self.d2st(x)
325
+ return x
326
+
327
+
328
+ def block_fn(channels, *, affine: bool = True, has_attention: bool = False, **block_kwargs):
329
+ attn_block = AttentionBlock(channels) if has_attention else None
330
+ return ResBlock(channels, affine=affine, attn_block=attn_block, **block_kwargs)
331
+
332
+
333
+ class DownsampleBlock(nn.Module):
334
+ def __init__(
335
+ self,
336
+ in_channels: int,
337
+ out_channels: int,
338
+ num_res_blocks,
339
+ *,
340
+ temporal_reduction=2,
341
+ spatial_reduction=2,
342
+ **block_kwargs,
343
+ ):
344
+ """
345
+ Downsample block for the VAE encoder.
346
+
347
+ Args:
348
+ in_channels: Number of input channels.
349
+ out_channels: Number of output channels.
350
+ num_res_blocks: Number of residual blocks.
351
+ temporal_reduction: Temporal reduction factor.
352
+ spatial_reduction: Spatial reduction factor.
353
+ """
354
+ super().__init__()
355
+ layers = []
356
+
357
+ # Change the channel count in the strided convolution.
358
+ # This lets the ResBlock have uniform channel count,
359
+ # as in ConvNeXt.
360
+ assert in_channels != out_channels
361
+ layers.append(
362
+ PConv3d(
363
+ in_channels=in_channels,
364
+ out_channels=out_channels,
365
+ kernel_size=(temporal_reduction, spatial_reduction, spatial_reduction),
366
+ stride=(temporal_reduction, spatial_reduction, spatial_reduction),
367
+ # First layer in each block always uses replicate padding
368
+ padding_mode="replicate",
369
+ bias=block_kwargs["bias"],
370
+ )
371
+ )
372
+
373
+ for _ in range(num_res_blocks):
374
+ layers.append(block_fn(out_channels, **block_kwargs))
375
+
376
+ self.layers = nn.Sequential(*layers)
377
+
378
+ def forward(self, x):
379
+ return self.layers(x)
380
+
381
+
382
+ def add_fourier_features(inputs: torch.Tensor, start=6, stop=8, step=1):
383
+ num_freqs = (stop - start) // step
384
+ assert inputs.ndim == 5
385
+ C = inputs.size(1)
386
+
387
+ # Create Base 2 Fourier features.
388
+ freqs = torch.arange(start, stop, step, dtype=inputs.dtype, device=inputs.device)
389
+ assert num_freqs == len(freqs)
390
+ w = torch.pow(2.0, freqs) * (2 * torch.pi) # [num_freqs]
391
+ C = inputs.shape[1]
392
+ w = w.repeat(C)[None, :, None, None, None] # [1, C * num_freqs, 1, 1, 1]
393
+
394
+ # Interleaved repeat of input channels to match w.
395
+ h = inputs.repeat_interleave(num_freqs, dim=1) # [B, C * num_freqs, T, H, W]
396
+ # Scale channels by frequency.
397
+ h = w * h
398
+
399
+ return torch.cat(
400
+ [
401
+ inputs,
402
+ torch.sin(h),
403
+ torch.cos(h),
404
+ ],
405
+ dim=1,
406
+ )
407
+
408
+
409
+ class FourierFeatures(nn.Module):
410
+ def __init__(self, start: int = 6, stop: int = 8, step: int = 1):
411
+ super().__init__()
412
+ self.start = start
413
+ self.stop = stop
414
+ self.step = step
415
+
416
+ def forward(self, inputs):
417
+ """Add Fourier features to inputs.
418
+
419
+ Args:
420
+ inputs: Input tensor. Shape: [B, C, T, H, W]
421
+
422
+ Returns:
423
+ h: Output tensor. Shape: [B, (1 + 2 * num_freqs) * C, T, H, W]
424
+ """
425
+ return add_fourier_features(inputs, self.start, self.stop, self.step)
426
+
427
+
428
+ class Decoder(nn.Module):
429
+ def __init__(
430
+ self,
431
+ *,
432
+ out_channels: int = 3,
433
+ latent_dim: int,
434
+ base_channels: int,
435
+ channel_multipliers: List[int],
436
+ num_res_blocks: List[int],
437
+ temporal_expansions: Optional[List[int]] = None,
438
+ spatial_expansions: Optional[List[int]] = None,
439
+ has_attention: List[bool],
440
+ output_norm: bool = True,
441
+ nonlinearity: str = "silu",
442
+ output_nonlinearity: str = "silu",
443
+ causal: bool = True,
444
+ **block_kwargs,
445
+ ):
446
+ super().__init__()
447
+ self.input_channels = latent_dim
448
+ self.base_channels = base_channels
449
+ self.channel_multipliers = channel_multipliers
450
+ self.num_res_blocks = num_res_blocks
451
+ self.output_nonlinearity = output_nonlinearity
452
+ assert nonlinearity == "silu"
453
+ assert causal
454
+
455
+ ch = [mult * base_channels for mult in channel_multipliers]
456
+ self.num_up_blocks = len(ch) - 1
457
+ assert len(num_res_blocks) == self.num_up_blocks + 2
458
+
459
+ blocks = []
460
+
461
+ first_block = [
462
+ ops.Conv3d(latent_dim, ch[-1], kernel_size=(1, 1, 1))
463
+ ] # Input layer.
464
+ # First set of blocks preserve channel count.
465
+ for _ in range(num_res_blocks[-1]):
466
+ first_block.append(
467
+ block_fn(
468
+ ch[-1],
469
+ has_attention=has_attention[-1],
470
+ causal=causal,
471
+ **block_kwargs,
472
+ )
473
+ )
474
+ blocks.append(nn.Sequential(*first_block))
475
+
476
+ assert len(temporal_expansions) == len(spatial_expansions) == self.num_up_blocks
477
+ assert len(num_res_blocks) == len(has_attention) == self.num_up_blocks + 2
478
+
479
+ upsample_block_fn = CausalUpsampleBlock
480
+
481
+ for i in range(self.num_up_blocks):
482
+ block = upsample_block_fn(
483
+ ch[-i - 1],
484
+ ch[-i - 2],
485
+ num_res_blocks=num_res_blocks[-i - 2],
486
+ has_attention=has_attention[-i - 2],
487
+ temporal_expansion=temporal_expansions[-i - 1],
488
+ spatial_expansion=spatial_expansions[-i - 1],
489
+ causal=causal,
490
+ **block_kwargs,
491
+ )
492
+ blocks.append(block)
493
+
494
+ assert not output_norm
495
+
496
+ # Last block. Preserve channel count.
497
+ last_block = []
498
+ for _ in range(num_res_blocks[0]):
499
+ last_block.append(
500
+ block_fn(
501
+ ch[0], has_attention=has_attention[0], causal=causal, **block_kwargs
502
+ )
503
+ )
504
+ blocks.append(nn.Sequential(*last_block))
505
+
506
+ self.blocks = nn.ModuleList(blocks)
507
+ self.output_proj = Conv1x1(ch[0], out_channels)
508
+
509
+ def forward(self, x):
510
+ """Forward pass.
511
+
512
+ Args:
513
+ x: Latent tensor. Shape: [B, input_channels, t, h, w]. Scaled [-1, 1].
514
+
515
+ Returns:
516
+ x: Reconstructed video tensor. Shape: [B, C, T, H, W]. Scaled to [-1, 1].
517
+ T + 1 = (t - 1) * 4.
518
+ H = h * 16, W = w * 16.
519
+ """
520
+ for block in self.blocks:
521
+ x = block(x)
522
+
523
+ if self.output_nonlinearity == "silu":
524
+ x = F.silu(x, inplace=not self.training)
525
+ else:
526
+ assert (
527
+ not self.output_nonlinearity
528
+ ) # StyleGAN3 omits the to-RGB nonlinearity.
529
+
530
+ return self.output_proj(x).contiguous()
531
+
532
+ class LatentDistribution:
533
+ def __init__(self, mean: torch.Tensor, logvar: torch.Tensor):
534
+ """Initialize latent distribution.
535
+
536
+ Args:
537
+ mean: Mean of the distribution. Shape: [B, C, T, H, W].
538
+ logvar: Logarithm of variance of the distribution. Shape: [B, C, T, H, W].
539
+ """
540
+ assert mean.shape == logvar.shape
541
+ self.mean = mean
542
+ self.logvar = logvar
543
+
544
+ def sample(self, temperature=1.0, generator: torch.Generator = None, noise=None):
545
+ if temperature == 0.0:
546
+ return self.mean
547
+
548
+ if noise is None:
549
+ noise = torch.randn(self.mean.shape, device=self.mean.device, dtype=self.mean.dtype, generator=generator)
550
+ else:
551
+ assert noise.device == self.mean.device
552
+ noise = noise.to(self.mean.dtype)
553
+
554
+ if temperature != 1.0:
555
+ raise NotImplementedError(f"Temperature {temperature} is not supported.")
556
+
557
+ # Just Gaussian sample with no scaling of variance.
558
+ return noise * torch.exp(self.logvar * 0.5) + self.mean
559
+
560
+ def mode(self):
561
+ return self.mean
562
+
563
+ class Encoder(nn.Module):
564
+ def __init__(
565
+ self,
566
+ *,
567
+ in_channels: int,
568
+ base_channels: int,
569
+ channel_multipliers: List[int],
570
+ num_res_blocks: List[int],
571
+ latent_dim: int,
572
+ temporal_reductions: List[int],
573
+ spatial_reductions: List[int],
574
+ prune_bottlenecks: List[bool],
575
+ has_attentions: List[bool],
576
+ affine: bool = True,
577
+ bias: bool = True,
578
+ input_is_conv_1x1: bool = False,
579
+ padding_mode: str,
580
+ ):
581
+ super().__init__()
582
+ self.temporal_reductions = temporal_reductions
583
+ self.spatial_reductions = spatial_reductions
584
+ self.base_channels = base_channels
585
+ self.channel_multipliers = channel_multipliers
586
+ self.num_res_blocks = num_res_blocks
587
+ self.latent_dim = latent_dim
588
+
589
+ self.fourier_features = FourierFeatures()
590
+ ch = [mult * base_channels for mult in channel_multipliers]
591
+ num_down_blocks = len(ch) - 1
592
+ assert len(num_res_blocks) == num_down_blocks + 2
593
+
594
+ layers = (
595
+ [ops.Conv3d(in_channels, ch[0], kernel_size=(1, 1, 1), bias=True)]
596
+ if not input_is_conv_1x1
597
+ else [Conv1x1(in_channels, ch[0])]
598
+ )
599
+
600
+ assert len(prune_bottlenecks) == num_down_blocks + 2
601
+ assert len(has_attentions) == num_down_blocks + 2
602
+ block = partial(block_fn, padding_mode=padding_mode, affine=affine, bias=bias)
603
+
604
+ for _ in range(num_res_blocks[0]):
605
+ layers.append(block(ch[0], has_attention=has_attentions[0], prune_bottleneck=prune_bottlenecks[0]))
606
+ prune_bottlenecks = prune_bottlenecks[1:]
607
+ has_attentions = has_attentions[1:]
608
+
609
+ assert len(temporal_reductions) == len(spatial_reductions) == len(ch) - 1
610
+ for i in range(num_down_blocks):
611
+ layer = DownsampleBlock(
612
+ ch[i],
613
+ ch[i + 1],
614
+ num_res_blocks=num_res_blocks[i + 1],
615
+ temporal_reduction=temporal_reductions[i],
616
+ spatial_reduction=spatial_reductions[i],
617
+ prune_bottleneck=prune_bottlenecks[i],
618
+ has_attention=has_attentions[i],
619
+ affine=affine,
620
+ bias=bias,
621
+ padding_mode=padding_mode,
622
+ )
623
+
624
+ layers.append(layer)
625
+
626
+ # Additional blocks.
627
+ for _ in range(num_res_blocks[-1]):
628
+ layers.append(block(ch[-1], has_attention=has_attentions[-1], prune_bottleneck=prune_bottlenecks[-1]))
629
+
630
+ self.layers = nn.Sequential(*layers)
631
+
632
+ # Output layers.
633
+ self.output_norm = norm_fn(ch[-1])
634
+ self.output_proj = Conv1x1(ch[-1], 2 * latent_dim, bias=False)
635
+
636
+ @property
637
+ def temporal_downsample(self):
638
+ return math.prod(self.temporal_reductions)
639
+
640
+ @property
641
+ def spatial_downsample(self):
642
+ return math.prod(self.spatial_reductions)
643
+
644
+ def forward(self, x) -> LatentDistribution:
645
+ """Forward pass.
646
+
647
+ Args:
648
+ x: Input video tensor. Shape: [B, C, T, H, W]. Scaled to [-1, 1]
649
+
650
+ Returns:
651
+ means: Latent tensor. Shape: [B, latent_dim, t, h, w]. Scaled [-1, 1].
652
+ h = H // 8, w = W // 8, t - 1 = (T - 1) // 6
653
+ logvar: Shape: [B, latent_dim, t, h, w].
654
+ """
655
+ assert x.ndim == 5, f"Expected 5D input, got {x.shape}"
656
+ x = self.fourier_features(x)
657
+
658
+ x = self.layers(x)
659
+
660
+ x = self.output_norm(x)
661
+ x = F.silu(x, inplace=True)
662
+ x = self.output_proj(x)
663
+
664
+ means, logvar = torch.chunk(x, 2, dim=1)
665
+
666
+ assert means.ndim == 5
667
+ assert logvar.shape == means.shape
668
+ assert means.size(1) == self.latent_dim
669
+
670
+ return LatentDistribution(means, logvar)
671
+
672
+
673
+ class VideoVAE(nn.Module):
674
+ def __init__(self):
675
+ super().__init__()
676
+ self.encoder = Encoder(
677
+ in_channels=15,
678
+ base_channels=64,
679
+ channel_multipliers=[1, 2, 4, 6],
680
+ num_res_blocks=[3, 3, 4, 6, 3],
681
+ latent_dim=12,
682
+ temporal_reductions=[1, 2, 3],
683
+ spatial_reductions=[2, 2, 2],
684
+ prune_bottlenecks=[False, False, False, False, False],
685
+ has_attentions=[False, True, True, True, True],
686
+ affine=True,
687
+ bias=True,
688
+ input_is_conv_1x1=True,
689
+ padding_mode="replicate"
690
+ )
691
+ self.decoder = Decoder(
692
+ out_channels=3,
693
+ base_channels=128,
694
+ channel_multipliers=[1, 2, 4, 6],
695
+ temporal_expansions=[1, 2, 3],
696
+ spatial_expansions=[2, 2, 2],
697
+ num_res_blocks=[3, 3, 4, 6, 3],
698
+ latent_dim=12,
699
+ has_attention=[False, False, False, False, False],
700
+ padding_mode="replicate",
701
+ output_norm=False,
702
+ nonlinearity="silu",
703
+ output_nonlinearity="silu",
704
+ causal=True,
705
+ )
706
+
707
+ def encode(self, x):
708
+ return self.encoder(x).mode()
709
+
710
+ def decode(self, x):
711
+ return self.decoder(x)
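For orientation, a minimal usage sketch of the VideoVAE defined in model.py above (illustrative only, not part of the uploaded files). It assumes a ComfyUI environment so that comfy.ops and comfy.ldm.modules.attention resolve, and that the file is importable as `model`. Because the layers are built through comfy.ops.disable_weight_init, their weights stay uninitialized until a checkpoint is loaded, so only the wiring and tensor shapes are meaningful here.

# Illustrative only: shapes follow the docstrings in model.py; values are
# meaningless until real weights are loaded into the module.
import torch
from model import VideoVAE  # hypothetical import path for the file above

vae = VideoVAE().eval()

# A short RGB clip scaled to [-1, 1]: [B, C=3, T, H, W].
video = torch.rand(1, 3, 25, 128, 128) * 2.0 - 1.0

with torch.no_grad():
    latents = vae.encode(video)   # mean of the latent distribution, 12 channels
    recon = vae.decode(latents)   # decoded video, 3 channels, scaled to [-1, 1]

print(latents.shape, recon.shape)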
pixel_norm.py ADDED
@@ -0,0 +1,12 @@
1
+ import torch
2
+ from torch import nn
3
+
4
+
5
+ class PixelNorm(nn.Module):
6
+ def __init__(self, dim=1, eps=1e-8):
7
+ super(PixelNorm, self).__init__()
8
+ self.dim = dim
9
+ self.eps = eps
10
+
11
+ def forward(self, x):
12
+ return x / torch.sqrt(torch.mean(x**2, dim=self.dim, keepdim=True) + self.eps)
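A quick sanity check of the normalization above, included only as an illustration (not part of the upload): PixelNorm rescales every (t, h, w) position so that its channel vector has unit RMS.

import torch
from pixel_norm import PixelNorm  # assumes the file is importable under this name

x = torch.randn(2, 128, 4, 8, 8)            # [B, C, T, H, W]
y = PixelNorm(dim=1)(x)                     # normalize across the channel dim
rms = y.pow(2).mean(dim=1).sqrt()           # per-position RMS over channels
print(torch.allclose(rms, torch.ones_like(rms), atol=1e-4))  # expected: True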
put_taesd_encoder_pth_and_taesd_decoder_pth_here ADDED
File without changes
put_vae_here ADDED
File without changes
vae (1)/causal_conv3d.py ADDED
@@ -0,0 +1,64 @@
1
+ from typing import Tuple, Union
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ import comfy.ops
6
+ ops = comfy.ops.disable_weight_init
7
+
8
+
9
+ class CausalConv3d(nn.Module):
10
+ def __init__(
11
+ self,
12
+ in_channels,
13
+ out_channels,
14
+ kernel_size: int = 3,
15
+ stride: Union[int, Tuple[int]] = 1,
16
+ dilation: int = 1,
17
+ groups: int = 1,
18
+ **kwargs,
19
+ ):
20
+ super().__init__()
21
+
22
+ self.in_channels = in_channels
23
+ self.out_channels = out_channels
24
+
25
+ kernel_size = (kernel_size, kernel_size, kernel_size)
26
+ self.time_kernel_size = kernel_size[0]
27
+
28
+ dilation = (dilation, 1, 1)
29
+
30
+ height_pad = kernel_size[1] // 2
31
+ width_pad = kernel_size[2] // 2
32
+ padding = (0, height_pad, width_pad)
33
+
34
+ self.conv = ops.Conv3d(
35
+ in_channels,
36
+ out_channels,
37
+ kernel_size,
38
+ stride=stride,
39
+ dilation=dilation,
40
+ padding=padding,
41
+ padding_mode="zeros",
42
+ groups=groups,
43
+ )
44
+
45
+ def forward(self, x, causal: bool = True):
46
+ if causal:
47
+ first_frame_pad = x[:, :, :1, :, :].repeat(
48
+ (1, 1, self.time_kernel_size - 1, 1, 1)
49
+ )
50
+ x = torch.concatenate((first_frame_pad, x), dim=2)
51
+ else:
52
+ first_frame_pad = x[:, :, :1, :, :].repeat(
53
+ (1, 1, (self.time_kernel_size - 1) // 2, 1, 1)
54
+ )
55
+ last_frame_pad = x[:, :, -1:, :, :].repeat(
56
+ (1, 1, (self.time_kernel_size - 1) // 2, 1, 1)
57
+ )
58
+ x = torch.concatenate((first_frame_pad, x, last_frame_pad), dim=2)
59
+ x = self.conv(x)
60
+ return x
61
+
62
+ @property
63
+ def weight(self):
64
+ return self.conv.weight
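To make the padding behaviour above concrete, a small sketch (illustrative, not part of the upload): in causal mode the first frame is repeated kernel_size - 1 times in front of the clip, so every output frame depends only on current and past frames, and with stride 1 the temporal length is preserved. It assumes a ComfyUI environment for comfy.ops; the convolution weights are deliberately left uninitialized by disable_weight_init, so only the shapes are meaningful.

import torch
from causal_conv3d import CausalConv3d  # hypothetical import path for the file above

conv = CausalConv3d(in_channels=3, out_channels=8, kernel_size=3, stride=1)
x = torch.randn(1, 3, 9, 32, 32)     # [B, C, T, H, W]

y_causal = conv(x, causal=True)      # pads with two copies of the first frame
y_sym = conv(x, causal=False)        # pads with one first-frame and one last-frame copy
print(y_causal.shape, y_sym.shape)   # both: torch.Size([1, 8, 9, 32, 32])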
vae (1)/causal_video_autoencoder.py ADDED
@@ -0,0 +1,907 @@
1
+ import torch
2
+ from torch import nn
3
+ from functools import partial
4
+ import math
5
+ from einops import rearrange
6
+ from typing import Optional, Tuple, Union
7
+ from .conv_nd_factory import make_conv_nd, make_linear_nd
8
+ from .pixel_norm import PixelNorm
9
+ from ..model import PixArtAlphaCombinedTimestepSizeEmbeddings
10
+ import comfy.ops
11
+ ops = comfy.ops.disable_weight_init
12
+
13
+ class Encoder(nn.Module):
14
+ r"""
15
+ The `Encoder` layer of a variational autoencoder that encodes its input into a latent representation.
16
+
17
+ Args:
18
+ dims (`int` or `Tuple[int, int]`, *optional*, defaults to 3):
19
+ The number of dimensions to use in convolutions.
20
+ in_channels (`int`, *optional*, defaults to 3):
21
+ The number of input channels.
22
+ out_channels (`int`, *optional*, defaults to 3):
23
+ The number of output channels.
24
+ blocks (`List[Tuple[str, int]]`, *optional*, defaults to `[("res_x", 1)]`):
25
+ The blocks to use. Each block is a tuple of the block name and the number of layers.
26
+ base_channels (`int`, *optional*, defaults to 128):
27
+ The number of output channels for the first convolutional layer.
28
+ norm_num_groups (`int`, *optional*, defaults to 32):
29
+ The number of groups for normalization.
30
+ patch_size (`int`, *optional*, defaults to 1):
31
+ The patch size to use. Should be a power of 2.
32
+ norm_layer (`str`, *optional*, defaults to `group_norm`):
33
+ The normalization layer to use. Can be either `group_norm` or `pixel_norm`.
34
+ latent_log_var (`str`, *optional*, defaults to `per_channel`):
35
+ The number of channels for the log variance. Can be either `per_channel`, `uniform`, or `none`.
36
+ """
37
+
38
+ def __init__(
39
+ self,
40
+ dims: Union[int, Tuple[int, int]] = 3,
41
+ in_channels: int = 3,
42
+ out_channels: int = 3,
43
+ blocks=[("res_x", 1)],
44
+ base_channels: int = 128,
45
+ norm_num_groups: int = 32,
46
+ patch_size: Union[int, Tuple[int]] = 1,
47
+ norm_layer: str = "group_norm", # group_norm, pixel_norm
48
+ latent_log_var: str = "per_channel",
49
+ ):
50
+ super().__init__()
51
+ self.patch_size = patch_size
52
+ self.norm_layer = norm_layer
53
+ self.latent_channels = out_channels
54
+ self.latent_log_var = latent_log_var
55
+ self.blocks_desc = blocks
56
+
57
+ in_channels = in_channels * patch_size**2
58
+ output_channel = base_channels
59
+
60
+ self.conv_in = make_conv_nd(
61
+ dims=dims,
62
+ in_channels=in_channels,
63
+ out_channels=output_channel,
64
+ kernel_size=3,
65
+ stride=1,
66
+ padding=1,
67
+ causal=True,
68
+ )
69
+
70
+ self.down_blocks = nn.ModuleList([])
71
+
72
+ for block_name, block_params in blocks:
73
+ input_channel = output_channel
74
+ if isinstance(block_params, int):
75
+ block_params = {"num_layers": block_params}
76
+
77
+ if block_name == "res_x":
78
+ block = UNetMidBlock3D(
79
+ dims=dims,
80
+ in_channels=input_channel,
81
+ num_layers=block_params["num_layers"],
82
+ resnet_eps=1e-6,
83
+ resnet_groups=norm_num_groups,
84
+ norm_layer=norm_layer,
85
+ )
86
+ elif block_name == "res_x_y":
87
+ output_channel = block_params.get("multiplier", 2) * output_channel
88
+ block = ResnetBlock3D(
89
+ dims=dims,
90
+ in_channels=input_channel,
91
+ out_channels=output_channel,
92
+ eps=1e-6,
93
+ groups=norm_num_groups,
94
+ norm_layer=norm_layer,
95
+ )
96
+ elif block_name == "compress_time":
97
+ block = make_conv_nd(
98
+ dims=dims,
99
+ in_channels=input_channel,
100
+ out_channels=output_channel,
101
+ kernel_size=3,
102
+ stride=(2, 1, 1),
103
+ causal=True,
104
+ )
105
+ elif block_name == "compress_space":
106
+ block = make_conv_nd(
107
+ dims=dims,
108
+ in_channels=input_channel,
109
+ out_channels=output_channel,
110
+ kernel_size=3,
111
+ stride=(1, 2, 2),
112
+ causal=True,
113
+ )
114
+ elif block_name == "compress_all":
115
+ block = make_conv_nd(
116
+ dims=dims,
117
+ in_channels=input_channel,
118
+ out_channels=output_channel,
119
+ kernel_size=3,
120
+ stride=(2, 2, 2),
121
+ causal=True,
122
+ )
123
+ elif block_name == "compress_all_x_y":
124
+ output_channel = block_params.get("multiplier", 2) * output_channel
125
+ block = make_conv_nd(
126
+ dims=dims,
127
+ in_channels=input_channel,
128
+ out_channels=output_channel,
129
+ kernel_size=3,
130
+ stride=(2, 2, 2),
131
+ causal=True,
132
+ )
133
+ else:
134
+ raise ValueError(f"unknown block: {block_name}")
135
+
136
+ self.down_blocks.append(block)
137
+
138
+ # out
139
+ if norm_layer == "group_norm":
140
+ self.conv_norm_out = nn.GroupNorm(
141
+ num_channels=output_channel, num_groups=norm_num_groups, eps=1e-6
142
+ )
143
+ elif norm_layer == "pixel_norm":
144
+ self.conv_norm_out = PixelNorm()
145
+ elif norm_layer == "layer_norm":
146
+ self.conv_norm_out = LayerNorm(output_channel, eps=1e-6)
147
+
148
+ self.conv_act = nn.SiLU()
149
+
150
+ conv_out_channels = out_channels
151
+ if latent_log_var == "per_channel":
152
+ conv_out_channels *= 2
153
+ elif latent_log_var == "uniform":
154
+ conv_out_channels += 1
155
+ elif latent_log_var != "none":
156
+ raise ValueError(f"Invalid latent_log_var: {latent_log_var}")
157
+ self.conv_out = make_conv_nd(
158
+ dims, output_channel, conv_out_channels, 3, padding=1, causal=True
159
+ )
160
+
161
+ self.gradient_checkpointing = False
162
+
163
+ def forward(self, sample: torch.FloatTensor) -> torch.FloatTensor:
164
+ r"""The forward method of the `Encoder` class."""
165
+
166
+ sample = patchify(sample, patch_size_hw=self.patch_size, patch_size_t=1)
167
+ sample = self.conv_in(sample)
168
+
169
+ checkpoint_fn = (
170
+ partial(torch.utils.checkpoint.checkpoint, use_reentrant=False)
171
+ if self.gradient_checkpointing and self.training
172
+ else lambda x: x
173
+ )
174
+
175
+ for down_block in self.down_blocks:
176
+ sample = checkpoint_fn(down_block)(sample)
177
+
178
+ sample = self.conv_norm_out(sample)
179
+ sample = self.conv_act(sample)
180
+ sample = self.conv_out(sample)
181
+
182
+ if self.latent_log_var == "uniform":
183
+ last_channel = sample[:, -1:, ...]
184
+ num_dims = sample.dim()
185
+
186
+ if num_dims == 4:
187
+ # For shape (B, C, H, W)
188
+ repeated_last_channel = last_channel.repeat(
189
+ 1, sample.shape[1] - 2, 1, 1
190
+ )
191
+ sample = torch.cat([sample, repeated_last_channel], dim=1)
192
+ elif num_dims == 5:
193
+ # For shape (B, C, F, H, W)
194
+ repeated_last_channel = last_channel.repeat(
195
+ 1, sample.shape[1] - 2, 1, 1, 1
196
+ )
197
+ sample = torch.cat([sample, repeated_last_channel], dim=1)
198
+ else:
199
+ raise ValueError(f"Invalid input shape: {sample.shape}")
200
+
201
+ return sample
202
+
203
+
204
+ class Decoder(nn.Module):
205
+ r"""
206
+ The `Decoder` layer of a variational autoencoder that decodes its latent representation into an output sample.
207
+
208
+ Args:
209
+ dims (`int` or `Tuple[int, int]`, *optional*, defaults to 3):
210
+ The number of dimensions to use in convolutions.
211
+ in_channels (`int`, *optional*, defaults to 3):
212
+ The number of input channels.
213
+ out_channels (`int`, *optional*, defaults to 3):
214
+ The number of output channels.
215
+ blocks (`List[Tuple[str, int]]`, *optional*, defaults to `[("res_x", 1)]`):
216
+ The blocks to use. Each block is a tuple of the block name and the number of layers.
217
+ base_channels (`int`, *optional*, defaults to 128):
218
+ The number of output channels for the first convolutional layer.
219
+ norm_num_groups (`int`, *optional*, defaults to 32):
220
+ The number of groups for normalization.
221
+ patch_size (`int`, *optional*, defaults to 1):
222
+ The patch size to use. Should be a power of 2.
223
+ norm_layer (`str`, *optional*, defaults to `group_norm`):
224
+ The normalization layer to use. Can be either `group_norm` or `pixel_norm`.
225
+ causal (`bool`, *optional*, defaults to `True`):
226
+ Whether to use causal convolutions or not.
227
+ """
228
+
229
+ def __init__(
230
+ self,
231
+ dims,
232
+ in_channels: int = 3,
233
+ out_channels: int = 3,
234
+ blocks=[("res_x", 1)],
235
+ base_channels: int = 128,
236
+ layers_per_block: int = 2,
237
+ norm_num_groups: int = 32,
238
+ patch_size: int = 1,
239
+ norm_layer: str = "group_norm",
240
+ causal: bool = True,
241
+ timestep_conditioning: bool = False,
242
+ ):
243
+ super().__init__()
244
+ self.patch_size = patch_size
245
+ self.layers_per_block = layers_per_block
246
+ out_channels = out_channels * patch_size**2
247
+ self.causal = causal
248
+ self.blocks_desc = blocks
249
+
250
+ # Compute output channel to be product of all channel-multiplier blocks
251
+ output_channel = base_channels
252
+ for block_name, block_params in list(reversed(blocks)):
253
+ block_params = block_params if isinstance(block_params, dict) else {}
254
+ if block_name == "res_x_y":
255
+ output_channel = output_channel * block_params.get("multiplier", 2)
256
+ if block_name == "compress_all":
257
+ output_channel = output_channel * block_params.get("multiplier", 1)
258
+
259
+ self.conv_in = make_conv_nd(
260
+ dims,
261
+ in_channels,
262
+ output_channel,
263
+ kernel_size=3,
264
+ stride=1,
265
+ padding=1,
266
+ causal=True,
267
+ )
268
+
269
+ self.up_blocks = nn.ModuleList([])
270
+
271
+ for block_name, block_params in list(reversed(blocks)):
272
+ input_channel = output_channel
273
+ if isinstance(block_params, int):
274
+ block_params = {"num_layers": block_params}
275
+
276
+ if block_name == "res_x":
277
+ block = UNetMidBlock3D(
278
+ dims=dims,
279
+ in_channels=input_channel,
280
+ num_layers=block_params["num_layers"],
281
+ resnet_eps=1e-6,
282
+ resnet_groups=norm_num_groups,
283
+ norm_layer=norm_layer,
284
+ inject_noise=block_params.get("inject_noise", False),
285
+ timestep_conditioning=timestep_conditioning,
286
+ )
287
+ elif block_name == "attn_res_x":
288
+ block = UNetMidBlock3D(
289
+ dims=dims,
290
+ in_channels=input_channel,
291
+ num_layers=block_params["num_layers"],
292
+ resnet_groups=norm_num_groups,
293
+ norm_layer=norm_layer,
294
+ inject_noise=block_params.get("inject_noise", False),
295
+ timestep_conditioning=timestep_conditioning,
296
+ attention_head_dim=block_params["attention_head_dim"],
297
+ )
298
+ elif block_name == "res_x_y":
299
+ output_channel = output_channel // block_params.get("multiplier", 2)
300
+ block = ResnetBlock3D(
301
+ dims=dims,
302
+ in_channels=input_channel,
303
+ out_channels=output_channel,
304
+ eps=1e-6,
305
+ groups=norm_num_groups,
306
+ norm_layer=norm_layer,
307
+ inject_noise=block_params.get("inject_noise", False),
308
+ timestep_conditioning=False,
309
+ )
310
+ elif block_name == "compress_time":
311
+ block = DepthToSpaceUpsample(
312
+ dims=dims, in_channels=input_channel, stride=(2, 1, 1)
313
+ )
314
+ elif block_name == "compress_space":
315
+ block = DepthToSpaceUpsample(
316
+ dims=dims, in_channels=input_channel, stride=(1, 2, 2)
317
+ )
318
+ elif block_name == "compress_all":
319
+ output_channel = output_channel // block_params.get("multiplier", 1)
320
+ block = DepthToSpaceUpsample(
321
+ dims=dims,
322
+ in_channels=input_channel,
323
+ stride=(2, 2, 2),
324
+ residual=block_params.get("residual", False),
325
+ out_channels_reduction_factor=block_params.get("multiplier", 1),
326
+ )
327
+ else:
328
+ raise ValueError(f"unknown layer: {block_name}")
329
+
330
+ self.up_blocks.append(block)
331
+
332
+ if norm_layer == "group_norm":
333
+ self.conv_norm_out = nn.GroupNorm(
334
+ num_channels=output_channel, num_groups=norm_num_groups, eps=1e-6
335
+ )
336
+ elif norm_layer == "pixel_norm":
337
+ self.conv_norm_out = PixelNorm()
338
+ elif norm_layer == "layer_norm":
339
+ self.conv_norm_out = LayerNorm(output_channel, eps=1e-6)
340
+
341
+ self.conv_act = nn.SiLU()
342
+ self.conv_out = make_conv_nd(
343
+ dims, output_channel, out_channels, 3, padding=1, causal=True
344
+ )
345
+
346
+ self.gradient_checkpointing = False
347
+
348
+ self.timestep_conditioning = timestep_conditioning
349
+
350
+ if timestep_conditioning:
351
+ self.timestep_scale_multiplier = nn.Parameter(
352
+ torch.tensor(1000.0, dtype=torch.float32)
353
+ )
354
+ self.last_time_embedder = PixArtAlphaCombinedTimestepSizeEmbeddings(
355
+ output_channel * 2, 0, operations=ops,
356
+ )
357
+ self.last_scale_shift_table = nn.Parameter(torch.empty(2, output_channel))
358
+
359
+ # def forward(self, sample: torch.FloatTensor, target_shape) -> torch.FloatTensor:
360
+ def forward(
361
+ self,
362
+ sample: torch.FloatTensor,
363
+ timestep: Optional[torch.Tensor] = None,
364
+ ) -> torch.FloatTensor:
365
+ r"""The forward method of the `Decoder` class."""
366
+ batch_size = sample.shape[0]
367
+
368
+ sample = self.conv_in(sample, causal=self.causal)
369
+
370
+ checkpoint_fn = (
371
+ partial(torch.utils.checkpoint.checkpoint, use_reentrant=False)
372
+ if self.gradient_checkpointing and self.training
373
+ else lambda x: x
374
+ )
375
+
376
+ scaled_timestep = None
377
+ if self.timestep_conditioning:
378
+ assert (
379
+ timestep is not None
380
+ ), "should pass timestep with timestep_conditioning=True"
381
+ scaled_timestep = timestep * self.timestep_scale_multiplier.to(dtype=sample.dtype, device=sample.device)
382
+
383
+ for up_block in self.up_blocks:
384
+ if self.timestep_conditioning and isinstance(up_block, UNetMidBlock3D):
385
+ sample = checkpoint_fn(up_block)(
386
+ sample, causal=self.causal, timestep=scaled_timestep
387
+ )
388
+ else:
389
+ sample = checkpoint_fn(up_block)(sample, causal=self.causal)
390
+
391
+ sample = self.conv_norm_out(sample)
392
+
393
+ if self.timestep_conditioning:
394
+ embedded_timestep = self.last_time_embedder(
395
+ timestep=scaled_timestep.flatten(),
396
+ resolution=None,
397
+ aspect_ratio=None,
398
+ batch_size=sample.shape[0],
399
+ hidden_dtype=sample.dtype,
400
+ )
401
+ embedded_timestep = embedded_timestep.view(
402
+ batch_size, embedded_timestep.shape[-1], 1, 1, 1
403
+ )
404
+ ada_values = self.last_scale_shift_table[
405
+ None, ..., None, None, None
406
+ ].to(device=sample.device, dtype=sample.dtype) + embedded_timestep.reshape(
407
+ batch_size,
408
+ 2,
409
+ -1,
410
+ embedded_timestep.shape[-3],
411
+ embedded_timestep.shape[-2],
412
+ embedded_timestep.shape[-1],
413
+ )
414
+ shift, scale = ada_values.unbind(dim=1)
415
+ sample = sample * (1 + scale) + shift
416
+
417
+ sample = self.conv_act(sample)
418
+ sample = self.conv_out(sample, causal=self.causal)
419
+
420
+ sample = unpatchify(sample, patch_size_hw=self.patch_size, patch_size_t=1)
421
+
422
+ return sample
423
+
424
+
425
+ class UNetMidBlock3D(nn.Module):
426
+ """
427
+ A 3D UNet mid-block [`UNetMidBlock3D`] with multiple residual blocks.
428
+
429
+ Args:
430
+ in_channels (`int`): The number of input channels.
431
+ dropout (`float`, *optional*, defaults to 0.0): The dropout rate.
432
+ num_layers (`int`, *optional*, defaults to 1): The number of residual blocks.
433
+ resnet_eps (`float`, *optional*, 1e-6 ): The epsilon value for the resnet blocks.
434
+ resnet_groups (`int`, *optional*, defaults to 32):
435
+ The number of groups to use in the group normalization layers of the resnet blocks.
436
+
437
+ Returns:
438
+ `torch.FloatTensor`: The output of the last residual block, which is a tensor of shape `(batch_size,
439
+ in_channels, height, width)`.
440
+
441
+ """
442
+
443
+ def __init__(
444
+ self,
445
+ dims: Union[int, Tuple[int, int]],
446
+ in_channels: int,
447
+ dropout: float = 0.0,
448
+ num_layers: int = 1,
449
+ resnet_eps: float = 1e-6,
450
+ resnet_groups: int = 32,
451
+ norm_layer: str = "group_norm",
452
+ inject_noise: bool = False,
453
+ timestep_conditioning: bool = False,
454
+ ):
455
+ super().__init__()
456
+ resnet_groups = (
457
+ resnet_groups if resnet_groups is not None else min(in_channels // 4, 32)
458
+ )
459
+
460
+ self.timestep_conditioning = timestep_conditioning
461
+
462
+ if timestep_conditioning:
463
+ self.time_embedder = PixArtAlphaCombinedTimestepSizeEmbeddings(
464
+ in_channels * 4, 0, operations=ops,
465
+ )
466
+
467
+ self.res_blocks = nn.ModuleList(
468
+ [
469
+ ResnetBlock3D(
470
+ dims=dims,
471
+ in_channels=in_channels,
472
+ out_channels=in_channels,
473
+ eps=resnet_eps,
474
+ groups=resnet_groups,
475
+ dropout=dropout,
476
+ norm_layer=norm_layer,
477
+ inject_noise=inject_noise,
478
+ timestep_conditioning=timestep_conditioning,
479
+ )
480
+ for _ in range(num_layers)
481
+ ]
482
+ )
483
+
484
+ def forward(
485
+ self, hidden_states: torch.FloatTensor, causal: bool = True, timestep: Optional[torch.Tensor] = None
486
+ ) -> torch.FloatTensor:
487
+ timestep_embed = None
488
+ if self.timestep_conditioning:
489
+ assert (
490
+ timestep is not None
491
+ ), "should pass timestep with timestep_conditioning=True"
492
+ batch_size = hidden_states.shape[0]
493
+ timestep_embed = self.time_embedder(
494
+ timestep=timestep.flatten(),
495
+ resolution=None,
496
+ aspect_ratio=None,
497
+ batch_size=batch_size,
498
+ hidden_dtype=hidden_states.dtype,
499
+ )
500
+ timestep_embed = timestep_embed.view(
501
+ batch_size, timestep_embed.shape[-1], 1, 1, 1
502
+ )
503
+
504
+ for resnet in self.res_blocks:
505
+ hidden_states = resnet(hidden_states, causal=causal, timestep=timestep_embed)
506
+
507
+ return hidden_states
508
+
509
+
510
+ class DepthToSpaceUpsample(nn.Module):
511
+ def __init__(
512
+ self, dims, in_channels, stride, residual=False, out_channels_reduction_factor=1
513
+ ):
514
+ super().__init__()
515
+ self.stride = stride
516
+ self.out_channels = (
517
+ math.prod(stride) * in_channels // out_channels_reduction_factor
518
+ )
519
+ self.conv = make_conv_nd(
520
+ dims=dims,
521
+ in_channels=in_channels,
522
+ out_channels=self.out_channels,
523
+ kernel_size=3,
524
+ stride=1,
525
+ causal=True,
526
+ )
527
+ self.residual = residual
528
+ self.out_channels_reduction_factor = out_channels_reduction_factor
529
+
530
+ def forward(self, x, causal: bool = True, timestep: Optional[torch.Tensor] = None):
531
+ if self.residual:
532
+ # Reshape and duplicate the input to match the output shape
533
+ x_in = rearrange(
534
+ x,
535
+ "b (c p1 p2 p3) d h w -> b c (d p1) (h p2) (w p3)",
536
+ p1=self.stride[0],
537
+ p2=self.stride[1],
538
+ p3=self.stride[2],
539
+ )
540
+ num_repeat = math.prod(self.stride) // self.out_channels_reduction_factor
541
+ x_in = x_in.repeat(1, num_repeat, 1, 1, 1)
542
+ if self.stride[0] == 2:
543
+ x_in = x_in[:, :, 1:, :, :]
544
+ x = self.conv(x, causal=causal)
545
+ x = rearrange(
546
+ x,
547
+ "b (c p1 p2 p3) d h w -> b c (d p1) (h p2) (w p3)",
548
+ p1=self.stride[0],
549
+ p2=self.stride[1],
550
+ p3=self.stride[2],
551
+ )
552
+ if self.stride[0] == 2:
553
+ x = x[:, :, 1:, :, :]
554
+ if self.residual:
555
+ x = x + x_in
556
+ return x
557
+
558
+ class LayerNorm(nn.Module):
559
+ def __init__(self, dim, eps, elementwise_affine=True) -> None:
560
+ super().__init__()
561
+ self.norm = nn.LayerNorm(dim, eps=eps, elementwise_affine=elementwise_affine)
562
+
563
+ def forward(self, x):
564
+ x = rearrange(x, "b c d h w -> b d h w c")
565
+ x = self.norm(x)
566
+ x = rearrange(x, "b d h w c -> b c d h w")
567
+ return x
568
+
569
+
570
+ class ResnetBlock3D(nn.Module):
571
+ r"""
572
+ A Resnet block.
573
+
574
+ Parameters:
575
+ in_channels (`int`): The number of channels in the input.
576
+ out_channels (`int`, *optional*, default to be `None`):
577
+ The number of output channels for the first conv layer. If None, same as `in_channels`.
578
+ dropout (`float`, *optional*, defaults to `0.0`): The dropout probability to use.
579
+ groups (`int`, *optional*, default to `32`): The number of groups to use for the first normalization layer.
580
+ eps (`float`, *optional*, defaults to `1e-6`): The epsilon to use for the normalization.
581
+ """
582
+
583
+ def __init__(
584
+ self,
585
+ dims: Union[int, Tuple[int, int]],
586
+ in_channels: int,
587
+ out_channels: Optional[int] = None,
588
+ dropout: float = 0.0,
589
+ groups: int = 32,
590
+ eps: float = 1e-6,
591
+ norm_layer: str = "group_norm",
592
+ inject_noise: bool = False,
593
+ timestep_conditioning: bool = False,
594
+ ):
595
+ super().__init__()
596
+ self.in_channels = in_channels
597
+ out_channels = in_channels if out_channels is None else out_channels
598
+ self.out_channels = out_channels
599
+ self.inject_noise = inject_noise
600
+
601
+ if norm_layer == "group_norm":
602
+ self.norm1 = nn.GroupNorm(
603
+ num_groups=groups, num_channels=in_channels, eps=eps, affine=True
604
+ )
605
+ elif norm_layer == "pixel_norm":
606
+ self.norm1 = PixelNorm()
607
+ elif norm_layer == "layer_norm":
608
+ self.norm1 = LayerNorm(in_channels, eps=eps, elementwise_affine=True)
609
+
610
+ self.non_linearity = nn.SiLU()
611
+
612
+ self.conv1 = make_conv_nd(
613
+ dims,
614
+ in_channels,
615
+ out_channels,
616
+ kernel_size=3,
617
+ stride=1,
618
+ padding=1,
619
+ causal=True,
620
+ )
621
+
622
+ if inject_noise:
623
+ self.per_channel_scale1 = nn.Parameter(torch.zeros((in_channels, 1, 1)))
624
+
625
+ if norm_layer == "group_norm":
626
+ self.norm2 = nn.GroupNorm(
627
+ num_groups=groups, num_channels=out_channels, eps=eps, affine=True
628
+ )
629
+ elif norm_layer == "pixel_norm":
630
+ self.norm2 = PixelNorm()
631
+ elif norm_layer == "layer_norm":
632
+ self.norm2 = LayerNorm(out_channels, eps=eps, elementwise_affine=True)
633
+
634
+ self.dropout = torch.nn.Dropout(dropout)
635
+
636
+ self.conv2 = make_conv_nd(
637
+ dims,
638
+ out_channels,
639
+ out_channels,
640
+ kernel_size=3,
641
+ stride=1,
642
+ padding=1,
643
+ causal=True,
644
+ )
645
+
646
+ if inject_noise:
647
+ self.per_channel_scale2 = nn.Parameter(torch.zeros((in_channels, 1, 1)))
648
+
649
+ self.conv_shortcut = (
650
+ make_linear_nd(
651
+ dims=dims, in_channels=in_channels, out_channels=out_channels
652
+ )
653
+ if in_channels != out_channels
654
+ else nn.Identity()
655
+ )
656
+
657
+ self.norm3 = (
658
+ LayerNorm(in_channels, eps=eps, elementwise_affine=True)
659
+ if in_channels != out_channels
660
+ else nn.Identity()
661
+ )
662
+
663
+ self.timestep_conditioning = timestep_conditioning
664
+
665
+ if timestep_conditioning:
666
+ self.scale_shift_table = nn.Parameter(
667
+ torch.randn(4, in_channels) / in_channels**0.5
668
+ )
669
+
670
+ def _feed_spatial_noise(
671
+ self, hidden_states: torch.FloatTensor, per_channel_scale: torch.FloatTensor
672
+ ) -> torch.FloatTensor:
673
+ spatial_shape = hidden_states.shape[-2:]
674
+ device = hidden_states.device
675
+ dtype = hidden_states.dtype
676
+
677
+ # similar to the "explicit noise inputs" method in style-gan
678
+ spatial_noise = torch.randn(spatial_shape, device=device, dtype=dtype)[None]
679
+ scaled_noise = (spatial_noise * per_channel_scale)[None, :, None, ...]
680
+ hidden_states = hidden_states + scaled_noise
681
+
682
+ return hidden_states
683
+
684
+ def forward(
685
+ self,
686
+ input_tensor: torch.FloatTensor,
687
+ causal: bool = True,
688
+ timestep: Optional[torch.Tensor] = None,
689
+ ) -> torch.FloatTensor:
690
+ hidden_states = input_tensor
691
+ batch_size = hidden_states.shape[0]
692
+
693
+ hidden_states = self.norm1(hidden_states)
694
+ if self.timestep_conditioning:
695
+ assert (
696
+ timestep is not None
697
+ ), "should pass timestep with timestep_conditioning=True"
698
+ ada_values = self.scale_shift_table[
699
+ None, ..., None, None, None
700
+ ].to(device=hidden_states.device, dtype=hidden_states.dtype) + timestep.reshape(
701
+ batch_size,
702
+ 4,
703
+ -1,
704
+ timestep.shape[-3],
705
+ timestep.shape[-2],
706
+ timestep.shape[-1],
707
+ )
708
+ shift1, scale1, shift2, scale2 = ada_values.unbind(dim=1)
709
+
710
+ hidden_states = hidden_states * (1 + scale1) + shift1
711
+
712
+ hidden_states = self.non_linearity(hidden_states)
713
+
714
+ hidden_states = self.conv1(hidden_states, causal=causal)
715
+
716
+ if self.inject_noise:
717
+ hidden_states = self._feed_spatial_noise(
718
+ hidden_states, self.per_channel_scale1.to(device=hidden_states.device, dtype=hidden_states.dtype)
719
+ )
720
+
721
+ hidden_states = self.norm2(hidden_states)
722
+
723
+ if self.timestep_conditioning:
724
+ hidden_states = hidden_states * (1 + scale2) + shift2
725
+
726
+ hidden_states = self.non_linearity(hidden_states)
727
+
728
+ hidden_states = self.dropout(hidden_states)
729
+
730
+ hidden_states = self.conv2(hidden_states, causal=causal)
731
+
732
+ if self.inject_noise:
733
+ hidden_states = self._feed_spatial_noise(
734
+ hidden_states, self.per_channel_scale2.to(device=hidden_states.device, dtype=hidden_states.dtype)
735
+ )
736
+
737
+ input_tensor = self.norm3(input_tensor)
738
+
739
+ batch_size = input_tensor.shape[0]
740
+
741
+ input_tensor = self.conv_shortcut(input_tensor)
742
+
743
+ output_tensor = input_tensor + hidden_states
744
+
745
+ return output_tensor
746
+
747
+
748
+ def patchify(x, patch_size_hw, patch_size_t=1):
749
+ if patch_size_hw == 1 and patch_size_t == 1:
750
+ return x
751
+ if x.dim() == 4:
752
+ x = rearrange(
753
+ x, "b c (h q) (w r) -> b (c r q) h w", q=patch_size_hw, r=patch_size_hw
754
+ )
755
+ elif x.dim() == 5:
756
+ x = rearrange(
757
+ x,
758
+ "b c (f p) (h q) (w r) -> b (c p r q) f h w",
759
+ p=patch_size_t,
760
+ q=patch_size_hw,
761
+ r=patch_size_hw,
762
+ )
763
+ else:
764
+ raise ValueError(f"Invalid input shape: {x.shape}")
765
+
766
+ return x
767
+
768
+
769
+ def unpatchify(x, patch_size_hw, patch_size_t=1):
770
+ if patch_size_hw == 1 and patch_size_t == 1:
771
+ return x
772
+
773
+ if x.dim() == 4:
774
+ x = rearrange(
775
+ x, "b (c r q) h w -> b c (h q) (w r)", q=patch_size_hw, r=patch_size_hw
776
+ )
777
+ elif x.dim() == 5:
778
+ x = rearrange(
779
+ x,
780
+ "b (c p r q) f h w -> b c (f p) (h q) (w r)",
781
+ p=patch_size_t,
782
+ q=patch_size_hw,
783
+ r=patch_size_hw,
784
+ )
785
+
786
+ return x
787
+
788
+ class processor(nn.Module):
789
+ def __init__(self):
790
+ super().__init__()
791
+ self.register_buffer("std-of-means", torch.empty(128))
792
+ self.register_buffer("mean-of-means", torch.empty(128))
793
+ self.register_buffer("mean-of-stds", torch.empty(128))
794
+ self.register_buffer("mean-of-stds_over_std-of-means", torch.empty(128))
795
+ self.register_buffer("channel", torch.empty(128))
796
+
797
+ def un_normalize(self, x):
798
+ return (x * self.get_buffer("std-of-means").view(1, -1, 1, 1, 1).to(x)) + self.get_buffer("mean-of-means").view(1, -1, 1, 1, 1).to(x)
799
+
800
+ def normalize(self, x):
801
+ return (x - self.get_buffer("mean-of-means").view(1, -1, 1, 1, 1).to(x)) / self.get_buffer("std-of-means").view(1, -1, 1, 1, 1).to(x)
802
+
803
+ class VideoVAE(nn.Module):
804
+ def __init__(self, version=0):
805
+ super().__init__()
806
+
807
+ if version == 0:
808
+ config = {
809
+ "_class_name": "CausalVideoAutoencoder",
810
+ "dims": 3,
811
+ "in_channels": 3,
812
+ "out_channels": 3,
813
+ "latent_channels": 128,
814
+ "blocks": [
815
+ ["res_x", 4],
816
+ ["compress_all", 1],
817
+ ["res_x_y", 1],
818
+ ["res_x", 3],
819
+ ["compress_all", 1],
820
+ ["res_x_y", 1],
821
+ ["res_x", 3],
822
+ ["compress_all", 1],
823
+ ["res_x", 3],
824
+ ["res_x", 4],
825
+ ],
826
+ "scaling_factor": 1.0,
827
+ "norm_layer": "pixel_norm",
828
+ "patch_size": 4,
829
+ "latent_log_var": "uniform",
830
+ "use_quant_conv": False,
831
+ "causal_decoder": False,
832
+ }
833
+ else:
834
+ config = {
835
+ "_class_name": "CausalVideoAutoencoder",
836
+ "dims": 3,
837
+ "in_channels": 3,
838
+ "out_channels": 3,
839
+ "latent_channels": 128,
840
+ "decoder_blocks": [
841
+ ["res_x", {"num_layers": 5, "inject_noise": True}],
842
+ ["compress_all", {"residual": True, "multiplier": 2}],
843
+ ["res_x", {"num_layers": 6, "inject_noise": True}],
844
+ ["compress_all", {"residual": True, "multiplier": 2}],
845
+ ["res_x", {"num_layers": 7, "inject_noise": True}],
846
+ ["compress_all", {"residual": True, "multiplier": 2}],
847
+ ["res_x", {"num_layers": 8, "inject_noise": False}]
848
+ ],
849
+ "encoder_blocks": [
850
+ ["res_x", {"num_layers": 4}],
851
+ ["compress_all", {}],
852
+ ["res_x_y", 1],
853
+ ["res_x", {"num_layers": 3}],
854
+ ["compress_all", {}],
855
+ ["res_x_y", 1],
856
+ ["res_x", {"num_layers": 3}],
857
+ ["compress_all", {}],
858
+ ["res_x", {"num_layers": 3}],
859
+ ["res_x", {"num_layers": 4}]
860
+ ],
861
+ "scaling_factor": 1.0,
862
+ "norm_layer": "pixel_norm",
863
+ "patch_size": 4,
864
+ "latent_log_var": "uniform",
865
+ "use_quant_conv": False,
866
+ "causal_decoder": False,
867
+ "timestep_conditioning": True,
868
+ }
869
+
870
+ double_z = config.get("double_z", True)
871
+ latent_log_var = config.get(
872
+ "latent_log_var", "per_channel" if double_z else "none"
873
+ )
874
+
875
+ self.encoder = Encoder(
876
+ dims=config["dims"],
877
+ in_channels=config.get("in_channels", 3),
878
+ out_channels=config["latent_channels"],
879
+ blocks=config.get("encoder_blocks", config.get("encoder_blocks", config.get("blocks"))),
880
+ patch_size=config.get("patch_size", 1),
881
+ latent_log_var=latent_log_var,
882
+ norm_layer=config.get("norm_layer", "group_norm"),
883
+ )
884
+
885
+ self.decoder = Decoder(
886
+ dims=config["dims"],
887
+ in_channels=config["latent_channels"],
888
+ out_channels=config.get("out_channels", 3),
889
+ blocks=config.get("decoder_blocks", config.get("blocks")),
890
+ patch_size=config.get("patch_size", 1),
891
+ norm_layer=config.get("norm_layer", "group_norm"),
892
+ causal=config.get("causal_decoder", False),
893
+ timestep_conditioning=config.get("timestep_conditioning", False),
894
+ )
895
+
896
+ self.timestep_conditioning = config.get("timestep_conditioning", False)
897
+ self.per_channel_statistics = processor()
898
+
899
+ def encode(self, x):
900
+ means, logvar = torch.chunk(self.encoder(x), 2, dim=1)
901
+ return self.per_channel_statistics.normalize(means)
902
+
903
+ def decode(self, x, timestep=0.05, noise_scale=0.025):
904
+ if self.timestep_conditioning: #TODO: seed
905
+ x = torch.randn_like(x) * noise_scale + (1.0 - noise_scale) * x
906
+ return self.decoder(self.per_channel_statistics.un_normalize(x), timestep=timestep)
907
+
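A quick shape check may help make the unpatchify layout above concrete. This is a minimal sketch, not part of the uploaded files; it reproduces the 5D branch with plain einops on arbitrary tensor sizes.

import torch
from einops import rearrange

# packed latent: channels = c * p * r * q with c=3, p=1 (temporal patch), q=r=4 (spatial patch)
x = torch.randn(1, 3 * 1 * 4 * 4, 2, 8, 8)
y = rearrange(x, "b (c p r q) f h w -> b c (f p) (h q) (w r)", p=1, q=4, r=4)
assert y.shape == (1, 3, 2, 32, 32)  # 4x4 spatial patches unfolded back to pixels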
vae (1)/conv_nd_factory.py ADDED
@@ -0,0 +1,82 @@
1
+ from typing import Tuple, Union
2
+
3
+
4
+ from .dual_conv3d import DualConv3d
5
+ from .causal_conv3d import CausalConv3d
6
+ import comfy.ops
7
+ ops = comfy.ops.disable_weight_init
8
+
9
+ def make_conv_nd(
10
+ dims: Union[int, Tuple[int, int]],
11
+ in_channels: int,
12
+ out_channels: int,
13
+ kernel_size: int,
14
+ stride=1,
15
+ padding=0,
16
+ dilation=1,
17
+ groups=1,
18
+ bias=True,
19
+ causal=False,
20
+ ):
21
+ if dims == 2:
22
+ return ops.Conv2d(
23
+ in_channels=in_channels,
24
+ out_channels=out_channels,
25
+ kernel_size=kernel_size,
26
+ stride=stride,
27
+ padding=padding,
28
+ dilation=dilation,
29
+ groups=groups,
30
+ bias=bias,
31
+ )
32
+ elif dims == 3:
33
+ if causal:
34
+ return CausalConv3d(
35
+ in_channels=in_channels,
36
+ out_channels=out_channels,
37
+ kernel_size=kernel_size,
38
+ stride=stride,
39
+ padding=padding,
40
+ dilation=dilation,
41
+ groups=groups,
42
+ bias=bias,
43
+ )
44
+ return ops.Conv3d(
45
+ in_channels=in_channels,
46
+ out_channels=out_channels,
47
+ kernel_size=kernel_size,
48
+ stride=stride,
49
+ padding=padding,
50
+ dilation=dilation,
51
+ groups=groups,
52
+ bias=bias,
53
+ )
54
+ elif dims == (2, 1):
55
+ return DualConv3d(
56
+ in_channels=in_channels,
57
+ out_channels=out_channels,
58
+ kernel_size=kernel_size,
59
+ stride=stride,
60
+ padding=padding,
61
+ bias=bias,
62
+ )
63
+ else:
64
+ raise ValueError(f"unsupported dimensions: {dims}")
65
+
66
+
67
+ def make_linear_nd(
68
+ dims: int,
69
+ in_channels: int,
70
+ out_channels: int,
71
+ bias=True,
72
+ ):
73
+ if dims == 2:
74
+ return ops.Conv2d(
75
+ in_channels=in_channels, out_channels=out_channels, kernel_size=1, bias=bias
76
+ )
77
+ elif dims == 3 or dims == (2, 1):
78
+ return ops.Conv3d(
79
+ in_channels=in_channels, out_channels=out_channels, kernel_size=1, bias=bias
80
+ )
81
+ else:
82
+ raise ValueError(f"unsupported dimensions: {dims}")
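make_conv_nd dispatches on dims: 2 builds a plain Conv2d, 3 builds a CausalConv3d when causal=True and a Conv3d otherwise, and the tuple (2, 1) selects the factorized DualConv3d. A short usage sketch, assuming it runs inside a ComfyUI checkout where comfy.ops and the sibling modules resolve; the import path below is illustrative.

# illustrative import path; adjust to wherever this package lives in the checkout
from comfy.ldm.lightricks.vae.conv_nd_factory import make_conv_nd

spatial_conv = make_conv_nd(dims=2, in_channels=64, out_channels=64, kernel_size=3, padding=1)
causal_conv = make_conv_nd(dims=3, in_channels=64, out_channels=64, kernel_size=3, causal=True)
dual_conv = make_conv_nd(dims=(2, 1), in_channels=64, out_channels=128, kernel_size=3, padding=1)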
vae (1)/dual_conv3d.py ADDED
@@ -0,0 +1,195 @@
1
+ import math
2
+ from typing import Tuple, Union
3
+
4
+ import torch
5
+ import torch.nn as nn
6
+ import torch.nn.functional as F
7
+ from einops import rearrange
8
+
9
+
10
+ class DualConv3d(nn.Module):
11
+ def __init__(
12
+ self,
13
+ in_channels,
14
+ out_channels,
15
+ kernel_size,
16
+ stride: Union[int, Tuple[int, int, int]] = 1,
17
+ padding: Union[int, Tuple[int, int, int]] = 0,
18
+ dilation: Union[int, Tuple[int, int, int]] = 1,
19
+ groups=1,
20
+ bias=True,
21
+ ):
22
+ super(DualConv3d, self).__init__()
23
+
24
+ self.in_channels = in_channels
25
+ self.out_channels = out_channels
26
+ # Ensure kernel_size, stride, padding, and dilation are tuples of length 3
27
+ if isinstance(kernel_size, int):
28
+ kernel_size = (kernel_size, kernel_size, kernel_size)
29
+ if kernel_size == (1, 1, 1):
30
+ raise ValueError(
31
+ "kernel_size must be greater than 1. Use make_linear_nd instead."
32
+ )
33
+ if isinstance(stride, int):
34
+ stride = (stride, stride, stride)
35
+ if isinstance(padding, int):
36
+ padding = (padding, padding, padding)
37
+ if isinstance(dilation, int):
38
+ dilation = (dilation, dilation, dilation)
39
+
40
+ # Set parameters for convolutions
41
+ self.groups = groups
42
+ self.bias = bias
43
+
44
+ # Define the size of the channels after the first convolution
45
+ intermediate_channels = (
46
+ out_channels if in_channels < out_channels else in_channels
47
+ )
48
+
49
+ # Define parameters for the first convolution
50
+ self.weight1 = nn.Parameter(
51
+ torch.Tensor(
52
+ intermediate_channels,
53
+ in_channels // groups,
54
+ 1,
55
+ kernel_size[1],
56
+ kernel_size[2],
57
+ )
58
+ )
59
+ self.stride1 = (1, stride[1], stride[2])
60
+ self.padding1 = (0, padding[1], padding[2])
61
+ self.dilation1 = (1, dilation[1], dilation[2])
62
+ if bias:
63
+ self.bias1 = nn.Parameter(torch.Tensor(intermediate_channels))
64
+ else:
65
+ self.register_parameter("bias1", None)
66
+
67
+ # Define parameters for the second convolution
68
+ self.weight2 = nn.Parameter(
69
+ torch.Tensor(
70
+ out_channels, intermediate_channels // groups, kernel_size[0], 1, 1
71
+ )
72
+ )
73
+ self.stride2 = (stride[0], 1, 1)
74
+ self.padding2 = (padding[0], 0, 0)
75
+ self.dilation2 = (dilation[0], 1, 1)
76
+ if bias:
77
+ self.bias2 = nn.Parameter(torch.Tensor(out_channels))
78
+ else:
79
+ self.register_parameter("bias2", None)
80
+
81
+ # Initialize weights and biases
82
+ self.reset_parameters()
83
+
84
+ def reset_parameters(self):
85
+ nn.init.kaiming_uniform_(self.weight1, a=math.sqrt(5))
86
+ nn.init.kaiming_uniform_(self.weight2, a=math.sqrt(5))
87
+ if self.bias:
88
+ fan_in1, _ = nn.init._calculate_fan_in_and_fan_out(self.weight1)
89
+ bound1 = 1 / math.sqrt(fan_in1)
90
+ nn.init.uniform_(self.bias1, -bound1, bound1)
91
+ fan_in2, _ = nn.init._calculate_fan_in_and_fan_out(self.weight2)
92
+ bound2 = 1 / math.sqrt(fan_in2)
93
+ nn.init.uniform_(self.bias2, -bound2, bound2)
94
+
95
+ def forward(self, x, use_conv3d=False, skip_time_conv=False):
96
+ if use_conv3d:
97
+ return self.forward_with_3d(x=x, skip_time_conv=skip_time_conv)
98
+ else:
99
+ return self.forward_with_2d(x=x, skip_time_conv=skip_time_conv)
100
+
101
+ def forward_with_3d(self, x, skip_time_conv):
102
+ # First convolution
103
+ x = F.conv3d(
104
+ x,
105
+ self.weight1,
106
+ self.bias1,
107
+ self.stride1,
108
+ self.padding1,
109
+ self.dilation1,
110
+ self.groups,
111
+ )
112
+
113
+ if skip_time_conv:
114
+ return x
115
+
116
+ # Second convolution
117
+ x = F.conv3d(
118
+ x,
119
+ self.weight2,
120
+ self.bias2,
121
+ self.stride2,
122
+ self.padding2,
123
+ self.dilation2,
124
+ self.groups,
125
+ )
126
+
127
+ return x
128
+
129
+ def forward_with_2d(self, x, skip_time_conv):
130
+ b, c, d, h, w = x.shape
131
+
132
+ # First 2D convolution
133
+ x = rearrange(x, "b c d h w -> (b d) c h w")
134
+ # Squeeze the depth dimension out of weight1 since it's 1
135
+ weight1 = self.weight1.squeeze(2)
136
+ # Select stride, padding, and dilation for the 2D convolution
137
+ stride1 = (self.stride1[1], self.stride1[2])
138
+ padding1 = (self.padding1[1], self.padding1[2])
139
+ dilation1 = (self.dilation1[1], self.dilation1[2])
140
+ x = F.conv2d(x, weight1, self.bias1, stride1, padding1, dilation1, self.groups)
141
+
142
+ _, _, h, w = x.shape
143
+
144
+ if skip_time_conv:
145
+ x = rearrange(x, "(b d) c h w -> b c d h w", b=b)
146
+ return x
147
+
148
+ # Second convolution which is essentially treated as a 1D convolution across the 'd' dimension
149
+ x = rearrange(x, "(b d) c h w -> (b h w) c d", b=b)
150
+
151
+ # Reshape weight2 to match the expected dimensions for conv1d
152
+ weight2 = self.weight2.squeeze(-1).squeeze(-1)
153
+ # Use only the relevant dimension for stride, padding, and dilation for the 1D convolution
154
+ stride2 = self.stride2[0]
155
+ padding2 = self.padding2[0]
156
+ dilation2 = self.dilation2[0]
157
+ x = F.conv1d(x, weight2, self.bias2, stride2, padding2, dilation2, self.groups)
158
+ x = rearrange(x, "(b h w) c d -> b c d h w", b=b, h=h, w=w)
159
+
160
+ return x
161
+
162
+ @property
163
+ def weight(self):
164
+ return self.weight2
165
+
166
+
167
+ def test_dual_conv3d_consistency():
168
+ # Initialize parameters
169
+ in_channels = 3
170
+ out_channels = 5
171
+ kernel_size = (3, 3, 3)
172
+ stride = (2, 2, 2)
173
+ padding = (1, 1, 1)
174
+
175
+ # Create an instance of the DualConv3d class
176
+ dual_conv3d = DualConv3d(
177
+ in_channels=in_channels,
178
+ out_channels=out_channels,
179
+ kernel_size=kernel_size,
180
+ stride=stride,
181
+ padding=padding,
182
+ bias=True,
183
+ )
184
+
185
+ # Example input tensor
186
+ test_input = torch.randn(1, 3, 10, 10, 10)
187
+
188
+ # Perform forward passes with both 3D and 2D settings
189
+ output_conv3d = dual_conv3d(test_input, use_conv3d=True)
190
+ output_2d = dual_conv3d(test_input, use_conv3d=False)
191
+
192
+ # Assert that the outputs from both methods are sufficiently close
193
+ assert torch.allclose(
194
+ output_conv3d, output_2d, atol=1e-6
195
+ ), "Outputs are not consistent between 3D and 2D convolutions."
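The built-in consistency test above already covers the numerics; the sketch below just shows the two forward modes side by side, assuming the DualConv3d class defined above is in scope (it only needs torch and einops).

import torch

conv = DualConv3d(in_channels=3, out_channels=8, kernel_size=3, stride=1, padding=1, bias=True)
video = torch.randn(2, 3, 9, 32, 32)        # [B, C, D, H, W]
out_3d = conv(video, use_conv3d=True)       # two genuine 3D convolutions
out_2d = conv(video, use_conv3d=False)      # 2D spatial conv followed by 1D temporal conv
print(out_3d.shape, torch.allclose(out_3d, out_2d, atol=1e-6))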
vae (1)/pixel_norm.py ADDED
@@ -0,0 +1,12 @@
1
+ import torch
2
+ from torch import nn
3
+
4
+
5
+ class PixelNorm(nn.Module):
6
+ def __init__(self, dim=1, eps=1e-8):
7
+ super(PixelNorm, self).__init__()
8
+ self.dim = dim
9
+ self.eps = eps
10
+
11
+ def forward(self, x):
12
+ return x / torch.sqrt(torch.mean(x**2, dim=self.dim, keepdim=True) + self.eps)
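PixelNorm rescales each (batch, frame, pixel) position so its channel vector has unit RMS; unlike GroupNorm it carries no learnable parameters and no running statistics. A tiny sketch of the effect, assuming the class above is in scope:

import torch

norm = PixelNorm(dim=1)                      # normalize across the channel dimension
x = torch.randn(1, 128, 4, 16, 16)           # [B, C, T, H, W]
y = norm(x)
rms = torch.sqrt(torch.mean(y ** 2, dim=1))  # per-position RMS over channels
print(rms.min().item(), rms.max().item())    # both ~1.0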
vae (2)/model.py ADDED
@@ -0,0 +1,711 @@
1
+ #original code from https://github.com/genmoai/models under apache 2.0 license
2
+ #adapted to ComfyUI
3
+
4
+ from typing import List, Optional, Tuple, Union
5
+ from functools import partial
6
+ import math
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+ import torch.nn.functional as F
11
+ from einops import rearrange
12
+
13
+ from comfy.ldm.modules.attention import optimized_attention
14
+
15
+ import comfy.ops
16
+ ops = comfy.ops.disable_weight_init
17
+
18
+ # import mochi_preview.dit.joint_model.context_parallel as cp
19
+ # from mochi_preview.vae.cp_conv import cp_pass_frames, gather_all_frames
20
+
21
+
22
+ def cast_tuple(t, length=1):
23
+ return t if isinstance(t, tuple) else ((t,) * length)
24
+
25
+
26
+ class GroupNormSpatial(ops.GroupNorm):
27
+ """
28
+ GroupNorm applied per-frame.
29
+ """
30
+
31
+ def forward(self, x: torch.Tensor, *, chunk_size: int = 8):
32
+ B, C, T, H, W = x.shape
33
+ x = rearrange(x, "B C T H W -> (B T) C H W")
34
+ # Run group norm in chunks.
35
+ output = torch.empty_like(x)
36
+ for b in range(0, B * T, chunk_size):
37
+ output[b : b + chunk_size] = super().forward(x[b : b + chunk_size])
38
+ return rearrange(output, "(B T) C H W -> B C T H W", B=B, T=T)
39
+
40
+ class PConv3d(ops.Conv3d):
41
+ def __init__(
42
+ self,
43
+ in_channels,
44
+ out_channels,
45
+ kernel_size: Union[int, Tuple[int, int, int]],
46
+ stride: Union[int, Tuple[int, int, int]],
47
+ causal: bool = True,
48
+ context_parallel: bool = True,
49
+ **kwargs,
50
+ ):
51
+ self.causal = causal
52
+ self.context_parallel = context_parallel
53
+ kernel_size = cast_tuple(kernel_size, 3)
54
+ stride = cast_tuple(stride, 3)
55
+ height_pad = (kernel_size[1] - 1) // 2
56
+ width_pad = (kernel_size[2] - 1) // 2
57
+
58
+ super().__init__(
59
+ in_channels=in_channels,
60
+ out_channels=out_channels,
61
+ kernel_size=kernel_size,
62
+ stride=stride,
63
+ dilation=(1, 1, 1),
64
+ padding=(0, height_pad, width_pad),
65
+ **kwargs,
66
+ )
67
+
68
+ def forward(self, x: torch.Tensor):
69
+ # Compute padding amounts.
70
+ context_size = self.kernel_size[0] - 1
71
+ if self.causal:
72
+ pad_front = context_size
73
+ pad_back = 0
74
+ else:
75
+ pad_front = context_size // 2
76
+ pad_back = context_size - pad_front
77
+
78
+ # Apply padding.
79
+ assert self.padding_mode == "replicate" # DEBUG
80
+ mode = "constant" if self.padding_mode == "zeros" else self.padding_mode
81
+ x = F.pad(x, (0, 0, 0, 0, pad_front, pad_back), mode=mode)
82
+ return super().forward(x)
83
+
84
+
85
+ class Conv1x1(ops.Linear):
86
+ """1x1 Conv implemented with a linear layer."""
87
+
88
+ def __init__(self, in_features: int, out_features: int, *args, **kwargs):
89
+ super().__init__(in_features, out_features, *args, **kwargs)
90
+
91
+ def forward(self, x: torch.Tensor):
92
+ """Forward pass.
93
+
94
+ Args:
95
+ x: Input tensor. Shape: [B, C, *] or [B, *, C].
96
+
97
+ Returns:
98
+ x: Output tensor. Shape: [B, C', *] or [B, *, C'].
99
+ """
100
+ x = x.movedim(1, -1)
101
+ x = super().forward(x)
102
+ x = x.movedim(-1, 1)
103
+ return x
104
+
105
+
106
+ class DepthToSpaceTime(nn.Module):
107
+ def __init__(
108
+ self,
109
+ temporal_expansion: int,
110
+ spatial_expansion: int,
111
+ ):
112
+ super().__init__()
113
+ self.temporal_expansion = temporal_expansion
114
+ self.spatial_expansion = spatial_expansion
115
+
116
+ # When printed, this module should show the temporal and spatial expansion factors.
117
+ def extra_repr(self):
118
+ return f"texp={self.temporal_expansion}, sexp={self.spatial_expansion}"
119
+
120
+ def forward(self, x: torch.Tensor):
121
+ """Forward pass.
122
+
123
+ Args:
124
+ x: Input tensor. Shape: [B, C, T, H, W].
125
+
126
+ Returns:
127
+ x: Rearranged tensor. Shape: [B, C/(st*s*s), T*st, H*s, W*s].
128
+ """
129
+ x = rearrange(
130
+ x,
131
+ "B (C st sh sw) T H W -> B C (T st) (H sh) (W sw)",
132
+ st=self.temporal_expansion,
133
+ sh=self.spatial_expansion,
134
+ sw=self.spatial_expansion,
135
+ )
136
+
137
+ # cp_rank, _ = cp.get_cp_rank_size()
138
+ if self.temporal_expansion > 1: # and cp_rank == 0:
139
+ # Drop the first self.temporal_expansion - 1 frames.
140
+ # This is because we always want the 3x3x3 conv filter to only apply
141
+ # to the first frame, and the first frame doesn't need to be repeated.
142
+ assert all(x.shape)
143
+ x = x[:, :, self.temporal_expansion - 1 :]
144
+ assert all(x.shape)
145
+
146
+ return x
147
+
148
+
149
+ def norm_fn(
150
+ in_channels: int,
151
+ affine: bool = True,
152
+ ):
153
+ return GroupNormSpatial(affine=affine, num_groups=32, num_channels=in_channels)
154
+
155
+
156
+ class ResBlock(nn.Module):
157
+ """Residual block that preserves the spatial dimensions."""
158
+
159
+ def __init__(
160
+ self,
161
+ channels: int,
162
+ *,
163
+ affine: bool = True,
164
+ attn_block: Optional[nn.Module] = None,
165
+ causal: bool = True,
166
+ prune_bottleneck: bool = False,
167
+ padding_mode: str,
168
+ bias: bool = True,
169
+ ):
170
+ super().__init__()
171
+ self.channels = channels
172
+
173
+ assert causal
174
+ self.stack = nn.Sequential(
175
+ norm_fn(channels, affine=affine),
176
+ nn.SiLU(inplace=True),
177
+ PConv3d(
178
+ in_channels=channels,
179
+ out_channels=channels // 2 if prune_bottleneck else channels,
180
+ kernel_size=(3, 3, 3),
181
+ stride=(1, 1, 1),
182
+ padding_mode=padding_mode,
183
+ bias=bias,
184
+ causal=causal,
185
+ ),
186
+ norm_fn(channels, affine=affine),
187
+ nn.SiLU(inplace=True),
188
+ PConv3d(
189
+ in_channels=channels // 2 if prune_bottleneck else channels,
190
+ out_channels=channels,
191
+ kernel_size=(3, 3, 3),
192
+ stride=(1, 1, 1),
193
+ padding_mode=padding_mode,
194
+ bias=bias,
195
+ causal=causal,
196
+ ),
197
+ )
198
+
199
+ self.attn_block = attn_block if attn_block else nn.Identity()
200
+
201
+ def forward(self, x: torch.Tensor):
202
+ """Forward pass.
203
+
204
+ Args:
205
+ x: Input tensor. Shape: [B, C, T, H, W].
206
+ """
207
+ residual = x
208
+ x = self.stack(x)
209
+ x = x + residual
210
+ del residual
211
+
212
+ return self.attn_block(x)
213
+
214
+
215
+ class Attention(nn.Module):
216
+ def __init__(
217
+ self,
218
+ dim: int,
219
+ head_dim: int = 32,
220
+ qkv_bias: bool = False,
221
+ out_bias: bool = True,
222
+ qk_norm: bool = True,
223
+ ) -> None:
224
+ super().__init__()
225
+ self.head_dim = head_dim
226
+ self.num_heads = dim // head_dim
227
+ self.qk_norm = qk_norm
228
+
229
+ self.qkv = nn.Linear(dim, 3 * dim, bias=qkv_bias)
230
+ self.out = nn.Linear(dim, dim, bias=out_bias)
231
+
232
+ def forward(
233
+ self,
234
+ x: torch.Tensor,
235
+ ) -> torch.Tensor:
236
+ """Compute temporal self-attention.
237
+
238
+ Args:
239
+ x: Input tensor. Shape: [B, C, T, H, W].
240
+ chunk_size: Chunk size for large tensors.
241
+
242
+ Returns:
243
+ x: Output tensor. Shape: [B, C, T, H, W].
244
+ """
245
+ B, _, T, H, W = x.shape
246
+
247
+ if T == 1:
248
+ # No attention for single frame.
249
+ x = x.movedim(1, -1) # [B, C, T, H, W] -> [B, T, H, W, C]
250
+ qkv = self.qkv(x)
251
+ _, _, x = qkv.chunk(3, dim=-1) # Throw away queries and keys.
252
+ x = self.out(x)
253
+ return x.movedim(-1, 1) # [B, T, H, W, C] -> [B, C, T, H, W]
254
+
255
+ # 1D temporal attention.
256
+ x = rearrange(x, "B C t h w -> (B h w) t C")
257
+ qkv = self.qkv(x)
258
+
259
+ # Input: qkv with shape [B, t, 3 * num_heads * head_dim]
260
+ # Output: x with shape [B, num_heads, t, head_dim]
261
+ q, k, v = qkv.view(qkv.shape[0], qkv.shape[1], 3, self.num_heads, self.head_dim).transpose(1, 3).unbind(2)
262
+
263
+ if self.qk_norm:
264
+ q = F.normalize(q, p=2, dim=-1)
265
+ k = F.normalize(k, p=2, dim=-1)
266
+
267
+ x = optimized_attention(q, k, v, self.num_heads, skip_reshape=True)
268
+
269
+ assert x.size(0) == q.size(0)
270
+
271
+ x = self.out(x)
272
+ x = rearrange(x, "(B h w) t C -> B C t h w", B=B, h=H, w=W)
273
+ return x
274
+
275
+
276
+ class AttentionBlock(nn.Module):
277
+ def __init__(
278
+ self,
279
+ dim: int,
280
+ **attn_kwargs,
281
+ ) -> None:
282
+ super().__init__()
283
+ self.norm = norm_fn(dim)
284
+ self.attn = Attention(dim, **attn_kwargs)
285
+
286
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
287
+ return x + self.attn(self.norm(x))
288
+
289
+
290
+ class CausalUpsampleBlock(nn.Module):
291
+ def __init__(
292
+ self,
293
+ in_channels: int,
294
+ out_channels: int,
295
+ num_res_blocks: int,
296
+ *,
297
+ temporal_expansion: int = 2,
298
+ spatial_expansion: int = 2,
299
+ **block_kwargs,
300
+ ):
301
+ super().__init__()
302
+
303
+ blocks = []
304
+ for _ in range(num_res_blocks):
305
+ blocks.append(block_fn(in_channels, **block_kwargs))
306
+ self.blocks = nn.Sequential(*blocks)
307
+
308
+ self.temporal_expansion = temporal_expansion
309
+ self.spatial_expansion = spatial_expansion
310
+
311
+ # Change channels in the final convolution layer.
312
+ self.proj = Conv1x1(
313
+ in_channels,
314
+ out_channels * temporal_expansion * (spatial_expansion**2),
315
+ )
316
+
317
+ self.d2st = DepthToSpaceTime(
318
+ temporal_expansion=temporal_expansion, spatial_expansion=spatial_expansion
319
+ )
320
+
321
+ def forward(self, x):
322
+ x = self.blocks(x)
323
+ x = self.proj(x)
324
+ x = self.d2st(x)
325
+ return x
326
+
327
+
328
+ def block_fn(channels, *, affine: bool = True, has_attention: bool = False, **block_kwargs):
329
+ attn_block = AttentionBlock(channels) if has_attention else None
330
+ return ResBlock(channels, affine=affine, attn_block=attn_block, **block_kwargs)
331
+
332
+
333
+ class DownsampleBlock(nn.Module):
334
+ def __init__(
335
+ self,
336
+ in_channels: int,
337
+ out_channels: int,
338
+ num_res_blocks,
339
+ *,
340
+ temporal_reduction=2,
341
+ spatial_reduction=2,
342
+ **block_kwargs,
343
+ ):
344
+ """
345
+ Downsample block for the VAE encoder.
346
+
347
+ Args:
348
+ in_channels: Number of input channels.
349
+ out_channels: Number of output channels.
350
+ num_res_blocks: Number of residual blocks.
351
+ temporal_reduction: Temporal reduction factor.
352
+ spatial_reduction: Spatial reduction factor.
353
+ """
354
+ super().__init__()
355
+ layers = []
356
+
357
+ # Change the channel count in the strided convolution.
358
+ # This lets the ResBlock have uniform channel count,
359
+ # as in ConvNeXt.
360
+ assert in_channels != out_channels
361
+ layers.append(
362
+ PConv3d(
363
+ in_channels=in_channels,
364
+ out_channels=out_channels,
365
+ kernel_size=(temporal_reduction, spatial_reduction, spatial_reduction),
366
+ stride=(temporal_reduction, spatial_reduction, spatial_reduction),
367
+ # First layer in each block always uses replicate padding
368
+ padding_mode="replicate",
369
+ bias=block_kwargs["bias"],
370
+ )
371
+ )
372
+
373
+ for _ in range(num_res_blocks):
374
+ layers.append(block_fn(out_channels, **block_kwargs))
375
+
376
+ self.layers = nn.Sequential(*layers)
377
+
378
+ def forward(self, x):
379
+ return self.layers(x)
380
+
381
+
382
+ def add_fourier_features(inputs: torch.Tensor, start=6, stop=8, step=1):
383
+ num_freqs = (stop - start) // step
384
+ assert inputs.ndim == 5
385
+ C = inputs.size(1)
386
+
387
+ # Create Base 2 Fourier features.
388
+ freqs = torch.arange(start, stop, step, dtype=inputs.dtype, device=inputs.device)
389
+ assert num_freqs == len(freqs)
390
+ w = torch.pow(2.0, freqs) * (2 * torch.pi) # [num_freqs]
391
+ C = inputs.shape[1]
392
+ w = w.repeat(C)[None, :, None, None, None] # [1, C * num_freqs, 1, 1, 1]
393
+
394
+ # Interleaved repeat of input channels to match w.
395
+ h = inputs.repeat_interleave(num_freqs, dim=1) # [B, C * num_freqs, T, H, W]
396
+ # Scale channels by frequency.
397
+ h = w * h
398
+
399
+ return torch.cat(
400
+ [
401
+ inputs,
402
+ torch.sin(h),
403
+ torch.cos(h),
404
+ ],
405
+ dim=1,
406
+ )
407
+
408
+
409
+ class FourierFeatures(nn.Module):
410
+ def __init__(self, start: int = 6, stop: int = 8, step: int = 1):
411
+ super().__init__()
412
+ self.start = start
413
+ self.stop = stop
414
+ self.step = step
415
+
416
+ def forward(self, inputs):
417
+ """Add Fourier features to inputs.
418
+
419
+ Args:
420
+ inputs: Input tensor. Shape: [B, C, T, H, W]
421
+
422
+ Returns:
423
+ h: Output tensor. Shape: [B, (1 + 2 * num_freqs) * C, T, H, W]
424
+ """
425
+ return add_fourier_features(inputs, self.start, self.stop, self.step)
426
+
427
+
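With the default start=6, stop=8, step=1 there are two frequencies, so FourierFeatures returns (1 + 2 * 2) * C channels; for an RGB input that is 15, which matches the in_channels the VideoVAE encoder further down is constructed with. A quick check, assuming the definitions above are in scope:

import torch

ff = FourierFeatures()                 # defaults: start=6, stop=8, step=1 -> two frequencies
video = torch.randn(1, 3, 8, 16, 16)   # [B, C, T, H, W]
print(ff(video).shape)                 # torch.Size([1, 15, 8, 16, 16])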
428
+ class Decoder(nn.Module):
429
+ def __init__(
430
+ self,
431
+ *,
432
+ out_channels: int = 3,
433
+ latent_dim: int,
434
+ base_channels: int,
435
+ channel_multipliers: List[int],
436
+ num_res_blocks: List[int],
437
+ temporal_expansions: Optional[List[int]] = None,
438
+ spatial_expansions: Optional[List[int]] = None,
439
+ has_attention: List[bool],
440
+ output_norm: bool = True,
441
+ nonlinearity: str = "silu",
442
+ output_nonlinearity: str = "silu",
443
+ causal: bool = True,
444
+ **block_kwargs,
445
+ ):
446
+ super().__init__()
447
+ self.input_channels = latent_dim
448
+ self.base_channels = base_channels
449
+ self.channel_multipliers = channel_multipliers
450
+ self.num_res_blocks = num_res_blocks
451
+ self.output_nonlinearity = output_nonlinearity
452
+ assert nonlinearity == "silu"
453
+ assert causal
454
+
455
+ ch = [mult * base_channels for mult in channel_multipliers]
456
+ self.num_up_blocks = len(ch) - 1
457
+ assert len(num_res_blocks) == self.num_up_blocks + 2
458
+
459
+ blocks = []
460
+
461
+ first_block = [
462
+ ops.Conv3d(latent_dim, ch[-1], kernel_size=(1, 1, 1))
463
+ ] # Input layer.
464
+ # First set of blocks preserve channel count.
465
+ for _ in range(num_res_blocks[-1]):
466
+ first_block.append(
467
+ block_fn(
468
+ ch[-1],
469
+ has_attention=has_attention[-1],
470
+ causal=causal,
471
+ **block_kwargs,
472
+ )
473
+ )
474
+ blocks.append(nn.Sequential(*first_block))
475
+
476
+ assert len(temporal_expansions) == len(spatial_expansions) == self.num_up_blocks
477
+ assert len(num_res_blocks) == len(has_attention) == self.num_up_blocks + 2
478
+
479
+ upsample_block_fn = CausalUpsampleBlock
480
+
481
+ for i in range(self.num_up_blocks):
482
+ block = upsample_block_fn(
483
+ ch[-i - 1],
484
+ ch[-i - 2],
485
+ num_res_blocks=num_res_blocks[-i - 2],
486
+ has_attention=has_attention[-i - 2],
487
+ temporal_expansion=temporal_expansions[-i - 1],
488
+ spatial_expansion=spatial_expansions[-i - 1],
489
+ causal=causal,
490
+ **block_kwargs,
491
+ )
492
+ blocks.append(block)
493
+
494
+ assert not output_norm
495
+
496
+ # Last block. Preserve channel count.
497
+ last_block = []
498
+ for _ in range(num_res_blocks[0]):
499
+ last_block.append(
500
+ block_fn(
501
+ ch[0], has_attention=has_attention[0], causal=causal, **block_kwargs
502
+ )
503
+ )
504
+ blocks.append(nn.Sequential(*last_block))
505
+
506
+ self.blocks = nn.ModuleList(blocks)
507
+ self.output_proj = Conv1x1(ch[0], out_channels)
508
+
509
+ def forward(self, x):
510
+ """Forward pass.
511
+
512
+ Args:
513
+ x: Latent tensor. Shape: [B, input_channels, t, h, w]. Scaled [-1, 1].
514
+
515
+ Returns:
516
+ x: Reconstructed video tensor. Shape: [B, C, T, H, W]. Scaled to [-1, 1].
517
+ T - 1 = (t - 1) * 6 for the expansions used in this file.
518
+ H = h * 8, W = w * 8 for the expansions used in this file.
519
+ """
520
+ for block in self.blocks:
521
+ x = block(x)
522
+
523
+ if self.output_nonlinearity == "silu":
524
+ x = F.silu(x, inplace=not self.training)
525
+ else:
526
+ assert (
527
+ not self.output_nonlinearity
528
+ ) # StyleGAN3 omits the to-RGB nonlinearity.
529
+
530
+ return self.output_proj(x).contiguous()
531
+
532
+ class LatentDistribution:
533
+ def __init__(self, mean: torch.Tensor, logvar: torch.Tensor):
534
+ """Initialize latent distribution.
535
+
536
+ Args:
537
+ mean: Mean of the distribution. Shape: [B, C, T, H, W].
538
+ logvar: Logarithm of variance of the distribution. Shape: [B, C, T, H, W].
539
+ """
540
+ assert mean.shape == logvar.shape
541
+ self.mean = mean
542
+ self.logvar = logvar
543
+
544
+ def sample(self, temperature=1.0, generator: torch.Generator = None, noise=None):
545
+ if temperature == 0.0:
546
+ return self.mean
547
+
548
+ if noise is None:
549
+ noise = torch.randn(self.mean.shape, device=self.mean.device, dtype=self.mean.dtype, generator=generator)
550
+ else:
551
+ assert noise.device == self.mean.device
552
+ noise = noise.to(self.mean.dtype)
553
+
554
+ if temperature != 1.0:
555
+ raise NotImplementedError(f"Temperature {temperature} is not supported.")
556
+
557
+ # Just Gaussian sample with no scaling of variance.
558
+ return noise * torch.exp(self.logvar * 0.5) + self.mean
559
+
560
+ def mode(self):
561
+ return self.mean
562
+
563
+ class Encoder(nn.Module):
564
+ def __init__(
565
+ self,
566
+ *,
567
+ in_channels: int,
568
+ base_channels: int,
569
+ channel_multipliers: List[int],
570
+ num_res_blocks: List[int],
571
+ latent_dim: int,
572
+ temporal_reductions: List[int],
573
+ spatial_reductions: List[int],
574
+ prune_bottlenecks: List[bool],
575
+ has_attentions: List[bool],
576
+ affine: bool = True,
577
+ bias: bool = True,
578
+ input_is_conv_1x1: bool = False,
579
+ padding_mode: str,
580
+ ):
581
+ super().__init__()
582
+ self.temporal_reductions = temporal_reductions
583
+ self.spatial_reductions = spatial_reductions
584
+ self.base_channels = base_channels
585
+ self.channel_multipliers = channel_multipliers
586
+ self.num_res_blocks = num_res_blocks
587
+ self.latent_dim = latent_dim
588
+
589
+ self.fourier_features = FourierFeatures()
590
+ ch = [mult * base_channels for mult in channel_multipliers]
591
+ num_down_blocks = len(ch) - 1
592
+ assert len(num_res_blocks) == num_down_blocks + 2
593
+
594
+ layers = (
595
+ [ops.Conv3d(in_channels, ch[0], kernel_size=(1, 1, 1), bias=True)]
596
+ if not input_is_conv_1x1
597
+ else [Conv1x1(in_channels, ch[0])]
598
+ )
599
+
600
+ assert len(prune_bottlenecks) == num_down_blocks + 2
601
+ assert len(has_attentions) == num_down_blocks + 2
602
+ block = partial(block_fn, padding_mode=padding_mode, affine=affine, bias=bias)
603
+
604
+ for _ in range(num_res_blocks[0]):
605
+ layers.append(block(ch[0], has_attention=has_attentions[0], prune_bottleneck=prune_bottlenecks[0]))
606
+ prune_bottlenecks = prune_bottlenecks[1:]
607
+ has_attentions = has_attentions[1:]
608
+
609
+ assert len(temporal_reductions) == len(spatial_reductions) == len(ch) - 1
610
+ for i in range(num_down_blocks):
611
+ layer = DownsampleBlock(
612
+ ch[i],
613
+ ch[i + 1],
614
+ num_res_blocks=num_res_blocks[i + 1],
615
+ temporal_reduction=temporal_reductions[i],
616
+ spatial_reduction=spatial_reductions[i],
617
+ prune_bottleneck=prune_bottlenecks[i],
618
+ has_attention=has_attentions[i],
619
+ affine=affine,
620
+ bias=bias,
621
+ padding_mode=padding_mode,
622
+ )
623
+
624
+ layers.append(layer)
625
+
626
+ # Additional blocks.
627
+ for _ in range(num_res_blocks[-1]):
628
+ layers.append(block(ch[-1], has_attention=has_attentions[-1], prune_bottleneck=prune_bottlenecks[-1]))
629
+
630
+ self.layers = nn.Sequential(*layers)
631
+
632
+ # Output layers.
633
+ self.output_norm = norm_fn(ch[-1])
634
+ self.output_proj = Conv1x1(ch[-1], 2 * latent_dim, bias=False)
635
+
636
+ @property
637
+ def temporal_downsample(self):
638
+ return math.prod(self.temporal_reductions)
639
+
640
+ @property
641
+ def spatial_downsample(self):
642
+ return math.prod(self.spatial_reductions)
643
+
644
+ def forward(self, x) -> LatentDistribution:
645
+ """Forward pass.
646
+
647
+ Args:
648
+ x: Input video tensor. Shape: [B, C, T, H, W]. Scaled to [-1, 1]
649
+
650
+ Returns:
651
+ means: Latent tensor. Shape: [B, latent_dim, t, h, w]. Scaled [-1, 1].
652
+ h = H // 8, w = W // 8, t - 1 = (T - 1) // 6
653
+ logvar: Shape: [B, latent_dim, t, h, w].
654
+ """
655
+ assert x.ndim == 5, f"Expected 5D input, got {x.shape}"
656
+ x = self.fourier_features(x)
657
+
658
+ x = self.layers(x)
659
+
660
+ x = self.output_norm(x)
661
+ x = F.silu(x, inplace=True)
662
+ x = self.output_proj(x)
663
+
664
+ means, logvar = torch.chunk(x, 2, dim=1)
665
+
666
+ assert means.ndim == 5
667
+ assert logvar.shape == means.shape
668
+ assert means.size(1) == self.latent_dim
669
+
670
+ return LatentDistribution(means, logvar)
671
+
672
+
673
+ class VideoVAE(nn.Module):
674
+ def __init__(self):
675
+ super().__init__()
676
+ self.encoder = Encoder(
677
+ in_channels=15,
678
+ base_channels=64,
679
+ channel_multipliers=[1, 2, 4, 6],
680
+ num_res_blocks=[3, 3, 4, 6, 3],
681
+ latent_dim=12,
682
+ temporal_reductions=[1, 2, 3],
683
+ spatial_reductions=[2, 2, 2],
684
+ prune_bottlenecks=[False, False, False, False, False],
685
+ has_attentions=[False, True, True, True, True],
686
+ affine=True,
687
+ bias=True,
688
+ input_is_conv_1x1=True,
689
+ padding_mode="replicate"
690
+ )
691
+ self.decoder = Decoder(
692
+ out_channels=3,
693
+ base_channels=128,
694
+ channel_multipliers=[1, 2, 4, 6],
695
+ temporal_expansions=[1, 2, 3],
696
+ spatial_expansions=[2, 2, 2],
697
+ num_res_blocks=[3, 3, 4, 6, 3],
698
+ latent_dim=12,
699
+ has_attention=[False, False, False, False, False],
700
+ padding_mode="replicate",
701
+ output_norm=False,
702
+ nonlinearity="silu",
703
+ output_nonlinearity="silu",
704
+ causal=True,
705
+ )
706
+
707
+ def encode(self, x):
708
+ return self.encoder(x).mode()
709
+
710
+ def decode(self, x):
711
+ return self.decoder(x)
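For this configuration the encoder reductions multiply out to 6x temporal and 8x spatial compression (temporal_reductions=[1, 2, 3], spatial_reductions=[2, 2, 2]), and the decoder expansions mirror them. A back-of-the-envelope sketch of the latent shape, pure arithmetic with an illustrative input size:

import math

temporal_reductions = [1, 2, 3]
spatial_reductions = [2, 2, 2]
T, H, W = 163, 480, 848                              # illustrative input video size

t = (T - 1) // math.prod(temporal_reductions) + 1    # 28 latent frames
h = H // math.prod(spatial_reductions)               # 60
w = W // math.prod(spatial_reductions)               # 106
print(t, h, w)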
vae.py ADDED
@@ -0,0 +1,131 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """The causal continuous video tokenizer with VAE or AE formulation for 3D data."""
16
+
17
+ import logging
18
+ import torch
19
+ from torch import nn
20
+ from enum import Enum
21
+ import math
22
+
23
+ from .cosmos_tokenizer.layers3d import (
24
+ EncoderFactorized,
25
+ DecoderFactorized,
26
+ CausalConv3d,
27
+ )
28
+
29
+
30
+ class IdentityDistribution(torch.nn.Module):
31
+ def __init__(self):
32
+ super().__init__()
33
+
34
+ def forward(self, parameters):
35
+ return parameters, (torch.tensor([0.0]), torch.tensor([0.0]))
36
+
37
+
38
+ class GaussianDistribution(torch.nn.Module):
39
+ def __init__(self, min_logvar: float = -30.0, max_logvar: float = 20.0):
40
+ super().__init__()
41
+ self.min_logvar = min_logvar
42
+ self.max_logvar = max_logvar
43
+
44
+ def sample(self, mean, logvar):
45
+ std = torch.exp(0.5 * logvar)
46
+ return mean + std * torch.randn_like(mean)
47
+
48
+ def forward(self, parameters):
49
+ mean, logvar = torch.chunk(parameters, 2, dim=1)
50
+ logvar = torch.clamp(logvar, self.min_logvar, self.max_logvar)
51
+ return self.sample(mean, logvar), (mean, logvar)
52
+
53
+
54
+ class ContinuousFormulation(Enum):
55
+ VAE = GaussianDistribution
56
+ AE = IdentityDistribution
57
+
58
+
59
+ class CausalContinuousVideoTokenizer(nn.Module):
60
+ def __init__(
61
+ self, z_channels: int, z_factor: int, latent_channels: int, **kwargs
62
+ ) -> None:
63
+ super().__init__()
64
+ self.name = kwargs.get("name", "CausalContinuousVideoTokenizer")
65
+ self.latent_channels = latent_channels
66
+ self.sigma_data = 0.5
67
+
68
+ # encoder_name = kwargs.get("encoder", Encoder3DType.BASE.name)
69
+ self.encoder = EncoderFactorized(
70
+ z_channels=z_factor * z_channels, **kwargs
71
+ )
72
+ if kwargs.get("temporal_compression", 4) == 4:
73
+ kwargs["channels_mult"] = [2, 4]
74
+ # decoder_name = kwargs.get("decoder", Decoder3DType.BASE.name)
75
+ self.decoder = DecoderFactorized(
76
+ z_channels=z_channels, **kwargs
77
+ )
78
+
79
+ self.quant_conv = CausalConv3d(
80
+ z_factor * z_channels,
81
+ z_factor * latent_channels,
82
+ kernel_size=1,
83
+ padding=0,
84
+ )
85
+ self.post_quant_conv = CausalConv3d(
86
+ latent_channels, z_channels, kernel_size=1, padding=0
87
+ )
88
+
89
+ # formulation_name = kwargs.get("formulation", ContinuousFormulation.AE.name)
90
+ self.distribution = IdentityDistribution() # ContinuousFormulation[formulation_name].value()
91
+
92
+ num_parameters = sum(param.numel() for param in self.parameters())
93
+ logging.debug(f"model={self.name}, num_parameters={num_parameters:,}")
94
+ logging.debug(
95
+ f"z_channels={z_channels}, latent_channels={self.latent_channels}."
96
+ )
97
+
98
+ latent_temporal_chunk = 16
99
+ self.latent_mean = nn.Parameter(torch.zeros([self.latent_channels * latent_temporal_chunk], dtype=torch.float32))
100
+ self.latent_std = nn.Parameter(torch.ones([self.latent_channels * latent_temporal_chunk], dtype=torch.float32))
101
+
102
+
103
+ def encode(self, x):
104
+ h = self.encoder(x)
105
+ moments = self.quant_conv(h)
106
+ z, posteriors = self.distribution(moments)
107
+ latent_ch = z.shape[1]
108
+ latent_t = z.shape[2]
109
+ in_dtype = z.dtype
110
+ mean = self.latent_mean.view(latent_ch, -1)
111
+ std = self.latent_std.view(latent_ch, -1)
112
+
113
+ mean = mean.repeat(1, math.ceil(latent_t / mean.shape[-1]))[:, : latent_t].reshape([1, latent_ch, -1, 1, 1]).to(dtype=in_dtype, device=z.device)
114
+ std = std.repeat(1, math.ceil(latent_t / std.shape[-1]))[:, : latent_t].reshape([1, latent_ch, -1, 1, 1]).to(dtype=in_dtype, device=z.device)
115
+ return ((z - mean) / std) * self.sigma_data
116
+
117
+ def decode(self, z):
118
+ in_dtype = z.dtype
119
+ latent_ch = z.shape[1]
120
+ latent_t = z.shape[2]
121
+ mean = self.latent_mean.view(latent_ch, -1)
122
+ std = self.latent_std.view(latent_ch, -1)
123
+
124
+ mean = mean.repeat(1, math.ceil(latent_t / mean.shape[-1]))[:, : latent_t].reshape([1, latent_ch, -1, 1, 1]).to(dtype=in_dtype, device=z.device)
125
+ std = std.repeat(1, math.ceil(latent_t / std.shape[-1]))[:, : latent_t].reshape([1, latent_ch, -1, 1, 1]).to(dtype=in_dtype, device=z.device)
126
+
127
+ z = z / self.sigma_data
128
+ z = z * std + mean
129
+ z = self.post_quant_conv(z)
130
+ return self.decoder(z)
131
+
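Both encode and decode above tile a per-(channel, chunk-frame) mean and std over however many latent frames are present, then scale by sigma_data. A minimal numeric sketch of that tiling, using stand-in tensors instead of the real nn.Parameter buffers:

import math
import torch

latent_ch, latent_temporal_chunk, latent_t = 16, 16, 10
latent_mean = torch.zeros(latent_ch * latent_temporal_chunk)   # stand-in for self.latent_mean

mean = latent_mean.view(latent_ch, -1)
mean = mean.repeat(1, math.ceil(latent_t / mean.shape[-1]))[:, :latent_t]
mean = mean.reshape(1, latent_ch, -1, 1, 1)
print(mean.shape)   # torch.Size([1, 16, 10, 1, 1]) -- one value per channel per latent frame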
vae/put_vae_here ADDED
File without changes