robo-diffusion-v2-base.ckpt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:60c4ea708f4648e5be9d8b1edadc0784d9fd97281f1bd88b6c92208ec36e312e
3
- size 2580250968
 
 
 
 
robo-diffusion-v2-base.yaml DELETED
@@ -1,67 +0,0 @@
1
- model:
2
- base_learning_rate: 1.0e-4
3
- target: ldm.models.diffusion.ddpm.LatentDiffusion
4
- params:
5
- linear_start: 0.00085
6
- linear_end: 0.0120
7
- num_timesteps_cond: 1
8
- log_every_t: 200
9
- timesteps: 1000
10
- first_stage_key: "jpg"
11
- cond_stage_key: "txt"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False # we set this to false because this is an inference only config
19
-
20
- unet_config:
21
- target: ldm.modules.diffusionmodules.openaimodel.UNetModel
22
- params:
23
- use_checkpoint: True
24
- use_fp16: True
25
- image_size: 32 # unused
26
- in_channels: 4
27
- out_channels: 4
28
- model_channels: 320
29
- attention_resolutions: [ 4, 2, 1 ]
30
- num_res_blocks: 2
31
- channel_mult: [ 1, 2, 4, 4 ]
32
- num_head_channels: 64 # need to fix for flash-attn
33
- use_spatial_transformer: True
34
- use_linear_in_transformer: True
35
- transformer_depth: 1
36
- context_dim: 1024
37
- legacy: False
38
-
39
- first_stage_config:
40
- target: ldm.models.autoencoder.AutoencoderKL
41
- params:
42
- embed_dim: 4
43
- monitor: val/rec_loss
44
- ddconfig:
45
- #attn_type: "vanilla-xformers"
46
- double_z: true
47
- z_channels: 4
48
- resolution: 256
49
- in_channels: 3
50
- out_ch: 3
51
- ch: 128
52
- ch_mult:
53
- - 1
54
- - 2
55
- - 4
56
- - 4
57
- num_res_blocks: 2
58
- attn_resolutions: []
59
- dropout: 0.0
60
- lossconfig:
61
- target: torch.nn.Identity
62
-
63
- cond_stage_config:
64
- target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
65
- params:
66
- freeze: True
67
- layer: "penultimate"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
vae/config.json CHANGED
@@ -21,7 +21,6 @@
21
  "norm_num_groups": 32,
22
  "out_channels": 3,
23
  "sample_size": 512,
24
- "scaling_factor": 0.18215,
25
  "up_block_types": [
26
  "UpDecoderBlock2D",
27
  "UpDecoderBlock2D",
 
21
  "norm_num_groups": 32,
22
  "out_channels": 3,
23
  "sample_size": 512,
 
24
  "up_block_types": [
25
  "UpDecoderBlock2D",
26
  "UpDecoderBlock2D",