Spaces:
Runtime error
Runtime error
Commit
·
dabac1b
1
Parent(s):
7d078ca
ok
Browse files- .DS_Store +0 -0
- .gitignore +1 -1
- checkpoints/{cifar.ckpt → model/celebahq.ckpt} +2 -2
- checkpoints/model/cifar10.ckpt +3 -0
- checkpoints/{mnist.ckpt → model/mnist.ckpt} +2 -2
- diffusion/dataset/celeba.py +6 -3
- diffusion/dataset/cifar10.py +3 -2
- diffusion/dataset/mnist.py +3 -2
- diffusion/model/diffusion/__init__.py +0 -1
- diffusion/model/diffusion/model.py +82 -42
- diffusion/model/diffusion/sampling.py +0 -82
- diffusion/model/diffusion/scheduler.py +171 -7
- diffusion/model/diffusion/unet.py +15 -9
- diffusion/model/ldm/model.py +1 -0
- diffusion/model/ldm/tests/__init__.py +0 -0
- diffusion/tests/__init__.py +0 -0
- diffusion/train/__main__.py +8 -2
.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
.gitignore
CHANGED
@@ -161,8 +161,8 @@ cython_debug/
|
|
161 |
*.jpeg
|
162 |
*.gz
|
163 |
cifar-10-batches-py
|
164 |
-
checkpoints
|
165 |
MNIST
|
166 |
*.ipynb
|
167 |
data
|
168 |
wandb
|
|
|
|
161 |
*.jpeg
|
162 |
*.gz
|
163 |
cifar-10-batches-py
|
|
|
164 |
MNIST
|
165 |
*.ipynb
|
166 |
data
|
167 |
wandb
|
168 |
+
/checkpoints/lightning_logs
|
checkpoints/{cifar.ckpt → model/celebahq.ckpt}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bfc9fa8cb71bc57bc4d1f54da56e71060609828bc6903cec3ae46418c18bf3a1
|
3 |
+
size 99080226
|
checkpoints/model/cifar10.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:509e43eb3be202b3d71ef37ca5b66de501fccda8806e4a193a466bcbfcb71b83
|
3 |
+
size 99090784
|
checkpoints/{mnist.ckpt → model/mnist.ckpt}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2cde8ee89e68b413c32685145b4ad1fea10b7a6617c0f14a12a9af8afac9712
|
3 |
+
size 99081632
|
diffusion/dataset/celeba.py
CHANGED
@@ -11,12 +11,13 @@ class CelebADataset(Dataset):
|
|
11 |
def __init__(
|
12 |
self,
|
13 |
data_dir: str,
|
|
|
14 |
):
|
15 |
self.list_path = os.listdir(data_dir)
|
16 |
self.data_dir = data_dir
|
17 |
self.transform = transforms.Compose(
|
18 |
[
|
19 |
-
transforms.Resize((
|
20 |
transforms.ToTensor(),
|
21 |
transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
|
22 |
]
|
@@ -37,13 +38,15 @@ class CelebADataModule(pl.LightningDataModule):
|
|
37 |
batch_size: int = 32,
|
38 |
num_workers: int = 0,
|
39 |
seed: int = 42,
|
40 |
-
train_ratio: float = 0.99
|
|
|
41 |
):
|
42 |
super().__init__()
|
43 |
self.data_dir = data_dir
|
44 |
self.batch_size = batch_size
|
45 |
self.num_workers = num_workers
|
46 |
self.train_ratio = min(train_ratio, 0.99)
|
|
|
47 |
self.seed = seed
|
48 |
|
49 |
self.loader = partial(
|
@@ -56,7 +59,7 @@ class CelebADataModule(pl.LightningDataModule):
|
|
56 |
|
57 |
def setup(self, stage: str):
|
58 |
if stage == "fit":
|
59 |
-
dataset = CelebADataset(self.data_dir)
|
60 |
self.CelebA_train, self.CelebA_val, _ = random_split(
|
61 |
dataset=dataset,
|
62 |
lengths=[self.train_ratio, 0.01, 1 - 0.01 - self.train_ratio],
|
|
|
11 |
def __init__(
|
12 |
self,
|
13 |
data_dir: str,
|
14 |
+
img_dim: int = 64
|
15 |
):
|
16 |
self.list_path = os.listdir(data_dir)
|
17 |
self.data_dir = data_dir
|
18 |
self.transform = transforms.Compose(
|
19 |
[
|
20 |
+
transforms.Resize((img_dim, img_dim)),
|
21 |
transforms.ToTensor(),
|
22 |
transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
|
23 |
]
|
|
|
38 |
batch_size: int = 32,
|
39 |
num_workers: int = 0,
|
40 |
seed: int = 42,
|
41 |
+
train_ratio: float = 0.99,
|
42 |
+
img_dim: int = 64
|
43 |
):
|
44 |
super().__init__()
|
45 |
self.data_dir = data_dir
|
46 |
self.batch_size = batch_size
|
47 |
self.num_workers = num_workers
|
48 |
self.train_ratio = min(train_ratio, 0.99)
|
49 |
+
self.img_dim = img_dim
|
50 |
self.seed = seed
|
51 |
|
52 |
self.loader = partial(
|
|
|
59 |
|
60 |
def setup(self, stage: str):
|
61 |
if stage == "fit":
|
62 |
+
dataset = CelebADataset(self.data_dir, self.img_dim)
|
63 |
self.CelebA_train, self.CelebA_val, _ = random_split(
|
64 |
dataset=dataset,
|
65 |
lengths=[self.train_ratio, 0.01, 1 - 0.01 - self.train_ratio],
|
diffusion/dataset/cifar10.py
CHANGED
@@ -13,7 +13,8 @@ class CIFAR10DataModule(pl.LightningDataModule):
|
|
13 |
batch_size: int = 32,
|
14 |
num_workers: int = 0,
|
15 |
seed: int = 42,
|
16 |
-
train_ratio: float = 0.99
|
|
|
17 |
):
|
18 |
super().__init__()
|
19 |
self.data_dir = data_dir
|
@@ -23,7 +24,7 @@ class CIFAR10DataModule(pl.LightningDataModule):
|
|
23 |
self.train_ratio = min(train_ratio, 0.99)
|
24 |
self.transform = transforms.Compose(
|
25 |
[
|
26 |
-
transforms.Resize((
|
27 |
transforms.ToTensor(),
|
28 |
transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
|
29 |
]
|
|
|
13 |
batch_size: int = 32,
|
14 |
num_workers: int = 0,
|
15 |
seed: int = 42,
|
16 |
+
train_ratio: float = 0.99,
|
17 |
+
img_dim: int = 32
|
18 |
):
|
19 |
super().__init__()
|
20 |
self.data_dir = data_dir
|
|
|
24 |
self.train_ratio = min(train_ratio, 0.99)
|
25 |
self.transform = transforms.Compose(
|
26 |
[
|
27 |
+
transforms.Resize((img_dim, img_dim)),
|
28 |
transforms.ToTensor(),
|
29 |
transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
|
30 |
]
|
diffusion/dataset/mnist.py
CHANGED
@@ -13,7 +13,8 @@ class MNISTDataModule(pl.LightningDataModule):
|
|
13 |
batch_size: int = 32,
|
14 |
num_workers: int = 0,
|
15 |
seed: int = 42,
|
16 |
-
train_ratio: float = 0.99
|
|
|
17 |
):
|
18 |
super().__init__()
|
19 |
self.data_dir = data_dir
|
@@ -23,7 +24,7 @@ class MNISTDataModule(pl.LightningDataModule):
|
|
23 |
self.seed = seed
|
24 |
self.transform = transforms.Compose(
|
25 |
[
|
26 |
-
transforms.Resize((
|
27 |
transforms.ToTensor(),
|
28 |
transforms.Normalize(mean=(0.5), std=(0.5))
|
29 |
]
|
|
|
13 |
batch_size: int = 32,
|
14 |
num_workers: int = 0,
|
15 |
seed: int = 42,
|
16 |
+
train_ratio: float = 0.99,
|
17 |
+
img_dim: int = 32
|
18 |
):
|
19 |
super().__init__()
|
20 |
self.data_dir = data_dir
|
|
|
24 |
self.seed = seed
|
25 |
self.transform = transforms.Compose(
|
26 |
[
|
27 |
+
transforms.Resize((img_dim, img_dim)),
|
28 |
transforms.ToTensor(),
|
29 |
transforms.Normalize(mean=(0.5), std=(0.5))
|
30 |
]
|
diffusion/model/diffusion/__init__.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
from .unet import *
|
2 |
from .model import *
|
3 |
-
from .sampling import *
|
4 |
from .scheduler import *
|
|
|
1 |
from .unet import *
|
2 |
from .model import *
|
|
|
3 |
from .scheduler import *
|
diffusion/model/diffusion/model.py
CHANGED
@@ -4,8 +4,10 @@ import numpy as np
|
|
4 |
import pytorch_lightning as pl
|
5 |
import diffusion
|
6 |
import wandb
|
|
|
7 |
from torchvision.utils import make_grid
|
8 |
from torch.optim.lr_scheduler import OneCycleLR
|
|
|
9 |
|
10 |
|
11 |
class DiffusionModel(pl.LightningModule):
|
@@ -16,6 +18,7 @@ class DiffusionModel(pl.LightningModule):
|
|
16 |
beta_1: float = 0.0001,
|
17 |
beta_2: float = 0.02,
|
18 |
in_channels: int = 3,
|
|
|
19 |
dim: int = 32,
|
20 |
num_classes: int | None = 10,
|
21 |
sample_per_epochs: int = 50,
|
@@ -33,11 +36,17 @@ class DiffusionModel(pl.LightningModule):
|
|
33 |
self.max_timesteps = max_timesteps
|
34 |
self.in_channels = in_channels
|
35 |
self.dim = dim
|
|
|
36 |
self.num_classes = num_classes
|
37 |
|
38 |
-
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
self.criterion = nn.MSELoss()
|
43 |
|
@@ -49,8 +58,6 @@ class DiffusionModel(pl.LightningModule):
|
|
49 |
|
50 |
self.sampling_kwargs = {
|
51 |
'model': self.model,
|
52 |
-
'scheduler': self.scheduler,
|
53 |
-
'max_timesteps': self.max_timesteps,
|
54 |
'in_channels': self.in_channels,
|
55 |
'dim': self.dim,
|
56 |
}
|
@@ -75,29 +82,37 @@ class DiffusionModel(pl.LightningModule):
|
|
75 |
x_0: torch.Tensor,
|
76 |
t: torch.Tensor
|
77 |
):
|
78 |
-
|
79 |
-
new_x = self.scheduler.get('sqrt_alpha_hat', t) * x_0
|
80 |
-
new_noise = self.scheduler.get('sqrt_one_minus_alpha_hat', t) * noise
|
81 |
-
return new_x + new_noise, noise
|
82 |
|
83 |
-
def sampling(
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
def forward(self, x_0, labels):
|
|
|
98 |
t = torch.randint(
|
99 |
-
low=0, high=self.max_timesteps, size=(
|
100 |
)
|
|
|
101 |
x_noise, noise = self.noising(x_0, t)
|
102 |
noise_pred = self.model(x_noise, t, labels)
|
103 |
return noise, noise_pred
|
@@ -108,8 +123,8 @@ class DiffusionModel(pl.LightningModule):
|
|
108 |
labels = None
|
109 |
else:
|
110 |
x_0, labels = batch
|
111 |
-
|
112 |
-
|
113 |
noise, noise_pred = self(x_0, labels)
|
114 |
loss = self.criterion(noise, noise_pred)
|
115 |
self.train_loss.append(loss)
|
@@ -135,19 +150,20 @@ class DiffusionModel(pl.LightningModule):
|
|
135 |
)
|
136 |
self.train_loss.clear()
|
137 |
|
138 |
-
if self.
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
|
|
151 |
|
152 |
self.epoch_count += 1
|
153 |
|
@@ -173,10 +189,34 @@ class DiffusionModel(pl.LightningModule):
|
|
173 |
total_steps=self.trainer.estimated_stepping_batches,
|
174 |
|
175 |
)
|
176 |
-
return
|
177 |
-
|
178 |
-
|
179 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
180 |
|
181 |
|
182 |
if __name__ == "__main__":
|
|
|
4 |
import pytorch_lightning as pl
|
5 |
import diffusion
|
6 |
import wandb
|
7 |
+
import matplotlib.pyplot as plt
|
8 |
from torchvision.utils import make_grid
|
9 |
from torch.optim.lr_scheduler import OneCycleLR
|
10 |
+
from IPython.display import clear_output
|
11 |
|
12 |
|
13 |
class DiffusionModel(pl.LightningModule):
|
|
|
18 |
beta_1: float = 0.0001,
|
19 |
beta_2: float = 0.02,
|
20 |
in_channels: int = 3,
|
21 |
+
mode: str = "ddpm",
|
22 |
dim: int = 32,
|
23 |
num_classes: int | None = 10,
|
24 |
sample_per_epochs: int = 50,
|
|
|
36 |
self.max_timesteps = max_timesteps
|
37 |
self.in_channels = in_channels
|
38 |
self.dim = dim
|
39 |
+
self.mode = mode
|
40 |
self.num_classes = num_classes
|
41 |
|
42 |
+
if mode == "ddpm":
|
43 |
+
self.scheduler = diffusion.DDPMScheduler(
|
44 |
+
max_timesteps, beta_1, beta_2
|
45 |
+
)
|
46 |
+
elif mode == "ddim":
|
47 |
+
self.scheduler = diffusion.DDIMScheduler(
|
48 |
+
max_timesteps, beta_1, beta_2
|
49 |
+
)
|
50 |
|
51 |
self.criterion = nn.MSELoss()
|
52 |
|
|
|
58 |
|
59 |
self.sampling_kwargs = {
|
60 |
'model': self.model,
|
|
|
|
|
61 |
'in_channels': self.in_channels,
|
62 |
'dim': self.dim,
|
63 |
}
|
|
|
82 |
x_0: torch.Tensor,
|
83 |
t: torch.Tensor
|
84 |
):
|
85 |
+
return self.scheduler.noising(x_0, t)
|
|
|
|
|
|
|
86 |
|
87 |
+
def sampling(
|
88 |
+
self,
|
89 |
+
labels=None,
|
90 |
+
mode: int = "ddpm",
|
91 |
+
demo: bool = True,
|
92 |
+
n_samples: int = 16,
|
93 |
+
timesteps: int = 1000,
|
94 |
+
):
|
95 |
+
if mode == "ddpm":
|
96 |
+
self.test_scheduler = diffusion.DDPMScheduler(self.max_timesteps)
|
97 |
+
elif mode == "ddim":
|
98 |
+
self.test_scheduler = diffusion.DDIMScheduler(self.max_timesteps)
|
99 |
+
|
100 |
+
kwargs = {
|
101 |
+
"n_samples": n_samples,
|
102 |
+
"labels": labels,
|
103 |
+
"timesteps": timesteps,
|
104 |
+
} | self.sampling_kwargs
|
105 |
+
if demo:
|
106 |
+
return self.test_scheduler.sampling_demo(**kwargs)
|
107 |
+
else:
|
108 |
+
return self.test_scheduler.sampling(**kwargs)
|
109 |
|
110 |
def forward(self, x_0, labels):
|
111 |
+
n = x_0.shape[0]
|
112 |
t = torch.randint(
|
113 |
+
low=0, high=self.max_timesteps, size=(n//2+1,), device=x_0.device
|
114 |
)
|
115 |
+
t = torch.cat([t, self.max_timesteps - t - 1], dim=0)[:n]
|
116 |
x_noise, noise = self.noising(x_0, t)
|
117 |
noise_pred = self.model(x_noise, t, labels)
|
118 |
return noise, noise_pred
|
|
|
123 |
labels = None
|
124 |
else:
|
125 |
x_0, labels = batch
|
126 |
+
if np.random.random() < 0.1:
|
127 |
+
labels = None
|
128 |
noise, noise_pred = self(x_0, labels)
|
129 |
loss = self.criterion(noise, noise_pred)
|
130 |
self.train_loss.append(loss)
|
|
|
150 |
)
|
151 |
self.train_loss.clear()
|
152 |
|
153 |
+
if self.spe > 0:
|
154 |
+
if self.epoch_count % self.spe == 0:
|
155 |
+
wandblog = self.logger.experiment
|
156 |
+
x_t = self.sampling(n_samples=self.n_samples, timesteps=100, demo=False)
|
157 |
+
img_array = [x_t[i] for i in range(x_t.shape[0])]
|
158 |
+
|
159 |
+
wandblog.log(
|
160 |
+
{
|
161 |
+
"sampling": wandb.Image(
|
162 |
+
make_grid(img_array, nrow=4).permute(1, 2, 0).cpu().numpy(),
|
163 |
+
caption="Sampled Image!"
|
164 |
+
)
|
165 |
+
}
|
166 |
+
)
|
167 |
|
168 |
self.epoch_count += 1
|
169 |
|
|
|
189 |
total_steps=self.trainer.estimated_stepping_batches,
|
190 |
|
191 |
)
|
192 |
+
return [optimizer], [scheduler]
|
193 |
+
|
194 |
+
def draw(
|
195 |
+
self,
|
196 |
+
labels=None,
|
197 |
+
mode: int = "ddpm",
|
198 |
+
n_samples: int = 1,
|
199 |
+
timesteps: int = 1000,
|
200 |
+
):
|
201 |
+
demo = self.sampling(
|
202 |
+
labels=labels,
|
203 |
+
mode=mode,
|
204 |
+
n_samples=n_samples,
|
205 |
+
timesteps=timesteps,
|
206 |
+
demo=True
|
207 |
+
)
|
208 |
+
idx = 0
|
209 |
+
length = labels.shape[0] if labels is not None else n_samples
|
210 |
+
for img in demo:
|
211 |
+
for i in range(length):
|
212 |
+
plt.subplot(1, length, i+1)
|
213 |
+
plt.imshow(img[i].permute(1, 2, 0))
|
214 |
+
plt.axis('off')
|
215 |
+
plt.title(f"{idx+1}/{timesteps}")
|
216 |
+
idx += 1
|
217 |
+
plt.show()
|
218 |
+
if idx < timesteps:
|
219 |
+
clear_output(wait=True)
|
220 |
|
221 |
|
222 |
if __name__ == "__main__":
|
diffusion/model/diffusion/sampling.py
DELETED
@@ -1,82 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
|
3 |
-
|
4 |
-
def ddpm_sampling_timestep(
|
5 |
-
x_t,
|
6 |
-
model,
|
7 |
-
scheduler,
|
8 |
-
labels,
|
9 |
-
t,
|
10 |
-
n_samples: int = 16,
|
11 |
-
cfg_scale: int = 3,
|
12 |
-
):
|
13 |
-
time = torch.full((n_samples,), fill_value=t, device=model.device)
|
14 |
-
pred_noise = model(x_t, time, labels)
|
15 |
-
if cfg_scale > 0:
|
16 |
-
uncond_pred_noise = model(x_t, time, None)
|
17 |
-
pred_noise = torch.lerp(uncond_pred_noise, pred_noise, cfg_scale)
|
18 |
-
alpha = scheduler.get('alpha', time)
|
19 |
-
sqrt_alpha = scheduler.get('sqrt_alpha', time)
|
20 |
-
somah = scheduler.get('sqrt_one_minus_alpha_hat', time)
|
21 |
-
sqrt_beta = scheduler.get('sqrt_beta', time)
|
22 |
-
if t > 0:
|
23 |
-
noise = torch.randn_like(x_t, device=model.device)
|
24 |
-
else:
|
25 |
-
noise = torch.zeros_like(x_t, device=model.device)
|
26 |
-
|
27 |
-
x_t_new = 1 / sqrt_alpha * (x_t - (1-alpha) / somah * pred_noise) + sqrt_beta * noise
|
28 |
-
return x_t_new.clamp(-1, 1)
|
29 |
-
|
30 |
-
|
31 |
-
@torch.no_grad()
|
32 |
-
def ddpm_sampling(
|
33 |
-
model,
|
34 |
-
scheduler,
|
35 |
-
n_samples: int = 16,
|
36 |
-
max_timesteps: int = 1000,
|
37 |
-
in_channels: int = 3,
|
38 |
-
dim: int = 32,
|
39 |
-
cfg_scale: int = 3,
|
40 |
-
|
41 |
-
labels=None
|
42 |
-
):
|
43 |
-
if labels is not None:
|
44 |
-
n_samples = labels.shape[0]
|
45 |
-
|
46 |
-
x_t = torch.randn(
|
47 |
-
n_samples, in_channels, dim, dim, device=model.device
|
48 |
-
)
|
49 |
-
model.eval()
|
50 |
-
for t in range(max_timesteps-1, -1, -1):
|
51 |
-
x_t = ddpm_sampling_timestep(x_t=x_t, model=model, scheduler=scheduler,
|
52 |
-
labels=labels, t=t, n_samples=n_samples,
|
53 |
-
cfg_scale=cfg_scale)
|
54 |
-
|
55 |
-
model.train()
|
56 |
-
x_t = (x_t + 1) / 2 * 255. # range [0,255]
|
57 |
-
return x_t.type(torch.uint8)
|
58 |
-
|
59 |
-
|
60 |
-
@torch.no_grad()
|
61 |
-
def ddpm_sampling_demo(
|
62 |
-
model,
|
63 |
-
scheduler,
|
64 |
-
n_samples: int = 16,
|
65 |
-
max_timesteps: int = 1000,
|
66 |
-
in_channels: int = 3,
|
67 |
-
dim: int = 32,
|
68 |
-
cfg_scale: int = 3,
|
69 |
-
labels=None
|
70 |
-
):
|
71 |
-
if labels is not None:
|
72 |
-
n_samples = labels.shape[0]
|
73 |
-
|
74 |
-
x_t = torch.randn(
|
75 |
-
n_samples, in_channels, dim, dim, device=model.device
|
76 |
-
)
|
77 |
-
model.eval()
|
78 |
-
for t in range(max_timesteps-1, -1, -1):
|
79 |
-
x_t = ddpm_sampling_timestep(x_t=x_t, model=model, scheduler=scheduler,
|
80 |
-
labels=labels, t=t, n_samples=n_samples,
|
81 |
-
cfg_scale=cfg_scale)
|
82 |
-
yield ((x_t + 1) / 2 * 255).type(torch.uint8)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
diffusion/model/diffusion/scheduler.py
CHANGED
@@ -1,20 +1,184 @@
|
|
1 |
import torch
|
2 |
|
3 |
|
4 |
-
class
|
5 |
def __init__(
|
6 |
self,
|
7 |
max_timesteps: int = 1000,
|
8 |
beta_1: int = 0.0001,
|
9 |
beta_2: int = 0.02
|
10 |
) -> None:
|
11 |
-
self.
|
12 |
-
self.
|
13 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
self.sqrt_alpha = torch.sqrt(self.alpha)
|
15 |
-
self.alpha_hat = torch.cumprod(1 - self.beta, dim=0)
|
16 |
self.sqrt_alpha_hat = torch.sqrt(self.alpha_hat)
|
|
|
17 |
self.sqrt_one_minus_alpha_hat = torch.sqrt(1 - self.alpha_hat)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
-
|
20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import torch
|
2 |
|
3 |
|
4 |
+
class DDPMScheduler:
|
5 |
def __init__(
|
6 |
self,
|
7 |
max_timesteps: int = 1000,
|
8 |
beta_1: int = 0.0001,
|
9 |
beta_2: int = 0.02
|
10 |
) -> None:
|
11 |
+
self.beta_1 = beta_1
|
12 |
+
self.beta_2 = beta_2
|
13 |
+
self.max_timesteps = max_timesteps
|
14 |
+
self._init_params()
|
15 |
+
|
16 |
+
def _init_params(self, timesteps: int | None = None):
|
17 |
+
self.beta = torch.linspace(self.beta_1, self.beta_2, timesteps or self.max_timesteps)
|
18 |
+
self.sqrt_beta = torch.sqrt(self.beta)
|
19 |
+
self.alpha = (1 - self.beta)
|
20 |
+
self.sqrt_alpha = torch.sqrt(self.alpha)
|
21 |
+
self.alpha_hat = torch.cumprod(1 - self.beta, dim=0)
|
22 |
+
self.sqrt_alpha_hat = torch.sqrt(self.alpha_hat)
|
23 |
+
self.sqrt_one_minus_alpha = torch.sqrt(1 - self.alpha)
|
24 |
+
self.sqrt_one_minus_alpha_hat = torch.sqrt(1 - self.alpha_hat)
|
25 |
+
|
26 |
+
def noising(
|
27 |
+
self,
|
28 |
+
x_0: torch.Tensor,
|
29 |
+
t: torch.Tensor
|
30 |
+
):
|
31 |
+
if t.device != x_0.device:
|
32 |
+
t = t.to(x_0.device)
|
33 |
+
noise = torch.randn_like(x_0, device=x_0.device)
|
34 |
+
new_x = self.sqrt_alpha_hat.to(x_0.device)[t][:, None, None, None] * x_0
|
35 |
+
new_noise = self.sqrt_one_minus_alpha_hat.to(x_0.device)[t][:, None, None, None] * noise
|
36 |
+
return new_x + new_noise, noise
|
37 |
+
|
38 |
+
@torch.no_grad()
|
39 |
+
def sampling_t(
|
40 |
+
self,
|
41 |
+
x_t: torch.Tensor,
|
42 |
+
model,
|
43 |
+
labels: torch.Tensor,
|
44 |
+
timesteps: int,
|
45 |
+
t: int,
|
46 |
+
n_samples: int = 16,
|
47 |
+
cfg_scale: int = 3,
|
48 |
+
):
|
49 |
+
time = torch.full((n_samples,), fill_value=t, device=model.device)
|
50 |
+
pred_noise = model(x_t, time, labels)
|
51 |
+
if cfg_scale > 0 and labels is not None:
|
52 |
+
uncond_pred_noise = model(x_t, time, None)
|
53 |
+
pred_noise = torch.lerp(uncond_pred_noise, pred_noise, cfg_scale)
|
54 |
+
alpha = self.alpha.to(model.device)[time][:, None, None, None]
|
55 |
+
sqrt_alpha = self.sqrt_alpha.to(model.device)[time][:, None, None, None]
|
56 |
+
somah = self.sqrt_one_minus_alpha_hat.to(model.device)[time][:, None, None, None]
|
57 |
+
sqrt_beta = self.sqrt_beta.to(model.device)[time][:, None, None, None]
|
58 |
+
if t > 1:
|
59 |
+
noise = torch.randn_like(x_t, device=model.device)
|
60 |
+
else:
|
61 |
+
noise = torch.zeros_like(x_t, device=model.device)
|
62 |
+
|
63 |
+
x_t_new = 1 / sqrt_alpha * (x_t - (1-alpha) / somah * pred_noise) + sqrt_beta * noise
|
64 |
+
return x_t_new.clamp(-1, 1)
|
65 |
+
|
66 |
+
@torch.no_grad()
|
67 |
+
def sampling(
|
68 |
+
self,
|
69 |
+
model,
|
70 |
+
n_samples: int = 16,
|
71 |
+
in_channels: int = 3,
|
72 |
+
dim: int = 32,
|
73 |
+
timesteps: int = 1000,
|
74 |
+
cfg_scale: int = 3,
|
75 |
+
labels=None,
|
76 |
+
*args, **kwargs
|
77 |
+
):
|
78 |
+
if labels is not None:
|
79 |
+
n_samples = labels.shape[0]
|
80 |
+
model.eval()
|
81 |
+
x_t = torch.randn(
|
82 |
+
n_samples, in_channels, dim, dim, device=model.device
|
83 |
+
)
|
84 |
+
step_ratios = self.max_timesteps // timesteps
|
85 |
+
all_timesteps = torch.flip(torch.arange(0, timesteps) * step_ratios, dims=(0,))
|
86 |
+
for t in all_timesteps:
|
87 |
+
x_t = self.sampling_t(x_t=x_t, model=model, labels=labels, t=t, timesteps=timesteps,
|
88 |
+
n_samples=n_samples, cfg_scale=cfg_scale)
|
89 |
+
model.train()
|
90 |
+
x_t = (x_t.clamp(-1, 1) + 1) / 2 * 255. # range [0,255]
|
91 |
+
return x_t.type(torch.uint8)
|
92 |
+
|
93 |
+
@torch.no_grad()
|
94 |
+
def sampling_demo(
|
95 |
+
self,
|
96 |
+
model,
|
97 |
+
n_samples: int = 16,
|
98 |
+
in_channels: int = 3,
|
99 |
+
dim: int = 32,
|
100 |
+
timesteps: int = 1000,
|
101 |
+
cfg_scale: int = 3,
|
102 |
+
labels=None,
|
103 |
+
*args, **kwargs
|
104 |
+
):
|
105 |
+
if labels is not None:
|
106 |
+
n_samples = labels.shape[0]
|
107 |
+
|
108 |
+
x_t = torch.randn(
|
109 |
+
n_samples, in_channels, dim, dim, device=model.device
|
110 |
+
)
|
111 |
+
model.eval()
|
112 |
+
step_ratios = self.max_timesteps // timesteps
|
113 |
+
all_timesteps = torch.flip(torch.arange(0, timesteps) * step_ratios, dims=(0,))
|
114 |
+
for t in all_timesteps:
|
115 |
+
x_t = self.sampling_t(x_t=x_t, model=model, labels=labels, t=t, timesteps=timesteps,
|
116 |
+
n_samples=n_samples, cfg_scale=cfg_scale)
|
117 |
+
yield ((x_t.clamp(-1, 1) + 1) / 2 * 255).type(torch.uint8)
|
118 |
+
|
119 |
+
|
120 |
+
class DDIMScheduler(DDPMScheduler):
|
121 |
+
def __init__(
|
122 |
+
self,
|
123 |
+
max_timesteps: int = 1000,
|
124 |
+
beta_1: int = 0.0001,
|
125 |
+
beta_2: int = 0.02
|
126 |
+
) -> None:
|
127 |
+
super().__init__(beta_1=beta_1, beta_2=beta_2, max_timesteps=max_timesteps)
|
128 |
+
self._init_params()
|
129 |
+
|
130 |
+
def _init_params(self, timesteps: int | None = None):
|
131 |
+
self.beta = torch.linspace(self.beta_1, self.beta_2, timesteps or self.max_timesteps)
|
132 |
+
self.sqrt_beta = torch.sqrt(self.beta)
|
133 |
+
self.alpha = (1 - self.beta)
|
134 |
self.sqrt_alpha = torch.sqrt(self.alpha)
|
135 |
+
self.alpha_hat = torch.cumprod(1 - self.beta, dim=0)
|
136 |
self.sqrt_alpha_hat = torch.sqrt(self.alpha_hat)
|
137 |
+
self.sqrt_one_minus_alpha = torch.sqrt(1 - self.alpha)
|
138 |
self.sqrt_one_minus_alpha_hat = torch.sqrt(1 - self.alpha_hat)
|
139 |
+
self.alpha_hat_prev = torch.cat([torch.tensor([1.]), self.alpha_hat], dim=0)[:-1]
|
140 |
+
self.variance = (1 - self.alpha_hat_prev) / (1 - self.alpha_hat) * \
|
141 |
+
(1 - self.alpha_hat / self.alpha_hat_prev)
|
142 |
+
|
143 |
+
@torch.no_grad()
|
144 |
+
def sampling_t(
|
145 |
+
self,
|
146 |
+
x_t: torch.Tensor, model, t: int,
|
147 |
+
timesteps: int,
|
148 |
+
labels: torch.Tensor | None = None,
|
149 |
+
n_samples: int = 16,
|
150 |
+
eta: float = 0.0,
|
151 |
+
*args, **kwargs
|
152 |
+
):
|
153 |
+
time = torch.full((n_samples,), fill_value=t, device=model.device)
|
154 |
+
time_prev = time - self.max_timesteps // timesteps
|
155 |
+
pred_noise = model(x_t, time, labels)
|
156 |
+
|
157 |
+
sqrt_one_minus_alpha_hat = self.sqrt_one_minus_alpha_hat.to(model.device)[time][:, None, None, None]
|
158 |
+
sqrt_alpha_hat = self.sqrt_alpha_hat.to(model.device)[time][:, None, None, None]
|
159 |
+
alpha_hat_prev = self.alpha_hat[time_prev] if time_prev[0] >= 0 else torch.ones_like(time_prev)
|
160 |
+
alpha_hat_prev = alpha_hat_prev.to(model.device)[:, None, None, None]
|
161 |
+
sqrt_alpha_hat_prev = torch.sqrt(alpha_hat_prev)
|
162 |
+
posterior_std = torch.sqrt(self.variance)[time][:, None, None, None] * eta
|
163 |
+
|
164 |
+
if t > 0:
|
165 |
+
noise = torch.randn_like(x_t, device=model.device)
|
166 |
+
else:
|
167 |
+
noise = torch.zeros_like(x_t, device=model.device)
|
168 |
+
|
169 |
+
x_0_pred = (x_t - sqrt_one_minus_alpha_hat * pred_noise) / sqrt_alpha_hat
|
170 |
+
x_0_pred = x_0_pred.clamp(-1, 1)
|
171 |
+
x_t_direction = torch.sqrt(1. - alpha_hat_prev - posterior_std**2) * pred_noise
|
172 |
+
random_noise = posterior_std * noise
|
173 |
+
x_t_1 = sqrt_alpha_hat_prev * x_0_pred + x_t_direction + random_noise
|
174 |
+
|
175 |
+
return x_t_1
|
176 |
+
|
177 |
|
178 |
+
if __name__ == "__main__":
|
179 |
+
dct = DDIMScheduler().__dict__
|
180 |
+
for k in dct.keys():
|
181 |
+
if isinstance(dct[k], torch.Tensor):
|
182 |
+
print(k, dct[k].shape)
|
183 |
+
else:
|
184 |
+
print(k, dct[k])
|
diffusion/model/diffusion/unet.py
CHANGED
@@ -45,10 +45,10 @@ class DoubleConv(nn.Module):
|
|
45 |
mid_channels = out_channels
|
46 |
self.double_conv = nn.Sequential(
|
47 |
nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
|
48 |
-
nn.GroupNorm(
|
49 |
nn.GELU(),
|
50 |
nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
|
51 |
-
nn.GroupNorm(
|
52 |
)
|
53 |
|
54 |
def forward(self, x):
|
@@ -137,15 +137,20 @@ class UNet(pl.LightningModule):
|
|
137 |
self.sa3 = SelfAttention(channels=256)
|
138 |
|
139 |
self.mid1 = DoubleConv(in_channels=256, out_channels=512)
|
|
|
140 |
self.mid2 = DoubleConv(in_channels=512, out_channels=512)
|
141 |
|
142 |
self.up1 = UpSample(in_channels=512, out_channels=256)
|
143 |
-
self.
|
144 |
self.up2 = UpSample(in_channels=256, out_channels=128)
|
145 |
-
self.
|
146 |
self.up3 = UpSample(in_channels=128, out_channels=64)
|
147 |
-
self.
|
148 |
-
self.outc = nn.
|
|
|
|
|
|
|
|
|
149 |
|
150 |
def pos_encoding(self, t, channels):
|
151 |
inv_freq = 1.0 / (
|
@@ -168,14 +173,15 @@ class UNet(pl.LightningModule):
|
|
168 |
x4 = self.sa3(x4)
|
169 |
|
170 |
x4 = self.mid1(x4)
|
|
|
171 |
x4 = self.mid2(x4)
|
172 |
|
173 |
x = self.up1(x4, x3, t)
|
174 |
-
x = self.sa4(x)
|
175 |
-
x = self.up2(x, x2, t)
|
176 |
x = self.sa5(x)
|
177 |
-
x = self.
|
178 |
x = self.sa6(x)
|
|
|
|
|
179 |
output = self.outc(x)
|
180 |
return output
|
181 |
|
|
|
45 |
mid_channels = out_channels
|
46 |
self.double_conv = nn.Sequential(
|
47 |
nn.Conv2d(in_channels, mid_channels, kernel_size=3, padding=1, bias=False),
|
48 |
+
nn.GroupNorm(32, mid_channels, eps=1e-6, affine=True),
|
49 |
nn.GELU(),
|
50 |
nn.Conv2d(mid_channels, out_channels, kernel_size=3, padding=1, bias=False),
|
51 |
+
nn.GroupNorm(32, out_channels, eps=1e-6, affine=True),
|
52 |
)
|
53 |
|
54 |
def forward(self, x):
|
|
|
137 |
self.sa3 = SelfAttention(channels=256)
|
138 |
|
139 |
self.mid1 = DoubleConv(in_channels=256, out_channels=512)
|
140 |
+
self.sa4 = SelfAttention(channels=512)
|
141 |
self.mid2 = DoubleConv(in_channels=512, out_channels=512)
|
142 |
|
143 |
self.up1 = UpSample(in_channels=512, out_channels=256)
|
144 |
+
self.sa5 = SelfAttention(channels=256)
|
145 |
self.up2 = UpSample(in_channels=256, out_channels=128)
|
146 |
+
self.sa6 = SelfAttention(channels=128)
|
147 |
self.up3 = UpSample(in_channels=128, out_channels=64)
|
148 |
+
self.sa7 = SelfAttention(channels=64)
|
149 |
+
self.outc = nn.Sequential(
|
150 |
+
nn.GroupNorm(32, 64, eps=1e-6, affine=True),
|
151 |
+
nn.SiLU(),
|
152 |
+
nn.Conv2d(64, c_out, kernel_size=3, padding=1)
|
153 |
+
)
|
154 |
|
155 |
def pos_encoding(self, t, channels):
|
156 |
inv_freq = 1.0 / (
|
|
|
173 |
x4 = self.sa3(x4)
|
174 |
|
175 |
x4 = self.mid1(x4)
|
176 |
+
x4 = self.sa4(x4)
|
177 |
x4 = self.mid2(x4)
|
178 |
|
179 |
x = self.up1(x4, x3, t)
|
|
|
|
|
180 |
x = self.sa5(x)
|
181 |
+
x = self.up2(x, x2, t)
|
182 |
x = self.sa6(x)
|
183 |
+
x = self.up3(x, x1, t)
|
184 |
+
x = self.sa7(x)
|
185 |
output = self.outc(x)
|
186 |
return output
|
187 |
|
diffusion/model/ldm/model.py
CHANGED
@@ -2,4 +2,5 @@ import torch
|
|
2 |
import pytorch_lightning as pl
|
3 |
|
4 |
class LatentDiffusionModel(pl.LightningModule):
|
|
|
5 |
pass
|
|
|
2 |
import pytorch_lightning as pl
|
3 |
|
4 |
class LatentDiffusionModel(pl.LightningModule):
|
5 |
+
# TODO
|
6 |
pass
|
diffusion/model/ldm/tests/__init__.py
DELETED
File without changes
|
diffusion/tests/__init__.py
DELETED
File without changes
|
diffusion/train/__main__.py
CHANGED
@@ -20,6 +20,10 @@ def main():
|
|
20 |
'--data_dir', '-dd', type=str, default='./data/',
|
21 |
help='model name'
|
22 |
)
|
|
|
|
|
|
|
|
|
23 |
parser.add_argument(
|
24 |
'--max_epochs', '-me', type=int, default=200,
|
25 |
help='max epoch'
|
@@ -117,7 +121,8 @@ def main():
|
|
117 |
batch_size=args.batch_size,
|
118 |
num_workers=args.num_workers,
|
119 |
seed=args.seed,
|
120 |
-
train_ratio=args.train_ratio
|
|
|
121 |
)
|
122 |
|
123 |
# MODEL
|
@@ -129,7 +134,8 @@ def main():
|
|
129 |
max_timesteps=args.timesteps,
|
130 |
dim=img_dim,
|
131 |
num_classes=num_classes,
|
132 |
-
n_samples=args.n_samples
|
|
|
133 |
)
|
134 |
|
135 |
# CALLBACK
|
|
|
20 |
'--data_dir', '-dd', type=str, default='./data/',
|
21 |
help='model name'
|
22 |
)
|
23 |
+
parser.add_argument(
|
24 |
+
'--mode', type=str, default='ddim',
|
25 |
+
help='sampling mode'
|
26 |
+
)
|
27 |
parser.add_argument(
|
28 |
'--max_epochs', '-me', type=int, default=200,
|
29 |
help='max epoch'
|
|
|
121 |
batch_size=args.batch_size,
|
122 |
num_workers=args.num_workers,
|
123 |
seed=args.seed,
|
124 |
+
train_ratio=args.train_ratio,
|
125 |
+
img_dim=img_dim
|
126 |
)
|
127 |
|
128 |
# MODEL
|
|
|
134 |
max_timesteps=args.timesteps,
|
135 |
dim=img_dim,
|
136 |
num_classes=num_classes,
|
137 |
+
n_samples=args.n_samples,
|
138 |
+
mode=args.mode
|
139 |
)
|
140 |
|
141 |
# CALLBACK
|