DreamPerson committed on
Commit 9eec9d2 · 1 Parent(s): 3e29ca5

Upload 2 files

dpm2mv2/sampling.py ADDED
@@ -0,0 +1,687 @@
import math

from scipy import integrate
import torch
from torch import nn
from torchdiffeq import odeint
import torchsde
from tqdm.auto import trange, tqdm

from . import utils


def append_zero(x):
    return torch.cat([x, x.new_zeros([1])])


def get_sigmas_karras(n, sigma_min, sigma_max, rho=7., device='cpu'):
    """Constructs the noise schedule of Karras et al. (2022)."""
    ramp = torch.linspace(0, 1, n)
    min_inv_rho = sigma_min ** (1 / rho)
    max_inv_rho = sigma_max ** (1 / rho)
    sigmas = (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho
    return append_zero(sigmas).to(device)


def get_sigmas_exponential(n, sigma_min, sigma_max, device='cpu'):
    """Constructs an exponential noise schedule."""
    sigmas = torch.linspace(math.log(sigma_max), math.log(sigma_min), n, device=device).exp()
    return append_zero(sigmas)


def get_sigmas_polyexponential(n, sigma_min, sigma_max, rho=1., device='cpu'):
    """Constructs a polynomial-in-log-sigma noise schedule."""
    ramp = torch.linspace(1, 0, n, device=device) ** rho
    sigmas = torch.exp(ramp * (math.log(sigma_max) - math.log(sigma_min)) + math.log(sigma_min))
    return append_zero(sigmas)


def get_sigmas_vp(n, beta_d=19.9, beta_min=0.1, eps_s=1e-3, device='cpu'):
    """Constructs a continuous VP noise schedule."""
    t = torch.linspace(1, eps_s, n, device=device)
    sigmas = torch.sqrt(torch.exp(beta_d * t ** 2 / 2 + beta_min * t) - 1)
    return append_zero(sigmas)

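# --- Illustrative example (not part of the original file) ---
# A minimal sketch of how the schedule constructors above are typically used;
# the values are arbitrary and chosen only for demonstration.
def _example_schedules():
    sigmas_karras = get_sigmas_karras(n=10, sigma_min=0.1, sigma_max=10.0)
    sigmas_exp = get_sigmas_exponential(n=10, sigma_min=0.1, sigma_max=10.0)
    # Both run from sigma_max down to sigma_min and end with an appended zero,
    # so each schedule has n + 1 entries.
    assert len(sigmas_karras) == 11 and sigmas_karras[-1] == 0
    return sigmas_karras, sigmas_exp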

def to_d(x, sigma, denoised):
    """Converts a denoiser output to a Karras ODE derivative."""
    return (x - denoised) / utils.append_dims(sigma, x.ndim)


def get_ancestral_step(sigma_from, sigma_to, eta=1.):
    """Calculates the noise level (sigma_down) to step down to and the amount
    of noise to add (sigma_up) when doing an ancestral sampling step."""
    if not eta:
        return sigma_to, 0.
    sigma_up = min(sigma_to, eta * (sigma_to ** 2 * (sigma_from ** 2 - sigma_to ** 2) / sigma_from ** 2) ** 0.5)
    sigma_down = (sigma_to ** 2 - sigma_up ** 2) ** 0.5
    return sigma_down, sigma_up
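# --- Illustrative example (not part of the original file) ---
# Sketch of the variance split performed by get_ancestral_step: sigma_down is
# the level the ODE step targets and sigma_up is the noise injected afterwards;
# by construction sigma_down ** 2 + sigma_up ** 2 == sigma_to ** 2.
def _check_ancestral_split(sigma_from=10.0, sigma_to=5.0, eta=1.0):
    sigma_down, sigma_up = get_ancestral_step(sigma_from, sigma_to, eta)
    assert abs(sigma_down ** 2 + sigma_up ** 2 - sigma_to ** 2) < 1e-6
    return sigma_down, sigma_up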


def default_noise_sampler(x):
    return lambda sigma, sigma_next: torch.randn_like(x)


class BatchedBrownianTree:
    """A wrapper around torchsde.BrownianTree that enables batches of entropy."""

    def __init__(self, x, t0, t1, seed=None, **kwargs):
        t0, t1, self.sign = self.sort(t0, t1)
        w0 = kwargs.get('w0', torch.zeros_like(x))
        if seed is None:
            seed = torch.randint(0, 2 ** 63 - 1, []).item()
        self.batched = True
        try:
            assert len(seed) == x.shape[0]
            w0 = w0[0]
        except TypeError:
            seed = [seed]
            self.batched = False
        self.trees = [torchsde.BrownianTree(t0, w0, t1, entropy=s, **kwargs) for s in seed]

    @staticmethod
    def sort(a, b):
        return (a, b, 1) if a < b else (b, a, -1)

    def __call__(self, t0, t1):
        t0, t1, sign = self.sort(t0, t1)
        w = torch.stack([tree(t0, t1) for tree in self.trees]) * (self.sign * sign)
        return w if self.batched else w[0]


class BrownianTreeNoiseSampler:
    """A noise sampler backed by a torchsde.BrownianTree.

    Args:
        x (Tensor): The tensor whose shape, device and dtype to use to generate
            random samples.
        sigma_min (float): The low end of the valid interval.
        sigma_max (float): The high end of the valid interval.
        seed (int or List[int]): The random seed. If a list of seeds is
            supplied instead of a single integer, then the noise sampler will
            use one BrownianTree per batch item, each with its own seed.
        transform (callable): A function that maps sigma to the sampler's
            internal timestep.
    """

    def __init__(self, x, sigma_min, sigma_max, seed=None, transform=lambda x: x):
        self.transform = transform
        t0, t1 = self.transform(torch.as_tensor(sigma_min)), self.transform(torch.as_tensor(sigma_max))
        self.tree = BatchedBrownianTree(x, t0, t1, seed)

    def __call__(self, sigma, sigma_next):
        t0, t1 = self.transform(torch.as_tensor(sigma)), self.transform(torch.as_tensor(sigma_next))
        return self.tree(t0, t1) / (t1 - t0).abs().sqrt()
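# --- Illustrative example (not part of the original file) ---
# Minimal sketch of BrownianTreeNoiseSampler usage: with a fixed seed the
# sampler returns reproducible unit-variance noise for any (sigma, sigma_next)
# pair inside [sigma_min, sigma_max].
def _example_brownian_noise():
    x = torch.zeros(2, 4, 8, 8)
    sampler = BrownianTreeNoiseSampler(x, sigma_min=0.1, sigma_max=10.0, seed=0)
    noise_a = sampler(10.0, 5.0)
    noise_b = sampler(10.0, 5.0)
    assert torch.allclose(noise_a, noise_b)  # same interval, same seed -> same noise
    return noise_a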


@torch.no_grad()
def sample_euler(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0., s_tmax=float('inf'), s_noise=1.):
    """Implements Algorithm 2 (Euler steps) from Karras et al. (2022)."""
    extra_args = {} if extra_args is None else extra_args
    s_in = x.new_ones([x.shape[0]])
    for i in trange(len(sigmas) - 1, disable=disable):
        gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0.
        eps = torch.randn_like(x) * s_noise
        sigma_hat = sigmas[i] * (gamma + 1)
        if gamma > 0:
            x = x + eps * (sigma_hat ** 2 - sigmas[i] ** 2) ** 0.5
        denoised = model(x, sigma_hat * s_in, **extra_args)
        d = to_d(x, sigma_hat, denoised)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised})
        dt = sigmas[i + 1] - sigma_hat
        # Euler method
        x = x + d * dt
    return x
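# --- Illustrative example (not part of the original file) ---
# Minimal sketch of driving a sampler. `denoiser` stands in for any wrapped
# model with the k-diffusion interface (x, sigma) -> denoised x; the toy
# zero-returning lambda below is a placeholder, not a real model.
def _example_sample_euler():
    denoiser = lambda x, sigma, **kwargs: torch.zeros_like(x)  # toy "denoiser"
    sigmas = get_sigmas_karras(n=8, sigma_min=0.1, sigma_max=10.0)
    x = torch.randn(1, 4, 8, 8) * sigmas[0]
    return sample_euler(denoiser, x, sigmas, disable=True)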


@torch.no_grad()
def sample_euler_ancestral(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
    """Ancestral sampling with Euler method steps."""
    extra_args = {} if extra_args is None else extra_args
    noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler
    s_in = x.new_ones([x.shape[0]])
    for i in trange(len(sigmas) - 1, disable=disable):
        denoised = model(x, sigmas[i] * s_in, **extra_args)
        sigma_down, sigma_up = get_ancestral_step(sigmas[i], sigmas[i + 1], eta=eta)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
        d = to_d(x, sigmas[i], denoised)
        # Euler method
        dt = sigma_down - sigmas[i]
        x = x + d * dt
        if sigmas[i + 1] > 0:
            x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up
    return x


@torch.no_grad()
def sample_heun(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0., s_tmax=float('inf'), s_noise=1.):
    """Implements Algorithm 2 (Heun steps) from Karras et al. (2022)."""
    extra_args = {} if extra_args is None else extra_args
    s_in = x.new_ones([x.shape[0]])
    for i in trange(len(sigmas) - 1, disable=disable):
        gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0.
        eps = torch.randn_like(x) * s_noise
        sigma_hat = sigmas[i] * (gamma + 1)
        if gamma > 0:
            x = x + eps * (sigma_hat ** 2 - sigmas[i] ** 2) ** 0.5
        denoised = model(x, sigma_hat * s_in, **extra_args)
        d = to_d(x, sigma_hat, denoised)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised})
        dt = sigmas[i + 1] - sigma_hat
        if sigmas[i + 1] == 0:
            # Euler method
            x = x + d * dt
        else:
            # Heun's method
            x_2 = x + d * dt
            denoised_2 = model(x_2, sigmas[i + 1] * s_in, **extra_args)
            d_2 = to_d(x_2, sigmas[i + 1], denoised_2)
            d_prime = (d + d_2) / 2
            x = x + d_prime * dt
    return x


@torch.no_grad()
def sample_dpm_2(model, x, sigmas, extra_args=None, callback=None, disable=None, s_churn=0., s_tmin=0., s_tmax=float('inf'), s_noise=1.):
    """A sampler inspired by DPM-Solver-2 and Algorithm 2 from Karras et al. (2022)."""
    extra_args = {} if extra_args is None else extra_args
    s_in = x.new_ones([x.shape[0]])
    for i in trange(len(sigmas) - 1, disable=disable):
        gamma = min(s_churn / (len(sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigmas[i] <= s_tmax else 0.
        eps = torch.randn_like(x) * s_noise
        sigma_hat = sigmas[i] * (gamma + 1)
        if gamma > 0:
            x = x + eps * (sigma_hat ** 2 - sigmas[i] ** 2) ** 0.5
        denoised = model(x, sigma_hat * s_in, **extra_args)
        d = to_d(x, sigma_hat, denoised)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigma_hat, 'denoised': denoised})
        if sigmas[i + 1] == 0:
            # Euler method
            dt = sigmas[i + 1] - sigma_hat
            x = x + d * dt
        else:
            # DPM-Solver-2
            sigma_mid = sigma_hat.log().lerp(sigmas[i + 1].log(), 0.5).exp()
            dt_1 = sigma_mid - sigma_hat
            dt_2 = sigmas[i + 1] - sigma_hat
            x_2 = x + d * dt_1
            denoised_2 = model(x_2, sigma_mid * s_in, **extra_args)
            d_2 = to_d(x_2, sigma_mid, denoised_2)
            x = x + d_2 * dt_2
    return x
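# --- Illustrative note (not part of the original file) ---
# The DPM-Solver-2 midpoint above is taken in log-sigma space, which is the
# geometric mean of the two noise levels:
#     exp((log a + log b) / 2) == sqrt(a * b)
def _check_log_midpoint(a=10.0, b=5.0):
    sigma_mid = torch.tensor(a).log().lerp(torch.tensor(b).log(), 0.5).exp()
    assert torch.isclose(sigma_mid, torch.tensor(math.sqrt(a * b)))
    return sigma_mid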


@torch.no_grad()
def sample_dpm_2_ancestral(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
    """Ancestral sampling with DPM-Solver second-order steps."""
    extra_args = {} if extra_args is None else extra_args
    noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler
    s_in = x.new_ones([x.shape[0]])
    for i in trange(len(sigmas) - 1, disable=disable):
        denoised = model(x, sigmas[i] * s_in, **extra_args)
        sigma_down, sigma_up = get_ancestral_step(sigmas[i], sigmas[i + 1], eta=eta)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
        d = to_d(x, sigmas[i], denoised)
        if sigma_down == 0:
            # Euler method
            dt = sigma_down - sigmas[i]
            x = x + d * dt
        else:
            # DPM-Solver-2
            sigma_mid = sigmas[i].log().lerp(sigma_down.log(), 0.5).exp()
            dt_1 = sigma_mid - sigmas[i]
            dt_2 = sigma_down - sigmas[i]
            x_2 = x + d * dt_1
            denoised_2 = model(x_2, sigma_mid * s_in, **extra_args)
            d_2 = to_d(x_2, sigma_mid, denoised_2)
            x = x + d_2 * dt_2
        x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up
    return x


def linear_multistep_coeff(order, t, i, j):
    if order - 1 > i:
        raise ValueError(f'Order {order} too high for step {i}')
    def fn(tau):
        prod = 1.
        for k in range(order):
            if j == k:
                continue
            prod *= (tau - t[i - k]) / (t[i - j] - t[i - k])
        return prod
    return integrate.quad(fn, t[i], t[i + 1], epsrel=1e-4)[0]


@torch.no_grad()
def sample_lms(model, x, sigmas, extra_args=None, callback=None, disable=None, order=4):
    extra_args = {} if extra_args is None else extra_args
    s_in = x.new_ones([x.shape[0]])
    sigmas_cpu = sigmas.detach().cpu().numpy()
    ds = []
    for i in trange(len(sigmas) - 1, disable=disable):
        denoised = model(x, sigmas[i] * s_in, **extra_args)
        d = to_d(x, sigmas[i], denoised)
        ds.append(d)
        if len(ds) > order:
            ds.pop(0)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
        cur_order = min(i + 1, order)
        coeffs = [linear_multistep_coeff(cur_order, sigmas_cpu, i, j) for j in range(cur_order)]
        x = x + sum(coeff * d for coeff, d in zip(coeffs, reversed(ds)))
    return x
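# --- Illustrative example (not part of the original file) ---
# linear_multistep_coeff integrates the Lagrange basis polynomials over one
# step. On a uniform descending grid this reduces to the classic
# Adams-Bashforth weights scaled by the (negative) step size: for order 2 and
# step -1 the coefficients are -3/2 and +1/2.
def _check_lms_coeffs():
    t = [2.0, 1.0, 0.0]
    coeffs = [linear_multistep_coeff(2, t, 1, j) for j in range(2)]
    assert abs(coeffs[0] + 1.5) < 1e-6 and abs(coeffs[1] - 0.5) < 1e-6
    return coeffs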


@torch.no_grad()
def log_likelihood(model, x, sigma_min, sigma_max, extra_args=None, atol=1e-4, rtol=1e-4):
    extra_args = {} if extra_args is None else extra_args
    s_in = x.new_ones([x.shape[0]])
    v = torch.randint_like(x, 2) * 2 - 1
    fevals = 0
    def ode_fn(sigma, x):
        nonlocal fevals
        with torch.enable_grad():
            x = x[0].detach().requires_grad_()
            denoised = model(x, sigma * s_in, **extra_args)
            d = to_d(x, sigma, denoised)
            fevals += 1
            grad = torch.autograd.grad((d * v).sum(), x)[0]
            d_ll = (v * grad).flatten(1).sum(1)
        return d.detach(), d_ll
    x_min = x, x.new_zeros([x.shape[0]])
    t = x.new_tensor([sigma_min, sigma_max])
    sol = odeint(ode_fn, x_min, t, atol=atol, rtol=rtol, method='dopri5')
    latent, delta_ll = sol[0][-1], sol[1][-1]
    ll_prior = torch.distributions.Normal(0, sigma_max).log_prob(latent).flatten(1).sum(1)
    return ll_prior + delta_ll, {'fevals': fevals}
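# --- Illustrative example (not part of the original file) ---
# log_likelihood above estimates the divergence with Hutchinson's trick: for
# Rademacher-distributed v, E[v^T A v] == trace(A). A quick numeric sketch:
def _check_hutchinson(n_samples=100000):
    a = torch.tensor([[1.0, 0.5, 0.0], [0.5, 2.0, 0.3], [0.0, 0.3, 3.0]])
    v = torch.randint(0, 2, (n_samples, 3)).float() * 2 - 1
    est = torch.einsum('ni,ij,nj->n', v, a, v).mean()
    # est approaches trace(a) == 6 as n_samples grows
    return est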


class PIDStepSizeController:
    """A PID controller for ODE adaptive step size control."""
    def __init__(self, h, pcoeff, icoeff, dcoeff, order=1, accept_safety=0.81, eps=1e-8):
        self.h = h
        self.b1 = (pcoeff + icoeff + dcoeff) / order
        self.b2 = -(pcoeff + 2 * dcoeff) / order
        self.b3 = dcoeff / order
        self.accept_safety = accept_safety
        self.eps = eps
        self.errs = []

    def limiter(self, x):
        return 1 + math.atan(x - 1)

    def propose_step(self, error):
        inv_error = 1 / (float(error) + self.eps)
        if not self.errs:
            self.errs = [inv_error, inv_error, inv_error]
        self.errs[0] = inv_error
        factor = self.errs[0] ** self.b1 * self.errs[1] ** self.b2 * self.errs[2] ** self.b3
        factor = self.limiter(factor)
        accept = factor >= self.accept_safety
        if accept:
            self.errs[2] = self.errs[1]
            self.errs[1] = self.errs[0]
            self.h *= factor
        return accept


class DPMSolver(nn.Module):
    """DPM-Solver. See https://arxiv.org/abs/2206.00927."""

    def __init__(self, model, extra_args=None, eps_callback=None, info_callback=None):
        super().__init__()
        self.model = model
        self.extra_args = {} if extra_args is None else extra_args
        self.eps_callback = eps_callback
        self.info_callback = info_callback

    def t(self, sigma):
        return -sigma.log()

    def sigma(self, t):
        return t.neg().exp()

    def eps(self, eps_cache, key, x, t, *args, **kwargs):
        if key in eps_cache:
            return eps_cache[key], eps_cache
        sigma = self.sigma(t) * x.new_ones([x.shape[0]])
        eps = (x - self.model(x, sigma, *args, **self.extra_args, **kwargs)) / self.sigma(t)
        if self.eps_callback is not None:
            self.eps_callback()
        return eps, {key: eps, **eps_cache}

    def dpm_solver_1_step(self, x, t, t_next, eps_cache=None):
        eps_cache = {} if eps_cache is None else eps_cache
        h = t_next - t
        eps, eps_cache = self.eps(eps_cache, 'eps', x, t)
        x_1 = x - self.sigma(t_next) * h.expm1() * eps
        return x_1, eps_cache
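    # --- Illustrative note (not part of the original file) ---
    # With t = -log(sigma) and h = t_next - t, the first-order update above is
    #     x_1 = x - sigma(t_next) * (e^h - 1) * eps
    #         = x - sigma(t) * (1 - e^{-h}) * eps
    #         = x + eps * (sigma(t_next) - sigma(t)),
    # i.e. exactly an Euler step of dx/dsigma = eps taken in sigma space.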

    def dpm_solver_2_step(self, x, t, t_next, r1=1 / 2, eps_cache=None):
        eps_cache = {} if eps_cache is None else eps_cache
        h = t_next - t
        eps, eps_cache = self.eps(eps_cache, 'eps', x, t)
        s1 = t + r1 * h
        u1 = x - self.sigma(s1) * (r1 * h).expm1() * eps
        eps_r1, eps_cache = self.eps(eps_cache, 'eps_r1', u1, s1)
        x_2 = x - self.sigma(t_next) * h.expm1() * eps - self.sigma(t_next) / (2 * r1) * h.expm1() * (eps_r1 - eps)
        return x_2, eps_cache

    def dpm_solver_3_step(self, x, t, t_next, r1=1 / 3, r2=2 / 3, eps_cache=None):
        eps_cache = {} if eps_cache is None else eps_cache
        h = t_next - t
        eps, eps_cache = self.eps(eps_cache, 'eps', x, t)
        s1 = t + r1 * h
        s2 = t + r2 * h
        u1 = x - self.sigma(s1) * (r1 * h).expm1() * eps
        eps_r1, eps_cache = self.eps(eps_cache, 'eps_r1', u1, s1)
        u2 = x - self.sigma(s2) * (r2 * h).expm1() * eps - self.sigma(s2) * (r2 / r1) * ((r2 * h).expm1() / (r2 * h) - 1) * (eps_r1 - eps)
        eps_r2, eps_cache = self.eps(eps_cache, 'eps_r2', u2, s2)
        x_3 = x - self.sigma(t_next) * h.expm1() * eps - self.sigma(t_next) / r2 * (h.expm1() / h - 1) * (eps_r2 - eps)
        return x_3, eps_cache

    def dpm_solver_fast(self, x, t_start, t_end, nfe, eta=0., s_noise=1., noise_sampler=None):
        noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler
        if not t_end > t_start and eta:
            raise ValueError('eta must be 0 for reverse sampling')

        m = math.floor(nfe / 3) + 1
        ts = torch.linspace(t_start, t_end, m + 1, device=x.device)

        if nfe % 3 == 0:
            orders = [3] * (m - 2) + [2, 1]
        else:
            orders = [3] * (m - 1) + [nfe % 3]

        for i in range(len(orders)):
            eps_cache = {}
            t, t_next = ts[i], ts[i + 1]
            if eta:
                sd, su = get_ancestral_step(self.sigma(t), self.sigma(t_next), eta)
                t_next_ = torch.minimum(t_end, self.t(sd))
                su = (self.sigma(t_next) ** 2 - self.sigma(t_next_) ** 2) ** 0.5
            else:
                t_next_, su = t_next, 0.

            eps, eps_cache = self.eps(eps_cache, 'eps', x, t)
            denoised = x - self.sigma(t) * eps
            if self.info_callback is not None:
                self.info_callback({'x': x, 'i': i, 't': ts[i], 't_up': t, 'denoised': denoised})

            if orders[i] == 1:
                x, eps_cache = self.dpm_solver_1_step(x, t, t_next_, eps_cache=eps_cache)
            elif orders[i] == 2:
                x, eps_cache = self.dpm_solver_2_step(x, t, t_next_, eps_cache=eps_cache)
            else:
                x, eps_cache = self.dpm_solver_3_step(x, t, t_next_, eps_cache=eps_cache)

            x = x + su * s_noise * noise_sampler(self.sigma(t), self.sigma(t_next))

        return x

    def dpm_solver_adaptive(self, x, t_start, t_end, order=3, rtol=0.05, atol=0.0078, h_init=0.05, pcoeff=0., icoeff=1., dcoeff=0., accept_safety=0.81, eta=0., s_noise=1., noise_sampler=None):
        noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler
        if order not in {2, 3}:
            raise ValueError('order should be 2 or 3')
        forward = t_end > t_start
        if not forward and eta:
            raise ValueError('eta must be 0 for reverse sampling')
        h_init = abs(h_init) * (1 if forward else -1)
        atol = torch.tensor(atol)
        rtol = torch.tensor(rtol)
        s = t_start
        x_prev = x
        accept = True
        pid = PIDStepSizeController(h_init, pcoeff, icoeff, dcoeff, 1.5 if eta else order, accept_safety)
        info = {'steps': 0, 'nfe': 0, 'n_accept': 0, 'n_reject': 0}

        while s < t_end - 1e-5 if forward else s > t_end + 1e-5:
            eps_cache = {}
            t = torch.minimum(t_end, s + pid.h) if forward else torch.maximum(t_end, s + pid.h)
            if eta:
                sd, su = get_ancestral_step(self.sigma(s), self.sigma(t), eta)
                t_ = torch.minimum(t_end, self.t(sd))
                su = (self.sigma(t) ** 2 - self.sigma(t_) ** 2) ** 0.5
            else:
                t_, su = t, 0.

            eps, eps_cache = self.eps(eps_cache, 'eps', x, s)
            denoised = x - self.sigma(s) * eps

            if order == 2:
                x_low, eps_cache = self.dpm_solver_1_step(x, s, t_, eps_cache=eps_cache)
                x_high, eps_cache = self.dpm_solver_2_step(x, s, t_, eps_cache=eps_cache)
            else:
                x_low, eps_cache = self.dpm_solver_2_step(x, s, t_, r1=1 / 3, eps_cache=eps_cache)
                x_high, eps_cache = self.dpm_solver_3_step(x, s, t_, eps_cache=eps_cache)
            delta = torch.maximum(atol, rtol * torch.maximum(x_low.abs(), x_prev.abs()))
            error = torch.linalg.norm((x_low - x_high) / delta) / x.numel() ** 0.5
            accept = pid.propose_step(error)
            if accept:
                x_prev = x_low
                x = x_high + su * s_noise * noise_sampler(self.sigma(s), self.sigma(t))
                s = t
                info['n_accept'] += 1
            else:
                info['n_reject'] += 1
            info['nfe'] += order
            info['steps'] += 1

            if self.info_callback is not None:
                self.info_callback({'x': x, 'i': info['steps'] - 1, 't': s, 't_up': s, 'denoised': denoised, 'error': error, 'h': pid.h, **info})

        return x, info


@torch.no_grad()
def sample_dpm_fast(model, x, sigma_min, sigma_max, n, extra_args=None, callback=None, disable=None, eta=0., s_noise=1., noise_sampler=None):
    """DPM-Solver-Fast (fixed step size). See https://arxiv.org/abs/2206.00927."""
    if sigma_min <= 0 or sigma_max <= 0:
        raise ValueError('sigma_min and sigma_max must not be 0')
    with tqdm(total=n, disable=disable) as pbar:
        dpm_solver = DPMSolver(model, extra_args, eps_callback=pbar.update)
        if callback is not None:
            dpm_solver.info_callback = lambda info: callback({'sigma': dpm_solver.sigma(info['t']), 'sigma_hat': dpm_solver.sigma(info['t_up']), **info})
        return dpm_solver.dpm_solver_fast(x, dpm_solver.t(torch.tensor(sigma_max)), dpm_solver.t(torch.tensor(sigma_min)), n, eta, s_noise, noise_sampler)


@torch.no_grad()
def sample_dpm_adaptive(model, x, sigma_min, sigma_max, extra_args=None, callback=None, disable=None, order=3, rtol=0.05, atol=0.0078, h_init=0.05, pcoeff=0., icoeff=1., dcoeff=0., accept_safety=0.81, eta=0., s_noise=1., noise_sampler=None, return_info=False):
    """DPM-Solver-12 and 23 (adaptive step size). See https://arxiv.org/abs/2206.00927."""
    if sigma_min <= 0 or sigma_max <= 0:
        raise ValueError('sigma_min and sigma_max must not be 0')
    with tqdm(disable=disable) as pbar:
        dpm_solver = DPMSolver(model, extra_args, eps_callback=pbar.update)
        if callback is not None:
            dpm_solver.info_callback = lambda info: callback({'sigma': dpm_solver.sigma(info['t']), 'sigma_hat': dpm_solver.sigma(info['t_up']), **info})
        x, info = dpm_solver.dpm_solver_adaptive(x, dpm_solver.t(torch.tensor(sigma_max)), dpm_solver.t(torch.tensor(sigma_min)), order, rtol, atol, h_init, pcoeff, icoeff, dcoeff, accept_safety, eta, s_noise, noise_sampler)
    if return_info:
        return x, info
    return x


@torch.no_grad()
def sample_dpmpp_2s_ancestral(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None):
    """Ancestral sampling with DPM-Solver++(2S) second-order steps."""
    extra_args = {} if extra_args is None else extra_args
    noise_sampler = default_noise_sampler(x) if noise_sampler is None else noise_sampler
    s_in = x.new_ones([x.shape[0]])
    sigma_fn = lambda t: t.neg().exp()
    t_fn = lambda sigma: sigma.log().neg()

    for i in trange(len(sigmas) - 1, disable=disable):
        denoised = model(x, sigmas[i] * s_in, **extra_args)
        sigma_down, sigma_up = get_ancestral_step(sigmas[i], sigmas[i + 1], eta=eta)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
        if sigma_down == 0:
            # Euler method
            d = to_d(x, sigmas[i], denoised)
            dt = sigma_down - sigmas[i]
            x = x + d * dt
        else:
            # DPM-Solver++(2S)
            t, t_next = t_fn(sigmas[i]), t_fn(sigma_down)
            r = 1 / 2
            h = t_next - t
            s = t + r * h
            x_2 = (sigma_fn(s) / sigma_fn(t)) * x - (-h * r).expm1() * denoised
            denoised_2 = model(x_2, sigma_fn(s) * s_in, **extra_args)
            x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised_2
        # Noise addition
        if sigmas[i + 1] > 0:
            x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * s_noise * sigma_up
    return x


@torch.no_grad()
def sample_dpmpp_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r=1 / 2):
    """DPM-Solver++ (stochastic)."""
    sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
    noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max) if noise_sampler is None else noise_sampler
    extra_args = {} if extra_args is None else extra_args
    s_in = x.new_ones([x.shape[0]])
    sigma_fn = lambda t: t.neg().exp()
    t_fn = lambda sigma: sigma.log().neg()

    for i in trange(len(sigmas) - 1, disable=disable):
        denoised = model(x, sigmas[i] * s_in, **extra_args)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
        if sigmas[i + 1] == 0:
            # Euler method
            d = to_d(x, sigmas[i], denoised)
            dt = sigmas[i + 1] - sigmas[i]
            x = x + d * dt
        else:
            # DPM-Solver++
            t, t_next = t_fn(sigmas[i]), t_fn(sigmas[i + 1])
            h = t_next - t
            s = t + h * r
            fac = 1 / (2 * r)

            # Step 1
            sd, su = get_ancestral_step(sigma_fn(t), sigma_fn(s), eta)
            s_ = t_fn(sd)
            x_2 = (sigma_fn(s_) / sigma_fn(t)) * x - (t - s_).expm1() * denoised
            x_2 = x_2 + noise_sampler(sigma_fn(t), sigma_fn(s)) * s_noise * su
            denoised_2 = model(x_2, sigma_fn(s) * s_in, **extra_args)

            # Step 2
            sd, su = get_ancestral_step(sigma_fn(t), sigma_fn(t_next), eta)
            t_next_ = t_fn(sd)
            denoised_d = (1 - fac) * denoised + fac * denoised_2
            x = (sigma_fn(t_next_) / sigma_fn(t)) * x - (t - t_next_).expm1() * denoised_d
            x = x + noise_sampler(sigma_fn(t), sigma_fn(t_next)) * s_noise * su
    return x


@torch.no_grad()
def sample_dpmpp_2m(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """DPM-Solver++(2M)."""
    extra_args = {} if extra_args is None else extra_args
    s_in = x.new_ones([x.shape[0]])
    sigma_fn = lambda t: t.neg().exp()
    t_fn = lambda sigma: sigma.log().neg()
    old_denoised = None

    for i in trange(len(sigmas) - 1, disable=disable):
        denoised = model(x, sigmas[i] * s_in, **extra_args)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
        t, t_next = t_fn(sigmas[i]), t_fn(sigmas[i + 1])
        h = t_next - t
        if old_denoised is None or sigmas[i + 1] == 0:
            x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised
        else:
            h_last = t - t_fn(sigmas[i - 1])
            r = h_last / h
            denoised_d = (1 + 1 / (2 * r)) * denoised - (1 / (2 * r)) * old_denoised
            x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised_d
        old_denoised = denoised
    return x
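# --- Illustrative note (not part of the original file) ---
# With uniform steps in t (h_last == h, so r == 1), the multistep combination
# above reduces to a simple linear extrapolation of the two denoiser outputs:
#     denoised_d = 1.5 * denoised - 0.5 * old_denoised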


@torch.no_grad()
def sample_dpmpp_2m_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type='midpoint'):
    """DPM-Solver++(2M) SDE."""

    if solver_type not in {'heun', 'midpoint'}:
        raise ValueError('solver_type must be \'heun\' or \'midpoint\'')

    sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
    noise_sampler = BrownianTreeNoiseSampler(x, sigma_min, sigma_max) if noise_sampler is None else noise_sampler
    extra_args = {} if extra_args is None else extra_args
    s_in = x.new_ones([x.shape[0]])

    old_denoised = None
    h_last = None

    for i in trange(len(sigmas) - 1, disable=disable):
        denoised = model(x, sigmas[i] * s_in, **extra_args)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
        if sigmas[i + 1] == 0:
            # Denoising step
            x = denoised
        else:
            # DPM-Solver++(2M) SDE
            t, s = -sigmas[i].log(), -sigmas[i + 1].log()
            h = s - t
            eta_h = eta * h

            x = sigmas[i + 1] / sigmas[i] * (-eta_h).exp() * x + (-h - eta_h).expm1().neg() * denoised

            if old_denoised is not None:
                r = h_last / h
                if solver_type == 'heun':
                    x = x + ((-h - eta_h).expm1().neg() / (-h - eta_h) + 1) * (1 / r) * (denoised - old_denoised)
                elif solver_type == 'midpoint':
                    x = x + 0.5 * (-h - eta_h).expm1().neg() * (1 / r) * (denoised - old_denoised)

            x = x + noise_sampler(sigmas[i], sigmas[i + 1]) * sigmas[i + 1] * (-2 * eta_h).expm1().neg().sqrt() * s_noise

        old_denoised = denoised
        h_last = h
    return x


@torch.no_grad()
def sample_dpmpp_2m_test(model, x, sigmas, extra_args=None, callback=None, disable=None):
    """DPM-Solver++(2M) V2: a variant of sample_dpmpp_2m that symmetrizes the
    sigma ratio and averages the current and previous step sizes."""
    extra_args = {} if extra_args is None else extra_args
    s_in = x.new_ones([x.shape[0]])
    sigma_fn = lambda t: t.neg().exp()
    t_fn = lambda sigma: sigma.log().neg()
    old_denoised = None

    for i in trange(len(sigmas) - 1, disable=disable):
        denoised = model(x, sigmas[i] * s_in, **extra_args)
        if callback is not None:
            callback({'x': x, 'i': i, 'sigma': sigmas[i], 'sigma_hat': sigmas[i], 'denoised': denoised})
        t, t_next = t_fn(sigmas[i]), t_fn(sigmas[i + 1])
        h = t_next - t

        # these hold the smaller and larger of the two adjacent sigmas
        t_min = min(sigma_fn(t_next), sigma_fn(t))
        t_max = max(sigma_fn(t_next), sigma_fn(t))

        if old_denoised is None or sigmas[i + 1] == 0:
            x = (t_min / t_max) * x - (-h).expm1() * denoised
        else:
            h_last = t - t_fn(sigmas[i - 1])

            h_min = min(h_last, h)
            h_max = max(h_last, h)
            r = h_max / h_min

            # use the average of the last two step sizes for the update
            h_d = (h_max + h_min) / 2
            denoised_d = (1 + 1 / (2 * r)) * denoised - (1 / (2 * r)) * old_denoised
            x = (t_min / t_max) * x - (-h_d).expm1() * denoised_d

        old_denoised = denoised
    return x
dpm2mv2/sd_samplers_kdiffusion.py ADDED
@@ -0,0 +1,394 @@
from collections import deque
import torch
import inspect
import k_diffusion.sampling
from modules import prompt_parser, devices, sd_samplers_common

from modules.shared import opts, state
import modules.shared as shared
from modules.script_callbacks import CFGDenoiserParams, cfg_denoiser_callback
from modules.script_callbacks import CFGDenoisedParams, cfg_denoised_callback
from modules.script_callbacks import AfterCFGCallbackParams, cfg_after_cfg_callback

samplers_k_diffusion = [
    ('Euler a', 'sample_euler_ancestral', ['k_euler_a', 'k_euler_ancestral'], {"uses_ensd": True}),
    ('Euler', 'sample_euler', ['k_euler'], {}),
    ('LMS', 'sample_lms', ['k_lms'], {}),
    ('Heun', 'sample_heun', ['k_heun'], {"second_order": True}),
    ('DPM2', 'sample_dpm_2', ['k_dpm_2'], {'discard_next_to_last_sigma': True}),
    ('DPM2 a', 'sample_dpm_2_ancestral', ['k_dpm_2_a'], {'discard_next_to_last_sigma': True, "uses_ensd": True}),
    ('DPM++ 2S a', 'sample_dpmpp_2s_ancestral', ['k_dpmpp_2s_a'], {"uses_ensd": True, "second_order": True}),
    ('DPM++ 2M', 'sample_dpmpp_2m', ['k_dpmpp_2m'], {}),
    ('DPM++ 2M V2', 'sample_dpmpp_2m_test', ['k_dpmpp_2m'], {}),
    ('DPM++ SDE', 'sample_dpmpp_sde', ['k_dpmpp_sde'], {"second_order": True, "brownian_noise": True}),
    ('DPM++ 2M SDE', 'sample_dpmpp_2m_sde', ['k_dpmpp_2m_sde_ka'], {"brownian_noise": True, 'discard_next_to_last_sigma': True}),
    ('DPM fast', 'sample_dpm_fast', ['k_dpm_fast'], {"uses_ensd": True}),
    ('DPM adaptive', 'sample_dpm_adaptive', ['k_dpm_ad'], {"uses_ensd": True}),
    ('LMS Karras', 'sample_lms', ['k_lms_ka'], {'scheduler': 'karras'}),
    ('DPM2 Karras', 'sample_dpm_2', ['k_dpm_2_ka'], {'scheduler': 'karras', 'discard_next_to_last_sigma': True, "uses_ensd": True, "second_order": True}),
    ('DPM2 a Karras', 'sample_dpm_2_ancestral', ['k_dpm_2_a_ka'], {'scheduler': 'karras', 'discard_next_to_last_sigma': True, "uses_ensd": True, "second_order": True}),
    ('DPM++ 2S a Karras', 'sample_dpmpp_2s_ancestral', ['k_dpmpp_2s_a_ka'], {'scheduler': 'karras', "uses_ensd": True, "second_order": True}),
    ('DPM++ 2M Karras', 'sample_dpmpp_2m', ['k_dpmpp_2m_ka'], {'scheduler': 'karras'}),
    ('DPM++ 2M Karras V2', 'sample_dpmpp_2m_test', ['k_dpmpp_2m_ka'], {'scheduler': 'karras'}),
    ('DPM++ SDE Karras', 'sample_dpmpp_sde', ['k_dpmpp_sde_ka'], {'scheduler': 'karras', "second_order": True, "brownian_noise": True}),
    ('DPM++ 2M SDE Karras', 'sample_dpmpp_2m_sde', ['k_dpmpp_2m_sde_ka'], {'scheduler': 'karras', "brownian_noise": True, 'discard_next_to_last_sigma': True}),
]

samplers_data_k_diffusion = [
    sd_samplers_common.SamplerData(label, lambda model, funcname=funcname: KDiffusionSampler(funcname, model), aliases, options)
    for label, funcname, aliases, options in samplers_k_diffusion
    if hasattr(k_diffusion.sampling, funcname)
]

sampler_extra_params = {
    'sample_euler': ['s_churn', 's_tmin', 's_tmax', 's_noise'],
    'sample_heun': ['s_churn', 's_tmin', 's_tmax', 's_noise'],
    'sample_dpm_2': ['s_churn', 's_tmin', 's_tmax', 's_noise'],
}

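# --- Illustrative note (not part of the original file) ---
# The two "V2" entries above route to sample_dpmpp_2m_test. Because
# samplers_data_k_diffusion filters on hasattr(k_diffusion.sampling, funcname),
# those entries only become available once sample_dpmpp_2m_test exists on
# k_diffusion.sampling (e.g. by patching in the dpm2mv2/sampling.py uploaded
# in this commit).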
class CFGDenoiser(torch.nn.Module):
    """
    Classifier free guidance denoiser. A wrapper for the Stable Diffusion model (specifically for the UNet)
    that can take a noisy picture and produce a noise-free picture using two guidances (prompts)
    instead of one. Originally, the second prompt was just an empty string, but we use a non-empty
    negative prompt.
    """

    def __init__(self, model):
        super().__init__()
        self.inner_model = model
        self.mask = None
        self.nmask = None
        self.init_latent = None
        self.step = 0
        self.image_cfg_scale = None

    def combine_denoised(self, x_out, conds_list, uncond, cond_scale):
        denoised_uncond = x_out[-uncond.shape[0]:]
        denoised = torch.clone(denoised_uncond)

        for i, conds in enumerate(conds_list):
            for cond_index, weight in conds:
                denoised[i] += (x_out[cond_index] - denoised_uncond[i]) * (weight * cond_scale)

        return denoised

    def combine_denoised_for_edit_model(self, x_out, cond_scale):
        out_cond, out_img_cond, out_uncond = x_out.chunk(3)
        denoised = out_uncond + cond_scale * (out_cond - out_img_cond) + self.image_cfg_scale * (out_img_cond - out_uncond)

        return denoised

    def forward(self, x, sigma, uncond, cond, cond_scale, s_min_uncond, image_cond):
        if state.interrupted or state.skipped:
            raise sd_samplers_common.InterruptedException

        # At self.image_cfg_scale == 1.0 an edit model produces the same results as normal sampling,
        # so is_edit_model is set to False to support AND composition.
        is_edit_model = shared.sd_model.cond_stage_key == "edit" and self.image_cfg_scale is not None and self.image_cfg_scale != 1.0

        conds_list, tensor = prompt_parser.reconstruct_multicond_batch(cond, self.step)
        uncond = prompt_parser.reconstruct_cond_batch(uncond, self.step)

        assert not is_edit_model or all(len(conds) == 1 for conds in conds_list), "AND is not supported for InstructPix2Pix checkpoint (unless using Image CFG scale = 1.0)"

        batch_size = len(conds_list)
        repeats = [len(conds_list[i]) for i in range(batch_size)]

        if shared.sd_model.model.conditioning_key == "crossattn-adm":
            image_uncond = torch.zeros_like(image_cond)
            make_condition_dict = lambda c_crossattn, c_adm: {"c_crossattn": c_crossattn, "c_adm": c_adm}
        else:
            image_uncond = image_cond
            make_condition_dict = lambda c_crossattn, c_concat: {"c_crossattn": c_crossattn, "c_concat": [c_concat]}

        if not is_edit_model:
            x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x])
            sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma])
            image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_uncond])
        else:
            x_in = torch.cat([torch.stack([x[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [x] + [x])
            sigma_in = torch.cat([torch.stack([sigma[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [sigma] + [sigma])
            image_cond_in = torch.cat([torch.stack([image_cond[i] for _ in range(n)]) for i, n in enumerate(repeats)] + [image_uncond] + [torch.zeros_like(self.init_latent)])

        denoiser_params = CFGDenoiserParams(x_in, image_cond_in, sigma_in, state.sampling_step, state.sampling_steps, tensor, uncond)
        cfg_denoiser_callback(denoiser_params)
        x_in = denoiser_params.x
        image_cond_in = denoiser_params.image_cond
        sigma_in = denoiser_params.sigma
        tensor = denoiser_params.text_cond
        uncond = denoiser_params.text_uncond
        skip_uncond = False

        # alternating uncond allows for higher thresholds without the quality loss normally expected from raising it
        if self.step % 2 and s_min_uncond > 0 and sigma[0] < s_min_uncond and not is_edit_model:
            skip_uncond = True
            x_in = x_in[:-batch_size]
            sigma_in = sigma_in[:-batch_size]

        if tensor.shape[1] == uncond.shape[1] or skip_uncond:
            if is_edit_model:
                cond_in = torch.cat([tensor, uncond, uncond])
            elif skip_uncond:
                cond_in = tensor
            else:
                cond_in = torch.cat([tensor, uncond])

            if shared.batch_cond_uncond:
                x_out = self.inner_model(x_in, sigma_in, cond=make_condition_dict([cond_in], image_cond_in))
            else:
                x_out = torch.zeros_like(x_in)
                for batch_offset in range(0, x_out.shape[0], batch_size):
                    a = batch_offset
                    b = a + batch_size
                    x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=make_condition_dict([cond_in[a:b]], image_cond_in[a:b]))
        else:
            x_out = torch.zeros_like(x_in)
            batch_size = batch_size * 2 if shared.batch_cond_uncond else batch_size
            for batch_offset in range(0, tensor.shape[0], batch_size):
                a = batch_offset
                b = min(a + batch_size, tensor.shape[0])

                if not is_edit_model:
                    c_crossattn = [tensor[a:b]]
                else:
                    # the original line passed uncond as torch.cat's dim argument
                    # (a bug); concatenating along the batch is the assumed intent
                    c_crossattn = [torch.cat([tensor[a:b], uncond])]

                x_out[a:b] = self.inner_model(x_in[a:b], sigma_in[a:b], cond=make_condition_dict(c_crossattn, image_cond_in[a:b]))

        if not skip_uncond:
            x_out[-uncond.shape[0]:] = self.inner_model(x_in[-uncond.shape[0]:], sigma_in[-uncond.shape[0]:], cond=make_condition_dict([uncond], image_cond_in[-uncond.shape[0]:]))

        denoised_image_indexes = [x[0][0] for x in conds_list]
        if skip_uncond:
            fake_uncond = torch.cat([x_out[i:i + 1] for i in denoised_image_indexes])
            x_out = torch.cat([x_out, fake_uncond])  # we skipped uncond denoising, so we put the cond-denoised image where the uncond-denoised image should be

        denoised_params = CFGDenoisedParams(x_out, state.sampling_step, state.sampling_steps, self.inner_model)
        cfg_denoised_callback(denoised_params)

        devices.test_for_nans(x_out, "unet")

        if opts.live_preview_content == "Prompt":
            sd_samplers_common.store_latent(torch.cat([x_out[i:i + 1] for i in denoised_image_indexes]))
        elif opts.live_preview_content == "Negative prompt":
            sd_samplers_common.store_latent(x_out[-uncond.shape[0]:])

        if is_edit_model:
            denoised = self.combine_denoised_for_edit_model(x_out, cond_scale)
        elif skip_uncond:
            denoised = self.combine_denoised(x_out, conds_list, uncond, 1.0)
        else:
            denoised = self.combine_denoised(x_out, conds_list, uncond, cond_scale)

        if self.mask is not None:
            denoised = self.init_latent * self.mask + self.nmask * denoised

        after_cfg_callback_params = AfterCFGCallbackParams(denoised, state.sampling_step, state.sampling_steps)
        cfg_after_cfg_callback(after_cfg_callback_params)
        denoised = after_cfg_callback_params.x

        self.step += 1
        return denoised

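# --- Illustrative note (not part of the original file) ---
# combine_denoised above implements standard classifier-free guidance: starting
# from the unconditional prediction, each conditional prediction is blended in as
#     denoised[i] = uncond[i] + sum_j weight_j * cond_scale * (cond_j - uncond[i])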
class TorchHijack:
    def __init__(self, sampler_noises):
        # Using a deque to efficiently receive the sampler_noises in the same order as the previous index-based
        # implementation.
        self.sampler_noises = deque(sampler_noises)

    def __getattr__(self, item):
        if item == 'randn_like':
            return self.randn_like

        if hasattr(torch, item):
            return getattr(torch, item)

        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{item}'")

    def randn_like(self, x):
        if self.sampler_noises:
            noise = self.sampler_noises.popleft()
            if noise.shape == x.shape:
                return noise

        if opts.randn_source == "CPU" or x.device.type == 'mps':
            return torch.randn_like(x, device=devices.cpu).to(x.device)
        else:
            return torch.randn_like(x)

class KDiffusionSampler:
    def __init__(self, funcname, sd_model):
        denoiser = k_diffusion.external.CompVisVDenoiser if sd_model.parameterization == "v" else k_diffusion.external.CompVisDenoiser

        self.model_wrap = denoiser(sd_model, quantize=shared.opts.enable_quantization)
        self.funcname = funcname
        self.func = getattr(k_diffusion.sampling, self.funcname)
        self.extra_params = sampler_extra_params.get(funcname, [])
        self.model_wrap_cfg = CFGDenoiser(self.model_wrap)
        self.sampler_noises = None
        self.stop_at = None
        self.eta = None
        self.config = None  # set by the function calling the constructor
        self.last_latent = None
        self.s_min_uncond = None

        self.conditioning_key = sd_model.model.conditioning_key

    def callback_state(self, d):
        step = d['i']
        latent = d["denoised"]
        if opts.live_preview_content == "Combined":
            sd_samplers_common.store_latent(latent)
        self.last_latent = latent

        if self.stop_at is not None and step > self.stop_at:
            raise sd_samplers_common.InterruptedException

        state.sampling_step = step
        shared.total_tqdm.update()

    def launch_sampling(self, steps, func):
        state.sampling_steps = steps
        state.sampling_step = 0

        try:
            return func()
        except sd_samplers_common.InterruptedException:
            return self.last_latent

    def number_of_needed_noises(self, p):
        return p.steps

    def initialize(self, p):
        self.model_wrap_cfg.mask = p.mask if hasattr(p, 'mask') else None
        self.model_wrap_cfg.nmask = p.nmask if hasattr(p, 'nmask') else None
        self.model_wrap_cfg.step = 0
        self.model_wrap_cfg.image_cfg_scale = getattr(p, 'image_cfg_scale', None)
        self.eta = p.eta if p.eta is not None else opts.eta_ancestral
        self.s_min_uncond = getattr(p, 's_min_uncond', 0.0)

        k_diffusion.sampling.torch = TorchHijack(self.sampler_noises if self.sampler_noises is not None else [])

        extra_params_kwargs = {}
        for param_name in self.extra_params:
            if hasattr(p, param_name) and param_name in inspect.signature(self.func).parameters:
                extra_params_kwargs[param_name] = getattr(p, param_name)

        if 'eta' in inspect.signature(self.func).parameters:
            if self.eta != 1.0:
                p.extra_generation_params["Eta"] = self.eta

            extra_params_kwargs['eta'] = self.eta

        return extra_params_kwargs

    def get_sigmas(self, p, steps):
        discard_next_to_last_sigma = self.config is not None and self.config.options.get('discard_next_to_last_sigma', False)
        if opts.always_discard_next_to_last_sigma and not discard_next_to_last_sigma:
            discard_next_to_last_sigma = True
            p.extra_generation_params["Discard penultimate sigma"] = True

        steps += 1 if discard_next_to_last_sigma else 0

        if p.sampler_noise_scheduler_override:
            sigmas = p.sampler_noise_scheduler_override(steps)
        elif self.config is not None and self.config.options.get('scheduler', None) == 'karras':
            sigma_min, sigma_max = (0.1, 10) if opts.use_old_karras_scheduler_sigmas else (self.model_wrap.sigmas[0].item(), self.model_wrap.sigmas[-1].item())

            sigmas = k_diffusion.sampling.get_sigmas_karras(n=steps, sigma_min=sigma_min, sigma_max=sigma_max, device=shared.device)
        else:
            sigmas = self.model_wrap.get_sigmas(steps)

        if discard_next_to_last_sigma:
            sigmas = torch.cat([sigmas[:-2], sigmas[-1:]])

        return sigmas
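    # --- Illustrative note (not part of the original file) ---
    # With discard_next_to_last_sigma, one extra step is scheduled and the
    # penultimate sigma is dropped, e.g. [s0, s1, s2, s3, 0] -> [s0, s1, s2, 0]:
    # the sampler still ends at sigma == 0 but skips the smallest nonzero level.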

    def create_noise_sampler(self, x, sigmas, p):
        """For DPM++ SDE: manually create noise sampler to enable deterministic results across different batch sizes"""
        if shared.opts.no_dpmpp_sde_batch_determinism:
            return None

        from k_diffusion.sampling import BrownianTreeNoiseSampler
        sigma_min, sigma_max = sigmas[sigmas > 0].min(), sigmas.max()
        current_iter_seeds = p.all_seeds[p.iteration * p.batch_size:(p.iteration + 1) * p.batch_size]
        return BrownianTreeNoiseSampler(x, sigma_min, sigma_max, seed=current_iter_seeds)

    def sample_img2img(self, p, x, noise, conditioning, unconditional_conditioning, steps=None, image_conditioning=None):
        steps, t_enc = sd_samplers_common.setup_img2img_steps(p, steps)

        sigmas = self.get_sigmas(p, steps)

        sigma_sched = sigmas[steps - t_enc - 1:]
        xi = x + noise * sigma_sched[0]

        extra_params_kwargs = self.initialize(p)
        parameters = inspect.signature(self.func).parameters

        if 'sigma_min' in parameters:
            # the last sigma is zero, which isn't allowed by DPM Fast & Adaptive, so take the value before last
            extra_params_kwargs['sigma_min'] = sigma_sched[-2]
        if 'sigma_max' in parameters:
            extra_params_kwargs['sigma_max'] = sigma_sched[0]
        if 'n' in parameters:
            extra_params_kwargs['n'] = len(sigma_sched) - 1
        if 'sigma_sched' in parameters:
            extra_params_kwargs['sigma_sched'] = sigma_sched
        if 'sigmas' in parameters:
            extra_params_kwargs['sigmas'] = sigma_sched

        if self.config.options.get('brownian_noise', False):
            noise_sampler = self.create_noise_sampler(x, sigmas, p)
            extra_params_kwargs['noise_sampler'] = noise_sampler

        self.model_wrap_cfg.init_latent = x
        self.last_latent = x
        extra_args = {
            'cond': conditioning,
            'image_cond': image_conditioning,
            'uncond': unconditional_conditioning,
            'cond_scale': p.cfg_scale,
            's_min_uncond': self.s_min_uncond
        }

        samples = self.launch_sampling(t_enc + 1, lambda: self.func(self.model_wrap_cfg, xi, extra_args=extra_args, disable=False, callback=self.callback_state, **extra_params_kwargs))

        return samples

    def sample(self, p, x, conditioning, unconditional_conditioning, steps=None, image_conditioning=None):
        steps = steps or p.steps

        sigmas = self.get_sigmas(p, steps)

        x = x * sigmas[0]

        extra_params_kwargs = self.initialize(p)
        parameters = inspect.signature(self.func).parameters

        if 'sigma_min' in parameters:
            extra_params_kwargs['sigma_min'] = self.model_wrap.sigmas[0].item()
            extra_params_kwargs['sigma_max'] = self.model_wrap.sigmas[-1].item()
            if 'n' in parameters:
                extra_params_kwargs['n'] = steps
        else:
            extra_params_kwargs['sigmas'] = sigmas

        if self.config.options.get('brownian_noise', False):
            noise_sampler = self.create_noise_sampler(x, sigmas, p)
            extra_params_kwargs['noise_sampler'] = noise_sampler

        self.last_latent = x
        samples = self.launch_sampling(steps, lambda: self.func(self.model_wrap_cfg, x, extra_args={
            'cond': conditioning,
            'image_cond': image_conditioning,
            'uncond': unconditional_conditioning,
            'cond_scale': p.cfg_scale,
            's_min_uncond': self.s_min_uncond
        }, disable=False, callback=self.callback_state, **extra_params_kwargs))

        return samples