File size: 2,580 Bytes
bfd34e9
 
 
 
f1cc496
 
 
 
bfd34e9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
736e88e
f1cc496
736e88e
 
 
 
bfd34e9
736e88e
f1cc496
 
 
 
736e88e
 
bfd34e9
736e88e
 
 
bfd34e9
 
 
 
 
 
 
da1e12f
bfd34e9
 
 
 
da1e12f
bfd34e9
 
 
 
 
 
 
 
 
 
da1e12f
 
 
 
 
 
bfd34e9
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import torch
from pytorch_lightning import seed_everything
from tqdm import tqdm

from src.utils.iimage import IImage
from src.smplfusion import share, router, attentionpatch, transformerpatch
from src.smplfusion.patches.attentionpatch import painta
from src.utils import tokenize

verbose = False


def init_painta(token_idx):
    # Initialize painta
    router.attention_forward = attentionpatch.painta.forward
    router.basic_transformer_forward = transformerpatch.painta.forward
    painta.painta_on = True
    painta.painta_res = [16, 32]
    painta.token_idx = token_idx

def run(
    ddim,
    method,
    prompt,
    image,
    mask,
    seed=0,
    eta=0.0,
    negative_prompt='',
    positive_prompt='',
    num_steps=50,
    guidance_scale=7.5
):
    image = image.padx(64)
    mask = mask.dilate(1).alpha().padx(64)
    full_prompt = prompt
    if positive_prompt != '':
        full_prompt = f'{prompt}, {positive_prompt}'
    dt = 1000 // num_steps

    # Text condition
    context = ddim.encoder.encode([negative_prompt, full_prompt])
    token_idx = list(range(1, tokenize(prompt).index('<end_of_text>')))
    token_idx += [tokenize(full_prompt).index('<end_of_text>')]

    # Setup painta if needed
    if 'painta' in method: init_painta(token_idx)
    else: router.reset() 
    
    # Image condition    
    unet_condition = ddim.get_inpainting_condition(image, mask)
    dtype = unet_condition.dtype
    share.set_mask(mask)
    
    # Starting latent
    seed_everything(seed)
    zt = torch.randn((1,4) + unet_condition.shape[2:]).cuda().to(dtype)

    # Turn off gradients
    ddim.unet.requires_grad_(False)
    
    pbar = tqdm(range(999, 0, -dt)) if verbose else range(999, 0, -dt)

    for timestep in share.DDIMIterator(pbar):
        if share.timestep <= 500: router.reset()

        _zt = zt if unet_condition is None else torch.cat([zt, unet_condition], 1)
        with torch.autocast('cuda'):
            eps_uncond, eps = ddim.unet(
                torch.cat([_zt, _zt]).to(dtype), 
                timesteps = torch.tensor([timestep, timestep]).cuda(), 
                context = context
            ).chunk(2)

        eps = (eps_uncond + guidance_scale * (eps - eps_uncond))
        z0 = (zt - share.schedule.sqrt_one_minus_alphas[timestep] * eps) / share.schedule.sqrt_alphas[timestep]
        zt = share.schedule.sqrt_alphas[timestep - dt] * z0 + share.schedule.sqrt_one_minus_alphas[timestep - dt] * eps

    with torch.no_grad():
        output_image = IImage(ddim.vae.decode(z0 / ddim.config.scale_factor))
    
    return output_image