Commit 91fc62a by xingzhehe
1 Parent(s): 354ef90

try first commit

.gitignore ADDED
@@ -0,0 +1,8 @@
+ checkpoints
+ diffusers_cache
+ hub
+ wandb
+ __pycache__
+ *.pyc
+ flagged
+ gif

app.py ADDED
@@ -0,0 +1,71 @@
+ from models.model import Model as AutoLink
+ import gradio as gr
+ import PIL
+ import torch
+ import os
+ import imageio
+ import numpy as np
+
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+
+ autolink = AutoLink.load_from_checkpoint(os.path.join("checkpoints", "celeba_wild_k32_m0.8_b16_t0.00075_sklr512", "model.ckpt"))
+ autolink.to(device)
+
+
+ def predict_image(image_in: PIL.Image.Image) -> np.ndarray:
+     if image_in is None:
+         raise gr.Error("Please upload a video or image.")
+     edge_map = autolink(image_in)
+     return edge_map
+
+
+ def predict_video(video_in: str) -> str:
+     if video_in is None:
+         raise gr.Error("Please upload a video or image.")
+     video_out = video_in[:-4] + '_out.mp4'
+     video_in = imageio.get_reader(video_in)
+     writer = imageio.get_writer(video_out, mode='I', fps=video_in.get_meta_data()['fps'])
+     for image_in in video_in:
+         image_in = PIL.Image.fromarray(image_in)
+         edge_map = autolink(image_in)
+         writer.append_data(np.array(edge_map))
+     writer.close()
+     return video_out
+
+
+ with gr.Blocks() as blocks:
+     gr.Markdown("""
+     # AutoLink
+     ## Self-supervised Learning of Human Skeletons and Object Outlines by Linking Keypoints
+     * [Paper](https://arxiv.org/abs/2205.10636)
+     * [Project Page](https://xingzhehe.github.io/autolink/)
+     * [GitHub](https://github.com/xingzhehe/AutoLink-Self-supervised-Learning-of-Human-Skeletons-and-Object-Outlines-by-Linking-Keypoints)
+     """)
+
+     with gr.Tab("Image"):
+         with gr.Row():
+             with gr.Column():
+                 image_in = gr.Image(source="upload", type="pil", visible=True)
+             with gr.Column():
+                 image_out = gr.Image()
+         run_btn = gr.Button("Run")
+         run_btn.click(fn=predict_image, inputs=[image_in], outputs=[image_out])
+         gr.Examples(fn=predict_image, examples=[["assets/jackie_chan.jpg"]],
+                     inputs=[image_in], outputs=[image_out],
+                     cache_examples=False)
+
+     with gr.Tab("Video") as tab:
+         with gr.Row():
+             with gr.Column():
+                 video_in = gr.Video(source="upload", type="mp4")
+             with gr.Column():
+                 video_out = gr.Video()
+         run_btn = gr.Button("Run")
+         run_btn.click(fn=predict_video, inputs=[video_in], outputs=[video_out])
+         gr.Examples(fn=predict_video, examples=[["assets/00344.mp4"],],
+                     inputs=[video_in], outputs=[video_out],
+                     cache_examples=False)
+
+ blocks.launch()
+
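For reference, the demo's model wrapper can also be exercised outside Gradio. A minimal sketch, assuming the checkpoint directory referenced above has been downloaded into ./checkpoints (the output file name is hypothetical):

import os
import PIL.Image
import imageio
import torch
from models.model import Model as AutoLink

device = 'cuda' if torch.cuda.is_available() else 'cpu'
autolink = AutoLink.load_from_checkpoint(
    os.path.join("checkpoints", "celeba_wild_k32_m0.8_b16_t0.00075_sklr512", "model.ckpt"))
autolink.to(device)

image = PIL.Image.open("assets/jackie_chan.jpg").convert("RGB")
overlay = autolink(image)                          # RGB uint8 array: the image with edge map and keypoints drawn on it
imageio.imwrite("jackie_chan_edges.png", overlay)  # hypothetical output path
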
assets/00344.mp4 ADDED
Binary file (165 kB).
 
assets/jackie_chan.jpg ADDED
models/decoder.py ADDED
@@ -0,0 +1,170 @@
+ import torch
+ import torch.nn.functional as F
+ from torch import nn
+ from typing import Union
+ import pytorch_lightning as pl
+
+
+ def gen_grid2d(grid_size: int, left_end: float=-1, right_end: float=1) -> torch.Tensor:
+     """
+     Generate a grid of size (grid_size, grid_size, 2) with coordinate values in the range [left_end, right_end]
+     """
+     x = torch.linspace(left_end, right_end, grid_size)
+     x, y = torch.meshgrid([x, x], indexing='ij')
+     grid = torch.cat((x.reshape(-1, 1), y.reshape(-1, 1)), dim=1).reshape(grid_size, grid_size, 2)
+     return grid
+
+
+ def draw_lines(paired_joints: torch.Tensor, heatmap_size: int=16, thick: Union[float, torch.Tensor]=1e-2) -> torch.Tensor:
+     """
+     Draw line segments on a grid as heatmaps with exponential falloff.
+     :param paired_joints: (batch_size, n_points, 2, 2) start and end point of each segment
+     :return: (batch_size, n_points, heatmap_size, heatmap_size), exp(-d^2 / thick) where d is the distance of each grid cell to the segment
+     """
+     bs, n_points, _, _ = paired_joints.shape
+     start = paired_joints[:, :, 0, :]  # (batch_size, n_points, 2)
+     end = paired_joints[:, :, 1, :]  # (batch_size, n_points, 2)
+     paired_diff = end - start  # (batch_size, n_points, 2)
+     grid = gen_grid2d(heatmap_size).to(paired_joints.device).reshape(1, 1, -1, 2)
+     diff_to_start = grid - start.unsqueeze(-2)  # (batch_size, n_points, heatmap_size**2, 2)
+     # (batch_size, n_points, heatmap_size**2)
+     t = (diff_to_start @ paired_diff.unsqueeze(-1)).squeeze(-1) / (1e-8 + paired_diff.square().sum(dim=-1, keepdim=True))
+
+     diff_to_end = grid - end.unsqueeze(-2)  # (batch_size, n_points, heatmap_size**2, 2)
+
+     before_start = (t <= 0).float() * diff_to_start.square().sum(dim=-1)
+     after_end = (t >= 1).float() * diff_to_end.square().sum(dim=-1)
+     between_start_end = (0 < t).float() * (t < 1).float() * (grid - (start.unsqueeze(-2) + t.unsqueeze(-1) * paired_diff.unsqueeze(-2))).square().sum(dim=-1)
+
+     squared_dist = (before_start + after_end + between_start_end).reshape(bs, n_points, heatmap_size, heatmap_size)
+     heatmaps = torch.exp(- squared_dist / thick)
+     return heatmaps
+
+
+ class DownBlock(nn.Module):
+     def __init__(self, in_channels: int, out_channels: int) -> None:
+         super().__init__()
+         self.net = nn.Sequential(
+             nn.Conv2d(in_channels, out_channels, kernel_size=(3, 3), padding=1),
+             nn.BatchNorm2d(out_channels),
+             nn.LeakyReLU(0.2, True),
+             nn.Conv2d(out_channels, out_channels, kernel_size=(3, 3), padding=1),
+             nn.BatchNorm2d(out_channels),
+             nn.Upsample(scale_factor=0.5, mode='bilinear', align_corners=False),
+             nn.LeakyReLU(0.2, True),
+         )
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         x = self.net(x)
+         return x
+
+
+ class UpBlock(nn.Module):
+     def __init__(self, in_channels: int, out_channels: int) -> None:
+         super().__init__()
+         self.net = nn.Sequential(
+             nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
+             nn.Conv2d(in_channels, out_channels, kernel_size=(3, 3), padding=1),
+             nn.BatchNorm2d(out_channels),
+             nn.LeakyReLU(0.2, True),
+             nn.Conv2d(out_channels, out_channels, kernel_size=(3, 3), padding=1),
+             nn.BatchNorm2d(out_channels),
+             nn.LeakyReLU(0.2, True),
+         )
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         x = self.net(x)
+         return x
+
+
+ class Decoder(nn.Module):
+     def __init__(self, hyper_paras: pl.LightningModule.hparams) -> None:
+         super().__init__()
+         self.n_parts = hyper_paras['n_parts']
+         self.thick = hyper_paras['thick']
+         self.sklr = hyper_paras['sklr']
+         self.skeleton_idx = torch.triu_indices(self.n_parts, self.n_parts, offset=1)
+         self.n_skeleton = len(self.skeleton_idx[0])
+
+         self.alpha = nn.Parameter(torch.tensor(1.0), requires_grad=True)
+
+         skeleton_scalar = (torch.randn(self.n_parts, self.n_parts) / 10 - 4) / self.sklr
+         self.skeleton_scalar = nn.Parameter(skeleton_scalar, requires_grad=True)
+
+         self.down0 = nn.Sequential(
+             nn.Conv2d(3 + 1, 64, kernel_size=(3, 3), padding=1),
+             nn.LeakyReLU(0.2, True),
+         )
+
+         self.down1 = DownBlock(64, 128)  # 64
+         self.down2 = DownBlock(128, 256)  # 32
+         self.down3 = DownBlock(256, 512)  # 16
+         self.down4 = DownBlock(512, 512)  # 8
+
+         self.up1 = UpBlock(512, 512)  # 16
+         self.up2 = UpBlock(512 + 512, 256)  # 32
+         self.up3 = UpBlock(256 + 256, 128)  # 64
+         self.up4 = UpBlock(128 + 128, 64)  # 128
+
+         self.conv = nn.Conv2d(64 + 64, 3, kernel_size=(3, 3), padding=1)
+
+         for m in self.modules():
+             if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
+                 nn.init.kaiming_normal_(m.weight, a=0.2)
+                 if m.bias is not None:
+                     m.bias.data.zero_()
+
+     def skeleton_scalar_matrix(self) -> torch.Tensor:
+         """
+         Give the skeleton scalar matrix
+         :return: (n_parts, n_parts)
+         """
+         skeleton_scalar = F.softplus(self.skeleton_scalar * self.sklr)
+         skeleton_scalar = torch.triu(skeleton_scalar, diagonal=1)
+         skeleton_scalar = skeleton_scalar + skeleton_scalar.transpose(1, 0)
+         return skeleton_scalar
+
+     def rasterize(self, keypoints: torch.Tensor, output_size: int=128) -> torch.Tensor:
+         """
+         Generate edge heatmap from keypoints, where edges are weighted by the learned scalars.
+         :param keypoints: (batch_size, n_points, 2)
+         :return: (batch_size, 1, output_size, output_size)
+         """
+
+         paired_joints = torch.stack([keypoints[:, self.skeleton_idx[0], :2], keypoints[:, self.skeleton_idx[1], :2]], dim=2)
+
+         skeleton_scalar = F.softplus(self.skeleton_scalar * self.sklr)
+         skeleton_scalar = torch.triu(skeleton_scalar, diagonal=1)
+         skeleton_scalar = skeleton_scalar[self.skeleton_idx[0], self.skeleton_idx[1]].reshape(1, self.n_skeleton, 1, 1)
+
+         skeleton_heatmap_sep = draw_lines(paired_joints, heatmap_size=output_size, thick=self.thick)
+         skeleton_heatmap_sep = skeleton_heatmap_sep * skeleton_scalar.reshape(1, self.n_skeleton, 1, 1)
+         skeleton_heatmap = skeleton_heatmap_sep.max(dim=1, keepdim=True)[0]
+         return skeleton_heatmap
+
+     def forward(self, input_dict: dict) -> dict:
+         skeleton_heatmap = self.rasterize(input_dict['keypoints'])
+
+         x = torch.cat([input_dict['damaged_img'] * self.alpha, skeleton_heatmap], dim=1)
+
+         down_128 = self.down0(x)
+         down_64 = self.down1(down_128)
+         down_32 = self.down2(down_64)
+         down_16 = self.down3(down_32)
+         down_8 = self.down4(down_16)
+         up_8 = down_8
+         up_16 = torch.cat([self.up1(up_8), down_16], dim=1)
+         up_32 = torch.cat([self.up2(up_16), down_32], dim=1)
+         up_64 = torch.cat([self.up3(up_32), down_64], dim=1)
+         up_128 = torch.cat([self.up4(up_64), down_128], dim=1)
+         img = self.conv(up_128)
+
+         input_dict['heatmap'] = skeleton_heatmap
+         input_dict['img'] = img
+         return input_dict
+
+
+ if __name__ == '__main__':
+     # the constructor needs n_parts, thick and sklr; values here follow the released celeba_wild checkpoint name (k32, t0.00075, sklr512)
+     model = Decoder({'n_parts': 32, 'thick': 0.00075, 'sklr': 512})
+     print(sum(p.numel() for p in model.parameters() if p.requires_grad))
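As a sanity check on the rasterizer, draw_lines can be called directly on a single hand-made segment; a small sketch (shapes follow the docstring above):

import torch
from models.decoder import draw_lines

# one batch, one segment from (-0.5, -0.5) to (0.5, 0.5) on the [-1, 1] grid
segment = torch.tensor([[[[-0.5, -0.5], [0.5, 0.5]]]])      # (batch, n_segments, 2 endpoints, 2 coords)
heatmap = draw_lines(segment, heatmap_size=16, thick=1e-2)
print(heatmap.shape)         # torch.Size([1, 1, 16, 16])
print(heatmap.max().item())  # ~1.0 on the segment, decaying as exp(-d^2 / thick) away from it
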
models/encoder.py ADDED
@@ -0,0 +1,106 @@
+ import torch
+ import torch.nn.functional as F
+ from torch import nn
+ import pytorch_lightning as pl
+
+
+ def gen_grid2d(grid_size: int, left_end: float=-1, right_end: float=1) -> torch.Tensor:
+     """
+     Generate a grid of size (grid_size, grid_size, 2) with coordinate values in the range [left_end, right_end]
+     """
+     x = torch.linspace(left_end, right_end, grid_size)
+     x, y = torch.meshgrid([x, x], indexing='ij')
+     grid = torch.cat((x.reshape(-1, 1), y.reshape(-1, 1)), dim=1).reshape(grid_size, grid_size, 2)
+     return grid
+
+
+ class ResBlock(nn.Module):
+     def __init__(self, in_channels: int, out_channels: int) -> None:
+         super().__init__()
+         self.conv_res = nn.Sequential(
+             nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
+             nn.Upsample(scale_factor=0.5, mode='bilinear', align_corners=False),
+             nn.BatchNorm2d(out_channels)
+         )
+
+         self.net = nn.Sequential(
+             nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
+             nn.Upsample(scale_factor=0.5, mode='bilinear', align_corners=False),
+             nn.BatchNorm2d(out_channels),
+             nn.LeakyReLU(0.2, True),
+             nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, stride=1),
+             nn.BatchNorm2d(out_channels)
+         )
+
+         self.relu = nn.LeakyReLU(0.2, True)
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         res = self.conv_res(x)
+         x = self.net(x)
+         return self.relu(x + res)
+
+
+ class TransposedBlock(nn.Module):
+     def __init__(self, in_channels: int, out_channels: int) -> None:
+         super().__init__()
+         self.net = nn.Sequential(
+             nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
+             nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
+             nn.BatchNorm2d(out_channels),
+             nn.LeakyReLU(0.2, True),
+         )
+
+     def forward(self, x: torch.Tensor) -> torch.Tensor:
+         x = self.net(x)
+         return x
+
+
+ class Detector(nn.Module):
+     def __init__(self, hyper_paras: pl.utilities.parsing.AttributeDict) -> None:
+         super().__init__()
+         self.n_parts = hyper_paras.n_parts
+         self.output_size = 32
+
+         self.conv = nn.Sequential(
+             ResBlock(3, 64),  # 64
+             ResBlock(64, 128),  # 32
+             ResBlock(128, 256),  # 16
+             ResBlock(256, 512),  # 8
+             TransposedBlock(512, 256),  # 16
+             TransposedBlock(256, 128),  # 32
+             nn.Conv2d(128, self.n_parts, kernel_size=3, padding=1),
+         )
+
+         grid = gen_grid2d(self.output_size).reshape(1, 1, self.output_size ** 2, 2)
+         self.coord = nn.Parameter(grid, requires_grad=False)
+
+     def forward(self, input_dict: dict) -> dict:
+         img = F.interpolate(input_dict['img'], size=(128, 128), mode='bilinear', align_corners=False)
+         prob_map = self.conv(img).reshape(img.shape[0], self.n_parts, -1, 1)
+         prob_map = F.softmax(prob_map, dim=2)
+         keypoints = self.coord * prob_map
+         keypoints = keypoints.sum(dim=2)
+         prob_map = prob_map.reshape(keypoints.shape[0], self.n_parts, self.output_size, self.output_size)
+         return {'keypoints': keypoints, 'prob_map': prob_map}
+
+
+ class Encoder(nn.Module):
+     def __init__(self, hyper_paras: pl.utilities.parsing.AttributeDict) -> None:
+         super().__init__()
+         self.detector = Detector(hyper_paras)
+         self.missing = 0.8  # hyper_paras.missing
+         self.block = 16  # hyper_paras.block
+
+         for m in self.modules():
+             if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
+                 nn.init.kaiming_normal_(m.weight, a=0.2)
+                 if m.bias is not None:
+                     m.bias.data.zero_()
+
+     def forward(self, input_dict: dict, need_masked_img: bool=False) -> dict:
+         mask_batch = self.detector(input_dict)
+         if need_masked_img:
+             damage_mask = torch.zeros(input_dict['img'].shape[0], 1, self.block, self.block, device=input_dict['img'].device).uniform_() > self.missing
+             damage_mask = F.interpolate(damage_mask.to(input_dict['img']), size=input_dict['img'].shape[-1], mode='nearest')
+             mask_batch['damaged_img'] = input_dict['img'] * damage_mask
+         return mask_batch
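The Detector head is a soft-argmax: a softmax over the 32x32 probability map, contracted against the fixed coordinate grid. A quick shape-check sketch with a dummy config (n_parts=32 matches the checkpoint used by app.py; AttributeDict is the pytorch_lightning container the constructor expects):

import torch
from pytorch_lightning.utilities.parsing import AttributeDict
from models.encoder import Encoder

encoder = Encoder(AttributeDict(n_parts=32))
out = encoder({'img': torch.randn(2, 3, 128, 128)}, need_masked_img=True)
print(out['keypoints'].shape)    # torch.Size([2, 32, 2]), coordinates in [-1, 1]
print(out['prob_map'].shape)     # torch.Size([2, 32, 32, 32])
print(out['damaged_img'].shape)  # torch.Size([2, 3, 128, 128]), image with random 16x16 blocks zeroed out
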
models/model.py ADDED
@@ -0,0 +1,58 @@
+ import importlib
+ import PIL
+ import pytorch_lightning as pl
+ import torch.utils.data
+ import wandb
+ from typing import Union
+ from torchvision import transforms
+ from utils_.loss import VGGPerceptualLoss
+ from utils_.visualization import *
+ import torch.nn.functional as F
+ import matplotlib.pyplot as plt
+
+
+ class Model(pl.LightningModule):
+     def __init__(self, **kwargs):
+         super().__init__()
+         self.save_hyperparameters()
+         self.encoder = importlib.import_module('models.' + self.hparams.encoder).Encoder(self.hparams)
+         self.decoder = importlib.import_module('models.' + self.hparams.decoder).Decoder(self.hparams)
+         self.batch_size = self.hparams.batch_size
+
+         self.vgg_loss = VGGPerceptualLoss()
+
+         self.transform = transforms.Compose([
+             transforms.ToTensor(),
+             transforms.Normalize(0.5, 0.5)
+         ])
+
+     def forward(self, x: PIL.Image.Image) -> np.ndarray:
+         """
+         :param x: a PIL image
+         :return: an RGB array of the same size as x: the input at half brightness with the max-normalized edge map and keypoints overlaid
+         """
+         w, h = x.size
+         x = self.transform(x).unsqueeze(0)
+         x = x.to(self.device)
+         kp = self.encoder({'img': x})['keypoints']
+         edge_map = self.decoder.rasterize(kp, output_size=64)
+         bs = edge_map.shape[0]
+         edge_map = edge_map / (1e-8 + edge_map.reshape(bs, 1, -1).max(dim=2, keepdim=True)[0].reshape(bs, 1, 1, 1))
+         edge_map = torch.cat([edge_map] * 3, dim=1)
+         edge_map = F.interpolate(edge_map, size=(h, w), mode='bilinear', align_corners=False)
+         x = torch.clamp(edge_map + (x * 0.5 + 0.5) * 0.5, min=0, max=1)
+         x = transforms.ToPILImage()(x[0].detach().cpu())
+
+         fig = plt.figure(figsize=(1, h/w), dpi=w)
+         fig.tight_layout(pad=0)
+         plt.axis('off')
+         plt.imshow(x)
+         kp = kp[0].detach().cpu() * 0.5 + 0.5
+         kp[:, 1] *= w
+         kp[:, 0] *= h
+         plt.scatter(kp[:, 1], kp[:, 0], s=min(w/h, min(1, h/w)), marker='o')
+         ncols, nrows = fig.canvas.get_width_height()
+         fig.canvas.draw()
+         plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8).reshape(nrows, ncols, 3)
+         plt.close(fig)
+         return plot

requirements.txt ADDED
@@ -0,0 +1,15 @@
+ numpy
+ torch==1.13.1
+ torchvision
+ matplotlib
+ scipy
+ h5py
+ pandas
+ kornia
+ wandb
+ pytorch-lightning==1.5.10
+ seaborn
+ scikit-learn
+ imageio
+ imageio-ffmpeg
+ gradio

utils_/loss.py ADDED
@@ -0,0 +1,48 @@
+ import os
+
+ import torch
+ import torch.nn.functional as F
+ import torchvision
+
+
+ class VGGPerceptualLoss(torch.nn.Module):
+     def __init__(self):
+         super(VGGPerceptualLoss, self).__init__()
+         os.environ['TORCH_HOME'] = os.path.abspath(os.getcwd())
+         blocks = [torchvision.models.vgg16().features[:4].eval(),
+                   torchvision.models.vgg16().features[4:9].eval(),
+                   torchvision.models.vgg16().features[9:16].eval(),
+                   torchvision.models.vgg16().features[16:23].eval()]
+         for bl in blocks:
+             for p in bl.parameters():
+                 p.requires_grad = False
+         self.blocks = torch.nn.ModuleList(blocks)
+
+         self.register_buffer("mean", torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1))
+         self.register_buffer("std", torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1))
+
+     def forward(self, x, y):
+         x = x * 0.5 + 0.5
+         y = y * 0.5 + 0.5
+         x = (x - self.mean) / self.std
+         y = (y - self.mean) / self.std
+
+         x = F.interpolate(x, mode='bilinear', size=(224, 224), align_corners=False)
+         y = F.interpolate(y, mode='bilinear', size=(224, 224), align_corners=False)
+         perceptual_loss = 0.0
+         style_loss = 0.0
+
+         for i, block in enumerate(self.blocks):
+             x = block(x)
+             y = block(y)
+
+             perceptual_loss += torch.nn.functional.l1_loss(x, y)
+
+             # b, ch, h, w = x.shape
+             # act_x = x.reshape(x.shape[0], x.shape[1], -1)
+             # act_y = y.reshape(y.shape[0], y.shape[1], -1)
+             # gram_x = act_x @ act_x.permute(0, 2, 1) / (ch * h * w)
+             # gram_y = act_y @ act_y.permute(0, 2, 1) / (ch * h * w)
+             # style_loss += torch.nn.functional.l1_loss(gram_x, gram_y)
+
+         return perceptual_loss  # , style_loss
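Usage is a plain forward call on a pair of images in [-1, 1]. A minimal sketch with random tensors; note the constructor above builds the VGG16 blocks without downloading pretrained weights, so this only exercises shapes and the forward pass:

import torch
from utils_.loss import VGGPerceptualLoss

loss_fn = VGGPerceptualLoss()
pred = torch.rand(2, 3, 128, 128) * 2 - 1    # stand-in reconstruction in [-1, 1]
target = torch.rand(2, 3, 128, 128) * 2 - 1
loss = loss_fn(pred, target)                 # scalar: L1 distance summed over four VGG16 feature blocks
print(loss.item())
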
utils_/visualization.py ADDED
@@ -0,0 +1,98 @@
+ import matplotlib.gridspec as gridspec
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import seaborn as sns
+ import torch
+ import torchvision
+ from matplotlib import colors
+
+
+ def get_part_color(n_parts):
+     colormap = ('red', 'blue', 'yellow', 'magenta', 'green', 'indigo', 'darkorange', 'cyan', 'pink', 'yellowgreen',
+                 'rosybrown', 'coral', 'chocolate', 'bisque', 'gold', 'yellowgreen', 'aquamarine', 'deepskyblue', 'navy', 'orchid',
+                 'maroon', 'sienna', 'olive', 'lightgreen', 'teal', 'steelblue', 'slateblue', 'darkviolet', 'fuchsia', 'crimson',
+                 'honeydew', 'thistle',
+                 'red', 'blue', 'yellow', 'magenta', 'green', 'indigo', 'darkorange', 'cyan', 'pink', 'yellowgreen',
+                 'rosybrown', 'coral', 'chocolate', 'bisque', 'gold', 'yellowgreen', 'aquamarine', 'deepskyblue', 'navy', 'orchid',
+                 'maroon', 'sienna', 'olive', 'lightgreen', 'teal', 'steelblue', 'slateblue', 'darkviolet', 'fuchsia', 'crimson',
+                 'honeydew', 'thistle')[:n_parts]
+     part_color = []
+     for i in range(n_parts):
+         part_color.append(colors.to_rgb(colormap[i]))
+     part_color = np.array(part_color)
+
+     return part_color
+
+
+ def denormalize(img):
+     mean = torch.tensor((0.5, 0.5, 0.5), device=img.device).reshape(1, 3, 1, 1)
+     std = torch.tensor((0.5, 0.5, 0.5), device=img.device).reshape(1, 3, 1, 1)
+     img = img * std + mean
+     img = torch.clamp(img, min=0, max=1)
+     return img
+
+
+ def draw_matrix(mat):
+     fig = plt.figure()
+     sns.heatmap(mat, annot=True, fmt='.2f', cmap="YlGnBu")
+
+     ncols, nrows = fig.canvas.get_width_height()
+     fig.canvas.draw()
+     plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8).reshape(nrows, ncols, 3)
+     plt.close(fig)
+     return plot
+
+
+ def draw_kp_grid(img, kp):
+     kp_color = get_part_color(kp.shape[1])
+     img = img[:64].permute(0, 2, 3, 1).detach().cpu()
+     kp = kp.detach().cpu()[:64]
+
+     fig = plt.figure(figsize=(8, 8))
+     gs = gridspec.GridSpec(8, 8)
+     gs.update(wspace=0, hspace=0)
+
+     for i, sample in enumerate(img):
+         ax = plt.subplot(gs[i])
+         plt.axis('off')
+         ax.set_xticklabels([])
+         ax.set_yticklabels([])
+         ax.imshow(sample, vmin=0, vmax=1)
+         ax.scatter(kp[i, :, 1], kp[i, :, 0], c=kp_color, s=20, marker='+')
+
+     ncols, nrows = fig.canvas.get_width_height()
+     fig.canvas.draw()
+     plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8).reshape(nrows, ncols, 3)
+     plt.close(fig)
+     return plot
+
+
+ def draw_kp_grid_unnorm(img, kp):
+     kp_color = get_part_color(kp.shape[1])
+     img = img[:64].permute(0, 2, 3, 1).detach().cpu()
+     kp = kp.detach().cpu()[:64]
+
+     fig = plt.figure(figsize=(8, 8))
+     gs = gridspec.GridSpec(8, 8)
+     gs.update(wspace=0, hspace=0)
+
+     for i, sample in enumerate(img):
+         ax = plt.subplot(gs[i])
+         plt.axis('off')
+         ax.set_xticklabels([])
+         ax.set_yticklabels([])
+         ax.imshow(sample)
+         ax.scatter(kp[i, :, 1], kp[i, :, 0], c=kp_color, s=20, marker='+')
+
+     ncols, nrows = fig.canvas.get_width_height()
+     fig.canvas.draw()
+     plot = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8).reshape(nrows, ncols, 3)
+     plt.close(fig)
+     return plot
+
+
+ def draw_img_grid(img):
+     img = img[:64].detach().cpu()
+     nrow = min(8, img.shape[0])
+     img = torchvision.utils.make_grid(img[:64], nrow=nrow).permute(1, 2, 0)
+     return torch.clamp(img * 255, min=0, max=255).numpy().astype(np.uint8)
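These helpers produce numpy arrays suitable for logging. A short sketch of draw_img_grid on a stand-in batch, assuming values are normalized to [-1, 1] as elsewhere in the repo:

import torch
from utils_.visualization import denormalize, draw_img_grid

imgs = torch.rand(16, 3, 128, 128) * 2 - 1   # stand-in batch in [-1, 1]
grid = draw_img_grid(denormalize(imgs))      # uint8 array (H, W, 3): 2 rows of 8 images
print(grid.shape, grid.dtype)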