ArantxaCasanova committed
Commit a00ee36
Parent(s): 03f1c62
First model version
This view is limited to 50 files because it contains too many changes; see the raw diff for the full changeset.
- BigGAN_PyTorch/BigGAN.py +711 -0
- BigGAN_PyTorch/BigGANdeep.py +734 -0
- BigGAN_PyTorch/LICENSE +21 -0
- BigGAN_PyTorch/README.md +144 -0
- BigGAN_PyTorch/TFHub/README.md +14 -0
- BigGAN_PyTorch/TFHub/biggan_v1.py +441 -0
- BigGAN_PyTorch/TFHub/converter.py +558 -0
- BigGAN_PyTorch/animal_hash.py +2652 -0
- BigGAN_PyTorch/config_files/COCO_Stuff/BigGAN/unconditional_biggan_res128.json +44 -0
- BigGAN_PyTorch/config_files/COCO_Stuff/BigGAN/unconditional_biggan_res256.json +44 -0
- BigGAN_PyTorch/config_files/COCO_Stuff/IC-GAN/icgan_res128_ddp.json +51 -0
- BigGAN_PyTorch/config_files/COCO_Stuff/IC-GAN/icgan_res256_ddp.json +51 -0
- BigGAN_PyTorch/config_files/ImageNet-LT/BigGAN/biggan_res128.json +48 -0
- BigGAN_PyTorch/config_files/ImageNet-LT/BigGAN/biggan_res256.json +48 -0
- BigGAN_PyTorch/config_files/ImageNet-LT/BigGAN/biggan_res64.json +48 -0
- BigGAN_PyTorch/config_files/ImageNet-LT/cc_IC-GAN/cc_icgan_res128.json +56 -0
- BigGAN_PyTorch/config_files/ImageNet-LT/cc_IC-GAN/cc_icgan_res256.json +56 -0
- BigGAN_PyTorch/config_files/ImageNet-LT/cc_IC-GAN/cc_icgan_res64.json +56 -0
- BigGAN_PyTorch/config_files/ImageNet/BigGAN/biggan_res128.json +40 -0
- BigGAN_PyTorch/config_files/ImageNet/BigGAN/biggan_res256_half_cap.json +40 -0
- BigGAN_PyTorch/config_files/ImageNet/BigGAN/biggan_res64.json +40 -0
- BigGAN_PyTorch/config_files/ImageNet/IC-GAN/icgan_res128.json +48 -0
- BigGAN_PyTorch/config_files/ImageNet/IC-GAN/icgan_res256.json +47 -0
- BigGAN_PyTorch/config_files/ImageNet/IC-GAN/icgan_res256_halfcap.json +47 -0
- BigGAN_PyTorch/config_files/ImageNet/IC-GAN/icgan_res64.json +48 -0
- BigGAN_PyTorch/config_files/ImageNet/cc_IC-GAN/cc_icgan_res128.json +48 -0
- BigGAN_PyTorch/config_files/ImageNet/cc_IC-GAN/cc_icgan_res256.json +47 -0
- BigGAN_PyTorch/config_files/ImageNet/cc_IC-GAN/cc_icgan_res256_halfcap.json +48 -0
- BigGAN_PyTorch/config_files/ImageNet/cc_IC-GAN/cc_icgan_res64.json +48 -0
- BigGAN_PyTorch/diffaugment_utils.py +119 -0
- BigGAN_PyTorch/imagenet_lt/ImageNet_LT_train.txt +0 -0
- BigGAN_PyTorch/imagenet_lt/ImageNet_LT_val.txt +0 -0
- BigGAN_PyTorch/imgs/D Singular Values.png +0 -0
- BigGAN_PyTorch/imgs/DeepSamples.png +0 -0
- BigGAN_PyTorch/imgs/DogBall.png +0 -0
- BigGAN_PyTorch/imgs/G Singular Values.png +0 -0
- BigGAN_PyTorch/imgs/IS_FID.png +0 -0
- BigGAN_PyTorch/imgs/Losses.png +0 -0
- BigGAN_PyTorch/imgs/header_image.jpg +0 -0
- BigGAN_PyTorch/imgs/interp_sample.jpg +0 -0
- BigGAN_PyTorch/layers.py +616 -0
- BigGAN_PyTorch/logs/BigGAN_ch96_bs256x8.jsonl +68 -0
- BigGAN_PyTorch/logs/compare_IS.m +97 -0
- BigGAN_PyTorch/logs/metalog.txt +3 -0
- BigGAN_PyTorch/logs/process_inception_log.m +27 -0
- BigGAN_PyTorch/logs/process_training.m +117 -0
- BigGAN_PyTorch/losses.py +43 -0
- BigGAN_PyTorch/make_hdf5.py +193 -0
- BigGAN_PyTorch/run.py +75 -0
- BigGAN_PyTorch/scripts/launch_BigGAN_bs256x8.sh +26 -0
BigGAN_PyTorch/BigGAN.py
ADDED
@@ -0,0 +1,711 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# All contributions by Andy Brock:
# Copyright (c) 2019 Andy Brock
#
# MIT License

import numpy as np
import math
import functools
import os

import torch
import torch.nn as nn
from torch.nn import init
import torch.optim as optim
import torch.nn.functional as F

# from torch.nn import Parameter as P
import sys

sys.path.insert(1, os.path.join(sys.path[0], ".."))
import BigGAN_PyTorch.layers as layers

# from sync_batchnorm import SynchronizedBatchNorm2d as SyncBatchNorm2d
from BigGAN_PyTorch.diffaugment_utils import DiffAugment

# Architectures for G
# Attention is passed in in the format '32_64' to mean applying an attention
# block at both resolution 32x32 and 64x64. Just '64' will apply at 64x64.
def G_arch(ch=64, attention="64", ksize="333333", dilation="111111"):
    arch = {}
    arch[512] = {
        "in_channels": [ch * item for item in [16, 16, 8, 8, 4, 2, 1]],
        "out_channels": [ch * item for item in [16, 8, 8, 4, 2, 1, 1]],
        "upsample": [True] * 7,
        "resolution": [8, 16, 32, 64, 128, 256, 512],
        "attention": {
            2 ** i: (2 ** i in [int(item) for item in attention.split("_")])
            for i in range(3, 10)
        },
    }
    arch[256] = {
        "in_channels": [ch * item for item in [16, 16, 8, 8, 4, 2]],
        "out_channels": [ch * item for item in [16, 8, 8, 4, 2, 1]],
        "upsample": [True] * 6,
        "resolution": [8, 16, 32, 64, 128, 256],
        "attention": {
            2 ** i: (2 ** i in [int(item) for item in attention.split("_")])
            for i in range(3, 9)
        },
    }
    arch[128] = {
        "in_channels": [ch * item for item in [16, 16, 8, 4, 2]],
        "out_channels": [ch * item for item in [16, 8, 4, 2, 1]],
        "upsample": [True] * 5,
        "resolution": [8, 16, 32, 64, 128],
        "attention": {
            2 ** i: (2 ** i in [int(item) for item in attention.split("_")])
            for i in range(3, 8)
        },
    }
    arch[64] = {
        "in_channels": [ch * item for item in [16, 16, 8, 4]],
        "out_channels": [ch * item for item in [16, 8, 4, 2]],
        "upsample": [True] * 4,
        "resolution": [8, 16, 32, 64],
        "attention": {
            2 ** i: (2 ** i in [int(item) for item in attention.split("_")])
            for i in range(3, 7)
        },
    }
    arch[32] = {
        "in_channels": [ch * item for item in [4, 4, 4]],
        "out_channels": [ch * item for item in [4, 4, 4]],
        "upsample": [True] * 3,
        "resolution": [8, 16, 32],
        "attention": {
            2 ** i: (2 ** i in [int(item) for item in attention.split("_")])
            for i in range(3, 6)
        },
    }

    return arch


class Generator(nn.Module):
    def __init__(
        self,
        G_ch=64,
        dim_z=128,
        bottom_width=4,
        resolution=128,
        G_kernel_size=3,
        G_attn="64",
        n_classes=1000,
        num_G_SVs=1,
        num_G_SV_itrs=1,
        G_shared=True,
        shared_dim=0,
        hier=False,
        cross_replica=False,
        mybn=False,
        G_activation=nn.ReLU(inplace=False),
        G_lr=5e-5,
        G_B1=0.0,
        G_B2=0.999,
        adam_eps=1e-8,
        BN_eps=1e-5,
        SN_eps=1e-12,
        G_mixed_precision=False,
        G_fp16=False,
        G_init="ortho",
        skip_init=False,
        no_optim=False,
        G_param="SN",
        norm_style="bn",
        class_cond=True,
        embedded_optimizer=True,
        instance_cond=False,
        G_shared_feat=True,
        shared_dim_feat=2048,
        **kwargs
    ):
        super(Generator, self).__init__()
        # Channel width multiplier
        self.ch = G_ch
        # Dimensionality of the latent space
        self.dim_z = dim_z
        # The initial spatial dimensions
        self.bottom_width = bottom_width
        # Resolution of the output
        self.resolution = resolution
        # Kernel size?
        self.kernel_size = G_kernel_size
        # Attention?
        self.attention = G_attn
        # number of classes, for use in categorical conditional generation
        self.n_classes = n_classes
        # Use shared embeddings?
        self.G_shared = G_shared
        # Dimensionality of the shared embedding? Unused if not using G_shared
        self.shared_dim = shared_dim if shared_dim > 0 else dim_z
        # Hierarchical latent space?
        self.hier = hier
        # Cross replica batchnorm?
        self.cross_replica = cross_replica
        # Use my batchnorm?
        self.mybn = mybn
        # nonlinearity for residual blocks
        self.activation = G_activation
        # Initialization style
        self.init = G_init
        # Parameterization style
        self.G_param = G_param
        # Normalization style
        self.norm_style = norm_style
        # Epsilon for BatchNorm?
        self.BN_eps = BN_eps
        # Epsilon for Spectral Norm?
        self.SN_eps = SN_eps
        # fp16?
        self.fp16 = G_fp16
        # Use embeddings for instance features?
        self.G_shared_feat = G_shared_feat
        self.shared_dim_feat = shared_dim_feat
        # Architecture dict
        self.arch = G_arch(self.ch, self.attention)[resolution]

        # If using hierarchical latents, adjust z
        if self.hier:
            # Number of places z slots into
            self.num_slots = len(self.arch["in_channels"]) + 1
            self.z_chunk_size = self.dim_z // self.num_slots
            # Recalculate latent dimensionality for even splitting into chunks
            self.dim_z = self.z_chunk_size * self.num_slots
        else:
            self.num_slots = 1
            self.z_chunk_size = 0

        # Which convs, batchnorms, and linear layers to use
        if self.G_param == "SN":
            self.which_conv = functools.partial(
                layers.SNConv2d,
                kernel_size=3,
                padding=1,
                num_svs=num_G_SVs,
                num_itrs=num_G_SV_itrs,
                eps=self.SN_eps,
            )
            self.which_linear = functools.partial(
                layers.SNLinear,
                num_svs=num_G_SVs,
                num_itrs=num_G_SV_itrs,
                eps=self.SN_eps,
            )
        else:
            self.which_conv = functools.partial(nn.Conv2d, kernel_size=3, padding=1)
            self.which_linear = nn.Linear

        # We use a non-spectral-normed embedding here regardless;
        # For some reason applying SN to G's embedding seems to randomly cripple G
        self.which_embedding = nn.Embedding
        bn_linear = (
            functools.partial(self.which_linear, bias=False)
            if self.G_shared
            else self.which_embedding
        )
        if not class_cond and not instance_cond:
            input_sz_bn = self.n_classes
        else:
            input_sz_bn = self.z_chunk_size
            if class_cond:
                input_sz_bn += self.shared_dim
            if instance_cond:
                input_sz_bn += self.shared_dim_feat
        self.which_bn = functools.partial(
            layers.ccbn,
            which_linear=bn_linear,
            cross_replica=self.cross_replica,
            mybn=self.mybn,
            input_size=input_sz_bn,
            norm_style=self.norm_style,
            eps=self.BN_eps,
        )

        # Prepare model
        # If not using shared embeddings, self.shared is just a passthrough
        self.shared = (
            self.which_embedding(n_classes, self.shared_dim)
            if G_shared
            else layers.identity()
        )
        self.shared_feat = (
            self.which_linear(2048, self.shared_dim_feat)
            if G_shared_feat
            else layers.identity()
        )
        # First linear layer
        self.linear = self.which_linear(
            self.dim_z // self.num_slots,
            self.arch["in_channels"][0] * (self.bottom_width ** 2),
        )

        # self.blocks is a doubly-nested list of modules, the outer loop intended
        # to be over blocks at a given resolution (resblocks and/or self-attention)
        # while the inner loop is over a given block
        self.blocks = []
        for index in range(len(self.arch["out_channels"])):
            self.blocks += [
                [
                    layers.GBlock(
                        in_channels=self.arch["in_channels"][index],
                        out_channels=self.arch["out_channels"][index],
                        which_conv=self.which_conv,
                        which_bn=self.which_bn,
                        activation=self.activation,
                        upsample=(
                            functools.partial(F.interpolate, scale_factor=2)
                            if self.arch["upsample"][index]
                            else None
                        ),
                    )
                ]
            ]

            # If attention on this block, attach it to the end
            if self.arch["attention"][self.arch["resolution"][index]]:
                print(
                    "Adding attention layer in G at resolution %d"
                    % self.arch["resolution"][index]
                )
                self.blocks[-1] += [
                    layers.Attention(self.arch["out_channels"][index], self.which_conv)
                ]

        # Turn self.blocks into a ModuleList so that it's all properly registered.
        self.blocks = nn.ModuleList([nn.ModuleList(block) for block in self.blocks])

        # output layer: batchnorm-relu-conv.
        # Consider using a non-spectral conv here
        self.output_layer = nn.Sequential(
            layers.bn(
                self.arch["out_channels"][-1],
                cross_replica=self.cross_replica,
                mybn=self.mybn,
            ),
            self.activation,
            self.which_conv(self.arch["out_channels"][-1], 3),
        )

        # Initialize weights. Optionally skip init for testing.
        if not skip_init:
            self.init_weights()

        # Set up optimizer
        # If this is an EMA copy, no need for an optim, so just return now
        if no_optim or not embedded_optimizer:
            return
        self.lr, self.B1, self.B2, self.adam_eps = G_lr, G_B1, G_B2, adam_eps
        if G_mixed_precision:
            print("Using fp16 adam in G...")
            import utils

            self.optim = utils.Adam16(
                params=self.parameters(),
                lr=self.lr,
                betas=(self.B1, self.B2),
                weight_decay=0,
                eps=self.adam_eps,
            )
        else:
            self.optim = optim.Adam(
                params=self.parameters(),
                lr=self.lr,
                betas=(self.B1, self.B2),
                weight_decay=0,
                eps=self.adam_eps,
            )

        # LR scheduling, left here for forward compatibility
        # self.lr_sched = {'itr' : 0}# if self.progressive else {}
        # self.j = 0

    # Initialize
    def init_weights(self):
        self.param_count = 0
        for module in self.modules():
            if (
                isinstance(module, nn.Conv2d)
                or isinstance(module, nn.Linear)
                or isinstance(module, nn.Embedding)
            ):
                if self.init == "ortho":
                    init.orthogonal_(module.weight)
                elif self.init == "N02":
                    init.normal_(module.weight, 0, 0.02)
                elif self.init in ["glorot", "xavier"]:
                    init.xavier_uniform_(module.weight)
                else:
                    print("Init style not recognized...")
                self.param_count += sum(
                    [p.data.nelement() for p in module.parameters()]
                )
        print("Param count for G's initialized parameters: %d" % self.param_count)

    # Get conditionings
    def get_condition_embeddings(self, cl=None, feat=None):
        c_embed = []
        if cl is not None:
            c_embed.append(self.shared(cl))
        if feat is not None:
            c_embed.append(self.shared_feat(feat))
        if len(c_embed) > 0:
            c_embed = torch.cat(c_embed, dim=-1)
        return c_embed

    # Note on this forward function: we pass in a y vector which has
    # already been passed through G.shared to enable easy class-wise
    # interpolation later. If we passed in the one-hot and then ran it through
    # G.shared in this forward function, it would be harder to handle.
    def forward(self, z, label=None, feats=None):
        y = self.get_condition_embeddings(label, feats)
        # If hierarchical, concatenate zs and ys
        if self.hier:
            zs = torch.split(z, self.z_chunk_size, 1)
            z = zs[0]
            ys = [torch.cat([y, item], 1) for item in zs[1:]]
        else:
            ys = [y] * len(self.blocks)

        # First linear layer
        h = self.linear(z)
        # Reshape
        h = h.view(h.size(0), -1, self.bottom_width, self.bottom_width)

        # Loop over blocks
        for index, blocklist in enumerate(self.blocks):
            # Second inner loop in case block has multiple layers
            for block in blocklist:
                h = block(h, ys[index])

        # Apply batchnorm-relu-conv-tanh at output
        return torch.tanh(self.output_layer(h))


# Discriminator architecture, same paradigm as G's above
def D_arch(ch=64, attention="64", ksize="333333", dilation="111111"):
    arch = {}
    arch[256] = {
        "in_channels": [3] + [ch * item for item in [1, 2, 4, 8, 8, 16]],
        "out_channels": [item * ch for item in [1, 2, 4, 8, 8, 16, 16]],
        "downsample": [True] * 6 + [False],
        "resolution": [128, 64, 32, 16, 8, 4, 4],
        "attention": {
            2 ** i: 2 ** i in [int(item) for item in attention.split("_")]
            for i in range(2, 8)
        },
    }
    arch[128] = {
        "in_channels": [3] + [ch * item for item in [1, 2, 4, 8, 16]],
        "out_channels": [item * ch for item in [1, 2, 4, 8, 16, 16]],
        "downsample": [True] * 5 + [False],
        "resolution": [64, 32, 16, 8, 4, 4],
        "attention": {
            2 ** i: 2 ** i in [int(item) for item in attention.split("_")]
            for i in range(2, 8)
        },
    }
    arch[64] = {
        "in_channels": [3] + [ch * item for item in [1, 2, 4, 8]],
        "out_channels": [item * ch for item in [1, 2, 4, 8, 16]],
        "downsample": [True] * 4 + [False],
        "resolution": [32, 16, 8, 4, 4],
        "attention": {
            2 ** i: 2 ** i in [int(item) for item in attention.split("_")]
            for i in range(2, 7)
        },
    }
    arch[32] = {
        "in_channels": [3] + [item * ch for item in [4, 4, 4]],
        "out_channels": [item * ch for item in [4, 4, 4, 4]],
        "downsample": [True, True, False, False],
        "resolution": [16, 16, 16, 16],
        "attention": {
            2 ** i: 2 ** i in [int(item) for item in attention.split("_")]
            for i in range(2, 6)
        },
    }
    return arch


class Discriminator(nn.Module):
    def __init__(
        self,
        D_ch=64,
        D_wide=True,
        resolution=128,
        D_kernel_size=3,
        D_attn="64",
        n_classes=1000,
        num_D_SVs=1,
        num_D_SV_itrs=1,
        D_activation=nn.ReLU(inplace=False),
        D_lr=2e-4,
        D_B1=0.0,
        D_B2=0.999,
        adam_eps=1e-8,
        SN_eps=1e-12,
        output_dim=1,
        D_mixed_precision=False,
        D_fp16=False,
        D_init="ortho",
        skip_init=False,
        D_param="SN",
        class_cond=True,
        embedded_optimizer=True,
        instance_cond=False,
        instance_sz=2048,
        **kwargs
    ):
        super(Discriminator, self).__init__()
        # Width multiplier
        self.ch = D_ch
        # Use Wide D as in BigGAN and SA-GAN or skinny D as in SN-GAN?
        self.D_wide = D_wide
        # Resolution
        self.resolution = resolution
        # Kernel size
        self.kernel_size = D_kernel_size
        # Attention?
        self.attention = D_attn
        # Number of classes
        self.n_classes = n_classes
        # Activation
        self.activation = D_activation
        # Initialization style
        self.init = D_init
        # Parameterization style
        self.D_param = D_param
        # Epsilon for Spectral Norm?
        self.SN_eps = SN_eps
        # Fp16?
        self.fp16 = D_fp16
        # Architecture
        self.arch = D_arch(self.ch, self.attention)[resolution]

        # Which convs, batchnorms, and linear layers to use
        # No option to turn off SN in D right now
        if self.D_param == "SN":
            self.which_conv = functools.partial(
                layers.SNConv2d,
                kernel_size=3,
                padding=1,
                num_svs=num_D_SVs,
                num_itrs=num_D_SV_itrs,
                eps=self.SN_eps,
            )
            self.which_linear = functools.partial(
                layers.SNLinear,
                num_svs=num_D_SVs,
                num_itrs=num_D_SV_itrs,
                eps=self.SN_eps,
            )
            self.which_embedding = functools.partial(
                layers.SNEmbedding,
                num_svs=num_D_SVs,
                num_itrs=num_D_SV_itrs,
                eps=self.SN_eps,
            )
        # Prepare model
        # self.blocks is a doubly-nested list of modules, the outer loop intended
        # to be over blocks at a given resolution (resblocks and/or self-attention)
        self.blocks = []
        for index in range(len(self.arch["out_channels"])):
            self.blocks += [
                [
                    layers.DBlock(
                        in_channels=self.arch["in_channels"][index],
                        out_channels=self.arch["out_channels"][index],
                        which_conv=self.which_conv,
                        wide=self.D_wide,
                        activation=self.activation,
                        preactivation=(index > 0),
                        downsample=(
                            nn.AvgPool2d(2) if self.arch["downsample"][index] else None
                        ),
                    )
                ]
            ]
            # If attention on this block, attach it to the end
            if self.arch["attention"][self.arch["resolution"][index]]:
                print(
                    "Adding attention layer in D at resolution %d"
                    % self.arch["resolution"][index]
                )
                self.blocks[-1] += [
                    layers.Attention(self.arch["out_channels"][index], self.which_conv)
                ]
        # Turn self.blocks into a ModuleList so that it's all properly registered.
        self.blocks = nn.ModuleList([nn.ModuleList(block) for block in self.blocks])
        # Linear output layer. The output dimension is typically 1, but may be
        # larger if we're e.g. turning this into a VAE with an inference output
        self.linear = self.which_linear(self.arch["out_channels"][-1], output_dim)
        # Embedding for projection discrimination
        if class_cond and instance_cond:
            self.linear_feat = self.which_linear(
                instance_sz, self.arch["out_channels"][-1] // 2
            )
            self.embed = self.which_embedding(
                self.n_classes, self.arch["out_channels"][-1] // 2
            )
        elif class_cond:
            # Embedding for projection discrimination
            self.embed = self.which_embedding(
                self.n_classes, self.arch["out_channels"][-1]
            )
        elif instance_cond:
            self.linear_feat = self.which_linear(
                instance_sz, self.arch["out_channels"][-1]
            )

        # Initialize weights
        if not skip_init:
            self.init_weights()

        # Set up optimizer
        if embedded_optimizer:
            self.lr, self.B1, self.B2, self.adam_eps = D_lr, D_B1, D_B2, adam_eps
            if D_mixed_precision:
                print("Using fp16 adam in D...")
                import utils

                self.optim = utils.Adam16(
                    params=self.parameters(),
                    lr=self.lr,
                    betas=(self.B1, self.B2),
                    weight_decay=0,
                    eps=self.adam_eps,
                )
            else:
                self.optim = optim.Adam(
                    params=self.parameters(),
                    lr=self.lr,
                    betas=(self.B1, self.B2),
                    weight_decay=0,
                    eps=self.adam_eps,
                )
        # LR scheduling, left here for forward compatibility
        # self.lr_sched = {'itr' : 0}# if self.progressive else {}
        # self.j = 0

    # Initialize
    def init_weights(self):
        self.param_count = 0
        for module in self.modules():
            if (
                isinstance(module, nn.Conv2d)
                or isinstance(module, nn.Linear)
                or isinstance(module, nn.Embedding)
            ):
                if self.init == "ortho":
                    init.orthogonal_(module.weight)
                elif self.init == "N02":
                    init.normal_(module.weight, 0, 0.02)
                elif self.init in ["glorot", "xavier"]:
                    init.xavier_uniform_(module.weight)
                else:
                    print("Init style not recognized...")
                self.param_count += sum(
                    [p.data.nelement() for p in module.parameters()]
                )
        print("Param count for D's initialized parameters: %d" % self.param_count)

    def forward(self, x, y=None, feat=None):
        # Stick x into h for cleaner for loops without flow control
        h = x
        # Loop over blocks
        for index, blocklist in enumerate(self.blocks):
            for block in blocklist:
                h = block(h)
        # Apply global sum pooling as in SN-GAN
        h = torch.sum(self.activation(h), [2, 3])
        # Get initial class-unconditional output
        out = self.linear(h)
        # Condition on both class and instance features
        if y is not None and feat is not None:
            out = out + torch.sum(
                torch.cat([self.embed(y), self.linear_feat(feat)], dim=-1) * h,
                1,
                keepdim=True,
            )
        # Condition on class only
        elif y is not None:
            # Get projection of final featureset onto class vectors and add to evidence
            out = out + torch.sum(self.embed(y) * h, 1, keepdim=True)
        # Condition on instance features only
        elif feat is not None:
            out = out + torch.sum(self.linear_feat(feat) * h, 1, keepdim=True)
        return out


# Parallelized G_D to minimize cross-gpu communication
# Without this, Generator outputs would get all-gathered and then rebroadcast.
class G_D(nn.Module):
    def __init__(self, G, D, optimizer_G=None, optimizer_D=None):
        super(G_D, self).__init__()
        self.G = G
        self.D = D
        self.optimizer_G = optimizer_G
        self.optimizer_D = optimizer_D

    def forward(
        self,
        z,
        gy,
        feats_g=None,
        x=None,
        dy=None,
        feats=None,
        train_G=False,
        return_G_z=False,
        split_D=False,
        policy=False,
        DA=False,
    ):
        # If training G, enable grad tape
        with torch.set_grad_enabled(train_G):
            # Get Generator output given noise
            G_z = self.G(z, gy, feats_g)
            # Cast as necessary
            # if self.G.fp16 and not self.D.fp16:
            #     G_z = G_z.float()
            # if self.D.fp16 and not self.G.fp16:
            #     G_z = G_z.half()
        # Split_D means to run D once with real data and once with fake,
        # rather than concatenating along the batch dimension.
        if split_D:
            D_fake = self.D(G_z, gy, feats_g)
            if x is not None:
                D_real = self.D(x, dy, feats)
                return D_fake, D_real
            else:
                if return_G_z:
                    return D_fake, G_z
                else:
                    return D_fake
        # If real data is provided, concatenate it with the Generator's output
        # along the batch dimension for improved efficiency.
        else:
            D_input = torch.cat([G_z, x], 0) if x is not None else G_z
            D_class = torch.cat([gy, dy], 0) if dy is not None else gy
            if feats_g is not None:
                D_feats = (
                    torch.cat([feats_g, feats], 0) if feats is not None else feats_g
                )
            else:
                D_feats = None
            if DA:
                D_input = DiffAugment(D_input, policy=policy)
            # Get Discriminator output
            D_out = self.D(D_input, D_class, D_feats)
            if x is not None:
                return torch.split(D_out, [G_z.shape[0], x.shape[0]])  # D_fake, D_real
            else:
                if return_G_z:
                    return D_out, G_z
                else:
                    return D_out
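For orientation, here is a minimal sampling sketch against the Generator defined above. It is not part of the commit: it assumes the repository root is on PYTHONPATH (so BigGAN_PyTorch.layers resolves), and the sizes below are deliberately tiny illustrations, not a trained configuration.

import torch
from BigGAN_PyTorch.BigGAN import Generator

# Tiny illustrative configuration: 32x32 output, attention disabled via
# G_attn="0"; skip_init/no_optim keep construction cheap (no ortho init,
# no embedded Adam optimizer).
G = Generator(
    G_ch=16,
    dim_z=64,
    resolution=32,
    G_attn="0",
    n_classes=10,
    skip_init=True,
    no_optim=True,
)
z = torch.randn(4, 64)          # latent codes
y = torch.randint(0, 10, (4,))  # class labels, embedded through G.shared
with torch.no_grad():
    imgs = G(z, label=y)        # -> (4, 3, 32, 32), tanh output in [-1, 1]

Conditioning on instance features additionally requires constructing the model with instance_cond=True, so that the conditional-BatchNorm input size accounts for the feature embedding passed as feats (a 2048-dimensional vector mapped through G.shared_feat).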
BigGAN_PyTorch/BigGANdeep.py
ADDED
@@ -0,0 +1,734 @@
1 |
+
# Copyright (c) Facebook, Inc. and its affiliates.
|
2 |
+
# All rights reserved.
|
3 |
+
#
|
4 |
+
# This source code is licensed under the license found in the
|
5 |
+
# LICENSE file in the root directory of this source tree.
|
6 |
+
#
|
7 |
+
# All contributions by Andy Brock:
|
8 |
+
# Copyright (c) 2019 Andy Brock
|
9 |
+
#
|
10 |
+
# MIT License
|
11 |
+
import numpy as np
|
12 |
+
import math
|
13 |
+
import functools
|
14 |
+
|
15 |
+
import torch
|
16 |
+
import torch.nn as nn
|
17 |
+
from torch.nn import init
|
18 |
+
import torch.optim as optim
|
19 |
+
import torch.nn.functional as F
|
20 |
+
from torch.nn import Parameter as P
|
21 |
+
|
22 |
+
import layers
|
23 |
+
from sync_batchnorm import SynchronizedBatchNorm2d as SyncBatchNorm2d
|
24 |
+
|
25 |
+
# BigGAN-deep: uses a different resblock and pattern
|
26 |
+
|
27 |
+
|
28 |
+
# Architectures for G
|
29 |
+
# Attention is passed in in the format '32_64' to mean applying an attention
|
30 |
+
# block at both resolution 32x32 and 64x64. Just '64' will apply at 64x64.
|
31 |
+
|
32 |
+
# Channel ratio is the ratio of
|
33 |
+
class GBlock(nn.Module):
|
34 |
+
def __init__(
|
35 |
+
self,
|
36 |
+
in_channels,
|
37 |
+
out_channels,
|
38 |
+
which_conv=nn.Conv2d,
|
39 |
+
which_bn=layers.bn,
|
40 |
+
activation=None,
|
41 |
+
upsample=None,
|
42 |
+
channel_ratio=4,
|
43 |
+
):
|
44 |
+
super(GBlock, self).__init__()
|
45 |
+
|
46 |
+
self.in_channels, self.out_channels = in_channels, out_channels
|
47 |
+
self.hidden_channels = self.in_channels // channel_ratio
|
48 |
+
self.which_conv, self.which_bn = which_conv, which_bn
|
49 |
+
self.activation = activation
|
50 |
+
# Conv layers
|
51 |
+
self.conv1 = self.which_conv(
|
52 |
+
self.in_channels, self.hidden_channels, kernel_size=1, padding=0
|
53 |
+
)
|
54 |
+
self.conv2 = self.which_conv(self.hidden_channels, self.hidden_channels)
|
55 |
+
self.conv3 = self.which_conv(self.hidden_channels, self.hidden_channels)
|
56 |
+
self.conv4 = self.which_conv(
|
57 |
+
self.hidden_channels, self.out_channels, kernel_size=1, padding=0
|
58 |
+
)
|
59 |
+
# Batchnorm layers
|
60 |
+
self.bn1 = self.which_bn(self.in_channels)
|
61 |
+
self.bn2 = self.which_bn(self.hidden_channels)
|
62 |
+
self.bn3 = self.which_bn(self.hidden_channels)
|
63 |
+
self.bn4 = self.which_bn(self.hidden_channels)
|
64 |
+
# upsample layers
|
65 |
+
self.upsample = upsample
|
66 |
+
|
67 |
+
def forward(self, x, y):
|
68 |
+
# Project down to channel ratio
|
69 |
+
h = self.conv1(self.activation(self.bn1(x, y)))
|
70 |
+
# Apply next BN-ReLU
|
71 |
+
h = self.activation(self.bn2(h, y))
|
72 |
+
# Drop channels in x if necessary
|
73 |
+
if self.in_channels != self.out_channels:
|
74 |
+
x = x[:, : self.out_channels]
|
75 |
+
# Upsample both h and x at this point
|
76 |
+
if self.upsample:
|
77 |
+
h = self.upsample(h)
|
78 |
+
x = self.upsample(x)
|
79 |
+
# 3x3 convs
|
80 |
+
h = self.conv2(h)
|
81 |
+
h = self.conv3(self.activation(self.bn3(h, y)))
|
82 |
+
# Final 1x1 conv
|
83 |
+
h = self.conv4(self.activation(self.bn4(h, y)))
|
84 |
+
return h + x
|
85 |
+
|
86 |
+
|
87 |
+
def G_arch(ch=64, attention="64", ksize="333333", dilation="111111"):
|
88 |
+
arch = {}
|
89 |
+
arch[256] = {
|
90 |
+
"in_channels": [ch * item for item in [16, 16, 8, 8, 4, 2]],
|
91 |
+
"out_channels": [ch * item for item in [16, 8, 8, 4, 2, 1]],
|
92 |
+
"upsample": [True] * 6,
|
93 |
+
"resolution": [8, 16, 32, 64, 128, 256],
|
94 |
+
"attention": {
|
95 |
+
2 ** i: (2 ** i in [int(item) for item in attention.split("_")])
|
96 |
+
for i in range(3, 9)
|
97 |
+
},
|
98 |
+
}
|
99 |
+
arch[128] = {
|
100 |
+
"in_channels": [ch * item for item in [16, 16, 8, 4, 2]],
|
101 |
+
"out_channels": [ch * item for item in [16, 8, 4, 2, 1]],
|
102 |
+
"upsample": [True] * 5,
|
103 |
+
"resolution": [8, 16, 32, 64, 128],
|
104 |
+
"attention": {
|
105 |
+
2 ** i: (2 ** i in [int(item) for item in attention.split("_")])
|
106 |
+
for i in range(3, 8)
|
107 |
+
},
|
108 |
+
}
|
109 |
+
arch[64] = {
|
110 |
+
"in_channels": [ch * item for item in [16, 16, 8, 4]],
|
111 |
+
"out_channels": [ch * item for item in [16, 8, 4, 2]],
|
112 |
+
"upsample": [True] * 4,
|
113 |
+
"resolution": [8, 16, 32, 64],
|
114 |
+
"attention": {
|
115 |
+
2 ** i: (2 ** i in [int(item) for item in attention.split("_")])
|
116 |
+
for i in range(3, 7)
|
117 |
+
},
|
118 |
+
}
|
119 |
+
arch[32] = {
|
120 |
+
"in_channels": [ch * item for item in [4, 4, 4]],
|
121 |
+
"out_channels": [ch * item for item in [4, 4, 4]],
|
122 |
+
"upsample": [True] * 3,
|
123 |
+
"resolution": [8, 16, 32],
|
124 |
+
"attention": {
|
125 |
+
2 ** i: (2 ** i in [int(item) for item in attention.split("_")])
|
126 |
+
for i in range(3, 6)
|
127 |
+
},
|
128 |
+
}
|
129 |
+
|
130 |
+
return arch
|
131 |
+
|
132 |
+
|
133 |
+
class Generator(nn.Module):
|
134 |
+
def __init__(
|
135 |
+
self,
|
136 |
+
G_ch=64,
|
137 |
+
G_depth=2,
|
138 |
+
dim_z=128,
|
139 |
+
bottom_width=4,
|
140 |
+
resolution=128,
|
141 |
+
G_kernel_size=3,
|
142 |
+
G_attn="64",
|
143 |
+
n_classes=1000,
|
144 |
+
num_G_SVs=1,
|
145 |
+
num_G_SV_itrs=1,
|
146 |
+
G_shared=True,
|
147 |
+
shared_dim=0,
|
148 |
+
hier=False,
|
149 |
+
cross_replica=False,
|
150 |
+
mybn=False,
|
151 |
+
G_activation=nn.ReLU(inplace=False),
|
152 |
+
G_lr=5e-5,
|
153 |
+
G_B1=0.0,
|
154 |
+
G_B2=0.999,
|
155 |
+
adam_eps=1e-8,
|
156 |
+
BN_eps=1e-5,
|
157 |
+
SN_eps=1e-12,
|
158 |
+
G_mixed_precision=False,
|
159 |
+
G_fp16=False,
|
160 |
+
G_init="ortho",
|
161 |
+
skip_init=False,
|
162 |
+
no_optim=False,
|
163 |
+
G_param="SN",
|
164 |
+
norm_style="bn",
|
165 |
+
**kwargs
|
166 |
+
):
|
167 |
+
super(Generator, self).__init__()
|
168 |
+
# Channel width mulitplier
|
169 |
+
self.ch = G_ch
|
170 |
+
# Number of resblocks per stage
|
171 |
+
self.G_depth = G_depth
|
172 |
+
# Dimensionality of the latent space
|
173 |
+
self.dim_z = dim_z
|
174 |
+
# The initial spatial dimensions
|
175 |
+
self.bottom_width = bottom_width
|
176 |
+
# Resolution of the output
|
177 |
+
self.resolution = resolution
|
178 |
+
# Kernel size?
|
179 |
+
self.kernel_size = G_kernel_size
|
180 |
+
# Attention?
|
181 |
+
self.attention = G_attn
|
182 |
+
# number of classes, for use in categorical conditional generation
|
183 |
+
self.n_classes = n_classes
|
184 |
+
# Use shared embeddings?
|
185 |
+
self.G_shared = G_shared
|
186 |
+
# Dimensionality of the shared embedding? Unused if not using G_shared
|
187 |
+
self.shared_dim = shared_dim if shared_dim > 0 else dim_z
|
188 |
+
# Hierarchical latent space?
|
189 |
+
self.hier = hier
|
190 |
+
# Cross replica batchnorm?
|
191 |
+
self.cross_replica = cross_replica
|
192 |
+
# Use my batchnorm?
|
193 |
+
self.mybn = mybn
|
194 |
+
# nonlinearity for residual blocks
|
195 |
+
self.activation = G_activation
|
196 |
+
# Initialization style
|
197 |
+
self.init = G_init
|
198 |
+
# Parameterization style
|
199 |
+
self.G_param = G_param
|
200 |
+
# Normalization style
|
201 |
+
self.norm_style = norm_style
|
202 |
+
# Epsilon for BatchNorm?
|
203 |
+
self.BN_eps = BN_eps
|
204 |
+
# Epsilon for Spectral Norm?
|
205 |
+
self.SN_eps = SN_eps
|
206 |
+
# fp16?
|
207 |
+
self.fp16 = G_fp16
|
208 |
+
# Architecture dict
|
209 |
+
self.arch = G_arch(self.ch, self.attention)[resolution]
|
210 |
+
|
211 |
+
# Which convs, batchnorms, and linear layers to use
|
212 |
+
if self.G_param == "SN":
|
213 |
+
self.which_conv = functools.partial(
|
214 |
+
layers.SNConv2d,
|
215 |
+
kernel_size=3,
|
216 |
+
padding=1,
|
217 |
+
num_svs=num_G_SVs,
|
218 |
+
num_itrs=num_G_SV_itrs,
|
219 |
+
eps=self.SN_eps,
|
220 |
+
)
|
221 |
+
self.which_linear = functools.partial(
|
222 |
+
layers.SNLinear,
|
223 |
+
num_svs=num_G_SVs,
|
224 |
+
num_itrs=num_G_SV_itrs,
|
225 |
+
eps=self.SN_eps,
|
226 |
+
)
|
227 |
+
else:
|
228 |
+
self.which_conv = functools.partial(nn.Conv2d, kernel_size=3, padding=1)
|
229 |
+
self.which_linear = nn.Linear
|
230 |
+
|
231 |
+
# We use a non-spectral-normed embedding here regardless;
|
232 |
+
# For some reason applying SN to G's embedding seems to randomly cripple G
|
233 |
+
self.which_embedding = nn.Embedding
|
234 |
+
bn_linear = (
|
235 |
+
functools.partial(self.which_linear, bias=False)
|
236 |
+
if self.G_shared
|
237 |
+
else self.which_embedding
|
238 |
+
)
|
239 |
+
self.which_bn = functools.partial(
|
240 |
+
layers.ccbn,
|
241 |
+
which_linear=bn_linear,
|
242 |
+
cross_replica=self.cross_replica,
|
243 |
+
mybn=self.mybn,
|
244 |
+
input_size=(
|
245 |
+
self.shared_dim + self.dim_z if self.G_shared else self.n_classes
|
246 |
+
),
|
247 |
+
norm_style=self.norm_style,
|
248 |
+
eps=self.BN_eps,
|
249 |
+
)
|
250 |
+
|
251 |
+
# Prepare model
|
252 |
+
# If not using shared embeddings, self.shared is just a passthrough
|
253 |
+
self.shared = (
|
254 |
+
self.which_embedding(n_classes, self.shared_dim)
|
255 |
+
if G_shared
|
256 |
+
else layers.identity()
|
257 |
+
)
|
258 |
+
# First linear layer
|
259 |
+
self.linear = self.which_linear(
|
260 |
+
self.dim_z + self.shared_dim,
|
261 |
+
self.arch["in_channels"][0] * (self.bottom_width ** 2),
|
262 |
+
)
|
263 |
+
|
264 |
+
# self.blocks is a doubly-nested list of modules, the outer loop intended
|
265 |
+
# to be over blocks at a given resolution (resblocks and/or self-attention)
|
266 |
+
# while the inner loop is over a given block
|
267 |
+
self.blocks = []
|
268 |
+
for index in range(len(self.arch["out_channels"])):
|
269 |
+
self.blocks += [
|
270 |
+
[
|
271 |
+
GBlock(
|
272 |
+
in_channels=self.arch["in_channels"][index],
|
273 |
+
out_channels=self.arch["in_channels"][index]
|
274 |
+
if g_index == 0
|
275 |
+
else self.arch["out_channels"][index],
|
276 |
+
which_conv=self.which_conv,
|
277 |
+
which_bn=self.which_bn,
|
278 |
+
activation=self.activation,
|
279 |
+
upsample=(
|
280 |
+
functools.partial(F.interpolate, scale_factor=2)
|
281 |
+
if self.arch["upsample"][index]
|
282 |
+
and g_index == (self.G_depth - 1)
|
283 |
+
else None
|
284 |
+
),
|
285 |
+
)
|
286 |
+
]
|
287 |
+
for g_index in range(self.G_depth)
|
288 |
+
]
|
289 |
+
|
290 |
+
# If attention on this block, attach it to the end
|
291 |
+
if self.arch["attention"][self.arch["resolution"][index]]:
|
292 |
+
print(
|
293 |
+
"Adding attention layer in G at resolution %d"
|
294 |
+
% self.arch["resolution"][index]
|
295 |
+
)
|
296 |
+
self.blocks[-1] += [
|
297 |
+
layers.Attention(self.arch["out_channels"][index], self.which_conv)
|
298 |
+
]
|
299 |
+
|
300 |
+
# Turn self.blocks into a ModuleList so that it's all properly registered.
|
301 |
+
self.blocks = nn.ModuleList([nn.ModuleList(block) for block in self.blocks])
|
302 |
+
|
303 |
+
# output layer: batchnorm-relu-conv.
|
304 |
+
# Consider using a non-spectral conv here
|
305 |
+
self.output_layer = nn.Sequential(
|
306 |
+
layers.bn(
|
307 |
+
self.arch["out_channels"][-1],
|
308 |
+
cross_replica=self.cross_replica,
|
309 |
+
mybn=self.mybn,
|
310 |
+
),
|
311 |
+
self.activation,
|
312 |
+
self.which_conv(self.arch["out_channels"][-1], 3),
|
313 |
+
)
|
314 |
+
|
315 |
+
# Initialize weights. Optionally skip init for testing.
|
316 |
+
if not skip_init:
|
317 |
+
self.init_weights()
|
318 |
+
|
319 |
+
# Set up optimizer
|
320 |
+
# If this is an EMA copy, no need for an optim, so just return now
|
321 |
+
if no_optim:
|
322 |
+
return
|
323 |
+
self.lr, self.B1, self.B2, self.adam_eps = G_lr, G_B1, G_B2, adam_eps
|
324 |
+
if G_mixed_precision:
|
325 |
+
print("Using fp16 adam in G...")
|
326 |
+
import utils
|
327 |
+
|
328 |
+
self.optim = utils.Adam16(
|
329 |
+
params=self.parameters(),
|
330 |
+
lr=self.lr,
|
331 |
+
betas=(self.B1, self.B2),
|
332 |
+
weight_decay=0,
|
333 |
+
eps=self.adam_eps,
|
334 |
+
)
|
335 |
+
else:
|
336 |
+
self.optim = optim.Adam(
|
337 |
+
params=self.parameters(),
|
338 |
+
lr=self.lr,
|
339 |
+
betas=(self.B1, self.B2),
|
340 |
+
weight_decay=0,
|
341 |
+
eps=self.adam_eps,
|
342 |
+
)
|
343 |
+
|
344 |
+
# LR scheduling, left here for forward compatibility
|
345 |
+
# self.lr_sched = {'itr' : 0}# if self.progressive else {}
|
346 |
+
# self.j = 0
|
347 |
+
|
348 |
+
# Initialize
|
349 |
+
def init_weights(self):
|
350 |
+
self.param_count = 0
|
351 |
+
for module in self.modules():
|
352 |
+
if (
|
353 |
+
isinstance(module, nn.Conv2d)
|
354 |
+
or isinstance(module, nn.Linear)
|
355 |
+
or isinstance(module, nn.Embedding)
|
356 |
+
):
|
357 |
+
if self.init == "ortho":
|
358 |
+
init.orthogonal_(module.weight)
|
359 |
+
elif self.init == "N02":
|
360 |
+
init.normal_(module.weight, 0, 0.02)
|
361 |
+
elif self.init in ["glorot", "xavier"]:
|
362 |
+
init.xavier_uniform_(module.weight)
|
363 |
+
else:
|
364 |
+
print("Init style not recognized...")
|
365 |
+
self.param_count += sum(
|
366 |
+
[p.data.nelement() for p in module.parameters()]
|
367 |
+
)
|
368 |
+
print("Param count for G" "s initialized parameters: %d" % self.param_count)
|
369 |
+
|
370 |
+
# Note on this forward function: we pass in a y vector which has
|
371 |
+
# already been passed through G.shared to enable easy class-wise
|
372 |
+
# interpolation later. If we passed in the one-hot and then ran it through
|
373 |
+
# G.shared in this forward function, it would be harder to handle.
|
374 |
+
# NOTE: The z vs y dichotomy here is for compatibility with not-y
|
375 |
+
def forward(self, z, y):
|
376 |
+
# If hierarchical, concatenate zs and ys
|
377 |
+
if self.hier:
|
378 |
+
z = torch.cat([y, z], 1)
|
379 |
+
y = z
|
380 |
+
# First linear layer
|
381 |
+
h = self.linear(z)
|
382 |
+
# Reshape
|
383 |
+
h = h.view(h.size(0), -1, self.bottom_width, self.bottom_width)
|
384 |
+
# Loop over blocks
|
385 |
+
for index, blocklist in enumerate(self.blocks):
|
386 |
+
# Second inner loop in case block has multiple layers
|
387 |
+
for block in blocklist:
|
388 |
+
h = block(h, y)
|
389 |
+
|
390 |
+
# Apply batchnorm-relu-conv-tanh at output
|
391 |
+
return torch.tanh(self.output_layer(h))
|
392 |
+
|
393 |
+
|
394 |
+
class DBlock(nn.Module):
|
395 |
+
def __init__(
|
396 |
+
self,
|
397 |
+
in_channels,
|
398 |
+
out_channels,
|
399 |
+
which_conv=layers.SNConv2d,
|
400 |
+
wide=True,
|
401 |
+
preactivation=True,
|
402 |
+
activation=None,
|
403 |
+
downsample=None,
|
404 |
+
channel_ratio=4,
|
405 |
+
):
|
406 |
+
super(DBlock, self).__init__()
|
407 |
+
self.in_channels, self.out_channels = in_channels, out_channels
|
408 |
+
# If using wide D (as in SA-GAN and BigGAN), change the channel pattern
|
409 |
+
self.hidden_channels = self.out_channels // channel_ratio
|
410 |
+
self.which_conv = which_conv
|
411 |
+
self.preactivation = preactivation
|
412 |
+
self.activation = activation
|
413 |
+
self.downsample = downsample
|
414 |
+
|
415 |
+
# Conv layers
|
416 |
+
self.conv1 = self.which_conv(
|
417 |
+
self.in_channels, self.hidden_channels, kernel_size=1, padding=0
|
418 |
+
)
|
419 |
+
self.conv2 = self.which_conv(self.hidden_channels, self.hidden_channels)
|
420 |
+
self.conv3 = self.which_conv(self.hidden_channels, self.hidden_channels)
|
421 |
+
self.conv4 = self.which_conv(
|
422 |
+
self.hidden_channels, self.out_channels, kernel_size=1, padding=0
|
423 |
+
)
|
424 |
+
|
425 |
+
self.learnable_sc = True if (in_channels != out_channels) else False
|
426 |
+
if self.learnable_sc:
|
427 |
+
self.conv_sc = self.which_conv(
|
428 |
+
in_channels, out_channels - in_channels, kernel_size=1, padding=0
|
429 |
+
)
|
430 |
+
|
431 |
+
def shortcut(self, x):
|
432 |
+
if self.downsample:
|
433 |
+
x = self.downsample(x)
|
434 |
+
if self.learnable_sc:
|
435 |
+
x = torch.cat([x, self.conv_sc(x)], 1)
|
436 |
+
return x
|
437 |
+
|
438 |
+
def forward(self, x):
|
439 |
+
# 1x1 bottleneck conv
|
440 |
+
h = self.conv1(F.relu(x))
|
441 |
+
# 3x3 convs
|
442 |
+
h = self.conv2(self.activation(h))
|
443 |
+
h = self.conv3(self.activation(h))
|
444 |
+
# relu before downsample
|
445 |
+
h = self.activation(h)
|
446 |
+
# downsample
|
447 |
+
if self.downsample:
|
448 |
+
h = self.downsample(h)
|
449 |
+
# final 1x1 conv
|
450 |
+
h = self.conv4(h)
|
451 |
+
return h + self.shortcut(x)
|
452 |
+
|
453 |
+
|
454 |
+
# Discriminator architecture, same paradigm as G's above
|
455 |
+
def D_arch(ch=64, attention="64", ksize="333333", dilation="111111"):
|
456 |
+
arch = {}
|
457 |
+
arch[256] = {
|
458 |
+
"in_channels": [item * ch for item in [1, 2, 4, 8, 8, 16]],
|
459 |
+
"out_channels": [item * ch for item in [2, 4, 8, 8, 16, 16]],
|
460 |
+
"downsample": [True] * 6 + [False],
|
461 |
+
"resolution": [128, 64, 32, 16, 8, 4, 4],
|
462 |
+
"attention": {
|
463 |
+
2 ** i: 2 ** i in [int(item) for item in attention.split("_")]
|
464 |
+
for i in range(2, 8)
|
465 |
+
},
|
466 |
+
}
|
467 |
+
arch[128] = {
|
468 |
+
"in_channels": [item * ch for item in [1, 2, 4, 8, 16]],
|
469 |
+
"out_channels": [item * ch for item in [2, 4, 8, 16, 16]],
|
470 |
+
"downsample": [True] * 5 + [False],
|
471 |
+
"resolution": [64, 32, 16, 8, 4, 4],
|
472 |
+
"attention": {
|
473 |
+
2 ** i: 2 ** i in [int(item) for item in attention.split("_")]
|
474 |
+
for i in range(2, 8)
|
475 |
+
},
|
476 |
+
}
|
477 |
+
arch[64] = {
|
478 |
+
"in_channels": [item * ch for item in [1, 2, 4, 8]],
|
479 |
+
"out_channels": [item * ch for item in [2, 4, 8, 16]],
|
480 |
+
"downsample": [True] * 4 + [False],
|
481 |
+
"resolution": [32, 16, 8, 4, 4],
|
482 |
+
"attention": {
|
483 |
+
2 ** i: 2 ** i in [int(item) for item in attention.split("_")]
|
484 |
+
for i in range(2, 7)
|
485 |
+
},
|
486 |
+
}
|
487 |
+
arch[32] = {
|
488 |
+
"in_channels": [item * ch for item in [4, 4, 4]],
|
489 |
+
"out_channels": [item * ch for item in [4, 4, 4]],
|
490 |
+
"downsample": [True, True, False, False],
|
491 |
+
"resolution": [16, 16, 16, 16],
|
492 |
+
"attention": {
|
493 |
+
2 ** i: 2 ** i in [int(item) for item in attention.split("_")]
|
494 |
+
for i in range(2, 6)
|
495 |
+
},
|
496 |
+
}
|
497 |
+
return arch
|
498 |
+
|
499 |
+
|
500 |
+
class Discriminator(nn.Module):
|
501 |
+
def __init__(
|
502 |
+
self,
|
503 |
+
D_ch=64,
|
504 |
+
D_wide=True,
|
505 |
+
D_depth=2,
|
506 |
+
resolution=128,
|
507 |
+
D_kernel_size=3,
|
508 |
+
D_attn="64",
|
509 |
+
n_classes=1000,
|
510 |
+
num_D_SVs=1,
|
511 |
+
num_D_SV_itrs=1,
|
512 |
+
D_activation=nn.ReLU(inplace=False),
|
513 |
+
D_lr=2e-4,
|
514 |
+
D_B1=0.0,
|
515 |
+
D_B2=0.999,
|
516 |
+
adam_eps=1e-8,
|
517 |
+
SN_eps=1e-12,
|
518 |
+
output_dim=1,
|
519 |
+
D_mixed_precision=False,
|
520 |
+
D_fp16=False,
|
521 |
+
D_init="ortho",
|
522 |
+
skip_init=False,
|
523 |
+
D_param="SN",
|
524 |
+
**kwargs
|
525 |
+
):
|
526 |
+
super(Discriminator, self).__init__()
|
527 |
+
# Width multiplier
|
528 |
+
self.ch = D_ch
|
529 |
+
# Use Wide D as in BigGAN and SA-GAN or skinny D as in SN-GAN?
|
530 |
+
self.D_wide = D_wide
|
531 |
+
# How many resblocks per stage?
|
532 |
+
self.D_depth = D_depth
|
533 |
+
# Resolution
|
534 |
+
self.resolution = resolution
|
535 |
+
# Kernel size
|
536 |
+
        self.kernel_size = D_kernel_size
        # Attention?
        self.attention = D_attn
        # Number of classes
        self.n_classes = n_classes
        # Activation
        self.activation = D_activation
        # Initialization style
        self.init = D_init
        # Parameterization style
        self.D_param = D_param
        # Epsilon for Spectral Norm?
        self.SN_eps = SN_eps
        # Fp16?
        self.fp16 = D_fp16
        # Architecture
        self.arch = D_arch(self.ch, self.attention)[resolution]

        # Which convs, batchnorms, and linear layers to use
        # No option to turn off SN in D right now
        if self.D_param == "SN":
            self.which_conv = functools.partial(
                layers.SNConv2d,
                kernel_size=3,
                padding=1,
                num_svs=num_D_SVs,
                num_itrs=num_D_SV_itrs,
                eps=self.SN_eps,
            )
            self.which_linear = functools.partial(
                layers.SNLinear,
                num_svs=num_D_SVs,
                num_itrs=num_D_SV_itrs,
                eps=self.SN_eps,
            )
            self.which_embedding = functools.partial(
                layers.SNEmbedding,
                num_svs=num_D_SVs,
                num_itrs=num_D_SV_itrs,
                eps=self.SN_eps,
            )

        # Prepare model
        # Stem convolution
        self.input_conv = self.which_conv(3, self.arch["in_channels"][0])
        # self.blocks is a doubly-nested list of modules, the outer loop intended
        # to be over blocks at a given resolution (resblocks and/or self-attention)
        self.blocks = []
        for index in range(len(self.arch["out_channels"])):
            self.blocks += [
                [
                    DBlock(
                        in_channels=self.arch["in_channels"][index]
                        if d_index == 0
                        else self.arch["out_channels"][index],
                        out_channels=self.arch["out_channels"][index],
                        which_conv=self.which_conv,
                        wide=self.D_wide,
                        activation=self.activation,
                        preactivation=True,
                        downsample=(
                            nn.AvgPool2d(2)
                            if self.arch["downsample"][index] and d_index == 0
                            else None
                        ),
                    )
                    for d_index in range(self.D_depth)
                ]
            ]
            # If attention on this block, attach it to the end
            if self.arch["attention"][self.arch["resolution"][index]]:
                print(
                    "Adding attention layer in D at resolution %d"
                    % self.arch["resolution"][index]
                )
                self.blocks[-1] += [
                    layers.Attention(self.arch["out_channels"][index], self.which_conv)
                ]
        # Turn self.blocks into a ModuleList so that it's all properly registered.
        self.blocks = nn.ModuleList([nn.ModuleList(block) for block in self.blocks])
        # Linear output layer. The output dimension is typically 1, but may be
        # larger if we're e.g. turning this into a VAE with an inference output
        self.linear = self.which_linear(self.arch["out_channels"][-1], output_dim)
        # Embedding for projection discrimination
        self.embed = self.which_embedding(self.n_classes, self.arch["out_channels"][-1])

        # Initialize weights
        if not skip_init:
            self.init_weights()

        # Set up optimizer
        self.lr, self.B1, self.B2, self.adam_eps = D_lr, D_B1, D_B2, adam_eps
        if D_mixed_precision:
            print("Using fp16 adam in D...")
            import utils

            self.optim = utils.Adam16(
                params=self.parameters(),
                lr=self.lr,
                betas=(self.B1, self.B2),
                weight_decay=0,
                eps=self.adam_eps,
            )
        else:
            self.optim = optim.Adam(
                params=self.parameters(),
                lr=self.lr,
                betas=(self.B1, self.B2),
                weight_decay=0,
                eps=self.adam_eps,
            )
        # LR scheduling, left here for forward compatibility
        # self.lr_sched = {'itr' : 0}# if self.progressive else {}
        # self.j = 0

    # Initialize
    def init_weights(self):
        self.param_count = 0
        for module in self.modules():
            if (
                isinstance(module, nn.Conv2d)
                or isinstance(module, nn.Linear)
                or isinstance(module, nn.Embedding)
            ):
                if self.init == "ortho":
                    init.orthogonal_(module.weight)
                elif self.init == "N02":
                    init.normal_(module.weight, 0, 0.02)
                elif self.init in ["glorot", "xavier"]:
                    init.xavier_uniform_(module.weight)
                else:
                    print("Init style not recognized...")
                self.param_count += sum(
                    [p.data.nelement() for p in module.parameters()]
                )
        print("Param count for D's initialized parameters: %d" % self.param_count)

    def forward(self, x, y=None):
        # Run input conv
        h = self.input_conv(x)
        # Loop over blocks
        for index, blocklist in enumerate(self.blocks):
            for block in blocklist:
                h = block(h)
        # Apply global sum pooling as in SN-GAN
        h = torch.sum(self.activation(h), [2, 3])
        # Get initial class-unconditional output
        out = self.linear(h)
        # Get projection of final featureset onto class vectors and add to evidence
        out = out + torch.sum(self.embed(y) * h, 1, keepdim=True)
        return out

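# Illustration (added for reference, not part of the original model code): the
# forward pass above implements a projection discriminator (Miyato & Koyama,
# "cGANs with Projection Discriminator"):
#     D(x, y) = linear(h) + <embed(y), h>
# where h is the (B, C) globally sum-pooled feature, embed(y) is a (B, C)
# class vector, and the result is (B, 1): unconditional evidence plus a
# class-conditional projection term.
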
# Parallelized G_D to minimize cross-gpu communication
# Without this, Generator outputs would get all-gathered and then rebroadcast.
class G_D(nn.Module):
    def __init__(self, G, D):
        super(G_D, self).__init__()
        self.G = G
        self.D = D

    def forward(
        self, z, gy, x=None, dy=None, train_G=False, return_G_z=False, split_D=False
    ):
        # If training G, enable grad tape
        with torch.set_grad_enabled(train_G):
            # Get Generator output given noise
            G_z = self.G(z, self.G.shared(gy))
            # Cast as necessary
            if self.G.fp16 and not self.D.fp16:
                G_z = G_z.float()
            if self.D.fp16 and not self.G.fp16:
                G_z = G_z.half()
        # Split_D means to run D once with real data and once with fake,
        # rather than concatenating along the batch dimension.
        if split_D:
            D_fake = self.D(G_z, gy)
            if x is not None:
                D_real = self.D(x, dy)
                return D_fake, D_real
            else:
                if return_G_z:
                    return D_fake, G_z
                else:
                    return D_fake
        # If real data is provided, concatenate it with the Generator's output
        # along the batch dimension for improved efficiency.
        else:
            D_input = torch.cat([G_z, x], 0) if x is not None else G_z
            D_class = torch.cat([gy, dy], 0) if dy is not None else gy
            # Get Discriminator output
            D_out = self.D(D_input, D_class)
            if x is not None:
                return torch.split(D_out, [G_z.shape[0], x.shape[0]])  # D_fake, D_real
            else:
                if return_G_z:
                    return D_out, G_z
                else:
                    return D_out
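# Usage sketch (added for reference, not part of the original file): wrapping G
# and D lets one D forward pass score fakes and reals together, e.g.:
#
#   GD = G_D(G, D)
#   D_fake, D_real = GD(z, gy, x=real_images, dy=real_labels, train_G=False)
#   # hinge loss, for example (F = torch.nn.functional):
#   D_loss = torch.mean(F.relu(1. - D_real)) + torch.mean(F.relu(1. + D_fake))
#
# The training loop in train_fns.py is the authoritative usage.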
BigGAN_PyTorch/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2019 Andy Brock

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
BigGAN_PyTorch/README.md
ADDED
@@ -0,0 +1,144 @@
# BigGAN-PyTorch
The author's officially unofficial PyTorch BigGAN implementation.


This repo contains code for 4-8 GPU training of BigGANs from [Large Scale GAN Training for High Fidelity Natural Image Synthesis](https://arxiv.org/abs/1809.11096) by Andrew Brock, Jeff Donahue, and Karen Simonyan.

This code is by Andy Brock and Alex Andonian.

## How To Use This Code
You will need:

- [PyTorch](https://PyTorch.org/), version 1.0.1
- tqdm, numpy, scipy, and h5py
- The ImageNet training set

First, you may optionally prepare a pre-processed HDF5 version of your target dataset for faster I/O. Following this (or not), you'll need the Inception moments needed to calculate FID. Both of these can be done by modifying and running

```sh
sh scripts/utils/prepare_data.sh
```

which by default assumes your ImageNet training set is downloaded into the root folder `data` in this directory, and will prepare the cached HDF5 at 128x128 pixel resolution.

In the scripts folder, there are multiple bash scripts which will train BigGANs with different batch sizes. This code assumes you do not have access to a full TPU pod, and accordingly spoofs mega-batches by using gradient accumulation (averaging grads over multiple minibatches, and only taking an optimizer step after N accumulations). By default, the `launch_BigGAN_bs256x8.sh` script trains a full-sized BigGAN model with a batch size of 256 and 8 gradient accumulations, for a total batch size of 2048. On 8xV100 with full-precision training (no Tensor cores), this script takes 15 days to train to 150k iterations.

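For reference, one accumulation cycle just sums gradients over several minibatches, scaling each loss down so the result matches a single large-batch step. A minimal sketch of a discriminator update under this scheme (illustrative only; the real loop lives in `train_fns.py`, and `sample_noise_and_labels`/`discriminator_loss` here are stand-ins for the repo's own utilities):

```python
D.optim.zero_grad()
for accumulation_index in range(num_D_accumulations):
    z, gy = sample_noise_and_labels()   # stand-in: draw latents and fake labels
    x, dy = next(data_iterator)         # one minibatch of real images/labels
    D_fake, D_real = GD(z, gy, x=x, dy=dy, train_G=False)
    # Scale the loss so N accumulations average to one effective mega-batch
    D_loss = discriminator_loss(D_fake, D_real) / float(num_D_accumulations)
    D_loss.backward()
D.optim.step()
```
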
You will first need to figure out the maximum batch size your setup can support. The pre-trained models provided here were trained on 8xV100 (16GB VRAM each) which can support slightly more than the BS256 used by default. Once you've determined this, you should modify the script so that the batch size times the number of gradient accumulations is equal to your desired total batch size (BigGAN defaults to 2048).

Note also that this script uses the `--load_in_mem` arg, which loads the entire (~64GB) I128.hdf5 file into RAM for faster data loading. If you don't have enough RAM to support this (probably 96GB+), remove this argument.

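If you want to sanity-check the cached file, it can be opened directly with h5py. A quick sketch (the key names `imgs` and `labels` are assumptions here; verify them against what `make_hdf5.py` wrote for your dataset):

```python
import h5py

with h5py.File("data/I128.hdf5", "r") as f:
    print(list(f.keys()))     # e.g. ['imgs', 'labels'] (assumed key names)
    print(f["imgs"].shape)    # (N, 3, 128, 128) uint8 images
    print(f["labels"].shape)  # (N,) integer class labels
```
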
## Metrics and Sampling

During training, this script will output logs with training metrics and test metrics, will save multiple copies (2 most recent and 5 highest-scoring) of the model weights/optimizer params, and will produce samples and interpolations every time it saves weights. The logs folder contains scripts to process these logs and plot the results using MATLAB (sorry not sorry).

After training, one can use `sample.py` to produce additional samples and interpolations, test with different truncation values, batch sizes, number of standing stat accumulations, etc. See the `sample_BigGAN_bs256x8.sh` script for an example.

By default, everything is saved to weights/samples/logs/data folders which are assumed to be in the same folder as this repo. You can point all of these to a different base folder using the `--base_root` argument, or pick specific locations for each of these with their respective arguments (e.g. `--logs_root`).

We include scripts to run BigGAN-deep, but we have not fully trained a model using them, so consider them untested. Additionally, we include scripts to run a model on CIFAR, and to run SA-GAN (with EMA) and SN-GAN on ImageNet. The SA-GAN code assumes you have 4xTitanX (or equivalent in terms of GPU RAM) and will run with a batch size of 128 and 2 gradient accumulations.

## An Important Note on Inception Metrics
This repo uses the PyTorch in-built inception network to calculate IS and FID. These scores are different from the scores you would get using the official TF inception code, and are only for monitoring purposes! Run sample.py on your model with the `--sample_npz` argument, then run inception_tf13 to calculate the actual TensorFlow IS. Note that you will need to have TensorFlow 1.3 or earlier installed, as TF1.4+ breaks the original IS code.

## Pretrained models
We include two pretrained model checkpoints (with G, D, the EMA copy of G, the optimizers, and the state dict):
- The main checkpoint is for a BigGAN trained on ImageNet at 128x128, using BS256 and 8 gradient accumulations, taken just before collapse, with a TF Inception Score of 97.35 +/- 1.79: [LINK](https://drive.google.com/open?id=1nAle7FCVFZdix2--ks0r5JBkFnKw8ctW)
- An earlier checkpoint of the first model (100k G iters), at high performance but well before collapse, which may be easier to fine-tune: [LINK](https://drive.google.com/open?id=1dmZrcVJUAWkPBGza_XgswSuT-UODXZcO)

Pretrained models for Places-365 coming soon.

This repo also contains scripts for porting the original TFHub BigGAN Generator weights to PyTorch. See the scripts in the TFHub folder for more details.

## Fine-tuning, Using Your Own Dataset, or Making New Training Functions

If you wish to resume interrupted training or fine-tune a pre-trained model, run the same launch script but with the `--resume` argument added. Experiment names are automatically generated from the configuration, but can be overridden using the `--experiment_name` arg (for example, if you wish to fine-tune a model using modified optimizer settings).

To prep your own dataset, you will need to add it to datasets.py and modify the convenience dicts in utils.py (dset_dict, imsize_dict, root_dict, nclass_dict, classes_per_sheet_dict) to have the appropriate metadata for your dataset. Repeat the process in prepare_data.sh (optionally produce an HDF5 preprocessed copy, and calculate the Inception moments for FID).

By default, the training script will save the top 5 best checkpoints as measured by Inception Score. For datasets other than ImageNet, Inception Score can be a very poor measure of quality, so you will likely want to use `--which_best FID` instead.

To use your own training function (e.g. train a BigVAE): either modify train_fns.GAN_training_function or add a new train fn and add it after the `if config['which_train_fn'] == 'GAN':` line in `train.py`, as sketched below.

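For orientation, the dispatch point in `train.py` looks roughly like this (a sketch; treat the exact factory signature as an assumption and check `train.py` before copying):

```python
# In train.py (sketch):
if config["which_train_fn"] == "GAN":
    train = train_fns.GAN_training_function(G, D, GD, z_, y_, ema, state_dict, config)
elif config["which_train_fn"] == "my_new_fn":       # hypothetical new branch
    train = my_training_function(G, D, GD, config)  # hypothetical factory you provide
```
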
## Neat Stuff
- We include the full training and metrics logs [here](https://drive.google.com/open?id=1ZhY9Mg2b_S4QwxNmt57aXJ9FOC3ZN1qb) for reference. I've found that one of the hardest things about re-implementing a paper can be checking if the logs line up early in training, especially if training takes multiple weeks. Hopefully these will be helpful for future work.
- We include an accelerated FID calculation--the original scipy version can require upwards of 10 minutes to calculate the matrix sqrt; this version uses an accelerated PyTorch version to calculate it in under a second (see the Newton-Schulz sketch after this list).
- We include an accelerated, low-memory consumption ortho reg implementation.
- By default, we only compute the top singular value (the spectral norm), but this code supports computing more SVs through the `--num_G_SVs` argument.

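For the curious, the accelerated matrix sqrt is a Newton-Schulz iteration; a self-contained sketch of the idea follows (the repo's version, adapted from Lin & Maji's matrix-sqrt code, additionally handles batching and gradients):

```python
import torch

def sqrt_newton_schulz(A, num_iters=50):
    # Approximate the square root of a PSD matrix A via Newton-Schulz iteration.
    norm_A = A.norm()  # rescale so the iteration converges
    Y = A / norm_A
    I = torch.eye(A.shape[0], dtype=A.dtype, device=A.device)
    Z = I.clone()
    for _ in range(num_iters):
        T = 0.5 * (3.0 * I - Z @ Y)
        Y = Y @ T  # Y -> sqrt(A / ||A||)
        Z = T @ Z  # Z -> inverse sqrt(A / ||A||)
    return Y * norm_A.sqrt()
```
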
## Key Differences Between This Code And The Original BigGAN
- We use the optimizer settings from SA-GAN (G_lr=1e-4, D_lr=4e-4, num_D_steps=1, as opposed to BigGAN's G_lr=5e-5, D_lr=2e-4, num_D_steps=2). While slightly less performant, this was the first corner we cut to bring training times down.
- By default, we do not use Cross-Replica BatchNorm (AKA Synced BatchNorm). The two variants we tried (a custom, naive one and the one included in this repo) have slightly different gradients (albeit identical forward passes) from the built-in BatchNorm, which appear to be sufficient to cripple training.
- Gradient accumulation means that we update the SV estimates and the BN statistics 8 times more frequently. This means that the BN stats are much closer to standing stats, and that the singular value estimates tend to be more accurate. Because of this, we measure metrics by default with G in test mode (using the BatchNorm running stat estimates instead of computing standing stats as in the paper). We do still support standing stats (see the sample.sh scripts). This could also conceivably result in gradients from the earlier accumulations being stale, but in practice this does not appear to be a problem.
- The currently provided pretrained models were not trained with orthogonal regularization. Training without ortho reg seems to increase the probability that models will not be amenable to truncation, but it looks like this particular model got a winning ticket. Regardless, we provide two highly optimized (fast and minimal memory consumption) ortho reg implementations which directly compute the ortho reg gradients (see the sketch after this list).

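As a reference for the direct-gradient ortho reg in the last bullet: the regularizer penalizes off-diagonal entries of WW^T, so its gradient, (WW^T * (1 - I))W up to a constant absorbed into the strength, can be added straight onto each weight's existing gradient. A sketch of that pattern, written here from the formula and close in spirit to the repo's own implementation (call it after `backward()` and before `step()`):

```python
import torch

def ortho_reg(model, strength=1e-4):
    # Add the orthogonal-regularization gradient directly to param.grad,
    # skipping biases/gains (anything with fewer than 2 dims).
    with torch.no_grad():
        for param in model.parameters():
            if len(param.shape) < 2 or param.grad is None:
                continue
            w = param.view(param.shape[0], -1)
            mask = 1.0 - torch.eye(w.shape[0], device=w.device)
            grad = torch.mm(torch.mm(w, w.t()) * mask, w)
            param.grad.data += strength * grad.view(param.shape)
```
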
## A Note On The Design Of This Repo
This code is designed from the ground up to serve as an extensible, hackable base for further research code. We've put a lot of thought into making sure the abstractions are the *right* thickness for research--not so thick as to be impenetrable, but not so thin as to be useless. The key idea is that if you want to experiment with a SOTA setup and make some modification (try out your own new loss function, architecture, self-attention block, etc) you should be able to easily do so just by dropping your code in one or two places, without having to worry about the rest of the codebase. Things like the use of self.which_conv and functools.partial in the BigGAN.py model definition were put together with this in mind, as was the design of the Spectral Norm class inheritance.

With that said, this is a somewhat large codebase for a single project. While we tried to be thorough with the comments, if there's something you think could be more clear, better written, or better refactored, please feel free to raise an issue or a pull request.

## Feature Requests
Want to work on or improve this code? There are a couple things this repo would benefit from, but which don't yet work.

- Synchronized BatchNorm (AKA Cross-Replica BatchNorm). We tried out two variants of this, but for some unknown reason it crippled training each time. We have not tried the [apex](https://github.com/NVIDIA/apex) SyncBN as my school's servers are on ancient NVIDIA drivers that don't support it--apex would probably be a good place to start.
- Mixed precision training and making use of Tensor cores. This repo includes a naive mixed-precision Adam implementation which works early in training but leads to early collapse, and doesn't do anything to activate Tensor cores (it just reduces memory consumption). As above, integrating [apex](https://github.com/NVIDIA/apex) into this code and employing its mixed-precision training techniques to take advantage of Tensor cores and reduce memory consumption could yield substantial speed gains.

## Misc Notes
See [This directory](https://gist.github.com/yrevar/942d3a0ac09ec9e5eb3a) for ImageNet labels.

If you use this code, please cite
```text
@inproceedings{
brock2018large,
title={Large Scale {GAN} Training for High Fidelity Natural Image Synthesis},
author={Andrew Brock and Jeff Donahue and Karen Simonyan},
booktitle={International Conference on Learning Representations},
year={2019},
url={https://openreview.net/forum?id=B1xsqj09Fm},
}
```

## Acknowledgments
Thanks to Google for the generous cloud credit donations.

[SyncBN](https://github.com/vacancy/Synchronized-BatchNorm-PyTorch) by Jiayuan Mao and Tete Xiao.

[Progress bar](https://github.com/Lasagne/Recipes/tree/master/papers/densenet) originally from Jan Schlüter.

Test metrics logger from [VoxNet](https://github.com/dimatura/voxnet).

PyTorch [implementation of cov](https://discuss.PyTorch.org/t/covariance-and-gradient-support/16217/2) from Modar M. Alfadly.

PyTorch [fast Matrix Sqrt](https://github.com/msubhransu/matrix-sqrt) for FID from Tsung-Yu Lin and Subhransu Maji.

TensorFlow Inception Score code from [OpenAI's Improved-GAN](https://github.com/openai/improved-gan).
BigGAN_PyTorch/TFHub/README.md
ADDED
@@ -0,0 +1,14 @@
# BigGAN-PyTorch TFHub converter
This dir contains scripts for taking the [pre-trained generator weights from TFHub](https://tfhub.dev/s?q=biggan) and porting them to BigGAN-PyTorch.

In addition to the base libraries for BigGAN-PyTorch, to run this code you will need:

- TensorFlow
- TFHub
- parse

Note that this code is only presently set up to run the ported models without truncation--you'll need to accumulate standing stats at each truncation level yourself if you wish to employ it.

To port the 128x128 model from TFHub, produce a pretrained weights .pth file, and generate samples using all your GPUs, run

`python converter.py -r 128 --generate_samples --parallel`
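Once converted, the saved `.pth` can be loaded into the main repo's `Generator` using the same config the converter builds. A sketch (assumes you run it from this folder with the parent directory on `sys.path`, as `converter.py` itself does; the path below is the converter's default output location):

```python
import sys
sys.path.append("..")

import torch
import BigGAN
from converter import get_config

G = BigGAN.Generator(**get_config(128))
# strict=False mirrors converter.py and skips the unported sv0 buffers
G.load_state_dict(torch.load("pretrained_weights/biggan-128.pth"), strict=False)
G.eval()
```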
BigGAN_PyTorch/TFHub/biggan_v1.py
ADDED
@@ -0,0 +1,441 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# All contributions by Andy Brock:
# Copyright (c) 2019 Andy Brock
#
# MIT License
#
# BigGAN V1:
# This is now deprecated code used for porting the TFHub modules to pytorch,
# included here for reference only.
import numpy as np
import torch
from scipy.stats import truncnorm
from torch import nn
from torch.nn import Parameter
from torch.nn import functional as F


def l2normalize(v, eps=1e-4):
    return v / (v.norm() + eps)


def truncated_z_sample(batch_size, z_dim, truncation=0.5, seed=None):
    state = None if seed is None else np.random.RandomState(seed)
    values = truncnorm.rvs(-2, 2, size=(batch_size, z_dim), random_state=state)
    return truncation * values


def denorm(x):
    out = (x + 1) / 2
    return out.clamp_(0, 1)

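# Illustration (not part of the original file): truncated_z_sample draws z from
# a normal truncated to [-2, 2] and rescales it; lower `truncation` trades
# sample diversity for fidelity. A typical call:
#
#   z = torch.from_numpy(truncated_z_sample(8, 120, truncation=0.4)).float()
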
class SpectralNorm(nn.Module):
    def __init__(self, module, name="weight", power_iterations=1):
        super(SpectralNorm, self).__init__()
        self.module = module
        self.name = name
        self.power_iterations = power_iterations
        if not self._made_params():
            self._make_params()

    def _update_u_v(self):
        u = getattr(self.module, self.name + "_u")
        v = getattr(self.module, self.name + "_v")
        w = getattr(self.module, self.name + "_bar")

        height = w.data.shape[0]
        _w = w.view(height, -1)
        for _ in range(self.power_iterations):
            v = l2normalize(torch.matmul(_w.t(), u))
            u = l2normalize(torch.matmul(_w, v))

        sigma = u.dot((_w).mv(v))
        setattr(self.module, self.name, w / sigma.expand_as(w))

    def _made_params(self):
        try:
            getattr(self.module, self.name + "_u")
            getattr(self.module, self.name + "_v")
            getattr(self.module, self.name + "_bar")
            return True
        except AttributeError:
            return False

    def _make_params(self):
        w = getattr(self.module, self.name)

        height = w.data.shape[0]
        width = w.view(height, -1).data.shape[1]

        u = Parameter(w.data.new(height).normal_(0, 1), requires_grad=False)
        v = Parameter(w.data.new(height).normal_(0, 1), requires_grad=False)
        u.data = l2normalize(u.data)
        v.data = l2normalize(v.data)
        w_bar = Parameter(w.data)

        del self.module._parameters[self.name]
        self.module.register_parameter(self.name + "_u", u)
        self.module.register_parameter(self.name + "_v", v)
        self.module.register_parameter(self.name + "_bar", w_bar)

    def forward(self, *args):
        self._update_u_v()
        return self.module.forward(*args)

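# Note (added for clarity, not in the original file): after the power-iteration
# updates above, sigma = u^T W v estimates the largest singular value of W, so
# the wrapped module effectively runs with the normalized weight W / sigma,
# keeping each layer's spectral norm near 1 (Miyato et al., spectral norm).
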
class SelfAttention(nn.Module):
    """ Self Attention Layer"""

    def __init__(self, in_dim, activation=F.relu):
        super().__init__()
        self.chanel_in = in_dim
        self.activation = activation

        self.theta = SpectralNorm(
            nn.Conv2d(
                in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1, bias=False
            )
        )
        self.phi = SpectralNorm(
            nn.Conv2d(
                in_channels=in_dim, out_channels=in_dim // 8, kernel_size=1, bias=False
            )
        )
        self.pool = nn.MaxPool2d(2, 2)
        self.g = SpectralNorm(
            nn.Conv2d(
                in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1, bias=False
            )
        )
        self.o_conv = SpectralNorm(
            nn.Conv2d(
                in_channels=in_dim // 2, out_channels=in_dim, kernel_size=1, bias=False
            )
        )
        self.gamma = nn.Parameter(torch.zeros(1))

        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):
        m_batchsize, C, width, height = x.size()
        N = height * width

        theta = self.theta(x)
        phi = self.phi(x)
        phi = self.pool(phi)
        phi = phi.view(m_batchsize, -1, N // 4)
        theta = theta.view(m_batchsize, -1, N)
        theta = theta.permute(0, 2, 1)
        attention = self.softmax(torch.bmm(theta, phi))
        g = self.pool(self.g(x)).view(m_batchsize, -1, N // 4)
        attn_g = torch.bmm(g, attention.permute(0, 2, 1)).view(
            m_batchsize, -1, width, height
        )
        out = self.o_conv(attn_g)
        return self.gamma * out + x


class ConditionalBatchNorm2d(nn.Module):
    def __init__(self, num_features, num_classes, eps=1e-4, momentum=0.1):
        super().__init__()
        self.num_features = num_features
        self.bn = nn.BatchNorm2d(num_features, affine=False, eps=eps, momentum=momentum)
        self.gamma_embed = SpectralNorm(
            nn.Linear(num_classes, num_features, bias=False)
        )
        self.beta_embed = SpectralNorm(nn.Linear(num_classes, num_features, bias=False))

    def forward(self, x, y):
        out = self.bn(x)
        gamma = self.gamma_embed(y) + 1
        beta = self.beta_embed(y)
        out = gamma.view(-1, self.num_features, 1, 1) * out + beta.view(
            -1, self.num_features, 1, 1
        )
        return out


class GBlock(nn.Module):
    def __init__(
        self,
        in_channel,
        out_channel,
        kernel_size=[3, 3],
        padding=1,
        stride=1,
        n_class=None,
        bn=True,
        activation=F.relu,
        upsample=True,
        downsample=False,
        z_dim=148,
    ):
        super().__init__()

        self.conv0 = SpectralNorm(
            nn.Conv2d(
                in_channel,
                out_channel,
                kernel_size,
                stride,
                padding,
                bias=True if bn else True,
            )
        )
        self.conv1 = SpectralNorm(
            nn.Conv2d(
                out_channel,
                out_channel,
                kernel_size,
                stride,
                padding,
                bias=True if bn else True,
            )
        )

        self.skip_proj = False
        if in_channel != out_channel or upsample or downsample:
            self.conv_sc = SpectralNorm(nn.Conv2d(in_channel, out_channel, 1, 1, 0))
            self.skip_proj = True

        self.upsample = upsample
        self.downsample = downsample
        self.activation = activation
        self.bn = bn
        if bn:
            self.HyperBN = ConditionalBatchNorm2d(in_channel, z_dim)
            self.HyperBN_1 = ConditionalBatchNorm2d(out_channel, z_dim)

    def forward(self, input, condition=None):
        out = input

        if self.bn:
            out = self.HyperBN(out, condition)
        out = self.activation(out)
        if self.upsample:
            out = F.interpolate(out, scale_factor=2)
        out = self.conv0(out)
        if self.bn:
            out = self.HyperBN_1(out, condition)
        out = self.activation(out)
        out = self.conv1(out)

        if self.downsample:
            out = F.avg_pool2d(out, 2)

        if self.skip_proj:
            skip = input
            if self.upsample:
                skip = F.interpolate(skip, scale_factor=2)
            skip = self.conv_sc(skip)
            if self.downsample:
                skip = F.avg_pool2d(skip, 2)
        else:
            skip = input
        return out + skip


class Generator128(nn.Module):
    def __init__(self, code_dim=120, n_class=1000, chn=96, debug=False):
        super().__init__()

        self.linear = nn.Linear(n_class, 128, bias=False)

        if debug:
            chn = 8

        self.first_view = 16 * chn

        self.G_linear = SpectralNorm(nn.Linear(20, 4 * 4 * 16 * chn))

        z_dim = code_dim + 28

        self.GBlock = nn.ModuleList(
            [
                GBlock(16 * chn, 16 * chn, n_class=n_class, z_dim=z_dim),
                GBlock(16 * chn, 8 * chn, n_class=n_class, z_dim=z_dim),
                GBlock(8 * chn, 4 * chn, n_class=n_class, z_dim=z_dim),
                GBlock(4 * chn, 2 * chn, n_class=n_class, z_dim=z_dim),
                GBlock(2 * chn, 1 * chn, n_class=n_class, z_dim=z_dim),
            ]
        )

        self.sa_id = 4
        self.num_split = len(self.GBlock) + 1
        self.attention = SelfAttention(2 * chn)
        self.ScaledCrossReplicaBN = nn.BatchNorm2d(1 * chn, eps=1e-4)
        self.colorize = SpectralNorm(nn.Conv2d(1 * chn, 3, [3, 3], padding=1))

    def forward(self, input, class_id):
        codes = torch.chunk(input, self.num_split, 1)
        class_emb = self.linear(class_id)  # 128

        out = self.G_linear(codes[0])
        out = out.view(-1, 4, 4, self.first_view).permute(0, 3, 1, 2)
        for i, (code, GBlock) in enumerate(zip(codes[1:], self.GBlock)):
            if i == self.sa_id:
                out = self.attention(out)
            condition = torch.cat([code, class_emb], 1)
            out = GBlock(out, condition)

        out = self.ScaledCrossReplicaBN(out)
        out = F.relu(out)
        out = self.colorize(out)
        return torch.tanh(out)

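# Illustration (not part of the original file): Generator128 consumes a 120-d
# latent by chunking it into num_split = 6 pieces of 20 dims; the first feeds
# G_linear, and each remaining chunk is concatenated with the 128-d class
# embedding to form the 148-d condition for one GBlock (hence z_dim=148 above).
def _demo_generator128_latent_split():  # hypothetical smoke test, safe to delete
    G = Generator128(debug=True)  # debug=True shrinks channels for a quick run
    z = torch.from_numpy(truncated_z_sample(2, 120, truncation=0.4)).float()
    y = torch.zeros(2, 1000)
    y[:, 207] = 1.0  # one-hot class vector (class 207)
    with torch.no_grad():
        imgs = denorm(G(z, y))
    return imgs.shape  # torch.Size([2, 3, 128, 128])
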
class Generator256(nn.Module):
    def __init__(self, code_dim=140, n_class=1000, chn=96, debug=False):
        super().__init__()

        self.linear = nn.Linear(n_class, 128, bias=False)

        if debug:
            chn = 8

        self.first_view = 16 * chn

        self.G_linear = SpectralNorm(nn.Linear(20, 4 * 4 * 16 * chn))

        self.GBlock = nn.ModuleList(
            [
                GBlock(16 * chn, 16 * chn, n_class=n_class),
                GBlock(16 * chn, 8 * chn, n_class=n_class),
                GBlock(8 * chn, 8 * chn, n_class=n_class),
                GBlock(8 * chn, 4 * chn, n_class=n_class),
                GBlock(4 * chn, 2 * chn, n_class=n_class),
                GBlock(2 * chn, 1 * chn, n_class=n_class),
            ]
        )

        self.sa_id = 5
        self.num_split = len(self.GBlock) + 1
        self.attention = SelfAttention(2 * chn)
        self.ScaledCrossReplicaBN = nn.BatchNorm2d(1 * chn, eps=1e-4)
        self.colorize = SpectralNorm(nn.Conv2d(1 * chn, 3, [3, 3], padding=1))

    def forward(self, input, class_id):
        codes = torch.chunk(input, self.num_split, 1)
        class_emb = self.linear(class_id)  # 128

        out = self.G_linear(codes[0])
        out = out.view(-1, 4, 4, self.first_view).permute(0, 3, 1, 2)
        for i, (code, GBlock) in enumerate(zip(codes[1:], self.GBlock)):
            if i == self.sa_id:
                out = self.attention(out)
            condition = torch.cat([code, class_emb], 1)
            out = GBlock(out, condition)

        out = self.ScaledCrossReplicaBN(out)
        out = F.relu(out)
        out = self.colorize(out)
        return torch.tanh(out)


class Generator512(nn.Module):
    def __init__(self, code_dim=128, n_class=1000, chn=96, debug=False):
        super().__init__()

        self.linear = nn.Linear(n_class, 128, bias=False)

        if debug:
            chn = 8

        self.first_view = 16 * chn

        self.G_linear = SpectralNorm(nn.Linear(16, 4 * 4 * 16 * chn))

        z_dim = code_dim + 16

        self.GBlock = nn.ModuleList(
            [
                GBlock(16 * chn, 16 * chn, n_class=n_class, z_dim=z_dim),
                GBlock(16 * chn, 8 * chn, n_class=n_class, z_dim=z_dim),
                GBlock(8 * chn, 8 * chn, n_class=n_class, z_dim=z_dim),
                GBlock(8 * chn, 4 * chn, n_class=n_class, z_dim=z_dim),
                GBlock(4 * chn, 2 * chn, n_class=n_class, z_dim=z_dim),
                GBlock(2 * chn, 1 * chn, n_class=n_class, z_dim=z_dim),
                GBlock(1 * chn, 1 * chn, n_class=n_class, z_dim=z_dim),
            ]
        )

        self.sa_id = 4
        self.num_split = len(self.GBlock) + 1
        self.attention = SelfAttention(4 * chn)
        self.ScaledCrossReplicaBN = nn.BatchNorm2d(1 * chn)
        self.colorize = SpectralNorm(nn.Conv2d(1 * chn, 3, [3, 3], padding=1))

    def forward(self, input, class_id):
        codes = torch.chunk(input, self.num_split, 1)
        class_emb = self.linear(class_id)  # 128

        out = self.G_linear(codes[0])
        out = out.view(-1, 4, 4, self.first_view).permute(0, 3, 1, 2)
        for i, (code, GBlock) in enumerate(zip(codes[1:], self.GBlock)):
            if i == self.sa_id:
                out = self.attention(out)
            condition = torch.cat([code, class_emb], 1)
            out = GBlock(out, condition)

        out = self.ScaledCrossReplicaBN(out)
        out = F.relu(out)
        out = self.colorize(out)
        return torch.tanh(out)


class Discriminator(nn.Module):
    def __init__(self, n_class=1000, chn=96, debug=False):
        super().__init__()

        def conv(in_channel, out_channel, downsample=True):
            return GBlock(
                in_channel, out_channel, bn=False, upsample=False, downsample=downsample
            )

        if debug:
            chn = 8
        self.debug = debug

        self.pre_conv = nn.Sequential(
            SpectralNorm(nn.Conv2d(3, 1 * chn, 3, padding=1)),
            nn.ReLU(),
            SpectralNorm(nn.Conv2d(1 * chn, 1 * chn, 3, padding=1)),
            nn.AvgPool2d(2),
        )
        self.pre_skip = SpectralNorm(nn.Conv2d(3, 1 * chn, 1))

        self.conv = nn.Sequential(
            conv(1 * chn, 1 * chn, downsample=True),
            conv(1 * chn, 2 * chn, downsample=True),
            SelfAttention(2 * chn),
            conv(2 * chn, 2 * chn, downsample=True),
            conv(2 * chn, 4 * chn, downsample=True),
            conv(4 * chn, 8 * chn, downsample=True),
            conv(8 * chn, 8 * chn, downsample=True),
            conv(8 * chn, 16 * chn, downsample=True),
            conv(16 * chn, 16 * chn, downsample=False),
        )

        self.linear = SpectralNorm(nn.Linear(16 * chn, 1))

        self.embed = nn.Embedding(n_class, 16 * chn)
        self.embed.weight.data.uniform_(-0.1, 0.1)
        self.embed = SpectralNorm(self.embed)

    def forward(self, input, class_id):

        out = self.pre_conv(input)
        out += self.pre_skip(F.avg_pool2d(input, 2))
        out = self.conv(out)
        out = F.relu(out)
        out = out.view(out.size(0), out.size(1), -1)
        out = out.sum(2)
        out_linear = self.linear(out).squeeze(1)
        embed = self.embed(class_id)

        prod = (out * embed).sum(1)

        return out_linear + prod
BigGAN_PyTorch/TFHub/converter.py
ADDED
@@ -0,0 +1,558 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# All contributions by Andy Brock:
# Copyright (c) 2019 Andy Brock
#
# MIT License

"""Utilities for converting TFHub BigGAN generator weights to PyTorch.

Recommended usage:

To convert all BigGAN variants and generate test samples, use:

```bash
CUDA_VISIBLE_DEVICES=0 python converter.py --generate_samples
```

See `parse_args` for additional options.
"""

import argparse
import os
import sys

import h5py
import torch
import torch.nn as nn
from torchvision.utils import save_image
import tensorflow as tf
import tensorflow_hub as hub
import parse

# import reference biggan from this folder
import biggan_v1 as biggan_for_conversion

# Import model from main folder
sys.path.append("..")
import BigGAN


DEVICE = "cuda"
HDF5_TMPL = "biggan-{}.h5"
PTH_TMPL = "biggan-{}.pth"
MODULE_PATH_TMPL = "https://tfhub.dev/deepmind/biggan-{}/2"
Z_DIMS = {128: 120, 256: 140, 512: 128}
RESOLUTIONS = list(Z_DIMS)


def dump_tfhub_to_hdf5(module_path, hdf5_path, redownload=False):
    """Loads TFHub weights and saves them to an intermediate HDF5 file.

    Args:
        module_path ([Path-like]): Path to TFHub module.
        hdf5_path ([Path-like]): Path to output HDF5 file.

    Returns:
        [h5py.File]: Loaded hdf5 file containing module weights.
    """
    if os.path.exists(hdf5_path) and (not redownload):
        print("Loading BigGAN hdf5 file from:", hdf5_path)
        return h5py.File(hdf5_path, "r")

    print("Loading BigGAN module from:", module_path)
    tf.reset_default_graph()
    hub.Module(module_path)
    print("Loaded BigGAN module from:", module_path)

    initializer = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(initializer)

    print("Saving BigGAN weights to:", hdf5_path)
    h5f = h5py.File(hdf5_path, "w")
    for var in tf.global_variables():
        val = sess.run(var)
        h5f.create_dataset(var.name, data=val)
        print(f"Saving {var.name} with shape {val.shape}")
    h5f.close()
    return h5py.File(hdf5_path, "r")

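# Illustration (not part of the original file): each TF variable is written
# under its full name, so the slashes become nested HDF5 groups. A weight can
# be read back as, e.g.:
#
#   h5f = h5py.File("pretrained_weights/biggan-128.h5", "r")
#   w = h5f["module/Generator/G_Z/G_linear/w:0"][:]                   # plain weight
#   w_ema = h5f["module/Generator/G_Z/G_linear/w/ema_b999900:0"][:]   # EMA copy
#
# load_tf_tensor below builds exactly such paths from (prefix, var, device).
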
83 |
+
class TFHub2Pytorch(object):
|
84 |
+
|
85 |
+
TF_ROOT = "module"
|
86 |
+
|
87 |
+
NUM_GBLOCK = {128: 5, 256: 6, 512: 7}
|
88 |
+
|
89 |
+
w = "w"
|
90 |
+
b = "b"
|
91 |
+
u = "u0"
|
92 |
+
v = "u1"
|
93 |
+
gamma = "gamma"
|
94 |
+
beta = "beta"
|
95 |
+
|
96 |
+
def __init__(
|
97 |
+
self, state_dict, tf_weights, resolution=256, load_ema=True, verbose=False
|
98 |
+
):
|
99 |
+
self.state_dict = state_dict
|
100 |
+
self.tf_weights = tf_weights
|
101 |
+
self.resolution = resolution
|
102 |
+
self.verbose = verbose
|
103 |
+
if load_ema:
|
104 |
+
for name in ["w", "b", "gamma", "beta"]:
|
105 |
+
setattr(self, name, getattr(self, name) + "/ema_b999900")
|
106 |
+
|
107 |
+
def load(self):
|
108 |
+
self.load_generator()
|
109 |
+
return self.state_dict
|
110 |
+
|
111 |
+
def load_generator(self):
|
112 |
+
GENERATOR_ROOT = os.path.join(self.TF_ROOT, "Generator")
|
113 |
+
|
114 |
+
for i in range(self.NUM_GBLOCK[self.resolution]):
|
115 |
+
name_tf = os.path.join(GENERATOR_ROOT, "GBlock")
|
116 |
+
name_tf += f"_{i}" if i != 0 else ""
|
117 |
+
self.load_GBlock(f"GBlock.{i}.", name_tf)
|
118 |
+
|
119 |
+
self.load_attention("attention.", os.path.join(GENERATOR_ROOT, "attention"))
|
120 |
+
self.load_linear("linear", os.path.join(self.TF_ROOT, "linear"), bias=False)
|
121 |
+
self.load_snlinear("G_linear", os.path.join(GENERATOR_ROOT, "G_Z", "G_linear"))
|
122 |
+
self.load_colorize("colorize", os.path.join(GENERATOR_ROOT, "conv_2d"))
|
123 |
+
self.load_ScaledCrossReplicaBNs(
|
124 |
+
"ScaledCrossReplicaBN", os.path.join(GENERATOR_ROOT, "ScaledCrossReplicaBN")
|
125 |
+
)
|
126 |
+
|
127 |
+
def load_linear(self, name_pth, name_tf, bias=True):
|
128 |
+
self.state_dict[name_pth + ".weight"] = self.load_tf_tensor(
|
129 |
+
name_tf, self.w
|
130 |
+
).permute(1, 0)
|
131 |
+
if bias:
|
132 |
+
self.state_dict[name_pth + ".bias"] = self.load_tf_tensor(name_tf, self.b)
|
133 |
+
|
134 |
+
def load_snlinear(self, name_pth, name_tf, bias=True):
|
135 |
+
self.state_dict[name_pth + ".module.weight_u"] = self.load_tf_tensor(
|
136 |
+
name_tf, self.u
|
137 |
+
).squeeze()
|
138 |
+
self.state_dict[name_pth + ".module.weight_v"] = self.load_tf_tensor(
|
139 |
+
name_tf, self.v
|
140 |
+
).squeeze()
|
141 |
+
self.state_dict[name_pth + ".module.weight_bar"] = self.load_tf_tensor(
|
142 |
+
name_tf, self.w
|
143 |
+
).permute(1, 0)
|
144 |
+
if bias:
|
145 |
+
self.state_dict[name_pth + ".module.bias"] = self.load_tf_tensor(
|
146 |
+
name_tf, self.b
|
147 |
+
)
|
148 |
+
|
149 |
+
def load_colorize(self, name_pth, name_tf):
|
150 |
+
self.load_snconv(name_pth, name_tf)
|
151 |
+
|
152 |
+
def load_GBlock(self, name_pth, name_tf):
|
153 |
+
self.load_convs(name_pth, name_tf)
|
154 |
+
self.load_HyperBNs(name_pth, name_tf)
|
155 |
+
|
156 |
+
def load_convs(self, name_pth, name_tf):
|
157 |
+
self.load_snconv(name_pth + "conv0", os.path.join(name_tf, "conv0"))
|
158 |
+
self.load_snconv(name_pth + "conv1", os.path.join(name_tf, "conv1"))
|
159 |
+
self.load_snconv(name_pth + "conv_sc", os.path.join(name_tf, "conv_sc"))
|
160 |
+
|
161 |
+
def load_snconv(self, name_pth, name_tf, bias=True):
|
162 |
+
if self.verbose:
|
163 |
+
print(f"loading: {name_pth} from {name_tf}")
|
164 |
+
self.state_dict[name_pth + ".module.weight_u"] = self.load_tf_tensor(
|
165 |
+
name_tf, self.u
|
166 |
+
).squeeze()
|
167 |
+
self.state_dict[name_pth + ".module.weight_v"] = self.load_tf_tensor(
|
168 |
+
name_tf, self.v
|
169 |
+
).squeeze()
|
170 |
+
self.state_dict[name_pth + ".module.weight_bar"] = self.load_tf_tensor(
|
171 |
+
name_tf, self.w
|
172 |
+
).permute(3, 2, 0, 1)
|
173 |
+
if bias:
|
174 |
+
self.state_dict[name_pth + ".module.bias"] = self.load_tf_tensor(
|
175 |
+
name_tf, self.b
|
176 |
+
).squeeze()
|
177 |
+
|
178 |
+
def load_conv(self, name_pth, name_tf, bias=True):
|
179 |
+
|
180 |
+
self.state_dict[name_pth + ".weight_u"] = self.load_tf_tensor(
|
181 |
+
name_tf, self.u
|
182 |
+
).squeeze()
|
183 |
+
self.state_dict[name_pth + ".weight_v"] = self.load_tf_tensor(
|
184 |
+
name_tf, self.v
|
185 |
+
).squeeze()
|
186 |
+
self.state_dict[name_pth + ".weight_bar"] = self.load_tf_tensor(
|
187 |
+
name_tf, self.w
|
188 |
+
).permute(3, 2, 0, 1)
|
189 |
+
if bias:
|
190 |
+
self.state_dict[name_pth + ".bias"] = self.load_tf_tensor(name_tf, self.b)
|
191 |
+
|
192 |
+
def load_HyperBNs(self, name_pth, name_tf):
|
193 |
+
self.load_HyperBN(name_pth + "HyperBN", os.path.join(name_tf, "HyperBN"))
|
194 |
+
self.load_HyperBN(name_pth + "HyperBN_1", os.path.join(name_tf, "HyperBN_1"))
|
195 |
+
|
196 |
+
def load_ScaledCrossReplicaBNs(self, name_pth, name_tf):
|
197 |
+
self.state_dict[name_pth + ".bias"] = self.load_tf_tensor(
|
198 |
+
name_tf, self.beta
|
199 |
+
).squeeze()
|
200 |
+
self.state_dict[name_pth + ".weight"] = self.load_tf_tensor(
|
201 |
+
name_tf, self.gamma
|
202 |
+
).squeeze()
|
203 |
+
self.state_dict[name_pth + ".running_mean"] = self.load_tf_tensor(
|
204 |
+
name_tf + "bn", "accumulated_mean"
|
205 |
+
)
|
206 |
+
self.state_dict[name_pth + ".running_var"] = self.load_tf_tensor(
|
207 |
+
name_tf + "bn", "accumulated_var"
|
208 |
+
)
|
209 |
+
self.state_dict[name_pth + ".num_batches_tracked"] = torch.tensor(
|
210 |
+
self.tf_weights[os.path.join(name_tf + "bn", "accumulation_counter:0")][()],
|
211 |
+
dtype=torch.float32,
|
212 |
+
)
|
213 |
+
|
214 |
+
def load_HyperBN(self, name_pth, name_tf):
|
215 |
+
if self.verbose:
|
216 |
+
print(f"loading: {name_pth} from {name_tf}")
|
217 |
+
beta = name_pth + ".beta_embed.module"
|
218 |
+
gamma = name_pth + ".gamma_embed.module"
|
219 |
+
self.state_dict[beta + ".weight_u"] = self.load_tf_tensor(
|
220 |
+
os.path.join(name_tf, "beta"), self.u
|
221 |
+
).squeeze()
|
222 |
+
self.state_dict[gamma + ".weight_u"] = self.load_tf_tensor(
|
223 |
+
os.path.join(name_tf, "gamma"), self.u
|
224 |
+
).squeeze()
|
225 |
+
self.state_dict[beta + ".weight_v"] = self.load_tf_tensor(
|
226 |
+
os.path.join(name_tf, "beta"), self.v
|
227 |
+
).squeeze()
|
228 |
+
self.state_dict[gamma + ".weight_v"] = self.load_tf_tensor(
|
229 |
+
os.path.join(name_tf, "gamma"), self.v
|
230 |
+
).squeeze()
|
231 |
+
self.state_dict[beta + ".weight_bar"] = self.load_tf_tensor(
|
232 |
+
os.path.join(name_tf, "beta"), self.w
|
233 |
+
).permute(1, 0)
|
234 |
+
self.state_dict[gamma + ".weight_bar"] = self.load_tf_tensor(
|
235 |
+
os.path.join(name_tf, "gamma"), self.w
|
236 |
+
).permute(1, 0)
|
237 |
+
|
238 |
+
cr_bn_name = name_tf.replace("HyperBN", "CrossReplicaBN")
|
239 |
+
self.state_dict[name_pth + ".bn.running_mean"] = self.load_tf_tensor(
|
240 |
+
cr_bn_name, "accumulated_mean"
|
241 |
+
)
|
242 |
+
self.state_dict[name_pth + ".bn.running_var"] = self.load_tf_tensor(
|
243 |
+
cr_bn_name, "accumulated_var"
|
244 |
+
)
|
245 |
+
self.state_dict[name_pth + ".bn.num_batches_tracked"] = torch.tensor(
|
246 |
+
self.tf_weights[os.path.join(cr_bn_name, "accumulation_counter:0")][()],
|
247 |
+
dtype=torch.float32,
|
248 |
+
)
|
249 |
+
|
250 |
+
def load_attention(self, name_pth, name_tf):
|
251 |
+
|
252 |
+
self.load_snconv(name_pth + "theta", os.path.join(name_tf, "theta"), bias=False)
|
253 |
+
self.load_snconv(name_pth + "phi", os.path.join(name_tf, "phi"), bias=False)
|
254 |
+
self.load_snconv(name_pth + "g", os.path.join(name_tf, "g"), bias=False)
|
255 |
+
self.load_snconv(
|
256 |
+
name_pth + "o_conv", os.path.join(name_tf, "o_conv"), bias=False
|
257 |
+
)
|
258 |
+
self.state_dict[name_pth + "gamma"] = self.load_tf_tensor(name_tf, self.gamma)
|
259 |
+
|
260 |
+
def load_tf_tensor(self, prefix, var, device="0"):
|
261 |
+
name = os.path.join(prefix, var) + f":{device}"
|
262 |
+
return torch.from_numpy(self.tf_weights[name][:])
|
263 |
+
|
264 |
+
|
265 |
+
# Convert from v1: This function maps
|
266 |
+
def convert_from_v1(hub_dict, resolution=128):
|
267 |
+
weightname_dict = {"weight_u": "u0", "weight_bar": "weight", "bias": "bias"}
|
268 |
+
convnum_dict = {"conv0": "conv1", "conv1": "conv2", "conv_sc": "conv_sc"}
|
269 |
+
attention_blocknum = {128: 3, 256: 4, 512: 3}[resolution]
|
270 |
+
hub2me = {
|
271 |
+
"linear.weight": "shared.weight", # This is actually the shared weight
|
272 |
+
# Linear stuff
|
273 |
+
"G_linear.module.weight_bar": "linear.weight",
|
274 |
+
"G_linear.module.bias": "linear.bias",
|
275 |
+
"G_linear.module.weight_u": "linear.u0",
|
276 |
+
# output layer stuff
|
277 |
+
"ScaledCrossReplicaBN.weight": "output_layer.0.gain",
|
278 |
+
"ScaledCrossReplicaBN.bias": "output_layer.0.bias",
|
279 |
+
"ScaledCrossReplicaBN.running_mean": "output_layer.0.stored_mean",
|
280 |
+
"ScaledCrossReplicaBN.running_var": "output_layer.0.stored_var",
|
281 |
+
"colorize.module.weight_bar": "output_layer.2.weight",
|
282 |
+
"colorize.module.bias": "output_layer.2.bias",
|
283 |
+
"colorize.module.weight_u": "output_layer.2.u0",
|
284 |
+
# Attention stuff
|
285 |
+
"attention.gamma": "blocks.%d.1.gamma" % attention_blocknum,
|
286 |
+
"attention.theta.module.weight_u": "blocks.%d.1.theta.u0" % attention_blocknum,
|
287 |
+
"attention.theta.module.weight_bar": "blocks.%d.1.theta.weight"
|
288 |
+
% attention_blocknum,
|
289 |
+
"attention.phi.module.weight_u": "blocks.%d.1.phi.u0" % attention_blocknum,
|
290 |
+
"attention.phi.module.weight_bar": "blocks.%d.1.phi.weight"
|
291 |
+
% attention_blocknum,
|
292 |
+
"attention.g.module.weight_u": "blocks.%d.1.g.u0" % attention_blocknum,
|
293 |
+
"attention.g.module.weight_bar": "blocks.%d.1.g.weight" % attention_blocknum,
|
294 |
+
"attention.o_conv.module.weight_u": "blocks.%d.1.o.u0" % attention_blocknum,
|
295 |
+
"attention.o_conv.module.weight_bar": "blocks.%d.1.o.weight"
|
296 |
+
% attention_blocknum,
|
297 |
+
}
|
298 |
+
|
299 |
+
# Loop over the hub dict and build the hub2me map
|
300 |
+
for name in hub_dict.keys():
|
301 |
+
if "GBlock" in name:
|
302 |
+
if "HyperBN" not in name: # it's a conv
|
303 |
+
out = parse.parse("GBlock.{:d}.{}.module.{}", name)
|
304 |
+
blocknum, convnum, weightname = out
|
305 |
+
if weightname not in weightname_dict:
|
306 |
+
continue # else hyperBN in
|
307 |
+
out_name = "blocks.%d.0.%s.%s" % (
|
308 |
+
blocknum,
|
309 |
+
convnum_dict[convnum],
|
310 |
+
weightname_dict[weightname],
|
311 |
+
) # Increment conv number by 1
|
312 |
+
else: # hyperbn not conv
|
313 |
+
BNnum = 2 if "HyperBN_1" in name else 1
|
314 |
+
if "embed" in name:
|
315 |
+
out = parse.parse("GBlock.{:d}.{}.module.{}", name)
|
316 |
+
blocknum, gamma_or_beta, weightname = out
|
317 |
+
if weightname not in weightname_dict: # Ignore weight_v
|
318 |
+
continue
|
319 |
+
out_name = "blocks.%d.0.bn%d.%s.%s" % (
|
320 |
+
blocknum,
|
321 |
+
BNnum,
|
322 |
+
"gain" if "gamma" in gamma_or_beta else "bias",
|
323 |
+
weightname_dict[weightname],
|
324 |
+
)
|
325 |
+
else:
|
326 |
+
out = parse.parse("GBlock.{:d}.{}.bn.{}", name)
|
327 |
+
blocknum, dummy, mean_or_var = out
|
328 |
+
if "num_batches_tracked" in mean_or_var:
|
329 |
+
continue
|
330 |
+
out_name = "blocks.%d.0.bn%d.%s" % (
|
331 |
+
blocknum,
|
332 |
+
BNnum,
|
333 |
+
"stored_mean" if "mean" in mean_or_var else "stored_var",
|
334 |
+
)
|
335 |
+
hub2me[name] = out_name
|
336 |
+
|
337 |
+
# Invert the hub2me map
|
338 |
+
me2hub = {hub2me[item]: item for item in hub2me}
|
339 |
+
new_dict = {}
|
340 |
+
dimz_dict = {128: 20, 256: 20, 512: 16}
|
341 |
+
for item in me2hub:
|
342 |
+
# Swap input dim ordering on batchnorm bois to account for my arbitrary change of ordering when concatenating Ys and Zs
|
343 |
+
if (
|
344 |
+
("bn" in item and "weight" in item)
|
345 |
+
and ("gain" in item or "bias" in item)
|
346 |
+
and ("output_layer" not in item)
|
347 |
+
):
|
348 |
+
new_dict[item] = torch.cat(
|
349 |
+
[
|
350 |
+
hub_dict[me2hub[item]][:, -128:],
|
351 |
+
hub_dict[me2hub[item]][:, : dimz_dict[resolution]],
|
352 |
+
],
|
353 |
+
1,
|
354 |
+
)
|
355 |
+
# Reshape the first linear weight, bias, and u0
|
356 |
+
elif item == "linear.weight":
|
357 |
+
new_dict[item] = (
|
358 |
+
hub_dict[me2hub[item]]
|
359 |
+
.contiguous()
|
360 |
+
.view(4, 4, 96 * 16, -1)
|
361 |
+
.permute(2, 0, 1, 3)
|
362 |
+
.contiguous()
|
363 |
+
.view(-1, dimz_dict[resolution])
|
364 |
+
)
|
365 |
+
elif item == "linear.bias":
|
366 |
+
new_dict[item] = (
|
367 |
+
hub_dict[me2hub[item]]
|
368 |
+
.view(4, 4, 96 * 16)
|
369 |
+
.permute(2, 0, 1)
|
370 |
+
.contiguous()
|
371 |
+
.view(-1)
|
372 |
+
)
|
373 |
+
elif item == "linear.u0":
|
374 |
+
new_dict[item] = (
|
375 |
+
hub_dict[me2hub[item]]
|
376 |
+
.view(4, 4, 96 * 16)
|
377 |
+
.permute(2, 0, 1)
|
378 |
+
.contiguous()
|
379 |
+
.view(1, -1)
|
380 |
+
)
|
381 |
+
elif (
|
382 |
+
me2hub[item] == "linear.weight"
|
383 |
+
): # THIS IS THE SHARED WEIGHT NOT THE FIRST LINEAR LAYER
|
384 |
+
# Transpose shared weight so that it's an embedding
|
385 |
+
new_dict[item] = hub_dict[me2hub[item]].t()
|
386 |
+
elif "weight_u" in me2hub[item]: # Unsqueeze u0s
|
387 |
+
new_dict[item] = hub_dict[me2hub[item]].unsqueeze(0)
|
388 |
+
else:
|
389 |
+
new_dict[item] = hub_dict[me2hub[item]]
|
390 |
+
return new_dict
|
391 |
+
|
392 |
+
|
393 |
+
def get_config(resolution):
    attn_dict = {128: "64", 256: "128", 512: "64"}
    dim_z_dict = {128: 120, 256: 140, 512: 128}
    config = {
        "G_param": "SN",
        "D_param": "SN",
        "G_ch": 96,
        "D_ch": 96,
        "D_wide": True,
        "G_shared": True,
        "shared_dim": 128,
        "dim_z": dim_z_dict[resolution],
        "hier": True,
        "cross_replica": False,
        "mybn": False,
        "G_activation": nn.ReLU(inplace=True),
        "G_attn": attn_dict[resolution],
        "norm_style": "bn",
        "G_init": "ortho",
        "skip_init": True,
        "no_optim": True,
        "G_fp16": False,
        "G_mixed_precision": False,
        "accumulate_stats": False,
        "num_standing_accumulations": 16,
        "G_eval_mode": True,
        "BN_eps": 1e-04,
        "SN_eps": 1e-04,
        "num_G_SVs": 1,
        "num_G_SV_itrs": 1,
        "resolution": resolution,
        "n_classes": 1000,
    }
    return config


def convert_biggan(
    resolution, weight_dir, redownload=False, no_ema=False, verbose=False
):
    module_path = MODULE_PATH_TMPL.format(resolution)
    hdf5_path = os.path.join(weight_dir, HDF5_TMPL.format(resolution))
    pth_path = os.path.join(weight_dir, PTH_TMPL.format(resolution))

    tf_weights = dump_tfhub_to_hdf5(module_path, hdf5_path, redownload=redownload)
    G_temp = getattr(biggan_for_conversion, f"Generator{resolution}")()
    state_dict_temp = G_temp.state_dict()

    converter = TFHub2Pytorch(
        state_dict_temp,
        tf_weights,
        resolution=resolution,
        load_ema=(not no_ema),
        verbose=verbose,
    )
    state_dict_v1 = converter.load()
    state_dict = convert_from_v1(state_dict_v1, resolution)
    # Get the config, build the model
    config = get_config(resolution)
    G = BigGAN.Generator(**config)
    G.load_state_dict(state_dict, strict=False)  # Ignore missing sv0 entries
    torch.save(state_dict, pth_path)

    # output_location ='pretrained_weights/TFHub-PyTorch-128.pth'

    return G


def generate_sample(G, z_dim, batch_size, filename, parallel=False):

    G.eval()
    G.to(DEVICE)
    with torch.no_grad():
        z = torch.randn(batch_size, G.dim_z).to(DEVICE)
        y = torch.randint(
            low=0,
            high=1000,
            size=(batch_size,),
            device=DEVICE,
            dtype=torch.int64,
            requires_grad=False,
        )
        if parallel:
            images = nn.parallel.data_parallel(G, (z, G.shared(y)))
        else:
            images = G(z, G.shared(y))
        save_image(images, filename, scale_each=True, normalize=True)


def parse_args():
    usage = "Parser for conversion script."
    parser = argparse.ArgumentParser(description=usage)
    parser.add_argument(
        "--resolution",
        "-r",
        type=int,
        default=None,
        choices=[128, 256, 512],
        help="Resolution of TFHub module to convert. Converts all resolutions if None.",
    )
    parser.add_argument(
        "--redownload",
        action="store_true",
        default=False,
        help="Redownload weights and overwrite current hdf5 file, if present.",
    )
    parser.add_argument("--weights_dir", type=str, default="pretrained_weights")
    parser.add_argument("--samples_dir", type=str, default="pretrained_samples")
    parser.add_argument(
        "--no_ema", action="store_true", default=False, help="Do not load ema weights."
    )
    parser.add_argument(
        "--verbose", action="store_true", default=False, help="Additional logging."
    )
    parser.add_argument(
        "--generate_samples",
        action="store_true",
        default=False,
        help="Generate test sample with pretrained model.",
    )
    parser.add_argument(
        "--batch_size", type=int, default=64, help="Batch size used for test sample."
    )
    parser.add_argument(
        "--parallel", action="store_true", default=False, help="Parallelize G?"
    )
    args = parser.parse_args()
    return args


if __name__ == "__main__":

    args = parse_args()
    os.makedirs(args.weights_dir, exist_ok=True)
    os.makedirs(args.samples_dir, exist_ok=True)

    if args.resolution is not None:
        G = convert_biggan(
            args.resolution,
            args.weights_dir,
            redownload=args.redownload,
            no_ema=args.no_ema,
            verbose=args.verbose,
        )
        if args.generate_samples:
            filename = os.path.join(
                args.samples_dir, f"biggan{args.resolution}_samples.jpg"
            )
            print("Generating samples...")
            generate_sample(
                G, Z_DIMS[args.resolution], args.batch_size, filename, args.parallel
            )
    else:
        for res in RESOLUTIONS:
            G = convert_biggan(
                res,
                args.weights_dir,
                redownload=args.redownload,
                no_ema=args.no_ema,
                verbose=args.verbose,
            )
            if args.generate_samples:
                filename = os.path.join(args.samples_dir, f"biggan{res}_samples.jpg")
                print("Generating samples...")
                generate_sample(
                    G, Z_DIMS[res], args.batch_size, filename, args.parallel
                )
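A minimal usage sketch for the conversion script above (illustrative only, not
part of the commit; it assumes the script is run from its own directory and
that Z_DIMS maps 128 -> 120, matching dim_z_dict in get_config):

    # shell: convert the 128x128 TFHub module and write a test grid
    #   python converter.py -r 128 --generate_samples
    # or programmatically:
    import converter
    G = converter.convert_biggan(128, "pretrained_weights")
    converter.generate_sample(G, 120, 16, "pretrained_samples/test.jpg")
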
BigGAN_PyTorch/animal_hash.py
ADDED
@@ -0,0 +1,2652 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# All contributions by Andy Brock:
# Copyright (c) 2019 Andy Brock
#
# MIT License

c = [
    "Aardvark",
    "Abyssinian",
    "Affenpinscher",
    "Akbash",
    "Akita",
    "Albatross",
    "Alligator",
    "Alpaca",
    "Angelfish",
    "Ant",
    "Anteater",
    "Antelope",
    "Ape",
    "Armadillo",
    "Ass",
    "Avocet",
    "Axolotl",
    "Baboon",
    "Badger",
    "Balinese",
    "Bandicoot",
    "Barb",
    "Barnacle",
    "Barracuda",
    "Bat",
    "Beagle",
    "Bear",
    "Beaver",
    "Bee",
    "Beetle",
    "Binturong",
    "Bird",
    "Birman",
    "Bison",
    "Bloodhound",
    "Boar",
    "Bobcat",
    "Bombay",
    "Bongo",
    "Bonobo",
    "Booby",
    "Budgerigar",
    "Buffalo",
    "Bulldog",
    "Bullfrog",
    "Burmese",
    "Butterfly",
    "Caiman",
    "Camel",
    "Capybara",
    "Caracal",
    "Caribou",
    "Cassowary",
    "Cat",
    "Caterpillar",
    "Catfish",
    "Cattle",
    "Centipede",
    "Chameleon",
    "Chamois",
    "Cheetah",
    "Chicken",
    "Chihuahua",
    "Chimpanzee",
    "Chinchilla",
    "Chinook",
    "Chipmunk",
    "Chough",
    "Cichlid",
    "Clam",
    "Coati",
    "Cobra",
    "Cockroach",
    "Cod",
    "Collie",
    "Coral",
    "Cormorant",
    "Cougar",
    "Cow",
    "Coyote",
    "Crab",
    "Crane",
    "Crocodile",
    "Crow",
    "Curlew",
    "Cuscus",
    "Cuttlefish",
    "Dachshund",
    "Dalmatian",
    "Deer",
    "Dhole",
    "Dingo",
    "Dinosaur",
    "Discus",
    "Dodo",
    "Dog",
    "Dogball",
    "Dogfish",
    "Dolphin",
    "Donkey",
    "Dormouse",
    "Dove",
    "Dragonfly",
    "Drever",
    "Duck",
    "Dugong",
    "Dunker",
    "Dunlin",
    "Eagle",
    "Earwig",
    "Echidna",
    "Eel",
    "Eland",
    "Elephant",
    "ElephantSeal",
    "Elk",
    "Emu",
    "Falcon",
    "Ferret",
    "Finch",
    "Fish",
    "Flamingo",
    "Flounder",
    "Fly",
    "Fossa",
    "Fox",
    "Frigatebird",
    "Frog",
    "Galago",
    "Gar",
    "Gaur",
    "Gazelle",
    "Gecko",
    "Gerbil",
    "Gharial",
    "GiantPanda",
    "Gibbon",
    "Giraffe",
    "Gnat",
    "Gnu",
    "Goat",
    "Goldfinch",
    "Goldfish",
    "Goose",
    "Gopher",
    "Gorilla",
    "Goshawk",
    "Grasshopper",
    "Greyhound",
    "Grouse",
    "Guanaco",
    "GuineaFowl",
    "GuineaPig",
    "Gull",
    "Guppy",
    "Hamster",
    "Hare",
    "Harrier",
    "Havanese",
    "Hawk",
    "Hedgehog",
    "Heron",
    "Herring",
    "Himalayan",
    "Hippopotamus",
    "Hornet",
    "Horse",
    "Human",
    "Hummingbird",
    "Hyena",
    "Ibis",
    "Iguana",
    "Impala",
    "Indri",
    "Insect",
    "Jackal",
    "Jaguar",
    "Javanese",
    "Jay",
    "Jellyfish",
    "Kakapo",
    "Kangaroo",
    "Kingfisher",
    "Kiwi",
    "Koala",
    "KomodoDragon",
    "Kouprey",
    "Kudu",
    "Labradoodle",
    "Ladybird",
    "Lapwing",
    "Lark",
    "Lemming",
    "Lemur",
    "Leopard",
    "Liger",
    "Lion",
    "Lionfish",
    "Lizard",
    "Llama",
    "Lobster",
    "Locust",
    "Loris",
    "Louse",
    "Lynx",
    "Lyrebird",
    "Macaw",
    "Magpie",
    "Mallard",
    "Maltese",
    "Manatee",
    "Mandrill",
    "Markhor",
    "Marten",
    "Mastiff",
    "Mayfly",
    "Meerkat",
    "Millipede",
    "Mink",
    "Mole",
    "Molly",
    "Mongoose",
    "Mongrel",
    "Monkey",
    "Moorhen",
    "Moose",
    "Mosquito",
    "Moth",
    "Mouse",
    "Mule",
    "Narwhal",
    "Neanderthal",
    "Newfoundland",
    "Newt",
    "Nightingale",
    "Numbat",
    "Ocelot",
    "Octopus",
    "Okapi",
    "Olm",
    "Opossum",
    "Orang-utan",
    "Oryx",
    "Ostrich",
    "Otter",
    "Owl",
    "Ox",
    "Oyster",
    "Pademelon",
    "Panther",
    "Parrot",
    "Partridge",
    "Peacock",
    "Peafowl",
    "Pekingese",
    "Pelican",
    "Penguin",
    "Persian",
    "Pheasant",
    "Pig",
    "Pigeon",
    "Pika",
    "Pike",
    "Piranha",
    "Platypus",
    "Pointer",
    "Pony",
    "Poodle",
    "Porcupine",
    "Porpoise",
    "Possum",
    "PrairieDog",
    "Prawn",
    "Puffin",
    "Pug",
    "Puma",
    "Quail",
    "Quelea",
    "Quetzal",
    "Quokka",
    "Quoll",
    "Rabbit",
    "Raccoon",
    "Ragdoll",
    "Rail",
    "Ram",
    "Rat",
    "Rattlesnake",
    "Raven",
    "RedDeer",
    "RedPanda",
    "Reindeer",
    "Rhinoceros",
    "Robin",
    "Rook",
    "Rottweiler",
    "Ruff",
    "Salamander",
    "Salmon",
    "SandDollar",
    "Sandpiper",
    "Saola",
    "Sardine",
    "Scorpion",
    "SeaLion",
    "SeaUrchin",
    "Seahorse",
    "Seal",
    "Serval",
    "Shark",
    "Sheep",
    "Shrew",
    "Shrimp",
    "Siamese",
    "Siberian",
    "Skunk",
    "Sloth",
    "Snail",
    "Snake",
    "Snowshoe",
    "Somali",
    "Sparrow",
    "Spider",
    "Sponge",
    "Squid",
    "Squirrel",
    "Starfish",
    "Starling",
    "Stingray",
    "Stinkbug",
    "Stoat",
    "Stork",
    "Swallow",
    "Swan",
    "Tang",
    "Tapir",
    "Tarsier",
    "Termite",
    "Tetra",
    "Tiffany",
    "Tiger",
    "Toad",
    "Tortoise",
    "Toucan",
    "Tropicbird",
    "Trout",
    "Tuatara",
    "Turkey",
    "Turtle",
    "Uakari",
    "Uguisu",
    "Umbrellabird",
    "Viper",
    "Vulture",
    "Wallaby",
    "Walrus",
    "Warthog",
    "Wasp",
    "WaterBuffalo",
    "Weasel",
    "Whale",
    "Whippet",
    "Wildebeest",
    "Wolf",
    "Wolverine",
    "Wombat",
    "Woodcock",
    "Woodlouse",
    "Woodpecker",
    "Worm",
    "Wrasse",
    "Wren",
    "Yak",
    "Zebra",
    "Zebu",
    "Zonkey",
]
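# Illustrative note (not from the original file): list c above (animals) and
# lists a (adjectives) and b (gerunds) below form a vocabulary for
# human-readable run names; a hypothetical combination could be:
#
#     import random
#     name = random.choice(a).capitalize() + random.choice(b).capitalize() + random.choice(c)
#     # e.g. "BraveJumpingKoala"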
a = [
    "able",
    "above",
    "absent",
    "absolute",
    "abstract",
    "abundant",
    "academic",
    "acceptable",
    "accepted",
    "accessible",
    "accurate",
    "accused",
    "active",
    "actual",
    "acute",
    "added",
    "additional",
    "adequate",
    "adjacent",
    "administrative",
    "adorable",
    "advanced",
    "adverse",
    "advisory",
    "aesthetic",
    "afraid",
    "african",
    "aggregate",
    "aggressive",
    "agreeable",
    "agreed",
    "agricultural",
    "alert",
    "alive",
    "alleged",
    "allied",
    "alone",
    "alright",
    "alternative",
    "amateur",
    "amazing",
    "ambitious",
    "american",
    "amused",
    "ancient",
    "angry",
    "annoyed",
    "annual",
    "anonymous",
    "anxious",
    "appalling",
    "apparent",
    "applicable",
    "appropriate",
    "arab",
    "arbitrary",
    "architectural",
    "armed",
    "arrogant",
    "artificial",
    "artistic",
    "ashamed",
    "asian",
    "asleep",
    "assistant",
    "associated",
    "atomic",
    "attractive",
    "australian",
    "automatic",
    "autonomous",
    "available",
    "average",
    "awake",
    "aware",
    "awful",
    "awkward",
    "back",
    "bad",
    "balanced",
    "bare",
    "basic",
    "beautiful",
    "beneficial",
    "better",
    "bewildered",
    "big",
    "binding",
    "biological",
    "bitter",
    "bizarre",
    "black",
    "blank",
    "blind",
    "blonde",
    "bloody",
    "blue",
    "blushing",
    "boiling",
    "bold",
    "bored",
    "boring",
    "bottom",
    "brainy",
    "brave",
    "breakable",
    "breezy",
    "brief",
    "bright",
    "brilliant",
    "british",
    "broad",
    "broken",
    "brown",
    "bumpy",
    "burning",
    "busy",
    "calm",
    "canadian",
    "capable",
    "capitalist",
    "careful",
    "casual",
    "catholic",
    "causal",
    "cautious",
    "central",
    "certain",
    "changing",
    "characteristic",
    "charming",
    "cheap",
    "cheerful",
    "chemical",
    "chief",
    "chilly",
    "chinese",
    "chosen",
    "christian",
    "chronic",
    "chubby",
    "circular",
    "civic",
    "civil",
    "civilian",
    "classic",
    "classical",
    "clean",
    "clear",
    "clever",
    "clinical",
    "close",
    "closed",
    "cloudy",
    "clumsy",
    "coastal",
    "cognitive",
    "coherent",
    "cold",
    "collective",
    "colonial",
    "colorful",
    "colossal",
    "coloured",
    "colourful",
    "combative",
    "combined",
    "comfortable",
    "coming",
    "commercial",
    "common",
    "communist",
    "compact",
    "comparable",
    "comparative",
    "compatible",
    "competent",
    "competitive",
    "complete",
    "complex",
    "complicated",
    "comprehensive",
    "compulsory",
    "conceptual",
    "concerned",
    "concrete",
    "condemned",
    "confident",
    "confidential",
    "confused",
    "conscious",
    "conservation",
    "conservative",
    "considerable",
    "consistent",
    "constant",
    "constitutional",
    "contemporary",
    "content",
    "continental",
    "continued",
    "continuing",
    "continuous",
    "controlled",
    "controversial",
    "convenient",
    "conventional",
    "convinced",
    "convincing",
    "cooing",
    "cool",
    "cooperative",
    "corporate",
    "correct",
    "corresponding",
    "costly",
    "courageous",
    "crazy",
    "creative",
    "creepy",
    "criminal",
    "critical",
    "crooked",
    "crowded",
    "crucial",
    "crude",
    "cruel",
    "cuddly",
    "cultural",
    "curious",
    "curly",
    "current",
    "curved",
    "cute",
    "daily",
    "damaged",
    "damp",
    "dangerous",
    "dark",
    "dead",
    "deaf",
    "deafening",
    "dear",
    "decent",
    "decisive",
    "deep",
    "defeated",
    "defensive",
    "defiant",
    "definite",
    "deliberate",
    "delicate",
    "delicious",
    "delighted",
    "delightful",
    "democratic",
    "dependent",
    "depressed",
    "desirable",
    "desperate",
    "detailed",
    "determined",
    "developed",
    "developing",
    "devoted",
    "different",
    "difficult",
    "digital",
    "diplomatic",
    "direct",
    "dirty",
    "disabled",
    "disappointed",
    "disastrous",
    "disciplinary",
    "disgusted",
    "distant",
    "distinct",
    "distinctive",
    "distinguished",
    "disturbed",
    "disturbing",
    "diverse",
    "divine",
    "dizzy",
    "domestic",
    "dominant",
    "double",
    "doubtful",
    "drab",
    "dramatic",
    "dreadful",
    "driving",
    "drunk",
    "dry",
    "dual",
    "due",
    "dull",
    "dusty",
    "dutch",
    "dying",
    "dynamic",
    "eager",
    "early",
    "eastern",
    "easy",
    "economic",
    "educational",
    "eerie",
    "effective",
    "efficient",
    "elaborate",
    "elated",
    "elderly",
    "eldest",
    "electoral",
    "electric",
    "electrical",
    "electronic",
    "elegant",
    "eligible",
    "embarrassed",
    "embarrassing",
    "emotional",
    "empirical",
    "empty",
    "enchanting",
    "encouraging",
    "endless",
    "energetic",
    "english",
    "enormous",
    "enthusiastic",
    "entire",
    "entitled",
    "envious",
    "environmental",
    "equal",
    "equivalent",
    "essential",
    "established",
    "estimated",
    "ethical",
    "ethnic",
    "european",
    "eventual",
    "everyday",
    "evident",
    "evil",
    "evolutionary",
    "exact",
    "excellent",
    "exceptional",
    "excess",
    "excessive",
    "excited",
    "exciting",
    "exclusive",
    "existing",
    "exotic",
    "expected",
    "expensive",
    "experienced",
    "experimental",
    "explicit",
    "extended",
    "extensive",
    "external",
    "extra",
    "extraordinary",
    "extreme",
    "exuberant",
    "faint",
    "fair",
    "faithful",
    "familiar",
    "famous",
    "fancy",
    "fantastic",
    "far",
    "fascinating",
    "fashionable",
    "fast",
    "fat",
    "fatal",
    "favourable",
    "favourite",
    "federal",
    "fellow",
    "female",
    "feminist",
    "few",
    "fierce",
    "filthy",
    "final",
    "financial",
    "fine",
    "firm",
    "fiscal",
    "fit",
    "fixed",
    "flaky",
    "flat",
    "flexible",
    "fluffy",
    "fluttering",
    "flying",
    "following",
    "fond",
    "foolish",
    "foreign",
    "formal",
    "formidable",
    "forthcoming",
    "fortunate",
    "forward",
    "fragile",
    "frail",
    "frantic",
    "free",
    "french",
    "frequent",
    "fresh",
    "friendly",
    "frightened",
    "front",
    "frozen",
    "fucking",
    "full",
    "full-time",
    "fun",
    "functional",
    "fundamental",
    "funny",
    "furious",
    "future",
    "fuzzy",
    "gastric",
    "gay",
    "general",
    "generous",
    "genetic",
    "gentle",
    "genuine",
    "geographical",
    "german",
    "giant",
    "gigantic",
    "given",
    "glad",
    "glamorous",
    "gleaming",
    "global",
    "glorious",
    "golden",
    "good",
    "gorgeous",
    "gothic",
    "governing",
    "graceful",
    "gradual",
    "grand",
    "grateful",
    "greasy",
    "great",
    "greek",
    "green",
    "grey",
    "grieving",
    "grim",
    "gross",
    "grotesque",
    "growing",
    "grubby",
    "grumpy",
    "guilty",
    "handicapped",
    "handsome",
    "happy",
    "hard",
    "harsh",
    "head",
    "healthy",
    "heavy",
    "helpful",
    "helpless",
    "hidden",
    "high",
    "high-pitched",
    "hilarious",
    "hissing",
    "historic",
    "historical",
    "hollow",
    "holy",
    "homeless",
    "homely",
    "hon",
    "honest",
    "horizontal",
    "horrible",
    "hostile",
    "hot",
    "huge",
    "human",
    "hungry",
    "hurt",
    "hushed",
    "husky",
    "icy",
    "ideal",
    "identical",
    "ideological",
    "ill",
    "illegal",
    "imaginative",
    "immediate",
    "immense",
    "imperial",
    "implicit",
    "important",
    "impossible",
    "impressed",
    "impressive",
    "improved",
    "inadequate",
    "inappropriate",
    "inc",
    "inclined",
    "increased",
    "increasing",
    "incredible",
    "independent",
    "indian",
    "indirect",
    "individual",
    "industrial",
    "inevitable",
    "influential",
    "informal",
    "inherent",
    "initial",
    "injured",
    "inland",
    "inner",
    "innocent",
    "innovative",
    "inquisitive",
    "instant",
    "institutional",
    "insufficient",
    "intact",
    "integral",
    "integrated",
    "intellectual",
    "intelligent",
    "intense",
    "intensive",
    "interested",
    "interesting",
    "interim",
    "interior",
    "intermediate",
    "internal",
    "international",
    "intimate",
    "invisible",
    "involved",
    "iraqi",
    "irish",
    "irrelevant",
    "islamic",
    "isolated",
    "israeli",
    "italian",
    "itchy",
    "japanese",
    "jealous",
    "jewish",
    "jittery",
    "joint",
    "jolly",
    "joyous",
    "judicial",
    "juicy",
    "junior",
    "just",
    "keen",
    "key",
    "kind",
    "known",
    "korean",
    "labour",
    "large",
    "large-scale",
    "late",
    "latin",
    "lazy",
    "leading",
    "left",
    "legal",
    "legislative",
    "legitimate",
    "lengthy",
    "lesser",
    "level",
    "lexical",
    "liable",
    "liberal",
    "light",
    "like",
    "likely",
    "limited",
    "linear",
    "linguistic",
    "liquid",
    "literary",
    "little",
    "live",
    "lively",
    "living",
    "local",
    "logical",
    "lonely",
    "long",
    "long-term",
    "loose",
    "lost",
    "loud",
    "lovely",
    "low",
    "loyal",
    "ltd",
    "lucky",
    "mad",
    "magenta",
    "magic",
    "magnetic",
    "magnificent",
    "main",
    "major",
    "male",
    "mammoth",
    "managerial",
    "managing",
    "manual",
    "many",
    "marginal",
    "marine",
    "marked",
    "married",
    "marvellous",
    "marxist",
    "mass",
    "massive",
    "mathematical",
    "mature",
    "maximum",
    "mean",
    "meaningful",
    "mechanical",
    "medical",
    "medieval",
    "melodic",
    "melted",
    "mental",
    "mere",
    "metropolitan",
    "mid",
    "middle",
    "middle-class",
    "mighty",
    "mild",
    "military",
    "miniature",
    "minimal",
    "minimum",
    "ministerial",
    "minor",
    "miserable",
    "misleading",
    "missing",
    "misty",
    "mixed",
    "moaning",
    "mobile",
    "moderate",
    "modern",
    "modest",
    "molecular",
    "monetary",
    "monthly",
    "moral",
    "motionless",
    "muddy",
    "multiple",
    "mushy",
    "musical",
    "mute",
    "mutual",
    "mysterious",
    "naked",
    "narrow",
    "nasty",
    "national",
    "native",
    "natural",
    "naughty",
    "naval",
    "near",
    "nearby",
    "neat",
    "necessary",
    "negative",
    "neighbouring",
    "nervous",
    "net",
    "neutral",
    "new",
    "nice",
    "nineteenth-century",
    "noble",
    "noisy",
    "normal",
    "northern",
    "nosy",
    "notable",
    "novel",
    "nuclear",
    "numerous",
    "nursing",
    "nutritious",
    "nutty",
    "obedient",
    "objective",
    "obliged",
    "obnoxious",
    "obvious",
    "occasional",
    "occupational",
    "odd",
    "official",
    "ok",
    "okay",
    "old",
    "old-fashioned",
    "olympic",
    "only",
    "open",
    "operational",
    "opposite",
    "optimistic",
    "oral",
    "orange",
    "ordinary",
    "organic",
    "organisational",
    "original",
    "orthodox",
    "other",
    "outdoor",
    "outer",
    "outrageous",
    "outside",
    "outstanding",
    "overall",
    "overseas",
    "overwhelming",
    "painful",
    "pale",
    "palestinian",
    "panicky",
    "parallel",
    "parental",
    "parliamentary",
    "part-time",
    "partial",
    "particular",
    "passing",
    "passive",
    "past",
    "patient",
    "payable",
    "peaceful",
    "peculiar",
    "perfect",
    "permanent",
    "persistent",
    "personal",
    "petite",
    "philosophical",
    "physical",
    "pink",
    "plain",
    "planned",
    "plastic",
    "pleasant",
    "pleased",
    "poised",
    "polish",
    "polite",
    "political",
    "poor",
    "popular",
    "positive",
    "possible",
    "post-war",
    "potential",
    "powerful",
    "practical",
    "precious",
    "precise",
    "preferred",
    "pregnant",
    "preliminary",
    "premier",
    "prepared",
    "present",
    "presidential",
    "pretty",
    "previous",
    "prickly",
    "primary",
    "prime",
    "primitive",
    "principal",
    "printed",
    "prior",
    "private",
    "probable",
    "productive",
    "professional",
    "profitable",
    "profound",
    "progressive",
    "prominent",
    "promising",
    "proper",
    "proposed",
    "prospective",
    "protective",
    "protestant",
    "proud",
    "provincial",
    "psychiatric",
    "psychological",
    "public",
    "puny",
    "pure",
    "purple",
    "purring",
    "puzzled",
    "quaint",
    "qualified",
    "quick",
    "quickest",
    "quiet",
    "racial",
    "radical",
    "rainy",
    "random",
    "rapid",
    "rare",
    "raspy",
    "rational",
    "ratty",
    "raw",
    "ready",
    "real",
    "realistic",
    "rear",
    "reasonable",
    "recent",
    "red",
    "reduced",
    "redundant",
    "regional",
    "registered",
    "regular",
    "regulatory",
    "related",
    "relative",
    "relaxed",
    "relevant",
    "reliable",
    "relieved",
    "religious",
    "reluctant",
    "remaining",
    "remarkable",
    "remote",
    "renewed",
    "representative",
    "repulsive",
    "required",
    "resident",
    "residential",
    "resonant",
    "respectable",
    "respective",
    "responsible",
    "resulting",
    "retail",
    "retired",
    "revolutionary",
    "rich",
    "ridiculous",
    "right",
    "rigid",
    "ripe",
    "rising",
    "rival",
    "roasted",
    "robust",
    "rolling",
    "roman",
    "romantic",
    "rotten",
    "rough",
    "round",
    "royal",
    "rubber",
    "rude",
    "ruling",
    "running",
    "rural",
    "russian",
    "sacred",
    "sad",
    "safe",
    "salty",
    "satisfactory",
    "satisfied",
    "scared",
    "scary",
    "scattered",
    "scientific",
    "scornful",
    "scottish",
    "scrawny",
    "screeching",
    "secondary",
    "secret",
    "secure",
    "select",
    "selected",
    "selective",
    "selfish",
    "semantic",
    "senior",
    "sensible",
    "sensitive",
    "separate",
    "serious",
    "severe",
    "sexual",
    "shaggy",
    "shaky",
    "shallow",
    "shared",
    "sharp",
    "sheer",
    "shiny",
    "shivering",
    "shocked",
    "short",
    "short-term",
    "shrill",
    "shy",
    "sick",
    "significant",
    "silent",
    "silky",
    "silly",
    "similar",
    "simple",
    "single",
    "skilled",
    "skinny",
    "sleepy",
    "slight",
    "slim",
    "slimy",
    "slippery",
    "slow",
    "small",
    "smart",
    "smiling",
    "smoggy",
    "smooth",
    "so-called",
    "social",
    "socialist",
    "soft",
    "solar",
    "sole",
    "solid",
    "sophisticated",
    "sore",
    "sorry",
    "sound",
    "sour",
    "southern",
    "soviet",
    "spanish",
    "spare",
    "sparkling",
    "spatial",
    "special",
    "specific",
    "specified",
    "spectacular",
    "spicy",
    "spiritual",
    "splendid",
    "spontaneous",
    "sporting",
    "spotless",
    "spotty",
    "square",
    "squealing",
    "stable",
    "stale",
    "standard",
    "static",
    "statistical",
    "statutory",
    "steady",
    "steep",
    "sticky",
    "stiff",
    "still",
    "stingy",
    "stormy",
    "straight",
    "straightforward",
    "strange",
    "strategic",
    "strict",
    "striking",
    "striped",
    "strong",
    "structural",
    "stuck",
    "stupid",
    "subjective",
    "subsequent",
    "substantial",
    "subtle",
    "successful",
    "successive",
    "sudden",
    "sufficient",
    "suitable",
    "sunny",
    "super",
    "superb",
    "superior",
    "supporting",
    "supposed",
    "supreme",
    "sure",
    "surprised",
    "surprising",
    "surrounding",
    "surviving",
    "suspicious",
    "sweet",
    "swift",
    "swiss",
    "symbolic",
    "sympathetic",
    "systematic",
    "tall",
    "tame",
    "tan",
    "tart",
    "tasteless",
    "tasty",
    "technical",
    "technological",
    "teenage",
    "temporary",
    "tender",
    "tense",
    "terrible",
    "territorial",
    "testy",
    "then",
    "theoretical",
    "thick",
    "thin",
    "thirsty",
    "thorough",
    "thoughtful",
    "thoughtless",
    "thundering",
    "tight",
    "tiny",
    "tired",
    "top",
    "tory",
    "total",
    "tough",
    "toxic",
    "traditional",
    "tragic",
    "tremendous",
    "tricky",
    "tropical",
    "troubled",
    "turkish",
    "typical",
    "ugliest",
    "ugly",
    "ultimate",
    "unable",
    "unacceptable",
    "unaware",
    "uncertain",
    "unchanged",
    "uncomfortable",
    "unconscious",
    "underground",
    "underlying",
    "unemployed",
    "uneven",
    "unexpected",
    "unfair",
    "unfortunate",
    "unhappy",
    "uniform",
    "uninterested",
    "unique",
    "united",
    "universal",
    "unknown",
    "unlikely",
    "unnecessary",
    "unpleasant",
    "unsightly",
    "unusual",
    "unwilling",
    "upper",
    "upset",
    "uptight",
    "urban",
    "urgent",
    "used",
    "useful",
    "useless",
    "usual",
    "vague",
    "valid",
    "valuable",
    "variable",
    "varied",
    "various",
    "varying",
    "vast",
    "verbal",
    "vertical",
    "very",
    "victorian",
    "victorious",
    "video-taped",
    "violent",
    "visible",
    "visiting",
    "visual",
    "vital",
    "vivacious",
    "vivid",
    "vocational",
    "voiceless",
    "voluntary",
    "vulnerable",
    "wandering",
    "warm",
    "wasteful",
    "watery",
    "weak",
    "wealthy",
    "weary",
    "wee",
    "weekly",
    "weird",
    "welcome",
    "well",
    "well-known",
    "welsh",
    "western",
    "wet",
    "whispering",
    "white",
    "whole",
    "wicked",
    "wide",
    "wide-eyed",
    "widespread",
    "wild",
    "willing",
    "wise",
    "witty",
    "wonderful",
    "wooden",
    "working",
    "working-class",
    "worldwide",
    "worried",
    "worrying",
    "worthwhile",
    "worthy",
    "written",
    "wrong",
    "yellow",
    "young",
    "yummy",
    "zany",
    "zealous",
]
b = [
    "abiding",
    "accelerating",
    "accepting",
    "accomplishing",
    "achieving",
    "acquiring",
    "acteding",
    "activating",
    "adapting",
    "adding",
    "addressing",
    "administering",
    "admiring",
    "admiting",
    "adopting",
    "advising",
    "affording",
    "agreeing",
    "alerting",
    "alighting",
    "allowing",
    "altereding",
    "amusing",
    "analyzing",
    "announcing",
    "annoying",
    "answering",
    "anticipating",
    "apologizing",
    "appearing",
    "applauding",
    "applieding",
    "appointing",
    "appraising",
    "appreciating",
    "approving",
    "arbitrating",
    "arguing",
    "arising",
    "arranging",
    "arresting",
    "arriving",
    "ascertaining",
    "asking",
    "assembling",
    "assessing",
    "assisting",
    "assuring",
    "attaching",
    "attacking",
    "attaining",
    "attempting",
    "attending",
    "attracting",
    "auditeding",
    "avoiding",
    "awaking",
    "backing",
    "baking",
    "balancing",
    "baning",
    "banging",
    "baring",
    "bating",
    "bathing",
    "battling",
    "bing",
    "beaming",
    "bearing",
    "beating",
    "becoming",
    "beging",
    "begining",
    "behaving",
    "beholding",
    "belonging",
    "bending",
    "beseting",
    "beting",
    "biding",
    "binding",
    "biting",
    "bleaching",
    "bleeding",
    "blessing",
    "blinding",
    "blinking",
    "bloting",
    "blowing",
    "blushing",
    "boasting",
    "boiling",
    "bolting",
    "bombing",
    "booking",
    "boring",
    "borrowing",
    "bouncing",
    "bowing",
    "boxing",
    "braking",
    "branching",
    "breaking",
    "breathing",
    "breeding",
    "briefing",
    "bringing",
    "broadcasting",
    "bruising",
    "brushing",
    "bubbling",
    "budgeting",
    "building",
    "bumping",
    "burning",
    "bursting",
    "burying",
    "busting",
    "buying",
    "buzing",
    "calculating",
    "calling",
    "camping",
    "caring",
    "carrying",
    "carving",
    "casting",
    "cataloging",
    "catching",
    "causing",
    "challenging",
    "changing",
    "charging",
    "charting",
    "chasing",
    "cheating",
    "checking",
    "cheering",
    "chewing",
    "choking",
    "choosing",
    "choping",
    "claiming",
    "claping",
    "clarifying",
    "classifying",
    "cleaning",
    "clearing",
    "clinging",
    "cliping",
    "closing",
    "clothing",
    "coaching",
    "coiling",
    "collecting",
    "coloring",
    "combing",
    "coming",
    "commanding",
    "communicating",
    "comparing",
    "competing",
    "compiling",
    "complaining",
    "completing",
    "composing",
    "computing",
    "conceiving",
    "concentrating",
    "conceptualizing",
    "concerning",
    "concluding",
    "conducting",
    "confessing",
    "confronting",
    "confusing",
    "connecting",
    "conserving",
    "considering",
    "consisting",
    "consolidating",
    "constructing",
    "consulting",
    "containing",
    "continuing",
    "contracting",
    "controling",
    "converting",
    "coordinating",
    "copying",
    "correcting",
    "correlating",
    "costing",
    "coughing",
    "counseling",
    "counting",
    "covering",
    "cracking",
    "crashing",
    "crawling",
    "creating",
    "creeping",
    "critiquing",
    "crossing",
    "crushing",
    "crying",
    "curing",
    "curling",
    "curving",
    "cuting",
    "cycling",
    "daming",
    "damaging",
    "dancing",
    "daring",
    "dealing",
    "decaying",
    "deceiving",
    "deciding",
    "decorating",
    "defining",
    "delaying",
    "delegating",
    "delighting",
    "delivering",
    "demonstrating",
    "depending",
    "describing",
    "deserting",
    "deserving",
    "designing",
    "destroying",
    "detailing",
    "detecting",
    "determining",
    "developing",
    "devising",
    "diagnosing",
    "diging",
    "directing",
    "disagreing",
    "disappearing",
    "disapproving",
    "disarming",
    "discovering",
    "disliking",
    "dispensing",
    "displaying",
    "disproving",
    "dissecting",
    "distributing",
    "diving",
    "diverting",
    "dividing",
    "doing",
    "doubling",
    "doubting",
    "drafting",
    "draging",
    "draining",
    "dramatizing",
    "drawing",
    "dreaming",
    "dressing",
    "drinking",
    "driping",
    "driving",
    "dropping",
    "drowning",
    "druming",
    "drying",
    "dusting",
    "dwelling",
    "earning",
    "eating",
    "editeding",
    "educating",
    "eliminating",
    "embarrassing",
    "employing",
    "emptying",
    "enacteding",
    "encouraging",
    "ending",
    "enduring",
    "enforcing",
    "engineering",
    "enhancing",
    "enjoying",
    "enlisting",
    "ensuring",
    "entering",
    "entertaining",
    "escaping",
    "establishing",
    "estimating",
    "evaluating",
    "examining",
    "exceeding",
    "exciting",
    "excusing",
    "executing",
    "exercising",
    "exhibiting",
    "existing",
    "expanding",
    "expecting",
    "expediting",
    "experimenting",
    "explaining",
    "exploding",
    "expressing",
    "extending",
    "extracting",
    "facing",
    "facilitating",
    "fading",
    "failing",
    "fancying",
    "fastening",
    "faxing",
    "fearing",
    "feeding",
    "feeling",
    "fencing",
    "fetching",
    "fighting",
    "filing",
    "filling",
    "filming",
    "finalizing",
    "financing",
    "finding",
    "firing",
    "fiting",
    "fixing",
    "flaping",
    "flashing",
    "fleing",
    "flinging",
    "floating",
    "flooding",
    "flowing",
    "flowering",
    "flying",
    "folding",
    "following",
    "fooling",
    "forbiding",
    "forcing",
    "forecasting",
    "foregoing",
    "foreseing",
    "foretelling",
    "forgeting",
    "forgiving",
    "forming",
    "formulating",
    "forsaking",
    "framing",
    "freezing",
    "frightening",
    "frying",
    "gathering",
    "gazing",
    "generating",
    "geting",
    "giving",
    "glowing",
    "gluing",
    "going",
    "governing",
    "grabing",
    "graduating",
    "grating",
    "greasing",
    "greeting",
    "grinning",
    "grinding",
    "griping",
    "groaning",
    "growing",
    "guaranteeing",
    "guarding",
    "guessing",
    "guiding",
    "hammering",
    "handing",
    "handling",
    "handwriting",
    "hanging",
    "happening",
    "harassing",
    "harming",
    "hating",
    "haunting",
    "heading",
    "healing",
    "heaping",
    "hearing",
    "heating",
    "helping",
    "hiding",
    "hitting",
    "holding",
    "hooking",
    "hoping",
    "hopping",
    "hovering",
    "hugging",
    "hmuming",
    "hunting",
    "hurrying",
    "hurting",
    "hypothesizing",
    "identifying",
    "ignoring",
    "illustrating",
    "imagining",
    "implementing",
    "impressing",
    "improving",
    "improvising",
    "including",
    "increasing",
    "inducing",
    "influencing",
    "informing",
    "initiating",
    "injecting",
    "injuring",
    "inlaying",
    "innovating",
    "inputing",
    "inspecting",
    "inspiring",
    "installing",
    "instituting",
    "instructing",
    "insuring",
    "integrating",
    "intending",
    "intensifying",
    "interesting",
    "interfering",
    "interlaying",
    "interpreting",
    "interrupting",
    "interviewing",
    "introducing",
    "inventing",
    "inventorying",
    "investigating",
    "inviting",
    "irritating",
    "itching",
    "jailing",
    "jamming",
    "jogging",
    "joining",
    "joking",
    "judging",
    "juggling",
    "jumping",
    "justifying",
    "keeping",
    "kepting",
    "kicking",
    "killing",
    "kissing",
    "kneeling",
    "kniting",
    "knocking",
    "knotting",
    "knowing",
    "labeling",
    "landing",
    "lasting",
    "laughing",
    "launching",
    "laying",
    "leading",
    "leaning",
    "leaping",
    "learning",
    "leaving",
"lecturing",
|
2097 |
+
"leding",
|
2098 |
+
"lending",
|
2099 |
+
"leting",
|
2100 |
+
"leveling",
|
2101 |
+
"licensing",
|
2102 |
+
"licking",
|
2103 |
+
"lying",
|
2104 |
+
"lifteding",
|
2105 |
+
"lighting",
|
2106 |
+
"lightening",
|
2107 |
+
"liking",
|
2108 |
+
"listing",
|
2109 |
+
"listening",
|
2110 |
+
"living",
|
2111 |
+
"loading",
|
2112 |
+
"locating",
|
2113 |
+
"locking",
|
2114 |
+
"loging",
|
2115 |
+
"longing",
|
2116 |
+
"looking",
|
2117 |
+
"losing",
|
2118 |
+
"loving",
|
2119 |
+
"maintaining",
|
2120 |
+
"making",
|
2121 |
+
"maning",
|
2122 |
+
"managing",
|
2123 |
+
"manipulating",
|
2124 |
+
"manufacturing",
|
2125 |
+
"mapping",
|
2126 |
+
"marching",
|
2127 |
+
"marking",
|
2128 |
+
"marketing",
|
2129 |
+
"marrying",
|
2130 |
+
"matching",
|
2131 |
+
"mating",
|
2132 |
+
"mattering",
|
2133 |
+
"meaning",
|
2134 |
+
"measuring",
|
2135 |
+
"meddling",
|
2136 |
+
"mediating",
|
2137 |
+
"meeting",
|
2138 |
+
"melting",
|
2139 |
+
"melting",
|
2140 |
+
"memorizing",
|
2141 |
+
"mending",
|
2142 |
+
"mentoring",
|
2143 |
+
"milking",
|
2144 |
+
"mining",
|
2145 |
+
"misleading",
|
2146 |
+
"missing",
|
2147 |
+
"misspelling",
|
2148 |
+
"mistaking",
|
2149 |
+
"misunderstanding",
|
2150 |
+
"mixing",
|
2151 |
+
"moaning",
|
2152 |
+
"modeling",
|
2153 |
+
"modifying",
|
2154 |
+
"monitoring",
|
2155 |
+
"mooring",
|
2156 |
+
"motivating",
|
2157 |
+
"mourning",
|
2158 |
+
"moving",
|
2159 |
+
"mowing",
|
2160 |
+
"muddling",
|
2161 |
+
"muging",
|
2162 |
+
"multiplying",
|
2163 |
+
"murdering",
|
2164 |
+
"nailing",
|
2165 |
+
"naming",
|
2166 |
+
"navigating",
|
2167 |
+
"needing",
|
2168 |
+
"negotiating",
|
2169 |
+
"nesting",
|
2170 |
+
"noding",
|
2171 |
+
"nominating",
|
2172 |
+
"normalizing",
|
2173 |
+
"noting",
|
2174 |
+
"noticing",
|
2175 |
+
"numbering",
|
2176 |
+
"obeying",
|
2177 |
+
"objecting",
|
2178 |
+
"observing",
|
2179 |
+
"obtaining",
|
2180 |
+
"occuring",
|
2181 |
+
"offending",
|
2182 |
+
"offering",
|
2183 |
+
"officiating",
|
2184 |
+
"opening",
|
2185 |
+
"operating",
|
2186 |
+
"ordering",
|
2187 |
+
"organizing",
|
2188 |
+
"orienteding",
|
2189 |
+
"originating",
|
2190 |
+
"overcoming",
|
2191 |
+
"overdoing",
|
2192 |
+
"overdrawing",
|
2193 |
+
"overflowing",
|
2194 |
+
"overhearing",
|
2195 |
+
"overtaking",
|
2196 |
+
"overthrowing",
|
2197 |
+
"owing",
|
2198 |
+
"owning",
|
2199 |
+
"packing",
|
2200 |
+
"paddling",
|
2201 |
+
"painting",
|
2202 |
+
"parking",
|
2203 |
+
"parting",
|
2204 |
+
"participating",
|
2205 |
+
"passing",
|
2206 |
+
"pasting",
|
2207 |
+
"pating",
|
2208 |
+
"pausing",
|
2209 |
+
"paying",
|
2210 |
+
"pecking",
|
2211 |
+
"pedaling",
|
2212 |
+
"peeling",
|
2213 |
+
"peeping",
|
2214 |
+
"perceiving",
|
2215 |
+
"perfecting",
|
2216 |
+
"performing",
|
2217 |
+
"permiting",
|
2218 |
+
"persuading",
|
2219 |
+
"phoning",
|
2220 |
+
"photographing",
|
2221 |
+
"picking",
|
2222 |
+
"piloting",
|
2223 |
+
"pinching",
|
2224 |
+
"pining",
|
2225 |
+
"pinpointing",
|
2226 |
+
"pioneering",
|
2227 |
+
"placing",
|
2228 |
+
"planing",
|
2229 |
+
"planting",
|
2230 |
+
"playing",
|
2231 |
+
"pleading",
|
2232 |
+
"pleasing",
|
2233 |
+
"plugging",
|
2234 |
+
"pointing",
|
2235 |
+
"poking",
|
2236 |
+
"polishing",
|
2237 |
+
"poping",
|
2238 |
+
"possessing",
|
2239 |
+
"posting",
|
2240 |
+
"pouring",
|
2241 |
+
"practicing",
|
2242 |
+
"praiseding",
|
2243 |
+
"praying",
|
2244 |
+
"preaching",
|
2245 |
+
"preceding",
|
2246 |
+
"predicting",
|
2247 |
+
"prefering",
|
2248 |
+
"preparing",
|
2249 |
+
"prescribing",
|
2250 |
+
"presenting",
|
2251 |
+
"preserving",
|
2252 |
+
"preseting",
|
2253 |
+
"presiding",
|
2254 |
+
"pressing",
|
2255 |
+
"pretending",
|
2256 |
+
"preventing",
|
2257 |
+
"pricking",
|
2258 |
+
"printing",
|
2259 |
+
"processing",
|
2260 |
+
"procuring",
|
2261 |
+
"producing",
|
2262 |
+
"professing",
|
2263 |
+
"programing",
|
2264 |
+
"progressing",
|
2265 |
+
"projecting",
|
2266 |
+
"promising",
|
2267 |
+
"promoting",
|
2268 |
+
"proofreading",
|
2269 |
+
"proposing",
|
2270 |
+
"protecting",
|
2271 |
+
"proving",
|
2272 |
+
"providing",
|
2273 |
+
"publicizing",
|
2274 |
+
"pulling",
|
2275 |
+
"pumping",
|
2276 |
+
"punching",
|
2277 |
+
"puncturing",
|
2278 |
+
"punishing",
|
2279 |
+
"purchasing",
|
2280 |
+
"pushing",
|
2281 |
+
"puting",
|
2282 |
+
"qualifying",
|
2283 |
+
"questioning",
|
2284 |
+
"queuing",
|
2285 |
+
"quiting",
|
2286 |
+
"racing",
|
2287 |
+
"radiating",
|
2288 |
+
"raining",
|
2289 |
+
"raising",
|
2290 |
+
"ranking",
|
2291 |
+
"rating",
|
2292 |
+
"reaching",
|
2293 |
+
"reading",
|
2294 |
+
"realigning",
|
2295 |
+
"realizing",
|
2296 |
+
"reasoning",
|
2297 |
+
"receiving",
|
2298 |
+
"recognizing",
|
2299 |
+
"recommending",
|
2300 |
+
"reconciling",
|
2301 |
+
"recording",
|
2302 |
+
"recruiting",
|
2303 |
+
"reducing",
|
2304 |
+
"referring",
|
2305 |
+
"reflecting",
|
2306 |
+
"refusing",
|
2307 |
+
"regreting",
|
2308 |
+
"regulating",
|
2309 |
+
"rehabilitating",
|
2310 |
+
"reigning",
|
2311 |
+
"reinforcing",
|
2312 |
+
"rejecting",
|
2313 |
+
"rejoicing",
|
2314 |
+
"relating",
|
2315 |
+
"relaxing",
|
2316 |
+
"releasing",
|
2317 |
+
"relying",
|
2318 |
+
"remaining",
|
2319 |
+
"remembering",
|
2320 |
+
"reminding",
|
2321 |
+
"removing",
|
2322 |
+
"rendering",
|
2323 |
+
"reorganizing",
|
2324 |
+
"repairing",
|
2325 |
+
"repeating",
|
2326 |
+
"replacing",
|
2327 |
+
"replying",
|
2328 |
+
"reporting",
|
2329 |
+
"representing",
|
2330 |
+
"reproducing",
|
2331 |
+
"requesting",
|
2332 |
+
"rescuing",
|
2333 |
+
"researching",
|
2334 |
+
"resolving",
|
2335 |
+
"responding",
|
2336 |
+
"restoreding",
|
2337 |
+
"restructuring",
|
2338 |
+
"retiring",
|
2339 |
+
"retrieving",
|
2340 |
+
"returning",
|
2341 |
+
"reviewing",
|
2342 |
+
"revising",
|
2343 |
+
"rhyming",
|
2344 |
+
"riding",
|
2345 |
+
"riding",
|
2346 |
+
"ringing",
|
2347 |
+
"rinsing",
|
2348 |
+
"rising",
|
2349 |
+
"risking",
|
2350 |
+
"robing",
|
2351 |
+
"rocking",
|
2352 |
+
"rolling",
|
2353 |
+
"roting",
|
2354 |
+
"rubing",
|
2355 |
+
"ruining",
|
2356 |
+
"ruling",
|
2357 |
+
"runing",
|
2358 |
+
"rushing",
|
2359 |
+
"sacking",
|
2360 |
+
"sailing",
|
2361 |
+
"satisfying",
|
2362 |
+
"saving",
|
2363 |
+
"sawing",
|
2364 |
+
"saying",
|
2365 |
+
"scaring",
|
2366 |
+
"scattering",
|
2367 |
+
"scheduling",
|
2368 |
+
"scolding",
|
2369 |
+
"scorching",
|
2370 |
+
"scraping",
|
2371 |
+
"scratching",
|
2372 |
+
"screaming",
|
2373 |
+
"screwing",
|
2374 |
+
"scribbling",
|
2375 |
+
"scrubing",
|
2376 |
+
"sealing",
|
2377 |
+
"searching",
|
2378 |
+
"securing",
|
2379 |
+
"seing",
|
2380 |
+
"seeking",
|
2381 |
+
"selecting",
|
2382 |
+
"selling",
|
2383 |
+
"sending",
|
2384 |
+
"sensing",
|
2385 |
+
"separating",
|
2386 |
+
"serving",
|
2387 |
+
"servicing",
|
2388 |
+
"seting",
|
2389 |
+
"settling",
|
2390 |
+
"sewing",
|
2391 |
+
"shading",
|
2392 |
+
"shaking",
|
2393 |
+
"shaping",
|
2394 |
+
"sharing",
|
2395 |
+
"shaving",
|
2396 |
+
"shearing",
|
2397 |
+
"sheding",
|
2398 |
+
"sheltering",
|
2399 |
+
"shining",
|
2400 |
+
"shivering",
|
2401 |
+
"shocking",
|
2402 |
+
"shoing",
|
2403 |
+
"shooting",
|
2404 |
+
"shoping",
|
2405 |
+
"showing",
|
2406 |
+
"shrinking",
|
2407 |
+
"shruging",
|
2408 |
+
"shuting",
|
2409 |
+
"sighing",
|
2410 |
+
"signing",
|
2411 |
+
"signaling",
|
2412 |
+
"simplifying",
|
2413 |
+
"sining",
|
2414 |
+
"singing",
|
2415 |
+
"sinking",
|
2416 |
+
"siping",
|
2417 |
+
"siting",
|
2418 |
+
"sketching",
|
2419 |
+
"skiing",
|
2420 |
+
"skiping",
|
2421 |
+
"slaping",
|
2422 |
+
"slaying",
|
2423 |
+
"sleeping",
|
2424 |
+
"sliding",
|
2425 |
+
"slinging",
|
2426 |
+
"slinking",
|
2427 |
+
"sliping",
|
2428 |
+
"sliting",
|
2429 |
+
"slowing",
|
2430 |
+
"smashing",
|
2431 |
+
"smelling",
|
2432 |
+
"smiling",
|
2433 |
+
"smiting",
|
2434 |
+
"smoking",
|
2435 |
+
"snatching",
|
2436 |
+
"sneaking",
|
2437 |
+
"sneezing",
|
2438 |
+
"sniffing",
|
2439 |
+
"snoring",
|
2440 |
+
"snowing",
|
2441 |
+
"soaking",
|
2442 |
+
"solving",
|
2443 |
+
"soothing",
|
2444 |
+
"soothsaying",
|
2445 |
+
"sorting",
|
2446 |
+
"sounding",
|
2447 |
+
"sowing",
|
2448 |
+
"sparing",
|
2449 |
+
"sparking",
|
2450 |
+
"sparkling",
|
2451 |
+
"speaking",
|
2452 |
+
"specifying",
|
2453 |
+
"speeding",
|
2454 |
+
"spelling",
|
2455 |
+
"spending",
|
2456 |
+
"spilling",
|
2457 |
+
"spining",
|
2458 |
+
"spiting",
|
2459 |
+
"spliting",
|
2460 |
+
"spoiling",
|
2461 |
+
"spoting",
|
2462 |
+
"spraying",
|
2463 |
+
"spreading",
|
2464 |
+
"springing",
|
2465 |
+
"sprouting",
|
2466 |
+
"squashing",
|
2467 |
+
"squeaking",
|
2468 |
+
"squealing",
|
2469 |
+
"squeezing",
|
2470 |
+
"staining",
|
2471 |
+
"stamping",
|
2472 |
+
"standing",
|
2473 |
+
"staring",
|
2474 |
+
"starting",
|
2475 |
+
"staying",
|
2476 |
+
"stealing",
|
2477 |
+
"steering",
|
2478 |
+
"stepping",
|
2479 |
+
"sticking",
|
2480 |
+
"stimulating",
|
2481 |
+
"stinging",
|
2482 |
+
"stinking",
|
2483 |
+
"stirring",
|
2484 |
+
"stitching",
|
2485 |
+
"stoping",
|
2486 |
+
"storing",
|
2487 |
+
"straping",
|
2488 |
+
"streamlining",
|
2489 |
+
"strengthening",
|
2490 |
+
"stretching",
|
2491 |
+
"striding",
|
2492 |
+
"striking",
|
2493 |
+
"stringing",
|
2494 |
+
"stripping",
|
2495 |
+
"striving",
|
2496 |
+
"stroking",
|
2497 |
+
"structuring",
|
2498 |
+
"studying",
|
2499 |
+
"stuffing",
|
2500 |
+
"subleting",
|
2501 |
+
"subtracting",
|
2502 |
+
"succeeding",
|
2503 |
+
"sucking",
|
2504 |
+
"suffering",
|
2505 |
+
"suggesting",
|
2506 |
+
"suiting",
|
2507 |
+
"summarizing",
|
2508 |
+
"supervising",
|
2509 |
+
"supplying",
|
2510 |
+
"supporting",
|
2511 |
+
"supposing",
|
2512 |
+
"surprising",
|
2513 |
+
"surrounding",
|
2514 |
+
"suspecting",
|
2515 |
+
"suspending",
|
2516 |
+
"swearing",
|
2517 |
+
"sweating",
|
2518 |
+
"sweeping",
|
2519 |
+
"swelling",
|
2520 |
+
"swimming",
|
2521 |
+
"swinging",
|
2522 |
+
"switching",
|
2523 |
+
"symbolizing",
|
2524 |
+
"synthesizing",
|
2525 |
+
"systemizing",
|
2526 |
+
"tabulating",
|
2527 |
+
"taking",
|
2528 |
+
"talking",
|
2529 |
+
"taming",
|
2530 |
+
"taping",
|
2531 |
+
"targeting",
|
2532 |
+
"tasting",
|
2533 |
+
"teaching",
|
2534 |
+
"tearing",
|
2535 |
+
"teasing",
|
2536 |
+
"telephoning",
|
2537 |
+
"telling",
|
2538 |
+
"tempting",
|
2539 |
+
"terrifying",
|
2540 |
+
"testing",
|
2541 |
+
"thanking",
|
2542 |
+
"thawing",
|
2543 |
+
"thinking",
|
2544 |
+
"thriving",
|
2545 |
+
"throwing",
|
2546 |
+
"thrusting",
|
2547 |
+
"ticking",
|
2548 |
+
"tickling",
|
2549 |
+
"tying",
|
2550 |
+
"timing",
|
2551 |
+
"tiping",
|
2552 |
+
"tiring",
|
2553 |
+
"touching",
|
2554 |
+
"touring",
|
2555 |
+
"towing",
|
2556 |
+
"tracing",
|
2557 |
+
"trading",
|
2558 |
+
"training",
|
2559 |
+
"transcribing",
|
2560 |
+
"transfering",
|
2561 |
+
"transforming",
|
2562 |
+
"translating",
|
2563 |
+
"transporting",
|
2564 |
+
"traping",
|
2565 |
+
"traveling",
|
2566 |
+
"treading",
|
2567 |
+
"treating",
|
2568 |
+
"trembling",
|
2569 |
+
"tricking",
|
2570 |
+
"triping",
|
2571 |
+
"troting",
|
2572 |
+
"troubling",
|
2573 |
+
"troubleshooting",
|
2574 |
+
"trusting",
|
2575 |
+
"trying",
|
2576 |
+
"tuging",
|
2577 |
+
"tumbling",
|
2578 |
+
"turning",
|
2579 |
+
"tutoring",
|
2580 |
+
"twisting",
|
2581 |
+
"typing",
|
2582 |
+
"undergoing",
|
2583 |
+
"understanding",
|
2584 |
+
"undertaking",
|
2585 |
+
"undressing",
|
2586 |
+
"unfastening",
|
2587 |
+
"unifying",
|
2588 |
+
"uniting",
|
2589 |
+
"unlocking",
|
2590 |
+
"unpacking",
|
2591 |
+
"untidying",
|
2592 |
+
"updating",
|
2593 |
+
"upgrading",
|
2594 |
+
"upholding",
|
2595 |
+
"upseting",
|
2596 |
+
"using",
|
2597 |
+
"utilizing",
|
2598 |
+
"vanishing",
|
2599 |
+
"verbalizing",
|
2600 |
+
"verifying",
|
2601 |
+
"vexing",
|
2602 |
+
"visiting",
|
2603 |
+
"wailing",
|
2604 |
+
"waiting",
|
2605 |
+
"waking",
|
2606 |
+
"walking",
|
2607 |
+
"wandering",
|
2608 |
+
"wanting",
|
2609 |
+
"warming",
|
2610 |
+
"warning",
|
2611 |
+
"washing",
|
2612 |
+
"wasting",
|
2613 |
+
"watching",
|
2614 |
+
"watering",
|
2615 |
+
"waving",
|
2616 |
+
"wearing",
|
2617 |
+
"weaving",
|
2618 |
+
"wedding",
|
2619 |
+
"weeping",
|
2620 |
+
"weighing",
|
2621 |
+
"welcoming",
|
2622 |
+
"wending",
|
2623 |
+
"weting",
|
2624 |
+
"whining",
|
2625 |
+
"whiping",
|
2626 |
+
"whirling",
|
2627 |
+
"whispering",
|
2628 |
+
"whistling",
|
2629 |
+
"wining",
|
2630 |
+
"winding",
|
2631 |
+
"winking",
|
2632 |
+
"wiping",
|
2633 |
+
"wishing",
|
2634 |
+
"withdrawing",
|
2635 |
+
"withholding",
|
2636 |
+
"withstanding",
|
2637 |
+
"wobbling",
|
2638 |
+
"wondering",
|
2639 |
+
"working",
|
2640 |
+
"worrying",
|
2641 |
+
"wrapping",
|
2642 |
+
"wrecking",
|
2643 |
+
"wrestling",
|
2644 |
+
"wriggling",
|
2645 |
+
"wringing",
|
2646 |
+
"writing",
|
2647 |
+
"x-raying",
|
2648 |
+
"yawning",
|
2649 |
+
"yelling",
|
2650 |
+
"zipping",
|
2651 |
+
"zooming",
|
2652 |
+
]
|
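Word pools like the verb list above are typically used to turn an opaque run identifier into a readable experiment name. A rough, hypothetical illustration of the idea (the readable_name helper below is illustrative only, not the repo's actual naming code):

    import random

    VERBS = ["containing", "continuing", "zooming"]  # stand-in for the full pool above

    def readable_name(seed, words=VERBS, n=2):
        # Deterministically map an integer (e.g. a config hash) to a name.
        rng = random.Random(seed)
        return "_".join(rng.choice(words) for _ in range(n))

    print(readable_name(42))  # same seed always yields the same name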
BigGAN_PyTorch/config_files/COCO_Stuff/BigGAN/unconditional_biggan_res128.json
ADDED
@@ -0,0 +1,44 @@
{
    "experiment_name": "unconditional_biggan_class_cond_res128_COCO",
    "which_dataset": "coco",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 1,
    "n_gpus_per_node": 4,
    "hflips": true,
    "DA": true,
    "DiffAugment": "translation",

    "test_every": 1,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 1,
    "num_D_accumulations": 1,
    "num_D_steps": 2,

    "constant_conditioning": true,
    "class_cond": true,
    "hier": true,
    "resolution": 128,
    "G_attn": "64",
    "D_attn": "64",
    "shared_dim": 128,
    "G_shared": true,
    "batch_size": 64,
    "D_lr": 4e-4,
    "G_lr": 1e-4,
    "G_ch": 48,
    "D_ch": 48,

    "load_weights": ""

}
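Each of these config files is a flat JSON dictionary of hyperparameters mirroring the training flags. A minimal sketch of loading one and overriding a set of defaults (the load_config helper and defaults dict are hypothetical, not the repo's API):

    import json

    def load_config(path, defaults=None):
        cfg = dict(defaults or {})  # config keys override the defaults
        with open(path) as f:
            cfg.update(json.load(f))
        return cfg

    cfg = load_config(
        "BigGAN_PyTorch/config_files/COCO_Stuff/BigGAN/unconditional_biggan_res128.json"
    )
    print(cfg["resolution"], cfg["batch_size"])  # 128 64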
BigGAN_PyTorch/config_files/COCO_Stuff/BigGAN/unconditional_biggan_res256.json
ADDED
@@ -0,0 +1,44 @@
{
    "experiment_name": "unconditional_biggan_class_cond_res256_COCO",
    "which_dataset": "coco",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 2,
    "n_gpus_per_node": 8,
    "hflips": true,
    "DA": true,
    "DiffAugment": "translation",

    "test_every": 1,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 1,
    "num_D_accumulations": 1,
    "num_D_steps": 2,

    "constant_conditioning": true,
    "class_cond": true,
    "hier": true,
    "resolution": 256,
    "G_attn": "64",
    "D_attn": "64",
    "shared_dim": 128,
    "G_shared": true,
    "batch_size": 16,
    "D_lr": 1e-4,
    "G_lr": 1e-4,
    "G_ch": 48,
    "D_ch": 48,

    "load_weights": ""

}
BigGAN_PyTorch/config_files/COCO_Stuff/IC-GAN/icgan_res128_ddp.json
ADDED
@@ -0,0 +1,51 @@
{
    "experiment_name": "icgan_res128_COCO",
    "which_dataset": "coco",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 1,
    "n_gpus_per_node": 4,
    "hflips": true,
    "DA": true,
    "DiffAugment": "translation",
    "feature_augmentation": true,

    "test_every": 5,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 1,
    "num_D_accumulations": 1,
    "num_D_steps": 1,

    "class_cond": false,
    "instance_cond": true,
    "hier": true,
    "resolution": 128,
    "G_attn": "64",
    "D_attn": "64",
    "shared_dim": 128,
    "shared_dim_feat": 512,
    "G_shared": true,
    "G_shared_feat": true,

    "k_nn": 5,
    "feature_extractor": "selfsupervised",

    "batch_size": 64,
    "D_lr": 4e-4,
    "G_lr": 1e-4,
    "G_ch": 48,
    "D_ch": 48,

    "load_weights": ""

}
BigGAN_PyTorch/config_files/COCO_Stuff/IC-GAN/icgan_res256_ddp.json
ADDED
@@ -0,0 +1,51 @@
{
    "experiment_name": "icgan_res256_COCO",
    "which_dataset": "coco",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 2,
    "n_gpus_per_node": 8,
    "hflips": true,
    "DA": true,
    "DiffAugment": "translation",
    "feature_augmentation": true,

    "test_every": 5,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 1,
    "num_D_accumulations": 1,
    "num_D_steps": 1,

    "class_cond": false,
    "instance_cond": true,
    "hier": true,
    "resolution": 256,
    "G_attn": "64",
    "D_attn": "64",
    "shared_dim": 128,
    "shared_dim_feat": 512,
    "G_shared": true,
    "G_shared_feat": true,

    "k_nn": 5,
    "feature_extractor": "selfsupervised",

    "batch_size": 16,
    "D_lr": 1e-4,
    "G_lr": 1e-4,
    "G_ch": 48,
    "D_ch": 48,

    "load_weights": ""

}
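If "batch_size" here is per DDP process, as is typical, the global batch is the product of the per-GPU batch, the GPU count, and any gradient accumulations. A quick sanity check of the two COCO IC-GAN configs above (plain arithmetic, not repo code):

    def effective_batch(batch_size, n_nodes, n_gpus_per_node, accumulations=1):
        # Each DDP process draws `batch_size` samples per optimizer step.
        return batch_size * n_nodes * n_gpus_per_node * accumulations

    print(effective_batch(64, 1, 4))  # res-128 config above: 256
    print(effective_batch(16, 2, 8))  # res-256 config above: 256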
BigGAN_PyTorch/config_files/ImageNet-LT/BigGAN/biggan_res128.json
ADDED
@@ -0,0 +1,48 @@
{
    "experiment_name": "biggan_imagenet_lt_class_cond_res128",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 1,
    "n_gpus_per_node": 2,
    "hflips": true,
    "DA": true,
    "DiffAugment": "translation",

    "test_every": 10,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 1,
    "num_D_accumulations": 1,
    "num_D_steps": 2,

    "class_cond": true,
    "hier": true,
    "resolution": 128,
    "G_attn": "64",
    "D_attn": "64",
    "shared_dim": 128,
    "G_shared": true,
    "batch_size": 64,
    "D_lr": 1e-4,
    "G_lr": 1e-4,
    "G_ch": 64,
    "D_ch": 64,

    "longtail": true,
    "longtail_gen": true,
    "use_balanced_sampler": false,
    "custom_distrib_gen": false,
    "longtail_temperature": 1,

    "load_weights": ""

}
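The "ema"/"use_ema" flags that appear throughout these configs keep an exponential moving average of the generator weights and evaluate with the averaged copy (together with "G_eval_mode"). A minimal sketch of the idea (the 0.9999 decay and the ema_update helper are illustrative, not the repo's ema class):

    import copy
    import torch

    @torch.no_grad()
    def ema_update(g_ema, g, decay=0.9999):
        # Blend each live parameter into its moving-average copy.
        for p_ema, p in zip(g_ema.parameters(), g.parameters()):
            p_ema.mul_(decay).add_(p, alpha=1.0 - decay)

    g = torch.nn.Linear(4, 4)        # stand-in for the generator
    g_ema = copy.deepcopy(g).eval()  # averaged copy used for evaluation
    ema_update(g_ema, g)             # call once per training step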
BigGAN_PyTorch/config_files/ImageNet-LT/BigGAN/biggan_res256.json
ADDED
@@ -0,0 +1,48 @@
{
    "experiment_name": "biggan_imagenet_lt_class_cond_res256",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 1,
    "n_gpus_per_node": 8,
    "hflips": true,
    "DA": true,
    "DiffAugment": "translation",

    "test_every": 10,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 1,
    "num_D_accumulations": 1,
    "num_D_steps": 2,

    "class_cond": true,
    "hier": true,
    "resolution": 256,
    "G_attn": "64",
    "D_attn": "64",
    "shared_dim": 128,
    "G_shared": true,
    "batch_size": 16,
    "D_lr": 1e-4,
    "G_lr": 1e-4,
    "G_ch": 64,
    "D_ch": 64,

    "longtail": true,
    "longtail_gen": true,
    "use_balanced_sampler": false,
    "custom_distrib_gen": false,
    "longtail_temperature": 1,

    "load_weights": ""

}
BigGAN_PyTorch/config_files/ImageNet-LT/BigGAN/biggan_res64.json
ADDED
@@ -0,0 +1,48 @@
{
    "experiment_name": "biggan_imagenet_lt_class_cond_res64",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 1,
    "n_gpus_per_node": 1,
    "hflips": true,
    "DA": true,
    "DiffAugment": "translation",

    "test_every": 1,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 1,
    "num_D_accumulations": 1,
    "num_D_steps": 1,

    "class_cond": true,
    "hier": true,
    "resolution": 64,
    "G_attn": "32",
    "D_attn": "32",
    "shared_dim": 128,
    "G_shared": true,
    "batch_size": 128,
    "D_lr": 1e-3,
    "G_lr": 1e-5,
    "G_ch": 64,
    "D_ch": 64,

    "longtail": true,
    "longtail_gen": true,
    "use_balanced_sampler": false,
    "custom_distrib_gen": false,
    "longtail_temperature": 1,

    "load_weights": ""

}
BigGAN_PyTorch/config_files/ImageNet-LT/cc_IC-GAN/cc_icgan_res128.json
ADDED
@@ -0,0 +1,56 @@
{
    "experiment_name": "cc_icgan_biggan_imagenet_res128",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 1,
    "n_gpus_per_node": 2,
    "hflips": true,
    "DA": true,
    "DiffAugment": "translation",

    "test_every": 10,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 1,
    "num_D_accumulations": 1,
    "num_D_steps": 2,

    "class_cond": true,
    "instance_cond": true,
    "which_knn_balance": "instance_balance",
    "hier": true,
    "resolution": 128,
    "G_attn": "64",
    "D_attn": "64",
    "shared_dim": 128,
    "shared_dim_feat": 512,
    "G_shared": true,
    "G_shared_feat": true,

    "k_nn": 5,
    "feature_extractor": "classification",

    "batch_size": 64,
    "D_lr": 1e-4,
    "G_lr": 1e-4,
    "G_ch": 64,
    "D_ch": 64,

    "longtail": true,
    "longtail_gen": true,
    "use_balanced_sampler": false,
    "custom_distrib_gen": false,
    "longtail_temperature": 1,

    "load_weights": ""

}
BigGAN_PyTorch/config_files/ImageNet-LT/cc_IC-GAN/cc_icgan_res256.json
ADDED
@@ -0,0 +1,56 @@
{
    "experiment_name": "cc_icgan_biggan_imagenet_res256",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 1,
    "n_gpus_per_node": 8,
    "hflips": true,
    "DA": true,
    "DiffAugment": "translation",

    "test_every": 10,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 1,
    "num_D_accumulations": 1,
    "num_D_steps": 2,

    "class_cond": true,
    "instance_cond": true,
    "which_knn_balance": "instance_balance",
    "hier": true,
    "resolution": 256,
    "G_attn": "64",
    "D_attn": "64",
    "shared_dim": 128,
    "shared_dim_feat": 512,
    "G_shared": true,
    "G_shared_feat": true,

    "k_nn": 5,
    "feature_extractor": "classification",

    "batch_size": 16,
    "D_lr": 1e-4,
    "G_lr": 1e-4,
    "G_ch": 64,
    "D_ch": 64,

    "longtail": true,
    "longtail_gen": true,
    "use_balanced_sampler": false,
    "custom_distrib_gen": false,
    "longtail_temperature": 1,

    "load_weights": ""

}
BigGAN_PyTorch/config_files/ImageNet-LT/cc_IC-GAN/cc_icgan_res64.json
ADDED
@@ -0,0 +1,56 @@
{
    "experiment_name": "cc_icgan_biggan_imagenet_res64",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 1,
    "n_gpus_per_node": 1,
    "hflips": true,
    "DA": true,
    "DiffAugment": "translation",

    "test_every": 1,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 1,
    "num_D_accumulations": 1,
    "num_D_steps": 1,

    "class_cond": true,
    "instance_cond": true,
    "which_knn_balance": "instance_balance",
    "hier": true,
    "resolution": 64,
    "G_attn": "32",
    "D_attn": "32",
    "shared_dim": 128,
    "shared_dim_feat": 512,
    "G_shared": true,
    "G_shared_feat": true,

    "k_nn": 5,
    "feature_extractor": "classification",

    "batch_size": 128,
    "D_lr": 1e-3,
    "G_lr": 1e-5,
    "G_ch": 64,
    "D_ch": 64,

    "longtail": true,
    "longtail_gen": true,
    "use_balanced_sampler": false,
    "custom_distrib_gen": false,
    "longtail_temperature": 1,

    "load_weights": ""

}
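In these ImageNet-LT configs, "longtail_temperature" controls how much class sampling is flattened away from the long-tailed empirical counts (a value of 1 leaves them unchanged, and it only takes effect with "custom_distrib_gen"). A small illustration of temperature-smoothed sampling, p_i proportional to count_i^(1/T); the exact formula in the repo may differ:

    import numpy as np

    def class_probs(counts, temperature=1.0):
        # temperature=1 reproduces the empirical distribution;
        # larger values flatten it toward uniform.
        p = np.asarray(counts, dtype=np.float64) ** (1.0 / temperature)
        return p / p.sum()

    counts = [1000, 100, 10]                  # head vs. tail classes
    print(class_probs(counts, 1.0).round(3))  # [0.901 0.09  0.009]
    print(class_probs(counts, 2.0).round(3))  # flatter: [0.706 0.223 0.071]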
BigGAN_PyTorch/config_files/ImageNet/BigGAN/biggan_res128.json
ADDED
@@ -0,0 +1,40 @@
{
    "experiment_name": "biggan_imagenet_res128",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 4,
    "n_gpus_per_node": 8,
    "hflips": true,

    "test_every": 5,
    "save_every": 2,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 1,
    "num_D_accumulations": 1,
    "num_D_steps": 1,

    "class_cond": true,
    "hier": true,
    "resolution": 128,
    "G_attn": "64",
    "D_attn": "64",
    "shared_dim": 128,
    "G_shared": true,
    "batch_size": 64,
    "D_lr": 4e-4,
    "G_lr": 1e-4,
    "G_ch": 96,
    "D_ch": 96,

    "load_weights": ""

}
BigGAN_PyTorch/config_files/ImageNet/BigGAN/biggan_res256_half_cap.json
ADDED
@@ -0,0 +1,40 @@
{
    "experiment_name": "biggan_class_cond_res256_half_cap_noflips",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 4,
    "n_gpus_per_node": 8,
    "hflips": false,

    "test_every": 5,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 4,
    "num_D_accumulations": 4,
    "num_D_steps": 1,

    "class_cond": true,
    "hier": true,
    "resolution": 256,
    "G_attn": "64",
    "D_attn": "64",
    "shared_dim": 128,
    "G_shared": true,
    "batch_size": 16,
    "D_lr": 4e-4,
    "G_lr": 1e-4,
    "G_ch": 64,
    "D_ch": 64,

    "load_weights": ""

}
BigGAN_PyTorch/config_files/ImageNet/BigGAN/biggan_res64.json
ADDED
@@ -0,0 +1,40 @@
{
    "experiment_name": "biggan_imagenet_res64",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 1,
    "n_gpus_per_node": 1,
    "hflips": true,

    "test_every": 1,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 1,
    "num_D_accumulations": 1,
    "num_D_steps": 1,

    "class_cond": true,
    "hier": true,
    "resolution": 64,
    "G_attn": "32",
    "D_attn": "32",
    "shared_dim": 128,
    "G_shared": true,
    "batch_size": 256,
    "D_lr": 1e-4,
    "G_lr": 1e-4,
    "G_ch": 64,
    "D_ch": 64,

    "load_weights": ""

}
BigGAN_PyTorch/config_files/ImageNet/IC-GAN/icgan_res128.json
ADDED
@@ -0,0 +1,48 @@
{
    "experiment_name": "icgan_biggan_imagenet_res128",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 4,
    "n_gpus_per_node": 8,
    "hflips": true,
    "feature_augmentation": true,

    "test_every": 5,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 1,
    "num_D_accumulations": 1,
    "num_D_steps": 1,

    "class_cond": false,
    "instance_cond": true,
    "hier": true,
    "resolution": 128,
    "G_attn": "64",
    "D_attn": "64",
    "shared_dim": 128,
    "shared_dim_feat": 512,
    "G_shared": true,
    "G_shared_feat": true,

    "k_nn": 50,
    "feature_extractor": "selfsupervised",

    "batch_size": 64,
    "D_lr": 1e-4,
    "G_lr": 4e-5,
    "G_ch": 96,
    "D_ch": 96,

    "load_weights": ""

}
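"k_nn" is the neighborhood size used for instance conditioning: each training image is paired with one of the k nearest neighbors of its feature vector (50 here for ImageNet versus 5 in the COCO-Stuff configs above). A hedged sketch of such a neighbor lookup with cosine similarity (feats is a stand-in feature matrix; the repo precomputes neighborhoods with its own pipeline):

    import torch
    import torch.nn.functional as F

    def knn_indices(feats, k=50):
        # feats: (N, D) instance features from the feature extractor.
        f = F.normalize(feats, dim=1)
        sim = f @ f.t()                               # (N, N) cosine similarities
        return sim.topk(k + 1, dim=1).indices[:, 1:]  # drop the self-match

    feats = torch.randn(1000, 2048)
    neighbors = knn_indices(feats, k=50)              # (1000, 50) neighbor ids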
BigGAN_PyTorch/config_files/ImageNet/IC-GAN/icgan_res256.json
ADDED
@@ -0,0 +1,47 @@
{
    "experiment_name": "icgan_biggan_imagenet_res256",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 4,
    "n_gpus_per_node": 8,
    "hflips": true,
    "feature_augmentation": false,

    "test_every": 5,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 4,
    "num_D_accumulations": 4,
    "num_D_steps": 1,

    "class_cond": false,
    "instance_cond": true,
    "hier": true,
    "resolution": 256,
    "G_attn": "64",
    "D_attn": "64",
    "shared_dim": 128,
    "shared_dim_feat": 512,
    "G_shared": true,
    "G_shared_feat": true,

    "k_nn": 50,
    "feature_extractor": "selfsupervised",

    "batch_size": 16,
    "D_lr": 1e-4,
    "G_lr": 4e-5,
    "G_ch": 96,
    "D_ch": 96,

    "load_weights": ""
}
BigGAN_PyTorch/config_files/ImageNet/IC-GAN/icgan_res256_halfcap.json
ADDED
@@ -0,0 +1,47 @@
{
    "experiment_name": "icgan_biggan_imagenet_res256_halfcap",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 4,
    "n_gpus_per_node": 8,
    "hflips": true,
    "feature_augmentation": true,

    "test_every": 5,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 4,
    "num_D_accumulations": 4,
    "num_D_steps": 2,

    "class_cond": false,
    "instance_cond": true,
    "hier": true,
    "resolution": 256,
    "G_attn": "64",
    "D_attn": "64",
    "shared_dim": 128,
    "shared_dim_feat": 512,
    "G_shared": true,
    "G_shared_feat": true,

    "k_nn": 50,
    "feature_extractor": "selfsupervised",

    "batch_size": 16,
    "D_lr": 1e-4,
    "G_lr": 1e-4,
    "G_ch": 64,
    "D_ch": 64,

    "load_weights": ""
}
BigGAN_PyTorch/config_files/ImageNet/IC-GAN/icgan_res64.json
ADDED
@@ -0,0 +1,48 @@
{
    "experiment_name": "icgan_biggan_imagenet_res64",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 1,
    "n_gpus_per_node": 1,
    "hflips": true,
    "feature_augmentation": true,

    "test_every": 1,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 1,
    "num_D_accumulations": 1,
    "num_D_steps": 1,

    "class_cond": false,
    "instance_cond": true,
    "hier": true,
    "resolution": 64,
    "G_attn": "32",
    "D_attn": "32",
    "shared_dim": 128,
    "shared_dim_feat": 512,
    "G_shared": true,
    "G_shared_feat": true,

    "k_nn": 50,
    "feature_extractor": "selfsupervised",

    "batch_size": 256,
    "D_lr": 1e-4,
    "G_lr": 1e-4,
    "G_ch": 64,
    "D_ch": 64,

    "load_weights": ""

}
BigGAN_PyTorch/config_files/ImageNet/cc_IC-GAN/cc_icgan_res128.json
ADDED
@@ -0,0 +1,48 @@
{
    "experiment_name": "cc_icgan_biggan_imagenet_res128",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 4,
    "n_gpus_per_node": 8,
    "hflips": true,
    "feature_augmentation": true,

    "test_every": 5,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 1,
    "num_D_accumulations": 1,
    "num_D_steps": 1,

    "class_cond": true,
    "instance_cond": true,
    "hier": true,
    "resolution": 128,
    "G_attn": "64",
    "D_attn": "64",
    "shared_dim": 128,
    "shared_dim_feat": 512,
    "G_shared": true,
    "G_shared_feat": true,

    "k_nn": 50,
    "feature_extractor": "classification",

    "batch_size": 64,
    "D_lr": 1e-4,
    "G_lr": 4e-5,
    "G_ch": 96,
    "D_ch": 96,

    "load_weights": ""

}
BigGAN_PyTorch/config_files/ImageNet/cc_IC-GAN/cc_icgan_res256.json
ADDED
@@ -0,0 +1,47 @@
{
    "experiment_name": "cc_icgan_biggan_imagenet_res256",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 4,
    "n_gpus_per_node": 8,
    "hflips": true,
    "feature_augmentation": false,

    "test_every": 5,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 4,
    "num_D_accumulations": 4,
    "num_D_steps": 1,

    "class_cond": true,
    "instance_cond": true,
    "hier": true,
    "resolution": 256,
    "G_attn": "64",
    "D_attn": "64",
    "shared_dim": 128,
    "shared_dim_feat": 512,
    "G_shared": true,
    "G_shared_feat": true,

    "k_nn": 50,
    "feature_extractor": "classification",

    "batch_size": 16,
    "D_lr": 1e-4,
    "G_lr": 4e-5,
    "G_ch": 96,
    "D_ch": 96,

    "load_weights": ""
}
BigGAN_PyTorch/config_files/ImageNet/cc_IC-GAN/cc_icgan_res256_halfcap.json
ADDED
@@ -0,0 +1,48 @@
{
    "experiment_name": "cc_icgan_biggan_imagenet_res256_halfcap",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 4,
    "n_gpus_per_node": 8,
    "hflips": true,
    "feature_augmentation": true,

    "test_every": 5,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 4,
    "num_D_accumulations": 4,
    "num_D_steps": 2,

    "class_cond": true,
    "instance_cond": true,
    "hier": true,
    "resolution": 256,
    "G_attn": "64",
    "D_attn": "64",
    "shared_dim": 128,
    "shared_dim_feat": 512,
    "G_shared": true,
    "G_shared_feat": true,

    "k_nn": 50,
    "feature_extractor": "classification",

    "batch_size": 16,
    "D_lr": 1e-4,
    "G_lr": 1e-4,
    "G_ch": 64,
    "D_ch": 64,

    "load_weights": ""

}
BigGAN_PyTorch/config_files/ImageNet/cc_IC-GAN/cc_icgan_res64.json
ADDED
@@ -0,0 +1,48 @@
{
    "experiment_name": "cc_icgan_biggan_imagenet_res64",
    "run_setup": "local_debug",
    "deterministic_run": true,
    "num_workers": 10,

    "ddp_train": true,
    "n_nodes": 1,
    "n_gpus_per_node": 1,
    "hflips": true,
    "feature_augmentation": true,

    "test_every": 1,
    "save_every": 1,
    "num_epochs": 3000,
    "es_patience": 50,
    "shuffle": true,

    "G_eval_mode": true,
    "ema": true,
    "use_ema": true,
    "num_G_accumulations": 1,
    "num_D_accumulations": 1,
    "num_D_steps": 1,

    "class_cond": true,
    "instance_cond": true,
    "hier": true,
    "resolution": 64,
    "G_attn": "32",
    "D_attn": "32",
    "shared_dim": 128,
    "shared_dim_feat": 512,
    "G_shared": true,
    "G_shared_feat": true,

    "k_nn": 50,
    "feature_extractor": "classification",

    "batch_size": 256,
    "D_lr": 1e-4,
    "G_lr": 1e-4,
    "G_ch": 64,
    "D_ch": 64,

    "load_weights": ""

}
BigGAN_PyTorch/diffaugment_utils.py
ADDED
@@ -0,0 +1,119 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# Copyright (c) 2020, Shengyu Zhao, Zhijian Liu, Ji Lin, Jun-Yan Zhu, and Song Han
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:

# * Redistributions of source code must retain the above copyright notice, this
#   list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
#   this list of conditions and the following disclaimer in the documentation
#   and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import torch
import torch.nn.functional as F


def DiffAugment(x, policy="", channels_first=True):
    if policy:
        if not channels_first:
            x = x.permute(0, 3, 1, 2)
        for p in policy.split(","):
            for f in AUGMENT_FNS[p]:
                x = f(x)
        if not channels_first:
            x = x.permute(0, 2, 3, 1)
        x = x.contiguous()
    return x


def rand_brightness(x):
    x = x + (torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) - 0.5)
    return x


def rand_saturation(x):
    x_mean = x.mean(dim=1, keepdim=True)
    x = (x - x_mean) * (
        torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) * 2
    ) + x_mean
    return x


def rand_contrast(x):
    x_mean = x.mean(dim=[1, 2, 3], keepdim=True)
    x = (x - x_mean) * (
        torch.rand(x.size(0), 1, 1, 1, dtype=x.dtype, device=x.device) + 0.5
    ) + x_mean
    return x


def rand_translation(x, ratio=0.125):
    shift_x, shift_y = int(x.size(2) * ratio + 0.5), int(x.size(3) * ratio + 0.5)
    translation_x = torch.randint(
        -shift_x, shift_x + 1, size=[x.size(0), 1, 1], device=x.device
    )
    translation_y = torch.randint(
        -shift_y, shift_y + 1, size=[x.size(0), 1, 1], device=x.device
    )
    grid_batch, grid_x, grid_y = torch.meshgrid(
        torch.arange(x.size(0), dtype=torch.long, device=x.device),
        torch.arange(x.size(2), dtype=torch.long, device=x.device),
        torch.arange(x.size(3), dtype=torch.long, device=x.device),
    )
    grid_x = torch.clamp(grid_x + translation_x + 1, 0, x.size(2) + 1)
    grid_y = torch.clamp(grid_y + translation_y + 1, 0, x.size(3) + 1)
    x_pad = F.pad(x, [1, 1, 1, 1, 0, 0, 0, 0])
    x = (
        x_pad.permute(0, 2, 3, 1)
        .contiguous()[grid_batch, grid_x, grid_y]
        .permute(0, 3, 1, 2)
    )
    return x


def rand_cutout(x, ratio=0.5):
    cutout_size = int(x.size(2) * ratio + 0.5), int(x.size(3) * ratio + 0.5)
    offset_x = torch.randint(
        0, x.size(2) + (1 - cutout_size[0] % 2), size=[x.size(0), 1, 1], device=x.device
    )
    offset_y = torch.randint(
        0, x.size(3) + (1 - cutout_size[1] % 2), size=[x.size(0), 1, 1], device=x.device
    )
    grid_batch, grid_x, grid_y = torch.meshgrid(
        torch.arange(x.size(0), dtype=torch.long, device=x.device),
        torch.arange(cutout_size[0], dtype=torch.long, device=x.device),
        torch.arange(cutout_size[1], dtype=torch.long, device=x.device),
    )
    grid_x = torch.clamp(
        grid_x + offset_x - cutout_size[0] // 2, min=0, max=x.size(2) - 1
    )
    grid_y = torch.clamp(
        grid_y + offset_y - cutout_size[1] // 2, min=0, max=x.size(3) - 1
    )
    mask = torch.ones(x.size(0), x.size(2), x.size(3), dtype=x.dtype, device=x.device)
    mask[grid_batch, grid_x, grid_y] = 0
    x = x * mask.unsqueeze(1)
    return x


AUGMENT_FNS = {
    "color": [rand_brightness, rand_saturation, rand_contrast],
    "translation": [rand_translation],
    "cutout": [rand_cutout],
}
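The "DiffAugment": "translation" entries in the configs above select which of these policies is applied; the same differentiable augmentation is meant to be applied to both real and generated batches before the discriminator. A minimal usage sketch (assuming the repository root is on PYTHONPATH; the random tensor stands in for an image batch in [-1, 1]):

    import torch
    from BigGAN_PyTorch.diffaugment_utils import DiffAugment

    x = torch.rand(8, 3, 128, 128) * 2 - 1  # NCHW batch in [-1, 1]
    x_aug = DiffAugment(x, policy="color,translation,cutout")
    assert x_aug.shape == x.shape           # every policy is shape-preserving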
BigGAN_PyTorch/imagenet_lt/ImageNet_LT_train.txt
ADDED
The diff for this file is too large to render. See raw diff.
BigGAN_PyTorch/imagenet_lt/ImageNet_LT_val.txt
ADDED
The diff for this file is too large to render. See raw diff.
BigGAN_PyTorch/imgs/D Singular Values.png
ADDED
BigGAN_PyTorch/imgs/DeepSamples.png
ADDED
BigGAN_PyTorch/imgs/DogBall.png
ADDED
BigGAN_PyTorch/imgs/G Singular Values.png
ADDED
BigGAN_PyTorch/imgs/IS_FID.png
ADDED
BigGAN_PyTorch/imgs/Losses.png
ADDED
BigGAN_PyTorch/imgs/header_image.jpg
ADDED
BigGAN_PyTorch/imgs/interp_sample.jpg
ADDED
BigGAN_PyTorch/layers.py
ADDED
@@ -0,0 +1,616 @@
# Copyright (c) Facebook, Inc. and its affiliates.
# All rights reserved.
#
# All contributions by Andy Brock:
# Copyright (c) 2019 Andy Brock
#
# MIT License
""" Layers
This file contains various layers for the BigGAN models.
"""
import os
import numpy as np
import torch
import torch.nn as nn
from torch.nn import init
import torch.optim as optim
import torch.nn.functional as F
from torch.nn import Parameter as P

import sys

sys.path.insert(1, os.path.join(sys.path[0], ".."))
from BigGAN_PyTorch.sync_batchnorm import SynchronizedBatchNorm2d as SyncBN2d


# Projection of x onto y
def proj(x, y):
    return torch.mm(y, x.t()) * y / torch.mm(y, y.t())


# Orthogonalize x wrt list of vectors ys
def gram_schmidt(x, ys):
    for y in ys:
        x = x - proj(x, y)
    return x


# Apply num_itrs steps of the power method to estimate top N singular values.
def power_iteration(W, u_, update=True, eps=1e-12):
    # Lists holding singular vectors and values
    us, vs, svs = [], [], []
    for i, u in enumerate(u_):
        # Run one step of the power iteration
        with torch.no_grad():
            v = torch.matmul(u, W)
            # Run Gram-Schmidt to subtract components of all other singular vectors
            v = F.normalize(gram_schmidt(v, vs), eps=eps)
            # Add to the list
            vs += [v]
            # Update the other singular vector
            u = torch.matmul(v, W.t())
            # Run Gram-Schmidt to subtract components of all other singular vectors
            u = F.normalize(gram_schmidt(u, us), eps=eps)
            # Add to the list
            us += [u]
            if update:
                u_[i][:] = u
        # Compute this singular value and add it to the list
        svs += [torch.squeeze(torch.matmul(torch.matmul(v, W.t()), u.t()))]
        # svs += [torch.sum(F.linear(u, W.transpose(0, 1)) * v)]
    return svs, us, vs
62 |
+
|
63 |
+
|
64 |
+
# Convenience passthrough function
|
65 |
+
class identity(nn.Module):
|
66 |
+
def forward(self, input):
|
67 |
+
return input
|
68 |
+
|
69 |
+
|
70 |
+
# Spectral normalization base class
|
71 |
+
class SN(object):
|
72 |
+
def __init__(self, num_svs, num_itrs, num_outputs, transpose=False, eps=1e-12):
|
73 |
+
# Number of power iterations per step
|
74 |
+
self.num_itrs = num_itrs
|
75 |
+
# Number of singular values
|
76 |
+
self.num_svs = num_svs
|
77 |
+
# Transposed?
|
78 |
+
self.transpose = transpose
|
79 |
+
# Epsilon value for avoiding divide-by-0
|
80 |
+
self.eps = eps
|
81 |
+
# Register a singular vector for each sv
|
82 |
+
for i in range(self.num_svs):
|
83 |
+
self.register_buffer("u%d" % i, torch.randn(1, num_outputs))
|
84 |
+
self.register_buffer("sv%d" % i, torch.ones(1))
|
85 |
+
|
86 |
+
# Singular vectors (u side)
|
87 |
+
@property
|
88 |
+
def u(self):
|
89 |
+
return [getattr(self, "u%d" % i) for i in range(self.num_svs)]
|
90 |
+
|
91 |
+
# Singular values;
|
92 |
+
# note that these buffers are just for logging and are not used in training.
|
93 |
+
@property
|
94 |
+
def sv(self):
|
95 |
+
return [getattr(self, "sv%d" % i) for i in range(self.num_svs)]
|
96 |
+
|
97 |
+
# Compute the spectrally-normalized weight
|
98 |
+
def W_(self):
|
99 |
+
W_mat = self.weight.view(self.weight.size(0), -1)
|
100 |
+
if self.transpose:
|
101 |
+
W_mat = W_mat.t()
|
102 |
+
# Apply num_itrs power iterations
|
103 |
+
for _ in range(self.num_itrs):
|
104 |
+
svs, us, vs = power_iteration(
|
105 |
+
W_mat, self.u, update=self.training, eps=self.eps
|
106 |
+
)
|
107 |
+
# Update the svs
|
108 |
+
if self.training:
|
109 |
+
with torch.no_grad(): # Make sure to do this in a no_grad() context or you'll get memory leaks!
|
110 |
+
for i, sv in enumerate(svs):
|
111 |
+
self.sv[i][:] = sv
|
112 |
+
return self.weight / svs[0]
|
113 |
+
|
114 |
+
|
115 |
+
# 2D Conv layer with spectral norm
|
116 |
+
class SNConv2d(nn.Conv2d, SN):
|
117 |
+
def __init__(
|
118 |
+
self,
|
119 |
+
in_channels,
|
120 |
+
out_channels,
|
121 |
+
kernel_size,
|
122 |
+
stride=1,
|
123 |
+
padding=0,
|
124 |
+
dilation=1,
|
125 |
+
groups=1,
|
126 |
+
bias=True,
|
127 |
+
num_svs=1,
|
128 |
+
num_itrs=1,
|
129 |
+
eps=1e-12,
|
130 |
+
):
|
131 |
+
nn.Conv2d.__init__(
|
132 |
+
self,
|
133 |
+
in_channels,
|
134 |
+
out_channels,
|
135 |
+
kernel_size,
|
136 |
+
stride,
|
137 |
+
padding,
|
138 |
+
dilation,
|
139 |
+
groups,
|
140 |
+
bias,
|
141 |
+
)
|
142 |
+
SN.__init__(self, num_svs, num_itrs, out_channels, eps=eps)
|
143 |
+
|
144 |
+
def forward(self, x):
|
145 |
+
return F.conv2d(
|
146 |
+
x,
|
147 |
+
self.W_(),
|
148 |
+
self.bias,
|
149 |
+
self.stride,
|
150 |
+
self.padding,
|
151 |
+
self.dilation,
|
152 |
+
self.groups,
|
153 |
+
)
|
154 |
+
|
155 |
+
|
156 |
+
# Linear layer with spectral norm
|
157 |
+
class SNLinear(nn.Linear, SN):
|
158 |
+
def __init__(
|
159 |
+
self, in_features, out_features, bias=True, num_svs=1, num_itrs=1, eps=1e-12
|
160 |
+
):
|
161 |
+
nn.Linear.__init__(self, in_features, out_features, bias)
|
162 |
+
SN.__init__(self, num_svs, num_itrs, out_features, eps=eps)
|
163 |
+
|
164 |
+
def forward(self, x):
|
165 |
+
return F.linear(x, self.W_(), self.bias)
|
166 |
+
|
167 |
+
|
168 |
+
# Embedding layer with spectral norm
|
169 |
+
# We use num_embeddings as the dim instead of embedding_dim here
|
170 |
+
# for convenience sake
|
171 |
+
class SNEmbedding(nn.Embedding, SN):
|
172 |
+
def __init__(
|
173 |
+
self,
|
174 |
+
num_embeddings,
|
175 |
+
embedding_dim,
|
176 |
+
padding_idx=None,
|
177 |
+
max_norm=None,
|
178 |
+
norm_type=2,
|
179 |
+
scale_grad_by_freq=False,
|
180 |
+
sparse=False,
|
181 |
+
_weight=None,
|
182 |
+
num_svs=1,
|
183 |
+
num_itrs=1,
|
184 |
+
eps=1e-12,
|
185 |
+
):
|
186 |
+
nn.Embedding.__init__(
|
187 |
+
self,
|
188 |
+
num_embeddings,
|
189 |
+
embedding_dim,
|
190 |
+
padding_idx,
|
191 |
+
max_norm,
|
192 |
+
norm_type,
|
193 |
+
scale_grad_by_freq,
|
194 |
+
sparse,
|
195 |
+
_weight,
|
196 |
+
)
|
197 |
+
SN.__init__(self, num_svs, num_itrs, num_embeddings, eps=eps)
|
198 |
+
|
199 |
+
def forward(self, x):
|
200 |
+
return F.embedding(x, self.W_())
|
201 |
+
|
202 |
+
|
203 |
+
# A non-local block as used in SA-GAN
|
204 |
+
# Note that the implementation as described in the paper is largely incorrect;
|
205 |
+
# refer to the released code for the actual implementation.
|
206 |
+
class Attention(nn.Module):
|
207 |
+
def __init__(self, ch, which_conv=SNConv2d, name="attention"):
|
208 |
+
super(Attention, self).__init__()
|
209 |
+
# Channel multiplier
|
210 |
+
self.ch = ch
|
211 |
+
self.which_conv = which_conv
|
212 |
+
self.theta = self.which_conv(
|
213 |
+
self.ch, self.ch // 8, kernel_size=1, padding=0, bias=False
|
214 |
+
)
|
215 |
+
self.phi = self.which_conv(
|
216 |
+
self.ch, self.ch // 8, kernel_size=1, padding=0, bias=False
|
217 |
+
)
|
218 |
+
self.g = self.which_conv(
|
219 |
+
self.ch, self.ch // 2, kernel_size=1, padding=0, bias=False
|
220 |
+
)
|
221 |
+
self.o = self.which_conv(
|
222 |
+
self.ch // 2, self.ch, kernel_size=1, padding=0, bias=False
|
223 |
+
)
|
224 |
+
# Learnable gain parameter
|
225 |
+
self.gamma = P(torch.tensor(0.0), requires_grad=True)
|
226 |
+
|
227 |
+
def forward(self, x, y=None):
|
228 |
+
# Apply convs
|
229 |
+
theta = self.theta(x)
|
230 |
+
phi = F.max_pool2d(self.phi(x), [2, 2])
|
231 |
+
g = F.max_pool2d(self.g(x), [2, 2])
|
232 |
+
# Perform reshapes
|
233 |
+
theta = theta.view(-1, self.ch // 8, x.shape[2] * x.shape[3])
|
234 |
+
phi = phi.view(-1, self.ch // 8, x.shape[2] * x.shape[3] // 4)
|
235 |
+
g = g.view(-1, self.ch // 2, x.shape[2] * x.shape[3] // 4)
|
236 |
+
# Matmul and softmax to get attention maps
|
237 |
+
beta = F.softmax(torch.bmm(theta.transpose(1, 2), phi), -1)
|
238 |
+
# Attention map times g path
|
239 |
+
o = self.o(
|
240 |
+
torch.bmm(g, beta.transpose(1, 2)).view(
|
241 |
+
-1, self.ch // 2, x.shape[2], x.shape[3]
|
242 |
+
)
|
243 |
+
)
|
244 |
+
return self.gamma * o + x
|
245 |
+
|
246 |
+
|
247 |
+
# Fused batchnorm op
|
248 |
+
def fused_bn(x, mean, var, gain=None, bias=None, eps=1e-5):
|
249 |
+
# Apply scale and shift--if gain and bias are provided, fuse them here
|
250 |
+
# Prepare scale
|
251 |
+
scale = torch.rsqrt(var + eps)
|
252 |
+
# If a gain is provided, use it
|
253 |
+
if gain is not None:
|
254 |
+
scale = scale * gain
|
255 |
+
# Prepare shift
|
256 |
+
shift = mean * scale
|
257 |
+
# If bias is provided, use it
|
258 |
+
if bias is not None:
|
259 |
+
shift = shift - bias
|
260 |
+
return x * scale - shift
|
261 |
+
# return ((x - mean) / ((var + eps) ** 0.5)) * gain + bias # The unfused way.
|
262 |
+
|
263 |
+
|
264 |
+
# Manual BN
|
265 |
+
# Calculate means and variances using mean-of-squares minus mean-squared
|
266 |
+
def manual_bn(x, gain=None, bias=None, return_mean_var=False, eps=1e-5):
|
267 |
+
# Cast x to float32 if necessary
|
268 |
+
float_x = x.float()
|
269 |
+
# Calculate expected value of x (m) and expected value of x**2 (m2)
|
270 |
+
# Mean of x
|
271 |
+
m = torch.mean(float_x, [0, 2, 3], keepdim=True)
|
272 |
+
# Mean of x squared
|
273 |
+
m2 = torch.mean(float_x ** 2, [0, 2, 3], keepdim=True)
|
274 |
+
# Calculate variance as mean of squared minus mean squared.
|
275 |
+
var = m2 - m ** 2
|
276 |
+
# Cast back to float 16 if necessary
|
277 |
+
var = var.type(x.type())
|
278 |
+
m = m.type(x.type())
|
279 |
+
# Return mean and variance for updating stored mean/var if requested
|
280 |
+
if return_mean_var:
|
281 |
+
return fused_bn(x, m, var, gain, bias, eps), m.squeeze(), var.squeeze()
|
282 |
+
else:
|
283 |
+
return fused_bn(x, m, var, gain, bias, eps)
|
284 |
+
|
285 |
+
|
286 |
+
# My batchnorm, supports standing stats
|
287 |
+
class myBN(nn.Module):
|
288 |
+
def __init__(self, num_channels, eps=1e-5, momentum=0.1):
|
289 |
+
super(myBN, self).__init__()
|
290 |
+
# momentum for updating running stats
|
291 |
+
self.momentum = momentum
|
292 |
+
# epsilon to avoid dividing by 0
|
293 |
+
self.eps = eps
|
294 |
+
# Momentum
|
295 |
+
self.momentum = momentum
|
296 |
+
# Register buffers
|
297 |
+
self.register_buffer("stored_mean", torch.zeros(num_channels))
|
298 |
+
self.register_buffer("stored_var", torch.ones(num_channels))
|
299 |
+
self.register_buffer("accumulation_counter", torch.zeros(1))
|
300 |
+
# Accumulate running means and vars
|
301 |
+
self.accumulate_standing = False
|
302 |
+
|
303 |
+
# reset standing stats
|
304 |
+
def reset_stats(self):
|
305 |
+
self.stored_mean[:] = 0
|
306 |
+
self.stored_var[:] = 0
|
307 |
+
self.accumulation_counter[:] = 0
|
308 |
+
|
309 |
+
def forward(self, x, gain, bias):
|
310 |
+
if self.training:
|
311 |
+
out, mean, var = manual_bn(
|
312 |
+
x, gain, bias, return_mean_var=True, eps=self.eps
|
313 |
+
)
|
314 |
+
# If accumulating standing stats, increment them
|
315 |
+
if self.accumulate_standing:
|
316 |
+
self.stored_mean[:] = self.stored_mean + mean.data
|
317 |
+
self.stored_var[:] = self.stored_var + var.data
|
318 |
+
self.accumulation_counter += 1.0
|
319 |
+
# If not accumulating standing stats, take running averages
|
320 |
+
else:
|
321 |
+
self.stored_mean[:] = (
|
322 |
+
self.stored_mean * (1 - self.momentum) + mean * self.momentum
|
323 |
+
)
|
324 |
+
self.stored_var[:] = (
|
325 |
+
self.stored_var * (1 - self.momentum) + var * self.momentum
|
326 |
+
)
|
327 |
+
return out
|
328 |
+
# If not in training mode, use the stored statistics
|
329 |
+
else:
|
330 |
+
mean = self.stored_mean.view(1, -1, 1, 1)
|
331 |
+
var = self.stored_var.view(1, -1, 1, 1)
|
332 |
+
# If using standing stats, divide them by the accumulation counter
|
333 |
+
if self.accumulate_standing:
|
334 |
+
mean = mean / self.accumulation_counter
|
335 |
+
var = var / self.accumulation_counter
|
336 |
+
return fused_bn(x, mean, var, gain, bias, self.eps)
|
337 |
+
|
338 |
+
|
339 |
+
# Simple function to handle groupnorm norm stylization
|
340 |
+
def groupnorm(x, norm_style):
|
341 |
+
# If number of channels specified in norm_style:
|
342 |
+
if "ch" in norm_style:
|
343 |
+
ch = int(norm_style.split("_")[-1])
|
344 |
+
groups = max(int(x.shape[1]) // ch, 1)
|
345 |
+
# If number of groups specified in norm style
|
346 |
+
elif "grp" in norm_style:
|
347 |
+
groups = int(norm_style.split("_")[-1])
|
348 |
+
# If neither, default to groups = 16
|
349 |
+
else:
|
350 |
+
groups = 16
|
351 |
+
return F.group_norm(x, groups)
|
352 |
+
|
353 |
+
|
354 |
+
# Class-conditional bn
|
355 |
+
# output size is the number of channels, input size is for the linear layers
|
356 |
+
# Andy's Note: this class feels messy but I'm not really sure how to clean it up
|
357 |
+
# Suggestions welcome! (By which I mean, refactor this and make a pull request
|
358 |
+
# if you want to make this more readable/usable).
|
359 |
+
class ccbn(nn.Module):
|
360 |
+
def __init__(
|
361 |
+
self,
|
362 |
+
output_size,
|
363 |
+
input_size,
|
364 |
+
which_linear,
|
365 |
+
eps=1e-5,
|
366 |
+
momentum=0.1,
|
367 |
+
cross_replica=False,
|
368 |
+
mybn=False,
|
369 |
+
norm_style="bn",
|
370 |
+
):
|
371 |
+
super(ccbn, self).__init__()
|
372 |
+
self.output_size, self.input_size = output_size, input_size
|
373 |
+
# Prepare gain and bias layers
|
374 |
+
self.gain = which_linear(input_size, output_size)
|
375 |
+
self.bias = which_linear(input_size, output_size)
|
376 |
+
# epsilon to avoid dividing by 0
|
377 |
+
self.eps = eps
|
378 |
+
# Momentum
|
379 |
+
self.momentum = momentum
|
380 |
+
# Use cross-replica batchnorm?
|
381 |
+
self.cross_replica = cross_replica
|
382 |
+
# Use my batchnorm?
|
383 |
+
self.mybn = mybn
|
384 |
+
# Norm style?
|
385 |
+
self.norm_style = norm_style
|
386 |
+
|
387 |
+
if self.cross_replica:
|
388 |
+
# self.bn = SyncBN2d(output_size, eps=self.eps, momentum=self.momentum, affine=False)
|
389 |
+
self.bn = nn.BatchNorm2d(
|
390 |
+
output_size, eps=self.eps, momentum=self.momentum, affine=False
|
391 |
+
)
|
392 |
+
elif self.mybn:
|
393 |
+
self.bn = myBN(output_size, self.eps, self.momentum)
|
394 |
+
elif self.norm_style in ["bn", "in"]:
|
395 |
+
self.register_buffer("stored_mean", torch.zeros(output_size))
|
396 |
+
self.register_buffer("stored_var", torch.ones(output_size))
|
397 |
+
|
398 |
+
def forward(self, x, y):
|
399 |
+
# Calculate class-conditional gains and biases
|
400 |
+
gain = (1 + self.gain(y)).view(y.size(0), -1, 1, 1)
|
401 |
+
bias = self.bias(y).view(y.size(0), -1, 1, 1)
|
402 |
+
|
403 |
+
# If using my batchnorm
|
404 |
+
if self.cross_replica:
|
405 |
+
out = self.bn(x)
|
406 |
+
out = out * gain + bias
|
407 |
+
return out
|
408 |
+
elif self.mybn:
|
409 |
+
return self.bn(x, gain=gain, bias=bias)
|
410 |
+
else:
|
411 |
+
if self.norm_style == "bn":
|
412 |
+
out = F.batch_norm(
|
413 |
+
x,
|
414 |
+
self.stored_mean,
|
415 |
+
self.stored_var,
|
416 |
+
None,
|
417 |
+
None,
|
418 |
+
self.training,
|
419 |
+
0.1,
|
420 |
+
self.eps,
|
421 |
+
)
|
422 |
+
elif self.norm_style == "in":
|
423 |
+
out = F.instance_norm(
|
424 |
+
x,
|
425 |
+
self.stored_mean,
|
426 |
+
self.stored_var,
|
427 |
+
None,
|
428 |
+
None,
|
429 |
+
self.training,
|
430 |
+
0.1,
|
431 |
+
self.eps,
|
432 |
+
)
|
433 |
+
elif self.norm_style == "gn":
|
434 |
+
out = groupnorm(x, self.normstyle)
|
435 |
+
elif self.norm_style == "nonorm":
|
436 |
+
out = x
|
437 |
+
return out * gain + bias
|
438 |
+
|
439 |
+
def extra_repr(self):
|
440 |
+
s = "out: {output_size}, in: {input_size},"
|
441 |
+
s += " cross_replica={cross_replica}"
|
442 |
+
return s.format(**self.__dict__)
|
443 |
+
|
444 |
+
|
445 |
+
# Normal, non-class-conditional BN
|
446 |
+
class bn(nn.Module):
|
447 |
+
def __init__(
|
448 |
+
self,
|
449 |
+
output_size,
|
450 |
+
eps=1e-5,
|
451 |
+
momentum=0.1,
|
452 |
+
cross_replica=False,
|
453 |
+
mybn=False,
|
454 |
+
**kwargs
|
455 |
+
):
|
456 |
+
super(bn, self).__init__()
|
457 |
+
self.output_size = output_size
|
458 |
+
|
459 |
+
# epsilon to avoid dividing by 0
|
460 |
+
self.eps = eps
|
461 |
+
# Momentum
|
462 |
+
self.momentum = momentum
|
463 |
+
# Use cross-replica batchnorm?
|
464 |
+
self.cross_replica = cross_replica
|
465 |
+
# Use my batchnorm?
|
466 |
+
self.mybn = mybn
|
467 |
+
|
468 |
+
if self.cross_replica:
|
469 |
+
# self.bn = SyncBN2d(output_size, eps=self.eps, momentum=self.momentum, affine=False)
|
470 |
+
self.bn = nn.BatchNorm2d(
|
471 |
+
output_size, eps=self.eps, momentum=self.momentum, affine=True
|
472 |
+
)
|
473 |
+
elif mybn:
|
474 |
+
# Prepare gain and bias layers
|
475 |
+
self.bn = myBN(output_size, self.eps, self.momentum)
|
476 |
+
# Register buffers if neither of the above
|
477 |
+
else:
|
478 |
+
self.register_buffer("stored_mean", torch.zeros(output_size))
|
479 |
+
self.register_buffer("stored_var", torch.ones(output_size))
|
480 |
+
|
481 |
+
if not self.cross_replica:
|
482 |
+
self.gain = P(torch.ones(output_size), requires_grad=True)
|
483 |
+
self.bias = P(torch.zeros(output_size), requires_grad=True)
|
484 |
+
|
485 |
+
def forward(self, x, y=None):
|
486 |
+
if self.cross_replica:
|
487 |
+
out = self.bn(x)
|
488 |
+
return out
|
489 |
+
elif self.mybn:
|
490 |
+
gain = self.gain.view(1, -1, 1, 1)
|
491 |
+
bias = self.bias.view(1, -1, 1, 1)
|
492 |
+
return self.bn(x, gain=gain, bias=bias)
|
493 |
+
else:
|
494 |
+
return F.batch_norm(
|
495 |
+
x,
|
496 |
+
self.stored_mean,
|
497 |
+
self.stored_var,
|
498 |
+
self.gain,
|
499 |
+
self.bias,
|
500 |
+
self.training,
|
501 |
+
self.momentum,
|
502 |
+
self.eps,
|
503 |
+
)
|
504 |
+
|
505 |
+
|
506 |
+
# Generator blocks
|
507 |
+
# Note that this class assumes the kernel size and padding (and any other
|
508 |
+
# settings) have been selected in the main generator module and passed in
|
509 |
+
# through the which_conv arg. Similar rules apply with which_bn (the input
|
510 |
+
# size [which is actually the number of channels of the conditional info] must
|
511 |
+
# be preselected)
|
512 |
+
class GBlock(nn.Module):
|
513 |
+
def __init__(
|
514 |
+
self,
|
515 |
+
in_channels,
|
516 |
+
out_channels,
|
517 |
+
which_conv=nn.Conv2d,
|
518 |
+
which_bn=bn,
|
519 |
+
activation=None,
|
520 |
+
upsample=None,
|
521 |
+
):
|
522 |
+
super(GBlock, self).__init__()
|
523 |
+
|
524 |
+
self.in_channels, self.out_channels = in_channels, out_channels
|
525 |
+
self.which_conv, self.which_bn = which_conv, which_bn
|
526 |
+
self.activation = activation
|
527 |
+
self.upsample = upsample
|
528 |
+
# Conv layers
|
529 |
+
self.conv1 = self.which_conv(self.in_channels, self.out_channels)
|
530 |
+
self.conv2 = self.which_conv(self.out_channels, self.out_channels)
|
531 |
+
self.learnable_sc = in_channels != out_channels or upsample
|
532 |
+
if self.learnable_sc:
|
533 |
+
self.conv_sc = self.which_conv(
|
534 |
+
in_channels, out_channels, kernel_size=1, padding=0
|
535 |
+
)
|
536 |
+
# Batchnorm layers
|
537 |
+
self.bn1 = self.which_bn(in_channels)
|
538 |
+
self.bn2 = self.which_bn(out_channels)
|
539 |
+
# upsample layers
|
540 |
+
self.upsample = upsample
|
541 |
+
|
542 |
+
def forward(self, x, y):
|
543 |
+
h = self.activation(self.bn1(x, y))
|
544 |
+
if self.upsample:
|
545 |
+
h = self.upsample(h)
|
546 |
+
x = self.upsample(x)
|
547 |
+
h = self.conv1(h)
|
548 |
+
h = self.activation(self.bn2(h, y))
|
549 |
+
h = self.conv2(h)
|
550 |
+
if self.learnable_sc:
|
551 |
+
x = self.conv_sc(x)
|
552 |
+
return h + x
|
553 |
+
|
554 |
+
|
555 |
+
# Residual block for the discriminator
|
556 |
+
class DBlock(nn.Module):
|
557 |
+
def __init__(
|
558 |
+
self,
|
559 |
+
in_channels,
|
560 |
+
out_channels,
|
561 |
+
which_conv=SNConv2d,
|
562 |
+
wide=True,
|
563 |
+
preactivation=False,
|
564 |
+
activation=None,
|
565 |
+
downsample=None,
|
566 |
+
):
|
567 |
+
super(DBlock, self).__init__()
|
568 |
+
self.in_channels, self.out_channels = in_channels, out_channels
|
569 |
+
# If using wide D (as in SA-GAN and BigGAN), change the channel pattern
|
570 |
+
self.hidden_channels = self.out_channels if wide else self.in_channels
|
571 |
+
self.which_conv = which_conv
|
572 |
+
self.preactivation = preactivation
|
573 |
+
self.activation = activation
|
574 |
+
self.downsample = downsample
|
575 |
+
|
576 |
+
# Conv layers
|
577 |
+
self.conv1 = self.which_conv(self.in_channels, self.hidden_channels)
|
578 |
+
self.conv2 = self.which_conv(self.hidden_channels, self.out_channels)
|
579 |
+
self.learnable_sc = (
|
580 |
+
True if (in_channels != out_channels) or downsample else False
|
581 |
+
)
|
582 |
+
if self.learnable_sc:
|
583 |
+
self.conv_sc = self.which_conv(
|
584 |
+
in_channels, out_channels, kernel_size=1, padding=0
|
585 |
+
)
|
586 |
+
|
587 |
+
def shortcut(self, x):
|
588 |
+
if self.preactivation:
|
589 |
+
if self.learnable_sc:
|
590 |
+
x = self.conv_sc(x)
|
591 |
+
if self.downsample:
|
592 |
+
x = self.downsample(x)
|
593 |
+
else:
|
594 |
+
if self.downsample:
|
595 |
+
x = self.downsample(x)
|
596 |
+
if self.learnable_sc:
|
597 |
+
x = self.conv_sc(x)
|
598 |
+
return x
|
599 |
+
|
600 |
+
def forward(self, x):
|
601 |
+
if self.preactivation:
|
602 |
+
# h = self.activation(x) # NOT TODAY SATAN
|
603 |
+
# Andy's note: This line *must* be an out-of-place ReLU or it
|
604 |
+
# will negatively affect the shortcut connection.
|
605 |
+
h = F.relu(x)
|
606 |
+
else:
|
607 |
+
h = x
|
608 |
+
h = self.conv1(h)
|
609 |
+
h = self.conv2(self.activation(h))
|
610 |
+
if self.downsample:
|
611 |
+
h = self.downsample(h)
|
612 |
+
|
613 |
+
return h + self.shortcut(x)
|
614 |
+
|
615 |
+
|
616 |
+
# dogball
|
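As a sanity check on the spectral-norm machinery above, the sketch below (an illustration, not part of the file; it assumes a recent PyTorch with torch.linalg available) runs repeated forward passes in train mode so the one-step-per-forward power iteration converges, then compares the tracked first singular value against an exact SVD of the flattened kernel.

import torch

conv = SNConv2d(16, 32, kernel_size=3, padding=1, num_svs=1, num_itrs=1)
conv.train()
x = torch.randn(4, 16, 8, 8)
with torch.no_grad():
    for _ in range(100):  # each forward pass runs one power-iteration step
        conv(x)
approx = conv.sv[0].item()  # singular value tracked in the SN buffers
exact = torch.linalg.svdvals(conv.weight.view(32, -1)).max().item()
print(approx, exact)  # the two estimates should agree closely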
BigGAN_PyTorch/logs/BigGAN_ch96_bs256x8.jsonl
ADDED
@@ -0,0 +1,68 @@
+{"itr": 2000, "IS_mean": 2.806771755218506, "IS_std": 0.019480662420392036, "FID": 173.76484159711126, "_stamp": 1551403232.0425167}
+{"itr": 4000, "IS_mean": 4.962374687194824, "IS_std": 0.07276841998100281, "FID": 113.86730514283107, "_stamp": 1551422228.743057}
+{"itr": 6000, "IS_mean": 6.939817905426025, "IS_std": 0.11417163163423538, "FID": 101.63548498447199, "_stamp": 1551457139.3400874}
+{"itr": 8000, "IS_mean": 8.142985343933105, "IS_std": 0.11931543797254562, "FID": 92.0014385772705, "_stamp": 1551476217.2409613}
+{"itr": 10000, "IS_mean": 10.355518341064453, "IS_std": 0.09094739705324173, "FID": 83.58068997965364, "_stamp": 1551494854.2419689}
+{"itr": 12000, "IS_mean": 11.288347244262695, "IS_std": 0.14952820539474487, "FID": 80.98066299357106, "_stamp": 1551513232.5049698}
+{"itr": 14000, "IS_mean": 11.755794525146484, "IS_std": 0.17969024181365967, "FID": 76.80603924280956, "_stamp": 1551531425.150371}
+{"itr": 18000, "IS_mean": 13.65534496307373, "IS_std": 0.11151058971881866, "FID": 65.95736694335938, "_stamp": 1551588271.9177916}
+{"itr": 20000, "IS_mean": 14.817827224731445, "IS_std": 0.23588882386684418, "FID": 61.32061767578125, "_stamp": 1551606713.6567464}
+{"itr": 22000, "IS_mean": 17.16551399230957, "IS_std": 0.19506946206092834, "FID": 53.387969970703125, "_stamp": 1551624876.6513028}
+{"itr": 24000, "IS_mean": 19.60654067993164, "IS_std": 0.5591856837272644, "FID": 46.5386962890625, "_stamp": 1551642822.6126688}
+{"itr": 26000, "IS_mean": 21.74416732788086, "IS_std": 0.2850531041622162, "FID": 41.595001220703125, "_stamp": 1551663522.6019194}
+{"itr": 28000, "IS_mean": 23.923612594604492, "IS_std": 0.41587772965431213, "FID": 37.894744873046875, "_stamp": 1551681794.6567173}
+{"itr": 30000, "IS_mean": 25.569377899169922, "IS_std": 0.3333457112312317, "FID": 35.49310302734375, "_stamp": 1551699773.7080302}
+{"itr": 32000, "IS_mean": 26.867944717407227, "IS_std": 0.5968036651611328, "FID": 33.4849853515625, "_stamp": 1551717623.887933}
+{"itr": 34000, "IS_mean": 28.719074249267578, "IS_std": 0.5698027014732361, "FID": 31.375518798828125, "_stamp": 1551735411.1578612}
+{"itr": 36000, "IS_mean": 30.587574005126953, "IS_std": 0.5044271349906921, "FID": 29.432281494140625, "_stamp": 1551783380.6357439}
+{"itr": 38000, "IS_mean": 32.08299255371094, "IS_std": 0.49342143535614014, "FID": 28.099456787109375, "_stamp": 1551801179.6495197}
+{"itr": 40000, "IS_mean": 34.24657440185547, "IS_std": 0.7709177732467651, "FID": 26.53802490234375, "_stamp": 1551818775.171794}
+{"itr": 42000, "IS_mean": 35.891212463378906, "IS_std": 0.7036871314048767, "FID": 25.03021240234375, "_stamp": 1551836329.6873965}
+{"itr": 44000, "IS_mean": 38.184898376464844, "IS_std": 0.32996198534965515, "FID": 23.4940185546875, "_stamp": 1551897864.911537}
+{"itr": 46000, "IS_mean": 40.239479064941406, "IS_std": 0.7761151194572449, "FID": 22.53167724609375, "_stamp": 1551915406.4840703}
+{"itr": 48000, "IS_mean": 41.46656036376953, "IS_std": 1.1031498908996582, "FID": 21.5338134765625, "_stamp": 1551932899.6074848}
+{"itr": 50000, "IS_mean": 43.31670379638672, "IS_std": 0.7796809077262878, "FID": 20.53253173828125, "_stamp": 1551950390.345334}
+{"itr": 52000, "IS_mean": 45.1517333984375, "IS_std": 1.2925242185592651, "FID": 19.656646728515625, "_stamp": 1551967838.1501615}
+{"itr": 54000, "IS_mean": 47.638771057128906, "IS_std": 1.0689665079116821, "FID": 18.898162841796875, "_stamp": 1552044534.5349634}
+{"itr": 56000, "IS_mean": 48.87520217895508, "IS_std": 1.1317559480667114, "FID": 18.1248779296875, "_stamp": 1552061763.3080354}
+{"itr": 58000, "IS_mean": 49.40987014770508, "IS_std": 1.1866596937179565, "FID": 17.751922607421875, "_stamp": 1552078939.9828825}
+{"itr": 60000, "IS_mean": 51.051334381103516, "IS_std": 1.2281248569488525, "FID": 17.19964599609375, "_stamp": 1552096167.889482}
+{"itr": 62000, "IS_mean": 52.0235481262207, "IS_std": 0.5391153693199158, "FID": 16.62115478515625, "_stamp": 1552113417.9520617}
+{"itr": 64000, "IS_mean": 53.868492126464844, "IS_std": 1.327082633972168, "FID": 16.237335205078125, "_stamp": 1552142961.09602}
+{"itr": 66000, "IS_mean": 54.978721618652344, "IS_std": 0.9502049088478088, "FID": 15.81170654296875, "_stamp": 1552162403.2232807}
+{"itr": 68000, "IS_mean": 55.73248291015625, "IS_std": 1.0323851108551025, "FID": 15.545623779296875, "_stamp": 1552181112.676657}
+{"itr": 70000, "IS_mean": 56.78422927856445, "IS_std": 1.211003303527832, "FID": 15.28369140625, "_stamp": 1552199498.887533}
+{"itr": 72000, "IS_mean": 57.972999572753906, "IS_std": 0.8668608665466309, "FID": 14.86395263671875, "_stamp": 1552217782.2738616}
+{"itr": 74000, "IS_mean": 58.845054626464844, "IS_std": 1.4297977685928345, "FID": 14.620635986328125, "_stamp": 1552251085.1781816}
+{"itr": 76000, "IS_mean": 59.60982131958008, "IS_std": 0.9095696210861206, "FID": 14.360198974609375, "_stamp": 1552270214.9345307}
+{"itr": 78000, "IS_mean": 60.71195602416992, "IS_std": 0.960899829864502, "FID": 14.07183837890625, "_stamp": 1552288697.1580262}
+{"itr": 80000, "IS_mean": 61.772125244140625, "IS_std": 0.6913255453109741, "FID": 13.781585693359375, "_stamp": 1552307170.0280282}
+{"itr": 82000, "IS_mean": 62.98079299926758, "IS_std": 1.4735801219940186, "FID": 13.55389404296875, "_stamp": 1552325252.8553352}
+{"itr": 84000, "IS_mean": 64.95240783691406, "IS_std": 0.9018951654434204, "FID": 13.231689453125, "_stamp": 1552344135.3111835}
+{"itr": 86000, "IS_mean": 65.13968658447266, "IS_std": 0.8772205114364624, "FID": 13.176849365234375, "_stamp": 1552362429.6782444}
+{"itr": 88000, "IS_mean": 65.84476470947266, "IS_std": 1.167534351348877, "FID": 12.87078857421875, "_stamp": 1552380560.7988124}
+{"itr": 90000, "IS_mean": 67.41099548339844, "IS_std": 1.6899267435073853, "FID": 12.586517333984375, "_stamp": 1552398550.2060475}
+{"itr": 92000, "IS_mean": 68.63685607910156, "IS_std": 1.9431978464126587, "FID": 12.49505615234375, "_stamp": 1552430781.6406457}
+{"itr": 94000, "IS_mean": 70.09907531738281, "IS_std": 1.0715738534927368, "FID": 12.047607421875, "_stamp": 1552449001.1950285}
+{"itr": 96000, "IS_mean": 70.34623718261719, "IS_std": 1.7962944507598877, "FID": 11.896697998046875, "_stamp": 1552466989.3587568}
+{"itr": 98000, "IS_mean": 71.08210754394531, "IS_std": 1.458209753036499, "FID": 11.73046875, "_stamp": 1552484800.7138846}
+{"itr": 100000, "IS_mean": 72.24256896972656, "IS_std": 1.3259714841842651, "FID": 11.7386474609375, "_stamp": 1552502538.0269725}
+{"itr": 102000, "IS_mean": 73.19488525390625, "IS_std": 1.3439149856567383, "FID": 11.50494384765625, "_stamp": 1552523284.4514356}
+{"itr": 104000, "IS_mean": 73.38243103027344, "IS_std": 1.4162707328796387, "FID": 11.374542236328125, "_stamp": 1552541012.0651608}
+{"itr": 106000, "IS_mean": 74.95563507080078, "IS_std": 1.089124083518982, "FID": 11.10479736328125, "_stamp": 1552558577.7458107}
+{"itr": 108000, "IS_mean": 76.42997741699219, "IS_std": 1.9282453060150146, "FID": 10.998870849609375, "_stamp": 1552576111.9480467}
+{"itr": 110000, "IS_mean": 76.89225769042969, "IS_std": 1.4771150350570679, "FID": 10.847015380859375, "_stamp": 1552593659.445132}
+{"itr": 112000, "IS_mean": 78.04684448242188, "IS_std": 1.4850096702575684, "FID": 10.772552490234375, "_stamp": 1552616479.5201895}
+{"itr": 114000, "IS_mean": 79.67677307128906, "IS_std": 2.0147368907928467, "FID": 10.528045654296875, "_stamp": 1552633850.9315467}
+{"itr": 116000, "IS_mean": 79.8828125, "IS_std": 0.978247344493866, "FID": 10.626068115234375, "_stamp": 1552651198.9012825}
+{"itr": 118000, "IS_mean": 79.95381164550781, "IS_std": 1.8608143329620361, "FID": 10.46771240234375, "_stamp": 1552668560.4420238}
+{"itr": 120000, "IS_mean": 82.37217712402344, "IS_std": 1.8909310102462769, "FID": 10.259033203125, "_stamp": 1552749673.4319007}
+{"itr": 122000, "IS_mean": 83.49666595458984, "IS_std": 2.38446044921875, "FID": 9.996185302734375, "_stamp": 1552766698.2706933}
+{"itr": 124000, "IS_mean": 83.05189514160156, "IS_std": 1.8844469785690308, "FID": 10.164398193359375, "_stamp": 1552783762.891172}
+{"itr": 126000, "IS_mean": 84.27763366699219, "IS_std": 0.9329544901847839, "FID": 10.03509521484375, "_stamp": 1552800953.5724175}
+{"itr": 128000, "IS_mean": 85.84852600097656, "IS_std": 2.2698562145233154, "FID": 9.91644287109375, "_stamp": 1552818112.227726}
+{"itr": 130000, "IS_mean": 87.356689453125, "IS_std": 2.0958640575408936, "FID": 9.771148681640625, "_stamp": 1552837539.995247}
+{"itr": 132000, "IS_mean": 88.72562408447266, "IS_std": 1.7551432847976685, "FID": 9.8258056640625, "_stamp": 1552859685.9305944}
+{"itr": 134000, "IS_mean": 88.0631103515625, "IS_std": 1.8199039697647095, "FID": 9.957183837890625, "_stamp": 1552880037.5408435}
+{"itr": 136000, "IS_mean": 91.50938415527344, "IS_std": 1.9926033020019531, "FID": 9.876556396484375, "_stamp": 1552899854.652669}
+{"itr": 138000, "IS_mean": 93.09217834472656, "IS_std": 2.3062736988067627, "FID": 9.908477783203125, "_stamp": 1552921580.958927}
BigGAN_PyTorch/logs/compare_IS.m
ADDED
@@ -0,0 +1,97 @@
+% Copyright (c) Facebook, Inc. and its affiliates.
+% All rights reserved.
+%
+% All contributions by Andy Brock:
+% Copyright (c) 2019 Andy Brock
+%
+% MIT License
+
+clc
+clear all
+close all
+fclose all;
+
+
+
+%% Get all logs and sort them
+s = {};
+d = dir();
+j = 1;
+for i = 1:length(d)
+    if any(strfind(d(i).name,'.jsonl'))
+        s = [s; d(i).name];
+    end
+end
+
+
+j = 1;
+for i = 1:length(s)
+    fname = s{i,1};
+    % Check if the Inception metrics log exists, and if so, plot it
+    [itr, IS, FID, t] = process_inception_log(fname(1:end - 10), 'log.jsonl');
+    s{i,2} = itr;
+    s{i,3} = IS;
+    s{i,4} = FID;
+    s{i,5} = max(IS);
+    s{i,6} = min(FID);
+    s{i,7} = t;
+end
+% Sort by Inception Score?
+[IS_sorted, IS_index] = sort(cell2mat(s(:,5)));
+% Cut off inception scores below a certain value?
+threshold = 22;
+IS_index = IS_index(IS_sorted > threshold);
+
+% Sort by FID?
+[FID_sorted, FID_index] = sort(cell2mat(s(:,6)));
+% Cut off also based on IS?
+% threshold = 0;
+FID_index = FID_index(IS_sorted > threshold);
+
+
+
+%% Plot things?
+cc = hsv(length(IS_index));
+legend1 = {};
+legend2 = {};
+make_axis = true; %false % Turn this on to see the axis out to 1e6 iterations
+for i = 1:length(IS_index)
+    legend1 = [legend1; s{IS_index(i), 1}];
+    figure(1)
+    plot(s{IS_index(i),2}, s{IS_index(i),3}, 'color', cc(i,:), 'linewidth', 2)
+    hold on;
+    xlabel('itr'); ylabel('IS');
+    grid on;
+    if make_axis
+        axis([0,1e6,0,80]); % 50% grid on;
+    end
+    legend(legend1,'Interpreter','none')
+    %pause(1) % Turn this on to animate stuff
+    legend2 = [legend2; s{IS_index(i), 1}];
+    figure(2)
+    plot(s{IS_index(i),2}, s{IS_index(i),4}, 'color', cc(i,:), 'linewidth', 2)
+    hold on;
+    xlabel('itr'); ylabel('FID');
+    j = j + 1;
+    grid on;
+    if make_axis
+        axis([0,1e6,0,50]); % grid on;
+    end
+    legend(legend2, 'Interpreter','none')
+
+end
+
+%% Quick script to plot IS versus timesteps
+if 0
+    figure(3);
+    this_index = 4;
+    subplot(2,1,1);
+    %plot(s{this_index, 2}(2:end), s{this_index, 7}(2:end) - s{this_index, 7}(1:end-1), 'r*');
+    % xlabel('Iteration'); ylabel('\Delta T')
+    plot(s{this_index, 2}, s{this_index, 7}, 'r*');
+    xlabel('Iteration'); ylabel('T')
+    subplot(2,1,2);
+    plot(s{this_index, 2}, s{this_index, 3}, 'r', 'linewidth', 2);
+    xlabel('Iteration'), ylabel('Inception score')
+    title(s{this_index,1})
+end
BigGAN_PyTorch/logs/metalog.txt
ADDED
@@ -0,0 +1,3 @@
+datetime: 2019-03-18 13:27:59.181225
+config: {'dataset': 'I128_hdf5', 'augment': False, 'num_workers': 8, 'pin_memory': True, 'shuffle': True, 'load_in_mem': True, 'use_multiepoch_sampler': True, 'model': 'model', 'G_param': 'SN', 'D_param': 'SN', 'G_ch': 96, 'D_ch': 96, 'G_depth': 1, 'D_depth': 1, 'D_wide': True, 'G_shared': True, 'shared_dim': 128, 'dim_z': 120, 'z_var': 1.0, 'hier': True, 'cross_replica': False, 'mybn': False, 'G_nl': 'inplace_relu', 'D_nl': 'inplace_relu', 'G_attn': '64', 'D_attn': '64', 'norm_style': 'bn', 'seed': 0, 'G_init': 'ortho', 'D_init': 'ortho', 'skip_init': True, 'G_lr': 0.0001, 'D_lr': 0.0004, 'G_B1': 0.0, 'D_B1': 0.0, 'G_B2': 0.999, 'D_B2': 0.999, 'batch_size': 256, 'G_batch_size': 0, 'num_G_accumulations': 8, 'num_D_steps': 1, 'num_D_accumulations': 8, 'split_D': False, 'num_epochs': 400, 'parallel': True, 'G_fp16': False, 'D_fp16': False, 'D_mixed_precision': False, 'G_mixed_precision': False, 'accumulate_stats': False, 'num_standing_accumulations': 16, 'G_eval_mode': True, 'save_every': 500, 'num_save_copies': 2, 'num_best_copies': 5, 'which_best': 'IS', 'no_fid': False, 'test_every': 2000, 'num_inception_images': 50000, 'hashname': False, 'base_root': '', 'dataset_root': 'data', 'weights_root': 'weights', 'logs_root': 'logs', 'samples_root': 'samples', 'pbar': 'mine', 'name_suffix': '', 'experiment_name': 'Jade_BigGAN_B1_bs256x8_fp32', 'config_from_name': False, 'ema': True, 'ema_decay': 0.9999, 'use_ema': True, 'ema_start': 20000, 'adam_eps': 1e-06, 'BN_eps': 1e-05, 'SN_eps': 1e-06, 'num_G_SVs': 1, 'num_D_SVs': 1, 'num_G_SV_itrs': 1, 'num_D_SV_itrs': 1, 'G_ortho': 0.0, 'D_ortho': 0.0, 'toggle_grads': True, 'which_train_fn': 'GAN', 'load_weights': '', 'resume': True, 'logstyle': '%3.3e', 'log_G_spectra': False, 'log_D_spectra': False, 'sv_log_interval': 10, 'resolution': 128, 'n_classes': 1000, 'G_activation': ReLU(inplace), 'D_activation': ReLU(inplace)}
+state: {'itr': 137500, 'epoch': 2, 'save_num': 0, 'save_best_num': 1, 'best_IS': 91.509384, 'best_FID': tensor(9.7711), 'config': {'dataset': 'I128_hdf5', 'augment': False, 'num_workers': 8, 'pin_memory': True, 'shuffle': True, 'load_in_mem': True, 'use_multiepoch_sampler': True, 'model': 'model', 'G_param': 'SN', 'D_param': 'SN', 'G_ch': 96, 'D_ch': 96, 'D_wide': True, 'G_shared': True, 'shared_dim': 128, 'dim_z': 120, 'hier': True, 'cross_replica': False, 'mybn': False, 'G_nl': 'inplace_relu', 'D_nl': 'inplace_relu', 'G_attn': '64', 'D_attn': '64', 'norm_style': 'bn', 'seed': 0, 'G_init': 'ortho', 'D_init': 'ortho', 'skip_init': False, 'G_lr': 0.0001, 'D_lr': 0.0004, 'G_B1': 0.0, 'D_B1': 0.0, 'G_B2': 0.999, 'D_B2': 0.999, 'batch_size': 256, 'G_batch_size': 0, 'num_G_accumulations': 8, 'num_D_steps': 1, 'num_D_accumulations': 8, 'split_D': False, 'num_epochs': 100, 'parallel': True, 'G_fp16': False, 'D_fp16': False, 'D_mixed_precision': False, 'G_mixed_precision': False, 'accumulate_stats': False, 'num_standing_accumulations': 16, 'BN_sync': False, 'G_eval_mode': True, 'save_every': 500, 'num_save_copies': 2, 'num_best_copies': 5, 'which_best': 'IS', 'no_fid': False, 'test_every': 2000, 'num_inception_images': 50000, 'hashname': False, 'base_root': '', 'dataset_root': 'data', 'weights_root': 'weights', 'logs_root': 'logs', 'samples_root': 'samples', 'pbar': 'mine', 'name_suffix': '', 'experiment_name': 'Jade_BigGAN_B1_bs256x8_fp32', 'ema': True, 'ema_decay': 0.9999, 'use_ema': True, 'ema_start': 20000, 'adam_eps': 1e-06, 'BN_eps': 1e-05, 'SN_eps': 1e-06, 'num_G_SVs': 1, 'num_D_SVs': 1, 'num_G_SV_itrs': 1, 'num_D_SV_itrs': 1, 'G_ortho': 0.0, 'D_ortho': 0.0, 'toggle_grads': True, 'which_train_fn': 'GAN', 'load_weights': '', 'resume': False, 'logstyle': '%3.3e', 'log_G_spectra': False, 'log_D_spectra': False, 'sv_log_interval': 10, 'resolution': 128, 'n_classes': 1000, 'G_activation': ReLU(inplace), 'D_activation': ReLU(inplace)}}
BigGAN_PyTorch/logs/process_inception_log.m
ADDED
@@ -0,0 +1,27 @@
+% Copyright (c) Facebook, Inc. and its affiliates.
+% All rights reserved.
+%
+% All contributions by Andy Brock:
+% Copyright (c) 2019 Andy Brock
+%
+% MIT License
+%
+function [itr, IS, FID, t] = process_inception_log(fname, which_log)
+    f = sprintf('%s_%s', fname, which_log); %'G_loss.log');
+    fid = fopen(f,'r');
+    itr = [];
+    IS = [];
+    FID = [];
+    t = [];
+    i = 1;
+    while ~feof(fid)
+        s = fgets(fid);
+        parsed = sscanf(s,'{"itr": %d, "IS_mean": %f, "IS_std": %f, "FID": %f, "_stamp": %f}');
+        itr(i) = parsed(1);
+        IS(i) = parsed(2);
+        FID(i) = parsed(4);
+        t(i) = parsed(5);
+        i = i + 1;
+    end
+    fclose(fid);
+end
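For readers without MATLAB, here is a rough Python equivalent of this parser (a sketch, not part of the repo) for the one-JSON-object-per-line format shown in BigGAN_ch96_bs256x8.jsonl above:

import json

def read_inception_log(path):
    # Returns the itr/IS/FID/timestamp series used for plotting.
    itr, IS, FID, t = [], [], [], []
    with open(path) as f:
        for line in f:
            rec = json.loads(line)
            itr.append(rec["itr"])
            IS.append(rec["IS_mean"])
            FID.append(rec["FID"])
            t.append(rec["_stamp"])
    return itr, IS, FID, t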
BigGAN_PyTorch/logs/process_training.m
ADDED
@@ -0,0 +1,117 @@
+% Copyright (c) Facebook, Inc. and its affiliates.
+% All rights reserved.
+%
+% All contributions by Andy Brock:
+% Copyright (c) 2019 Andy Brock
+%
+% MIT License
+%
+clc
+clear all
+close all
+fclose all;
+
+
+
+%% Get all training logs for a given run
+target_dir = '.';
+s = {};
+nm = {};
+d = dir(target_dir);
+j = 1;
+for i = 1:length(d)
+    if any(strfind(d(i).name,'.log'))
+        s = [s; sprintf('%s\\%s', target_dir, d(i).name)];
+        nm = [nm; d(i).name];
+    end
+end
+%% Loop over training logs and acquire data
+D_count = 0;
+G_count = 0;
+for i = 1:length(s)
+    fname = s{i,1};
+    fid = fopen(s{i,1},'r');
+    % Prepare bookkeeping for sv0
+    if any(strfind(s{i,1},'sv'))
+        if any(strfind(s{i,1},'G_'))
+            G_count = G_count + 1;
+        else
+            D_count = D_count + 1;
+        end
+    end
+    itr = [];
+    val = [];
+    j = 1;
+    while ~feof(fid)
+        line = fgets(fid);
+        parsed = sscanf(line, '%d: %e');
+        itr(j) = parsed(1);
+        val(j) = parsed(2);
+        j = j + 1;
+    end
+    s{i,2} = itr;
+    s{i,3} = val;
+    fclose(fid);
+end
+
+%% Plot SVs and losses
+close all;
+Gcc = hsv(G_count);
+Dcc = hsv(D_count);
+gi = 1;
+di = 1;
+li = 1;
+legendG = {};
+legendD = {};
+legendL = {};
+thresh = 2; % wavelet denoising threshold
+losses = {};
+for i = 1:length(s)
+    if any(strfind(s{i,1},'D_loss_real.log')) || any(strfind(s{i,1},'D_loss_fake.log')) || any(strfind(s{i,1},'G_loss.log'))
+        % Select colors
+        if any(strfind(s{i,1},'D_loss_real.log'))
+            color1 = [0.7, 0.7, 1.0];
+            color2 = [0, 0, 1];
+            dlr = {s{i,2}, s{i,3}, wden(s{i,3},'sqtwolog','s','mln', thresh, 'sym4'), color1, color2};
+            losses = [losses; dlr];
+        elseif any(strfind(s{i,1},'D_loss_fake.log'))
+            color1 = [0.7, 1.0, 0.7];
+            color2 = [0, 1, 0];
+            dlf = {s{i,2}, s{i,3}, wden(s{i,3},'sqtwolog','s','mln', thresh, 'sym4'), color1, color2};
+            losses = [losses; dlf];
+        else % G loss
+            color1 = [1.0, 0.7, 0.7];
+            color2 = [1, 0, 0];
+            gl = {s{i,2}, s{i,3}, wden(s{i,3},'sqtwolog','s','mln', thresh, 'sym4'), color1, color2};
+            losses = [losses; gl];
+        end
+        figure(1); hold on;
+        % Plot the unsmoothed losses; we'll plot the smoothed losses later
+        plot(s{i,2}, s{i,3}, 'color', color1, 'HandleVisibility','off');
+        legendL = [legendL; nm{i}];
+        continue
+    end
+    if any(strfind(s{i,1},'G_'))
+        legendG = [legendG; nm{i}];
+        figure(2); hold on;
+        plot(s{i,2}, s{i,3}, 'color', Gcc(gi,:), 'linewidth', 2);
+        gi = gi + 1;
+    elseif any(strfind(s{i,1},'D_'))
+        legendD = [legendD; nm{i}];
+        figure(3); hold on;
+        plot(s{i,2}, s{i,3}, 'color', Dcc(di,:), 'linewidth', 2);
+        di = di + 1;
+    else
+        s{i,1} % Debug print to show the name of the log that was not processed.
+    end
+end
+figure(1);
+% Plot the smoothed losses last
+for i = 1:3
+    % plot(losses{i,1}, losses{i,2}, 'color', losses{i,4}, 'HandleVisibility','off');
+    plot(losses{i,1}, losses{i,3}, 'color', losses{i,5});
+end
+legend(legendL, 'Interpreter', 'none'); title('Losses'); xlabel('Generator itr'); ylabel('loss'); axis([0, max(s{end,2}), -1, 4]);
+
+figure(2); legend(legendG,'Interpreter','none'); title('Singular Values in G'); xlabel('Generator itr'); ylabel('SV0');
+figure(3); legend(legendD, 'Interpreter', 'none'); title('Singular Values in D'); xlabel('Generator itr'); ylabel('SV0');
BigGAN_PyTorch/losses.py
ADDED
@@ -0,0 +1,43 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+#
+# All contributions by Andy Brock:
+# Copyright (c) 2019 Andy Brock
+#
+# MIT License
+import torch
+import torch.nn.functional as F
+
+# DCGAN loss
+def loss_dcgan_dis(dis_fake, dis_real):
+    L1 = torch.mean(F.softplus(-dis_real))
+    L2 = torch.mean(F.softplus(dis_fake))
+    return L1, L2
+
+
+def loss_dcgan_gen(dis_fake):
+    loss = torch.mean(F.softplus(-dis_fake))
+    return loss
+
+
+# Hinge Loss
+def loss_hinge_dis(dis_fake, dis_real):
+    loss_real = torch.mean(F.relu(1.0 - dis_real))
+    loss_fake = torch.mean(F.relu(1.0 + dis_fake))
+    return loss_real, loss_fake
+
+
+# def loss_hinge_dis(dis_fake, dis_real): # This version returns a single loss
+#     loss = torch.mean(F.relu(1. - dis_real))
+#     loss += torch.mean(F.relu(1. + dis_fake))
+#     return loss
+
+
+def loss_hinge_gen(dis_fake):
+    loss = -torch.mean(dis_fake)
+    return loss
+
+
+# Default to hinge loss
+generator_loss = loss_hinge_gen
+discriminator_loss = loss_hinge_dis
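A minimal sketch of how these losses are typically combined in a GAN training step (the toy G, D, and batches below are illustrative stand-ins, not the repo's models):

import torch

D = torch.nn.Linear(8, 1)   # hypothetical discriminator
G = torch.nn.Linear(4, 8)   # hypothetical generator
x_real, z = torch.randn(16, 8), torch.randn(16, 4)

# Discriminator step: the two hinge terms are returned separately
loss_real, loss_fake = discriminator_loss(D(G(z).detach()), D(x_real))
d_loss = loss_real + loss_fake  # mean(relu(1 - D(x))) + mean(relu(1 + D(G(z))))
d_loss.backward()

# Generator step
g_loss = generator_loss(D(G(z)))  # -mean(D(G(z)))
g_loss.backward()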
BigGAN_PyTorch/make_hdf5.py
ADDED
@@ -0,0 +1,193 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+#
+# All contributions by Andy Brock:
+# Copyright (c) 2019 Andy Brock
+#
+# MIT License
+""" Convert dataset to HDF5
+This script preprocesses a dataset and saves it (images and labels) to
+an HDF5 file for improved I/O. """
+import os
+import sys
+from argparse import ArgumentParser
+from tqdm import tqdm, trange
+import h5py as h5
+
+import numpy as np
+import torch
+import torchvision.datasets as dset
+import torchvision.transforms as transforms
+from torchvision.utils import save_image
+from torch.utils.data import DataLoader
+
+import utils
+
+
+def prepare_parser():
+    usage = "Parser for ImageNet HDF5 scripts."
+    parser = ArgumentParser(description=usage)
+    parser.add_argument(
+        "--resolution",
+        type=int,
+        default=128,
+        help="Which dataset resolution to train on, out of 64, 128, 256, 512 (default: %(default)s)",
+    )
+    parser.add_argument(
+        "--split",
+        type=str,
+        default="train",
+        help="Which dataset split to convert: train, val (default: %(default)s)",
+    )
+    parser.add_argument(
+        "--data_root",
+        type=str,
+        default="data",
+        help="Default location where data is stored (default: %(default)s)",
+    )
+    parser.add_argument(
+        "--out_path",
+        type=str,
+        default="data",
+        help="Default location where data in HDF5 format will be stored (default: %(default)s)",
+    )
+    parser.add_argument(
+        "--longtail",
+        action="store_true",
+        default=False,
+        help="Use long-tail version of the dataset",
+    )
+    parser.add_argument(
+        "--batch_size",
+        type=int,
+        default=256,
+        help="Default overall batchsize (default: %(default)s)",
+    )
+    parser.add_argument(
+        "--num_workers",
+        type=int,
+        default=16,
+        help="Number of dataloader workers (default: %(default)s)",
+    )
+    parser.add_argument(
+        "--chunk_size",
+        type=int,
+        default=500,
+        help="Chunk size for the HDF5 datasets (default: %(default)s)",
+    )
+    parser.add_argument(
+        "--compression",
+        action="store_true",
+        default=False,
+        help="Use LZF compression? (default: %(default)s)",
+    )
+    return parser
+
+
+def run(config):
+    # Update compression entry
+    config["compression"] = (
+        "lzf" if config["compression"] else None
+    )  # No compression; can also use 'lzf'
+
+    # Get dataset
+    kwargs = {
+        "num_workers": config["num_workers"],
+        "pin_memory": False,
+        "drop_last": False,
+    }
+    dataset = utils.get_dataset_images(
+        config["resolution"],
+        data_path=os.path.join(config["data_root"], config["split"]),
+        longtail=config["longtail"],
+    )
+    train_loader = utils.get_dataloader(
+        dataset, config["batch_size"], shuffle=False, **kwargs
+    )
+
+    # HDF5 supports chunking and compression. You may want to experiment
+    # with different chunk sizes to see how it runs on your machines.
+    # Chunk Size/compression      Read speed @ 256x256   Read speed @ 128x128   Filesize @ 128x128   Time to write @ 128x128
+    # 1 / None                    20/s
+    # 500 / None                  ramps up to 77/s       102/s                  61GB                 23min
+    # 500 / LZF                                          8/s                    56GB                 23min
+    # 1000 / None                 78/s
+    # 5000 / None                 81/s
+    # auto:(125,1,16,32) / None   11/s                                          61GB
+
+    print(
+        "Starting to load dataset into an HDF5 file with chunk size %i and compression %s..."
+        % (config["chunk_size"], config["compression"])
+    )
+    # Loop over train loader
+    for i, (x, y) in enumerate(tqdm(train_loader)):
+        # Stick X into the range [0, 255] since it's coming from the train loader
+        x = (255 * ((x + 1) / 2.0)).byte().numpy()
+        # Numpyify y
+        y = y.numpy()
+        # If we're on the first batch, prepare the hdf5
+        if i == 0:
+            with h5.File(
+                config["out_path"]
+                + "/ILSVRC%i%s_xy.hdf5"
+                % (config["resolution"], "" if not config["longtail"] else "longtail"),
+                "w",
+            ) as f:
+                print("Producing dataset of len %d" % len(train_loader.dataset))
+                imgs_dset = f.create_dataset(
+                    "imgs",
+                    x.shape,
+                    dtype="uint8",
+                    maxshape=(
+                        len(train_loader.dataset),
+                        3,
+                        config["resolution"],
+                        config["resolution"],
+                    ),
+                    chunks=(
+                        config["chunk_size"],
+                        3,
+                        config["resolution"],
+                        config["resolution"],
+                    ),
+                    compression=config["compression"],
+                )
+                print("Image chunks chosen as " + str(imgs_dset.chunks))
+                imgs_dset[...] = x
+                labels_dset = f.create_dataset(
+                    "labels",
+                    y.shape,
+                    dtype="int64",
+                    maxshape=(len(train_loader.dataset),),
+                    chunks=(config["chunk_size"],),
+                    compression=config["compression"],
+                )
+                print("Label chunks chosen as " + str(labels_dset.chunks))
+                labels_dset[...] = y
+        # Else append to the hdf5
+        else:
+            with h5.File(
+                config["out_path"]
+                + "/ILSVRC%i%s_xy.hdf5"
+                % (config["resolution"], "" if not config["longtail"] else "longtail"),
+                "a",
+            ) as f:
+                f["imgs"].resize(f["imgs"].shape[0] + x.shape[0], axis=0)
+                f["imgs"][-x.shape[0]:] = x
+                f["labels"].resize(f["labels"].shape[0] + y.shape[0], axis=0)
+                f["labels"][-y.shape[0]:] = y
+
+
+def main():
+    # Parse command line and run
+    parser = prepare_parser()
+    config = vars(parser.parse_args())
+    print(config)
+    run(config)
+
+
+if __name__ == "__main__":
+    main()
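A short companion sketch (not part of the script) of reading the resulting file back; the filename follows the pattern built in run() above, and the concrete path is an assumption:

import h5py
import numpy as np

with h5py.File("data/ILSVRC128_xy.hdf5", "r") as f:  # hypothetical output path
    print(f["imgs"].shape, f["imgs"].dtype)  # (N, 3, 128, 128), uint8
    print(f["labels"].shape)                 # (N,)
    x0 = np.asarray(f["imgs"][0])            # chunked storage keeps single-image reads cheap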
BigGAN_PyTorch/run.py
ADDED
@@ -0,0 +1,75 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import utils
+from trainer import run
+from submitit.helpers import Checkpointable
+
+LOCAL = False
+try:
+    import submitit
+except ImportError:  # fixed: was a bare except
+    print(
+        "No submitit package found! Defaulting to executing the script in the local machine"
+    )
+    LOCAL = True
+import json
+
+
+class Trainer(Checkpointable):
+    def __call__(self, config):
+        if config["run_setup"] == "local_debug" or LOCAL:
+            run(config, "local_debug")
+        else:
+            run(config, "slurm", master_node=submitit.JobEnvironment().hostnames[0])
+
+
+if __name__ == "__main__":
+    parser = utils.prepare_parser()
+    config = vars(parser.parse_args())
+
+    if config["json_config"] != "":
+        data = json.load(open(config["json_config"]))
+        for key in data.keys():
+            config[key] = data[key]
+    else:
+        print("Not using JSON configuration file!")
+    config["G_batch_size"] = config["batch_size"]
+    config["batch_size"] = (
+        config["batch_size"] * config["num_D_accumulations"] * config["num_D_steps"]
+    )
+
+    trainer = Trainer()
+    if config["run_setup"] == "local_debug" or LOCAL:
+        trainer(config)
+    else:
+        print(
+            "Using ",
+            config["n_nodes"],
+            " nodes and ",
+            config["n_gpus_per_node"],
+            " GPUs per node.",
+        )
+        executor = submitit.SlurmExecutor(
+            folder=config["slurm_logdir"], max_num_timeout=60
+        )
+        executor.update_parameters(
+            gpus_per_node=config["n_gpus_per_node"],
+            partition=config["partition"],
+            constraint="volta32gb",
+            nodes=config["n_nodes"],
+            ntasks_per_node=config["n_gpus_per_node"],
+            cpus_per_task=8,
+            mem=256000,
+            time=3200,
+            job_name=config["experiment_name"],
+            exclusive=True if config["n_gpus_per_node"] == 8 else False,
+        )
+
+        executor.submit(trainer, config)
+        import time
+
+        time.sleep(1)
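Note the batch-size bookkeeping above: G_batch_size keeps the per-step value, while the dataloader batch_size is inflated to cover all discriminator accumulations. For example, with batch_size 256, num_D_accumulations 8, and num_D_steps 1, the loader batch becomes 256 × 8 × 1 = 2048; the "bs256x8" naming used in the logs and in the launch script below follows the same 256-with-8-accumulations convention.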
BigGAN_PyTorch/scripts/launch_BigGAN_bs256x8.sh
ADDED
@@ -0,0 +1,26 @@
+#!/bin/bash
+#
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+#
+# All contributions by Andy Brock:
+# Copyright (c) 2019 Andy Brock
+#
+# MIT License
+#
+python train.py \
+--dataset I128_hdf5 --parallel --shuffle --num_workers 8 --batch_size 256 --load_in_mem \
+--num_G_accumulations 8 --num_D_accumulations 8 \
+--num_D_steps 1 --G_lr 1e-4 --D_lr 4e-4 --D_B2 0.999 --G_B2 0.999 \
+--G_attn 64 --D_attn 64 \
+--G_nl inplace_relu --D_nl inplace_relu \
+--SN_eps 1e-6 --BN_eps 1e-5 --adam_eps 1e-6 \
+--G_ortho 0.0 \
+--G_shared \
+--G_init ortho --D_init ortho \
+--hier --dim_z 120 --shared_dim 128 \
+--G_eval_mode \
+--G_ch 96 --D_ch 96 \
+--ema --use_ema --ema_start 20000 \
+--test_every 2000 --save_every 1000 --num_best_copies 5 --num_save_copies 2 --seed 0 \
+--use_multiepoch_sampler