diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..ed8ebf583f771da9150c35db3955987b7d757904
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__
\ No newline at end of file
diff --git a/submission/__pycache__/__init__.cpython-310.pyc b/submission/__pycache__/__init__.cpython-310.pyc
deleted file mode 100644
index 49d95f12dab43b5210719a46cd77487759d3b2de..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/__init__.cpython-310.pyc and /dev/null differ
diff --git a/submission/__pycache__/__init__.cpython-36.pyc b/submission/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index ddc1f46c28d2083ea572abefb0e943169a53a6bf..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/submission/__pycache__/__init__.cpython-38.pyc b/submission/__pycache__/__init__.cpython-38.pyc
deleted file mode 100644
index 7fb9c21385f191a2e293c6c92bc0f2600eb63988..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/__init__.cpython-38.pyc and /dev/null differ
diff --git a/submission/__pycache__/__init__.cpython-39.pyc b/submission/__pycache__/__init__.cpython-39.pyc
deleted file mode 100644
index a3eb6c3a4dc80bf848d05969fd9ba0adb1beaa14..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/__init__.cpython-39.pyc and /dev/null differ
diff --git a/submission/__pycache__/run_context.cpython-310.pyc b/submission/__pycache__/run_context.cpython-310.pyc
deleted file mode 100644
index 16a88b1fe5c61aa930139d80da56ac319bfc3599..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/run_context.cpython-310.pyc and /dev/null differ
diff --git a/submission/__pycache__/run_context.cpython-36.pyc b/submission/__pycache__/run_context.cpython-36.pyc
deleted file mode 100644
index c79464ccf271723928e7ea5846f3121642912acf..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/run_context.cpython-36.pyc and /dev/null differ
diff --git a/submission/__pycache__/run_context.cpython-38.pyc b/submission/__pycache__/run_context.cpython-38.pyc
deleted file mode 100644
index 27f155bf32573a58a1b14b2cfaeece781ca4db14..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/run_context.cpython-38.pyc and /dev/null differ
diff --git a/submission/__pycache__/run_context.cpython-39.pyc b/submission/__pycache__/run_context.cpython-39.pyc
deleted file mode 100644
index 82e906015f2d2c1aea6d1b2ea746dfe1f1aacd09..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/run_context.cpython-39.pyc and /dev/null differ
diff --git a/submission/__pycache__/submit.cpython-310.pyc b/submission/__pycache__/submit.cpython-310.pyc
deleted file mode 100644
index 0f0d56d928ab6555b9b4084a030dda1963e28424..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/submit.cpython-310.pyc and /dev/null differ
diff --git a/submission/__pycache__/submit.cpython-36.pyc b/submission/__pycache__/submit.cpython-36.pyc
deleted file mode 100644
index f0edb249c830bc74157540f24be2caa25d8f8817..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/submit.cpython-36.pyc and /dev/null differ
diff --git a/submission/__pycache__/submit.cpython-38.pyc b/submission/__pycache__/submit.cpython-38.pyc
deleted file mode 100644
index 6fb6b1a8a68da858aaebcbb567ae0dd9f1ee7def..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/submit.cpython-38.pyc and /dev/null differ
diff --git a/submission/__pycache__/submit.cpython-39.pyc b/submission/__pycache__/submit.cpython-39.pyc
deleted file mode 100644
index a631dc24ce45365f1f8274e563e1b4354afac50c..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/submit.cpython-39.pyc and /dev/null differ
diff --git a/submission/internal/__pycache__/__init__.cpython-36.pyc b/submission/internal/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index e197ab9bed4593646f04c5f1f123f49ddd9b34c5..0000000000000000000000000000000000000000
Binary files a/submission/internal/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/submission/internal/__pycache__/__init__.cpython-38.pyc b/submission/internal/__pycache__/__init__.cpython-38.pyc
deleted file mode 100644
index 9d15b0c3a8da1eac47d29d581014b76e5ca26096..0000000000000000000000000000000000000000
Binary files a/submission/internal/__pycache__/__init__.cpython-38.pyc and /dev/null differ
diff --git a/submission/internal/__pycache__/__init__.cpython-39.pyc b/submission/internal/__pycache__/__init__.cpython-39.pyc
deleted file mode 100644
index bc046661f9d0a58c4db9a911605f6b743a317747..0000000000000000000000000000000000000000
Binary files a/submission/internal/__pycache__/__init__.cpython-39.pyc and /dev/null differ
diff --git a/submission/internal/__pycache__/local.cpython-36.pyc b/submission/internal/__pycache__/local.cpython-36.pyc
deleted file mode 100644
index 7899b1660eb9752f9fb45b94b762a69807251023..0000000000000000000000000000000000000000
Binary files a/submission/internal/__pycache__/local.cpython-36.pyc and /dev/null differ
diff --git a/submission/internal/__pycache__/local.cpython-38.pyc b/submission/internal/__pycache__/local.cpython-38.pyc
deleted file mode 100644
index 679805d01f94611885c9e9241db82f629e9ab438..0000000000000000000000000000000000000000
Binary files a/submission/internal/__pycache__/local.cpython-38.pyc and /dev/null differ
diff --git a/submission/internal/__pycache__/local.cpython-39.pyc b/submission/internal/__pycache__/local.cpython-39.pyc
deleted file mode 100644
index 9f57b457b4f40416ed89a492062b3a26eb3b757f..0000000000000000000000000000000000000000
Binary files a/submission/internal/__pycache__/local.cpython-39.pyc and /dev/null differ
diff --git a/tflib/.custom_ops.py.un~ b/tflib/.custom_ops.py.un~
deleted file mode 100644
index e41c460c3b3111ddcbeb6b9567b4b6e41d6eb9c6..0000000000000000000000000000000000000000
Binary files a/tflib/.custom_ops.py.un~ and /dev/null differ
diff --git a/tflib/__init__.py b/tflib/__init__.py
deleted file mode 100644
index 02c25173d3f2391c88b142cf80af02cd93b0b5a0..0000000000000000000000000000000000000000
--- a/tflib/__init__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
-#
-# This work is made available under the Nvidia Source Code License-NC.
-# To view a copy of this license, visit
-# https://nvlabs.github.io/stylegan2/license.html
-
-from . import autosummary
-from . import network
-from . import optimizer
-from . import tfutil
-from . import custom_ops
-
-from .tfutil import *
-from .network import Network
-
-from .optimizer import Optimizer
-
-from .custom_ops import get_plugin
diff --git a/tflib/__pycache__/__init__.cpython-36.pyc b/tflib/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index 55466792997d614403fe6026e63a5e2e32d5f4c5..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/tflib/__pycache__/__init__.cpython-38.pyc b/tflib/__pycache__/__init__.cpython-38.pyc
deleted file mode 100644
index c48dd77f8014fcbca868e4826ddc7c07e558bb87..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/__init__.cpython-38.pyc and /dev/null differ
diff --git a/tflib/__pycache__/__init__.cpython-39.pyc b/tflib/__pycache__/__init__.cpython-39.pyc
deleted file mode 100644
index 3010c9010ee84397e342ca1748e4bfb0869ea9d4..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/__init__.cpython-39.pyc and /dev/null differ
diff --git a/tflib/__pycache__/autosummary.cpython-36.pyc b/tflib/__pycache__/autosummary.cpython-36.pyc
deleted file mode 100644
index 8663ce369c5c3d996a2d372e75b53f9c67ecac63..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/autosummary.cpython-36.pyc and /dev/null differ
diff --git a/tflib/__pycache__/autosummary.cpython-38.pyc b/tflib/__pycache__/autosummary.cpython-38.pyc
deleted file mode 100644
index 772042b65a5c87fdd55625424b014978a08a8aba..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/autosummary.cpython-38.pyc and /dev/null differ
diff --git a/tflib/__pycache__/autosummary.cpython-39.pyc b/tflib/__pycache__/autosummary.cpython-39.pyc
deleted file mode 100644
index 51636d9cbd98f5953b8ed47a1a29aadec86da7ce..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/autosummary.cpython-39.pyc and /dev/null differ
diff --git a/tflib/__pycache__/custom_ops.cpython-36.pyc b/tflib/__pycache__/custom_ops.cpython-36.pyc
deleted file mode 100644
index c5bc61adabc982614e494f66403172a241a0a274..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/custom_ops.cpython-36.pyc and /dev/null differ
diff --git a/tflib/__pycache__/network.cpython-36.pyc b/tflib/__pycache__/network.cpython-36.pyc
deleted file mode 100644
index 15b13f2d4d385523fdfa41e298f4a4db2d3c28de..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/network.cpython-36.pyc and /dev/null differ
diff --git a/tflib/__pycache__/optimizer.cpython-36.pyc b/tflib/__pycache__/optimizer.cpython-36.pyc
deleted file mode 100644
index b676edf06988f727c4684867a0d4921c7fa2a62a..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/optimizer.cpython-36.pyc and /dev/null differ
diff --git a/tflib/__pycache__/tfutil.cpython-36.pyc b/tflib/__pycache__/tfutil.cpython-36.pyc
deleted file mode 100644
index fc0253c2da1f3aa0979212e78adf111de2311459..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/tfutil.cpython-36.pyc and /dev/null differ
diff --git a/tflib/__pycache__/tfutil.cpython-38.pyc b/tflib/__pycache__/tfutil.cpython-38.pyc
deleted file mode 100644
index eaddd1d7edc28bb2f69e914274e24c9febfd2989..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/tfutil.cpython-38.pyc and /dev/null differ
diff --git a/tflib/__pycache__/tfutil.cpython-39.pyc b/tflib/__pycache__/tfutil.cpython-39.pyc
deleted file mode 100644
index e0ef28b02cff59ccbc79a0804c30d90fbfdded44..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/tfutil.cpython-39.pyc and /dev/null differ
diff --git a/tflib/_cudacache/fused_bias_act_26d9116e2d7d28cdd451d2d21a1bb7ac.dll b/tflib/_cudacache/fused_bias_act_26d9116e2d7d28cdd451d2d21a1bb7ac.dll
deleted file mode 100644
index f03635ef2de5533438983be7e5430b2d68e703fe..0000000000000000000000000000000000000000
Binary files a/tflib/_cudacache/fused_bias_act_26d9116e2d7d28cdd451d2d21a1bb7ac.dll and /dev/null differ
diff --git a/tflib/_cudacache/fused_bias_act_40c2d778d681ce8357acbd8537ee6613.so b/tflib/_cudacache/fused_bias_act_40c2d778d681ce8357acbd8537ee6613.so
deleted file mode 100644
index 271fc0309be80c13246bd506e4f4e9cae1da3a5c..0000000000000000000000000000000000000000
Binary files a/tflib/_cudacache/fused_bias_act_40c2d778d681ce8357acbd8537ee6613.so and /dev/null differ
diff --git a/tflib/_cudacache/fused_bias_act_d3f49482b02b6a96d02e754e40c292bc.dll b/tflib/_cudacache/fused_bias_act_d3f49482b02b6a96d02e754e40c292bc.dll
deleted file mode 100644
index 34f36c1e1c1393b01ab6adbd96cb91d99d895379..0000000000000000000000000000000000000000
Binary files a/tflib/_cudacache/fused_bias_act_d3f49482b02b6a96d02e754e40c292bc.dll and /dev/null differ
diff --git a/tflib/_cudacache/upfirdn_2d_0acb578db4996f3ae2116b184d4a1bdb.dll b/tflib/_cudacache/upfirdn_2d_0acb578db4996f3ae2116b184d4a1bdb.dll
deleted file mode 100644
index c77d3dbe97574675decdfcf3f332e9064018f652..0000000000000000000000000000000000000000
Binary files a/tflib/_cudacache/upfirdn_2d_0acb578db4996f3ae2116b184d4a1bdb.dll and /dev/null differ
diff --git a/tflib/_cudacache/upfirdn_2d_79757bc08e1fcc2d526ecf3cb32fad92.so b/tflib/_cudacache/upfirdn_2d_79757bc08e1fcc2d526ecf3cb32fad92.so
deleted file mode 100644
index f168469daf6becbe8129f5344f8632a089873a1d..0000000000000000000000000000000000000000
--- a/tflib/_cudacache/upfirdn_2d_79757bc08e1fcc2d526ecf3cb32fad92.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:11a9d92fab510cb996dcd2bf8644c34f7c5e963e6abba4ad3e606e590917e3e0
-size 1099920
diff --git a/tflib/_cudacache/upfirdn_2d_f65c24ab0d389aaa83e5b3537e3134e8.dll b/tflib/_cudacache/upfirdn_2d_f65c24ab0d389aaa83e5b3537e3134e8.dll
deleted file mode 100644
index dfa6979fc99fdf7b052769b88cb23a852dc223d1..0000000000000000000000000000000000000000
Binary files a/tflib/_cudacache/upfirdn_2d_f65c24ab0d389aaa83e5b3537e3134e8.dll and /dev/null differ
diff --git a/tflib/autosummary.py b/tflib/autosummary.py
deleted file mode 100644
index 6b0d80b371620bedadf8164772b7d6f87806fc11..0000000000000000000000000000000000000000
--- a/tflib/autosummary.py
+++ /dev/null
@@ -1,191 +0,0 @@
-# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
-#
-# This work is made available under the Nvidia Source Code License-NC.
-# To view a copy of this license, visit
-# https://nvlabs.github.io/stylegan2/license.html
-
-"""Helper for adding automatically tracked values to Tensorboard.
-
-Autosummary creates an identity op that internally keeps track of the input
-values and automatically shows up in TensorBoard. The reported value
-represents an average over input components. The average is accumulated
-constantly over time and flushed when save_summaries() is called.
-
-Notes:
-- The output tensor must be used as an input for something else in the
-  graph. Otherwise, the autosummary op will not get executed, and the average
-  value will not get accumulated.
-- It is perfectly fine to include autosummaries with the same name in
-  several places throughout the graph, even if they are executed concurrently.
-- It is ok to also pass in a python scalar or numpy array. In this case, it
-  is added to the average immediately.
-"""
-
-from collections import OrderedDict
-import numpy as np
-import tensorflow as tf
-from tensorboard import summary as summary_lib
-from tensorboard.plugins.custom_scalar import layout_pb2
-
-from . import tfutil
-from .tfutil import TfExpression
-from .tfutil import TfExpressionEx
-
-# Enable "Custom scalars" tab in TensorBoard for advanced formatting.
-# Disabled by default to reduce tfevents file size.
-enable_custom_scalars = False
-
-_dtype = tf.float64
-_vars = OrderedDict()  # name => [var, ...]
-_immediate = OrderedDict()  # name => update_op, update_value
-_finalized = False
-_merge_op = None
-
-
-def _create_var(name: str, value_expr: TfExpression) -> TfExpression:
-    """Internal helper for creating autosummary accumulators."""
-    assert not _finalized
-    name_id = name.replace("/", "_")
-    v = tf.cast(value_expr, _dtype)
-
-    if v.shape.is_fully_defined():
-        size = np.prod(v.shape.as_list())
-        size_expr = tf.constant(size, dtype=_dtype)
-    else:
-        size = None
-        size_expr = tf.reduce_prod(tf.cast(tf.shape(v), _dtype))
-
-    if size == 1:
-        if v.shape.ndims != 0:
-            v = tf.reshape(v, [])
-        v = [size_expr, v, tf.square(v)]
-    else:
-        v = [size_expr, tf.reduce_sum(v), tf.reduce_sum(tf.square(v))]
-    v = tf.cond(tf.is_finite(v[1]), lambda: tf.stack(v), lambda: tf.zeros(3, dtype=_dtype))
-
-    with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.control_dependencies(None):
-        var = tf.Variable(tf.zeros(3, dtype=_dtype), trainable=False)  # [sum(1), sum(x), sum(x**2)]
-        update_op = tf.cond(tf.is_variable_initialized(var), lambda: tf.assign_add(var, v), lambda: tf.assign(var, v))
-
-    if name in _vars:
-        _vars[name].append(var)
-    else:
-        _vars[name] = [var]
-    return update_op
-
-
-def autosummary(name: str, value: TfExpressionEx, passthru: TfExpressionEx = None, condition: TfExpressionEx = True) -> TfExpressionEx:
-    """Create a new autosummary.
-
-    Args:
-        name:     Name to use in TensorBoard
-        value:    TensorFlow expression or python value to track
-        passthru: Optionally return this TF node without modifications but tack an autosummary update side-effect to this node.
-
-    Example use of the passthru mechanism:
-
-    n = autosummary('l2loss', loss, passthru=n)
-
-    This is a shorthand for the following code:
-
-    with tf.control_dependencies([autosummary('l2loss', loss)]):
-        n = tf.identity(n)
-    """
-    tfutil.assert_tf_initialized()
-    name_id = name.replace("/", "_")
-
-    if tfutil.is_tf_expression(value):
-        with tf.name_scope("summary_" + name_id), tf.device(value.device):
-            condition = tf.convert_to_tensor(condition, name='condition')
-            update_op = tf.cond(condition, lambda: tf.group(_create_var(name, value)), tf.no_op)
-            with tf.control_dependencies([update_op]):
-                return tf.identity(value if passthru is None else passthru)
-
-    else:  # python scalar or numpy array
-        assert not tfutil.is_tf_expression(passthru)
-        assert not tfutil.is_tf_expression(condition)
-        if condition:
-            if name not in _immediate:
-                with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.device(None), tf.control_dependencies(None):
-                    update_value = tf.placeholder(_dtype)
-                    update_op = _create_var(name, update_value)
-                    _immediate[name] = update_op, update_value
-            update_op, update_value = _immediate[name]
-            tfutil.run(update_op, {update_value: value})
-        return value if passthru is None else passthru
-
-
-def finalize_autosummaries() -> None:
-    """Create the necessary ops to include autosummaries in TensorBoard report.
-    Note: This should be done only once per graph.
-    """
-    global _finalized
-    tfutil.assert_tf_initialized()
-
-    if _finalized:
-        return None
-
-    _finalized = True
-    tfutil.init_uninitialized_vars([var for vars_list in _vars.values() for var in vars_list])
-
-    # Create summary ops.
-    with tf.device(None), tf.control_dependencies(None):
-        for name, vars_list in _vars.items():
-            name_id = name.replace("/", "_")
-            with tfutil.absolute_name_scope("Autosummary/" + name_id):
-                moments = tf.add_n(vars_list)
-                moments /= moments[0]
-                with tf.control_dependencies([moments]):  # read before resetting
-                    reset_ops = [tf.assign(var, tf.zeros(3, dtype=_dtype)) for var in vars_list]
-                    with tf.name_scope(None), tf.control_dependencies(reset_ops):  # reset before reporting
-                        mean = moments[1]
-                        std = tf.sqrt(moments[2] - tf.square(moments[1]))
-                        tf.summary.scalar(name, mean)
-                        if enable_custom_scalars:
-                            tf.summary.scalar("xCustomScalars/" + name + "/margin_lo", mean - std)
-                            tf.summary.scalar("xCustomScalars/" + name + "/margin_hi", mean + std)
-
-    # Setup layout for custom scalars.
-    layout = None
-    if enable_custom_scalars:
-        cat_dict = OrderedDict()
-        for series_name in sorted(_vars.keys()):
-            p = series_name.split("/")
-            cat = p[0] if len(p) >= 2 else ""
-            chart = "/".join(p[1:-1]) if len(p) >= 3 else p[-1]
-            if cat not in cat_dict:
-                cat_dict[cat] = OrderedDict()
-            if chart not in cat_dict[cat]:
-                cat_dict[cat][chart] = []
-            cat_dict[cat][chart].append(series_name)
-        categories = []
-        for cat_name, chart_dict in cat_dict.items():
-            charts = []
-            for chart_name, series_names in chart_dict.items():
-                series = []
-                for series_name in series_names:
-                    series.append(layout_pb2.MarginChartContent.Series(
-                        value=series_name,
-                        lower="xCustomScalars/" + series_name + "/margin_lo",
-                        upper="xCustomScalars/" + series_name + "/margin_hi"))
-                margin = layout_pb2.MarginChartContent(series=series)
-                charts.append(layout_pb2.Chart(title=chart_name, margin=margin))
-            categories.append(layout_pb2.Category(title=cat_name, chart=charts))
-        layout = summary_lib.custom_scalar_pb(layout_pb2.Layout(category=categories))
-    return layout
-
-def save_summaries(file_writer, global_step=None):
-    """Call FileWriter.add_summary() with all summaries in the default graph,
-    automatically finalizing and merging them on the first call.
-    """
-    global _merge_op
-    tfutil.assert_tf_initialized()
-
-    if _merge_op is None:
-        layout = finalize_autosummaries()
-        if layout is not None:
-            file_writer.add_summary(layout)
-        with tf.device(None), tf.control_dependencies(None):
-            _merge_op = tf.summary.merge_all()
-
-    file_writer.add_summary(_merge_op.eval(), global_step)
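[Editor's note: the autosummary module deleted above is easiest to follow from its own docstring: autosummary() returns an identity op whose side effect accumulates a running average, and save_summaries() flushes those averages to a TensorBoard event file. A minimal sketch of that flow, assuming TensorFlow 1.x and the tflib package still on the import path; the placeholder shape, loss expression, and log directory are illustrative, not from this repo.]

    import numpy as np
    import tensorflow as tf
    import tflib  # the package removed by this diff

    tflib.init_tf()  # session bootstrap re-exported from tflib.tfutil via __init__.py
    x = tf.placeholder(tf.float32, shape=[None])
    loss = tf.reduce_mean(tf.square(x))

    # Identity op with an accumulate-average side effect. It must stay on the
    # evaluated path, or the accumulator never updates (see the Notes above).
    loss = tflib.autosummary.autosummary('Loss/train', loss)

    writer = tf.summary.FileWriter('logs', tf.get_default_graph())
    sess = tf.get_default_session()
    for step in range(10):
        sess.run(loss, {x: np.random.randn(64).astype(np.float32)})
        tflib.autosummary.save_summaries(writer, global_step=step)  # merge, flush, reset
    writer.close()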
diff --git a/tflib/custom_ops.py b/tflib/custom_ops.py
deleted file mode 100644
index 9046787626a0530f03b9f8982ec625e6bd257858..0000000000000000000000000000000000000000
--- a/tflib/custom_ops.py
+++ /dev/null
@@ -1,169 +0,0 @@
-# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
-#
-# This work is made available under the Nvidia Source Code License-NC.
-# To view a copy of this license, visit
-# https://nvlabs.github.io/stylegan2/license.html
-
-"""TensorFlow custom ops builder.
-"""
-
-import os
-import re
-import uuid
-import hashlib
-import tempfile
-import shutil
-import tensorflow as tf
-from tensorflow.python.client import device_lib  # pylint: disable=no-name-in-module
-
-#----------------------------------------------------------------------------
-# Global options.
-
-cuda_cache_path = os.path.join(os.path.dirname(__file__), '_cudacache')
-cuda_cache_version_tag = 'v1'
-do_not_hash_included_headers = False  # Speed up compilation by assuming that headers included by the CUDA code never change. Unsafe!
-verbose = True  # Print status messages to stdout.
-
-compiler_bindir_search_path = [
-    'C:/Program Files (x86)/Microsoft Visual Studio/2017/Community/VC/Tools/MSVC/14.16.27023/bin/Hostx64/x64',
-    'C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.23.28105/bin/Hostx64/x64',
-    'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin',
-]
-
-#----------------------------------------------------------------------------
-# Internal helper funcs.
-
-def _find_compiler_bindir():
-    for compiler_path in compiler_bindir_search_path:
-        if os.path.isdir(compiler_path):
-            return compiler_path
-    return None
-
-def _get_compute_cap(device):
-    caps_str = device.physical_device_desc
-    m = re.search('compute capability: (\\d+).(\\d+)', caps_str)
-    major = m.group(1)
-    minor = m.group(2)
-    return (major, minor)
-
-def _get_cuda_gpu_arch_string():
-    gpus = [x for x in device_lib.list_local_devices() if x.device_type == 'GPU']
-    if len(gpus) == 0:
-        raise RuntimeError('No GPU devices found')
-    (major, minor) = _get_compute_cap(gpus[0])
-    return 'sm_%s%s' % (major, minor)
-
-def _run_cmd(cmd):
-    with os.popen(cmd) as pipe:
-        output = pipe.read()
-        status = pipe.close()
-    if status is not None:
-        raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, output))
-
-def _prepare_nvcc_cli(opts):
-    cmd = 'nvcc ' + opts.strip()
-    cmd += ' --disable-warnings'
-    cmd += ' --include-path "%s"' % tf.sysconfig.get_include()
-    cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'protobuf_archive', 'src')
-    cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'com_google_absl')
-    cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'eigen_archive')
-
-    compiler_bindir = _find_compiler_bindir()
-    if compiler_bindir is None:
-        # Require that _find_compiler_bindir succeeds on Windows. Allow
-        # nvcc to use whatever is the default on Linux.
-        if os.name == 'nt':
-            raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__)
-    else:
-        cmd += ' --compiler-bindir "%s"' % compiler_bindir
-    cmd += ' 2>&1'
-    return cmd
-
-#----------------------------------------------------------------------------
-# Main entry point.
-
-_plugin_cache = dict()
-
-def get_plugin(cuda_file):
-    cuda_file_base = os.path.basename(cuda_file)
-    cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base)
-
-    # Already in cache?
-    if cuda_file in _plugin_cache:
-        return _plugin_cache[cuda_file]
-
-    # Setup plugin.
-    if verbose:
-        print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True)
-    try:
-        # Hash CUDA source.
-        md5 = hashlib.md5()
-        with open(cuda_file, 'rb') as f:
-            md5.update(f.read())
-        md5.update(b'\n')
-
-        # Hash headers included by the CUDA code by running it through the preprocessor.
-        if not do_not_hash_included_headers:
-            if verbose:
-                print('Preprocessing... ', end='', flush=True)
-            with tempfile.TemporaryDirectory() as tmp_dir:
-                tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext)
-                _run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)))
-                with open(tmp_file, 'rb') as f:
-                    bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8')  # __FILE__ in error check macros
-                    good_file_str = ('"' + cuda_file_base + '"').encode('utf-8')
-                    for ln in f:
-                        if not ln.startswith(b'# ') and not ln.startswith(b'#line '):  # ignore line number pragmas
-                            ln = ln.replace(bad_file_str, good_file_str)
-                            md5.update(ln)
-                    md5.update(b'\n')
-
-        # Select compiler options.
-        compile_opts = ''
-        if os.name == 'nt':
-            compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib')
-        elif os.name == 'posix':
-            compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.so')
-            compile_opts += ' --compiler-options \'-fPIC -D_GLIBCXX_USE_CXX11_ABI=0\''
-        else:
-            assert False  # not Windows or Linux, w00t?
-        compile_opts += ' --gpu-architecture=%s' % _get_cuda_gpu_arch_string()
-        compile_opts += ' --use_fast_math'
-        nvcc_cmd = _prepare_nvcc_cli(compile_opts)
-
-        # Hash build configuration.
-        md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n')
-        md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n')
-        md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n')
-
-        # Compile if not already compiled.
-        bin_file_ext = '.dll' if os.name == 'nt' else '.so'
-        bin_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + md5.hexdigest() + bin_file_ext)
-        if not os.path.isfile(bin_file):
-            if verbose:
-                print('Compiling... ', end='', flush=True)
-            with tempfile.TemporaryDirectory() as tmp_dir:
-                tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext)
-                _run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))
-                os.makedirs(cuda_cache_path, exist_ok=True)
-                intermediate_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext)
-                shutil.copyfile(tmp_file, intermediate_file)
-                os.rename(intermediate_file, bin_file)  # atomic
-
-        # Load.
-        if verbose:
-            print('Loading... ', end='', flush=True)
-        plugin = tf.load_op_library(bin_file)
-
-        # Add to cache.
-        _plugin_cache[cuda_file] = plugin
-        if verbose:
-            print('Done.', flush=True)
-        return plugin
-
-    except:
-        if verbose:
-            print('Failed!', flush=True)
-        raise
-
-#----------------------------------------------------------------------------
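[Editor's note: the builder above explains where the hashed _cudacache binaries deleted earlier came from: get_plugin() md5-hashes the .cu source, its preprocessed headers, and the exact nvcc command line, compiles into tflib/_cudacache under an md5-derived name on a cache miss, and tf.load_op_library()s the result. A sketch of the call site, assuming TF 1.x and a working nvcc on PATH (plus one of the listed MSVC installs on Windows); the relative path is illustrative.]

    import os
    from tflib import custom_ops

    # First call: preprocess + hash the source, compile with nvcc, cache the
    # binary as tflib/_cudacache/fused_bias_act_<md5>.so (.dll on Windows), load it.
    # Later calls with the same file are served from the in-process _plugin_cache dict.
    plugin = custom_ops.get_plugin(os.path.join('tflib', 'ops', 'fused_bias_act.cu'))
    print(plugin)  # python module wrapping the op(s) registered by the .cu file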
diff --git a/tflib/custom_ops.py~ b/tflib/custom_ops.py~
deleted file mode 100644
index 9046787626a0530f03b9f8982ec625e6bd257858..0000000000000000000000000000000000000000
--- a/tflib/custom_ops.py~
+++ /dev/null
@@ -1,169 +0,0 @@
-# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
-#
-# This work is made available under the Nvidia Source Code License-NC.
-# To view a copy of this license, visit
-# https://nvlabs.github.io/stylegan2/license.html
-
-"""TensorFlow custom ops builder.
-"""
-
-import os
-import re
-import uuid
-import hashlib
-import tempfile
-import shutil
-import tensorflow as tf
-from tensorflow.python.client import device_lib  # pylint: disable=no-name-in-module
-
-#----------------------------------------------------------------------------
-# Global options.
-
-cuda_cache_path = os.path.join(os.path.dirname(__file__), '_cudacache')
-cuda_cache_version_tag = 'v1'
-do_not_hash_included_headers = False  # Speed up compilation by assuming that headers included by the CUDA code never change. Unsafe!
-verbose = True  # Print status messages to stdout.
-
-compiler_bindir_search_path = [
-    'C:/Program Files (x86)/Microsoft Visual Studio/2017/Community/VC/Tools/MSVC/14.16.27023/bin/Hostx64/x64',
-    'C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.23.28105/bin/Hostx64/x64',
-    'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin',
-]
-
-#----------------------------------------------------------------------------
-# Internal helper funcs.
-
-def _find_compiler_bindir():
-    for compiler_path in compiler_bindir_search_path:
-        if os.path.isdir(compiler_path):
-            return compiler_path
-    return None
-
-def _get_compute_cap(device):
-    caps_str = device.physical_device_desc
-    m = re.search('compute capability: (\\d+).(\\d+)', caps_str)
-    major = m.group(1)
-    minor = m.group(2)
-    return (major, minor)
-
-def _get_cuda_gpu_arch_string():
-    gpus = [x for x in device_lib.list_local_devices() if x.device_type == 'GPU']
-    if len(gpus) == 0:
-        raise RuntimeError('No GPU devices found')
-    (major, minor) = _get_compute_cap(gpus[0])
-    return 'sm_%s%s' % (major, minor)
-
-def _run_cmd(cmd):
-    with os.popen(cmd) as pipe:
-        output = pipe.read()
-        status = pipe.close()
-    if status is not None:
-        raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, output))
-
-def _prepare_nvcc_cli(opts):
-    cmd = 'nvcc ' + opts.strip()
-    cmd += ' --disable-warnings'
-    cmd += ' --include-path "%s"' % tf.sysconfig.get_include()
-    cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'protobuf_archive', 'src')
-    cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'com_google_absl')
-    cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'eigen_archive')
-
-    compiler_bindir = _find_compiler_bindir()
-    if compiler_bindir is None:
-        # Require that _find_compiler_bindir succeeds on Windows. Allow
-        # nvcc to use whatever is the default on Linux.
-        if os.name == 'nt':
-            raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__)
-    else:
-        cmd += ' --compiler-bindir "%s"' % compiler_bindir
-    cmd += ' 2>&1'
-    return cmd
-
-#----------------------------------------------------------------------------
-# Main entry point.
-
-_plugin_cache = dict()
-
-def get_plugin(cuda_file):
-    cuda_file_base = os.path.basename(cuda_file)
-    cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base)
-
-    # Already in cache?
-    if cuda_file in _plugin_cache:
-        return _plugin_cache[cuda_file]
-
-    # Setup plugin.
-    if verbose:
-        print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True)
-    try:
-        # Hash CUDA source.
-        md5 = hashlib.md5()
-        with open(cuda_file, 'rb') as f:
-            md5.update(f.read())
-        md5.update(b'\n')
-
-        # Hash headers included by the CUDA code by running it through the preprocessor.
-        if not do_not_hash_included_headers:
-            if verbose:
-                print('Preprocessing... ', end='', flush=True)
-            with tempfile.TemporaryDirectory() as tmp_dir:
-                tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext)
-                _run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)))
-                with open(tmp_file, 'rb') as f:
-                    bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8')  # __FILE__ in error check macros
-                    good_file_str = ('"' + cuda_file_base + '"').encode('utf-8')
-                    for ln in f:
-                        if not ln.startswith(b'# ') and not ln.startswith(b'#line '):  # ignore line number pragmas
-                            ln = ln.replace(bad_file_str, good_file_str)
-                            md5.update(ln)
-                    md5.update(b'\n')
-
-        # Select compiler options.
-        compile_opts = ''
-        if os.name == 'nt':
-            compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib')
-        elif os.name == 'posix':
-            compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.so')
-            compile_opts += ' --compiler-options \'-fPIC -D_GLIBCXX_USE_CXX11_ABI=0\''
-        else:
-            assert False  # not Windows or Linux, w00t?
-        compile_opts += ' --gpu-architecture=%s' % _get_cuda_gpu_arch_string()
-        compile_opts += ' --use_fast_math'
-        nvcc_cmd = _prepare_nvcc_cli(compile_opts)
-
-        # Hash build configuration.
-        md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n')
-        md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n')
-        md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n')
-
-        # Compile if not already compiled.
-        bin_file_ext = '.dll' if os.name == 'nt' else '.so'
-        bin_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + md5.hexdigest() + bin_file_ext)
-        if not os.path.isfile(bin_file):
-            if verbose:
-                print('Compiling... ', end='', flush=True)
-            with tempfile.TemporaryDirectory() as tmp_dir:
-                tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext)
-                _run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))
-                os.makedirs(cuda_cache_path, exist_ok=True)
-                intermediate_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext)
-                shutil.copyfile(tmp_file, intermediate_file)
-                os.rename(intermediate_file, bin_file)  # atomic
-
-        # Load.
-        if verbose:
-            print('Loading... ', end='', flush=True)
-        plugin = tf.load_op_library(bin_file)
-
-        # Add to cache.
-        _plugin_cache[cuda_file] = plugin
-        if verbose:
-            print('Done.', flush=True)
-        return plugin
-
-    except:
-        if verbose:
-            print('Failed!', flush=True)
-        raise
-
-#----------------------------------------------------------------------------
diff --git a/tflib/network.py b/tflib/network.py
deleted file mode 100644
index 409babb1d2166d341bfaee2ef460d8810bdaf51f..0000000000000000000000000000000000000000
--- a/tflib/network.py
+++ /dev/null
@@ -1,590 +0,0 @@
-# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
-#
-# This work is made available under the Nvidia Source Code License-NC.
-# To view a copy of this license, visit
-# https://nvlabs.github.io/stylegan2/license.html
-
-"""Helper for managing networks."""
-
-import types
-import inspect
-import re
-import uuid
-import sys
-import numpy as np
-import tensorflow as tf
-
-from collections import OrderedDict
-from typing import Any, List, Tuple, Union
-
-from . import tfutil
-from .. import util
-
-from .tfutil import TfExpression, TfExpressionEx
-
-_import_handlers = []  # Custom import handlers for dealing with legacy data in pickle import.
-_import_module_src = dict()  # Source code for temporary modules created during pickle import.
-
-
-def import_handler(handler_func):
-    """Function decorator for declaring custom import handlers."""
-    _import_handlers.append(handler_func)
-    return handler_func
-
-
-class Network:
-    """Generic network abstraction.
-
-    Acts as a convenience wrapper for a parameterized network construction
-    function, providing several utility methods and convenient access to
-    the inputs/outputs/weights.
-
-    Network objects can be safely pickled and unpickled for long-term
-    archival purposes. The pickling works reliably as long as the underlying
-    network construction function is defined in a standalone Python module
-    that has no side effects or application-specific imports.
-
-    Args:
-        name: Network name. Used to select TensorFlow name and variable scopes.
-        func_name: Fully qualified name of the underlying network construction function, or a top-level function object.
-        static_kwargs: Keyword arguments to be passed in to the network construction function.
-
-    Attributes:
-        name: User-specified name, defaults to build func name if None.
-        scope: Unique TensorFlow scope containing template graph and variables, derived from the user-specified name.
-        static_kwargs: Arguments passed to the user-supplied build func.
-        components: Container for sub-networks. Passed to the build func, and retained between calls.
-        num_inputs: Number of input tensors.
-        num_outputs: Number of output tensors.
-        input_shapes: Input tensor shapes (NC or NCHW), including minibatch dimension.
-        output_shapes: Output tensor shapes (NC or NCHW), including minibatch dimension.
-        input_shape: Short-hand for input_shapes[0].
-        output_shape: Short-hand for output_shapes[0].
-        input_templates: Input placeholders in the template graph.
-        output_templates: Output tensors in the template graph.
-        input_names: Name string for each input.
-        output_names: Name string for each output.
-        own_vars: Variables defined by this network (local_name => var), excluding sub-networks.
-        vars: All variables (local_name => var).
-        trainables: All trainable variables (local_name => var).
-        var_global_to_local: Mapping from variable global names to local names.
-    """
-
-    def __init__(self, name: str = None, func_name: Any = None, **static_kwargs):
-        tfutil.assert_tf_initialized()
-        assert isinstance(name, str) or name is None
-        assert func_name is not None
-        assert isinstance(func_name, str) or util.is_top_level_function(func_name)
-        assert util.is_pickleable(static_kwargs)
-
-        self._init_fields()
-        self.name = name
-        self.static_kwargs = util.EasyDict(static_kwargs)
-
-        # Locate the user-specified network build function.
-        if util.is_top_level_function(func_name):
-            func_name = util.get_top_level_function_name(func_name)
-        module, self._build_func_name = util.get_module_from_obj_name(func_name)
-        self._build_func = util.get_obj_from_module(module, self._build_func_name)
-        assert callable(self._build_func)
-
-        # Dig up source code for the module containing the build function.
-        self._build_module_src = _import_module_src.get(module, None)
-        if self._build_module_src is None:
-            self._build_module_src = inspect.getsource(module)
-
-        # Init TensorFlow graph.
-        self._init_graph()
-        self.reset_own_vars()
-
-    def _init_fields(self) -> None:
-        self.name = None
-        self.scope = None
-        self.static_kwargs = util.EasyDict()
-        self.components = util.EasyDict()
-        self.num_inputs = 0
-        self.num_outputs = 0
-        self.input_shapes = [[]]
-        self.output_shapes = [[]]
-        self.input_shape = []
-        self.output_shape = []
-        self.input_templates = []
-        self.output_templates = []
-        self.input_names = []
-        self.output_names = []
-        self.own_vars = OrderedDict()
-        self.vars = OrderedDict()
-        self.trainables = OrderedDict()
-        self.var_global_to_local = OrderedDict()
-
-        self._build_func = None  # User-supplied build function that constructs the network.
-        self._build_func_name = None  # Name of the build function.
-        self._build_module_src = None  # Full source code of the module containing the build function.
-        self._run_cache = dict()  # Cached graph data for Network.run().
-
-    def _init_graph(self) -> None:
-        # Collect inputs.
-        self.input_names = []
-
-        for param in inspect.signature(self._build_func).parameters.values():
-            if param.kind == param.POSITIONAL_OR_KEYWORD and param.default is param.empty:
-                self.input_names.append(param.name)
-
-        self.num_inputs = len(self.input_names)
-        assert self.num_inputs >= 1
-
-        # Choose name and scope.
-        if self.name is None:
-            self.name = self._build_func_name
-        assert re.match("^[A-Za-z0-9_.\\-]*$", self.name)
-        with tf.name_scope(None):
-            self.scope = tf.get_default_graph().unique_name(self.name, mark_as_used=True)
-
-        # Finalize build func kwargs.
-        build_kwargs = dict(self.static_kwargs)
-        build_kwargs["is_template_graph"] = True
-        build_kwargs["components"] = self.components
-
-        # Build template graph.
-        with tfutil.absolute_variable_scope(self.scope, reuse=False), tfutil.absolute_name_scope(self.scope):  # ignore surrounding scopes
-            assert tf.get_variable_scope().name == self.scope
-            assert tf.get_default_graph().get_name_scope() == self.scope
-            with tf.control_dependencies(None):  # ignore surrounding control dependencies
-                self.input_templates = [tf.placeholder(tf.float32, name=name) for name in self.input_names]
-                out_expr = self._build_func(*self.input_templates, **build_kwargs)
-
-        # Collect outputs.
-        assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple)
-        self.output_templates = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr)
-        self.num_outputs = len(self.output_templates)
-        assert self.num_outputs >= 1
-        assert all(tfutil.is_tf_expression(t) for t in self.output_templates)
-
-        # Perform sanity checks.
-        if any(t.shape.ndims is None for t in self.input_templates):
-            raise ValueError("Network input shapes not defined. Please call x.set_shape() for each input.")
-        if any(t.shape.ndims is None for t in self.output_templates):
-            raise ValueError("Network output shapes not defined. Please call x.set_shape() where applicable.")
-        if any(not isinstance(comp, Network) for comp in self.components.values()):
-            raise ValueError("Components of a Network must be Networks themselves.")
-        if len(self.components) != len(set(comp.name for comp in self.components.values())):
-            raise ValueError("Components of a Network must have unique names.")
-
-        # List inputs and outputs.
-        self.input_shapes = [t.shape.as_list() for t in self.input_templates]
-        self.output_shapes = [t.shape.as_list() for t in self.output_templates]
-        self.input_shape = self.input_shapes[0]
-        self.output_shape = self.output_shapes[0]
-        self.output_names = [t.name.split("/")[-1].split(":")[0] for t in self.output_templates]
-
-        # List variables.
-        self.own_vars = OrderedDict((var.name[len(self.scope) + 1:].split(":")[0], var) for var in tf.global_variables(self.scope + "/"))
-        self.vars = OrderedDict(self.own_vars)
-        self.vars.update((comp.name + "/" + name, var) for comp in self.components.values() for name, var in comp.vars.items())
-        self.trainables = OrderedDict((name, var) for name, var in self.vars.items() if var.trainable)
-        self.var_global_to_local = OrderedDict((var.name.split(":")[0], name) for name, var in self.vars.items())
-
-    def reset_own_vars(self) -> None:
-        """Re-initialize all variables of this network, excluding sub-networks."""
-        tfutil.run([var.initializer for var in self.own_vars.values()])
-
-    def reset_vars(self) -> None:
-        """Re-initialize all variables of this network, including sub-networks."""
-        tfutil.run([var.initializer for var in self.vars.values()])
-
-    def reset_trainables(self) -> None:
-        """Re-initialize all trainable variables of this network, including sub-networks."""
-        tfutil.run([var.initializer for var in self.trainables.values()])
-
-    def get_output_for(self, *in_expr: TfExpression, return_as_list: bool = False, **dynamic_kwargs) -> Union[TfExpression, List[TfExpression]]:
-        """Construct TensorFlow expression(s) for the output(s) of this network, given the input expression(s)."""
-        assert len(in_expr) == self.num_inputs
-        assert not all(expr is None for expr in in_expr)
-
-        # Finalize build func kwargs.
-        build_kwargs = dict(self.static_kwargs)
-        build_kwargs.update(dynamic_kwargs)
-        build_kwargs["is_template_graph"] = False
-        build_kwargs["components"] = self.components
-
-        # Build TensorFlow graph to evaluate the network.
-        with tfutil.absolute_variable_scope(self.scope, reuse=True), tf.name_scope(self.name):
-            assert tf.get_variable_scope().name == self.scope
-            valid_inputs = [expr for expr in in_expr if expr is not None]
-            final_inputs = []
-            for expr, name, shape in zip(in_expr, self.input_names, self.input_shapes):
-                if expr is not None:
-                    expr = tf.identity(expr, name=name)
-                else:
-                    expr = tf.zeros([tf.shape(valid_inputs[0])[0]] + shape[1:], name=name)
-                final_inputs.append(expr)
-            out_expr = self._build_func(*final_inputs, **build_kwargs)
-
-        # Propagate input shapes back to the user-specified expressions.
-        for expr, final in zip(in_expr, final_inputs):
-            if isinstance(expr, tf.Tensor):
-                expr.set_shape(final.shape)
-
-        # Express outputs in the desired format.
-        assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple)
-        if return_as_list:
-            out_expr = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr)
-        return out_expr
-
-    def get_var_local_name(self, var_or_global_name: Union[TfExpression, str]) -> str:
-        """Get the local name of a given variable, without any surrounding name scopes."""
-        assert tfutil.is_tf_expression(var_or_global_name) or isinstance(var_or_global_name, str)
-        global_name = var_or_global_name if isinstance(var_or_global_name, str) else var_or_global_name.name
-        return self.var_global_to_local[global_name]
-
-    def find_var(self, var_or_local_name: Union[TfExpression, str]) -> TfExpression:
-        """Find variable by local or global name."""
-        assert tfutil.is_tf_expression(var_or_local_name) or isinstance(var_or_local_name, str)
-        return self.vars[var_or_local_name] if isinstance(var_or_local_name, str) else var_or_local_name
-
-    def get_var(self, var_or_local_name: Union[TfExpression, str]) -> np.ndarray:
-        """Get the value of a given variable as NumPy array.
-        Note: This method is very inefficient -- prefer to use tflib.run(list_of_vars) whenever possible."""
-        return self.find_var(var_or_local_name).eval()
-
-    def set_var(self, var_or_local_name: Union[TfExpression, str], new_value: Union[int, float, np.ndarray]) -> None:
-        """Set the value of a given variable based on the given NumPy array.
-        Note: This method is very inefficient -- prefer to use tflib.set_vars() whenever possible."""
-        tfutil.set_vars({self.find_var(var_or_local_name): new_value})
-
-    def __getstate__(self) -> dict:
-        """Pickle export."""
-        state = dict()
-        state["version"] = 4
-        state["name"] = self.name
-        state["static_kwargs"] = dict(self.static_kwargs)
-        state["components"] = dict(self.components)
-        state["build_module_src"] = self._build_module_src
-        state["build_func_name"] = self._build_func_name
-        state["variables"] = list(zip(self.own_vars.keys(), tfutil.run(list(self.own_vars.values()))))
-        return state
-
-    def __setstate__(self, state: dict) -> None:
-        """Pickle import."""
-        # pylint: disable=attribute-defined-outside-init
-        tfutil.assert_tf_initialized()
-        self._init_fields()
-
-        # Execute custom import handlers.
-        for handler in _import_handlers:
-            state = handler(state)
-
-        # Set basic fields.
-        assert state["version"] in [2, 3, 4]
-        self.name = state["name"]
-        self.static_kwargs = util.EasyDict(state["static_kwargs"])
-        self.components = util.EasyDict(state.get("components", {}))
-        self._build_module_src = state["build_module_src"]
-        self._build_func_name = state["build_func_name"]
-
-        # Create temporary module from the imported source code.
-        module_name = "_tflib_network_import_" + uuid.uuid4().hex
-        module = types.ModuleType(module_name)
-        sys.modules[module_name] = module
-        _import_module_src[module] = self._build_module_src
-        exec(self._build_module_src, module.__dict__)  # pylint: disable=exec-used
-
-        # Locate network build function in the temporary module.
-        self._build_func = util.get_obj_from_module(module, self._build_func_name)
-        assert callable(self._build_func)
-
-        # Init TensorFlow graph.
-        self._init_graph()
-        self.reset_own_vars()
-        tfutil.set_vars({self.find_var(name): value for name, value in state["variables"]})
-
-    def clone(self, name: str = None, **new_static_kwargs) -> "Network":
-        """Create a clone of this network with its own copy of the variables."""
-        # pylint: disable=protected-access
-        net = object.__new__(Network)
-        net._init_fields()
-        net.name = name if name is not None else self.name
-        net.static_kwargs = util.EasyDict(self.static_kwargs)
-        net.static_kwargs.update(new_static_kwargs)
-        net._build_module_src = self._build_module_src
-        net._build_func_name = self._build_func_name
-        net._build_func = self._build_func
-        net._init_graph()
-        net.copy_vars_from(self)
-        return net
-
-    def copy_own_vars_from(self, src_net: "Network") -> None:
-        """Copy the values of all variables from the given network, excluding sub-networks."""
-        names = [name for name in self.own_vars.keys() if name in src_net.own_vars]
-        tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names}))
-
-    def copy_vars_from(self, src_net: "Network") -> None:
-        """Copy the values of all variables from the given network, including sub-networks."""
-        names = [name for name in self.vars.keys() if name in src_net.vars]
-        tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names}))
-
-    def copy_trainables_from(self, src_net: "Network") -> None:
-        """Copy the values of all trainable variables from the given network, including sub-networks."""
-        names = [name for name in self.trainables.keys() if name in src_net.trainables]
-        tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names}))
-
-    def convert(self, new_func_name: str, new_name: str = None, **new_static_kwargs) -> "Network":
-        """Create new network with the given parameters, and copy all variables from this network."""
-        if new_name is None:
-            new_name = self.name
-        static_kwargs = dict(self.static_kwargs)
-        static_kwargs.update(new_static_kwargs)
-        net = Network(name=new_name, func_name=new_func_name, **static_kwargs)
-        net.copy_vars_from(self)
-        return net
-
-    def setup_as_moving_average_of(self, src_net: "Network", beta: TfExpressionEx = 0.99, beta_nontrainable: TfExpressionEx = 0.0) -> tf.Operation:
-        """Construct a TensorFlow op that updates the variables of this network
-        to be slightly closer to those of the given network."""
-        with tfutil.absolute_name_scope(self.scope + "/_MovingAvg"):
-            ops = []
-            for name, var in self.vars.items():
-                if name in src_net.vars:
-                    cur_beta = beta if name in self.trainables else beta_nontrainable
-                    new_value = tfutil.lerp(src_net.vars[name], var, cur_beta)
-                    ops.append(var.assign(new_value))
-            return tf.group(*ops)
-
-    def run(self,
-            *in_arrays: Tuple[Union[np.ndarray, None], ...],
-            input_transform: dict = None,
-            output_transform: dict = None,
-            return_as_list: bool = False,
-            print_progress: bool = False,
-            minibatch_size: int = None,
-            num_gpus: int = 1,
-            assume_frozen: bool = False,
-            **dynamic_kwargs) -> Union[np.ndarray, Tuple[np.ndarray, ...], List[np.ndarray]]:
-        """Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s).
-
-        Args:
-            input_transform:    A dict specifying a custom transformation to be applied to the input tensor(s) before evaluating the network.
-                                The dict must contain a 'func' field that points to a top-level function. The function is called with the input
-                                TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
-            output_transform:   A dict specifying a custom transformation to be applied to the output tensor(s) after evaluating the network.
-                                The dict must contain a 'func' field that points to a top-level function. The function is called with the output
-                                TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
-            return_as_list:     True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs.
-            print_progress:     Print progress to the console? Useful for very large input arrays.
-            minibatch_size:     Maximum minibatch size to use, None = disable batching.
-            num_gpus:           Number of GPUs to use.
-            assume_frozen:      Improve multi-GPU performance by assuming that the trainable parameters will remain changed between calls.
-            dynamic_kwargs:     Additional keyword arguments to be passed into the network build function.
-        """
-        assert len(in_arrays) == self.num_inputs
-        assert not all(arr is None for arr in in_arrays)
-        assert input_transform is None or util.is_top_level_function(input_transform["func"])
-        assert output_transform is None or util.is_top_level_function(output_transform["func"])
-        output_transform, dynamic_kwargs = _handle_legacy_output_transforms(output_transform, dynamic_kwargs)
-        num_items = in_arrays[0].shape[0]
-        if minibatch_size is None:
-            minibatch_size = num_items
-
-        # Construct unique hash key from all arguments that affect the TensorFlow graph.
-        key = dict(input_transform=input_transform, output_transform=output_transform, num_gpus=num_gpus, assume_frozen=assume_frozen, dynamic_kwargs=dynamic_kwargs)
-        def unwind_key(obj):
-            if isinstance(obj, dict):
-                return [(key, unwind_key(value)) for key, value in sorted(obj.items())]
-            if callable(obj):
-                return util.get_top_level_function_name(obj)
-            return obj
-        key = repr(unwind_key(key))
-
-        # Build graph.
-        if key not in self._run_cache:
-            with tfutil.absolute_name_scope(self.scope + "/_Run"), tf.control_dependencies(None):
-                with tf.device("/cpu:0"):
-                    in_expr = [tf.placeholder(tf.float32, name=name) for name in self.input_names]
-                    in_split = list(zip(*[tf.split(x, num_gpus) for x in in_expr]))
-
-                out_split = []
-                for gpu in range(num_gpus):
-                    with tf.device("/gpu:%d" % gpu):
-                        net_gpu = self.clone() if assume_frozen else self
-                        in_gpu = in_split[gpu]
-
-                        if input_transform is not None:
-                            in_kwargs = dict(input_transform)
-                            in_gpu = in_kwargs.pop("func")(*in_gpu, **in_kwargs)
-                            in_gpu = [in_gpu] if tfutil.is_tf_expression(in_gpu) else list(in_gpu)
-
-                        assert len(in_gpu) == self.num_inputs
-                        out_gpu = net_gpu.get_output_for(*in_gpu, return_as_list=True, **dynamic_kwargs)
-
-                        if output_transform is not None:
-                            out_kwargs = dict(output_transform)
-                            out_gpu = out_kwargs.pop("func")(*out_gpu, **out_kwargs)
-                            out_gpu = [out_gpu] if tfutil.is_tf_expression(out_gpu) else list(out_gpu)
-
-                        assert len(out_gpu) == self.num_outputs
-                        out_split.append(out_gpu)
-
-                with tf.device("/cpu:0"):
-                    out_expr = [tf.concat(outputs, axis=0) for outputs in zip(*out_split)]
-                    self._run_cache[key] = in_expr, out_expr
-
-        # Run minibatches.
-        in_expr, out_expr = self._run_cache[key]
-        out_arrays = [np.empty([num_items] + expr.shape.as_list()[1:], expr.dtype.name) for expr in out_expr]
-
-        for mb_begin in range(0, num_items, minibatch_size):
-            if print_progress:
-                print("\r%d / %d" % (mb_begin, num_items), end="")
-
-            mb_end = min(mb_begin + minibatch_size, num_items)
-            mb_num = mb_end - mb_begin
-            mb_in = [src[mb_begin : mb_end] if src is not None else np.zeros([mb_num] + shape[1:]) for src, shape in zip(in_arrays, self.input_shapes)]
-            mb_out = tf.get_default_session().run(out_expr, dict(zip(in_expr, mb_in)))
-
-            for dst, src in zip(out_arrays, mb_out):
-                dst[mb_begin: mb_end] = src
-
-        # Done.
-        if print_progress:
-            print("\r%d / %d" % (num_items, num_items))
-
-        if not return_as_list:
-            out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(out_arrays)
-        return out_arrays
-
-    def list_ops(self) -> List[TfExpression]:
-        include_prefix = self.scope + "/"
-        exclude_prefix = include_prefix + "_"
-        ops = tf.get_default_graph().get_operations()
-        ops = [op for op in ops if op.name.startswith(include_prefix)]
-        ops = [op for op in ops if not op.name.startswith(exclude_prefix)]
-        return ops
-
-    def list_layers(self) -> List[Tuple[str, TfExpression, List[TfExpression]]]:
-        """Returns a list of (layer_name, output_expr, trainable_vars) tuples corresponding to
-        individual layers of the network. Mainly intended to be used for reporting."""
-        layers = []
-
-        def recurse(scope, parent_ops, parent_vars, level):
-            # Ignore specific patterns.
-            if any(p in scope for p in ["/Shape", "/strided_slice", "/Cast", "/concat", "/Assign"]):
-                return
-
-            # Filter ops and vars by scope.
-            global_prefix = scope + "/"
-            local_prefix = global_prefix[len(self.scope) + 1:]
-            cur_ops = [op for op in parent_ops if op.name.startswith(global_prefix) or op.name == global_prefix[:-1]]
-            cur_vars = [(name, var) for name, var in parent_vars if name.startswith(local_prefix) or name == local_prefix[:-1]]
-            if not cur_ops and not cur_vars:
-                return
-
-            # Filter out all ops related to variables.
-            for var in [op for op in cur_ops if op.type.startswith("Variable")]:
-                var_prefix = var.name + "/"
-                cur_ops = [op for op in cur_ops if not op.name.startswith(var_prefix)]
-
-            # Scope does not contain ops as immediate children => recurse deeper.
-            contains_direct_ops = any("/" not in op.name[len(global_prefix):] and op.type not in ["Identity", "Cast", "Transpose"] for op in cur_ops)
-            if (level == 0 or not contains_direct_ops) and (len(cur_ops) + len(cur_vars)) > 1:
-                visited = set()
-                for rel_name in [op.name[len(global_prefix):] for op in cur_ops] + [name[len(local_prefix):] for name, _var in cur_vars]:
-                    token = rel_name.split("/")[0]
-                    if token not in visited:
-                        recurse(global_prefix + token, cur_ops, cur_vars, level + 1)
-                        visited.add(token)
-                return
-
-            # Report layer.
-            layer_name = scope[len(self.scope) + 1:]
-            layer_output = cur_ops[-1].outputs[0] if cur_ops else cur_vars[-1][1]
-            layer_trainables = [var for _name, var in cur_vars if var.trainable]
-            layers.append((layer_name, layer_output, layer_trainables))
-
-        recurse(self.scope, self.list_ops(), list(self.vars.items()), 0)
-        return layers
-
-    def print_layers(self, title: str = None, hide_layers_with_no_params: bool = False) -> None:
-        """Print a summary table of the network structure."""
-        rows = [[title if title is not None else self.name, "Params", "OutputShape", "WeightShape"]]
-        rows += [["---"] * 4]
-        total_params = 0
-
-        for layer_name, layer_output, layer_trainables in self.list_layers():
-            num_params = sum(int(np.prod(var.shape.as_list())) for var in layer_trainables)
-            weights = [var for var in layer_trainables if var.name.endswith("/weight:0")]
-            weights.sort(key=lambda x: len(x.name))
-            if len(weights) == 0 and len(layer_trainables) == 1:
-                weights = layer_trainables
-            total_params += num_params
-
-            if not hide_layers_with_no_params or num_params != 0:
-                num_params_str = str(num_params) if num_params > 0 else "-"
-                output_shape_str = str(layer_output.shape)
-                weight_shape_str = str(weights[0].shape) if len(weights) >= 1 else "-"
-                rows += [[layer_name, num_params_str, output_shape_str, weight_shape_str]]
-
-        rows += [["---"] * 4]
-        rows += [["Total", str(total_params), "", ""]]
-
-        widths = [max(len(cell) for cell in column) for column in zip(*rows)]
-        print()
-        for row in rows:
-            print("  ".join(cell + " " * (width - len(cell)) for cell, width in zip(row, widths)))
-        print()
-
-    def setup_weight_histograms(self, title: str = None) -> None:
-        """Construct summary ops to include histograms of all trainable parameters in TensorBoard."""
-        if title is None:
-            title = self.name
-
-        with tf.name_scope(None), tf.device(None), tf.control_dependencies(None):
-            for local_name, var in self.trainables.items():
-                if "/" in local_name:
-                    p = local_name.split("/")
-                    name = title + "_" + p[-1] + "/" + "_".join(p[:-1])
-                else:
-                    name = title + "_toplevel/" + local_name
-
-                tf.summary.histogram(name, var)
-
-#----------------------------------------------------------------------------
-# Backwards-compatible emulation of legacy output transformation in Network.run().
- -_print_legacy_warning = True - -def _handle_legacy_output_transforms(output_transform, dynamic_kwargs): - global _print_legacy_warning - legacy_kwargs = ["out_mul", "out_add", "out_shrink", "out_dtype"] - if not any(kwarg in dynamic_kwargs for kwarg in legacy_kwargs): - return output_transform, dynamic_kwargs - - if _print_legacy_warning: - _print_legacy_warning = False - print() - print("WARNING: Old-style output transformations in Network.run() are deprecated.") - print("Consider using 'output_transform=dict(func=tflib.convert_images_to_uint8)'") - print("instead of 'out_mul=127.5, out_add=127.5, out_dtype=np.uint8'.") - print() - assert output_transform is None - - new_kwargs = dict(dynamic_kwargs) - new_transform = {kwarg: new_kwargs.pop(kwarg) for kwarg in legacy_kwargs if kwarg in dynamic_kwargs} - new_transform["func"] = _legacy_output_transform_func - return new_transform, new_kwargs - -def _legacy_output_transform_func(*expr, out_mul=1.0, out_add=0.0, out_shrink=1, out_dtype=None): - if out_mul != 1.0: - expr = [x * out_mul for x in expr] - - if out_add != 0.0: - expr = [x + out_add for x in expr] - - if out_shrink > 1: - ksize = [1, 1, out_shrink, out_shrink] - expr = [tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") for x in expr] - - if out_dtype is not None: - if tf.as_dtype(out_dtype).is_integer: - expr = [tf.round(x) for x in expr] - expr = [tf.saturate_cast(x, out_dtype) for x in expr] - return expr diff --git a/tflib/ops/__init__.py b/tflib/ops/__init__.py deleted file mode 100644 index 9ab9908efa3cb38af52e8d5bcaa8acffde5a8875..0000000000000000000000000000000000000000 --- a/tflib/ops/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. -# -# This work is made available under the Nvidia Source Code License-NC. -# To view a copy of this license, visit -# https://nvlabs.github.io/stylegan2/license.html - -# empty diff --git a/tflib/ops/__pycache__/__init__.cpython-36.pyc b/tflib/ops/__pycache__/__init__.cpython-36.pyc deleted file mode 100644 index d6d9fe932896826b80acf49e15fff6079f2f3c49..0000000000000000000000000000000000000000 Binary files a/tflib/ops/__pycache__/__init__.cpython-36.pyc and /dev/null differ diff --git a/tflib/ops/__pycache__/fused_bias_act.cpython-36.pyc b/tflib/ops/__pycache__/fused_bias_act.cpython-36.pyc deleted file mode 100644 index aaf22aefb1529d3d0cd19bbd0ba49816d532ac6e..0000000000000000000000000000000000000000 Binary files a/tflib/ops/__pycache__/fused_bias_act.cpython-36.pyc and /dev/null differ diff --git a/tflib/ops/__pycache__/upfirdn_2d.cpython-36.pyc b/tflib/ops/__pycache__/upfirdn_2d.cpython-36.pyc deleted file mode 100644 index 740a77b3c7d29cb546482a3352e74b74f3b27c2b..0000000000000000000000000000000000000000 Binary files a/tflib/ops/__pycache__/upfirdn_2d.cpython-36.pyc and /dev/null differ diff --git a/tflib/ops/fused_bias_act.cu b/tflib/ops/fused_bias_act.cu deleted file mode 100644 index 1102f624fadd0b803bdfb99fecfe145d7ec8abc4..0000000000000000000000000000000000000000 --- a/tflib/ops/fused_bias_act.cu +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright (c) 2019, NVIDIA Corporation. All rights reserved. -// -// This work is made available under the Nvidia Source Code License-NC. 
-// To view a copy of this license, visit
-// https://nvlabs.github.io/stylegan2/license.html
-
-#define EIGEN_USE_GPU
-#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/shape_inference.h"
-#include <stdio.h>
-
-using namespace tensorflow;
-using namespace tensorflow::shape_inference;
-
-#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false)
-
-//------------------------------------------------------------------------
-// CUDA kernel.
-
-template <class T>
-struct FusedBiasActKernelParams
-{
-    const T* x;     // [sizeX]
-    const T* b;     // [sizeB] or NULL
-    const T* ref;   // [sizeX] or NULL
-    T* y;           // [sizeX]
-
-    int grad;
-    int axis;
-    int act;
-    float alpha;
-    float gain;
-
-    int sizeX;
-    int sizeB;
-    int stepB;
-    int loopX;
-};
-
-template <class T>
-static __global__ void FusedBiasActKernel(const FusedBiasActKernelParams<T> p)
-{
-    const float expRange = 80.0f;
-    const float halfExpRange = 40.0f;
-    const float seluScale = 1.0507009873554804934193349852946f;
-    const float seluAlpha = 1.6732632423543772848170429916717f;
-
-    // Loop over elements.
-    int xi = blockIdx.x * p.loopX * blockDim.x + threadIdx.x;
-    for (int loopIdx = 0; loopIdx < p.loopX && xi < p.sizeX; loopIdx++, xi += blockDim.x)
-    {
-        // Load and apply bias.
-        float x = (float)p.x[xi];
-        if (p.b)
-            x += (float)p.b[(xi / p.stepB) % p.sizeB];
-        float ref = (p.ref) ? (float)p.ref[xi] : 0.0f;
-        if (p.gain != 0.0f & p.act != 9)
-            ref /= p.gain;
-
-        // Evaluate activation func.
-        float y;
-        switch (p.act * 10 + p.grad)
-        {
-            // linear
-            default:
-            case 10: y = x; break;
-            case 11: y = x; break;
-            case 12: y = 0.0f; break;
-
-            // relu
-            case 20: y = (x > 0.0f) ? x : 0.0f; break;
-            case 21: y = (ref > 0.0f) ? x : 0.0f; break;
-            case 22: y = 0.0f; break;
-
-            // lrelu
-            case 30: y = (x > 0.0f) ? x : x * p.alpha; break;
-            case 31: y = (ref > 0.0f) ? x : x * p.alpha; break;
-            case 32: y = 0.0f; break;
-
-            // tanh
-            case 40: { float c = expf(x); float d = 1.0f / c; y = (x < -expRange) ? -1.0f : (x > expRange) ? 1.0f : (c - d) / (c + d); } break;
-            case 41: y = x * (1.0f - ref * ref); break;
-            case 42: y = x * (1.0f - ref * ref) * (-2.0f * ref); break;
-
-            // sigmoid
-            case 50: y = (x < -expRange) ? 0.0f : 1.0f / (expf(-x) + 1.0f); break;
-            case 51: y = x * ref * (1.0f - ref); break;
-            case 52: y = x * ref * (1.0f - ref) * (1.0f - 2.0f * ref); break;
-
-            // elu
-            case 60: y = (x >= 0.0f) ? x : expf(x) - 1.0f; break;
-            case 61: y = (ref >= 0.0f) ? x : x * (ref + 1.0f); break;
-            case 62: y = (ref >= 0.0f) ? 0.0f : x * (ref + 1.0f); break;
-
-            // selu
-            case 70: y = (x >= 0.0f) ? seluScale * x : (seluScale * seluAlpha) * (expf(x) - 1.0f); break;
-            case 71: y = (ref >= 0.0f) ? x * seluScale : x * (ref + seluScale * seluAlpha); break;
-            case 72: y = (ref >= 0.0f) ? 0.0f : x * (ref + seluScale * seluAlpha); break;
-
-            // softplus
-            case 80: y = (x > expRange) ? x : logf(expf(x) + 1.0f); break;
-            case 81: y = x * (1.0f - expf(-ref)); break;
-            case 82: { float c = expf(-ref); y = x * c * (1.0f - c); } break;
-
-            // swish
-            case 90: y = (x < -expRange) ? 0.0f : x / (expf(-x) + 1.0f); break;
-            case 91: { float c = expf(ref); float d = c + 1.0f; y = (ref > halfExpRange) ? x : x * c * (ref + d) / (d * d); } break;
-            case 92: { float c = expf(ref); float d = c + 1.0f; y = (ref > halfExpRange) ? 0.0f : x * c * (ref * (2.0f - d) + 2.0f * d) / (d * d * d); } break;
-        }
-
-        // Apply gain and store.
-        p.y[xi] = (T)(y * p.gain);
-    }
-}
-
-//------------------------------------------------------------------------
-// TensorFlow op.
-
-template <class T>
-struct FusedBiasActOp : public OpKernel
-{
-    FusedBiasActKernelParams<T> m_attribs;
-
-    FusedBiasActOp(OpKernelConstruction* ctx) : OpKernel(ctx)
-    {
-        memset(&m_attribs, 0, sizeof(m_attribs));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("grad", &m_attribs.grad));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("axis", &m_attribs.axis));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("act", &m_attribs.act));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &m_attribs.alpha));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("gain", &m_attribs.gain));
-        OP_REQUIRES(ctx, m_attribs.grad >= 0, errors::InvalidArgument("grad must be non-negative"));
-        OP_REQUIRES(ctx, m_attribs.axis >= 0, errors::InvalidArgument("axis must be non-negative"));
-        OP_REQUIRES(ctx, m_attribs.act >= 0, errors::InvalidArgument("act must be non-negative"));
-    }
-
-    void Compute(OpKernelContext* ctx)
-    {
-        FusedBiasActKernelParams<T> p = m_attribs;
-        cudaStream_t stream = ctx->eigen_device<Eigen::GpuDevice>().stream();
-
-        const Tensor& x = ctx->input(0); // [...]
-        const Tensor& b = ctx->input(1); // [sizeB] or [0]
-        const Tensor& ref = ctx->input(2); // x.shape or [0]
-        p.x = x.flat<T>().data();
-        p.b = (b.NumElements()) ? b.flat<T>().data() : NULL;
-        p.ref = (ref.NumElements()) ? ref.flat<T>().data() : NULL;
-        OP_REQUIRES(ctx, b.NumElements() == 0 || m_attribs.axis < x.dims(), errors::InvalidArgument("axis out of bounds"));
-        OP_REQUIRES(ctx, b.dims() == 1, errors::InvalidArgument("b must have rank 1"));
-        OP_REQUIRES(ctx, b.NumElements() == 0 || b.NumElements() == x.dim_size(m_attribs.axis), errors::InvalidArgument("b has wrong number of elements"));
-        OP_REQUIRES(ctx, ref.NumElements() == ((p.grad == 0) ? 0 : x.NumElements()), errors::InvalidArgument("ref has wrong number of elements"));
-        OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("x is too large"));
-
-        p.sizeX = (int)x.NumElements();
-        p.sizeB = (int)b.NumElements();
-        p.stepB = 1;
-        for (int i = m_attribs.axis + 1; i < x.dims(); i++)
-            p.stepB *= (int)x.dim_size(i);
-
-        Tensor* y = NULL; // x.shape
-        OP_REQUIRES_OK(ctx, ctx->allocate_output(0, x.shape(), &y));
-        p.y = y->flat<T>().data();
-
-        p.loopX = 4;
-        int blockSize = 4 * 32;
-        int gridSize = (p.sizeX - 1) / (p.loopX * blockSize) + 1;
-        void* args[] = {&p};
-        OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel((void*)FusedBiasActKernel<T>, gridSize, blockSize, args, 0, stream));
-    }
-};
-
-REGISTER_OP("FusedBiasAct")
-    .Input ("x: T")
-    .Input ("b: T")
-    .Input ("ref: T")
-    .Output ("y: T")
-    .Attr ("T: {float, half}")
-    .Attr ("grad: int = 0")
-    .Attr ("axis: int = 1")
-    .Attr ("act: int = 0")
-    .Attr ("alpha: float = 0.0")
-    .Attr ("gain: float = 1.0");
-REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint<float>("T"), FusedBiasActOp<float>);
-REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"), FusedBiasActOp<Eigen::half>);
-
-//------------------------------------------------------------------------
diff --git a/tflib/ops/fused_bias_act.py b/tflib/ops/fused_bias_act.py
deleted file mode 100644
index 52f6bfd77a4b0151103c1a76fa877e084831f7c4..0000000000000000000000000000000000000000
--- a/tflib/ops/fused_bias_act.py
+++ /dev/null
@@ -1,196 +0,0 @@
-# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
-#
-# This work is made available under the Nvidia Source Code License-NC.
-# To view a copy of this license, visit -# https://nvlabs.github.io/stylegan2/license.html - -"""Custom TensorFlow ops for efficient bias and activation.""" - -import os -import numpy as np -import tensorflow as tf -from .. import custom_ops -from ...util import EasyDict - -def _get_plugin(): - return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu') - -#---------------------------------------------------------------------------- - -activation_funcs = { - 'linear': EasyDict(func=lambda x, **_: x, def_alpha=None, def_gain=1.0, cuda_idx=1, ref='y', zero_2nd_grad=True), - 'relu': EasyDict(func=lambda x, **_: tf.nn.relu(x), def_alpha=None, def_gain=np.sqrt(2), cuda_idx=2, ref='y', zero_2nd_grad=True), - 'lrelu': EasyDict(func=lambda x, alpha, **_: tf.nn.leaky_relu(x, alpha), def_alpha=0.2, def_gain=np.sqrt(2), cuda_idx=3, ref='y', zero_2nd_grad=True), - 'tanh': EasyDict(func=lambda x, **_: tf.nn.tanh(x), def_alpha=None, def_gain=1.0, cuda_idx=4, ref='y', zero_2nd_grad=False), - 'sigmoid': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x), def_alpha=None, def_gain=1.0, cuda_idx=5, ref='y', zero_2nd_grad=False), - 'elu': EasyDict(func=lambda x, **_: tf.nn.elu(x), def_alpha=None, def_gain=1.0, cuda_idx=6, ref='y', zero_2nd_grad=False), - 'selu': EasyDict(func=lambda x, **_: tf.nn.selu(x), def_alpha=None, def_gain=1.0, cuda_idx=7, ref='y', zero_2nd_grad=False), - 'softplus': EasyDict(func=lambda x, **_: tf.nn.softplus(x), def_alpha=None, def_gain=1.0, cuda_idx=8, ref='y', zero_2nd_grad=False), - 'swish': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x) * x, def_alpha=None, def_gain=np.sqrt(2), cuda_idx=9, ref='x', zero_2nd_grad=False), -} - -#---------------------------------------------------------------------------- - -def fused_bias_act(x, b=None, axis=1, act='linear', alpha=None, gain=None, impl='cuda'): - r"""Fused bias and activation function. - - Adds bias `b` to activation tensor `x`, evaluates activation function `act`, - and scales the result by `gain`. Each of the steps is optional. In most cases, - the fused op is considerably more efficient than performing the same calculation - using standard TensorFlow ops. It supports first and second order gradients, - but not third order gradients. - - Args: - x: Input activation tensor. Can have any shape, but if `b` is defined, the - dimension corresponding to `axis`, as well as the rank, must be known. - b: Bias vector, or `None` to disable. Must be a 1D tensor of the same type - as `x`. The shape must be known, and it must match the dimension of `x` - corresponding to `axis`. - axis: The dimension in `x` corresponding to the elements of `b`. - The value of `axis` is ignored if `b` is not specified. - act: Name of the activation function to evaluate, or `"linear"` to disable. - Can be e.g. `"relu"`, `"lrelu"`, `"tanh"`, `"sigmoid"`, `"swish"`, etc. - See `activation_funcs` for a full list. `None` is not allowed. - alpha: Shape parameter for the activation function, or `None` to use the default. - gain: Scaling factor for the output tensor, or `None` to use default. - See `activation_funcs` for the default scaling of each activation function. - If unsure, consider specifying `1.0`. - impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). - - Returns: - Tensor of the same shape and datatype as `x`. 
- """ - - impl_dict = { - 'ref': _fused_bias_act_ref, - 'cuda': _fused_bias_act_cuda, - } - return impl_dict[impl](x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain) - -#---------------------------------------------------------------------------- - -def _fused_bias_act_ref(x, b, axis, act, alpha, gain): - """Slow reference implementation of `fused_bias_act()` using standard TensorFlow ops.""" - - # Validate arguments. - x = tf.convert_to_tensor(x) - b = tf.convert_to_tensor(b) if b is not None else tf.constant([], dtype=x.dtype) - act_spec = activation_funcs[act] - assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis]) - assert b.shape[0] == 0 or 0 <= axis < x.shape.rank - if alpha is None: - alpha = act_spec.def_alpha - if gain is None: - gain = act_spec.def_gain - - # Add bias. - if b.shape[0] != 0: - x += tf.reshape(b, [-1 if i == axis else 1 for i in range(x.shape.rank)]) - - # Evaluate activation function. - x = act_spec.func(x, alpha=alpha) - - # Scale by gain. - if gain != 1: - x *= gain - return x - -#---------------------------------------------------------------------------- - -def _fused_bias_act_cuda(x, b, axis, act, alpha, gain): - """Fast CUDA implementation of `fused_bias_act()` using custom ops.""" - - # Validate arguments. - x = tf.convert_to_tensor(x) - empty_tensor = tf.constant([], dtype=x.dtype) - b = tf.convert_to_tensor(b) if b is not None else empty_tensor - act_spec = activation_funcs[act] - assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis]) - assert b.shape[0] == 0 or 0 <= axis < x.shape.rank - if alpha is None: - alpha = act_spec.def_alpha - if gain is None: - gain = act_spec.def_gain - - # Special cases. - if act == 'linear' and b is None and gain == 1.0: - return x - if act_spec.cuda_idx is None: - return _fused_bias_act_ref(x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain) - - # CUDA kernel. - cuda_kernel = _get_plugin().fused_bias_act - cuda_kwargs = dict(axis=axis, act=act_spec.cuda_idx, alpha=alpha, gain=gain) - - # Forward pass: y = func(x, b). - def func_y(x, b): - y = cuda_kernel(x=x, b=b, ref=empty_tensor, grad=0, **cuda_kwargs) - y.set_shape(x.shape) - return y - - # Backward pass: dx, db = grad(dy, x, y) - def grad_dx(dy, x, y): - ref = {'x': x, 'y': y}[act_spec.ref] - dx = cuda_kernel(x=dy, b=empty_tensor, ref=ref, grad=1, **cuda_kwargs) - dx.set_shape(x.shape) - return dx - def grad_db(dx): - if b.shape[0] == 0: - return empty_tensor - db = dx - if axis < x.shape.rank - 1: - db = tf.reduce_sum(db, list(range(axis + 1, x.shape.rank))) - if axis > 0: - db = tf.reduce_sum(db, list(range(axis))) - db.set_shape(b.shape) - return db - - # Second order gradients: d_dy, d_x = grad2(d_dx, d_db, x, y) - def grad2_d_dy(d_dx, d_db, x, y): - ref = {'x': x, 'y': y}[act_spec.ref] - d_dy = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=1, **cuda_kwargs) - d_dy.set_shape(x.shape) - return d_dy - def grad2_d_x(d_dx, d_db, x, y): - ref = {'x': x, 'y': y}[act_spec.ref] - d_x = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=2, **cuda_kwargs) - d_x.set_shape(x.shape) - return d_x - - # Fast version for piecewise-linear activation funcs. - @tf.custom_gradient - def func_zero_2nd_grad(x, b): - y = func_y(x, b) - @tf.custom_gradient - def grad(dy): - dx = grad_dx(dy, x, y) - db = grad_db(dx) - def grad2(d_dx, d_db): - d_dy = grad2_d_dy(d_dx, d_db, x, y) - return d_dy - return (dx, db), grad2 - return y, grad - - # Slow version for general activation funcs. 
-    @tf.custom_gradient
-    def func_nonzero_2nd_grad(x, b):
-        y = func_y(x, b)
-        def grad_wrap(dy):
-            @tf.custom_gradient
-            def grad_impl(dy, x):
-                dx = grad_dx(dy, x, y)
-                db = grad_db(dx)
-                def grad2(d_dx, d_db):
-                    d_dy = grad2_d_dy(d_dx, d_db, x, y)
-                    d_x = grad2_d_x(d_dx, d_db, x, y)
-                    return d_dy, d_x
-                return (dx, db), grad2
-            return grad_impl(dy, x)
-        return y, grad_wrap
-
-    # Which version to use?
-    if act_spec.zero_2nd_grad:
-        return func_zero_2nd_grad(x, b)
-    return func_nonzero_2nd_grad(x, b)
-
-#----------------------------------------------------------------------------
diff --git a/tflib/ops/upfirdn_2d.cu b/tflib/ops/upfirdn_2d.cu
deleted file mode 100644
index b97ef36c9e5ba46a92a380dbc687e275235a1ccf..0000000000000000000000000000000000000000
--- a/tflib/ops/upfirdn_2d.cu
+++ /dev/null
@@ -1,326 +0,0 @@
-// Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
-//
-// This work is made available under the Nvidia Source Code License-NC.
-// To view a copy of this license, visit
-// https://nvlabs.github.io/stylegan2/license.html
-
-#define EIGEN_USE_GPU
-#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/shape_inference.h"
-#include <stdio.h>
-
-using namespace tensorflow;
-using namespace tensorflow::shape_inference;
-
-//------------------------------------------------------------------------
-// Helpers.
-
-#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false)
-
-static __host__ __device__ __forceinline__ int floorDiv(int a, int b)
-{
-    int c = a / b;
-    if (c * b > a)
-        c--;
-    return c;
-}
-
-//------------------------------------------------------------------------
-// CUDA kernel params.
-
-template <class T>
-struct UpFirDn2DKernelParams
-{
-    const T* x; // [majorDim, inH, inW, minorDim]
-    const T* k; // [kernelH, kernelW]
-    T* y;       // [majorDim, outH, outW, minorDim]
-
-    int upx;
-    int upy;
-    int downx;
-    int downy;
-    int padx0;
-    int padx1;
-    int pady0;
-    int pady1;
-
-    int majorDim;
-    int inH;
-    int inW;
-    int minorDim;
-    int kernelH;
-    int kernelW;
-    int outH;
-    int outW;
-    int loopMajor;
-    int loopX;
-};
-
-//------------------------------------------------------------------------
-// General CUDA implementation for large filter kernels.
-
-template <class T>
-static __global__ void UpFirDn2DKernel_large(const UpFirDn2DKernelParams<T> p)
-{
-    // Calculate thread index.
-    int minorIdx = blockIdx.x * blockDim.x + threadIdx.x;
-    int outY = minorIdx / p.minorDim;
-    minorIdx -= outY * p.minorDim;
-    int outXBase = blockIdx.y * p.loopX * blockDim.y + threadIdx.y;
-    int majorIdxBase = blockIdx.z * p.loopMajor;
-    if (outXBase >= p.outW || outY >= p.outH || majorIdxBase >= p.majorDim)
-        return;
-
-    // Setup Y receptive field.
-    int midY = outY * p.downy + p.upy - 1 - p.pady0;
-    int inY = min(max(floorDiv(midY, p.upy), 0), p.inH);
-    int h = min(max(floorDiv(midY + p.kernelH, p.upy), 0), p.inH) - inY;
-    int kernelY = midY + p.kernelH - (inY + 1) * p.upy;
-
-    // Loop over majorDim and outX.
-    for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor && majorIdx < p.majorDim; loopMajor++, majorIdx++)
-    for (int loopX = 0, outX = outXBase; loopX < p.loopX && outX < p.outW; loopX++, outX += blockDim.y)
-    {
-        // Setup X receptive field.
-        int midX = outX * p.downx + p.upx - 1 - p.padx0;
-        int inX = min(max(floorDiv(midX, p.upx), 0), p.inW);
-        int w = min(max(floorDiv(midX + p.kernelW, p.upx), 0), p.inW) - inX;
-        int kernelX = midX + p.kernelW - (inX + 1) * p.upx;
-
-        // Initialize pointers.
-        const T* xp = &p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx];
-        const T* kp = &p.k[kernelY * p.kernelW + kernelX];
-        int xpx = p.minorDim;
-        int kpx = -p.upx;
-        int xpy = p.inW * p.minorDim;
-        int kpy = -p.upy * p.kernelW;
-
-        // Inner loop.
-        float v = 0.0f;
-        for (int y = 0; y < h; y++)
-        {
-            for (int x = 0; x < w; x++)
-            {
-                v += (float)(*xp) * (float)(*kp);
-                xp += xpx;
-                kp += kpx;
-            }
-            xp += xpy - w * xpx;
-            kp += kpy - w * kpx;
-        }
-
-        // Store result.
-        p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v;
-    }
-}
-
-//------------------------------------------------------------------------
-// Specialized CUDA implementation for small filter kernels.
-
-template <class T, int upx, int upy, int downx, int downy, int kernelW, int kernelH, int tileOutW, int tileOutH>
-static __global__ void UpFirDn2DKernel_small(const UpFirDn2DKernelParams<T> p)
-{
-    //assert(kernelW % upx == 0);
-    //assert(kernelH % upy == 0);
-    const int tileInW = ((tileOutW - 1) * downx + kernelW - 1) / upx + 1;
-    const int tileInH = ((tileOutH - 1) * downy + kernelH - 1) / upy + 1;
-    __shared__ volatile float sk[kernelH][kernelW];
-    __shared__ volatile float sx[tileInH][tileInW];
-
-    // Calculate tile index.
-    int minorIdx = blockIdx.x;
-    int tileOutY = minorIdx / p.minorDim;
-    minorIdx -= tileOutY * p.minorDim;
-    tileOutY *= tileOutH;
-    int tileOutXBase = blockIdx.y * p.loopX * tileOutW;
-    int majorIdxBase = blockIdx.z * p.loopMajor;
-    if (tileOutXBase >= p.outW | tileOutY >= p.outH | majorIdxBase >= p.majorDim)
-        return;
-
-    // Load filter kernel (flipped).
-    for (int tapIdx = threadIdx.x; tapIdx < kernelH * kernelW; tapIdx += blockDim.x)
-    {
-        int ky = tapIdx / kernelW;
-        int kx = tapIdx - ky * kernelW;
-        float v = 0.0f;
-        if (kx < p.kernelW & ky < p.kernelH)
-            v = (float)p.k[(p.kernelH - 1 - ky) * p.kernelW + (p.kernelW - 1 - kx)];
-        sk[ky][kx] = v;
-    }
-
-    // Loop over majorDim and outX.
-    for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor & majorIdx < p.majorDim; loopMajor++, majorIdx++)
-    for (int loopX = 0, tileOutX = tileOutXBase; loopX < p.loopX & tileOutX < p.outW; loopX++, tileOutX += tileOutW)
-    {
-        // Load input pixels.
-        int tileMidX = tileOutX * downx + upx - 1 - p.padx0;
-        int tileMidY = tileOutY * downy + upy - 1 - p.pady0;
-        int tileInX = floorDiv(tileMidX, upx);
-        int tileInY = floorDiv(tileMidY, upy);
-        __syncthreads();
-        for (int inIdx = threadIdx.x; inIdx < tileInH * tileInW; inIdx += blockDim.x)
-        {
-            int relInY = inIdx / tileInW;
-            int relInX = inIdx - relInY * tileInW;
-            int inX = relInX + tileInX;
-            int inY = relInY + tileInY;
-            float v = 0.0f;
-            if (inX >= 0 & inY >= 0 & inX < p.inW & inY < p.inH)
-                v = (float)p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx];
-            sx[relInY][relInX] = v;
-        }
-
-        // Loop over output pixels.
-        __syncthreads();
-        for (int outIdx = threadIdx.x; outIdx < tileOutH * tileOutW; outIdx += blockDim.x)
-        {
-            int relOutY = outIdx / tileOutW;
-            int relOutX = outIdx - relOutY * tileOutW;
-            int outX = relOutX + tileOutX;
-            int outY = relOutY + tileOutY;
-
-            // Setup receptive field.
-            int midX = tileMidX + relOutX * downx;
-            int midY = tileMidY + relOutY * downy;
-            int inX = floorDiv(midX, upx);
-            int inY = floorDiv(midY, upy);
-            int relInX = inX - tileInX;
-            int relInY = inY - tileInY;
-            int kernelX = (inX + 1) * upx - midX - 1; // flipped
-            int kernelY = (inY + 1) * upy - midY - 1; // flipped
-
-            // Inner loop.
-            float v = 0.0f;
-            #pragma unroll
-            for (int y = 0; y < kernelH / upy; y++)
-                #pragma unroll
-                for (int x = 0; x < kernelW / upx; x++)
-                    v += sx[relInY + y][relInX + x] * sk[kernelY + y * upy][kernelX + x * upx];
-
-            // Store result.
-            if (outX < p.outW & outY < p.outH)
-                p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v;
-        }
-    }
-}
-
-//------------------------------------------------------------------------
-// TensorFlow op.
-
-template <class T>
-struct UpFirDn2DOp : public OpKernel
-{
-    UpFirDn2DKernelParams<T> m_attribs;
-
-    UpFirDn2DOp(OpKernelConstruction* ctx) : OpKernel(ctx)
-    {
-        memset(&m_attribs, 0, sizeof(m_attribs));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("upx", &m_attribs.upx));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("upy", &m_attribs.upy));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("downx", &m_attribs.downx));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("downy", &m_attribs.downy));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("padx0", &m_attribs.padx0));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("padx1", &m_attribs.padx1));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("pady0", &m_attribs.pady0));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("pady1", &m_attribs.pady1));
-        OP_REQUIRES(ctx, m_attribs.upx >= 1 && m_attribs.upy >= 1, errors::InvalidArgument("upx and upy must be at least 1x1"));
-        OP_REQUIRES(ctx, m_attribs.downx >= 1 && m_attribs.downy >= 1, errors::InvalidArgument("downx and downy must be at least 1x1"));
-    }
-
-    void Compute(OpKernelContext* ctx)
-    {
-        UpFirDn2DKernelParams<T> p = m_attribs;
-        cudaStream_t stream = ctx->eigen_device<Eigen::GpuDevice>().stream();
-
-        const Tensor& x = ctx->input(0); // [majorDim, inH, inW, minorDim]
-        const Tensor& k = ctx->input(1); // [kernelH, kernelW]
-        p.x = x.flat<T>().data();
-        p.k = k.flat<T>().data();
-        OP_REQUIRES(ctx, x.dims() == 4, errors::InvalidArgument("input must have rank 4"));
-        OP_REQUIRES(ctx, k.dims() == 2, errors::InvalidArgument("kernel must have rank 2"));
-        OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("input too large"));
-        OP_REQUIRES(ctx, k.NumElements() <= kint32max, errors::InvalidArgument("kernel too large"));
-
-        p.majorDim = (int)x.dim_size(0);
-        p.inH = (int)x.dim_size(1);
-        p.inW = (int)x.dim_size(2);
-        p.minorDim = (int)x.dim_size(3);
-        p.kernelH = (int)k.dim_size(0);
-        p.kernelW = (int)k.dim_size(1);
-        OP_REQUIRES(ctx, p.kernelW >= 1 && p.kernelH >= 1, errors::InvalidArgument("kernel must be at least 1x1"));
-
-        p.outW = (p.inW * p.upx + p.padx0 + p.padx1 - p.kernelW + p.downx) / p.downx;
-        p.outH = (p.inH * p.upy + p.pady0 + p.pady1 - p.kernelH + p.downy) / p.downy;
-        OP_REQUIRES(ctx, p.outW >= 1 && p.outH >= 1, errors::InvalidArgument("output must be at least 1x1"));
-
-        Tensor* y = NULL; // [majorDim, outH, outW, minorDim]
-        TensorShape ys;
-        ys.AddDim(p.majorDim);
-        ys.AddDim(p.outH);
-        ys.AddDim(p.outW);
-        ys.AddDim(p.minorDim);
-        OP_REQUIRES_OK(ctx, ctx->allocate_output(0, ys, &y));
-        p.y = y->flat<T>().data();
-        OP_REQUIRES(ctx, y->NumElements() <= kint32max, errors::InvalidArgument("output too large"));
-
-        // Choose CUDA kernel to use.
-        void* cudaKernel = (void*)UpFirDn2DKernel_large<T>;
-        int tileOutW = -1;
-        int tileOutH = -1;
-        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 7 && p.kernelH <= 7) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 7,7, 64,16>; tileOutW = 64; tileOutH = 16; }
-        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 6,6, 64,16>; tileOutW = 64; tileOutH = 16; }
-        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 5 && p.kernelH <= 5) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 5,5, 64,16>; tileOutW = 64; tileOutH = 16; }
-        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 4,4, 64,16>; tileOutW = 64; tileOutH = 16; }
-        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 3 && p.kernelH <= 3) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 3,3, 64,16>; tileOutW = 64; tileOutH = 16; }
-        if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 8,8, 64,16>; tileOutW = 64; tileOutH = 16; }
-        if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 6,6, 64,16>; tileOutW = 64; tileOutH = 16; }
-        if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 4,4, 64,16>; tileOutW = 64; tileOutH = 16; }
-        if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 2,2, 64,16>; tileOutW = 64; tileOutH = 16; }
-        if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 8,8, 32,8>; tileOutW = 32; tileOutH = 8; }
-        if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 6,6, 32,8>; tileOutW = 32; tileOutH = 8; }
-        if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 4,4, 32,8>; tileOutW = 32; tileOutH = 8; }
-        if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 2,2, 32,8>; tileOutW = 32; tileOutH = 8; }
-
-        // Choose launch params.
-        dim3 blockSize;
-        dim3 gridSize;
-        if (tileOutW > 0 && tileOutH > 0) // small
-        {
-            p.loopMajor = (p.majorDim - 1) / 16384 + 1;
-            p.loopX = 1;
-            blockSize = dim3(32 * 8, 1, 1);
-            gridSize = dim3(((p.outH - 1) / tileOutH + 1) * p.minorDim, (p.outW - 1) / (p.loopX * tileOutW) + 1, (p.majorDim - 1) / p.loopMajor + 1);
-        }
-        else // large
-        {
-            p.loopMajor = (p.majorDim - 1) / 16384 + 1;
-            p.loopX = 4;
-            blockSize = dim3(4, 32, 1);
-            gridSize = dim3((p.outH * p.minorDim - 1) / blockSize.x + 1, (p.outW - 1) / (p.loopX * blockSize.y) + 1, (p.majorDim - 1) / p.loopMajor + 1);
-        }
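As a quick sanity check on the `outW` / `outH` arithmetic above (the Python wrappers later in this diff use the equivalent `// down + 1` form), a hedged numeric example, assuming 2x upsampling with a 4-tap FIR filter and the padding that `upsample_2d()` chooses:

```python
def out_size(in_size, up, down, pad0, pad1, kernel):
    # Integer form of (in*up + pad0 + pad1 - kernel + down) / down from above.
    return (in_size * up + pad0 + pad1 - kernel) // down + 1

# For upsample_2d(factor=2) with a 4-tap filter: p = 4 - 2 = 2,
# pad0 = (p + 1) // 2 + factor - 1 = 2, pad1 = p // 2 = 1.
assert out_size(32, up=2, down=1, pad0=2, pad1=1, kernel=4) == 64  # 32 -> 64
```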
-        // Launch CUDA kernel.
-        void* args[] = {&p};
-        OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel(cudaKernel, gridSize, blockSize, args, 0, stream));
-    }
-};
-
-REGISTER_OP("UpFirDn2D")
-    .Input ("x: T")
-    .Input ("k: T")
-    .Output ("y: T")
-    .Attr ("T: {float, half}")
-    .Attr ("upx: int = 1")
-    .Attr ("upy: int = 1")
-    .Attr ("downx: int = 1")
-    .Attr ("downy: int = 1")
-    .Attr ("padx0: int = 0")
-    .Attr ("padx1: int = 0")
-    .Attr ("pady0: int = 0")
-    .Attr ("pady1: int = 0");
-REGISTER_KERNEL_BUILDER(Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint<float>("T"), UpFirDn2DOp<float>);
-REGISTER_KERNEL_BUILDER(Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"), UpFirDn2DOp<Eigen::half>);
-
-//------------------------------------------------------------------------
diff --git a/tflib/ops/upfirdn_2d.py b/tflib/ops/upfirdn_2d.py
deleted file mode 100644
index fd23777ebb87bc83e8728d6fe3904fbbfb5c524c..0000000000000000000000000000000000000000
--- a/tflib/ops/upfirdn_2d.py
+++ /dev/null
@@ -1,364 +0,0 @@
-# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
-#
-# This work is made available under the Nvidia Source Code License-NC.
-# To view a copy of this license, visit
-# https://nvlabs.github.io/stylegan2/license.html
-
-"""Custom TensorFlow ops for efficient resampling of 2D images."""
-
-import os
-import numpy as np
-import tensorflow as tf
-from .. import custom_ops
-
-def _get_plugin():
-    return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu')
-
-#----------------------------------------------------------------------------
-
-def upfirdn_2d(x, k, upx=1, upy=1, downx=1, downy=1, padx0=0, padx1=0, pady0=0, pady1=0, impl='cuda'):
-    r"""Pad, upsample, FIR filter, and downsample a batch of 2D images.
-
-    Accepts a batch of 2D images of the shape `[majorDim, inH, inW, minorDim]`
-    and performs the following operations for each image, batched across
-    `majorDim` and `minorDim`:
-
-    1. Pad the image with zeros by the specified number of pixels on each side
-       (`padx0`, `padx1`, `pady0`, `pady1`). Specifying a negative value
-       corresponds to cropping the image.
-
-    2. Upsample the image by inserting the zeros after each pixel (`upx`, `upy`).
-
-    3. Convolve the image with the specified 2D FIR filter (`k`), shrinking the
-       image so that the footprint of all output pixels lies within the input image.
-
-    4. Downsample the image by throwing away pixels (`downx`, `downy`).
-
-    This sequence of operations bears close resemblance to scipy.signal.upfirdn().
-    The fused op is considerably more efficient than performing the same calculation
-    using standard TensorFlow ops. It supports gradients of arbitrary order.
-
-    Args:
-        x:      Input tensor of the shape `[majorDim, inH, inW, minorDim]`.
-        k:      2D FIR filter of the shape `[firH, firW]`.
-        upx:    Integer upsampling factor along the X-axis (default: 1).
-        upy:    Integer upsampling factor along the Y-axis (default: 1).
-        downx:  Integer downsampling factor along the X-axis (default: 1).
-        downy:  Integer downsampling factor along the Y-axis (default: 1).
-        padx0:  Number of pixels to pad on the left side (default: 0).
-        padx1:  Number of pixels to pad on the right side (default: 0).
-        pady0:  Number of pixels to pad on the top side (default: 0).
-        pady1:  Number of pixels to pad on the bottom side (default: 0).
-        impl:   Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).
-
-    Returns:
-        Tensor of the shape `[majorDim, outH, outW, minorDim]`, and same datatype as `x`.
- """ - - impl_dict = { - 'ref': _upfirdn_2d_ref, - 'cuda': _upfirdn_2d_cuda, - } - return impl_dict[impl](x=x, k=k, upx=upx, upy=upy, downx=downx, downy=downy, padx0=padx0, padx1=padx1, pady0=pady0, pady1=pady1) - -#---------------------------------------------------------------------------- - -def _upfirdn_2d_ref(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1): - """Slow reference implementation of `upfirdn_2d()` using standard TensorFlow ops.""" - - x = tf.convert_to_tensor(x) - k = np.asarray(k, dtype=np.float32) - assert x.shape.rank == 4 - inH = x.shape[1].value - inW = x.shape[2].value - minorDim = _shape(x, 3) - kernelH, kernelW = k.shape - assert inW >= 1 and inH >= 1 - assert kernelW >= 1 and kernelH >= 1 - assert isinstance(upx, int) and isinstance(upy, int) - assert isinstance(downx, int) and isinstance(downy, int) - assert isinstance(padx0, int) and isinstance(padx1, int) - assert isinstance(pady0, int) and isinstance(pady1, int) - - # Upsample (insert zeros). - x = tf.reshape(x, [-1, inH, 1, inW, 1, minorDim]) - x = tf.pad(x, [[0, 0], [0, 0], [0, upy - 1], [0, 0], [0, upx - 1], [0, 0]]) - x = tf.reshape(x, [-1, inH * upy, inW * upx, minorDim]) - - # Pad (crop if negative). - x = tf.pad(x, [[0, 0], [max(pady0, 0), max(pady1, 0)], [max(padx0, 0), max(padx1, 0)], [0, 0]]) - x = x[:, max(-pady0, 0) : x.shape[1].value - max(-pady1, 0), max(-padx0, 0) : x.shape[2].value - max(-padx1, 0), :] - - # Convolve with filter. - x = tf.transpose(x, [0, 3, 1, 2]) - x = tf.reshape(x, [-1, 1, inH * upy + pady0 + pady1, inW * upx + padx0 + padx1]) - w = tf.constant(k[::-1, ::-1, np.newaxis, np.newaxis], dtype=x.dtype) - x = tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='VALID', data_format='NCHW') - x = tf.reshape(x, [-1, minorDim, inH * upy + pady0 + pady1 - kernelH + 1, inW * upx + padx0 + padx1 - kernelW + 1]) - x = tf.transpose(x, [0, 2, 3, 1]) - - # Downsample (throw away pixels). 
- return x[:, ::downy, ::downx, :] - -#---------------------------------------------------------------------------- - -def _upfirdn_2d_cuda(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1): - """Fast CUDA implementation of `upfirdn_2d()` using custom ops.""" - - x = tf.convert_to_tensor(x) - k = np.asarray(k, dtype=np.float32) - majorDim, inH, inW, minorDim = x.shape.as_list() - kernelH, kernelW = k.shape - assert inW >= 1 and inH >= 1 - assert kernelW >= 1 and kernelH >= 1 - assert isinstance(upx, int) and isinstance(upy, int) - assert isinstance(downx, int) and isinstance(downy, int) - assert isinstance(padx0, int) and isinstance(padx1, int) - assert isinstance(pady0, int) and isinstance(pady1, int) - - outW = (inW * upx + padx0 + padx1 - kernelW) // downx + 1 - outH = (inH * upy + pady0 + pady1 - kernelH) // downy + 1 - assert outW >= 1 and outH >= 1 - - kc = tf.constant(k, dtype=x.dtype) - gkc = tf.constant(k[::-1, ::-1], dtype=x.dtype) - gpadx0 = kernelW - padx0 - 1 - gpady0 = kernelH - pady0 - 1 - gpadx1 = inW * upx - outW * downx + padx0 - upx + 1 - gpady1 = inH * upy - outH * downy + pady0 - upy + 1 - - @tf.custom_gradient - def func(x): - y = _get_plugin().up_fir_dn2d(x=x, k=kc, upx=upx, upy=upy, downx=downx, downy=downy, padx0=padx0, padx1=padx1, pady0=pady0, pady1=pady1) - y.set_shape([majorDim, outH, outW, minorDim]) - @tf.custom_gradient - def grad(dy): - dx = _get_plugin().up_fir_dn2d(x=dy, k=gkc, upx=downx, upy=downy, downx=upx, downy=upy, padx0=gpadx0, padx1=gpadx1, pady0=gpady0, pady1=gpady1) - dx.set_shape([majorDim, inH, inW, minorDim]) - return dx, func - return y, grad - return func(x) - -#---------------------------------------------------------------------------- - -def filter_2d(x, k, gain=1, data_format='NCHW', impl='cuda'): - r"""Filter a batch of 2D images with the given FIR filter. - - Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` - and filters each image with the given filter. The filter is normalized so that - if the input pixels are constant, they will be scaled by the specified `gain`. - Pixels outside the image are assumed to be zero. - - Args: - x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. - k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). - gain: Scaling factor for signal magnitude (default: 1.0). - data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). - impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). - - Returns: - Tensor of the same shape and datatype as `x`. - """ - - k = _setup_kernel(k) * gain - p = k.shape[0] - 1 - return _simple_upfirdn_2d(x, k, pad0=(p+1)//2, pad1=p//2, data_format=data_format, impl=impl) - -#---------------------------------------------------------------------------- - -def upsample_2d(x, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'): - r"""Upsample a batch of 2D images with the given filter. - - Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` - and upsamples each image with the given filter. The filter is normalized so that - if the input pixels are constant, they will be scaled by the specified `gain`. - Pixels outside the image are assumed to be zero, and the filter is padded with - zeros so that its shape is a multiple of the upsampling factor. - - Args: - x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. - k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). - The default is `[1] * factor`, which corresponds to nearest-neighbor - upsampling. 
- factor: Integer upsampling factor (default: 2). - gain: Scaling factor for signal magnitude (default: 1.0). - data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). - impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). - - Returns: - Tensor of the shape `[N, C, H * factor, W * factor]` or - `[N, H * factor, W * factor, C]`, and same datatype as `x`. - """ - - assert isinstance(factor, int) and factor >= 1 - if k is None: - k = [1] * factor - k = _setup_kernel(k) * (gain * (factor ** 2)) - p = k.shape[0] - factor - return _simple_upfirdn_2d(x, k, up=factor, pad0=(p+1)//2+factor-1, pad1=p//2, data_format=data_format, impl=impl) - -#---------------------------------------------------------------------------- - -def downsample_2d(x, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'): - r"""Downsample a batch of 2D images with the given filter. - - Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` - and downsamples each image with the given filter. The filter is normalized so that - if the input pixels are constant, they will be scaled by the specified `gain`. - Pixels outside the image are assumed to be zero, and the filter is padded with - zeros so that its shape is a multiple of the downsampling factor. - - Args: - x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. - k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). - The default is `[1] * factor`, which corresponds to average pooling. - factor: Integer downsampling factor (default: 2). - gain: Scaling factor for signal magnitude (default: 1.0). - data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). - impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). - - Returns: - Tensor of the shape `[N, C, H // factor, W // factor]` or - `[N, H // factor, W // factor, C]`, and same datatype as `x`. - """ - - assert isinstance(factor, int) and factor >= 1 - if k is None: - k = [1] * factor - k = _setup_kernel(k) * gain - p = k.shape[0] - factor - return _simple_upfirdn_2d(x, k, down=factor, pad0=(p+1)//2, pad1=p//2, data_format=data_format, impl=impl) - -#---------------------------------------------------------------------------- - -def upsample_conv_2d(x, w, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'): - r"""Fused `upsample_2d()` followed by `tf.nn.conv2d()`. - - Padding is performed only once at the beginning, not between the operations. - The fused op is considerably more efficient than performing the same calculation - using standard TensorFlow ops. It supports gradients of arbitrary order. - - Args: - x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. - w: Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`. - Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`. - k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). - The default is `[1] * factor`, which corresponds to nearest-neighbor - upsampling. - factor: Integer upsampling factor (default: 2). - gain: Scaling factor for signal magnitude (default: 1.0). - data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). - impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). - - Returns: - Tensor of the shape `[N, C, H * factor, W * factor]` or - `[N, H * factor, W * factor, C]`, and same datatype as `x`. - """ - - assert isinstance(factor, int) and factor >= 1 - - # Check weight shape. 
- w = tf.convert_to_tensor(w) - assert w.shape.rank == 4 - convH = w.shape[0].value - convW = w.shape[1].value - inC = _shape(w, 2) - outC = _shape(w, 3) - assert convW == convH - - # Setup filter kernel. - if k is None: - k = [1] * factor - k = _setup_kernel(k) * (gain * (factor ** 2)) - p = (k.shape[0] - factor) - (convW - 1) - - # Determine data dimensions. - if data_format == 'NCHW': - stride = [1, 1, factor, factor] - output_shape = [_shape(x, 0), outC, (_shape(x, 2) - 1) * factor + convH, (_shape(x, 3) - 1) * factor + convW] - num_groups = _shape(x, 1) // inC - else: - stride = [1, factor, factor, 1] - output_shape = [_shape(x, 0), (_shape(x, 1) - 1) * factor + convH, (_shape(x, 2) - 1) * factor + convW, outC] - num_groups = _shape(x, 3) // inC - - # Transpose weights. - w = tf.reshape(w, [convH, convW, inC, num_groups, -1]) - w = tf.transpose(w[::-1, ::-1], [0, 1, 4, 3, 2]) - w = tf.reshape(w, [convH, convW, -1, num_groups * inC]) - - # Execute. - x = tf.nn.conv2d_transpose(x, w, output_shape=output_shape, strides=stride, padding='VALID', data_format=data_format) - return _simple_upfirdn_2d(x, k, pad0=(p+1)//2+factor-1, pad1=p//2+1, data_format=data_format, impl=impl) - -#---------------------------------------------------------------------------- - -def conv_downsample_2d(x, w, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'): - r"""Fused `tf.nn.conv2d()` followed by `downsample_2d()`. - - Padding is performed only once at the beginning, not between the operations. - The fused op is considerably more efficient than performing the same calculation - using standard TensorFlow ops. It supports gradients of arbitrary order. - - Args: - x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. - w: Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`. - Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`. - k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). - The default is `[1] * factor`, which corresponds to average pooling. - factor: Integer downsampling factor (default: 2). - gain: Scaling factor for signal magnitude (default: 1.0). - data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). - impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). - - Returns: - Tensor of the shape `[N, C, H // factor, W // factor]` or - `[N, H // factor, W // factor, C]`, and same datatype as `x`. - """ - - assert isinstance(factor, int) and factor >= 1 - w = tf.convert_to_tensor(w) - convH, convW, _inC, _outC = w.shape.as_list() - assert convW == convH - if k is None: - k = [1] * factor - k = _setup_kernel(k) * gain - p = (k.shape[0] - factor) + (convW - 1) - if data_format == 'NCHW': - s = [1, 1, factor, factor] - else: - s = [1, factor, factor, 1] - x = _simple_upfirdn_2d(x, k, pad0=(p+1)//2, pad1=p//2, data_format=data_format, impl=impl) - return tf.nn.conv2d(x, w, strides=s, padding='VALID', data_format=data_format) - -#---------------------------------------------------------------------------- -# Internal helper funcs. 
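Before the helper definitions, a hedged NumPy-only illustration of what `_setup_kernel()` below does with the default `k = [1] * factor`: the 1D filter becomes a 2D separable filter via an outer product and is normalized to sum 1, which is why the docstrings above can describe the defaults as average pooling (downsampling) and nearest-neighbor (upsampling):

```python
import numpy as np

# Mirrors _setup_kernel() for k = [1, 1], i.e. factor 2.
k = np.outer([1, 1], [1, 1]).astype(np.float32)
k /= np.sum(k)
assert np.allclose(k, 0.25)  # 2x2 box filter == 2x average pooling

# upsample_2d() additionally scales by gain * factor**2 to compensate for
# the inserted zeros, turning the same box filter into pixel duplication.
assert np.allclose(k * 2**2, 1.0)
```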
- -def _shape(tf_expr, dim_idx): - if tf_expr.shape.rank is not None: - dim = tf_expr.shape[dim_idx].value - if dim is not None: - return dim - return tf.shape(tf_expr)[dim_idx] - -def _setup_kernel(k): - k = np.asarray(k, dtype=np.float32) - if k.ndim == 1: - k = np.outer(k, k) - k /= np.sum(k) - assert k.ndim == 2 - assert k.shape[0] == k.shape[1] - return k - -def _simple_upfirdn_2d(x, k, up=1, down=1, pad0=0, pad1=0, data_format='NCHW', impl='cuda'): - assert data_format in ['NCHW', 'NHWC'] - assert x.shape.rank == 4 - y = x - if data_format == 'NCHW': - y = tf.reshape(y, [-1, _shape(y, 2), _shape(y, 3), 1]) - y = upfirdn_2d(y, k, upx=up, upy=up, downx=down, downy=down, padx0=pad0, padx1=pad1, pady0=pad0, pady1=pad1, impl=impl) - if data_format == 'NCHW': - y = tf.reshape(y, [-1, _shape(x, 1), _shape(y, 1), _shape(y, 2)]) - return y - -#---------------------------------------------------------------------------- diff --git a/tflib/optimizer.py b/tflib/optimizer.py deleted file mode 100644 index 9389b5adaa4b47a896410d549016316e6b85ea2d..0000000000000000000000000000000000000000 --- a/tflib/optimizer.py +++ /dev/null @@ -1,370 +0,0 @@ -# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. -# -# This work is made available under the Nvidia Source Code License-NC. -# To view a copy of this license, visit -# https://nvlabs.github.io/stylegan2/license.html - -"""Helper wrapper for a Tensorflow optimizer.""" - -import platform -import numpy as np -import tensorflow as tf - -from collections import OrderedDict -from typing import List, Union - -from . import autosummary -from . import tfutil -from .. import util - -from .tfutil import TfExpression, TfExpressionEx - -_collective_ops_warning_printed = False -_collective_ops_group_key = 831766147 -_collective_ops_instance_key = 436340067 - -class Optimizer: - """A Wrapper for tf.train.Optimizer. - - Automatically takes care of: - - Gradient averaging for multi-GPU training. - - Gradient accumulation for arbitrarily large minibatches. - - Dynamic loss scaling and typecasts for FP16 training. - - Ignoring corrupted gradients that contain NaNs/Infs. - - Reporting statistics. - - Well-chosen default settings. - """ - - def __init__(self, - name: str = "Train", # Name string that will appear in TensorFlow graph. - tf_optimizer: str = "tf.train.AdamOptimizer", # Underlying optimizer class. - learning_rate: TfExpressionEx = 0.001, # Learning rate. Can vary over time. - minibatch_multiplier: TfExpressionEx = None, # Treat N consecutive minibatches as one by accumulating gradients. - share: "Optimizer" = None, # Share internal state with a previously created optimizer? - use_loss_scaling: bool = False, # Enable dynamic loss scaling for robust mixed-precision training? - loss_scaling_init: float = 64.0, # Log2 of initial loss scaling factor. - loss_scaling_inc: float = 0.0005, # Log2 of per-minibatch loss scaling increment when there is no overflow. - loss_scaling_dec: float = 1.0, # Log2 of per-minibatch loss scaling decrement when there is an overflow. - report_mem_usage: bool = False, # Report fine-grained memory usage statistics in TensorBoard? - **kwargs): - - # Public fields. 
- self.name = name - self.learning_rate = learning_rate - self.minibatch_multiplier = minibatch_multiplier - self.id = self.name.replace("/", ".") - self.scope = tf.get_default_graph().unique_name(self.id) - self.optimizer_class = util.get_obj_by_name(tf_optimizer) - self.optimizer_kwargs = dict(kwargs) - self.use_loss_scaling = use_loss_scaling - self.loss_scaling_init = loss_scaling_init - self.loss_scaling_inc = loss_scaling_inc - self.loss_scaling_dec = loss_scaling_dec - - # Private fields. - self._updates_applied = False - self._devices = OrderedDict() # device_name => EasyDict() - self._shared_optimizers = OrderedDict() # device_name => optimizer_class - self._gradient_shapes = None # [shape, ...] - self._report_mem_usage = report_mem_usage - - # Validate arguments. - assert callable(self.optimizer_class) - - # Share internal state if requested. - if share is not None: - assert isinstance(share, Optimizer) - assert self.optimizer_class is share.optimizer_class - assert self.learning_rate is share.learning_rate - assert self.optimizer_kwargs == share.optimizer_kwargs - self._shared_optimizers = share._shared_optimizers # pylint: disable=protected-access - - def _get_device(self, device_name: str): - """Get internal state for the given TensorFlow device.""" - tfutil.assert_tf_initialized() - if device_name in self._devices: - return self._devices[device_name] - - # Initialize fields. - device = util.EasyDict() - device.name = device_name - device.optimizer = None # Underlying optimizer: optimizer_class - device.loss_scaling_var = None # Log2 of loss scaling: tf.Variable - device.grad_raw = OrderedDict() # Raw gradients: var => [grad, ...] - device.grad_clean = OrderedDict() # Clean gradients: var => grad - device.grad_acc_vars = OrderedDict() # Accumulation sums: var => tf.Variable - device.grad_acc_count = None # Accumulation counter: tf.Variable - device.grad_acc = OrderedDict() # Accumulated gradients: var => grad - - # Setup TensorFlow objects. - with tfutil.absolute_name_scope(self.scope + "/Devices"), tf.device(device_name), tf.control_dependencies(None): - if device_name not in self._shared_optimizers: - optimizer_name = self.scope.replace("/", "_") + "_opt%d" % len(self._shared_optimizers) - self._shared_optimizers[device_name] = self.optimizer_class(name=optimizer_name, learning_rate=self.learning_rate, **self.optimizer_kwargs) - device.optimizer = self._shared_optimizers[device_name] - if self.use_loss_scaling: - device.loss_scaling_var = tf.Variable(np.float32(self.loss_scaling_init), trainable=False, name="loss_scaling_var") - - # Register device. - self._devices[device_name] = device - return device - - def register_gradients(self, loss: TfExpression, trainable_vars: Union[List, dict]) -> None: - """Register the gradients of the given loss function with respect to the given variables. - Intended to be called once per GPU.""" - tfutil.assert_tf_initialized() - assert not self._updates_applied - device = self._get_device(loss.device) - - # Validate trainables. - if isinstance(trainable_vars, dict): - trainable_vars = list(trainable_vars.values()) # allow passing in Network.trainables as vars - assert isinstance(trainable_vars, list) and len(trainable_vars) >= 1 - assert all(tfutil.is_tf_expression(expr) for expr in trainable_vars + [loss]) - assert all(var.device == device.name for var in trainable_vars) - - # Validate shapes. 
- if self._gradient_shapes is None: - self._gradient_shapes = [var.shape.as_list() for var in trainable_vars] - assert len(trainable_vars) == len(self._gradient_shapes) - assert all(var.shape.as_list() == var_shape for var, var_shape in zip(trainable_vars, self._gradient_shapes)) - - # Report memory usage if requested. - deps = [] - if self._report_mem_usage: - self._report_mem_usage = False - try: - with tf.name_scope(self.id + '_mem'), tf.device(device.name), tf.control_dependencies([loss]): - deps.append(autosummary.autosummary(self.id + "/mem_usage_gb", tf.contrib.memory_stats.BytesInUse() / 2**30)) - except tf.errors.NotFoundError: - pass - - # Compute gradients. - with tf.name_scope(self.id + "_grad"), tf.device(device.name), tf.control_dependencies(deps): - loss = self.apply_loss_scaling(tf.cast(loss, tf.float32)) - gate = tf.train.Optimizer.GATE_NONE # disable gating to reduce memory usage - grad_list = device.optimizer.compute_gradients(loss=loss, var_list=trainable_vars, gate_gradients=gate) - - # Register gradients. - for grad, var in grad_list: - if var not in device.grad_raw: - device.grad_raw[var] = [] - device.grad_raw[var].append(grad) - - def apply_updates(self, allow_no_op: bool = False) -> tf.Operation: - """Construct training op to update the registered variables based on their gradients.""" - tfutil.assert_tf_initialized() - assert not self._updates_applied - self._updates_applied = True - all_ops = [] - - # Check for no-op. - if allow_no_op and len(self._devices) == 0: - with tfutil.absolute_name_scope(self.scope): - return tf.no_op(name='TrainingOp') - - # Clean up gradients. - for device_idx, device in enumerate(self._devices.values()): - with tfutil.absolute_name_scope(self.scope + "/Clean%d" % device_idx), tf.device(device.name): - for var, grad in device.grad_raw.items(): - - # Filter out disconnected gradients and convert to float32. - grad = [g for g in grad if g is not None] - grad = [tf.cast(g, tf.float32) for g in grad] - - # Sum within the device. - if len(grad) == 0: - grad = tf.zeros(var.shape) # No gradients => zero. - elif len(grad) == 1: - grad = grad[0] # Single gradient => use as is. - else: - grad = tf.add_n(grad) # Multiple gradients => sum. - - # Scale as needed. - scale = 1.0 / len(device.grad_raw[var]) / len(self._devices) - scale = tf.constant(scale, dtype=tf.float32, name="scale") - if self.minibatch_multiplier is not None: - scale /= tf.cast(self.minibatch_multiplier, tf.float32) - scale = self.undo_loss_scaling(scale) - device.grad_clean[var] = grad * scale - - # Sum gradients across devices. - if len(self._devices) > 1: - with tfutil.absolute_name_scope(self.scope + "/Broadcast"), tf.device(None): - if platform.system() == "Windows": # Windows => NCCL ops are not available. - self._broadcast_fallback() - elif tf.VERSION.startswith("1.15."): # TF 1.15 => NCCL ops are broken: https://github.com/tensorflow/tensorflow/issues/41539 - self._broadcast_fallback() - else: # Otherwise => NCCL ops are safe to use. - self._broadcast_nccl() - - # Apply updates separately on each device. - for device_idx, device in enumerate(self._devices.values()): - with tfutil.absolute_name_scope(self.scope + "/Apply%d" % device_idx), tf.device(device.name): - # pylint: disable=cell-var-from-loop - - # Accumulate gradients over time. - if self.minibatch_multiplier is None: - acc_ok = tf.constant(True, name='acc_ok') - device.grad_acc = OrderedDict(device.grad_clean) - else: - # Create variables. 
- with tf.control_dependencies(None): - for var in device.grad_clean.keys(): - device.grad_acc_vars[var] = tf.Variable(tf.zeros(var.shape), trainable=False, name="grad_acc_var") - device.grad_acc_count = tf.Variable(tf.zeros([]), trainable=False, name="grad_acc_count") - - # Track counter. - count_cur = device.grad_acc_count + 1.0 - count_inc_op = lambda: tf.assign(device.grad_acc_count, count_cur) - count_reset_op = lambda: tf.assign(device.grad_acc_count, tf.zeros([])) - acc_ok = (count_cur >= tf.cast(self.minibatch_multiplier, tf.float32)) - all_ops.append(tf.cond(acc_ok, count_reset_op, count_inc_op)) - - # Track gradients. - for var, grad in device.grad_clean.items(): - acc_var = device.grad_acc_vars[var] - acc_cur = acc_var + grad - device.grad_acc[var] = acc_cur - with tf.control_dependencies([acc_cur]): - acc_inc_op = lambda: tf.assign(acc_var, acc_cur) - acc_reset_op = lambda: tf.assign(acc_var, tf.zeros(var.shape)) - all_ops.append(tf.cond(acc_ok, acc_reset_op, acc_inc_op)) - - # No overflow => apply gradients. - all_ok = tf.reduce_all(tf.stack([acc_ok] + [tf.reduce_all(tf.is_finite(g)) for g in device.grad_acc.values()])) - apply_op = lambda: device.optimizer.apply_gradients([(tf.cast(grad, var.dtype), var) for var, grad in device.grad_acc.items()]) - all_ops.append(tf.cond(all_ok, apply_op, tf.no_op)) - - # Adjust loss scaling. - if self.use_loss_scaling: - ls_inc_op = lambda: tf.assign_add(device.loss_scaling_var, self.loss_scaling_inc) - ls_dec_op = lambda: tf.assign_sub(device.loss_scaling_var, self.loss_scaling_dec) - ls_update_op = lambda: tf.group(tf.cond(all_ok, ls_inc_op, ls_dec_op)) - all_ops.append(tf.cond(acc_ok, ls_update_op, tf.no_op)) - - # Last device => report statistics. - if device_idx == len(self._devices) - 1: - all_ops.append(autosummary.autosummary(self.id + "/learning_rate", tf.convert_to_tensor(self.learning_rate))) - all_ops.append(autosummary.autosummary(self.id + "/overflow_frequency", tf.where(all_ok, 0, 1), condition=acc_ok)) - if self.use_loss_scaling: - all_ops.append(autosummary.autosummary(self.id + "/loss_scaling_log2", device.loss_scaling_var)) - - # Initialize variables. - self.reset_optimizer_state() - if self.use_loss_scaling: - tfutil.init_uninitialized_vars([device.loss_scaling_var for device in self._devices.values()]) - if self.minibatch_multiplier is not None: - tfutil.run([var.initializer for device in self._devices.values() for var in list(device.grad_acc_vars.values()) + [device.grad_acc_count]]) - - # Group everything into a single op. 
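Before everything is grouped into the final training op below, the intended calling pattern is worth spelling out. A hedged sketch; the device loop, loss construction, and `net.trainables` are hypothetical placeholders, and only the register-per-GPU-then-apply sequence is prescribed by the class above:

```python
# Assumed context: num_gpus, build_loss_for_gpu(), and net are placeholders.
opt = Optimizer(name='TrainG', tf_optimizer='tf.train.AdamOptimizer', learning_rate=0.002)
for gpu_idx in range(num_gpus):
    with tf.device('/gpu:%d' % gpu_idx):
        loss = build_loss_for_gpu(gpu_idx)
        opt.register_gradients(loss, net.trainables)  # once per GPU
train_op = opt.apply_updates()  # averages across GPUs, handles scaling/overflow
tfutil.run(train_op)            # execute one training step
```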
-
-    def reset_optimizer_state(self) -> None:
-        """Reset internal state of the underlying optimizer."""
-        tfutil.assert_tf_initialized()
-        tfutil.run([var.initializer for device in self._devices.values() for var in device.optimizer.variables()])
-
-    def get_loss_scaling_var(self, device: str) -> Union[tf.Variable, None]:
-        """Get or create variable representing log2 of the current dynamic loss scaling factor."""
-        return self._get_device(device).loss_scaling_var
-
-    def apply_loss_scaling(self, value: TfExpression) -> TfExpression:
-        """Apply dynamic loss scaling for the given expression."""
-        assert tfutil.is_tf_expression(value)
-        if not self.use_loss_scaling:
-            return value
-        return value * tfutil.exp2(self.get_loss_scaling_var(value.device))
-
-    def undo_loss_scaling(self, value: TfExpression) -> TfExpression:
-        """Undo the effect of dynamic loss scaling for the given expression."""
-        assert tfutil.is_tf_expression(value)
-        if not self.use_loss_scaling:
-            return value
-        return value * tfutil.exp2(-self.get_loss_scaling_var(value.device))  # pylint: disable=invalid-unary-operand-type
-
-    def _broadcast_nccl(self):
-        """Sum gradients across devices using NCCL ops (fast path)."""
-        from tensorflow.python.ops import nccl_ops  # pylint: disable=no-name-in-module
-        for all_vars in zip(*[device.grad_clean.keys() for device in self._devices.values()]):
-            if any(x.shape.num_elements() > 0 for x in all_vars):
-                all_grads = [device.grad_clean[var] for device, var in zip(self._devices.values(), all_vars)]
-                all_grads = nccl_ops.all_sum(all_grads)
-                for device, var, grad in zip(self._devices.values(), all_vars, all_grads):
-                    device.grad_clean[var] = grad
-
-    def _broadcast_fallback(self):
-        """Sum gradients across devices using TensorFlow collective ops (slow fallback path)."""
-        from tensorflow.python.ops import collective_ops  # pylint: disable=no-name-in-module
-        global _collective_ops_warning_printed, _collective_ops_group_key, _collective_ops_instance_key
-        if all(x.shape.num_elements() == 0 for device in self._devices.values() for x in device.grad_clean.values()):
-            return
-        if not _collective_ops_warning_printed:
-            print("------------------------------------------------------------------------")
-            print("WARNING: Using slow fallback implementation for inter-GPU communication.")
-            print("Please use TensorFlow 1.14 on Linux for optimal training performance.")
-            print("------------------------------------------------------------------------")
-            _collective_ops_warning_printed = True
-        for device in self._devices.values():
-            with tf.device(device.name):
-                combo = [tf.reshape(x, [x.shape.num_elements()]) for x in device.grad_clean.values()]
-                combo = tf.concat(combo, axis=0)
-                combo = collective_ops.all_reduce(combo, merge_op='Add', final_op='Id',
-                    group_size=len(self._devices), group_key=_collective_ops_group_key,
-                    instance_key=_collective_ops_instance_key)
-                cur_ofs = 0
-                for var, grad_old in device.grad_clean.items():
-                    grad_new = tf.reshape(combo[cur_ofs : cur_ofs + grad_old.shape.num_elements()], grad_old.shape)
-                    cur_ofs += grad_old.shape.num_elements()
-                    device.grad_clean[var] = grad_new
-        _collective_ops_instance_key += 1
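The fallback path flattens every gradient into one contiguous vector so a single all-reduce call suffices, then slices the summed result back into the original shapes. The same bookkeeping in plain NumPy, with np.sum standing in for collective_ops.all_reduce (a sketch for clarity; names are illustrative):

import numpy as np

def allreduce_by_concat(grads_per_device):
    # grads_per_device: one list of gradient arrays per device; shapes match across devices.
    flat = [np.concatenate([g.ravel() for g in grads]) for grads in grads_per_device]
    total = np.sum(flat, axis=0)          # stand-in for the collective all-reduce
    result = []
    for grads in grads_per_device:
        out, ofs = [], 0
        for g in grads:                    # slice the summed vector back out per tensor
            out.append(total[ofs:ofs + g.size].reshape(g.shape))
            ofs += g.size
        result.append(out)
    return result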
-
-
-class SimpleAdam:
-    """Simplified version of tf.train.AdamOptimizer that behaves identically when used with dnnlib.tflib.Optimizer."""
-
-    def __init__(self, name="Adam", learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
-        self.name = name
-        self.learning_rate = learning_rate
-        self.beta1 = beta1
-        self.beta2 = beta2
-        self.epsilon = epsilon
-        self.all_state_vars = []
-
-    def variables(self):
-        return self.all_state_vars
-
-    def compute_gradients(self, loss, var_list, gate_gradients=tf.train.Optimizer.GATE_NONE):
-        assert gate_gradients == tf.train.Optimizer.GATE_NONE
-        return list(zip(tf.gradients(loss, var_list), var_list))
-
-    def apply_gradients(self, grads_and_vars):
-        with tf.name_scope(self.name):
-            state_vars = []
-            update_ops = []
-
-            # Adjust learning rate to deal with startup bias.
-            with tf.control_dependencies(None):
-                b1pow_var = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False)
-                b2pow_var = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False)
-                state_vars += [b1pow_var, b2pow_var]
-            b1pow_new = b1pow_var * self.beta1
-            b2pow_new = b2pow_var * self.beta2
-            update_ops += [tf.assign(b1pow_var, b1pow_new), tf.assign(b2pow_var, b2pow_new)]
-            lr_new = self.learning_rate * tf.sqrt(1 - b2pow_new) / (1 - b1pow_new)
-
-            # Construct ops to update each variable.
-            for grad, var in grads_and_vars:
-                with tf.control_dependencies(None):
-                    m_var = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(var), trainable=False)
-                    v_var = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(var), trainable=False)
-                    state_vars += [m_var, v_var]
-                m_new = self.beta1 * m_var + (1 - self.beta1) * grad
-                v_new = self.beta2 * v_var + (1 - self.beta2) * tf.square(grad)
-                var_delta = lr_new * m_new / (tf.sqrt(v_new) + self.epsilon)
-                update_ops += [tf.assign(m_var, m_new), tf.assign(v_var, v_new), tf.assign_sub(var, var_delta)]
-
-            # Group everything together.
-            self.all_state_vars += state_vars
-            return tf.group(*update_ops)
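SimpleAdam folds Adam's startup-bias correction into the step size: lr_t = lr * sqrt(1 - beta2^t) / (1 - beta1^t), where t is the step index tracked by b1pow_var and b2pow_var. A NumPy reference for a single step under the same update rule (a sanity sketch, not part of the library):

import numpy as np

def adam_step(var, grad, m, v, t, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
    # t is the 1-based step index; m and v are the running moment estimates.
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * np.square(grad)
    lr_t = lr * np.sqrt(1 - beta2 ** t) / (1 - beta1 ** t)   # bias-corrected step size
    return var - lr_t * m / (np.sqrt(v) + eps), m, v

Note that, like the class above, epsilon is added to the uncorrected sqrt(v), which matches tf.train.AdamOptimizer's formulation.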
diff --git a/tflib/tfutil.py b/tflib/tfutil.py
deleted file mode 100644
index 1127c7beecfe526b459b3b99ee34e1c431e19e1c..0000000000000000000000000000000000000000
--- a/tflib/tfutil.py
+++ /dev/null
@@ -1,252 +0,0 @@
-# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
-#
-# This work is made available under the Nvidia Source Code License-NC.
-# To view a copy of this license, visit
-# https://nvlabs.github.io/stylegan2/license.html
-
-"""Miscellaneous helper utils for Tensorflow."""
-
-import os
-import numpy as np
-import tensorflow as tf
-
-# Silence deprecation warnings from TensorFlow 1.13 onwards
-import logging
-logging.getLogger('tensorflow').setLevel(logging.ERROR)
-import tensorflow.contrib  # requires TensorFlow 1.x!
-tf.contrib = tensorflow.contrib
-
-from typing import Any, Iterable, List, Union
-
-TfExpression = Union[tf.Tensor, tf.Variable, tf.Operation]
-"""A type that represents a valid Tensorflow expression."""
-
-TfExpressionEx = Union[TfExpression, int, float, np.ndarray]
-"""A type that can be converted to a valid Tensorflow expression."""
-
-
-def run(*args, **kwargs) -> Any:
-    """Run the specified ops in the default session."""
-    assert_tf_initialized()
-    return tf.get_default_session().run(*args, **kwargs)
-
-
-def is_tf_expression(x: Any) -> bool:
-    """Check whether the input is a valid Tensorflow expression, i.e., Tensorflow Tensor, Variable, or Operation."""
-    return isinstance(x, (tf.Tensor, tf.Variable, tf.Operation))
-
-
-def shape_to_list(shape: Iterable[tf.Dimension]) -> List[Union[int, None]]:
-    """Convert a Tensorflow shape to a list of ints. Retained for backwards compatibility -- use TensorShape.as_list() in new code."""
-    return [dim.value for dim in shape]
-
-
-def flatten(x: TfExpressionEx) -> TfExpression:
-    """Shortcut function for flattening a tensor."""
-    with tf.name_scope("Flatten"):
-        return tf.reshape(x, [-1])
-
-
-def log2(x: TfExpressionEx) -> TfExpression:
-    """Logarithm in base 2."""
-    with tf.name_scope("Log2"):
-        return tf.log(x) * np.float32(1.0 / np.log(2.0))
-
-
-def exp2(x: TfExpressionEx) -> TfExpression:
-    """Exponent in base 2."""
-    with tf.name_scope("Exp2"):
-        return tf.exp(x * np.float32(np.log(2.0)))
-
-
-def lerp(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpressionEx:
-    """Linear interpolation."""
-    with tf.name_scope("Lerp"):
-        return a + (b - a) * t
-
-
-def lerp_clip(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpression:
-    """Linear interpolation with clip."""
-    with tf.name_scope("LerpClip"):
-        return a + (b - a) * tf.clip_by_value(t, 0.0, 1.0)
-
-
-def absolute_name_scope(scope: str) -> tf.name_scope:
-    """Forcefully enter the specified name scope, ignoring any surrounding scopes."""
-    return tf.name_scope(scope + "/")
-
-
-def absolute_variable_scope(scope: str, **kwargs) -> tf.variable_scope:
-    """Forcefully enter the specified variable scope, ignoring any surrounding scopes."""
-    return tf.variable_scope(tf.VariableScope(name=scope, **kwargs), auxiliary_name_scope=False)
-
-
-def _sanitize_tf_config(config_dict: dict = None) -> dict:
-    # Defaults.
-    cfg = dict()
-    cfg["rnd.np_random_seed"] = None  # Random seed for NumPy. None = keep as is.
-    cfg["rnd.tf_random_seed"] = "auto"  # Random seed for TensorFlow. 'auto' = derive from NumPy random state. None = keep as is.
-    cfg["env.TF_CPP_MIN_LOG_LEVEL"] = "1"  # 0 = Print all available debug info from TensorFlow. 1 = Print warnings and errors, but disable debug info.
-    cfg["graph_options.place_pruned_graph"] = True  # False = Check that all ops are available on the designated device. True = Skip the check for ops that are not used.
-    cfg["gpu_options.allow_growth"] = True  # False = Allocate all GPU memory at the beginning. True = Allocate only as much GPU memory as needed.
-
-    # Remove defaults for environment variables that are already set.
-    for key in list(cfg):
-        fields = key.split(".")
-        if fields[0] == "env":
-            assert len(fields) == 2
-            if fields[1] in os.environ:
-                del cfg[key]
-
-    # User overrides.
-    if config_dict is not None:
-        cfg.update(config_dict)
-    return cfg
-
-
-def init_tf(config_dict: dict = None) -> None:
-    """Initialize TensorFlow session using good default settings."""
-    # Skip if already initialized.
-    if tf.get_default_session() is not None:
-        return
-
-    # Setup config dict and random seeds.
-    cfg = _sanitize_tf_config(config_dict)
-    np_random_seed = cfg["rnd.np_random_seed"]
-    if np_random_seed is not None:
-        np.random.seed(np_random_seed)
-    tf_random_seed = cfg["rnd.tf_random_seed"]
-    if tf_random_seed == "auto":
-        tf_random_seed = np.random.randint(1 << 31)
-    if tf_random_seed is not None:
-        tf.set_random_seed(tf_random_seed)
-
-    # Setup environment variables.
-    for key, value in cfg.items():
-        fields = key.split(".")
-        if fields[0] == "env":
-            assert len(fields) == 2
-            os.environ[fields[1]] = str(value)
-
-    # Create default TensorFlow session.
-    create_session(cfg, force_as_default=True)
-
-
-def assert_tf_initialized():
-    """Check that TensorFlow session has been initialized."""
-    if tf.get_default_session() is None:
-        raise RuntimeError("No default TensorFlow session found. Please call dnnlib.tflib.init_tf().")
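A usage sketch for init_tf(): config keys follow the dotted section.option scheme that _sanitize_tf_config() maps onto the ConfigProto fields and environment variables; the override values shown here are illustrative, and the import path is assumed from this repo's layout.

from tflib import tfutil  # assumed import path

tfutil.init_tf({
    "rnd.np_random_seed": 1000,           # fixed NumPy seed instead of the default None
    "gpu_options.allow_growth": False,    # grab all GPU memory up front
    "env.TF_CPP_MIN_LOG_LEVEL": "0",      # print all TensorFlow debug info
})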
-
-
-def create_session(config_dict: dict = None, force_as_default: bool = False) -> tf.Session:
-    """Create tf.Session based on config dict."""
-    # Setup TensorFlow config proto.
-    cfg = _sanitize_tf_config(config_dict)
-    config_proto = tf.ConfigProto()
-    for key, value in cfg.items():
-        fields = key.split(".")
-        if fields[0] not in ["rnd", "env"]:
-            obj = config_proto
-            for field in fields[:-1]:
-                obj = getattr(obj, field)
-            setattr(obj, fields[-1], value)
-
-    # Create session.
-    session = tf.Session(config=config_proto)
-    if force_as_default:
-        # pylint: disable=protected-access
-        session._default_session = session.as_default()
-        session._default_session.enforce_nesting = False
-        session._default_session.__enter__()
-    return session
-
-
-def init_uninitialized_vars(target_vars: List[tf.Variable] = None) -> None:
-    """Initialize all tf.Variables that have not already been initialized.
-
-    Equivalent to the following, but more efficient and does not bloat the tf graph:
-    tf.variables_initializer(tf.report_uninitialized_variables()).run()
-    """
-    assert_tf_initialized()
-    if target_vars is None:
-        target_vars = tf.global_variables()
-
-    test_vars = []
-    test_ops = []
-
-    with tf.control_dependencies(None):  # ignore surrounding control_dependencies
-        for var in target_vars:
-            assert is_tf_expression(var)
-
-            try:
-                tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/IsVariableInitialized:0"))
-            except KeyError:
-                # Op does not exist => variable may be uninitialized.
-                test_vars.append(var)
-
-                with absolute_name_scope(var.name.split(":")[0]):
-                    test_ops.append(tf.is_variable_initialized(var))
-
-    init_vars = [var for var, inited in zip(test_vars, run(test_ops)) if not inited]
-    run([var.initializer for var in init_vars])
-
-
-def set_vars(var_to_value_dict: dict) -> None:
-    """Set the values of given tf.Variables.
-
-    Equivalent to the following, but more efficient and does not bloat the tf graph:
-    tflib.run([tf.assign(var, value) for var, value in var_to_value_dict.items()])
-    """
-    assert_tf_initialized()
-    ops = []
-    feed_dict = {}
-
-    for var, value in var_to_value_dict.items():
-        assert is_tf_expression(var)
-
-        try:
-            setter = tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/setter:0"))  # look for existing op
-        except KeyError:
-            with absolute_name_scope(var.name.split(":")[0]):
-                with tf.control_dependencies(None):  # ignore surrounding control_dependencies
-                    setter = tf.assign(var, tf.placeholder(var.dtype, var.shape, "new_value"), name="setter")  # create new setter
-
-        ops.append(setter)
-        feed_dict[setter.op.inputs[1]] = value
-
-    run(ops, feed_dict)
-
-
-def create_var_with_large_initial_value(initial_value: np.ndarray, *args, **kwargs):
-    """Create tf.Variable with large initial value without bloating the tf graph."""
-    assert_tf_initialized()
-    assert isinstance(initial_value, np.ndarray)
-    zeros = tf.zeros(initial_value.shape, initial_value.dtype)
-    var = tf.Variable(zeros, *args, **kwargs)
-    set_vars({var: initial_value})
-    return var
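A usage sketch for the two helpers above: the variable is created against a zeros tensor so the large array never becomes a graph constant, and later writes reuse the cached setter op. Names and shapes are illustrative; init_tf() must have been called first.

import numpy as np

weights = np.random.randn(1024, 1024).astype(np.float32)
var = create_var_with_large_initial_value(weights, name="w")  # graph stores only zeros
set_vars({var: weights * 0.5})                                # reuses the cached setter op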
- """ - images = tf.cast(images, tf.float32) - if nhwc_to_nchw: - images = tf.transpose(images, [0, 3, 1, 2]) - return images * ((drange[1] - drange[0]) / 255) + drange[0] - - -def convert_images_to_uint8(images, drange=[-1,1], nchw_to_nhwc=False, shrink=1): - """Convert a minibatch of images from float32 to uint8 with configurable dynamic range. - Can be used as an output transformation for Network.run(). - """ - images = tf.cast(images, tf.float32) - if shrink > 1: - ksize = [1, 1, shrink, shrink] - images = tf.nn.avg_pool(images, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") - if nchw_to_nhwc: - images = tf.transpose(images, [0, 2, 3, 1]) - scale = 255 / (drange[1] - drange[0]) - images = images * scale + (0.5 - drange[0] * scale) - return tf.saturate_cast(images, tf.uint8)