diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..ed8ebf583f771da9150c35db3955987b7d757904
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__
\ No newline at end of file
diff --git a/submission/__pycache__/__init__.cpython-310.pyc b/submission/__pycache__/__init__.cpython-310.pyc
deleted file mode 100644
index 49d95f12dab43b5210719a46cd77487759d3b2de..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/__init__.cpython-310.pyc and /dev/null differ
diff --git a/submission/__pycache__/__init__.cpython-36.pyc b/submission/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index ddc1f46c28d2083ea572abefb0e943169a53a6bf..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/submission/__pycache__/__init__.cpython-38.pyc b/submission/__pycache__/__init__.cpython-38.pyc
deleted file mode 100644
index 7fb9c21385f191a2e293c6c92bc0f2600eb63988..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/__init__.cpython-38.pyc and /dev/null differ
diff --git a/submission/__pycache__/__init__.cpython-39.pyc b/submission/__pycache__/__init__.cpython-39.pyc
deleted file mode 100644
index a3eb6c3a4dc80bf848d05969fd9ba0adb1beaa14..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/__init__.cpython-39.pyc and /dev/null differ
diff --git a/submission/__pycache__/run_context.cpython-310.pyc b/submission/__pycache__/run_context.cpython-310.pyc
deleted file mode 100644
index 16a88b1fe5c61aa930139d80da56ac319bfc3599..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/run_context.cpython-310.pyc and /dev/null differ
diff --git a/submission/__pycache__/run_context.cpython-36.pyc b/submission/__pycache__/run_context.cpython-36.pyc
deleted file mode 100644
index c79464ccf271723928e7ea5846f3121642912acf..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/run_context.cpython-36.pyc and /dev/null differ
diff --git a/submission/__pycache__/run_context.cpython-38.pyc b/submission/__pycache__/run_context.cpython-38.pyc
deleted file mode 100644
index 27f155bf32573a58a1b14b2cfaeece781ca4db14..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/run_context.cpython-38.pyc and /dev/null differ
diff --git a/submission/__pycache__/run_context.cpython-39.pyc b/submission/__pycache__/run_context.cpython-39.pyc
deleted file mode 100644
index 82e906015f2d2c1aea6d1b2ea746dfe1f1aacd09..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/run_context.cpython-39.pyc and /dev/null differ
diff --git a/submission/__pycache__/submit.cpython-310.pyc b/submission/__pycache__/submit.cpython-310.pyc
deleted file mode 100644
index 0f0d56d928ab6555b9b4084a030dda1963e28424..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/submit.cpython-310.pyc and /dev/null differ
diff --git a/submission/__pycache__/submit.cpython-36.pyc b/submission/__pycache__/submit.cpython-36.pyc
deleted file mode 100644
index f0edb249c830bc74157540f24be2caa25d8f8817..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/submit.cpython-36.pyc and /dev/null differ
diff --git a/submission/__pycache__/submit.cpython-38.pyc b/submission/__pycache__/submit.cpython-38.pyc
deleted file mode 100644
index 6fb6b1a8a68da858aaebcbb567ae0dd9f1ee7def..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/submit.cpython-38.pyc and /dev/null differ
diff --git a/submission/__pycache__/submit.cpython-39.pyc b/submission/__pycache__/submit.cpython-39.pyc
deleted file mode 100644
index a631dc24ce45365f1f8274e563e1b4354afac50c..0000000000000000000000000000000000000000
Binary files a/submission/__pycache__/submit.cpython-39.pyc and /dev/null differ
diff --git a/submission/internal/__pycache__/__init__.cpython-36.pyc b/submission/internal/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index e197ab9bed4593646f04c5f1f123f49ddd9b34c5..0000000000000000000000000000000000000000
Binary files a/submission/internal/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/submission/internal/__pycache__/__init__.cpython-38.pyc b/submission/internal/__pycache__/__init__.cpython-38.pyc
deleted file mode 100644
index 9d15b0c3a8da1eac47d29d581014b76e5ca26096..0000000000000000000000000000000000000000
Binary files a/submission/internal/__pycache__/__init__.cpython-38.pyc and /dev/null differ
diff --git a/submission/internal/__pycache__/__init__.cpython-39.pyc b/submission/internal/__pycache__/__init__.cpython-39.pyc
deleted file mode 100644
index bc046661f9d0a58c4db9a911605f6b743a317747..0000000000000000000000000000000000000000
Binary files a/submission/internal/__pycache__/__init__.cpython-39.pyc and /dev/null differ
diff --git a/submission/internal/__pycache__/local.cpython-36.pyc b/submission/internal/__pycache__/local.cpython-36.pyc
deleted file mode 100644
index 7899b1660eb9752f9fb45b94b762a69807251023..0000000000000000000000000000000000000000
Binary files a/submission/internal/__pycache__/local.cpython-36.pyc and /dev/null differ
diff --git a/submission/internal/__pycache__/local.cpython-38.pyc b/submission/internal/__pycache__/local.cpython-38.pyc
deleted file mode 100644
index 679805d01f94611885c9e9241db82f629e9ab438..0000000000000000000000000000000000000000
Binary files a/submission/internal/__pycache__/local.cpython-38.pyc and /dev/null differ
diff --git a/submission/internal/__pycache__/local.cpython-39.pyc b/submission/internal/__pycache__/local.cpython-39.pyc
deleted file mode 100644
index 9f57b457b4f40416ed89a492062b3a26eb3b757f..0000000000000000000000000000000000000000
Binary files a/submission/internal/__pycache__/local.cpython-39.pyc and /dev/null differ
diff --git a/tflib/.custom_ops.py.un~ b/tflib/.custom_ops.py.un~
deleted file mode 100644
index e41c460c3b3111ddcbeb6b9567b4b6e41d6eb9c6..0000000000000000000000000000000000000000
Binary files a/tflib/.custom_ops.py.un~ and /dev/null differ
diff --git a/tflib/__init__.py b/tflib/__init__.py
deleted file mode 100644
index 02c25173d3f2391c88b142cf80af02cd93b0b5a0..0000000000000000000000000000000000000000
--- a/tflib/__init__.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
-#
-# This work is made available under the Nvidia Source Code License-NC.
-# To view a copy of this license, visit
-# https://nvlabs.github.io/stylegan2/license.html
-
-from . import autosummary
-from . import network
-from . import optimizer
-from . import tfutil
-from . import custom_ops
-
-from .tfutil import *
-from .network import Network
-
-from .optimizer import Optimizer
-
-from .custom_ops import get_plugin
diff --git a/tflib/__pycache__/__init__.cpython-36.pyc b/tflib/__pycache__/__init__.cpython-36.pyc
deleted file mode 100644
index 55466792997d614403fe6026e63a5e2e32d5f4c5..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/__init__.cpython-36.pyc and /dev/null differ
diff --git a/tflib/__pycache__/__init__.cpython-38.pyc b/tflib/__pycache__/__init__.cpython-38.pyc
deleted file mode 100644
index c48dd77f8014fcbca868e4826ddc7c07e558bb87..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/__init__.cpython-38.pyc and /dev/null differ
diff --git a/tflib/__pycache__/__init__.cpython-39.pyc b/tflib/__pycache__/__init__.cpython-39.pyc
deleted file mode 100644
index 3010c9010ee84397e342ca1748e4bfb0869ea9d4..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/__init__.cpython-39.pyc and /dev/null differ
diff --git a/tflib/__pycache__/autosummary.cpython-36.pyc b/tflib/__pycache__/autosummary.cpython-36.pyc
deleted file mode 100644
index 8663ce369c5c3d996a2d372e75b53f9c67ecac63..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/autosummary.cpython-36.pyc and /dev/null differ
diff --git a/tflib/__pycache__/autosummary.cpython-38.pyc b/tflib/__pycache__/autosummary.cpython-38.pyc
deleted file mode 100644
index 772042b65a5c87fdd55625424b014978a08a8aba..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/autosummary.cpython-38.pyc and /dev/null differ
diff --git a/tflib/__pycache__/autosummary.cpython-39.pyc b/tflib/__pycache__/autosummary.cpython-39.pyc
deleted file mode 100644
index 51636d9cbd98f5953b8ed47a1a29aadec86da7ce..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/autosummary.cpython-39.pyc and /dev/null differ
diff --git a/tflib/__pycache__/custom_ops.cpython-36.pyc b/tflib/__pycache__/custom_ops.cpython-36.pyc
deleted file mode 100644
index c5bc61adabc982614e494f66403172a241a0a274..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/custom_ops.cpython-36.pyc and /dev/null differ
diff --git a/tflib/__pycache__/network.cpython-36.pyc b/tflib/__pycache__/network.cpython-36.pyc
deleted file mode 100644
index 15b13f2d4d385523fdfa41e298f4a4db2d3c28de..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/network.cpython-36.pyc and /dev/null differ
diff --git a/tflib/__pycache__/optimizer.cpython-36.pyc b/tflib/__pycache__/optimizer.cpython-36.pyc
deleted file mode 100644
index b676edf06988f727c4684867a0d4921c7fa2a62a..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/optimizer.cpython-36.pyc and /dev/null differ
diff --git a/tflib/__pycache__/tfutil.cpython-36.pyc b/tflib/__pycache__/tfutil.cpython-36.pyc
deleted file mode 100644
index fc0253c2da1f3aa0979212e78adf111de2311459..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/tfutil.cpython-36.pyc and /dev/null differ
diff --git a/tflib/__pycache__/tfutil.cpython-38.pyc b/tflib/__pycache__/tfutil.cpython-38.pyc
deleted file mode 100644
index eaddd1d7edc28bb2f69e914274e24c9febfd2989..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/tfutil.cpython-38.pyc and /dev/null differ
diff --git a/tflib/__pycache__/tfutil.cpython-39.pyc b/tflib/__pycache__/tfutil.cpython-39.pyc
deleted file mode 100644
index e0ef28b02cff59ccbc79a0804c30d90fbfdded44..0000000000000000000000000000000000000000
Binary files a/tflib/__pycache__/tfutil.cpython-39.pyc and /dev/null differ
diff --git a/tflib/_cudacache/fused_bias_act_26d9116e2d7d28cdd451d2d21a1bb7ac.dll b/tflib/_cudacache/fused_bias_act_26d9116e2d7d28cdd451d2d21a1bb7ac.dll
deleted file mode 100644
index f03635ef2de5533438983be7e5430b2d68e703fe..0000000000000000000000000000000000000000
Binary files a/tflib/_cudacache/fused_bias_act_26d9116e2d7d28cdd451d2d21a1bb7ac.dll and /dev/null differ
diff --git a/tflib/_cudacache/fused_bias_act_40c2d778d681ce8357acbd8537ee6613.so b/tflib/_cudacache/fused_bias_act_40c2d778d681ce8357acbd8537ee6613.so
deleted file mode 100644
index 271fc0309be80c13246bd506e4f4e9cae1da3a5c..0000000000000000000000000000000000000000
Binary files a/tflib/_cudacache/fused_bias_act_40c2d778d681ce8357acbd8537ee6613.so and /dev/null differ
diff --git a/tflib/_cudacache/fused_bias_act_d3f49482b02b6a96d02e754e40c292bc.dll b/tflib/_cudacache/fused_bias_act_d3f49482b02b6a96d02e754e40c292bc.dll
deleted file mode 100644
index 34f36c1e1c1393b01ab6adbd96cb91d99d895379..0000000000000000000000000000000000000000
Binary files a/tflib/_cudacache/fused_bias_act_d3f49482b02b6a96d02e754e40c292bc.dll and /dev/null differ
diff --git a/tflib/_cudacache/upfirdn_2d_0acb578db4996f3ae2116b184d4a1bdb.dll b/tflib/_cudacache/upfirdn_2d_0acb578db4996f3ae2116b184d4a1bdb.dll
deleted file mode 100644
index c77d3dbe97574675decdfcf3f332e9064018f652..0000000000000000000000000000000000000000
Binary files a/tflib/_cudacache/upfirdn_2d_0acb578db4996f3ae2116b184d4a1bdb.dll and /dev/null differ
diff --git a/tflib/_cudacache/upfirdn_2d_79757bc08e1fcc2d526ecf3cb32fad92.so b/tflib/_cudacache/upfirdn_2d_79757bc08e1fcc2d526ecf3cb32fad92.so
deleted file mode 100644
index f168469daf6becbe8129f5344f8632a089873a1d..0000000000000000000000000000000000000000
--- a/tflib/_cudacache/upfirdn_2d_79757bc08e1fcc2d526ecf3cb32fad92.so
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:11a9d92fab510cb996dcd2bf8644c34f7c5e963e6abba4ad3e606e590917e3e0
-size 1099920
diff --git a/tflib/_cudacache/upfirdn_2d_f65c24ab0d389aaa83e5b3537e3134e8.dll b/tflib/_cudacache/upfirdn_2d_f65c24ab0d389aaa83e5b3537e3134e8.dll
deleted file mode 100644
index dfa6979fc99fdf7b052769b88cb23a852dc223d1..0000000000000000000000000000000000000000
Binary files a/tflib/_cudacache/upfirdn_2d_f65c24ab0d389aaa83e5b3537e3134e8.dll and /dev/null differ
diff --git a/tflib/autosummary.py b/tflib/autosummary.py
deleted file mode 100644
index 6b0d80b371620bedadf8164772b7d6f87806fc11..0000000000000000000000000000000000000000
--- a/tflib/autosummary.py
+++ /dev/null
@@ -1,191 +0,0 @@
-# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
-#
-# This work is made available under the Nvidia Source Code License-NC.
-# To view a copy of this license, visit
-# https://nvlabs.github.io/stylegan2/license.html
-
-"""Helper for adding automatically tracked values to Tensorboard.
-
-Autosummary creates an identity op that internally keeps track of the input
-values and automatically shows up in TensorBoard. The reported value
-represents an average over input components. The average is accumulated
-constantly over time and flushed when save_summaries() is called.
-
-Notes:
-- The output tensor must be used as an input for something else in the
-  graph. Otherwise, the autosummary op will not get executed, and the average
-  value will not get accumulated.
-- It is perfectly fine to include autosummaries with the same name in
-  several places throughout the graph, even if they are executed concurrently.
-- It is ok to also pass in a python scalar or numpy array. In this case, it
-  is added to the average immediately.
-"""
-
-from collections import OrderedDict
-import numpy as np
-import tensorflow as tf
-from tensorboard import summary as summary_lib
-from tensorboard.plugins.custom_scalar import layout_pb2
-
-from . import tfutil
-from .tfutil import TfExpression
-from .tfutil import TfExpressionEx
-
-# Enable "Custom scalars" tab in TensorBoard for advanced formatting.
-# Disabled by default to reduce tfevents file size.
-enable_custom_scalars = False
-
-_dtype = tf.float64
-_vars = OrderedDict()  # name => [var, ...]
-_immediate = OrderedDict()  # name => update_op, update_value
-_finalized = False
-_merge_op = None
-
-
-def _create_var(name: str, value_expr: TfExpression) -> TfExpression:
-    """Internal helper for creating autosummary accumulators."""
-    assert not _finalized
-    name_id = name.replace("/", "_")
-    v = tf.cast(value_expr, _dtype)
-
-    if v.shape.is_fully_defined():
-        size = np.prod(v.shape.as_list())
-        size_expr = tf.constant(size, dtype=_dtype)
-    else:
-        size = None
-        size_expr = tf.reduce_prod(tf.cast(tf.shape(v), _dtype))
-
-    if size == 1:
-        if v.shape.ndims != 0:
-            v = tf.reshape(v, [])
-        v = [size_expr, v, tf.square(v)]
-    else:
-        v = [size_expr, tf.reduce_sum(v), tf.reduce_sum(tf.square(v))]
-    v = tf.cond(tf.is_finite(v[1]), lambda: tf.stack(v), lambda: tf.zeros(3, dtype=_dtype))
-
-    with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.control_dependencies(None):
-        var = tf.Variable(tf.zeros(3, dtype=_dtype), trainable=False)  # [sum(1), sum(x), sum(x**2)]
-        update_op = tf.cond(tf.is_variable_initialized(var), lambda: tf.assign_add(var, v), lambda: tf.assign(var, v))
-
-    if name in _vars:
-        _vars[name].append(var)
-    else:
-        _vars[name] = [var]
-    return update_op
-
-
-def autosummary(name: str, value: TfExpressionEx, passthru: TfExpressionEx = None, condition: TfExpressionEx = True) -> TfExpressionEx:
-    """Create a new autosummary.
-
-    Args:
-        name:     Name to use in TensorBoard
-        value:    TensorFlow expression or python value to track
-        passthru: Optionally return this TF node without modifications but tack an autosummary update side-effect to this node.
-
-    Example use of the passthru mechanism:
-
-    n = autosummary('l2loss', loss, passthru=n)
-
-    This is a shorthand for the following code:
-
-    with tf.control_dependencies([autosummary('l2loss', loss)]):
-        n = tf.identity(n)
-    """
-    tfutil.assert_tf_initialized()
-    name_id = name.replace("/", "_")
-
-    if tfutil.is_tf_expression(value):
-        with tf.name_scope("summary_" + name_id), tf.device(value.device):
-            condition = tf.convert_to_tensor(condition, name='condition')
-            update_op = tf.cond(condition, lambda: tf.group(_create_var(name, value)), tf.no_op)
-            with tf.control_dependencies([update_op]):
-                return tf.identity(value if passthru is None else passthru)
-
-    else:  # python scalar or numpy array
-        assert not tfutil.is_tf_expression(passthru)
-        assert not tfutil.is_tf_expression(condition)
-        if condition:
-            if name not in _immediate:
-                with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.device(None), tf.control_dependencies(None):
-                    update_value = tf.placeholder(_dtype)
-                    update_op = _create_var(name, update_value)
-                    _immediate[name] = update_op, update_value
-            update_op, update_value = _immediate[name]
-            tfutil.run(update_op, {update_value: value})
-        return value if passthru is None else passthru
-
-
-def finalize_autosummaries() -> None:
-    """Create the necessary ops to include autosummaries in TensorBoard report.
-    Note: This should be done only once per graph.
-    """
-    global _finalized
-    tfutil.assert_tf_initialized()
-
-    if _finalized:
-        return None
-
-    _finalized = True
-    tfutil.init_uninitialized_vars([var for vars_list in _vars.values() for var in vars_list])
-
-    # Create summary ops.
-    with tf.device(None), tf.control_dependencies(None):
-        for name, vars_list in _vars.items():
-            name_id = name.replace("/", "_")
-            with tfutil.absolute_name_scope("Autosummary/" + name_id):
-                moments = tf.add_n(vars_list)
-                moments /= moments[0]
-                with tf.control_dependencies([moments]):  # read before resetting
-                    reset_ops = [tf.assign(var, tf.zeros(3, dtype=_dtype)) for var in vars_list]
-                    with tf.name_scope(None), tf.control_dependencies(reset_ops):  # reset before reporting
-                        mean = moments[1]
-                        std = tf.sqrt(moments[2] - tf.square(moments[1]))
-                        tf.summary.scalar(name, mean)
-                        if enable_custom_scalars:
-                            tf.summary.scalar("xCustomScalars/" + name + "/margin_lo", mean - std)
-                            tf.summary.scalar("xCustomScalars/" + name + "/margin_hi", mean + std)
-
-    # Setup layout for custom scalars.
-    layout = None
-    if enable_custom_scalars:
-        cat_dict = OrderedDict()
-        for series_name in sorted(_vars.keys()):
-            p = series_name.split("/")
-            cat = p[0] if len(p) >= 2 else ""
-            chart = "/".join(p[1:-1]) if len(p) >= 3 else p[-1]
-            if cat not in cat_dict:
-                cat_dict[cat] = OrderedDict()
-            if chart not in cat_dict[cat]:
-                cat_dict[cat][chart] = []
-            cat_dict[cat][chart].append(series_name)
-        categories = []
-        for cat_name, chart_dict in cat_dict.items():
-            charts = []
-            for chart_name, series_names in chart_dict.items():
-                series = []
-                for series_name in series_names:
-                    series.append(layout_pb2.MarginChartContent.Series(
-                        value=series_name,
-                        lower="xCustomScalars/" + series_name + "/margin_lo",
-                        upper="xCustomScalars/" + series_name + "/margin_hi"))
-                margin = layout_pb2.MarginChartContent(series=series)
-                charts.append(layout_pb2.Chart(title=chart_name, margin=margin))
-            categories.append(layout_pb2.Category(title=cat_name, chart=charts))
-        layout = summary_lib.custom_scalar_pb(layout_pb2.Layout(category=categories))
-    return layout
-
-def save_summaries(file_writer, global_step=None):
-    """Call FileWriter.add_summary() with all summaries in the default graph,
-    automatically finalizing and merging them on the first call.
-    """
-    global _merge_op
-    tfutil.assert_tf_initialized()
-
-    if _merge_op is None:
-        layout = finalize_autosummaries()
-        if layout is not None:
-            file_writer.add_summary(layout)
-        with tf.device(None), tf.control_dependencies(None):
-            _merge_op = tf.summary.merge_all()
-
-    file_writer.add_summary(_merge_op.eval(), global_step)
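[Editor's note: the autosummary module deleted above is easiest to follow from its own docstring: autosummary() returns an identity op whose side effect accumulates a running average, and save_summaries() flushes those averages to a TensorBoard event file. A minimal sketch of that flow, assuming TensorFlow 1.x and the tflib package still on the import path; the placeholder shape, loss expression, and log directory are illustrative, not from this repo.]

    import numpy as np
    import tensorflow as tf
    import tflib  # the package removed by this diff

    tflib.init_tf()  # session bootstrap re-exported from tflib.tfutil via __init__.py
    x = tf.placeholder(tf.float32, shape=[None])
    loss = tf.reduce_mean(tf.square(x))

    # Identity op with an accumulate-average side effect. It must stay on the
    # evaluated path, or the accumulator never updates (see the Notes above).
    loss = tflib.autosummary.autosummary('Loss/train', loss)

    writer = tf.summary.FileWriter('logs', tf.get_default_graph())
    sess = tf.get_default_session()
    for step in range(10):
        sess.run(loss, {x: np.random.randn(64).astype(np.float32)})
        tflib.autosummary.save_summaries(writer, global_step=step)  # merge, flush, reset
    writer.close()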
diff --git a/tflib/custom_ops.py b/tflib/custom_ops.py
deleted file mode 100644
index 9046787626a0530f03b9f8982ec625e6bd257858..0000000000000000000000000000000000000000
--- a/tflib/custom_ops.py
+++ /dev/null
@@ -1,169 +0,0 @@
-# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
-#
-# This work is made available under the Nvidia Source Code License-NC.
-# To view a copy of this license, visit
-# https://nvlabs.github.io/stylegan2/license.html
-
-"""TensorFlow custom ops builder.
-"""
-
-import os
-import re
-import uuid
-import hashlib
-import tempfile
-import shutil
-import tensorflow as tf
-from tensorflow.python.client import device_lib  # pylint: disable=no-name-in-module
-
-#----------------------------------------------------------------------------
-# Global options.
-
-cuda_cache_path = os.path.join(os.path.dirname(__file__), '_cudacache')
-cuda_cache_version_tag = 'v1'
-do_not_hash_included_headers = False  # Speed up compilation by assuming that headers included by the CUDA code never change. Unsafe!
-verbose = True  # Print status messages to stdout.
-
-compiler_bindir_search_path = [
-    'C:/Program Files (x86)/Microsoft Visual Studio/2017/Community/VC/Tools/MSVC/14.16.27023/bin/Hostx64/x64',
-    'C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.23.28105/bin/Hostx64/x64',
-    'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin',
-]
-
-#----------------------------------------------------------------------------
-# Internal helper funcs.
-
-def _find_compiler_bindir():
-    for compiler_path in compiler_bindir_search_path:
-        if os.path.isdir(compiler_path):
-            return compiler_path
-    return None
-
-def _get_compute_cap(device):
-    caps_str = device.physical_device_desc
-    m = re.search('compute capability: (\\d+).(\\d+)', caps_str)
-    major = m.group(1)
-    minor = m.group(2)
-    return (major, minor)
-
-def _get_cuda_gpu_arch_string():
-    gpus = [x for x in device_lib.list_local_devices() if x.device_type == 'GPU']
-    if len(gpus) == 0:
-        raise RuntimeError('No GPU devices found')
-    (major, minor) = _get_compute_cap(gpus[0])
-    return 'sm_%s%s' % (major, minor)
-
-def _run_cmd(cmd):
-    with os.popen(cmd) as pipe:
-        output = pipe.read()
-        status = pipe.close()
-    if status is not None:
-        raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, output))
-
-def _prepare_nvcc_cli(opts):
-    cmd = 'nvcc ' + opts.strip()
-    cmd += ' --disable-warnings'
-    cmd += ' --include-path "%s"' % tf.sysconfig.get_include()
-    cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'protobuf_archive', 'src')
-    cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'com_google_absl')
-    cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'eigen_archive')
-
-    compiler_bindir = _find_compiler_bindir()
-    if compiler_bindir is None:
-        # Require that _find_compiler_bindir succeeds on Windows. Allow
-        # nvcc to use whatever is the default on Linux.
-        if os.name == 'nt':
-            raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__)
-    else:
-        cmd += ' --compiler-bindir "%s"' % compiler_bindir
-    cmd += ' 2>&1'
-    return cmd
-
-#----------------------------------------------------------------------------
-# Main entry point.
-
-_plugin_cache = dict()
-
-def get_plugin(cuda_file):
-    cuda_file_base = os.path.basename(cuda_file)
-    cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base)
-
-    # Already in cache?
-    if cuda_file in _plugin_cache:
-        return _plugin_cache[cuda_file]
-
-    # Setup plugin.
-    if verbose:
-        print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True)
-    try:
-        # Hash CUDA source.
-        md5 = hashlib.md5()
-        with open(cuda_file, 'rb') as f:
-            md5.update(f.read())
-        md5.update(b'\n')
-
-        # Hash headers included by the CUDA code by running it through the preprocessor.
-        if not do_not_hash_included_headers:
-            if verbose:
-                print('Preprocessing... ', end='', flush=True)
-            with tempfile.TemporaryDirectory() as tmp_dir:
-                tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext)
-                _run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)))
-                with open(tmp_file, 'rb') as f:
-                    bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8')  # __FILE__ in error check macros
-                    good_file_str = ('"' + cuda_file_base + '"').encode('utf-8')
-                    for ln in f:
-                        if not ln.startswith(b'# ') and not ln.startswith(b'#line '):  # ignore line number pragmas
-                            ln = ln.replace(bad_file_str, good_file_str)
-                            md5.update(ln)
-                    md5.update(b'\n')
-
-        # Select compiler options.
-        compile_opts = ''
-        if os.name == 'nt':
-            compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib')
-        elif os.name == 'posix':
-            compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.so')
-            compile_opts += ' --compiler-options \'-fPIC -D_GLIBCXX_USE_CXX11_ABI=0\''
-        else:
-            assert False  # not Windows or Linux, w00t?
-        compile_opts += ' --gpu-architecture=%s' % _get_cuda_gpu_arch_string()
-        compile_opts += ' --use_fast_math'
-        nvcc_cmd = _prepare_nvcc_cli(compile_opts)
-
-        # Hash build configuration.
-        md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n')
-        md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n')
-        md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n')
-
-        # Compile if not already compiled.
-        bin_file_ext = '.dll' if os.name == 'nt' else '.so'
-        bin_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + md5.hexdigest() + bin_file_ext)
-        if not os.path.isfile(bin_file):
-            if verbose:
-                print('Compiling... ', end='', flush=True)
-            with tempfile.TemporaryDirectory() as tmp_dir:
-                tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext)
-                _run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))
-                os.makedirs(cuda_cache_path, exist_ok=True)
-                intermediate_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext)
-                shutil.copyfile(tmp_file, intermediate_file)
-                os.rename(intermediate_file, bin_file)  # atomic
-
-        # Load.
-        if verbose:
-            print('Loading... ', end='', flush=True)
-        plugin = tf.load_op_library(bin_file)
-
-        # Add to cache.
-        _plugin_cache[cuda_file] = plugin
-        if verbose:
-            print('Done.', flush=True)
-        return plugin
-
-    except:
-        if verbose:
-            print('Failed!', flush=True)
-        raise
-
-#----------------------------------------------------------------------------
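[Editor's note: the builder above explains where the hashed _cudacache binaries deleted earlier came from: get_plugin() md5-hashes the .cu source, its preprocessed headers, and the exact nvcc command line, compiles into tflib/_cudacache under an md5-derived name on a cache miss, and tf.load_op_library()s the result. A sketch of the call site, assuming TF 1.x and a working nvcc on PATH (plus one of the listed MSVC installs on Windows); the relative path is illustrative.]

    import os
    from tflib import custom_ops

    # First call: preprocess + hash the source, compile with nvcc, cache the
    # binary as tflib/_cudacache/fused_bias_act_<md5>.so (.dll on Windows), load it.
    # Later calls with the same file are served from the in-process _plugin_cache dict.
    plugin = custom_ops.get_plugin(os.path.join('tflib', 'ops', 'fused_bias_act.cu'))
    print(plugin)  # python module wrapping the op(s) registered by the .cu file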
diff --git a/tflib/custom_ops.py~ b/tflib/custom_ops.py~
deleted file mode 100644
index 9046787626a0530f03b9f8982ec625e6bd257858..0000000000000000000000000000000000000000
--- a/tflib/custom_ops.py~
+++ /dev/null
@@ -1,169 +0,0 @@
-# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
-#
-# This work is made available under the Nvidia Source Code License-NC.
-# To view a copy of this license, visit
-# https://nvlabs.github.io/stylegan2/license.html
-
-"""TensorFlow custom ops builder.
-"""
-
-import os
-import re
-import uuid
-import hashlib
-import tempfile
-import shutil
-import tensorflow as tf
-from tensorflow.python.client import device_lib  # pylint: disable=no-name-in-module
-
-#----------------------------------------------------------------------------
-# Global options.
-
-cuda_cache_path = os.path.join(os.path.dirname(__file__), '_cudacache')
-cuda_cache_version_tag = 'v1'
-do_not_hash_included_headers = False  # Speed up compilation by assuming that headers included by the CUDA code never change. Unsafe!
-verbose = True  # Print status messages to stdout.
-
-compiler_bindir_search_path = [
-    'C:/Program Files (x86)/Microsoft Visual Studio/2017/Community/VC/Tools/MSVC/14.16.27023/bin/Hostx64/x64',
-    'C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.23.28105/bin/Hostx64/x64',
-    'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin',
-]
-
-#----------------------------------------------------------------------------
-# Internal helper funcs.
-
-def _find_compiler_bindir():
-    for compiler_path in compiler_bindir_search_path:
-        if os.path.isdir(compiler_path):
-            return compiler_path
-    return None
-
-def _get_compute_cap(device):
-    caps_str = device.physical_device_desc
-    m = re.search('compute capability: (\\d+).(\\d+)', caps_str)
-    major = m.group(1)
-    minor = m.group(2)
-    return (major, minor)
-
-def _get_cuda_gpu_arch_string():
-    gpus = [x for x in device_lib.list_local_devices() if x.device_type == 'GPU']
-    if len(gpus) == 0:
-        raise RuntimeError('No GPU devices found')
-    (major, minor) = _get_compute_cap(gpus[0])
-    return 'sm_%s%s' % (major, minor)
-
-def _run_cmd(cmd):
-    with os.popen(cmd) as pipe:
-        output = pipe.read()
-        status = pipe.close()
-    if status is not None:
-        raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, output))
-
-def _prepare_nvcc_cli(opts):
-    cmd = 'nvcc ' + opts.strip()
-    cmd += ' --disable-warnings'
-    cmd += ' --include-path "%s"' % tf.sysconfig.get_include()
-    cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'protobuf_archive', 'src')
-    cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'com_google_absl')
-    cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'eigen_archive')
-
-    compiler_bindir = _find_compiler_bindir()
-    if compiler_bindir is None:
-        # Require that _find_compiler_bindir succeeds on Windows. Allow
-        # nvcc to use whatever is the default on Linux.
-        if os.name == 'nt':
-            raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__)
-    else:
-        cmd += ' --compiler-bindir "%s"' % compiler_bindir
-    cmd += ' 2>&1'
-    return cmd
-
-#----------------------------------------------------------------------------
-# Main entry point.
-
-_plugin_cache = dict()
-
-def get_plugin(cuda_file):
-    cuda_file_base = os.path.basename(cuda_file)
-    cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base)
-
-    # Already in cache?
-    if cuda_file in _plugin_cache:
-        return _plugin_cache[cuda_file]
-
-    # Setup plugin.
-    if verbose:
-        print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True)
-    try:
-        # Hash CUDA source.
-        md5 = hashlib.md5()
-        with open(cuda_file, 'rb') as f:
-            md5.update(f.read())
-        md5.update(b'\n')
-
-        # Hash headers included by the CUDA code by running it through the preprocessor.
-        if not do_not_hash_included_headers:
-            if verbose:
-                print('Preprocessing... ', end='', flush=True)
-            with tempfile.TemporaryDirectory() as tmp_dir:
-                tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext)
-                _run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)))
-                with open(tmp_file, 'rb') as f:
-                    bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8')  # __FILE__ in error check macros
-                    good_file_str = ('"' + cuda_file_base + '"').encode('utf-8')
-                    for ln in f:
-                        if not ln.startswith(b'# ') and not ln.startswith(b'#line '):  # ignore line number pragmas
-                            ln = ln.replace(bad_file_str, good_file_str)
-                            md5.update(ln)
-                    md5.update(b'\n')
-
-        # Select compiler options.
-        compile_opts = ''
-        if os.name == 'nt':
-            compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib')
-        elif os.name == 'posix':
-            compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.so')
-            compile_opts += ' --compiler-options \'-fPIC -D_GLIBCXX_USE_CXX11_ABI=0\''
-        else:
-            assert False  # not Windows or Linux, w00t?
-        compile_opts += ' --gpu-architecture=%s' % _get_cuda_gpu_arch_string()
-        compile_opts += ' --use_fast_math'
-        nvcc_cmd = _prepare_nvcc_cli(compile_opts)
-
-        # Hash build configuration.
-        md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n')
-        md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n')
-        md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n')
-
-        # Compile if not already compiled.
-        bin_file_ext = '.dll' if os.name == 'nt' else '.so'
-        bin_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + md5.hexdigest() + bin_file_ext)
-        if not os.path.isfile(bin_file):
-            if verbose:
-                print('Compiling... ', end='', flush=True)
-            with tempfile.TemporaryDirectory() as tmp_dir:
-                tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext)
-                _run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))
-                os.makedirs(cuda_cache_path, exist_ok=True)
-                intermediate_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext)
-                shutil.copyfile(tmp_file, intermediate_file)
-                os.rename(intermediate_file, bin_file)  # atomic
-
-        # Load.
-        if verbose:
-            print('Loading... ', end='', flush=True)
-        plugin = tf.load_op_library(bin_file)
-
-        # Add to cache.
-        _plugin_cache[cuda_file] = plugin
-        if verbose:
-            print('Done.', flush=True)
-        return plugin
-
-    except:
-        if verbose:
-            print('Failed!', flush=True)
-        raise
-
-#----------------------------------------------------------------------------
diff --git a/tflib/network.py b/tflib/network.py
deleted file mode 100644
index 409babb1d2166d341bfaee2ef460d8810bdaf51f..0000000000000000000000000000000000000000
--- a/tflib/network.py
+++ /dev/null
@@ -1,590 +0,0 @@
-# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
-#
-# This work is made available under the Nvidia Source Code License-NC.
-# To view a copy of this license, visit
-# https://nvlabs.github.io/stylegan2/license.html
-
-"""Helper for managing networks."""
-
-import types
-import inspect
-import re
-import uuid
-import sys
-import numpy as np
-import tensorflow as tf
-
-from collections import OrderedDict
-from typing import Any, List, Tuple, Union
-
-from . import tfutil
-from .. import util
-
-from .tfutil import TfExpression, TfExpressionEx
-
-_import_handlers = []  # Custom import handlers for dealing with legacy data in pickle import.
-_import_module_src = dict()  # Source code for temporary modules created during pickle import.
-
-
-def import_handler(handler_func):
-    """Function decorator for declaring custom import handlers."""
-    _import_handlers.append(handler_func)
-    return handler_func
-
-
-class Network:
-    """Generic network abstraction.
-
-    Acts as a convenience wrapper for a parameterized network construction
-    function, providing several utility methods and convenient access to
-    the inputs/outputs/weights.
-
-    Network objects can be safely pickled and unpickled for long-term
-    archival purposes. The pickling works reliably as long as the underlying
-    network construction function is defined in a standalone Python module
-    that has no side effects or application-specific imports.
-
-    Args:
-        name: Network name. Used to select TensorFlow name and variable scopes.
-        func_name: Fully qualified name of the underlying network construction function, or a top-level function object.
-        static_kwargs: Keyword arguments to be passed in to the network construction function.
-
-    Attributes:
-        name: User-specified name, defaults to build func name if None.
-        scope: Unique TensorFlow scope containing template graph and variables, derived from the user-specified name.
-        static_kwargs: Arguments passed to the user-supplied build func.
-        components: Container for sub-networks. Passed to the build func, and retained between calls.
-        num_inputs: Number of input tensors.
-        num_outputs: Number of output tensors.
-        input_shapes: Input tensor shapes (NC or NCHW), including minibatch dimension.
-        output_shapes: Output tensor shapes (NC or NCHW), including minibatch dimension.
-        input_shape: Short-hand for input_shapes[0].
-        output_shape: Short-hand for output_shapes[0].
-        input_templates: Input placeholders in the template graph.
-        output_templates: Output tensors in the template graph.
-        input_names: Name string for each input.
-        output_names: Name string for each output.
-        own_vars: Variables defined by this network (local_name => var), excluding sub-networks.
-        vars: All variables (local_name => var).
-        trainables: All trainable variables (local_name => var).
-        var_global_to_local: Mapping from variable global names to local names.
-    """
-
-    def __init__(self, name: str = None, func_name: Any = None, **static_kwargs):
-        tfutil.assert_tf_initialized()
-        assert isinstance(name, str) or name is None
-        assert func_name is not None
-        assert isinstance(func_name, str) or util.is_top_level_function(func_name)
-        assert util.is_pickleable(static_kwargs)
-
-        self._init_fields()
-        self.name = name
-        self.static_kwargs = util.EasyDict(static_kwargs)
-
-        # Locate the user-specified network build function.
-        if util.is_top_level_function(func_name):
-            func_name = util.get_top_level_function_name(func_name)
-        module, self._build_func_name = util.get_module_from_obj_name(func_name)
-        self._build_func = util.get_obj_from_module(module, self._build_func_name)
-        assert callable(self._build_func)
-
-        # Dig up source code for the module containing the build function.
-        self._build_module_src = _import_module_src.get(module, None)
-        if self._build_module_src is None:
-            self._build_module_src = inspect.getsource(module)
-
-        # Init TensorFlow graph.
-        self._init_graph()
-        self.reset_own_vars()
-
-    def _init_fields(self) -> None:
-        self.name = None
-        self.scope = None
-        self.static_kwargs = util.EasyDict()
-        self.components = util.EasyDict()
-        self.num_inputs = 0
-        self.num_outputs = 0
-        self.input_shapes = [[]]
-        self.output_shapes = [[]]
-        self.input_shape = []
-        self.output_shape = []
-        self.input_templates = []
-        self.output_templates = []
-        self.input_names = []
-        self.output_names = []
-        self.own_vars = OrderedDict()
-        self.vars = OrderedDict()
-        self.trainables = OrderedDict()
-        self.var_global_to_local = OrderedDict()
-
-        self._build_func = None  # User-supplied build function that constructs the network.
-        self._build_func_name = None  # Name of the build function.
-        self._build_module_src = None  # Full source code of the module containing the build function.
-        self._run_cache = dict()  # Cached graph data for Network.run().
-
-    def _init_graph(self) -> None:
-        # Collect inputs.
-        self.input_names = []
-
-        for param in inspect.signature(self._build_func).parameters.values():
-            if param.kind == param.POSITIONAL_OR_KEYWORD and param.default is param.empty:
-                self.input_names.append(param.name)
-
-        self.num_inputs = len(self.input_names)
-        assert self.num_inputs >= 1
-
-        # Choose name and scope.
-        if self.name is None:
-            self.name = self._build_func_name
-        assert re.match("^[A-Za-z0-9_.\\-]*$", self.name)
-        with tf.name_scope(None):
-            self.scope = tf.get_default_graph().unique_name(self.name, mark_as_used=True)
-
-        # Finalize build func kwargs.
-        build_kwargs = dict(self.static_kwargs)
-        build_kwargs["is_template_graph"] = True
-        build_kwargs["components"] = self.components
-
-        # Build template graph.
-        with tfutil.absolute_variable_scope(self.scope, reuse=False), tfutil.absolute_name_scope(self.scope):  # ignore surrounding scopes
-            assert tf.get_variable_scope().name == self.scope
-            assert tf.get_default_graph().get_name_scope() == self.scope
-            with tf.control_dependencies(None):  # ignore surrounding control dependencies
-                self.input_templates = [tf.placeholder(tf.float32, name=name) for name in self.input_names]
-                out_expr = self._build_func(*self.input_templates, **build_kwargs)
-
-        # Collect outputs.
-        assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple)
-        self.output_templates = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr)
-        self.num_outputs = len(self.output_templates)
-        assert self.num_outputs >= 1
-        assert all(tfutil.is_tf_expression(t) for t in self.output_templates)
-
-        # Perform sanity checks.
-        if any(t.shape.ndims is None for t in self.input_templates):
-            raise ValueError("Network input shapes not defined. Please call x.set_shape() for each input.")
-        if any(t.shape.ndims is None for t in self.output_templates):
-            raise ValueError("Network output shapes not defined. Please call x.set_shape() where applicable.")
-        if any(not isinstance(comp, Network) for comp in self.components.values()):
-            raise ValueError("Components of a Network must be Networks themselves.")
-        if len(self.components) != len(set(comp.name for comp in self.components.values())):
-            raise ValueError("Components of a Network must have unique names.")
-
-        # List inputs and outputs.
-        self.input_shapes = [t.shape.as_list() for t in self.input_templates]
-        self.output_shapes = [t.shape.as_list() for t in self.output_templates]
-        self.input_shape = self.input_shapes[0]
-        self.output_shape = self.output_shapes[0]
-        self.output_names = [t.name.split("/")[-1].split(":")[0] for t in self.output_templates]
-
-        # List variables.
-        self.own_vars = OrderedDict((var.name[len(self.scope) + 1:].split(":")[0], var) for var in tf.global_variables(self.scope + "/"))
-        self.vars = OrderedDict(self.own_vars)
-        self.vars.update((comp.name + "/" + name, var) for comp in self.components.values() for name, var in comp.vars.items())
-        self.trainables = OrderedDict((name, var) for name, var in self.vars.items() if var.trainable)
-        self.var_global_to_local = OrderedDict((var.name.split(":")[0], name) for name, var in self.vars.items())
-
-    def reset_own_vars(self) -> None:
-        """Re-initialize all variables of this network, excluding sub-networks."""
-        tfutil.run([var.initializer for var in self.own_vars.values()])
-
-    def reset_vars(self) -> None:
-        """Re-initialize all variables of this network, including sub-networks."""
-        tfutil.run([var.initializer for var in self.vars.values()])
-
-    def reset_trainables(self) -> None:
-        """Re-initialize all trainable variables of this network, including sub-networks."""
-        tfutil.run([var.initializer for var in self.trainables.values()])
-
-    def get_output_for(self, *in_expr: TfExpression, return_as_list: bool = False, **dynamic_kwargs) -> Union[TfExpression, List[TfExpression]]:
-        """Construct TensorFlow expression(s) for the output(s) of this network, given the input expression(s)."""
-        assert len(in_expr) == self.num_inputs
-        assert not all(expr is None for expr in in_expr)
-
-        # Finalize build func kwargs.
-        build_kwargs = dict(self.static_kwargs)
-        build_kwargs.update(dynamic_kwargs)
-        build_kwargs["is_template_graph"] = False
-        build_kwargs["components"] = self.components
-
-        # Build TensorFlow graph to evaluate the network.
-        with tfutil.absolute_variable_scope(self.scope, reuse=True), tf.name_scope(self.name):
-            assert tf.get_variable_scope().name == self.scope
-            valid_inputs = [expr for expr in in_expr if expr is not None]
-            final_inputs = []
-            for expr, name, shape in zip(in_expr, self.input_names, self.input_shapes):
-                if expr is not None:
-                    expr = tf.identity(expr, name=name)
-                else:
-                    expr = tf.zeros([tf.shape(valid_inputs[0])[0]] + shape[1:], name=name)
-                final_inputs.append(expr)
-            out_expr = self._build_func(*final_inputs, **build_kwargs)
-
-        # Propagate input shapes back to the user-specified expressions.
-        for expr, final in zip(in_expr, final_inputs):
-            if isinstance(expr, tf.Tensor):
-                expr.set_shape(final.shape)
-
-        # Express outputs in the desired format.
-        assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple)
-        if return_as_list:
-            out_expr = [out_expr] if tfutil.is_tf_expression(out_expr) else list(out_expr)
-        return out_expr
-
-    def get_var_local_name(self, var_or_global_name: Union[TfExpression, str]) -> str:
-        """Get the local name of a given variable, without any surrounding name scopes."""
-        assert tfutil.is_tf_expression(var_or_global_name) or isinstance(var_or_global_name, str)
-        global_name = var_or_global_name if isinstance(var_or_global_name, str) else var_or_global_name.name
-        return self.var_global_to_local[global_name]
-
-    def find_var(self, var_or_local_name: Union[TfExpression, str]) -> TfExpression:
-        """Find variable by local or global name."""
-        assert tfutil.is_tf_expression(var_or_local_name) or isinstance(var_or_local_name, str)
-        return self.vars[var_or_local_name] if isinstance(var_or_local_name, str) else var_or_local_name
-
-    def get_var(self, var_or_local_name: Union[TfExpression, str]) -> np.ndarray:
-        """Get the value of a given variable as NumPy array.
-        Note: This method is very inefficient -- prefer to use tflib.run(list_of_vars) whenever possible."""
-        return self.find_var(var_or_local_name).eval()
-
-    def set_var(self, var_or_local_name: Union[TfExpression, str], new_value: Union[int, float, np.ndarray]) -> None:
-        """Set the value of a given variable based on the given NumPy array.
-        Note: This method is very inefficient -- prefer to use tflib.set_vars() whenever possible."""
-        tfutil.set_vars({self.find_var(var_or_local_name): new_value})
-
-    def __getstate__(self) -> dict:
-        """Pickle export."""
-        state = dict()
-        state["version"] = 4
-        state["name"] = self.name
-        state["static_kwargs"] = dict(self.static_kwargs)
-        state["components"] = dict(self.components)
-        state["build_module_src"] = self._build_module_src
-        state["build_func_name"] = self._build_func_name
-        state["variables"] = list(zip(self.own_vars.keys(), tfutil.run(list(self.own_vars.values()))))
-        return state
-
-    def __setstate__(self, state: dict) -> None:
-        """Pickle import."""
-        # pylint: disable=attribute-defined-outside-init
-        tfutil.assert_tf_initialized()
-        self._init_fields()
-
-        # Execute custom import handlers.
-        for handler in _import_handlers:
-            state = handler(state)
-
-        # Set basic fields.
-        assert state["version"] in [2, 3, 4]
-        self.name = state["name"]
-        self.static_kwargs = util.EasyDict(state["static_kwargs"])
-        self.components = util.EasyDict(state.get("components", {}))
-        self._build_module_src = state["build_module_src"]
-        self._build_func_name = state["build_func_name"]
-
-        # Create temporary module from the imported source code.
-        module_name = "_tflib_network_import_" + uuid.uuid4().hex
-        module = types.ModuleType(module_name)
-        sys.modules[module_name] = module
-        _import_module_src[module] = self._build_module_src
-        exec(self._build_module_src, module.__dict__)  # pylint: disable=exec-used
-
-        # Locate network build function in the temporary module.
-        self._build_func = util.get_obj_from_module(module, self._build_func_name)
-        assert callable(self._build_func)
-
-        # Init TensorFlow graph.
-        self._init_graph()
-        self.reset_own_vars()
-        tfutil.set_vars({self.find_var(name): value for name, value in state["variables"]})
-
-    def clone(self, name: str = None, **new_static_kwargs) -> "Network":
-        """Create a clone of this network with its own copy of the variables."""
-        # pylint: disable=protected-access
-        net = object.__new__(Network)
-        net._init_fields()
-        net.name = name if name is not None else self.name
-        net.static_kwargs = util.EasyDict(self.static_kwargs)
-        net.static_kwargs.update(new_static_kwargs)
-        net._build_module_src = self._build_module_src
-        net._build_func_name = self._build_func_name
-        net._build_func = self._build_func
-        net._init_graph()
-        net.copy_vars_from(self)
-        return net
-
-    def copy_own_vars_from(self, src_net: "Network") -> None:
-        """Copy the values of all variables from the given network, excluding sub-networks."""
-        names = [name for name in self.own_vars.keys() if name in src_net.own_vars]
-        tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names}))
-
-    def copy_vars_from(self, src_net: "Network") -> None:
-        """Copy the values of all variables from the given network, including sub-networks."""
-        names = [name for name in self.vars.keys() if name in src_net.vars]
-        tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names}))
-
-    def copy_trainables_from(self, src_net: "Network") -> None:
-        """Copy the values of all trainable variables from the given network, including sub-networks."""
-        names = [name for name in self.trainables.keys() if name in src_net.trainables]
-        tfutil.set_vars(tfutil.run({self.vars[name]: src_net.vars[name] for name in names}))
-
-    def convert(self, new_func_name: str, new_name: str = None, **new_static_kwargs) -> "Network":
-        """Create new network with the given parameters, and copy all variables from this network."""
-        if new_name is None:
-            new_name = self.name
-        static_kwargs = dict(self.static_kwargs)
-        static_kwargs.update(new_static_kwargs)
-        net = Network(name=new_name, func_name=new_func_name, **static_kwargs)
-        net.copy_vars_from(self)
-        return net
-
-    def setup_as_moving_average_of(self, src_net: "Network", beta: TfExpressionEx = 0.99, beta_nontrainable: TfExpressionEx = 0.0) -> tf.Operation:
-        """Construct a TensorFlow op that updates the variables of this network
-        to be slightly closer to those of the given network."""
-        with tfutil.absolute_name_scope(self.scope + "/_MovingAvg"):
-            ops = []
-            for name, var in self.vars.items():
-                if name in src_net.vars:
-                    cur_beta = beta if name in self.trainables else beta_nontrainable
-                    new_value = tfutil.lerp(src_net.vars[name], var, cur_beta)
-                    ops.append(var.assign(new_value))
-            return tf.group(*ops)
-
-    def run(self,
-            *in_arrays: Tuple[Union[np.ndarray, None], ...],
-            input_transform: dict = None,
-            output_transform: dict = None,
-            return_as_list: bool = False,
-            print_progress: bool = False,
-            minibatch_size: int = None,
-            num_gpus: int = 1,
-            assume_frozen: bool = False,
-            **dynamic_kwargs) -> Union[np.ndarray, Tuple[np.ndarray, ...], List[np.ndarray]]:
-        """Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s).
-
-        Args:
-            input_transform:    A dict specifying a custom transformation to be applied to the input tensor(s) before evaluating the network.
-                                The dict must contain a 'func' field that points to a top-level function. The function is called with the input
-                                TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
-            output_transform:   A dict specifying a custom transformation to be applied to the output tensor(s) after evaluating the network.
-                                The dict must contain a 'func' field that points to a top-level function. The function is called with the output
-                                TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
-            return_as_list:     True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs.
-            print_progress:     Print progress to the console? Useful for very large input arrays.
-            minibatch_size:     Maximum minibatch size to use, None = disable batching.
-            num_gpus:           Number of GPUs to use.
-            assume_frozen:      Improve multi-GPU performance by assuming that the trainable parameters will remain changed between calls.
-            dynamic_kwargs:     Additional keyword arguments to be passed into the network build function.
-        """
-        assert len(in_arrays) == self.num_inputs
-        assert not all(arr is None for arr in in_arrays)
-        assert input_transform is None or util.is_top_level_function(input_transform["func"])
-        assert output_transform is None or util.is_top_level_function(output_transform["func"])
-        output_transform, dynamic_kwargs = _handle_legacy_output_transforms(output_transform, dynamic_kwargs)
-        num_items = in_arrays[0].shape[0]
-        if minibatch_size is None:
-            minibatch_size = num_items
-
-        # Construct unique hash key from all arguments that affect the TensorFlow graph.
-        key = dict(input_transform=input_transform, output_transform=output_transform, num_gpus=num_gpus, assume_frozen=assume_frozen, dynamic_kwargs=dynamic_kwargs)
-        def unwind_key(obj):
-            if isinstance(obj, dict):
-                return [(key, unwind_key(value)) for key, value in sorted(obj.items())]
-            if callable(obj):
-                return util.get_top_level_function_name(obj)
-            return obj
-        key = repr(unwind_key(key))
-
-        # Build graph.
-        if key not in self._run_cache:
-            with tfutil.absolute_name_scope(self.scope + "/_Run"), tf.control_dependencies(None):
-                with tf.device("/cpu:0"):
-                    in_expr = [tf.placeholder(tf.float32, name=name) for name in self.input_names]
-                    in_split = list(zip(*[tf.split(x, num_gpus) for x in in_expr]))
-
-                out_split = []
-                for gpu in range(num_gpus):
-                    with tf.device("/gpu:%d" % gpu):
-                        net_gpu = self.clone() if assume_frozen else self
-                        in_gpu = in_split[gpu]
-
-                        if input_transform is not None:
-                            in_kwargs = dict(input_transform)
-                            in_gpu = in_kwargs.pop("func")(*in_gpu, **in_kwargs)
-                            in_gpu = [in_gpu] if tfutil.is_tf_expression(in_gpu) else list(in_gpu)
-
-                        assert len(in_gpu) == self.num_inputs
-                        out_gpu = net_gpu.get_output_for(*in_gpu, return_as_list=True, **dynamic_kwargs)
-
-                        if output_transform is not None:
-                            out_kwargs = dict(output_transform)
-                            out_gpu = out_kwargs.pop("func")(*out_gpu, **out_kwargs)
-                            out_gpu = [out_gpu] if tfutil.is_tf_expression(out_gpu) else list(out_gpu)
-
-                        assert len(out_gpu) == self.num_outputs
-                        out_split.append(out_gpu)
-
-                with tf.device("/cpu:0"):
-                    out_expr = [tf.concat(outputs, axis=0) for outputs in zip(*out_split)]
-                    self._run_cache[key] = in_expr, out_expr
-
-        # Run minibatches.
-        in_expr, out_expr = self._run_cache[key]
-        out_arrays = [np.empty([num_items] + expr.shape.as_list()[1:], expr.dtype.name) for expr in out_expr]
-
-        for mb_begin in range(0, num_items, minibatch_size):
-            if print_progress:
-                print("\r%d / %d" % (mb_begin, num_items), end="")
-
-            mb_end = min(mb_begin + minibatch_size, num_items)
-            mb_num = mb_end - mb_begin
-            mb_in = [src[mb_begin : mb_end] if src is not None else np.zeros([mb_num] + shape[1:]) for src, shape in zip(in_arrays, self.input_shapes)]
-            mb_out = tf.get_default_session().run(out_expr, dict(zip(in_expr, mb_in)))
-
-            for dst, src in zip(out_arrays, mb_out):
-                dst[mb_begin: mb_end] = src
-
-        # Done.
-        if print_progress:
-            print("\r%d / %d" % (num_items, num_items))
-
-        if not return_as_list:
-            out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(out_arrays)
-        return out_arrays
-
-    def list_ops(self) -> List[TfExpression]:
-        include_prefix = self.scope + "/"
-        exclude_prefix = include_prefix + "_"
-        ops = tf.get_default_graph().get_operations()
-        ops = [op for op in ops if op.name.startswith(include_prefix)]
-        ops = [op for op in ops if not op.name.startswith(exclude_prefix)]
-        return ops
-
-    def list_layers(self) -> List[Tuple[str, TfExpression, List[TfExpression]]]:
-        """Returns a list of (layer_name, output_expr, trainable_vars) tuples corresponding to
-        individual layers of the network. Mainly intended to be used for reporting."""
-        layers = []
-
-        def recurse(scope, parent_ops, parent_vars, level):
-            # Ignore specific patterns.
-            if any(p in scope for p in ["/Shape", "/strided_slice", "/Cast", "/concat", "/Assign"]):
-                return
-
-            # Filter ops and vars by scope.
-            global_prefix = scope + "/"
-            local_prefix = global_prefix[len(self.scope) + 1:]
-            cur_ops = [op for op in parent_ops if op.name.startswith(global_prefix) or op.name == global_prefix[:-1]]
-            cur_vars = [(name, var) for name, var in parent_vars if name.startswith(local_prefix) or name == local_prefix[:-1]]
-            if not cur_ops and not cur_vars:
-                return
-
-            # Filter out all ops related to variables.
-            for var in [op for op in cur_ops if op.type.startswith("Variable")]:
-                var_prefix = var.name + "/"
-                cur_ops = [op for op in cur_ops if not op.name.startswith(var_prefix)]
-
-            # Scope does not contain ops as immediate children => recurse deeper.
-            contains_direct_ops = any("/" not in op.name[len(global_prefix):] and op.type not in ["Identity", "Cast", "Transpose"] for op in cur_ops)
-            if (level == 0 or not contains_direct_ops) and (len(cur_ops) + len(cur_vars)) > 1:
-                visited = set()
-                for rel_name in [op.name[len(global_prefix):] for op in cur_ops] + [name[len(local_prefix):] for name, _var in cur_vars]:
-                    token = rel_name.split("/")[0]
-                    if token not in visited:
-                        recurse(global_prefix + token, cur_ops, cur_vars, level + 1)
-                        visited.add(token)
-                return
-
-            # Report layer.
-            layer_name = scope[len(self.scope) + 1:]
-            layer_output = cur_ops[-1].outputs[0] if cur_ops else cur_vars[-1][1]
-            layer_trainables = [var for _name, var in cur_vars if var.trainable]
-            layers.append((layer_name, layer_output, layer_trainables))
-
-        recurse(self.scope, self.list_ops(), list(self.vars.items()), 0)
-        return layers
-
-    def print_layers(self, title: str = None, hide_layers_with_no_params: bool = False) -> None:
-        """Print a summary table of the network structure."""
-        rows = [[title if title is not None else self.name, "Params", "OutputShape", "WeightShape"]]
-        rows += [["---"] * 4]
-        total_params = 0
-
-        for layer_name, layer_output, layer_trainables in self.list_layers():
-            num_params = sum(int(np.prod(var.shape.as_list())) for var in layer_trainables)
-            weights = [var for var in layer_trainables if var.name.endswith("/weight:0")]
-            weights.sort(key=lambda x: len(x.name))
-            if len(weights) == 0 and len(layer_trainables) == 1:
-                weights = layer_trainables
-            total_params += num_params
-
-            if not hide_layers_with_no_params or num_params != 0:
-                num_params_str = str(num_params) if num_params > 0 else "-"
-                output_shape_str = str(layer_output.shape)
-                weight_shape_str = str(weights[0].shape) if len(weights) >= 1 else "-"
-                rows += [[layer_name, num_params_str, output_shape_str, weight_shape_str]]
-
-        rows += [["---"] * 4]
-        rows += [["Total", str(total_params), "", ""]]
-
-        widths = [max(len(cell) for cell in column) for column in zip(*rows)]
-        print()
-        for row in rows:
-            print("  ".join(cell + " " * (width - len(cell)) for cell, width in zip(row, widths)))
-        print()
-
-    def setup_weight_histograms(self, title: str = None) -> None:
-        """Construct summary ops to include histograms of all trainable parameters in TensorBoard."""
-        if title is None:
-            title = self.name
-
-        with tf.name_scope(None), tf.device(None), tf.control_dependencies(None):
-            for local_name, var in self.trainables.items():
-                if "/" in local_name:
-                    p = local_name.split("/")
-                    name = title + "_" + p[-1] + "/" + "_".join(p[:-1])
-                else:
-                    name = title + "_toplevel/" + local_name
-
-                tf.summary.histogram(name, var)
-
-#----------------------------------------------------------------------------
-# Backwards-compatible emulation of legacy output transformation in Network.run().
- -_print_legacy_warning = True - -def _handle_legacy_output_transforms(output_transform, dynamic_kwargs): - global _print_legacy_warning - legacy_kwargs = ["out_mul", "out_add", "out_shrink", "out_dtype"] - if not any(kwarg in dynamic_kwargs for kwarg in legacy_kwargs): - return output_transform, dynamic_kwargs - - if _print_legacy_warning: - _print_legacy_warning = False - print() - print("WARNING: Old-style output transformations in Network.run() are deprecated.") - print("Consider using 'output_transform=dict(func=tflib.convert_images_to_uint8)'") - print("instead of 'out_mul=127.5, out_add=127.5, out_dtype=np.uint8'.") - print() - assert output_transform is None - - new_kwargs = dict(dynamic_kwargs) - new_transform = {kwarg: new_kwargs.pop(kwarg) for kwarg in legacy_kwargs if kwarg in dynamic_kwargs} - new_transform["func"] = _legacy_output_transform_func - return new_transform, new_kwargs - -def _legacy_output_transform_func(*expr, out_mul=1.0, out_add=0.0, out_shrink=1, out_dtype=None): - if out_mul != 1.0: - expr = [x * out_mul for x in expr] - - if out_add != 0.0: - expr = [x + out_add for x in expr] - - if out_shrink > 1: - ksize = [1, 1, out_shrink, out_shrink] - expr = [tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") for x in expr] - - if out_dtype is not None: - if tf.as_dtype(out_dtype).is_integer: - expr = [tf.round(x) for x in expr] - expr = [tf.saturate_cast(x, out_dtype) for x in expr] - return expr diff --git a/tflib/ops/__init__.py b/tflib/ops/__init__.py deleted file mode 100644 index 9ab9908efa3cb38af52e8d5bcaa8acffde5a8875..0000000000000000000000000000000000000000 --- a/tflib/ops/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. -# -# This work is made available under the Nvidia Source Code License-NC. -# To view a copy of this license, visit -# https://nvlabs.github.io/stylegan2/license.html - -# empty diff --git a/tflib/ops/__pycache__/__init__.cpython-36.pyc b/tflib/ops/__pycache__/__init__.cpython-36.pyc deleted file mode 100644 index d6d9fe932896826b80acf49e15fff6079f2f3c49..0000000000000000000000000000000000000000 Binary files a/tflib/ops/__pycache__/__init__.cpython-36.pyc and /dev/null differ diff --git a/tflib/ops/__pycache__/fused_bias_act.cpython-36.pyc b/tflib/ops/__pycache__/fused_bias_act.cpython-36.pyc deleted file mode 100644 index aaf22aefb1529d3d0cd19bbd0ba49816d532ac6e..0000000000000000000000000000000000000000 Binary files a/tflib/ops/__pycache__/fused_bias_act.cpython-36.pyc and /dev/null differ diff --git a/tflib/ops/__pycache__/upfirdn_2d.cpython-36.pyc b/tflib/ops/__pycache__/upfirdn_2d.cpython-36.pyc deleted file mode 100644 index 740a77b3c7d29cb546482a3352e74b74f3b27c2b..0000000000000000000000000000000000000000 Binary files a/tflib/ops/__pycache__/upfirdn_2d.cpython-36.pyc and /dev/null differ diff --git a/tflib/ops/fused_bias_act.cu b/tflib/ops/fused_bias_act.cu deleted file mode 100644 index 1102f624fadd0b803bdfb99fecfe145d7ec8abc4..0000000000000000000000000000000000000000 --- a/tflib/ops/fused_bias_act.cu +++ /dev/null @@ -1,188 +0,0 @@ -// Copyright (c) 2019, NVIDIA Corporation. All rights reserved. -// -// This work is made available under the Nvidia Source Code License-NC. 
-// To view a copy of this license, visit
-// https://nvlabs.github.io/stylegan2/license.html
-
-#define EIGEN_USE_GPU
-#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/shape_inference.h"
-#include <stdio.h>
-
-using namespace tensorflow;
-using namespace tensorflow::shape_inference;
-
-#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false)
-
-//------------------------------------------------------------------------
-// CUDA kernel.
-
-template <class T>
-struct FusedBiasActKernelParams
-{
-    const T* x;     // [sizeX]
-    const T* b;     // [sizeB] or NULL
-    const T* ref;   // [sizeX] or NULL
-    T* y;           // [sizeX]
-
-    int grad;
-    int axis;
-    int act;
-    float alpha;
-    float gain;
-
-    int sizeX;
-    int sizeB;
-    int stepB;
-    int loopX;
-};
-
-template <class T>
-static __global__ void FusedBiasActKernel(const FusedBiasActKernelParams<T> p)
-{
-    const float expRange = 80.0f;
-    const float halfExpRange = 40.0f;
-    const float seluScale = 1.0507009873554804934193349852946f;
-    const float seluAlpha = 1.6732632423543772848170429916717f;
-
-    // Loop over elements.
-    int xi = blockIdx.x * p.loopX * blockDim.x + threadIdx.x;
-    for (int loopIdx = 0; loopIdx < p.loopX && xi < p.sizeX; loopIdx++, xi += blockDim.x)
-    {
-        // Load and apply bias.
-        float x = (float)p.x[xi];
-        if (p.b)
-            x += (float)p.b[(xi / p.stepB) % p.sizeB];
-        float ref = (p.ref) ? (float)p.ref[xi] : 0.0f;
-        if (p.gain != 0.0f & p.act != 9)
-            ref /= p.gain;
-
-        // Evaluate activation func.
-        float y;
-        switch (p.act * 10 + p.grad)
-        {
-            // linear
-            default:
-            case 10: y = x; break;
-            case 11: y = x; break;
-            case 12: y = 0.0f; break;
-
-            // relu
-            case 20: y = (x > 0.0f) ? x : 0.0f; break;
-            case 21: y = (ref > 0.0f) ? x : 0.0f; break;
-            case 22: y = 0.0f; break;
-
-            // lrelu
-            case 30: y = (x > 0.0f) ? x : x * p.alpha; break;
-            case 31: y = (ref > 0.0f) ? x : x * p.alpha; break;
-            case 32: y = 0.0f; break;
-
-            // tanh
-            case 40: { float c = expf(x); float d = 1.0f / c; y = (x < -expRange) ? -1.0f : (x > expRange) ? 1.0f : (c - d) / (c + d); } break;
-            case 41: y = x * (1.0f - ref * ref); break;
-            case 42: y = x * (1.0f - ref * ref) * (-2.0f * ref); break;
-
-            // sigmoid
-            case 50: y = (x < -expRange) ? 0.0f : 1.0f / (expf(-x) + 1.0f); break;
-            case 51: y = x * ref * (1.0f - ref); break;
-            case 52: y = x * ref * (1.0f - ref) * (1.0f - 2.0f * ref); break;
-
-            // elu
-            case 60: y = (x >= 0.0f) ? x : expf(x) - 1.0f; break;
-            case 61: y = (ref >= 0.0f) ? x : x * (ref + 1.0f); break;
-            case 62: y = (ref >= 0.0f) ? 0.0f : x * (ref + 1.0f); break;
-
-            // selu
-            case 70: y = (x >= 0.0f) ? seluScale * x : (seluScale * seluAlpha) * (expf(x) - 1.0f); break;
-            case 71: y = (ref >= 0.0f) ? x * seluScale : x * (ref + seluScale * seluAlpha); break;
-            case 72: y = (ref >= 0.0f) ? 0.0f : x * (ref + seluScale * seluAlpha); break;
-
-            // softplus
-            case 80: y = (x > expRange) ? x : logf(expf(x) + 1.0f); break;
-            case 81: y = x * (1.0f - expf(-ref)); break;
-            case 82: { float c = expf(-ref); y = x * c * (1.0f - c); } break;
-
-            // swish
-            case 90: y = (x < -expRange) ? 0.0f : x / (expf(-x) + 1.0f); break;
-            case 91: { float c = expf(ref); float d = c + 1.0f; y = (ref > halfExpRange) ? x : x * c * (ref + d) / (d * d); } break;
-            case 92: { float c = expf(ref); float d = c + 1.0f; y = (ref > halfExpRange) ? 0.0f : x * c * (ref * (2.0f - d) + 2.0f * d) / (d * d * d); } break;
-        }
-
-        // Apply gain and store.
-        p.y[xi] = (T)(y * p.gain);
-    }
-}
-
-//------------------------------------------------------------------------
-// TensorFlow op.
-
-template <class T>
-struct FusedBiasActOp : public OpKernel
-{
-    FusedBiasActKernelParams<T> m_attribs;
-
-    FusedBiasActOp(OpKernelConstruction* ctx) : OpKernel(ctx)
-    {
-        memset(&m_attribs, 0, sizeof(m_attribs));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("grad", &m_attribs.grad));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("axis", &m_attribs.axis));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("act", &m_attribs.act));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &m_attribs.alpha));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("gain", &m_attribs.gain));
-        OP_REQUIRES(ctx, m_attribs.grad >= 0, errors::InvalidArgument("grad must be non-negative"));
-        OP_REQUIRES(ctx, m_attribs.axis >= 0, errors::InvalidArgument("axis must be non-negative"));
-        OP_REQUIRES(ctx, m_attribs.act >= 0, errors::InvalidArgument("act must be non-negative"));
-    }
-
-    void Compute(OpKernelContext* ctx)
-    {
-        FusedBiasActKernelParams<T> p = m_attribs;
-        cudaStream_t stream = ctx->eigen_device<Eigen::GpuDevice>().stream();
-
-        const Tensor& x = ctx->input(0); // [...]
-        const Tensor& b = ctx->input(1); // [sizeB] or [0]
-        const Tensor& ref = ctx->input(2); // x.shape or [0]
-        p.x = x.flat<T>().data();
-        p.b = (b.NumElements()) ? b.flat<T>().data() : NULL;
-        p.ref = (ref.NumElements()) ? ref.flat<T>().data() : NULL;
-        OP_REQUIRES(ctx, b.NumElements() == 0 || m_attribs.axis < x.dims(), errors::InvalidArgument("axis out of bounds"));
-        OP_REQUIRES(ctx, b.dims() == 1, errors::InvalidArgument("b must have rank 1"));
-        OP_REQUIRES(ctx, b.NumElements() == 0 || b.NumElements() == x.dim_size(m_attribs.axis), errors::InvalidArgument("b has wrong number of elements"));
-        OP_REQUIRES(ctx, ref.NumElements() == ((p.grad == 0) ? 0 : x.NumElements()), errors::InvalidArgument("ref has wrong number of elements"));
-        OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("x is too large"));
-
-        p.sizeX = (int)x.NumElements();
-        p.sizeB = (int)b.NumElements();
-        p.stepB = 1;
-        for (int i = m_attribs.axis + 1; i < x.dims(); i++)
-            p.stepB *= (int)x.dim_size(i);
-
-        Tensor* y = NULL; // x.shape
-        OP_REQUIRES_OK(ctx, ctx->allocate_output(0, x.shape(), &y));
-        p.y = y->flat<T>().data();
-
-        p.loopX = 4;
-        int blockSize = 4 * 32;
-        int gridSize = (p.sizeX - 1) / (p.loopX * blockSize) + 1;
-        void* args[] = {&p};
-        OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel((void*)FusedBiasActKernel<T>, gridSize, blockSize, args, 0, stream));
-    }
-};
-
-REGISTER_OP("FusedBiasAct")
-    .Input ("x: T")
-    .Input ("b: T")
-    .Input ("ref: T")
-    .Output ("y: T")
-    .Attr ("T: {float, half}")
-    .Attr ("grad: int = 0")
-    .Attr ("axis: int = 1")
-    .Attr ("act: int = 0")
-    .Attr ("alpha: float = 0.0")
-    .Attr ("gain: float = 1.0");
-REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint<float>("T"), FusedBiasActOp<float>);
-REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"), FusedBiasActOp<Eigen::half>);
-
-//------------------------------------------------------------------------
diff --git a/tflib/ops/fused_bias_act.py b/tflib/ops/fused_bias_act.py
deleted file mode 100644
index 52f6bfd77a4b0151103c1a76fa877e084831f7c4..0000000000000000000000000000000000000000
--- a/tflib/ops/fused_bias_act.py
+++ /dev/null
@@ -1,196 +0,0 @@
-# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
-#
-# This work is made available under the Nvidia Source Code License-NC.
-# To view a copy of this license, visit -# https://nvlabs.github.io/stylegan2/license.html - -"""Custom TensorFlow ops for efficient bias and activation.""" - -import os -import numpy as np -import tensorflow as tf -from .. import custom_ops -from ...util import EasyDict - -def _get_plugin(): - return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu') - -#---------------------------------------------------------------------------- - -activation_funcs = { - 'linear': EasyDict(func=lambda x, **_: x, def_alpha=None, def_gain=1.0, cuda_idx=1, ref='y', zero_2nd_grad=True), - 'relu': EasyDict(func=lambda x, **_: tf.nn.relu(x), def_alpha=None, def_gain=np.sqrt(2), cuda_idx=2, ref='y', zero_2nd_grad=True), - 'lrelu': EasyDict(func=lambda x, alpha, **_: tf.nn.leaky_relu(x, alpha), def_alpha=0.2, def_gain=np.sqrt(2), cuda_idx=3, ref='y', zero_2nd_grad=True), - 'tanh': EasyDict(func=lambda x, **_: tf.nn.tanh(x), def_alpha=None, def_gain=1.0, cuda_idx=4, ref='y', zero_2nd_grad=False), - 'sigmoid': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x), def_alpha=None, def_gain=1.0, cuda_idx=5, ref='y', zero_2nd_grad=False), - 'elu': EasyDict(func=lambda x, **_: tf.nn.elu(x), def_alpha=None, def_gain=1.0, cuda_idx=6, ref='y', zero_2nd_grad=False), - 'selu': EasyDict(func=lambda x, **_: tf.nn.selu(x), def_alpha=None, def_gain=1.0, cuda_idx=7, ref='y', zero_2nd_grad=False), - 'softplus': EasyDict(func=lambda x, **_: tf.nn.softplus(x), def_alpha=None, def_gain=1.0, cuda_idx=8, ref='y', zero_2nd_grad=False), - 'swish': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x) * x, def_alpha=None, def_gain=np.sqrt(2), cuda_idx=9, ref='x', zero_2nd_grad=False), -} - -#---------------------------------------------------------------------------- - -def fused_bias_act(x, b=None, axis=1, act='linear', alpha=None, gain=None, impl='cuda'): - r"""Fused bias and activation function. - - Adds bias `b` to activation tensor `x`, evaluates activation function `act`, - and scales the result by `gain`. Each of the steps is optional. In most cases, - the fused op is considerably more efficient than performing the same calculation - using standard TensorFlow ops. It supports first and second order gradients, - but not third order gradients. - - Args: - x: Input activation tensor. Can have any shape, but if `b` is defined, the - dimension corresponding to `axis`, as well as the rank, must be known. - b: Bias vector, or `None` to disable. Must be a 1D tensor of the same type - as `x`. The shape must be known, and it must match the dimension of `x` - corresponding to `axis`. - axis: The dimension in `x` corresponding to the elements of `b`. - The value of `axis` is ignored if `b` is not specified. - act: Name of the activation function to evaluate, or `"linear"` to disable. - Can be e.g. `"relu"`, `"lrelu"`, `"tanh"`, `"sigmoid"`, `"swish"`, etc. - See `activation_funcs` for a full list. `None` is not allowed. - alpha: Shape parameter for the activation function, or `None` to use the default. - gain: Scaling factor for the output tensor, or `None` to use default. - See `activation_funcs` for the default scaling of each activation function. - If unsure, consider specifying `1.0`. - impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). - - Returns: - Tensor of the same shape and datatype as `x`. 
- """ - - impl_dict = { - 'ref': _fused_bias_act_ref, - 'cuda': _fused_bias_act_cuda, - } - return impl_dict[impl](x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain) - -#---------------------------------------------------------------------------- - -def _fused_bias_act_ref(x, b, axis, act, alpha, gain): - """Slow reference implementation of `fused_bias_act()` using standard TensorFlow ops.""" - - # Validate arguments. - x = tf.convert_to_tensor(x) - b = tf.convert_to_tensor(b) if b is not None else tf.constant([], dtype=x.dtype) - act_spec = activation_funcs[act] - assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis]) - assert b.shape[0] == 0 or 0 <= axis < x.shape.rank - if alpha is None: - alpha = act_spec.def_alpha - if gain is None: - gain = act_spec.def_gain - - # Add bias. - if b.shape[0] != 0: - x += tf.reshape(b, [-1 if i == axis else 1 for i in range(x.shape.rank)]) - - # Evaluate activation function. - x = act_spec.func(x, alpha=alpha) - - # Scale by gain. - if gain != 1: - x *= gain - return x - -#---------------------------------------------------------------------------- - -def _fused_bias_act_cuda(x, b, axis, act, alpha, gain): - """Fast CUDA implementation of `fused_bias_act()` using custom ops.""" - - # Validate arguments. - x = tf.convert_to_tensor(x) - empty_tensor = tf.constant([], dtype=x.dtype) - b = tf.convert_to_tensor(b) if b is not None else empty_tensor - act_spec = activation_funcs[act] - assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis]) - assert b.shape[0] == 0 or 0 <= axis < x.shape.rank - if alpha is None: - alpha = act_spec.def_alpha - if gain is None: - gain = act_spec.def_gain - - # Special cases. - if act == 'linear' and b is None and gain == 1.0: - return x - if act_spec.cuda_idx is None: - return _fused_bias_act_ref(x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain) - - # CUDA kernel. - cuda_kernel = _get_plugin().fused_bias_act - cuda_kwargs = dict(axis=axis, act=act_spec.cuda_idx, alpha=alpha, gain=gain) - - # Forward pass: y = func(x, b). - def func_y(x, b): - y = cuda_kernel(x=x, b=b, ref=empty_tensor, grad=0, **cuda_kwargs) - y.set_shape(x.shape) - return y - - # Backward pass: dx, db = grad(dy, x, y) - def grad_dx(dy, x, y): - ref = {'x': x, 'y': y}[act_spec.ref] - dx = cuda_kernel(x=dy, b=empty_tensor, ref=ref, grad=1, **cuda_kwargs) - dx.set_shape(x.shape) - return dx - def grad_db(dx): - if b.shape[0] == 0: - return empty_tensor - db = dx - if axis < x.shape.rank - 1: - db = tf.reduce_sum(db, list(range(axis + 1, x.shape.rank))) - if axis > 0: - db = tf.reduce_sum(db, list(range(axis))) - db.set_shape(b.shape) - return db - - # Second order gradients: d_dy, d_x = grad2(d_dx, d_db, x, y) - def grad2_d_dy(d_dx, d_db, x, y): - ref = {'x': x, 'y': y}[act_spec.ref] - d_dy = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=1, **cuda_kwargs) - d_dy.set_shape(x.shape) - return d_dy - def grad2_d_x(d_dx, d_db, x, y): - ref = {'x': x, 'y': y}[act_spec.ref] - d_x = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=2, **cuda_kwargs) - d_x.set_shape(x.shape) - return d_x - - # Fast version for piecewise-linear activation funcs. - @tf.custom_gradient - def func_zero_2nd_grad(x, b): - y = func_y(x, b) - @tf.custom_gradient - def grad(dy): - dx = grad_dx(dy, x, y) - db = grad_db(dx) - def grad2(d_dx, d_db): - d_dy = grad2_d_dy(d_dx, d_db, x, y) - return d_dy - return (dx, db), grad2 - return y, grad - - # Slow version for general activation funcs. 
-    @tf.custom_gradient
-    def func_nonzero_2nd_grad(x, b):
-        y = func_y(x, b)
-        def grad_wrap(dy):
-            @tf.custom_gradient
-            def grad_impl(dy, x):
-                dx = grad_dx(dy, x, y)
-                db = grad_db(dx)
-                def grad2(d_dx, d_db):
-                    d_dy = grad2_d_dy(d_dx, d_db, x, y)
-                    d_x = grad2_d_x(d_dx, d_db, x, y)
-                    return d_dy, d_x
-                return (dx, db), grad2
-            return grad_impl(dy, x)
-        return y, grad_wrap
-
-    # Which version to use?
-    if act_spec.zero_2nd_grad:
-        return func_zero_2nd_grad(x, b)
-    return func_nonzero_2nd_grad(x, b)
-
-#----------------------------------------------------------------------------
diff --git a/tflib/ops/upfirdn_2d.cu b/tflib/ops/upfirdn_2d.cu
deleted file mode 100644
index b97ef36c9e5ba46a92a380dbc687e275235a1ccf..0000000000000000000000000000000000000000
--- a/tflib/ops/upfirdn_2d.cu
+++ /dev/null
@@ -1,326 +0,0 @@
-// Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
-//
-// This work is made available under the Nvidia Source Code License-NC.
-// To view a copy of this license, visit
-// https://nvlabs.github.io/stylegan2/license.html
-
-#define EIGEN_USE_GPU
-#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
-#include "tensorflow/core/framework/op.h"
-#include "tensorflow/core/framework/op_kernel.h"
-#include "tensorflow/core/framework/shape_inference.h"
-#include <stdio.h>
-
-using namespace tensorflow;
-using namespace tensorflow::shape_inference;
-
-//------------------------------------------------------------------------
-// Helpers.
-
-#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false)
-
-static __host__ __device__ __forceinline__ int floorDiv(int a, int b)
-{
-    int c = a / b;
-    if (c * b > a)
-        c--;
-    return c;
-}
-
-//------------------------------------------------------------------------
-// CUDA kernel params.
-
-template <class T>
-struct UpFirDn2DKernelParams
-{
-    const T* x; // [majorDim, inH, inW, minorDim]
-    const T* k; // [kernelH, kernelW]
-    T* y;       // [majorDim, outH, outW, minorDim]
-
-    int upx;
-    int upy;
-    int downx;
-    int downy;
-    int padx0;
-    int padx1;
-    int pady0;
-    int pady1;
-
-    int majorDim;
-    int inH;
-    int inW;
-    int minorDim;
-    int kernelH;
-    int kernelW;
-    int outH;
-    int outW;
-    int loopMajor;
-    int loopX;
-};
-
-//------------------------------------------------------------------------
-// General CUDA implementation for large filter kernels.
-
-template <class T>
-static __global__ void UpFirDn2DKernel_large(const UpFirDn2DKernelParams<T> p)
-{
-    // Calculate thread index.
-    int minorIdx = blockIdx.x * blockDim.x + threadIdx.x;
-    int outY = minorIdx / p.minorDim;
-    minorIdx -= outY * p.minorDim;
-    int outXBase = blockIdx.y * p.loopX * blockDim.y + threadIdx.y;
-    int majorIdxBase = blockIdx.z * p.loopMajor;
-    if (outXBase >= p.outW || outY >= p.outH || majorIdxBase >= p.majorDim)
-        return;
-
-    // Setup Y receptive field.
-    int midY = outY * p.downy + p.upy - 1 - p.pady0;
-    int inY = min(max(floorDiv(midY, p.upy), 0), p.inH);
-    int h = min(max(floorDiv(midY + p.kernelH, p.upy), 0), p.inH) - inY;
-    int kernelY = midY + p.kernelH - (inY + 1) * p.upy;
-
-    // Loop over majorDim and outX.
-    for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor && majorIdx < p.majorDim; loopMajor++, majorIdx++)
-    for (int loopX = 0, outX = outXBase; loopX < p.loopX && outX < p.outW; loopX++, outX += blockDim.y)
-    {
-        // Setup X receptive field.
-        int midX = outX * p.downx + p.upx - 1 - p.padx0;
-        int inX = min(max(floorDiv(midX, p.upx), 0), p.inW);
-        int w = min(max(floorDiv(midX + p.kernelW, p.upx), 0), p.inW) - inX;
-        int kernelX = midX + p.kernelW - (inX + 1) * p.upx;
-
-        // Initialize pointers.
-        const T* xp = &p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx];
-        const T* kp = &p.k[kernelY * p.kernelW + kernelX];
-        int xpx = p.minorDim;
-        int kpx = -p.upx;
-        int xpy = p.inW * p.minorDim;
-        int kpy = -p.upy * p.kernelW;
-
-        // Inner loop.
-        float v = 0.0f;
-        for (int y = 0; y < h; y++)
-        {
-            for (int x = 0; x < w; x++)
-            {
-                v += (float)(*xp) * (float)(*kp);
-                xp += xpx;
-                kp += kpx;
-            }
-            xp += xpy - w * xpx;
-            kp += kpy - w * kpx;
-        }
-
-        // Store result.
-        p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v;
-    }
-}
-
-//------------------------------------------------------------------------
-// Specialized CUDA implementation for small filter kernels.
-
-template <class T, int upx, int upy, int downx, int downy, int kernelW, int kernelH, int tileOutW, int tileOutH>
-static __global__ void UpFirDn2DKernel_small(const UpFirDn2DKernelParams<T> p)
-{
-    //assert(kernelW % upx == 0);
-    //assert(kernelH % upy == 0);
-    const int tileInW = ((tileOutW - 1) * downx + kernelW - 1) / upx + 1;
-    const int tileInH = ((tileOutH - 1) * downy + kernelH - 1) / upy + 1;
-    __shared__ volatile float sk[kernelH][kernelW];
-    __shared__ volatile float sx[tileInH][tileInW];
-
-    // Calculate tile index.
-    int minorIdx = blockIdx.x;
-    int tileOutY = minorIdx / p.minorDim;
-    minorIdx -= tileOutY * p.minorDim;
-    tileOutY *= tileOutH;
-    int tileOutXBase = blockIdx.y * p.loopX * tileOutW;
-    int majorIdxBase = blockIdx.z * p.loopMajor;
-    if (tileOutXBase >= p.outW | tileOutY >= p.outH | majorIdxBase >= p.majorDim)
-        return;
-
-    // Load filter kernel (flipped).
-    for (int tapIdx = threadIdx.x; tapIdx < kernelH * kernelW; tapIdx += blockDim.x)
-    {
-        int ky = tapIdx / kernelW;
-        int kx = tapIdx - ky * kernelW;
-        float v = 0.0f;
-        if (kx < p.kernelW & ky < p.kernelH)
-            v = (float)p.k[(p.kernelH - 1 - ky) * p.kernelW + (p.kernelW - 1 - kx)];
-        sk[ky][kx] = v;
-    }
-
-    // Loop over majorDim and outX.
-    for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor & majorIdx < p.majorDim; loopMajor++, majorIdx++)
-    for (int loopX = 0, tileOutX = tileOutXBase; loopX < p.loopX & tileOutX < p.outW; loopX++, tileOutX += tileOutW)
-    {
-        // Load input pixels.
-        int tileMidX = tileOutX * downx + upx - 1 - p.padx0;
-        int tileMidY = tileOutY * downy + upy - 1 - p.pady0;
-        int tileInX = floorDiv(tileMidX, upx);
-        int tileInY = floorDiv(tileMidY, upy);
-        __syncthreads();
-        for (int inIdx = threadIdx.x; inIdx < tileInH * tileInW; inIdx += blockDim.x)
-        {
-            int relInY = inIdx / tileInW;
-            int relInX = inIdx - relInY * tileInW;
-            int inX = relInX + tileInX;
-            int inY = relInY + tileInY;
-            float v = 0.0f;
-            if (inX >= 0 & inY >= 0 & inX < p.inW & inY < p.inH)
-                v = (float)p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx];
-            sx[relInY][relInX] = v;
-        }
-
-        // Loop over output pixels.
-        __syncthreads();
-        for (int outIdx = threadIdx.x; outIdx < tileOutH * tileOutW; outIdx += blockDim.x)
-        {
-            int relOutY = outIdx / tileOutW;
-            int relOutX = outIdx - relOutY * tileOutW;
-            int outX = relOutX + tileOutX;
-            int outY = relOutY + tileOutY;
-
-            // Setup receptive field.
-            int midX = tileMidX + relOutX * downx;
-            int midY = tileMidY + relOutY * downy;
-            int inX = floorDiv(midX, upx);
-            int inY = floorDiv(midY, upy);
-            int relInX = inX - tileInX;
-            int relInY = inY - tileInY;
-            int kernelX = (inX + 1) * upx - midX - 1; // flipped
-            int kernelY = (inY + 1) * upy - midY - 1; // flipped
-
-            // Inner loop.
-            float v = 0.0f;
-            #pragma unroll
-            for (int y = 0; y < kernelH / upy; y++)
-                #pragma unroll
-                for (int x = 0; x < kernelW / upx; x++)
-                    v += sx[relInY + y][relInX + x] * sk[kernelY + y * upy][kernelX + x * upx];
-
-            // Store result.
-            if (outX < p.outW & outY < p.outH)
-                p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v;
-        }
-    }
-}
-
-//------------------------------------------------------------------------
-// TensorFlow op.
-
-template <class T>
-struct UpFirDn2DOp : public OpKernel
-{
-    UpFirDn2DKernelParams<T> m_attribs;
-
-    UpFirDn2DOp(OpKernelConstruction* ctx) : OpKernel(ctx)
-    {
-        memset(&m_attribs, 0, sizeof(m_attribs));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("upx", &m_attribs.upx));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("upy", &m_attribs.upy));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("downx", &m_attribs.downx));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("downy", &m_attribs.downy));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("padx0", &m_attribs.padx0));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("padx1", &m_attribs.padx1));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("pady0", &m_attribs.pady0));
-        OP_REQUIRES_OK(ctx, ctx->GetAttr("pady1", &m_attribs.pady1));
-        OP_REQUIRES(ctx, m_attribs.upx >= 1 && m_attribs.upy >= 1, errors::InvalidArgument("upx and upy must be at least 1x1"));
-        OP_REQUIRES(ctx, m_attribs.downx >= 1 && m_attribs.downy >= 1, errors::InvalidArgument("downx and downy must be at least 1x1"));
-    }
-
-    void Compute(OpKernelContext* ctx)
-    {
-        UpFirDn2DKernelParams<T> p = m_attribs;
-        cudaStream_t stream = ctx->eigen_device<Eigen::GpuDevice>().stream();
-
-        const Tensor& x = ctx->input(0); // [majorDim, inH, inW, minorDim]
-        const Tensor& k = ctx->input(1); // [kernelH, kernelW]
-        p.x = x.flat<T>().data();
-        p.k = k.flat<T>().data();
-        OP_REQUIRES(ctx, x.dims() == 4, errors::InvalidArgument("input must have rank 4"));
-        OP_REQUIRES(ctx, k.dims() == 2, errors::InvalidArgument("kernel must have rank 2"));
-        OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("input too large"));
-        OP_REQUIRES(ctx, k.NumElements() <= kint32max, errors::InvalidArgument("kernel too large"));
-
-        p.majorDim = (int)x.dim_size(0);
-        p.inH = (int)x.dim_size(1);
-        p.inW = (int)x.dim_size(2);
-        p.minorDim = (int)x.dim_size(3);
-        p.kernelH = (int)k.dim_size(0);
-        p.kernelW = (int)k.dim_size(1);
-        OP_REQUIRES(ctx, p.kernelW >= 1 && p.kernelH >= 1, errors::InvalidArgument("kernel must be at least 1x1"));
-
-        p.outW = (p.inW * p.upx + p.padx0 + p.padx1 - p.kernelW + p.downx) / p.downx;
-        p.outH = (p.inH * p.upy + p.pady0 + p.pady1 - p.kernelH + p.downy) / p.downy;
-        OP_REQUIRES(ctx, p.outW >= 1 && p.outH >= 1, errors::InvalidArgument("output must be at least 1x1"));
-
-        Tensor* y = NULL; // [majorDim, outH, outW, minorDim]
-        TensorShape ys;
-        ys.AddDim(p.majorDim);
-        ys.AddDim(p.outH);
-        ys.AddDim(p.outW);
-        ys.AddDim(p.minorDim);
-        OP_REQUIRES_OK(ctx, ctx->allocate_output(0, ys, &y));
-        p.y = y->flat<T>().data();
-        OP_REQUIRES(ctx, y->NumElements() <= kint32max, errors::InvalidArgument("output too large"));
-
-        // Choose CUDA kernel to use.
-        void* cudaKernel = (void*)UpFirDn2DKernel_large<T>;
-        int tileOutW = -1;
-        int tileOutH = -1;
-        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 7 && p.kernelH <= 7) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 7,7, 64,16>; tileOutW = 64; tileOutH = 16; }
-        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 6,6, 64,16>; tileOutW = 64; tileOutH = 16; }
-        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 5 && p.kernelH <= 5) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 5,5, 64,16>; tileOutW = 64; tileOutH = 16; }
-        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 4,4, 64,16>; tileOutW = 64; tileOutH = 16; }
-        if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 3 && p.kernelH <= 3) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 3,3, 64,16>; tileOutW = 64; tileOutH = 16; }
-        if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 8,8, 64,16>; tileOutW = 64; tileOutH = 16; }
-        if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 6,6, 64,16>; tileOutW = 64; tileOutH = 16; }
-        if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 4,4, 64,16>; tileOutW = 64; tileOutH = 16; }
-        if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 2,2, 64,16>; tileOutW = 64; tileOutH = 16; }
-        if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 8,8, 32,8>; tileOutW = 32; tileOutH = 8; }
-        if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 6,6, 32,8>; tileOutW = 32; tileOutH = 8; }
-        if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 4,4, 32,8>; tileOutW = 32; tileOutH = 8; }
-        if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 2,2, 32,8>; tileOutW = 32; tileOutH = 8; }
-
-        // Choose launch params.
-        dim3 blockSize;
-        dim3 gridSize;
-        if (tileOutW > 0 && tileOutH > 0) // small
-        {
-            p.loopMajor = (p.majorDim - 1) / 16384 + 1;
-            p.loopX = 1;
-            blockSize = dim3(32 * 8, 1, 1);
-            gridSize = dim3(((p.outH - 1) / tileOutH + 1) * p.minorDim, (p.outW - 1) / (p.loopX * tileOutW) + 1, (p.majorDim - 1) / p.loopMajor + 1);
-        }
-        else // large
-        {
-            p.loopMajor = (p.majorDim - 1) / 16384 + 1;
-            p.loopX = 4;
-            blockSize = dim3(4, 32, 1);
-            gridSize = dim3((p.outH * p.minorDim - 1) / blockSize.x + 1, (p.outW - 1) / (p.loopX * blockSize.y) + 1, (p.majorDim - 1) / p.loopMajor + 1);
-        }
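As a quick sanity check on the `outW` / `outH` arithmetic above (the Python wrappers later in this diff use the equivalent `// down + 1` form), a hedged numeric example, assuming 2x upsampling with a 4-tap FIR filter and the padding that `upsample_2d()` chooses:

```python
def out_size(in_size, up, down, pad0, pad1, kernel):
    # Integer form of (in*up + pad0 + pad1 - kernel + down) / down from above.
    return (in_size * up + pad0 + pad1 - kernel) // down + 1

# For upsample_2d(factor=2) with a 4-tap filter: p = 4 - 2 = 2,
# pad0 = (p + 1) // 2 + factor - 1 = 2, pad1 = p // 2 = 1.
assert out_size(32, up=2, down=1, pad0=2, pad1=1, kernel=4) == 64  # 32 -> 64
```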
-        // Launch CUDA kernel.
-        void* args[] = {&p};
-        OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel(cudaKernel, gridSize, blockSize, args, 0, stream));
-    }
-};
-
-REGISTER_OP("UpFirDn2D")
-    .Input ("x: T")
-    .Input ("k: T")
-    .Output ("y: T")
-    .Attr ("T: {float, half}")
-    .Attr ("upx: int = 1")
-    .Attr ("upy: int = 1")
-    .Attr ("downx: int = 1")
-    .Attr ("downy: int = 1")
-    .Attr ("padx0: int = 0")
-    .Attr ("padx1: int = 0")
-    .Attr ("pady0: int = 0")
-    .Attr ("pady1: int = 0");
-REGISTER_KERNEL_BUILDER(Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint<float>("T"), UpFirDn2DOp<float>);
-REGISTER_KERNEL_BUILDER(Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"), UpFirDn2DOp<Eigen::half>);
-
-//------------------------------------------------------------------------
diff --git a/tflib/ops/upfirdn_2d.py b/tflib/ops/upfirdn_2d.py
deleted file mode 100644
index fd23777ebb87bc83e8728d6fe3904fbbfb5c524c..0000000000000000000000000000000000000000
--- a/tflib/ops/upfirdn_2d.py
+++ /dev/null
@@ -1,364 +0,0 @@
-# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
-#
-# This work is made available under the Nvidia Source Code License-NC.
-# To view a copy of this license, visit
-# https://nvlabs.github.io/stylegan2/license.html
-
-"""Custom TensorFlow ops for efficient resampling of 2D images."""
-
-import os
-import numpy as np
-import tensorflow as tf
-from .. import custom_ops
-
-def _get_plugin():
-    return custom_ops.get_plugin(os.path.splitext(__file__)[0] + '.cu')
-
-#----------------------------------------------------------------------------
-
-def upfirdn_2d(x, k, upx=1, upy=1, downx=1, downy=1, padx0=0, padx1=0, pady0=0, pady1=0, impl='cuda'):
-    r"""Pad, upsample, FIR filter, and downsample a batch of 2D images.
-
-    Accepts a batch of 2D images of the shape `[majorDim, inH, inW, minorDim]`
-    and performs the following operations for each image, batched across
-    `majorDim` and `minorDim`:
-
-    1. Pad the image with zeros by the specified number of pixels on each side
-       (`padx0`, `padx1`, `pady0`, `pady1`). Specifying a negative value
-       corresponds to cropping the image.
-
-    2. Upsample the image by inserting the zeros after each pixel (`upx`, `upy`).
-
-    3. Convolve the image with the specified 2D FIR filter (`k`), shrinking the
-       image so that the footprint of all output pixels lies within the input image.
-
-    4. Downsample the image by throwing away pixels (`downx`, `downy`).
-
-    This sequence of operations bears close resemblance to scipy.signal.upfirdn().
-    The fused op is considerably more efficient than performing the same calculation
-    using standard TensorFlow ops. It supports gradients of arbitrary order.
-
-    Args:
-        x:      Input tensor of the shape `[majorDim, inH, inW, minorDim]`.
-        k:      2D FIR filter of the shape `[firH, firW]`.
-        upx:    Integer upsampling factor along the X-axis (default: 1).
-        upy:    Integer upsampling factor along the Y-axis (default: 1).
-        downx:  Integer downsampling factor along the X-axis (default: 1).
-        downy:  Integer downsampling factor along the Y-axis (default: 1).
-        padx0:  Number of pixels to pad on the left side (default: 0).
-        padx1:  Number of pixels to pad on the right side (default: 0).
-        pady0:  Number of pixels to pad on the top side (default: 0).
-        pady1:  Number of pixels to pad on the bottom side (default: 0).
-        impl:   Name of the implementation to use. Can be `"ref"` or `"cuda"` (default).
-
-    Returns:
-        Tensor of the shape `[majorDim, outH, outW, minorDim]`, and same datatype as `x`.
- """ - - impl_dict = { - 'ref': _upfirdn_2d_ref, - 'cuda': _upfirdn_2d_cuda, - } - return impl_dict[impl](x=x, k=k, upx=upx, upy=upy, downx=downx, downy=downy, padx0=padx0, padx1=padx1, pady0=pady0, pady1=pady1) - -#---------------------------------------------------------------------------- - -def _upfirdn_2d_ref(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1): - """Slow reference implementation of `upfirdn_2d()` using standard TensorFlow ops.""" - - x = tf.convert_to_tensor(x) - k = np.asarray(k, dtype=np.float32) - assert x.shape.rank == 4 - inH = x.shape[1].value - inW = x.shape[2].value - minorDim = _shape(x, 3) - kernelH, kernelW = k.shape - assert inW >= 1 and inH >= 1 - assert kernelW >= 1 and kernelH >= 1 - assert isinstance(upx, int) and isinstance(upy, int) - assert isinstance(downx, int) and isinstance(downy, int) - assert isinstance(padx0, int) and isinstance(padx1, int) - assert isinstance(pady0, int) and isinstance(pady1, int) - - # Upsample (insert zeros). - x = tf.reshape(x, [-1, inH, 1, inW, 1, minorDim]) - x = tf.pad(x, [[0, 0], [0, 0], [0, upy - 1], [0, 0], [0, upx - 1], [0, 0]]) - x = tf.reshape(x, [-1, inH * upy, inW * upx, minorDim]) - - # Pad (crop if negative). - x = tf.pad(x, [[0, 0], [max(pady0, 0), max(pady1, 0)], [max(padx0, 0), max(padx1, 0)], [0, 0]]) - x = x[:, max(-pady0, 0) : x.shape[1].value - max(-pady1, 0), max(-padx0, 0) : x.shape[2].value - max(-padx1, 0), :] - - # Convolve with filter. - x = tf.transpose(x, [0, 3, 1, 2]) - x = tf.reshape(x, [-1, 1, inH * upy + pady0 + pady1, inW * upx + padx0 + padx1]) - w = tf.constant(k[::-1, ::-1, np.newaxis, np.newaxis], dtype=x.dtype) - x = tf.nn.conv2d(x, w, strides=[1,1,1,1], padding='VALID', data_format='NCHW') - x = tf.reshape(x, [-1, minorDim, inH * upy + pady0 + pady1 - kernelH + 1, inW * upx + padx0 + padx1 - kernelW + 1]) - x = tf.transpose(x, [0, 2, 3, 1]) - - # Downsample (throw away pixels). 
- return x[:, ::downy, ::downx, :] - -#---------------------------------------------------------------------------- - -def _upfirdn_2d_cuda(x, k, upx, upy, downx, downy, padx0, padx1, pady0, pady1): - """Fast CUDA implementation of `upfirdn_2d()` using custom ops.""" - - x = tf.convert_to_tensor(x) - k = np.asarray(k, dtype=np.float32) - majorDim, inH, inW, minorDim = x.shape.as_list() - kernelH, kernelW = k.shape - assert inW >= 1 and inH >= 1 - assert kernelW >= 1 and kernelH >= 1 - assert isinstance(upx, int) and isinstance(upy, int) - assert isinstance(downx, int) and isinstance(downy, int) - assert isinstance(padx0, int) and isinstance(padx1, int) - assert isinstance(pady0, int) and isinstance(pady1, int) - - outW = (inW * upx + padx0 + padx1 - kernelW) // downx + 1 - outH = (inH * upy + pady0 + pady1 - kernelH) // downy + 1 - assert outW >= 1 and outH >= 1 - - kc = tf.constant(k, dtype=x.dtype) - gkc = tf.constant(k[::-1, ::-1], dtype=x.dtype) - gpadx0 = kernelW - padx0 - 1 - gpady0 = kernelH - pady0 - 1 - gpadx1 = inW * upx - outW * downx + padx0 - upx + 1 - gpady1 = inH * upy - outH * downy + pady0 - upy + 1 - - @tf.custom_gradient - def func(x): - y = _get_plugin().up_fir_dn2d(x=x, k=kc, upx=upx, upy=upy, downx=downx, downy=downy, padx0=padx0, padx1=padx1, pady0=pady0, pady1=pady1) - y.set_shape([majorDim, outH, outW, minorDim]) - @tf.custom_gradient - def grad(dy): - dx = _get_plugin().up_fir_dn2d(x=dy, k=gkc, upx=downx, upy=downy, downx=upx, downy=upy, padx0=gpadx0, padx1=gpadx1, pady0=gpady0, pady1=gpady1) - dx.set_shape([majorDim, inH, inW, minorDim]) - return dx, func - return y, grad - return func(x) - -#---------------------------------------------------------------------------- - -def filter_2d(x, k, gain=1, data_format='NCHW', impl='cuda'): - r"""Filter a batch of 2D images with the given FIR filter. - - Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` - and filters each image with the given filter. The filter is normalized so that - if the input pixels are constant, they will be scaled by the specified `gain`. - Pixels outside the image are assumed to be zero. - - Args: - x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. - k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). - gain: Scaling factor for signal magnitude (default: 1.0). - data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). - impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). - - Returns: - Tensor of the same shape and datatype as `x`. - """ - - k = _setup_kernel(k) * gain - p = k.shape[0] - 1 - return _simple_upfirdn_2d(x, k, pad0=(p+1)//2, pad1=p//2, data_format=data_format, impl=impl) - -#---------------------------------------------------------------------------- - -def upsample_2d(x, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'): - r"""Upsample a batch of 2D images with the given filter. - - Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` - and upsamples each image with the given filter. The filter is normalized so that - if the input pixels are constant, they will be scaled by the specified `gain`. - Pixels outside the image are assumed to be zero, and the filter is padded with - zeros so that its shape is a multiple of the upsampling factor. - - Args: - x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. - k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). - The default is `[1] * factor`, which corresponds to nearest-neighbor - upsampling. 
- factor: Integer upsampling factor (default: 2). - gain: Scaling factor for signal magnitude (default: 1.0). - data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). - impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). - - Returns: - Tensor of the shape `[N, C, H * factor, W * factor]` or - `[N, H * factor, W * factor, C]`, and same datatype as `x`. - """ - - assert isinstance(factor, int) and factor >= 1 - if k is None: - k = [1] * factor - k = _setup_kernel(k) * (gain * (factor ** 2)) - p = k.shape[0] - factor - return _simple_upfirdn_2d(x, k, up=factor, pad0=(p+1)//2+factor-1, pad1=p//2, data_format=data_format, impl=impl) - -#---------------------------------------------------------------------------- - -def downsample_2d(x, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'): - r"""Downsample a batch of 2D images with the given filter. - - Accepts a batch of 2D images of the shape `[N, C, H, W]` or `[N, H, W, C]` - and downsamples each image with the given filter. The filter is normalized so that - if the input pixels are constant, they will be scaled by the specified `gain`. - Pixels outside the image are assumed to be zero, and the filter is padded with - zeros so that its shape is a multiple of the downsampling factor. - - Args: - x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. - k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). - The default is `[1] * factor`, which corresponds to average pooling. - factor: Integer downsampling factor (default: 2). - gain: Scaling factor for signal magnitude (default: 1.0). - data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). - impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). - - Returns: - Tensor of the shape `[N, C, H // factor, W // factor]` or - `[N, H // factor, W // factor, C]`, and same datatype as `x`. - """ - - assert isinstance(factor, int) and factor >= 1 - if k is None: - k = [1] * factor - k = _setup_kernel(k) * gain - p = k.shape[0] - factor - return _simple_upfirdn_2d(x, k, down=factor, pad0=(p+1)//2, pad1=p//2, data_format=data_format, impl=impl) - -#---------------------------------------------------------------------------- - -def upsample_conv_2d(x, w, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'): - r"""Fused `upsample_2d()` followed by `tf.nn.conv2d()`. - - Padding is performed only once at the beginning, not between the operations. - The fused op is considerably more efficient than performing the same calculation - using standard TensorFlow ops. It supports gradients of arbitrary order. - - Args: - x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. - w: Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`. - Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`. - k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). - The default is `[1] * factor`, which corresponds to nearest-neighbor - upsampling. - factor: Integer upsampling factor (default: 2). - gain: Scaling factor for signal magnitude (default: 1.0). - data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). - impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). - - Returns: - Tensor of the shape `[N, C, H * factor, W * factor]` or - `[N, H * factor, W * factor, C]`, and same datatype as `x`. - """ - - assert isinstance(factor, int) and factor >= 1 - - # Check weight shape. 
- w = tf.convert_to_tensor(w) - assert w.shape.rank == 4 - convH = w.shape[0].value - convW = w.shape[1].value - inC = _shape(w, 2) - outC = _shape(w, 3) - assert convW == convH - - # Setup filter kernel. - if k is None: - k = [1] * factor - k = _setup_kernel(k) * (gain * (factor ** 2)) - p = (k.shape[0] - factor) - (convW - 1) - - # Determine data dimensions. - if data_format == 'NCHW': - stride = [1, 1, factor, factor] - output_shape = [_shape(x, 0), outC, (_shape(x, 2) - 1) * factor + convH, (_shape(x, 3) - 1) * factor + convW] - num_groups = _shape(x, 1) // inC - else: - stride = [1, factor, factor, 1] - output_shape = [_shape(x, 0), (_shape(x, 1) - 1) * factor + convH, (_shape(x, 2) - 1) * factor + convW, outC] - num_groups = _shape(x, 3) // inC - - # Transpose weights. - w = tf.reshape(w, [convH, convW, inC, num_groups, -1]) - w = tf.transpose(w[::-1, ::-1], [0, 1, 4, 3, 2]) - w = tf.reshape(w, [convH, convW, -1, num_groups * inC]) - - # Execute. - x = tf.nn.conv2d_transpose(x, w, output_shape=output_shape, strides=stride, padding='VALID', data_format=data_format) - return _simple_upfirdn_2d(x, k, pad0=(p+1)//2+factor-1, pad1=p//2+1, data_format=data_format, impl=impl) - -#---------------------------------------------------------------------------- - -def conv_downsample_2d(x, w, k=None, factor=2, gain=1, data_format='NCHW', impl='cuda'): - r"""Fused `tf.nn.conv2d()` followed by `downsample_2d()`. - - Padding is performed only once at the beginning, not between the operations. - The fused op is considerably more efficient than performing the same calculation - using standard TensorFlow ops. It supports gradients of arbitrary order. - - Args: - x: Input tensor of the shape `[N, C, H, W]` or `[N, H, W, C]`. - w: Weight tensor of the shape `[filterH, filterW, inChannels, outChannels]`. - Grouped convolution can be performed by `inChannels = x.shape[0] // numGroups`. - k: FIR filter of the shape `[firH, firW]` or `[firN]` (separable). - The default is `[1] * factor`, which corresponds to average pooling. - factor: Integer downsampling factor (default: 2). - gain: Scaling factor for signal magnitude (default: 1.0). - data_format: `'NCHW'` or `'NHWC'` (default: `'NCHW'`). - impl: Name of the implementation to use. Can be `"ref"` or `"cuda"` (default). - - Returns: - Tensor of the shape `[N, C, H // factor, W // factor]` or - `[N, H // factor, W // factor, C]`, and same datatype as `x`. - """ - - assert isinstance(factor, int) and factor >= 1 - w = tf.convert_to_tensor(w) - convH, convW, _inC, _outC = w.shape.as_list() - assert convW == convH - if k is None: - k = [1] * factor - k = _setup_kernel(k) * gain - p = (k.shape[0] - factor) + (convW - 1) - if data_format == 'NCHW': - s = [1, 1, factor, factor] - else: - s = [1, factor, factor, 1] - x = _simple_upfirdn_2d(x, k, pad0=(p+1)//2, pad1=p//2, data_format=data_format, impl=impl) - return tf.nn.conv2d(x, w, strides=s, padding='VALID', data_format=data_format) - -#---------------------------------------------------------------------------- -# Internal helper funcs. 
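Before the helper definitions, a hedged NumPy-only illustration of what `_setup_kernel()` below does with the default `k = [1] * factor`: the 1D filter becomes a 2D separable filter via an outer product and is normalized to sum 1, which is why the docstrings above can describe the defaults as average pooling (downsampling) and nearest-neighbor (upsampling):

```python
import numpy as np

# Mirrors _setup_kernel() for k = [1, 1], i.e. factor 2.
k = np.outer([1, 1], [1, 1]).astype(np.float32)
k /= np.sum(k)
assert np.allclose(k, 0.25)  # 2x2 box filter == 2x average pooling

# upsample_2d() additionally scales by gain * factor**2 to compensate for
# the inserted zeros, turning the same box filter into pixel duplication.
assert np.allclose(k * 2**2, 1.0)
```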
- -def _shape(tf_expr, dim_idx): - if tf_expr.shape.rank is not None: - dim = tf_expr.shape[dim_idx].value - if dim is not None: - return dim - return tf.shape(tf_expr)[dim_idx] - -def _setup_kernel(k): - k = np.asarray(k, dtype=np.float32) - if k.ndim == 1: - k = np.outer(k, k) - k /= np.sum(k) - assert k.ndim == 2 - assert k.shape[0] == k.shape[1] - return k - -def _simple_upfirdn_2d(x, k, up=1, down=1, pad0=0, pad1=0, data_format='NCHW', impl='cuda'): - assert data_format in ['NCHW', 'NHWC'] - assert x.shape.rank == 4 - y = x - if data_format == 'NCHW': - y = tf.reshape(y, [-1, _shape(y, 2), _shape(y, 3), 1]) - y = upfirdn_2d(y, k, upx=up, upy=up, downx=down, downy=down, padx0=pad0, padx1=pad1, pady0=pad0, pady1=pad1, impl=impl) - if data_format == 'NCHW': - y = tf.reshape(y, [-1, _shape(x, 1), _shape(y, 1), _shape(y, 2)]) - return y - -#---------------------------------------------------------------------------- diff --git a/tflib/optimizer.py b/tflib/optimizer.py deleted file mode 100644 index 9389b5adaa4b47a896410d549016316e6b85ea2d..0000000000000000000000000000000000000000 --- a/tflib/optimizer.py +++ /dev/null @@ -1,370 +0,0 @@ -# Copyright (c) 2019, NVIDIA Corporation. All rights reserved. -# -# This work is made available under the Nvidia Source Code License-NC. -# To view a copy of this license, visit -# https://nvlabs.github.io/stylegan2/license.html - -"""Helper wrapper for a Tensorflow optimizer.""" - -import platform -import numpy as np -import tensorflow as tf - -from collections import OrderedDict -from typing import List, Union - -from . import autosummary -from . import tfutil -from .. import util - -from .tfutil import TfExpression, TfExpressionEx - -_collective_ops_warning_printed = False -_collective_ops_group_key = 831766147 -_collective_ops_instance_key = 436340067 - -class Optimizer: - """A Wrapper for tf.train.Optimizer. - - Automatically takes care of: - - Gradient averaging for multi-GPU training. - - Gradient accumulation for arbitrarily large minibatches. - - Dynamic loss scaling and typecasts for FP16 training. - - Ignoring corrupted gradients that contain NaNs/Infs. - - Reporting statistics. - - Well-chosen default settings. - """ - - def __init__(self, - name: str = "Train", # Name string that will appear in TensorFlow graph. - tf_optimizer: str = "tf.train.AdamOptimizer", # Underlying optimizer class. - learning_rate: TfExpressionEx = 0.001, # Learning rate. Can vary over time. - minibatch_multiplier: TfExpressionEx = None, # Treat N consecutive minibatches as one by accumulating gradients. - share: "Optimizer" = None, # Share internal state with a previously created optimizer? - use_loss_scaling: bool = False, # Enable dynamic loss scaling for robust mixed-precision training? - loss_scaling_init: float = 64.0, # Log2 of initial loss scaling factor. - loss_scaling_inc: float = 0.0005, # Log2 of per-minibatch loss scaling increment when there is no overflow. - loss_scaling_dec: float = 1.0, # Log2 of per-minibatch loss scaling decrement when there is an overflow. - report_mem_usage: bool = False, # Report fine-grained memory usage statistics in TensorBoard? - **kwargs): - - # Public fields. 
- self.name = name - self.learning_rate = learning_rate - self.minibatch_multiplier = minibatch_multiplier - self.id = self.name.replace("/", ".") - self.scope = tf.get_default_graph().unique_name(self.id) - self.optimizer_class = util.get_obj_by_name(tf_optimizer) - self.optimizer_kwargs = dict(kwargs) - self.use_loss_scaling = use_loss_scaling - self.loss_scaling_init = loss_scaling_init - self.loss_scaling_inc = loss_scaling_inc - self.loss_scaling_dec = loss_scaling_dec - - # Private fields. - self._updates_applied = False - self._devices = OrderedDict() # device_name => EasyDict() - self._shared_optimizers = OrderedDict() # device_name => optimizer_class - self._gradient_shapes = None # [shape, ...] - self._report_mem_usage = report_mem_usage - - # Validate arguments. - assert callable(self.optimizer_class) - - # Share internal state if requested. - if share is not None: - assert isinstance(share, Optimizer) - assert self.optimizer_class is share.optimizer_class - assert self.learning_rate is share.learning_rate - assert self.optimizer_kwargs == share.optimizer_kwargs - self._shared_optimizers = share._shared_optimizers # pylint: disable=protected-access - - def _get_device(self, device_name: str): - """Get internal state for the given TensorFlow device.""" - tfutil.assert_tf_initialized() - if device_name in self._devices: - return self._devices[device_name] - - # Initialize fields. - device = util.EasyDict() - device.name = device_name - device.optimizer = None # Underlying optimizer: optimizer_class - device.loss_scaling_var = None # Log2 of loss scaling: tf.Variable - device.grad_raw = OrderedDict() # Raw gradients: var => [grad, ...] - device.grad_clean = OrderedDict() # Clean gradients: var => grad - device.grad_acc_vars = OrderedDict() # Accumulation sums: var => tf.Variable - device.grad_acc_count = None # Accumulation counter: tf.Variable - device.grad_acc = OrderedDict() # Accumulated gradients: var => grad - - # Setup TensorFlow objects. - with tfutil.absolute_name_scope(self.scope + "/Devices"), tf.device(device_name), tf.control_dependencies(None): - if device_name not in self._shared_optimizers: - optimizer_name = self.scope.replace("/", "_") + "_opt%d" % len(self._shared_optimizers) - self._shared_optimizers[device_name] = self.optimizer_class(name=optimizer_name, learning_rate=self.learning_rate, **self.optimizer_kwargs) - device.optimizer = self._shared_optimizers[device_name] - if self.use_loss_scaling: - device.loss_scaling_var = tf.Variable(np.float32(self.loss_scaling_init), trainable=False, name="loss_scaling_var") - - # Register device. - self._devices[device_name] = device - return device - - def register_gradients(self, loss: TfExpression, trainable_vars: Union[List, dict]) -> None: - """Register the gradients of the given loss function with respect to the given variables. - Intended to be called once per GPU.""" - tfutil.assert_tf_initialized() - assert not self._updates_applied - device = self._get_device(loss.device) - - # Validate trainables. - if isinstance(trainable_vars, dict): - trainable_vars = list(trainable_vars.values()) # allow passing in Network.trainables as vars - assert isinstance(trainable_vars, list) and len(trainable_vars) >= 1 - assert all(tfutil.is_tf_expression(expr) for expr in trainable_vars + [loss]) - assert all(var.device == device.name for var in trainable_vars) - - # Validate shapes. 
- if self._gradient_shapes is None: - self._gradient_shapes = [var.shape.as_list() for var in trainable_vars] - assert len(trainable_vars) == len(self._gradient_shapes) - assert all(var.shape.as_list() == var_shape for var, var_shape in zip(trainable_vars, self._gradient_shapes)) - - # Report memory usage if requested. - deps = [] - if self._report_mem_usage: - self._report_mem_usage = False - try: - with tf.name_scope(self.id + '_mem'), tf.device(device.name), tf.control_dependencies([loss]): - deps.append(autosummary.autosummary(self.id + "/mem_usage_gb", tf.contrib.memory_stats.BytesInUse() / 2**30)) - except tf.errors.NotFoundError: - pass - - # Compute gradients. - with tf.name_scope(self.id + "_grad"), tf.device(device.name), tf.control_dependencies(deps): - loss = self.apply_loss_scaling(tf.cast(loss, tf.float32)) - gate = tf.train.Optimizer.GATE_NONE # disable gating to reduce memory usage - grad_list = device.optimizer.compute_gradients(loss=loss, var_list=trainable_vars, gate_gradients=gate) - - # Register gradients. - for grad, var in grad_list: - if var not in device.grad_raw: - device.grad_raw[var] = [] - device.grad_raw[var].append(grad) - - def apply_updates(self, allow_no_op: bool = False) -> tf.Operation: - """Construct training op to update the registered variables based on their gradients.""" - tfutil.assert_tf_initialized() - assert not self._updates_applied - self._updates_applied = True - all_ops = [] - - # Check for no-op. - if allow_no_op and len(self._devices) == 0: - with tfutil.absolute_name_scope(self.scope): - return tf.no_op(name='TrainingOp') - - # Clean up gradients. - for device_idx, device in enumerate(self._devices.values()): - with tfutil.absolute_name_scope(self.scope + "/Clean%d" % device_idx), tf.device(device.name): - for var, grad in device.grad_raw.items(): - - # Filter out disconnected gradients and convert to float32. - grad = [g for g in grad if g is not None] - grad = [tf.cast(g, tf.float32) for g in grad] - - # Sum within the device. - if len(grad) == 0: - grad = tf.zeros(var.shape) # No gradients => zero. - elif len(grad) == 1: - grad = grad[0] # Single gradient => use as is. - else: - grad = tf.add_n(grad) # Multiple gradients => sum. - - # Scale as needed. - scale = 1.0 / len(device.grad_raw[var]) / len(self._devices) - scale = tf.constant(scale, dtype=tf.float32, name="scale") - if self.minibatch_multiplier is not None: - scale /= tf.cast(self.minibatch_multiplier, tf.float32) - scale = self.undo_loss_scaling(scale) - device.grad_clean[var] = grad * scale - - # Sum gradients across devices. - if len(self._devices) > 1: - with tfutil.absolute_name_scope(self.scope + "/Broadcast"), tf.device(None): - if platform.system() == "Windows": # Windows => NCCL ops are not available. - self._broadcast_fallback() - elif tf.VERSION.startswith("1.15."): # TF 1.15 => NCCL ops are broken: https://github.com/tensorflow/tensorflow/issues/41539 - self._broadcast_fallback() - else: # Otherwise => NCCL ops are safe to use. - self._broadcast_nccl() - - # Apply updates separately on each device. - for device_idx, device in enumerate(self._devices.values()): - with tfutil.absolute_name_scope(self.scope + "/Apply%d" % device_idx), tf.device(device.name): - # pylint: disable=cell-var-from-loop - - # Accumulate gradients over time. - if self.minibatch_multiplier is None: - acc_ok = tf.constant(True, name='acc_ok') - device.grad_acc = OrderedDict(device.grad_clean) - else: - # Create variables. 
- with tf.control_dependencies(None): - for var in device.grad_clean.keys(): - device.grad_acc_vars[var] = tf.Variable(tf.zeros(var.shape), trainable=False, name="grad_acc_var") - device.grad_acc_count = tf.Variable(tf.zeros([]), trainable=False, name="grad_acc_count") - - # Track counter. - count_cur = device.grad_acc_count + 1.0 - count_inc_op = lambda: tf.assign(device.grad_acc_count, count_cur) - count_reset_op = lambda: tf.assign(device.grad_acc_count, tf.zeros([])) - acc_ok = (count_cur >= tf.cast(self.minibatch_multiplier, tf.float32)) - all_ops.append(tf.cond(acc_ok, count_reset_op, count_inc_op)) - - # Track gradients. - for var, grad in device.grad_clean.items(): - acc_var = device.grad_acc_vars[var] - acc_cur = acc_var + grad - device.grad_acc[var] = acc_cur - with tf.control_dependencies([acc_cur]): - acc_inc_op = lambda: tf.assign(acc_var, acc_cur) - acc_reset_op = lambda: tf.assign(acc_var, tf.zeros(var.shape)) - all_ops.append(tf.cond(acc_ok, acc_reset_op, acc_inc_op)) - - # No overflow => apply gradients. - all_ok = tf.reduce_all(tf.stack([acc_ok] + [tf.reduce_all(tf.is_finite(g)) for g in device.grad_acc.values()])) - apply_op = lambda: device.optimizer.apply_gradients([(tf.cast(grad, var.dtype), var) for var, grad in device.grad_acc.items()]) - all_ops.append(tf.cond(all_ok, apply_op, tf.no_op)) - - # Adjust loss scaling. - if self.use_loss_scaling: - ls_inc_op = lambda: tf.assign_add(device.loss_scaling_var, self.loss_scaling_inc) - ls_dec_op = lambda: tf.assign_sub(device.loss_scaling_var, self.loss_scaling_dec) - ls_update_op = lambda: tf.group(tf.cond(all_ok, ls_inc_op, ls_dec_op)) - all_ops.append(tf.cond(acc_ok, ls_update_op, tf.no_op)) - - # Last device => report statistics. - if device_idx == len(self._devices) - 1: - all_ops.append(autosummary.autosummary(self.id + "/learning_rate", tf.convert_to_tensor(self.learning_rate))) - all_ops.append(autosummary.autosummary(self.id + "/overflow_frequency", tf.where(all_ok, 0, 1), condition=acc_ok)) - if self.use_loss_scaling: - all_ops.append(autosummary.autosummary(self.id + "/loss_scaling_log2", device.loss_scaling_var)) - - # Initialize variables. - self.reset_optimizer_state() - if self.use_loss_scaling: - tfutil.init_uninitialized_vars([device.loss_scaling_var for device in self._devices.values()]) - if self.minibatch_multiplier is not None: - tfutil.run([var.initializer for device in self._devices.values() for var in list(device.grad_acc_vars.values()) + [device.grad_acc_count]]) - - # Group everything into a single op. 
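Before everything is grouped into the final training op below, the intended calling pattern is worth spelling out. A hedged sketch; the device loop, loss construction, and `net.trainables` are hypothetical placeholders, and only the register-per-GPU-then-apply sequence is prescribed by the class above:

```python
# Assumed context: num_gpus, build_loss_for_gpu(), and net are placeholders.
opt = Optimizer(name='TrainG', tf_optimizer='tf.train.AdamOptimizer', learning_rate=0.002)
for gpu_idx in range(num_gpus):
    with tf.device('/gpu:%d' % gpu_idx):
        loss = build_loss_for_gpu(gpu_idx)
        opt.register_gradients(loss, net.trainables)  # once per GPU
train_op = opt.apply_updates()  # averages across GPUs, handles scaling/overflow
tfutil.run(train_op)            # execute one training step
```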
-
-    def reset_optimizer_state(self) -> None:
-        """Reset internal state of the underlying optimizer."""
-        tfutil.assert_tf_initialized()
-        tfutil.run([var.initializer for device in self._devices.values() for var in device.optimizer.variables()])
-
-    def get_loss_scaling_var(self, device: str) -> Union[tf.Variable, None]:
-        """Get or create variable representing log2 of the current dynamic loss scaling factor."""
-        return self._get_device(device).loss_scaling_var
-
-    def apply_loss_scaling(self, value: TfExpression) -> TfExpression:
-        """Apply dynamic loss scaling for the given expression."""
-        assert tfutil.is_tf_expression(value)
-        if not self.use_loss_scaling:
-            return value
-        return value * tfutil.exp2(self.get_loss_scaling_var(value.device))
-
-    def undo_loss_scaling(self, value: TfExpression) -> TfExpression:
-        """Undo the effect of dynamic loss scaling for the given expression."""
-        assert tfutil.is_tf_expression(value)
-        if not self.use_loss_scaling:
-            return value
-        return value * tfutil.exp2(-self.get_loss_scaling_var(value.device))  # pylint: disable=invalid-unary-operand-type
-
-    def _broadcast_nccl(self):
-        """Sum gradients across devices using NCCL ops (fast path)."""
-        from tensorflow.python.ops import nccl_ops  # pylint: disable=no-name-in-module
-        for all_vars in zip(*[device.grad_clean.keys() for device in self._devices.values()]):
-            if any(x.shape.num_elements() > 0 for x in all_vars):
-                all_grads = [device.grad_clean[var] for device, var in zip(self._devices.values(), all_vars)]
-                all_grads = nccl_ops.all_sum(all_grads)
-                for device, var, grad in zip(self._devices.values(), all_vars, all_grads):
-                    device.grad_clean[var] = grad
-
-    def _broadcast_fallback(self):
-        """Sum gradients across devices using TensorFlow collective ops (slow fallback path)."""
-        from tensorflow.python.ops import collective_ops  # pylint: disable=no-name-in-module
-        global _collective_ops_warning_printed, _collective_ops_group_key, _collective_ops_instance_key
-        if all(x.shape.num_elements() == 0 for device in self._devices.values() for x in device.grad_clean.values()):
-            return
-        if not _collective_ops_warning_printed:
-            print("------------------------------------------------------------------------")
-            print("WARNING: Using slow fallback implementation for inter-GPU communication.")
-            print("Please use TensorFlow 1.14 on Linux for optimal training performance.")
-            print("------------------------------------------------------------------------")
-            _collective_ops_warning_printed = True
-        for device in self._devices.values():
-            with tf.device(device.name):
-                combo = [tf.reshape(x, [x.shape.num_elements()]) for x in device.grad_clean.values()]
-                combo = tf.concat(combo, axis=0)
-                combo = collective_ops.all_reduce(combo, merge_op='Add', final_op='Id',
-                    group_size=len(self._devices), group_key=_collective_ops_group_key,
-                    instance_key=_collective_ops_instance_key)
-                cur_ofs = 0
-                for var, grad_old in device.grad_clean.items():
-                    grad_new = tf.reshape(combo[cur_ofs : cur_ofs + grad_old.shape.num_elements()], grad_old.shape)
-                    cur_ofs += grad_old.shape.num_elements()
-                    device.grad_clean[var] = grad_new
-        _collective_ops_instance_key += 1
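The fallback path flattens every gradient into one contiguous vector so a single all-reduce call suffices, then slices the summed result back into the original shapes. The same bookkeeping in plain NumPy, with np.sum standing in for collective_ops.all_reduce (a sketch for clarity; names are illustrative):

import numpy as np

def allreduce_by_concat(grads_per_device):
    # grads_per_device: one list of gradient arrays per device; shapes match across devices.
    flat = [np.concatenate([g.ravel() for g in grads]) for grads in grads_per_device]
    total = np.sum(flat, axis=0)          # stand-in for the collective all-reduce
    result = []
    for grads in grads_per_device:
        out, ofs = [], 0
        for g in grads:                    # slice the summed vector back out per tensor
            out.append(total[ofs:ofs + g.size].reshape(g.shape))
            ofs += g.size
        result.append(out)
    return result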
-
-
-class SimpleAdam:
-    """Simplified version of tf.train.AdamOptimizer that behaves identically when used with dnnlib.tflib.Optimizer."""
-
-    def __init__(self, name="Adam", learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8):
-        self.name = name
-        self.learning_rate = learning_rate
-        self.beta1 = beta1
-        self.beta2 = beta2
-        self.epsilon = epsilon
-        self.all_state_vars = []
-
-    def variables(self):
-        return self.all_state_vars
-
-    def compute_gradients(self, loss, var_list, gate_gradients=tf.train.Optimizer.GATE_NONE):
-        assert gate_gradients == tf.train.Optimizer.GATE_NONE
-        return list(zip(tf.gradients(loss, var_list), var_list))
-
-    def apply_gradients(self, grads_and_vars):
-        with tf.name_scope(self.name):
-            state_vars = []
-            update_ops = []
-
-            # Adjust learning rate to deal with startup bias.
-            with tf.control_dependencies(None):
-                b1pow_var = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False)
-                b2pow_var = tf.Variable(dtype=tf.float32, initial_value=1, trainable=False)
-                state_vars += [b1pow_var, b2pow_var]
-            b1pow_new = b1pow_var * self.beta1
-            b2pow_new = b2pow_var * self.beta2
-            update_ops += [tf.assign(b1pow_var, b1pow_new), tf.assign(b2pow_var, b2pow_new)]
-            lr_new = self.learning_rate * tf.sqrt(1 - b2pow_new) / (1 - b1pow_new)
-
-            # Construct ops to update each variable.
-            for grad, var in grads_and_vars:
-                with tf.control_dependencies(None):
-                    m_var = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(var), trainable=False)
-                    v_var = tf.Variable(dtype=tf.float32, initial_value=tf.zeros_like(var), trainable=False)
-                    state_vars += [m_var, v_var]
-                m_new = self.beta1 * m_var + (1 - self.beta1) * grad
-                v_new = self.beta2 * v_var + (1 - self.beta2) * tf.square(grad)
-                var_delta = lr_new * m_new / (tf.sqrt(v_new) + self.epsilon)
-                update_ops += [tf.assign(m_var, m_new), tf.assign(v_var, v_new), tf.assign_sub(var, var_delta)]
-
-            # Group everything together.
-            self.all_state_vars += state_vars
-            return tf.group(*update_ops)
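SimpleAdam folds Adam's startup-bias correction into the step size: lr_t = lr * sqrt(1 - beta2^t) / (1 - beta1^t), where t is the step index tracked by b1pow_var and b2pow_var. A NumPy reference for a single step under the same update rule (a sanity sketch, not part of the library):

import numpy as np

def adam_step(var, grad, m, v, t, lr=0.001, beta1=0.9, beta2=0.999, eps=1e-8):
    # t is the 1-based step index; m and v are the running moment estimates.
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * np.square(grad)
    lr_t = lr * np.sqrt(1 - beta2 ** t) / (1 - beta1 ** t)   # bias-corrected step size
    return var - lr_t * m / (np.sqrt(v) + eps), m, v

Note that, like the class above, epsilon is added to the uncorrected sqrt(v), which matches tf.train.AdamOptimizer's formulation.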
diff --git a/tflib/tfutil.py b/tflib/tfutil.py
deleted file mode 100644
index 1127c7beecfe526b459b3b99ee34e1c431e19e1c..0000000000000000000000000000000000000000
--- a/tflib/tfutil.py
+++ /dev/null
@@ -1,252 +0,0 @@
-# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
-#
-# This work is made available under the Nvidia Source Code License-NC.
-# To view a copy of this license, visit
-# https://nvlabs.github.io/stylegan2/license.html
-
-"""Miscellaneous helper utils for Tensorflow."""
-
-import os
-import numpy as np
-import tensorflow as tf
-
-# Silence deprecation warnings from TensorFlow 1.13 onwards
-import logging
-logging.getLogger('tensorflow').setLevel(logging.ERROR)
-import tensorflow.contrib  # requires TensorFlow 1.x!
-tf.contrib = tensorflow.contrib
-
-from typing import Any, Iterable, List, Union
-
-TfExpression = Union[tf.Tensor, tf.Variable, tf.Operation]
-"""A type that represents a valid Tensorflow expression."""
-
-TfExpressionEx = Union[TfExpression, int, float, np.ndarray]
-"""A type that can be converted to a valid Tensorflow expression."""
-
-
-def run(*args, **kwargs) -> Any:
-    """Run the specified ops in the default session."""
-    assert_tf_initialized()
-    return tf.get_default_session().run(*args, **kwargs)
-
-
-def is_tf_expression(x: Any) -> bool:
-    """Check whether the input is a valid Tensorflow expression, i.e., Tensorflow Tensor, Variable, or Operation."""
-    return isinstance(x, (tf.Tensor, tf.Variable, tf.Operation))
-
-
-def shape_to_list(shape: Iterable[tf.Dimension]) -> List[Union[int, None]]:
-    """Convert a Tensorflow shape to a list of ints. Retained for backwards compatibility -- use TensorShape.as_list() in new code."""
-    return [dim.value for dim in shape]
-
-
-def flatten(x: TfExpressionEx) -> TfExpression:
-    """Shortcut function for flattening a tensor."""
-    with tf.name_scope("Flatten"):
-        return tf.reshape(x, [-1])
-
-
-def log2(x: TfExpressionEx) -> TfExpression:
-    """Logarithm in base 2."""
-    with tf.name_scope("Log2"):
-        return tf.log(x) * np.float32(1.0 / np.log(2.0))
-
-
-def exp2(x: TfExpressionEx) -> TfExpression:
-    """Exponent in base 2."""
-    with tf.name_scope("Exp2"):
-        return tf.exp(x * np.float32(np.log(2.0)))
-
-
-def lerp(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpressionEx:
-    """Linear interpolation."""
-    with tf.name_scope("Lerp"):
-        return a + (b - a) * t
-
-
-def lerp_clip(a: TfExpressionEx, b: TfExpressionEx, t: TfExpressionEx) -> TfExpression:
-    """Linear interpolation with clip."""
-    with tf.name_scope("LerpClip"):
-        return a + (b - a) * tf.clip_by_value(t, 0.0, 1.0)
-
-
-def absolute_name_scope(scope: str) -> tf.name_scope:
-    """Forcefully enter the specified name scope, ignoring any surrounding scopes."""
-    return tf.name_scope(scope + "/")
-
-
-def absolute_variable_scope(scope: str, **kwargs) -> tf.variable_scope:
-    """Forcefully enter the specified variable scope, ignoring any surrounding scopes."""
-    return tf.variable_scope(tf.VariableScope(name=scope, **kwargs), auxiliary_name_scope=False)
-
-
-def _sanitize_tf_config(config_dict: dict = None) -> dict:
-    # Defaults.
-    cfg = dict()
-    cfg["rnd.np_random_seed"] = None  # Random seed for NumPy. None = keep as is.
-    cfg["rnd.tf_random_seed"] = "auto"  # Random seed for TensorFlow. 'auto' = derive from NumPy random state. None = keep as is.
-    cfg["env.TF_CPP_MIN_LOG_LEVEL"] = "1"  # 0 = Print all available debug info from TensorFlow. 1 = Print warnings and errors, but disable debug info.
-    cfg["graph_options.place_pruned_graph"] = True  # False = Check that all ops are available on the designated device. True = Skip the check for ops that are not used.
-    cfg["gpu_options.allow_growth"] = True  # False = Allocate all GPU memory at the beginning. True = Allocate only as much GPU memory as needed.
-
-    # Remove defaults for environment variables that are already set.
-    for key in list(cfg):
-        fields = key.split(".")
-        if fields[0] == "env":
-            assert len(fields) == 2
-            if fields[1] in os.environ:
-                del cfg[key]
-
-    # User overrides.
-    if config_dict is not None:
-        cfg.update(config_dict)
-    return cfg
-
-
-def init_tf(config_dict: dict = None) -> None:
-    """Initialize TensorFlow session using good default settings."""
-    # Skip if already initialized.
-    if tf.get_default_session() is not None:
-        return
-
-    # Setup config dict and random seeds.
-    cfg = _sanitize_tf_config(config_dict)
-    np_random_seed = cfg["rnd.np_random_seed"]
-    if np_random_seed is not None:
-        np.random.seed(np_random_seed)
-    tf_random_seed = cfg["rnd.tf_random_seed"]
-    if tf_random_seed == "auto":
-        tf_random_seed = np.random.randint(1 << 31)
-    if tf_random_seed is not None:
-        tf.set_random_seed(tf_random_seed)
-
-    # Setup environment variables.
-    for key, value in cfg.items():
-        fields = key.split(".")
-        if fields[0] == "env":
-            assert len(fields) == 2
-            os.environ[fields[1]] = str(value)
-
-    # Create default TensorFlow session.
-    create_session(cfg, force_as_default=True)
-
-
-def assert_tf_initialized():
-    """Check that TensorFlow session has been initialized."""
-    if tf.get_default_session() is None:
-        raise RuntimeError("No default TensorFlow session found. Please call dnnlib.tflib.init_tf().")
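A usage sketch for init_tf(): config keys follow the dotted section.option scheme that _sanitize_tf_config() maps onto the ConfigProto fields and environment variables; the override values shown here are illustrative, and the import path is assumed from this repo's layout.

from tflib import tfutil  # assumed import path

tfutil.init_tf({
    "rnd.np_random_seed": 1000,           # fixed NumPy seed instead of the default None
    "gpu_options.allow_growth": False,    # grab all GPU memory up front
    "env.TF_CPP_MIN_LOG_LEVEL": "0",      # print all TensorFlow debug info
})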
-
-
-def create_session(config_dict: dict = None, force_as_default: bool = False) -> tf.Session:
-    """Create tf.Session based on config dict."""
-    # Setup TensorFlow config proto.
-    cfg = _sanitize_tf_config(config_dict)
-    config_proto = tf.ConfigProto()
-    for key, value in cfg.items():
-        fields = key.split(".")
-        if fields[0] not in ["rnd", "env"]:
-            obj = config_proto
-            for field in fields[:-1]:
-                obj = getattr(obj, field)
-            setattr(obj, fields[-1], value)
-
-    # Create session.
-    session = tf.Session(config=config_proto)
-    if force_as_default:
-        # pylint: disable=protected-access
-        session._default_session = session.as_default()
-        session._default_session.enforce_nesting = False
-        session._default_session.__enter__()
-    return session
-
-
-def init_uninitialized_vars(target_vars: List[tf.Variable] = None) -> None:
-    """Initialize all tf.Variables that have not already been initialized.
-
-    Equivalent to the following, but more efficient and does not bloat the tf graph:
-    tf.variables_initializer(tf.report_uninitialized_variables()).run()
-    """
-    assert_tf_initialized()
-    if target_vars is None:
-        target_vars = tf.global_variables()
-
-    test_vars = []
-    test_ops = []
-
-    with tf.control_dependencies(None):  # ignore surrounding control_dependencies
-        for var in target_vars:
-            assert is_tf_expression(var)
-
-            try:
-                tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/IsVariableInitialized:0"))
-            except KeyError:
-                # Op does not exist => variable may be uninitialized.
-                test_vars.append(var)
-
-                with absolute_name_scope(var.name.split(":")[0]):
-                    test_ops.append(tf.is_variable_initialized(var))
-
-    init_vars = [var for var, inited in zip(test_vars, run(test_ops)) if not inited]
-    run([var.initializer for var in init_vars])
-
-
-def set_vars(var_to_value_dict: dict) -> None:
-    """Set the values of given tf.Variables.
-
-    Equivalent to the following, but more efficient and does not bloat the tf graph:
-    tflib.run([tf.assign(var, value) for var, value in var_to_value_dict.items()])
-    """
-    assert_tf_initialized()
-    ops = []
-    feed_dict = {}
-
-    for var, value in var_to_value_dict.items():
-        assert is_tf_expression(var)
-
-        try:
-            setter = tf.get_default_graph().get_tensor_by_name(var.name.replace(":0", "/setter:0"))  # look for existing op
-        except KeyError:
-            with absolute_name_scope(var.name.split(":")[0]):
-                with tf.control_dependencies(None):  # ignore surrounding control_dependencies
-                    setter = tf.assign(var, tf.placeholder(var.dtype, var.shape, "new_value"), name="setter")  # create new setter
-
-        ops.append(setter)
-        feed_dict[setter.op.inputs[1]] = value
-
-    run(ops, feed_dict)
-
-
-def create_var_with_large_initial_value(initial_value: np.ndarray, *args, **kwargs):
-    """Create tf.Variable with large initial value without bloating the tf graph."""
-    assert_tf_initialized()
-    assert isinstance(initial_value, np.ndarray)
-    zeros = tf.zeros(initial_value.shape, initial_value.dtype)
-    var = tf.Variable(zeros, *args, **kwargs)
-    set_vars({var: initial_value})
-    return var
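A usage sketch for the two helpers above: the variable is created against a zeros tensor so the large array never becomes a graph constant, and later writes reuse the cached setter op. Names and shapes are illustrative; init_tf() must have been called first.

import numpy as np

weights = np.random.randn(1024, 1024).astype(np.float32)
var = create_var_with_large_initial_value(weights, name="w")  # graph stores only zeros
set_vars({var: weights * 0.5})                                # reuses the cached setter op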
- """ - images = tf.cast(images, tf.float32) - if nhwc_to_nchw: - images = tf.transpose(images, [0, 3, 1, 2]) - return images * ((drange[1] - drange[0]) / 255) + drange[0] - - -def convert_images_to_uint8(images, drange=[-1,1], nchw_to_nhwc=False, shrink=1): - """Convert a minibatch of images from float32 to uint8 with configurable dynamic range. - Can be used as an output transformation for Network.run(). - """ - images = tf.cast(images, tf.float32) - if shrink > 1: - ksize = [1, 1, shrink, shrink] - images = tf.nn.avg_pool(images, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") - if nchw_to_nhwc: - images = tf.transpose(images, [0, 2, 3, 1]) - scale = 255 / (drange[1] - drange[0]) - images = images * scale + (0.5 - drange[0] * scale) - return tf.saturate_cast(images, tf.uint8)