Spaces:
Sleeping
Sleeping
vukadinovic936
commited on
Commit
•
8806acd
1
Parent(s):
a6bdf5e
deleted
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +1 -0
- submission/__pycache__/__init__.cpython-310.pyc +0 -0
- submission/__pycache__/__init__.cpython-36.pyc +0 -0
- submission/__pycache__/__init__.cpython-38.pyc +0 -0
- submission/__pycache__/__init__.cpython-39.pyc +0 -0
- submission/__pycache__/run_context.cpython-310.pyc +0 -0
- submission/__pycache__/run_context.cpython-36.pyc +0 -0
- submission/__pycache__/run_context.cpython-38.pyc +0 -0
- submission/__pycache__/run_context.cpython-39.pyc +0 -0
- submission/__pycache__/submit.cpython-310.pyc +0 -0
- submission/__pycache__/submit.cpython-36.pyc +0 -0
- submission/__pycache__/submit.cpython-38.pyc +0 -0
- submission/__pycache__/submit.cpython-39.pyc +0 -0
- submission/internal/__pycache__/__init__.cpython-36.pyc +0 -0
- submission/internal/__pycache__/__init__.cpython-38.pyc +0 -0
- submission/internal/__pycache__/__init__.cpython-39.pyc +0 -0
- submission/internal/__pycache__/local.cpython-36.pyc +0 -0
- submission/internal/__pycache__/local.cpython-38.pyc +0 -0
- submission/internal/__pycache__/local.cpython-39.pyc +0 -0
- tflib/.custom_ops.py.un~ +0 -0
- tflib/__init__.py +0 -18
- tflib/__pycache__/__init__.cpython-36.pyc +0 -0
- tflib/__pycache__/__init__.cpython-38.pyc +0 -0
- tflib/__pycache__/__init__.cpython-39.pyc +0 -0
- tflib/__pycache__/autosummary.cpython-36.pyc +0 -0
- tflib/__pycache__/autosummary.cpython-38.pyc +0 -0
- tflib/__pycache__/autosummary.cpython-39.pyc +0 -0
- tflib/__pycache__/custom_ops.cpython-36.pyc +0 -0
- tflib/__pycache__/network.cpython-36.pyc +0 -0
- tflib/__pycache__/optimizer.cpython-36.pyc +0 -0
- tflib/__pycache__/tfutil.cpython-36.pyc +0 -0
- tflib/__pycache__/tfutil.cpython-38.pyc +0 -0
- tflib/__pycache__/tfutil.cpython-39.pyc +0 -0
- tflib/_cudacache/fused_bias_act_26d9116e2d7d28cdd451d2d21a1bb7ac.dll +0 -0
- tflib/_cudacache/fused_bias_act_40c2d778d681ce8357acbd8537ee6613.so +0 -0
- tflib/_cudacache/fused_bias_act_d3f49482b02b6a96d02e754e40c292bc.dll +0 -0
- tflib/_cudacache/upfirdn_2d_0acb578db4996f3ae2116b184d4a1bdb.dll +0 -0
- tflib/_cudacache/upfirdn_2d_79757bc08e1fcc2d526ecf3cb32fad92.so +0 -3
- tflib/_cudacache/upfirdn_2d_f65c24ab0d389aaa83e5b3537e3134e8.dll +0 -0
- tflib/autosummary.py +0 -191
- tflib/custom_ops.py +0 -169
- tflib/custom_ops.py~ +0 -169
- tflib/network.py +0 -590
- tflib/ops/__init__.py +0 -7
- tflib/ops/__pycache__/__init__.cpython-36.pyc +0 -0
- tflib/ops/__pycache__/fused_bias_act.cpython-36.pyc +0 -0
- tflib/ops/__pycache__/upfirdn_2d.cpython-36.pyc +0 -0
- tflib/ops/fused_bias_act.cu +0 -188
- tflib/ops/fused_bias_act.py +0 -196
- tflib/ops/upfirdn_2d.cu +0 -326
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
__pycache__
|
submission/__pycache__/__init__.cpython-310.pyc
DELETED
Binary file (200 Bytes)
|
|
submission/__pycache__/__init__.cpython-36.pyc
DELETED
Binary file (183 Bytes)
|
|
submission/__pycache__/__init__.cpython-38.pyc
DELETED
Binary file (198 Bytes)
|
|
submission/__pycache__/__init__.cpython-39.pyc
DELETED
Binary file (202 Bytes)
|
|
submission/__pycache__/run_context.cpython-310.pyc
DELETED
Binary file (4.26 kB)
|
|
submission/__pycache__/run_context.cpython-36.pyc
DELETED
Binary file (4.19 kB)
|
|
submission/__pycache__/run_context.cpython-38.pyc
DELETED
Binary file (4.22 kB)
|
|
submission/__pycache__/run_context.cpython-39.pyc
DELETED
Binary file (4.24 kB)
|
|
submission/__pycache__/submit.cpython-310.pyc
DELETED
Binary file (11.1 kB)
|
|
submission/__pycache__/submit.cpython-36.pyc
DELETED
Binary file (11.1 kB)
|
|
submission/__pycache__/submit.cpython-38.pyc
DELETED
Binary file (11.2 kB)
|
|
submission/__pycache__/submit.cpython-39.pyc
DELETED
Binary file (11.2 kB)
|
|
submission/internal/__pycache__/__init__.cpython-36.pyc
DELETED
Binary file (157 Bytes)
|
|
submission/internal/__pycache__/__init__.cpython-38.pyc
DELETED
Binary file (172 Bytes)
|
|
submission/internal/__pycache__/__init__.cpython-39.pyc
DELETED
Binary file (176 Bytes)
|
|
submission/internal/__pycache__/local.cpython-36.pyc
DELETED
Binary file (1.15 kB)
|
|
submission/internal/__pycache__/local.cpython-38.pyc
DELETED
Binary file (1.19 kB)
|
|
submission/internal/__pycache__/local.cpython-39.pyc
DELETED
Binary file (1.19 kB)
|
|
tflib/.custom_ops.py.un~
DELETED
Binary file (1.81 kB)
|
|
tflib/__init__.py
DELETED
@@ -1,18 +0,0 @@
|
|
1 |
-
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
|
2 |
-
#
|
3 |
-
# This work is made available under the Nvidia Source Code License-NC.
|
4 |
-
# To view a copy of this license, visit
|
5 |
-
# https://nvlabs.github.io/stylegan2/license.html
|
6 |
-
|
7 |
-
from . import autosummary
|
8 |
-
from . import network
|
9 |
-
from . import optimizer
|
10 |
-
from . import tfutil
|
11 |
-
from . import custom_ops
|
12 |
-
|
13 |
-
from .tfutil import *
|
14 |
-
from .network import Network
|
15 |
-
|
16 |
-
from .optimizer import Optimizer
|
17 |
-
|
18 |
-
from .custom_ops import get_plugin
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tflib/__pycache__/__init__.cpython-36.pyc
DELETED
Binary file (383 Bytes)
|
|
tflib/__pycache__/__init__.cpython-38.pyc
DELETED
Binary file (398 Bytes)
|
|
tflib/__pycache__/__init__.cpython-39.pyc
DELETED
Binary file (402 Bytes)
|
|
tflib/__pycache__/autosummary.cpython-36.pyc
DELETED
Binary file (6.71 kB)
|
|
tflib/__pycache__/autosummary.cpython-38.pyc
DELETED
Binary file (6.75 kB)
|
|
tflib/__pycache__/autosummary.cpython-39.pyc
DELETED
Binary file (7.07 kB)
|
|
tflib/__pycache__/custom_ops.cpython-36.pyc
DELETED
Binary file (4.87 kB)
|
|
tflib/__pycache__/network.cpython-36.pyc
DELETED
Binary file (28.9 kB)
|
|
tflib/__pycache__/optimizer.cpython-36.pyc
DELETED
Binary file (14.7 kB)
|
|
tflib/__pycache__/tfutil.cpython-36.pyc
DELETED
Binary file (8.65 kB)
|
|
tflib/__pycache__/tfutil.cpython-38.pyc
DELETED
Binary file (8.7 kB)
|
|
tflib/__pycache__/tfutil.cpython-39.pyc
DELETED
Binary file (8.82 kB)
|
|
tflib/_cudacache/fused_bias_act_26d9116e2d7d28cdd451d2d21a1bb7ac.dll
DELETED
Binary file (462 kB)
|
|
tflib/_cudacache/fused_bias_act_40c2d778d681ce8357acbd8537ee6613.so
DELETED
Binary file (850 kB)
|
|
tflib/_cudacache/fused_bias_act_d3f49482b02b6a96d02e754e40c292bc.dll
DELETED
Binary file (462 kB)
|
|
tflib/_cudacache/upfirdn_2d_0acb578db4996f3ae2116b184d4a1bdb.dll
DELETED
Binary file (703 kB)
|
|
tflib/_cudacache/upfirdn_2d_79757bc08e1fcc2d526ecf3cb32fad92.so
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:11a9d92fab510cb996dcd2bf8644c34f7c5e963e6abba4ad3e606e590917e3e0
|
3 |
-
size 1099920
|
|
|
|
|
|
|
|
tflib/_cudacache/upfirdn_2d_f65c24ab0d389aaa83e5b3537e3134e8.dll
DELETED
Binary file (703 kB)
|
|
tflib/autosummary.py
DELETED
@@ -1,191 +0,0 @@
|
|
1 |
-
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
|
2 |
-
#
|
3 |
-
# This work is made available under the Nvidia Source Code License-NC.
|
4 |
-
# To view a copy of this license, visit
|
5 |
-
# https://nvlabs.github.io/stylegan2/license.html
|
6 |
-
|
7 |
-
"""Helper for adding automatically tracked values to Tensorboard.
|
8 |
-
|
9 |
-
Autosummary creates an identity op that internally keeps track of the input
|
10 |
-
values and automatically shows up in TensorBoard. The reported value
|
11 |
-
represents an average over input components. The average is accumulated
|
12 |
-
constantly over time and flushed when save_summaries() is called.
|
13 |
-
|
14 |
-
Notes:
|
15 |
-
- The output tensor must be used as an input for something else in the
|
16 |
-
graph. Otherwise, the autosummary op will not get executed, and the average
|
17 |
-
value will not get accumulated.
|
18 |
-
- It is perfectly fine to include autosummaries with the same name in
|
19 |
-
several places throughout the graph, even if they are executed concurrently.
|
20 |
-
- It is ok to also pass in a python scalar or numpy array. In this case, it
|
21 |
-
is added to the average immediately.
|
22 |
-
"""
|
23 |
-
|
24 |
-
from collections import OrderedDict
|
25 |
-
import numpy as np
|
26 |
-
import tensorflow as tf
|
27 |
-
from tensorboard import summary as summary_lib
|
28 |
-
from tensorboard.plugins.custom_scalar import layout_pb2
|
29 |
-
|
30 |
-
from . import tfutil
|
31 |
-
from .tfutil import TfExpression
|
32 |
-
from .tfutil import TfExpressionEx
|
33 |
-
|
34 |
-
# Enable "Custom scalars" tab in TensorBoard for advanced formatting.
|
35 |
-
# Disabled by default to reduce tfevents file size.
|
36 |
-
enable_custom_scalars = False
|
37 |
-
|
38 |
-
_dtype = tf.float64
|
39 |
-
_vars = OrderedDict() # name => [var, ...]
|
40 |
-
_immediate = OrderedDict() # name => update_op, update_value
|
41 |
-
_finalized = False
|
42 |
-
_merge_op = None
|
43 |
-
|
44 |
-
|
45 |
-
def _create_var(name: str, value_expr: TfExpression) -> TfExpression:
|
46 |
-
"""Internal helper for creating autosummary accumulators."""
|
47 |
-
assert not _finalized
|
48 |
-
name_id = name.replace("/", "_")
|
49 |
-
v = tf.cast(value_expr, _dtype)
|
50 |
-
|
51 |
-
if v.shape.is_fully_defined():
|
52 |
-
size = np.prod(v.shape.as_list())
|
53 |
-
size_expr = tf.constant(size, dtype=_dtype)
|
54 |
-
else:
|
55 |
-
size = None
|
56 |
-
size_expr = tf.reduce_prod(tf.cast(tf.shape(v), _dtype))
|
57 |
-
|
58 |
-
if size == 1:
|
59 |
-
if v.shape.ndims != 0:
|
60 |
-
v = tf.reshape(v, [])
|
61 |
-
v = [size_expr, v, tf.square(v)]
|
62 |
-
else:
|
63 |
-
v = [size_expr, tf.reduce_sum(v), tf.reduce_sum(tf.square(v))]
|
64 |
-
v = tf.cond(tf.is_finite(v[1]), lambda: tf.stack(v), lambda: tf.zeros(3, dtype=_dtype))
|
65 |
-
|
66 |
-
with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.control_dependencies(None):
|
67 |
-
var = tf.Variable(tf.zeros(3, dtype=_dtype), trainable=False) # [sum(1), sum(x), sum(x**2)]
|
68 |
-
update_op = tf.cond(tf.is_variable_initialized(var), lambda: tf.assign_add(var, v), lambda: tf.assign(var, v))
|
69 |
-
|
70 |
-
if name in _vars:
|
71 |
-
_vars[name].append(var)
|
72 |
-
else:
|
73 |
-
_vars[name] = [var]
|
74 |
-
return update_op
|
75 |
-
|
76 |
-
|
77 |
-
def autosummary(name: str, value: TfExpressionEx, passthru: TfExpressionEx = None, condition: TfExpressionEx = True) -> TfExpressionEx:
|
78 |
-
"""Create a new autosummary.
|
79 |
-
|
80 |
-
Args:
|
81 |
-
name: Name to use in TensorBoard
|
82 |
-
value: TensorFlow expression or python value to track
|
83 |
-
passthru: Optionally return this TF node without modifications but tack an autosummary update side-effect to this node.
|
84 |
-
|
85 |
-
Example use of the passthru mechanism:
|
86 |
-
|
87 |
-
n = autosummary('l2loss', loss, passthru=n)
|
88 |
-
|
89 |
-
This is a shorthand for the following code:
|
90 |
-
|
91 |
-
with tf.control_dependencies([autosummary('l2loss', loss)]):
|
92 |
-
n = tf.identity(n)
|
93 |
-
"""
|
94 |
-
tfutil.assert_tf_initialized()
|
95 |
-
name_id = name.replace("/", "_")
|
96 |
-
|
97 |
-
if tfutil.is_tf_expression(value):
|
98 |
-
with tf.name_scope("summary_" + name_id), tf.device(value.device):
|
99 |
-
condition = tf.convert_to_tensor(condition, name='condition')
|
100 |
-
update_op = tf.cond(condition, lambda: tf.group(_create_var(name, value)), tf.no_op)
|
101 |
-
with tf.control_dependencies([update_op]):
|
102 |
-
return tf.identity(value if passthru is None else passthru)
|
103 |
-
|
104 |
-
else: # python scalar or numpy array
|
105 |
-
assert not tfutil.is_tf_expression(passthru)
|
106 |
-
assert not tfutil.is_tf_expression(condition)
|
107 |
-
if condition:
|
108 |
-
if name not in _immediate:
|
109 |
-
with tfutil.absolute_name_scope("Autosummary/" + name_id), tf.device(None), tf.control_dependencies(None):
|
110 |
-
update_value = tf.placeholder(_dtype)
|
111 |
-
update_op = _create_var(name, update_value)
|
112 |
-
_immediate[name] = update_op, update_value
|
113 |
-
update_op, update_value = _immediate[name]
|
114 |
-
tfutil.run(update_op, {update_value: value})
|
115 |
-
return value if passthru is None else passthru
|
116 |
-
|
117 |
-
|
118 |
-
def finalize_autosummaries() -> None:
|
119 |
-
"""Create the necessary ops to include autosummaries in TensorBoard report.
|
120 |
-
Note: This should be done only once per graph.
|
121 |
-
"""
|
122 |
-
global _finalized
|
123 |
-
tfutil.assert_tf_initialized()
|
124 |
-
|
125 |
-
if _finalized:
|
126 |
-
return None
|
127 |
-
|
128 |
-
_finalized = True
|
129 |
-
tfutil.init_uninitialized_vars([var for vars_list in _vars.values() for var in vars_list])
|
130 |
-
|
131 |
-
# Create summary ops.
|
132 |
-
with tf.device(None), tf.control_dependencies(None):
|
133 |
-
for name, vars_list in _vars.items():
|
134 |
-
name_id = name.replace("/", "_")
|
135 |
-
with tfutil.absolute_name_scope("Autosummary/" + name_id):
|
136 |
-
moments = tf.add_n(vars_list)
|
137 |
-
moments /= moments[0]
|
138 |
-
with tf.control_dependencies([moments]): # read before resetting
|
139 |
-
reset_ops = [tf.assign(var, tf.zeros(3, dtype=_dtype)) for var in vars_list]
|
140 |
-
with tf.name_scope(None), tf.control_dependencies(reset_ops): # reset before reporting
|
141 |
-
mean = moments[1]
|
142 |
-
std = tf.sqrt(moments[2] - tf.square(moments[1]))
|
143 |
-
tf.summary.scalar(name, mean)
|
144 |
-
if enable_custom_scalars:
|
145 |
-
tf.summary.scalar("xCustomScalars/" + name + "/margin_lo", mean - std)
|
146 |
-
tf.summary.scalar("xCustomScalars/" + name + "/margin_hi", mean + std)
|
147 |
-
|
148 |
-
# Setup layout for custom scalars.
|
149 |
-
layout = None
|
150 |
-
if enable_custom_scalars:
|
151 |
-
cat_dict = OrderedDict()
|
152 |
-
for series_name in sorted(_vars.keys()):
|
153 |
-
p = series_name.split("/")
|
154 |
-
cat = p[0] if len(p) >= 2 else ""
|
155 |
-
chart = "/".join(p[1:-1]) if len(p) >= 3 else p[-1]
|
156 |
-
if cat not in cat_dict:
|
157 |
-
cat_dict[cat] = OrderedDict()
|
158 |
-
if chart not in cat_dict[cat]:
|
159 |
-
cat_dict[cat][chart] = []
|
160 |
-
cat_dict[cat][chart].append(series_name)
|
161 |
-
categories = []
|
162 |
-
for cat_name, chart_dict in cat_dict.items():
|
163 |
-
charts = []
|
164 |
-
for chart_name, series_names in chart_dict.items():
|
165 |
-
series = []
|
166 |
-
for series_name in series_names:
|
167 |
-
series.append(layout_pb2.MarginChartContent.Series(
|
168 |
-
value=series_name,
|
169 |
-
lower="xCustomScalars/" + series_name + "/margin_lo",
|
170 |
-
upper="xCustomScalars/" + series_name + "/margin_hi"))
|
171 |
-
margin = layout_pb2.MarginChartContent(series=series)
|
172 |
-
charts.append(layout_pb2.Chart(title=chart_name, margin=margin))
|
173 |
-
categories.append(layout_pb2.Category(title=cat_name, chart=charts))
|
174 |
-
layout = summary_lib.custom_scalar_pb(layout_pb2.Layout(category=categories))
|
175 |
-
return layout
|
176 |
-
|
177 |
-
def save_summaries(file_writer, global_step=None):
|
178 |
-
"""Call FileWriter.add_summary() with all summaries in the default graph,
|
179 |
-
automatically finalizing and merging them on the first call.
|
180 |
-
"""
|
181 |
-
global _merge_op
|
182 |
-
tfutil.assert_tf_initialized()
|
183 |
-
|
184 |
-
if _merge_op is None:
|
185 |
-
layout = finalize_autosummaries()
|
186 |
-
if layout is not None:
|
187 |
-
file_writer.add_summary(layout)
|
188 |
-
with tf.device(None), tf.control_dependencies(None):
|
189 |
-
_merge_op = tf.summary.merge_all()
|
190 |
-
|
191 |
-
file_writer.add_summary(_merge_op.eval(), global_step)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tflib/custom_ops.py
DELETED
@@ -1,169 +0,0 @@
|
|
1 |
-
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
|
2 |
-
#
|
3 |
-
# This work is made available under the Nvidia Source Code License-NC.
|
4 |
-
# To view a copy of this license, visit
|
5 |
-
# https://nvlabs.github.io/stylegan2/license.html
|
6 |
-
|
7 |
-
"""TensorFlow custom ops builder.
|
8 |
-
"""
|
9 |
-
|
10 |
-
import os
|
11 |
-
import re
|
12 |
-
import uuid
|
13 |
-
import hashlib
|
14 |
-
import tempfile
|
15 |
-
import shutil
|
16 |
-
import tensorflow as tf
|
17 |
-
from tensorflow.python.client import device_lib # pylint: disable=no-name-in-module
|
18 |
-
|
19 |
-
#----------------------------------------------------------------------------
|
20 |
-
# Global options.
|
21 |
-
|
22 |
-
cuda_cache_path = os.path.join(os.path.dirname(__file__), '_cudacache')
|
23 |
-
cuda_cache_version_tag = 'v1'
|
24 |
-
do_not_hash_included_headers = False # Speed up compilation by assuming that headers included by the CUDA code never change. Unsafe!
|
25 |
-
verbose = True # Print status messages to stdout.
|
26 |
-
|
27 |
-
compiler_bindir_search_path = [
|
28 |
-
'C:/Program Files (x86)/Microsoft Visual Studio/2017/Community/VC/Tools/MSVC/14.16.27023/bin/Hostx64/x64',
|
29 |
-
'C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.23.28105/bin/Hostx64/x64',
|
30 |
-
'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin',
|
31 |
-
]
|
32 |
-
|
33 |
-
#----------------------------------------------------------------------------
|
34 |
-
# Internal helper funcs.
|
35 |
-
|
36 |
-
def _find_compiler_bindir():
|
37 |
-
for compiler_path in compiler_bindir_search_path:
|
38 |
-
if os.path.isdir(compiler_path):
|
39 |
-
return compiler_path
|
40 |
-
return None
|
41 |
-
|
42 |
-
def _get_compute_cap(device):
|
43 |
-
caps_str = device.physical_device_desc
|
44 |
-
m = re.search('compute capability: (\\d+).(\\d+)', caps_str)
|
45 |
-
major = m.group(1)
|
46 |
-
minor = m.group(2)
|
47 |
-
return (major, minor)
|
48 |
-
|
49 |
-
def _get_cuda_gpu_arch_string():
|
50 |
-
gpus = [x for x in device_lib.list_local_devices() if x.device_type == 'GPU']
|
51 |
-
if len(gpus) == 0:
|
52 |
-
raise RuntimeError('No GPU devices found')
|
53 |
-
(major, minor) = _get_compute_cap(gpus[0])
|
54 |
-
return 'sm_%s%s' % (major, minor)
|
55 |
-
|
56 |
-
def _run_cmd(cmd):
|
57 |
-
with os.popen(cmd) as pipe:
|
58 |
-
output = pipe.read()
|
59 |
-
status = pipe.close()
|
60 |
-
if status is not None:
|
61 |
-
raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, output))
|
62 |
-
|
63 |
-
def _prepare_nvcc_cli(opts):
|
64 |
-
cmd = 'nvcc ' + opts.strip()
|
65 |
-
cmd += ' --disable-warnings'
|
66 |
-
cmd += ' --include-path "%s"' % tf.sysconfig.get_include()
|
67 |
-
cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'protobuf_archive', 'src')
|
68 |
-
cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'com_google_absl')
|
69 |
-
cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'eigen_archive')
|
70 |
-
|
71 |
-
compiler_bindir = _find_compiler_bindir()
|
72 |
-
if compiler_bindir is None:
|
73 |
-
# Require that _find_compiler_bindir succeeds on Windows. Allow
|
74 |
-
# nvcc to use whatever is the default on Linux.
|
75 |
-
if os.name == 'nt':
|
76 |
-
raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__)
|
77 |
-
else:
|
78 |
-
cmd += ' --compiler-bindir "%s"' % compiler_bindir
|
79 |
-
cmd += ' 2>&1'
|
80 |
-
return cmd
|
81 |
-
|
82 |
-
#----------------------------------------------------------------------------
|
83 |
-
# Main entry point.
|
84 |
-
|
85 |
-
_plugin_cache = dict()
|
86 |
-
|
87 |
-
def get_plugin(cuda_file):
|
88 |
-
cuda_file_base = os.path.basename(cuda_file)
|
89 |
-
cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base)
|
90 |
-
|
91 |
-
# Already in cache?
|
92 |
-
if cuda_file in _plugin_cache:
|
93 |
-
return _plugin_cache[cuda_file]
|
94 |
-
|
95 |
-
# Setup plugin.
|
96 |
-
if verbose:
|
97 |
-
print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True)
|
98 |
-
try:
|
99 |
-
# Hash CUDA source.
|
100 |
-
md5 = hashlib.md5()
|
101 |
-
with open(cuda_file, 'rb') as f:
|
102 |
-
md5.update(f.read())
|
103 |
-
md5.update(b'\n')
|
104 |
-
|
105 |
-
# Hash headers included by the CUDA code by running it through the preprocessor.
|
106 |
-
if not do_not_hash_included_headers:
|
107 |
-
if verbose:
|
108 |
-
print('Preprocessing... ', end='', flush=True)
|
109 |
-
with tempfile.TemporaryDirectory() as tmp_dir:
|
110 |
-
tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext)
|
111 |
-
_run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)))
|
112 |
-
with open(tmp_file, 'rb') as f:
|
113 |
-
bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8') # __FILE__ in error check macros
|
114 |
-
good_file_str = ('"' + cuda_file_base + '"').encode('utf-8')
|
115 |
-
for ln in f:
|
116 |
-
if not ln.startswith(b'# ') and not ln.startswith(b'#line '): # ignore line number pragmas
|
117 |
-
ln = ln.replace(bad_file_str, good_file_str)
|
118 |
-
md5.update(ln)
|
119 |
-
md5.update(b'\n')
|
120 |
-
|
121 |
-
# Select compiler options.
|
122 |
-
compile_opts = ''
|
123 |
-
if os.name == 'nt':
|
124 |
-
compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib')
|
125 |
-
elif os.name == 'posix':
|
126 |
-
compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.so')
|
127 |
-
compile_opts += ' --compiler-options \'-fPIC -D_GLIBCXX_USE_CXX11_ABI=0\''
|
128 |
-
else:
|
129 |
-
assert False # not Windows or Linux, w00t?
|
130 |
-
compile_opts += ' --gpu-architecture=%s' % _get_cuda_gpu_arch_string()
|
131 |
-
compile_opts += ' --use_fast_math'
|
132 |
-
nvcc_cmd = _prepare_nvcc_cli(compile_opts)
|
133 |
-
|
134 |
-
# Hash build configuration.
|
135 |
-
md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n')
|
136 |
-
md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n')
|
137 |
-
md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n')
|
138 |
-
|
139 |
-
# Compile if not already compiled.
|
140 |
-
bin_file_ext = '.dll' if os.name == 'nt' else '.so'
|
141 |
-
bin_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + md5.hexdigest() + bin_file_ext)
|
142 |
-
if not os.path.isfile(bin_file):
|
143 |
-
if verbose:
|
144 |
-
print('Compiling... ', end='', flush=True)
|
145 |
-
with tempfile.TemporaryDirectory() as tmp_dir:
|
146 |
-
tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext)
|
147 |
-
_run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))
|
148 |
-
os.makedirs(cuda_cache_path, exist_ok=True)
|
149 |
-
intermediate_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext)
|
150 |
-
shutil.copyfile(tmp_file, intermediate_file)
|
151 |
-
os.rename(intermediate_file, bin_file) # atomic
|
152 |
-
|
153 |
-
# Load.
|
154 |
-
if verbose:
|
155 |
-
print('Loading... ', end='', flush=True)
|
156 |
-
plugin = tf.load_op_library(bin_file)
|
157 |
-
|
158 |
-
# Add to cache.
|
159 |
-
_plugin_cache[cuda_file] = plugin
|
160 |
-
if verbose:
|
161 |
-
print('Done.', flush=True)
|
162 |
-
return plugin
|
163 |
-
|
164 |
-
except:
|
165 |
-
if verbose:
|
166 |
-
print('Failed!', flush=True)
|
167 |
-
raise
|
168 |
-
|
169 |
-
#----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tflib/custom_ops.py~
DELETED
@@ -1,169 +0,0 @@
|
|
1 |
-
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
|
2 |
-
#
|
3 |
-
# This work is made available under the Nvidia Source Code License-NC.
|
4 |
-
# To view a copy of this license, visit
|
5 |
-
# https://nvlabs.github.io/stylegan2/license.html
|
6 |
-
|
7 |
-
"""TensorFlow custom ops builder.
|
8 |
-
"""
|
9 |
-
|
10 |
-
import os
|
11 |
-
import re
|
12 |
-
import uuid
|
13 |
-
import hashlib
|
14 |
-
import tempfile
|
15 |
-
import shutil
|
16 |
-
import tensorflow as tf
|
17 |
-
from tensorflow.python.client import device_lib # pylint: disable=no-name-in-module
|
18 |
-
|
19 |
-
#----------------------------------------------------------------------------
|
20 |
-
# Global options.
|
21 |
-
|
22 |
-
cuda_cache_path = os.path.join(os.path.dirname(__file__), '_cudacache')
|
23 |
-
cuda_cache_version_tag = 'v1'
|
24 |
-
do_not_hash_included_headers = False # Speed up compilation by assuming that headers included by the CUDA code never change. Unsafe!
|
25 |
-
verbose = True # Print status messages to stdout.
|
26 |
-
|
27 |
-
compiler_bindir_search_path = [
|
28 |
-
'C:/Program Files (x86)/Microsoft Visual Studio/2017/Community/VC/Tools/MSVC/14.16.27023/bin/Hostx64/x64',
|
29 |
-
'C:/Program Files (x86)/Microsoft Visual Studio/2019/Community/VC/Tools/MSVC/14.23.28105/bin/Hostx64/x64',
|
30 |
-
'C:/Program Files (x86)/Microsoft Visual Studio 14.0/vc/bin',
|
31 |
-
]
|
32 |
-
|
33 |
-
#----------------------------------------------------------------------------
|
34 |
-
# Internal helper funcs.
|
35 |
-
|
36 |
-
def _find_compiler_bindir():
|
37 |
-
for compiler_path in compiler_bindir_search_path:
|
38 |
-
if os.path.isdir(compiler_path):
|
39 |
-
return compiler_path
|
40 |
-
return None
|
41 |
-
|
42 |
-
def _get_compute_cap(device):
|
43 |
-
caps_str = device.physical_device_desc
|
44 |
-
m = re.search('compute capability: (\\d+).(\\d+)', caps_str)
|
45 |
-
major = m.group(1)
|
46 |
-
minor = m.group(2)
|
47 |
-
return (major, minor)
|
48 |
-
|
49 |
-
def _get_cuda_gpu_arch_string():
|
50 |
-
gpus = [x for x in device_lib.list_local_devices() if x.device_type == 'GPU']
|
51 |
-
if len(gpus) == 0:
|
52 |
-
raise RuntimeError('No GPU devices found')
|
53 |
-
(major, minor) = _get_compute_cap(gpus[0])
|
54 |
-
return 'sm_%s%s' % (major, minor)
|
55 |
-
|
56 |
-
def _run_cmd(cmd):
|
57 |
-
with os.popen(cmd) as pipe:
|
58 |
-
output = pipe.read()
|
59 |
-
status = pipe.close()
|
60 |
-
if status is not None:
|
61 |
-
raise RuntimeError('NVCC returned an error. See below for full command line and output log:\n\n%s\n\n%s' % (cmd, output))
|
62 |
-
|
63 |
-
def _prepare_nvcc_cli(opts):
|
64 |
-
cmd = 'nvcc ' + opts.strip()
|
65 |
-
cmd += ' --disable-warnings'
|
66 |
-
cmd += ' --include-path "%s"' % tf.sysconfig.get_include()
|
67 |
-
cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'protobuf_archive', 'src')
|
68 |
-
cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'com_google_absl')
|
69 |
-
cmd += ' --include-path "%s"' % os.path.join(tf.sysconfig.get_include(), 'external', 'eigen_archive')
|
70 |
-
|
71 |
-
compiler_bindir = _find_compiler_bindir()
|
72 |
-
if compiler_bindir is None:
|
73 |
-
# Require that _find_compiler_bindir succeeds on Windows. Allow
|
74 |
-
# nvcc to use whatever is the default on Linux.
|
75 |
-
if os.name == 'nt':
|
76 |
-
raise RuntimeError('Could not find MSVC/GCC/CLANG installation on this computer. Check compiler_bindir_search_path list in "%s".' % __file__)
|
77 |
-
else:
|
78 |
-
cmd += ' --compiler-bindir "%s"' % compiler_bindir
|
79 |
-
cmd += ' 2>&1'
|
80 |
-
return cmd
|
81 |
-
|
82 |
-
#----------------------------------------------------------------------------
|
83 |
-
# Main entry point.
|
84 |
-
|
85 |
-
_plugin_cache = dict()  # Loaded plugins, keyed by the cuda_file argument.

def get_plugin(cuda_file):
    """Compile (if necessary) and load the TensorFlow op library for a CUDA source file.

    The compiled binary is stored under ``cuda_cache_path`` with a file name
    derived from an MD5 hash of the CUDA source, optionally its preprocessed
    output, the nvcc command line, the TensorFlow version and
    ``cuda_cache_version_tag``. An existing binary with a matching name is
    reused instead of recompiling. The module-level settings ``verbose``,
    ``do_not_hash_included_headers``, ``cuda_cache_path`` and
    ``cuda_cache_version_tag`` are read here but defined outside this block.

    Args:
        cuda_file: Path to the CUDA source file. Also used as the key of the
            in-process plugin cache.

    Returns:
        The module object returned by ``tf.load_op_library``.

    Raises:
        RuntimeError: If nvcc fails, no GPU is found, or the operating system
            is neither Windows (``'nt'``) nor POSIX.
    """
    cuda_file_base = os.path.basename(cuda_file)
    cuda_file_name, cuda_file_ext = os.path.splitext(cuda_file_base)

    # Already in cache?
    if cuda_file in _plugin_cache:
        return _plugin_cache[cuda_file]

    # Setup plugin.
    if verbose:
        print('Setting up TensorFlow plugin "%s": ' % cuda_file_base, end='', flush=True)
    try:
        # Hash CUDA source.
        md5 = hashlib.md5()
        with open(cuda_file, 'rb') as f:
            md5.update(f.read())
        md5.update(b'\n')

        # Hash headers included by the CUDA code by running it through the preprocessor.
        if not do_not_hash_included_headers:
            if verbose:
                print('Preprocessing... ', end='', flush=True)
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + cuda_file_ext)
                _run_cmd(_prepare_nvcc_cli('"%s" --preprocess -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir)))
                with open(tmp_file, 'rb') as f:
                    # Replace the full source path (__FILE__ in error check macros)
                    # with the base name so the hash does not depend on the directory.
                    bad_file_str = ('"' + cuda_file.replace('\\', '/') + '"').encode('utf-8')
                    good_file_str = ('"' + cuda_file_base + '"').encode('utf-8')
                    for ln in f:
                        if not ln.startswith((b'# ', b'#line ')):  # ignore line number pragmas
                            md5.update(ln.replace(bad_file_str, good_file_str))
                    md5.update(b'\n')

        # Select compiler options.
        compile_opts = ''
        if os.name == 'nt':
            compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.lib')
        elif os.name == 'posix':
            compile_opts += '"%s"' % os.path.join(tf.sysconfig.get_lib(), 'python', '_pywrap_tensorflow_internal.so')
            compile_opts += ' --compiler-options \'-fPIC -D_GLIBCXX_USE_CXX11_ABI=0\''
        else:
            # Explicit raise instead of `assert False`, which is stripped under -O
            # and would let the build continue without a library to link against.
            raise RuntimeError('Unsupported operating system "%s"; expected "nt" or "posix".' % os.name)
        compile_opts += ' --gpu-architecture=%s' % _get_cuda_gpu_arch_string()
        compile_opts += ' --use_fast_math'
        nvcc_cmd = _prepare_nvcc_cli(compile_opts)

        # Hash build configuration.
        md5.update(('nvcc_cmd: ' + nvcc_cmd).encode('utf-8') + b'\n')
        md5.update(('tf.VERSION: ' + tf.VERSION).encode('utf-8') + b'\n')
        md5.update(('cuda_cache_version_tag: ' + cuda_cache_version_tag).encode('utf-8') + b'\n')

        # Compile if not already compiled.
        bin_file_ext = '.dll' if os.name == 'nt' else '.so'
        bin_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + md5.hexdigest() + bin_file_ext)
        if not os.path.isfile(bin_file):
            if verbose:
                print('Compiling... ', end='', flush=True)
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_file = os.path.join(tmp_dir, cuda_file_name + '_tmp' + bin_file_ext)
                _run_cmd(nvcc_cmd + ' "%s" --shared -o "%s" --keep --keep-dir "%s"' % (cuda_file, tmp_file, tmp_dir))
                os.makedirs(cuda_cache_path, exist_ok=True)
                # Copy under a unique name first, then move into place in one step.
                intermediate_file = os.path.join(cuda_cache_path, cuda_file_name + '_' + uuid.uuid4().hex + '_tmp' + bin_file_ext)
                shutil.copyfile(tmp_file, intermediate_file)
                # os.replace overwrites an existing destination on every platform;
                # os.rename raises on Windows if another process got there first.
                os.replace(intermediate_file, bin_file)

        # Load.
        if verbose:
            print('Loading... ', end='', flush=True)
        plugin = tf.load_op_library(bin_file)

        # Add to cache.
        _plugin_cache[cuda_file] = plugin
        if verbose:
            print('Done.', flush=True)
        return plugin

    except:
        # Finish the progress line, then let the original exception propagate.
        if verbose:
            print('Failed!', flush=True)
        raise
|
168 |
-
|
169 |
-
#----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tflib/network.py
DELETED
@@ -1,590 +0,0 @@
|
|
1 |
-
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
|
2 |
-
#
|
3 |
-
# This work is made available under the Nvidia Source Code License-NC.
|
4 |
-
# To view a copy of this license, visit
|
5 |
-
# https://nvlabs.github.io/stylegan2/license.html
|
6 |
-
|
7 |
-
"""Helper for managing networks."""
|
8 |
-
|
9 |
-
import types
|
10 |
-
import inspect
|
11 |
-
import re
|
12 |
-
import uuid
|
13 |
-
import sys
|
14 |
-
import numpy as np
|
15 |
-
import tensorflow as tf
|
16 |
-
|
17 |
-
from collections import OrderedDict
|
18 |
-
from typing import Any, List, Tuple, Union
|
19 |
-
|
20 |
-
from . import tfutil
|
21 |
-
from .. import util
|
22 |
-
|
23 |
-
from .tfutil import TfExpression, TfExpressionEx
|
24 |
-
|
25 |
-
_import_handlers = [] # Custom import handlers for dealing with legacy data in pickle import.
|
26 |
-
_import_module_src = dict() # Source code for temporary modules created during pickle import.
|
27 |
-
|
28 |
-
|
29 |
-
def import_handler(handler_func):
|
30 |
-
"""Function decorator for declaring custom import handlers."""
|
31 |
-
_import_handlers.append(handler_func)
|
32 |
-
return handler_func
|
33 |
-
|
34 |
-
|
35 |
-
class Network:
|
36 |
-
"""Generic network abstraction.
|
37 |
-
|
38 |
-
Acts as a convenience wrapper for a parameterized network construction
|
39 |
-
function, providing several utility methods and convenient access to
|
40 |
-
the inputs/outputs/weights.
|
41 |
-
|
42 |
-
Network objects can be safely pickled and unpickled for long-term
|
43 |
-
archival purposes. The pickling works reliably as long as the underlying
|
44 |
-
network construction function is defined in a standalone Python module
|
45 |
-
that has no side effects or application-specific imports.
|
46 |
-
|
47 |
-
Args:
|
48 |
-
name: Network name. Used to select TensorFlow name and variable scopes.
|
49 |
-
func_name: Fully qualified name of the underlying network construction function, or a top-level function object.
|
50 |
-
static_kwargs: Keyword arguments to be passed in to the network construction function.
|
51 |
-
|
52 |
-
Attributes:
|
53 |
-
name: User-specified name, defaults to build func name if None.
|
54 |
-
scope: Unique TensorFlow scope containing template graph and variables, derived from the user-specified name.
|
55 |
-
static_kwargs: Arguments passed to the user-supplied build func.
|
56 |
-
components: Container for sub-networks. Passed to the build func, and retained between calls.
|
57 |
-
num_inputs: Number of input tensors.
|
58 |
-
num_outputs: Number of output tensors.
|
59 |
-
input_shapes: Input tensor shapes (NC or NCHW), including minibatch dimension.
|
60 |
-
output_shapes: Output tensor shapes (NC or NCHW), including minibatch dimension.
|
61 |
-
input_shape: Short-hand for input_shapes[0].
|
62 |
-
output_shape: Short-hand for output_shapes[0].
|
63 |
-
input_templates: Input placeholders in the template graph.
|
64 |
-
output_templates: Output tensors in the template graph.
|
65 |
-
input_names: Name string for each input.
|
66 |
-
output_names: Name string for each output.
|
67 |
-
own_vars: Variables defined by this network (local_name => var), excluding sub-networks.
|
68 |
-
vars: All variables (local_name => var).
|
69 |
-
trainables: All trainable variables (local_name => var).
|
70 |
-
var_global_to_local: Mapping from variable global names to local names.
|
71 |
-
"""
|
72 |
-
|
73 |
-
def __init__(self, name: str = None, func_name: Any = None, **static_kwargs):
    """Create a network by running its build function to construct the template graph.

    Args:
        name: Network name, or None.
        func_name: Fully qualified name of the build function, or a
            top-level function object.
        **static_kwargs: Keyword arguments stored on the network and passed
            to the build function; they must be pickleable.
    """
    tfutil.assert_tf_initialized()
    assert name is None or isinstance(name, str)
    assert func_name is not None
    assert isinstance(func_name, str) or util.is_top_level_function(func_name)
    assert util.is_pickleable(static_kwargs)

    self._init_fields()
    self.name = name
    self.static_kwargs = util.EasyDict(static_kwargs)

    # Resolve the build function; a function object is first reduced to its name.
    qualified_name = func_name
    if util.is_top_level_function(qualified_name):
        qualified_name = util.get_top_level_function_name(qualified_name)
    build_module, self._build_func_name = util.get_module_from_obj_name(qualified_name)
    self._build_func = util.get_obj_from_module(build_module, self._build_func_name)
    assert callable(self._build_func)

    # Prefer source recorded during pickle import; otherwise read it from the module.
    module_src = _import_module_src.get(build_module, None)
    if module_src is None:
        module_src = inspect.getsource(build_module)
    self._build_module_src = module_src

    # Init TensorFlow graph.
    self._init_graph()
    self.reset_own_vars()
|
99 |
-
|
100 |
-
def _init_fields(self) -> None:
    """Reset every attribute of the network to its empty default value."""
    # Identity and configuration.
    self.name = None
    self.scope = None
    self.static_kwargs = util.EasyDict()
    self.components = util.EasyDict()

    # Input/output bookkeeping.
    self.num_inputs = 0
    self.num_outputs = 0
    self.input_shapes = [[]]
    self.output_shapes = [[]]
    self.input_shape = []
    self.output_shape = []
    self.input_templates = []
    self.output_templates = []
    self.input_names = []
    self.output_names = []

    # Variable bookkeeping.
    self.own_vars = OrderedDict()
    self.vars = OrderedDict()
    self.trainables = OrderedDict()
    self.var_global_to_local = OrderedDict()

    # Private state.
    self._build_func = None  # User-supplied build function that constructs the network.
    self._build_func_name = None  # Name of the build function.
    self._build_module_src = None  # Full source code of the module containing the build function.
    self._run_cache = {}  # Cached graph data for Network.run().
|
124 |
-
|
125 |
-
def _init_graph(self) -> None:
    """Build the template graph and fill in the input, output and variable fields.

    Raises:
        ValueError: If an input or output template has no defined rank, if a
            component is not a Network, or if component names are not unique.
    """
    # Inputs are the build function's positional-or-keyword parameters without defaults.
    build_params = inspect.signature(self._build_func).parameters.values()
    self.input_names = [
        param.name
        for param in build_params
        if param.kind == param.POSITIONAL_OR_KEYWORD and param.default is param.empty
    ]
    self.num_inputs = len(self.input_names)
    assert self.num_inputs >= 1

    # Choose name and scope.
    if self.name is None:
        self.name = self._build_func_name
    assert re.match("^[A-Za-z0-9_.\\-]*$", self.name)
    with tf.name_scope(None):
        self.scope = tf.get_default_graph().unique_name(self.name, mark_as_used=True)

    # Finalize build func kwargs.
    build_kwargs = dict(self.static_kwargs, is_template_graph=True, components=self.components)

    # Build template graph, ignoring surrounding scopes.
    with tfutil.absolute_variable_scope(self.scope, reuse=False), tfutil.absolute_name_scope(self.scope):
        assert tf.get_variable_scope().name == self.scope
        assert tf.get_default_graph().get_name_scope() == self.scope
        with tf.control_dependencies(None):  # ignore surrounding control dependencies
            self.input_templates = [tf.placeholder(tf.float32, name=input_name) for input_name in self.input_names]
            out_expr = self._build_func(*self.input_templates, **build_kwargs)

    # Collect outputs: a single expression or a tuple of expressions.
    assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple)
    if tfutil.is_tf_expression(out_expr):
        self.output_templates = [out_expr]
    else:
        self.output_templates = list(out_expr)
    self.num_outputs = len(self.output_templates)
    assert self.num_outputs >= 1
    assert all(tfutil.is_tf_expression(t) for t in self.output_templates)

    # Perform sanity checks.
    if any(t.shape.ndims is None for t in self.input_templates):
        raise ValueError("Network input shapes not defined. Please call x.set_shape() for each input.")
    if any(t.shape.ndims is None for t in self.output_templates):
        raise ValueError("Network output shapes not defined. Please call x.set_shape() where applicable.")
    if any(not isinstance(comp, Network) for comp in self.components.values()):
        raise ValueError("Components of a Network must be Networks themselves.")
    component_names = {comp.name for comp in self.components.values()}
    if len(self.components) != len(component_names):
        raise ValueError("Components of a Network must have unique names.")

    # List inputs and outputs.
    self.input_shapes = [t.shape.as_list() for t in self.input_templates]
    self.output_shapes = [t.shape.as_list() for t in self.output_templates]
    self.input_shape = self.input_shapes[0]
    self.output_shape = self.output_shapes[0]
    self.output_names = [t.name.split("/")[-1].split(":")[0] for t in self.output_templates]

    # List variables. Local names drop the "<scope>/" prefix and the ":N" suffix.
    scope_prefix_len = len(self.scope) + 1
    self.own_vars = OrderedDict(
        (var.name[scope_prefix_len:].split(":")[0], var)
        for var in tf.global_variables(self.scope + "/")
    )
    self.vars = OrderedDict(self.own_vars)
    for comp in self.components.values():
        for local_name, var in comp.vars.items():
            self.vars[comp.name + "/" + local_name] = var
    self.trainables = OrderedDict((local_name, var) for local_name, var in self.vars.items() if var.trainable)
    self.var_global_to_local = OrderedDict((var.name.split(":")[0], local_name) for local_name, var in self.vars.items())
|
186 |
-
|
187 |
-
def reset_own_vars(self) -> None:
    """Run the initializers of this network's own variables (sub-networks excluded)."""
    initializers = [variable.initializer for variable in self.own_vars.values()]
    tfutil.run(initializers)
|
190 |
-
|
191 |
-
def reset_vars(self) -> None:
    """Run the initializers of all variables of this network (sub-networks included)."""
    initializers = [variable.initializer for variable in self.vars.values()]
    tfutil.run(initializers)
|
194 |
-
|
195 |
-
def reset_trainables(self) -> None:
    """Run the initializers of all trainable variables (sub-networks included)."""
    initializers = [variable.initializer for variable in self.trainables.values()]
    tfutil.run(initializers)
|
198 |
-
|
199 |
-
def get_output_for(self, *in_expr: TfExpression, return_as_list: bool = False, **dynamic_kwargs) -> Union[TfExpression, List[TfExpression]]:
    """Construct TensorFlow expression(s) for the network output(s) given input expression(s).

    Args:
        *in_expr: One expression per network input. Entries may be None (but
            not all of them); a None input is replaced by zeros whose first
            dimension follows the first non-None input.
        return_as_list: If True, always return a list of output expressions.
        **dynamic_kwargs: Extra keyword arguments for the build function;
            they override the static kwargs of the same name.

    Returns:
        Whatever the build function returned (an expression or a tuple), or
        a list of expressions when ``return_as_list`` is True.
    """
    assert len(in_expr) == self.num_inputs
    assert any(expr is not None for expr in in_expr)

    # Finalize build func kwargs.
    build_kwargs = {
        **self.static_kwargs,
        **dynamic_kwargs,
        "is_template_graph": False,
        "components": self.components,
    }

    # Build TensorFlow graph to evaluate the network, reusing the existing variables.
    with tfutil.absolute_variable_scope(self.scope, reuse=True), tf.name_scope(self.name):
        assert tf.get_variable_scope().name == self.scope
        given_inputs = [expr for expr in in_expr if expr is not None]
        final_inputs = []
        for expr, input_name, input_shape in zip(in_expr, self.input_names, self.input_shapes):
            if expr is None:
                batch_dim = tf.shape(given_inputs[0])[0]
                final_inputs.append(tf.zeros([batch_dim] + input_shape[1:], name=input_name))
            else:
                final_inputs.append(tf.identity(expr, name=input_name))
        out_expr = self._build_func(*final_inputs, **build_kwargs)

    # Propagate input shapes back to the user-specified expressions.
    for expr, final in zip(in_expr, final_inputs):
        if isinstance(expr, tf.Tensor):
            expr.set_shape(final.shape)

    # Express outputs in the desired format.
    assert tfutil.is_tf_expression(out_expr) or isinstance(out_expr, tuple)
    if not return_as_list:
        return out_expr
    if tfutil.is_tf_expression(out_expr):
        return [out_expr]
    return list(out_expr)
|
233 |
-
|
234 |
-
def get_var_local_name(self, var_or_global_name: Union[TfExpression, str]) -> str:
    """Get the local name of a given variable, without any surrounding name scopes.

    Args:
        var_or_global_name: A variable, or its global name. A trailing
            ``":<index>"`` output suffix is ignored.

    Returns:
        The local name recorded for the variable in ``var_global_to_local``.

    Raises:
        KeyError: If the variable is not known to this network.
    """
    assert tfutil.is_tf_expression(var_or_global_name) or isinstance(var_or_global_name, str)
    global_name = var_or_global_name if isinstance(var_or_global_name, str) else var_or_global_name.name
    # The lookup table is keyed by var.name.split(":")[0], so drop the output
    # suffix here as well; otherwise passing a variable always missed the table.
    return self.var_global_to_local[global_name.split(":")[0]]
|
239 |
-
|
240 |
-
def find_var(self, var_or_local_name: Union[TfExpression, str]) -> TfExpression:
    """Resolve a local variable name to its variable.

    A string is looked up in ``self.vars``; anything else is returned as-is.
    """
    assert tfutil.is_tf_expression(var_or_local_name) or isinstance(var_or_local_name, str)
    if isinstance(var_or_local_name, str):
        return self.vars[var_or_local_name]
    return var_or_local_name
|
244 |
-
|
245 |
-
def get_var(self, var_or_local_name: Union[TfExpression, str]) -> np.ndarray:
    """Evaluate a single variable and return its current value.

    Note: This method is very inefficient -- prefer to use tflib.run(list_of_vars) whenever possible.
    """
    variable = self.find_var(var_or_local_name)
    return variable.eval()
|
249 |
-
|
250 |
-
def set_var(self, var_or_local_name: Union[TfExpression, str], new_value: Union[int, float, np.ndarray]) -> None:
    """Assign a new value to a single variable.

    Note: This method is very inefficient -- prefer to use tflib.set_vars() whenever possible.
    """
    variable = self.find_var(var_or_local_name)
    tfutil.set_vars({variable: new_value})
|
254 |
-
|
255 |
-
def __getstate__(self) -> dict:
    """Pickle export.

    Returns a plain dict with the format version, name, static kwargs,
    components, build-module source, build function name and the current
    values of this network's own variables as ``(local_name, value)`` pairs.
    """
    return {
        "version": 4,
        "name": self.name,
        "static_kwargs": dict(self.static_kwargs),
        "components": dict(self.components),
        "build_module_src": self._build_module_src,
        "build_func_name": self._build_func_name,
        # Fetch all variable values in a single tfutil.run() call.
        "variables": list(zip(self.own_vars.keys(), tfutil.run(list(self.own_vars.values())))),
    }
|
266 |
-
|
267 |
-
def __setstate__(self, state: dict) -> None:
    """Pickle import.

    Rebuilds the network from a state dict: re-executes the stored build
    module source in a fresh temporary module, rebuilds the template graph,
    and restores the saved variable values.
    """
    # pylint: disable=attribute-defined-outside-init
    tfutil.assert_tf_initialized()
    self._init_fields()

    # Give every registered import handler a chance to rewrite the state.
    for handler in _import_handlers:
        state = handler(state)

    # Set basic fields.
    assert state["version"] in (2, 3, 4)
    self.name = state["name"]
    self.static_kwargs = util.EasyDict(state["static_kwargs"])
    self.components = util.EasyDict(state.get("components", {}))
    self._build_module_src = state["build_module_src"]
    self._build_func_name = state["build_func_name"]

    # Create temporary module from the imported source code.
    temp_module_name = "_tflib_network_import_" + uuid.uuid4().hex
    temp_module = types.ModuleType(temp_module_name)
    sys.modules[temp_module_name] = temp_module
    _import_module_src[temp_module] = self._build_module_src
    # NOTE(security): this executes source code taken from the pickle; only
    # unpickle networks that come from a trusted origin.
    exec(self._build_module_src, temp_module.__dict__)  # pylint: disable=exec-used

    # Locate network build function in the temporary module.
    self._build_func = util.get_obj_from_module(temp_module, self._build_func_name)
    assert callable(self._build_func)

    # Init TensorFlow graph, then overwrite the fresh variables with the saved values.
    self._init_graph()
    self.reset_own_vars()
    saved_values = {self.find_var(var_name): value for var_name, value in state["variables"]}
    tfutil.set_vars(saved_values)
|
300 |
-
|
301 |
-
def clone(self, name: str = None, **new_static_kwargs) -> "Network":
    """Create a clone of this network with its own copy of the variables.

    Args:
        name: Name for the clone; None keeps this network's name.
        **new_static_kwargs: Static kwargs that override this network's.
    """
    # pylint: disable=protected-access
    duplicate = object.__new__(Network)  # bypass __init__; fields are filled in by hand
    duplicate._init_fields()
    duplicate.name = self.name if name is None else name
    duplicate.static_kwargs = util.EasyDict(self.static_kwargs)
    duplicate.static_kwargs.update(new_static_kwargs)
    duplicate._build_module_src = self._build_module_src
    duplicate._build_func_name = self._build_func_name
    duplicate._build_func = self._build_func
    duplicate._init_graph()
    duplicate.copy_vars_from(self)
    return duplicate
|
315 |
-
|
316 |
-
def copy_own_vars_from(self, src_net: "Network") -> None:
    """Copy values of own variables (sub-networks excluded) that also exist in ``src_net``."""
    shared_names = [var_name for var_name in self.own_vars if var_name in src_net.own_vars]
    # Map each destination variable to its source variable, evaluate, then assign.
    dst_to_src = {self.vars[var_name]: src_net.vars[var_name] for var_name in shared_names}
    tfutil.set_vars(tfutil.run(dst_to_src))
|
320 |
-
|
321 |
-
def copy_vars_from(self, src_net: "Network") -> None:
    """Copy values of all variables (sub-networks included) that also exist in ``src_net``."""
    shared_names = [var_name for var_name in self.vars if var_name in src_net.vars]
    # Map each destination variable to its source variable, evaluate, then assign.
    dst_to_src = {self.vars[var_name]: src_net.vars[var_name] for var_name in shared_names}
    tfutil.set_vars(tfutil.run(dst_to_src))
|
325 |
-
|
326 |
-
def copy_trainables_from(self, src_net: "Network") -> None:
    """Copy values of all trainable variables (sub-networks included) that are also trainable in ``src_net``."""
    shared_names = [var_name for var_name in self.trainables if var_name in src_net.trainables]
    # Map each destination variable to its source variable, evaluate, then assign.
    dst_to_src = {self.vars[var_name]: src_net.vars[var_name] for var_name in shared_names}
    tfutil.set_vars(tfutil.run(dst_to_src))
|
330 |
-
|
331 |
-
def convert(self, new_func_name: str, new_name: str = None, **new_static_kwargs) -> "Network":
    """Create a new network from another build function and copy all variables into it.

    Args:
        new_func_name: Build function for the new network.
        new_name: Name of the new network; None keeps this network's name.
        **new_static_kwargs: Static kwargs that override this network's.
    """
    target_name = self.name if new_name is None else new_name
    merged_kwargs = dict(self.static_kwargs)
    merged_kwargs.update(new_static_kwargs)
    converted = Network(name=target_name, func_name=new_func_name, **merged_kwargs)
    converted.copy_vars_from(self)
    return converted
|
340 |
-
|
341 |
-
def setup_as_moving_average_of(self, src_net: "Network", beta: TfExpressionEx = 0.99, beta_nontrainable: TfExpressionEx = 0.0) -> tf.Operation:
    """Construct a TensorFlow op that moves this network's variables towards those of ``src_net``.

    Args:
        src_net: Network to follow. Only variables present in both networks
            are updated.
        beta: Interpolation factor passed to ``tfutil.lerp`` for trainable variables.
        beta_nontrainable: Interpolation factor for all other variables.

    Returns:
        A single op grouping all the variable assignments.
    """
    with tfutil.absolute_name_scope(self.scope + "/_MovingAvg"):
        assign_ops = []
        for var_name, variable in self.vars.items():
            if var_name not in src_net.vars:
                continue
            factor = beta if var_name in self.trainables else beta_nontrainable
            blended = tfutil.lerp(src_net.vars[var_name], variable, factor)
            assign_ops.append(variable.assign(blended))
        return tf.group(*assign_ops)
|
352 |
-
|
353 |
-
def run(self,
        *in_arrays: Tuple[Union[np.ndarray, None], ...],
        input_transform: dict = None,
        output_transform: dict = None,
        return_as_list: bool = False,
        print_progress: bool = False,
        minibatch_size: int = None,
        num_gpus: int = 1,
        assume_frozen: bool = False,
        **dynamic_kwargs) -> Union[np.ndarray, Tuple[np.ndarray, ...], List[np.ndarray]]:
    """Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s).

    Args:
        in_arrays:        One array per network input. Entries may be None (but not all of them); a None input is fed as zeros.
        input_transform:  A dict specifying a custom transformation to be applied to the input tensor(s) before evaluating the network.
                          The dict must contain a 'func' field that points to a top-level function. The function is called with the input
                          TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
        output_transform: A dict specifying a custom transformation to be applied to the output tensor(s) after evaluating the network.
                          The dict must contain a 'func' field that points to a top-level function. The function is called with the output
                          TensorFlow expression(s) as positional arguments. Any remaining fields of the dict will be passed in as kwargs.
        return_as_list:   True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs.
        print_progress:   Print progress to the console? Useful for very large input arrays.
        minibatch_size:   Maximum minibatch size to use, None = disable batching.
        num_gpus:         Number of GPUs to use.
        assume_frozen:    Improve multi-GPU performance by assuming that the trainable parameters will remain unchanged between calls.
        dynamic_kwargs:   Additional keyword arguments to be passed into the network build function.
    """
    assert len(in_arrays) == self.num_inputs
    assert not all(arr is None for arr in in_arrays)
    assert input_transform is None or util.is_top_level_function(input_transform["func"])
    assert output_transform is None or util.is_top_level_function(output_transform["func"])
    output_transform, dynamic_kwargs = _handle_legacy_output_transforms(output_transform, dynamic_kwargs)
    # Take the item count from the first array that was actually supplied;
    # in_arrays[0] itself may legitimately be None.
    num_items = next(arr for arr in in_arrays if arr is not None).shape[0]
    if minibatch_size is None:
        minibatch_size = num_items

    # Construct unique hash key from all arguments that affect the TensorFlow graph.
    key = dict(input_transform=input_transform, output_transform=output_transform, num_gpus=num_gpus, assume_frozen=assume_frozen, dynamic_kwargs=dynamic_kwargs)

    def unwind_key(obj):
        # Dicts become sorted (key, value) lists and callables become their
        # qualified names, so that repr() of the result is a stable string.
        if isinstance(obj, dict):
            return [(item_key, unwind_key(item_value)) for item_key, item_value in sorted(obj.items())]
        if callable(obj):
            return util.get_top_level_function_name(obj)
        return obj

    key = repr(unwind_key(key))

    # Build graph (once per distinct key).
    if key not in self._run_cache:
        with tfutil.absolute_name_scope(self.scope + "/_Run"), tf.control_dependencies(None):
            # Placeholders live on the CPU and are split evenly across the GPUs.
            with tf.device("/cpu:0"):
                in_expr = [tf.placeholder(tf.float32, name=name) for name in self.input_names]
                in_split = list(zip(*[tf.split(x, num_gpus) for x in in_expr]))

            out_split = []
            for gpu in range(num_gpus):
                with tf.device("/gpu:%d" % gpu):
                    net_gpu = self.clone() if assume_frozen else self
                    in_gpu = in_split[gpu]

                    if input_transform is not None:
                        in_kwargs = dict(input_transform)
                        in_gpu = in_kwargs.pop("func")(*in_gpu, **in_kwargs)
                        in_gpu = [in_gpu] if tfutil.is_tf_expression(in_gpu) else list(in_gpu)

                    assert len(in_gpu) == self.num_inputs
                    out_gpu = net_gpu.get_output_for(*in_gpu, return_as_list=True, **dynamic_kwargs)

                    if output_transform is not None:
                        out_kwargs = dict(output_transform)
                        out_gpu = out_kwargs.pop("func")(*out_gpu, **out_kwargs)
                        out_gpu = [out_gpu] if tfutil.is_tf_expression(out_gpu) else list(out_gpu)

                    assert len(out_gpu) == self.num_outputs
                    out_split.append(out_gpu)

            # Stitch the per-GPU outputs back together along the batch axis.
            with tf.device("/cpu:0"):
                out_expr = [tf.concat(outputs, axis=0) for outputs in zip(*out_split)]
                self._run_cache[key] = in_expr, out_expr

    # Run minibatches.
    in_expr, out_expr = self._run_cache[key]
    out_arrays = [np.empty([num_items] + expr.shape.as_list()[1:], expr.dtype.name) for expr in out_expr]

    for mb_begin in range(0, num_items, minibatch_size):
        if print_progress:
            print("\r%d / %d" % (mb_begin, num_items), end="")

        mb_end = min(mb_begin + minibatch_size, num_items)
        mb_num = mb_end - mb_begin
        # Missing inputs are fed as zeros of the template shape.
        mb_in = [src[mb_begin : mb_end] if src is not None else np.zeros([mb_num] + shape[1:]) for src, shape in zip(in_arrays, self.input_shapes)]
        mb_out = tf.get_default_session().run(out_expr, dict(zip(in_expr, mb_in)))

        for dst, src in zip(out_arrays, mb_out):
            dst[mb_begin: mb_end] = src

    # Done.
    if print_progress:
        print("\r%d / %d" % (num_items, num_items))

    if not return_as_list:
        out_arrays = out_arrays[0] if len(out_arrays) == 1 else tuple(out_arrays)
    return out_arrays
|
454 |
-
|
455 |
-
def list_ops(self) -> List[TfExpression]:
    """Return the default graph's ops under this network's scope.

    Ops whose name, directly below the scope, starts with an underscore
    are left out.
    """
    scope_prefix = self.scope + "/"
    hidden_prefix = scope_prefix + "_"
    return [
        op
        for op in tf.get_default_graph().get_operations()
        if op.name.startswith(scope_prefix) and not op.name.startswith(hidden_prefix)
    ]
|
462 |
-
|
463 |
-
def list_layers(self) -> List[Tuple[str, TfExpression, List[TfExpression]]]:
    """Return (layer_name, output_expr, trainable_vars) tuples, one per layer of the network.

    Layers are discovered by walking the name scopes below the network's
    scope. Mainly intended to be used for reporting.
    """
    ignored_patterns = ["/Shape", "/strided_slice", "/Cast", "/concat", "/Assign"]
    passthrough_op_types = ["Identity", "Cast", "Transpose"]
    own_prefix_len = len(self.scope) + 1
    layers = []

    def recurse(scope, parent_ops, parent_vars, level):
        # Ignore specific patterns.
        if any(pattern in scope for pattern in ignored_patterns):
            return

        # Filter ops and vars by scope.
        global_prefix = scope + "/"
        local_prefix = global_prefix[own_prefix_len:]
        cur_ops = [op for op in parent_ops if op.name.startswith(global_prefix) or op.name == scope]
        cur_vars = [(name, var) for name, var in parent_vars if name.startswith(local_prefix) or name == local_prefix[:-1]]
        if not (cur_ops or cur_vars):
            return

        # Filter out all ops related to variables.
        variable_ops = [op for op in cur_ops if op.type.startswith("Variable")]
        for variable_op in variable_ops:
            var_prefix = variable_op.name + "/"
            cur_ops = [op for op in cur_ops if not op.name.startswith(var_prefix)]

        # Scope does not contain ops as immediate children => recurse deeper.
        contains_direct_ops = any(
            "/" not in op.name[len(global_prefix):] and op.type not in passthrough_op_types
            for op in cur_ops
        )
        if (level == 0 or not contains_direct_ops) and (len(cur_ops) + len(cur_vars)) > 1:
            rel_names = [op.name[len(global_prefix):] for op in cur_ops]
            rel_names += [name[len(local_prefix):] for name, _var in cur_vars]
            visited = set()
            for rel_name in rel_names:
                token = rel_name.split("/")[0]
                if token in visited:
                    continue
                recurse(global_prefix + token, cur_ops, cur_vars, level + 1)
                visited.add(token)
            return

        # Report layer: the output is the last op's first output, or the last variable.
        layer_name = scope[own_prefix_len:]
        if cur_ops:
            layer_output = cur_ops[-1].outputs[0]
        else:
            layer_output = cur_vars[-1][1]
        layer_trainables = [var for _name, var in cur_vars if var.trainable]
        layers.append((layer_name, layer_output, layer_trainables))

    recurse(self.scope, self.list_ops(), list(self.vars.items()), 0)
    return layers
|
505 |
-
|
506 |
-
def print_layers(self, title: str = None, hide_layers_with_no_params: bool = False) -> None:
    """Print a column-aligned summary table of the network's layers.

    One row is printed per entry returned by `self.list_layers()`, followed by
    a row with the total number of trainable parameters.

    Args:
        title: Text for the top-left header cell; `self.name` is used when None.
        hide_layers_with_no_params: When True, layers whose trainable parameter
            count is zero are left out of the table (the total is unaffected).
    """
    divider = ["---"] * 4
    heading = [self.name if title is None else title, "Params", "OutputShape", "WeightShape"]
    table = [heading, list(divider)]
    grand_total = 0

    for layer_name, layer_output, layer_trainables in self.list_layers():
        # Number of scalar parameters held by this layer's trainable variables.
        layer_count = 0
        for variable in layer_trainables:
            layer_count += int(np.prod(variable.shape.as_list()))

        # Pick the variable whose shape is reported: the shortest-named
        # ".../weight:0" variable, or the sole trainable if there is exactly one.
        candidates = [v for v in layer_trainables if v.name.endswith("/weight:0")]
        if candidates:
            reported = min(candidates, key=lambda v: len(v.name))
        elif len(layer_trainables) == 1:
            reported = layer_trainables[0]
        else:
            reported = None

        grand_total += layer_count

        if hide_layers_with_no_params and layer_count == 0:
            continue

        table.append([
            layer_name,
            str(layer_count) if layer_count > 0 else "-",
            str(layer_output.shape),
            "-" if reported is None else str(reported.shape),
        ])

    table.append(list(divider))
    table.append(["Total", str(grand_total), "", ""])

    # Pad every cell to the width of the widest cell in its column.
    col_widths = [max(len(cell) for cell in column) for column in zip(*table)]
    print()
    for cells in table:
        padded = [cell + " " * (width - len(cell)) for cell, width in zip(cells, col_widths)]
        print(" ".join(padded))
    print()
|
534 |
-
|
535 |
-
def setup_weight_histograms(self, title: str = None) -> None:
    """Create a TensorBoard histogram summary op for every trainable variable.

    Args:
        title: Prefix for the summary names; `self.name` is used when None.
    """
    prefix = self.name if title is None else title

    # Build the summary ops outside any active name scope, device scope and
    # control-dependency context.
    with tf.name_scope(None), tf.device(None), tf.control_dependencies(None):
        for local_name, var in self.trainables.items():
            *parents, leaf = local_name.split("/")
            if parents:
                # "a/b/weight" -> "<prefix>_weight/a_b"
                name = prefix + "_" + leaf + "/" + "_".join(parents)
            else:
                name = prefix + "_toplevel/" + local_name

            tf.summary.histogram(name, var)
|
549 |
-
|
550 |
-
#----------------------------------------------------------------------------
|
551 |
-
# Backwards-compatible emulation of legacy output transformation in Network.run().
|
552 |
-
|
553 |
-
_print_legacy_warning = True
|
554 |
-
|
555 |
-
def _handle_legacy_output_transforms(output_transform, dynamic_kwargs):
|
556 |
-
global _print_legacy_warning
|
557 |
-
legacy_kwargs = ["out_mul", "out_add", "out_shrink", "out_dtype"]
|
558 |
-
if not any(kwarg in dynamic_kwargs for kwarg in legacy_kwargs):
|
559 |
-
return output_transform, dynamic_kwargs
|
560 |
-
|
561 |
-
if _print_legacy_warning:
|
562 |
-
_print_legacy_warning = False
|
563 |
-
print()
|
564 |
-
print("WARNING: Old-style output transformations in Network.run() are deprecated.")
|
565 |
-
print("Consider using 'output_transform=dict(func=tflib.convert_images_to_uint8)'")
|
566 |
-
print("instead of 'out_mul=127.5, out_add=127.5, out_dtype=np.uint8'.")
|
567 |
-
print()
|
568 |
-
assert output_transform is None
|
569 |
-
|
570 |
-
new_kwargs = dict(dynamic_kwargs)
|
571 |
-
new_transform = {kwarg: new_kwargs.pop(kwarg) for kwarg in legacy_kwargs if kwarg in dynamic_kwargs}
|
572 |
-
new_transform["func"] = _legacy_output_transform_func
|
573 |
-
return new_transform, new_kwargs
|
574 |
-
|
575 |
-
def _legacy_output_transform_func(*expr, out_mul=1.0, out_add=0.0, out_shrink=1, out_dtype=None):
|
576 |
-
if out_mul != 1.0:
|
577 |
-
expr = [x * out_mul for x in expr]
|
578 |
-
|
579 |
-
if out_add != 0.0:
|
580 |
-
expr = [x + out_add for x in expr]
|
581 |
-
|
582 |
-
if out_shrink > 1:
|
583 |
-
ksize = [1, 1, out_shrink, out_shrink]
|
584 |
-
expr = [tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding="VALID", data_format="NCHW") for x in expr]
|
585 |
-
|
586 |
-
if out_dtype is not None:
|
587 |
-
if tf.as_dtype(out_dtype).is_integer:
|
588 |
-
expr = [tf.round(x) for x in expr]
|
589 |
-
expr = [tf.saturate_cast(x, out_dtype) for x in expr]
|
590 |
-
return expr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tflib/ops/__init__.py
DELETED
@@ -1,7 +0,0 @@
|
|
1 |
-
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
|
2 |
-
#
|
3 |
-
# This work is made available under the Nvidia Source Code License-NC.
|
4 |
-
# To view a copy of this license, visit
|
5 |
-
# https://nvlabs.github.io/stylegan2/license.html
|
6 |
-
|
7 |
-
# empty
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tflib/ops/__pycache__/__init__.cpython-36.pyc
DELETED
Binary file (117 Bytes)
|
|
tflib/ops/__pycache__/fused_bias_act.cpython-36.pyc
DELETED
Binary file (8.54 kB)
|
|
tflib/ops/__pycache__/upfirdn_2d.cpython-36.pyc
DELETED
Binary file (14.8 kB)
|
|
tflib/ops/fused_bias_act.cu
DELETED
@@ -1,188 +0,0 @@
|
|
1 |
-
// Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
|
2 |
-
//
|
3 |
-
// This work is made available under the Nvidia Source Code License-NC.
|
4 |
-
// To view a copy of this license, visit
|
5 |
-
// https://nvlabs.github.io/stylegan2/license.html
|
6 |
-
|
7 |
-
#define EIGEN_USE_GPU
|
8 |
-
#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
|
9 |
-
#include "tensorflow/core/framework/op.h"
|
10 |
-
#include "tensorflow/core/framework/op_kernel.h"
|
11 |
-
#include "tensorflow/core/framework/shape_inference.h"
|
12 |
-
#include <stdio.h>
|
13 |
-
|
14 |
-
using namespace tensorflow;
|
15 |
-
using namespace tensorflow::shape_inference;
|
16 |
-
|
17 |
-
#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false)
|
18 |
-
|
19 |
-
//------------------------------------------------------------------------
|
20 |
-
// CUDA kernel.
|
21 |
-
|
22 |
-
// Parameter bundle passed by value to FusedBiasActKernel.
// The attribute fields (grad, axis, act, alpha, gain) are filled once from the
// op attributes; the pointers and sizes are filled per Compute() call.
template <class T>
struct FusedBiasActKernelParams
{
    const T* x; // [sizeX]
    const T* b; // [sizeB] or NULL
    const T* ref; // [sizeX] or NULL
    T* y; // [sizeX]

    int grad;   // Combined with act as (act * 10 + grad) to select the formula in the kernel.
    int axis;   // Dimension of x that b is matched against.
    int act;    // Activation index; see the case labels in the kernel's switch.
    float alpha; // Multiplier applied to x in the lrelu cases.
    float gain;  // Multiplier applied to the result before it is stored.

    int sizeX;  // Number of elements in x.
    int sizeB;  // Number of elements in b.
    int stepB;  // Product of the sizes of all x dimensions after axis.
    int loopX;  // Maximum number of elements processed by one thread.
};
|
41 |
-
|
42 |
-
// Elementwise kernel: adds the optional bias, evaluates the formula selected by
// (act, grad), multiplies by gain and writes the result to p.y.
// Each thread handles up to p.loopX elements spaced blockDim.x apart, so one
// block covers p.loopX * blockDim.x consecutive elements.
template <class T>
static __global__ void FusedBiasActKernel(const FusedBiasActKernelParams<T> p)
{
    // Inputs beyond these magnitudes are clamped to the limiting value
    // instead of being evaluated through expf().
    const float expRange = 80.0f;
    const float halfExpRange = 40.0f;
    const float seluScale = 1.0507009873554804934193349852946f;
    const float seluAlpha = 1.6732632423543772848170429916717f;

    // Loop over elements.
    int xi = blockIdx.x * p.loopX * blockDim.x + threadIdx.x;
    for (int loopIdx = 0; loopIdx < p.loopX && xi < p.sizeX; loopIdx++, xi += blockDim.x)
    {
        // Load and apply bias.
        float x = (float)p.x[xi];
        if (p.b)
            x += (float)p.b[(xi / p.stepB) % p.sizeB]; // index of this element along the bias axis
        float ref = (p.ref) ? (float)p.ref[xi] : 0.0f;
        // Undo the gain on ref, except for act 9 (swish) and when gain is zero.
        // NOTE(review): presumably ref is a previous gain-scaled output for every
        // activation except swish -- confirm against the caller.
        // The bitwise '&' combines two already-evaluated comparisons.
        if (p.gain != 0.0f & p.act != 9)
            ref /= p.gain;

        // Evaluate activation func.
        // Case label = act * 10 + grad; any unlisted combination falls through
        // to the linear forward formula via 'default'.
        float y;
        switch (p.act * 10 + p.grad)
        {
            // linear
            default:
            case 10: y = x; break;
            case 11: y = x; break;
            case 12: y = 0.0f; break;

            // relu
            case 20: y = (x > 0.0f) ? x : 0.0f; break;
            case 21: y = (ref > 0.0f) ? x : 0.0f; break;
            case 22: y = 0.0f; break;

            // lrelu
            case 30: y = (x > 0.0f) ? x : x * p.alpha; break;
            case 31: y = (ref > 0.0f) ? x : x * p.alpha; break;
            case 32: y = 0.0f; break;

            // tanh
            case 40: { float c = expf(x); float d = 1.0f / c; y = (x < -expRange) ? -1.0f : (x > expRange) ? 1.0f : (c - d) / (c + d); } break;
            case 41: y = x * (1.0f - ref * ref); break;
            case 42: y = x * (1.0f - ref * ref) * (-2.0f * ref); break;

            // sigmoid
            case 50: y = (x < -expRange) ? 0.0f : 1.0f / (expf(-x) + 1.0f); break;
            case 51: y = x * ref * (1.0f - ref); break;
            case 52: y = x * ref * (1.0f - ref) * (1.0f - 2.0f * ref); break;

            // elu
            case 60: y = (x >= 0.0f) ? x : expf(x) - 1.0f; break;
            case 61: y = (ref >= 0.0f) ? x : x * (ref + 1.0f); break;
            case 62: y = (ref >= 0.0f) ? 0.0f : x * (ref + 1.0f); break;

            // selu
            case 70: y = (x >= 0.0f) ? seluScale * x : (seluScale * seluAlpha) * (expf(x) - 1.0f); break;
            case 71: y = (ref >= 0.0f) ? x * seluScale : x * (ref + seluScale * seluAlpha); break;
            case 72: y = (ref >= 0.0f) ? 0.0f : x * (ref + seluScale * seluAlpha); break;

            // softplus
            case 80: y = (x > expRange) ? x : logf(expf(x) + 1.0f); break;
            case 81: y = x * (1.0f - expf(-ref)); break;
            case 82: { float c = expf(-ref); y = x * c * (1.0f - c); } break;

            // swish
            case 90: y = (x < -expRange) ? 0.0f : x / (expf(-x) + 1.0f); break;
            case 91: { float c = expf(ref); float d = c + 1.0f; y = (ref > halfExpRange) ? x : x * c * (ref + d) / (d * d); } break;
            case 92: { float c = expf(ref); float d = c + 1.0f; y = (ref > halfExpRange) ? 0.0f : x * c * (ref * (2.0f - d) + 2.0f * d) / (d * d * d); } break;
        }

        // Apply gain and store.
        p.y[xi] = (T)(y * p.gain);
    }
}
|
117 |
-
|
118 |
-
//------------------------------------------------------------------------
|
119 |
-
// TensorFlow op.
|
120 |
-
|
121 |
-
// TensorFlow op kernel that validates its inputs, allocates the output and
// launches FusedBiasActKernel on the op's GPU stream.
template <class T>
struct FusedBiasActOp : public OpKernel
{
    // Attribute values read at construction; copied into the per-call params.
    FusedBiasActKernelParams<T> m_attribs;

    // Reads the grad/axis/act/alpha/gain attributes and rejects negative
    // grad, axis or act values.
    FusedBiasActOp(OpKernelConstruction* ctx) : OpKernel(ctx)
    {
        memset(&m_attribs, 0, sizeof(m_attribs));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("grad", &m_attribs.grad));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("axis", &m_attribs.axis));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("act", &m_attribs.act));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("alpha", &m_attribs.alpha));
        OP_REQUIRES_OK(ctx, ctx->GetAttr("gain", &m_attribs.gain));
        OP_REQUIRES(ctx, m_attribs.grad >= 0, errors::InvalidArgument("grad must be non-negative"));
        OP_REQUIRES(ctx, m_attribs.axis >= 0, errors::InvalidArgument("axis must be non-negative"));
        OP_REQUIRES(ctx, m_attribs.act >= 0, errors::InvalidArgument("act must be non-negative"));
    }

    // Inputs: 0 = x, 1 = b (may be empty), 2 = ref (empty when grad == 0).
    // Output 0 = y, allocated with the shape of x.
    void Compute(OpKernelContext* ctx)
    {
        FusedBiasActKernelParams<T> p = m_attribs;
        cudaStream_t stream = ctx->eigen_device<Eigen::GpuDevice>().stream();

        const Tensor& x = ctx->input(0); // [...]
        const Tensor& b = ctx->input(1); // [sizeB] or [0]
        const Tensor& ref = ctx->input(2); // x.shape or [0]
        p.x = x.flat<T>().data();
        // Empty b / ref tensors are passed to the kernel as NULL pointers.
        p.b = (b.NumElements()) ? b.flat<T>().data() : NULL;
        p.ref = (ref.NumElements()) ? ref.flat<T>().data() : NULL;
        OP_REQUIRES(ctx, b.NumElements() == 0 || m_attribs.axis < x.dims(), errors::InvalidArgument("axis out of bounds"));
        OP_REQUIRES(ctx, b.dims() == 1, errors::InvalidArgument("b must have rank 1"));
        OP_REQUIRES(ctx, b.NumElements() == 0 || b.NumElements() == x.dim_size(m_attribs.axis), errors::InvalidArgument("b has wrong number of elements"));
        OP_REQUIRES(ctx, ref.NumElements() == ((p.grad == 0) ? 0 : x.NumElements()), errors::InvalidArgument("ref has wrong number of elements"));
        // The kernel indexes elements with int, so the element count must fit.
        OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("x is too large"));

        p.sizeX = (int)x.NumElements();
        p.sizeB = (int)b.NumElements();
        // stepB = number of consecutive elements that share one bias value.
        p.stepB = 1;
        for (int i = m_attribs.axis + 1; i < x.dims(); i++)
            p.stepB *= (int)x.dim_size(i);

        Tensor* y = NULL; // x.shape
        OP_REQUIRES_OK(ctx, ctx->allocate_output(0, x.shape(), &y));
        p.y = y->flat<T>().data();

        // One block of 128 threads covers loopX * blockSize = 512 elements;
        // gridSize rounds the element count up to whole blocks.
        p.loopX = 4;
        int blockSize = 4 * 32;
        int gridSize = (p.sizeX - 1) / (p.loopX * blockSize) + 1;
        void* args[] = {&p};
        OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel((void*)FusedBiasActKernel<T>, gridSize, blockSize, args, 0, stream));
    }
};
|
173 |
-
|
174 |
-
// Op interface: three inputs and one output of the same type T (float or
// half), plus the attributes read by the FusedBiasActOp constructor.
REGISTER_OP("FusedBiasAct")
    .Input ("x: T")
    .Input ("b: T")
    .Input ("ref: T")
    .Output ("y: T")
    .Attr ("T: {float, half}")
    .Attr ("grad: int = 0")
    .Attr ("axis: int = 1")
    .Attr ("act: int = 0")
    .Attr ("alpha: float = 0.0")
    .Attr ("gain: float = 1.0");
// GPU kernels are registered for both supported element types.
REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint<float>("T"), FusedBiasActOp<float>);
REGISTER_KERNEL_BUILDER(Name("FusedBiasAct").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"), FusedBiasActOp<Eigen::half>);
|
187 |
-
|
188 |
-
//------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tflib/ops/fused_bias_act.py
DELETED
@@ -1,196 +0,0 @@
|
|
1 |
-
# Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
|
2 |
-
#
|
3 |
-
# This work is made available under the Nvidia Source Code License-NC.
|
4 |
-
# To view a copy of this license, visit
|
5 |
-
# https://nvlabs.github.io/stylegan2/license.html
|
6 |
-
|
7 |
-
"""Custom TensorFlow ops for efficient bias and activation."""
|
8 |
-
|
9 |
-
import os
|
10 |
-
import numpy as np
|
11 |
-
import tensorflow as tf
|
12 |
-
from .. import custom_ops
|
13 |
-
from ...util import EasyDict
|
14 |
-
|
15 |
-
def _get_plugin():
    """Return the custom-op plugin for the `.cu` file that sits next to this module."""
    module_base, _extension = os.path.splitext(__file__)
    cuda_source = module_base + '.cu'
    return custom_ops.get_plugin(cuda_source)
|
17 |
-
|
18 |
-
#----------------------------------------------------------------------------
|
19 |
-
|
20 |
-
# Table of supported activation functions, keyed by name. Each entry holds:
#   func          - TensorFlow implementation, called as func(x, alpha=...).
#   def_alpha     - value used for `alpha` when the caller passes None.
#   def_gain      - value used for `gain` when the caller passes None.
#   cuda_idx      - activation index handed to the CUDA kernel as `act`.
#   ref           - which tensor ('x' or 'y') is passed as `ref` to the gradient kernels.
#   zero_2nd_grad - selects the gradient wrapper that omits the second-order
#                   gradient with respect to x.
activation_funcs = {
    'linear': EasyDict(func=lambda x, **_: x, def_alpha=None, def_gain=1.0, cuda_idx=1, ref='y', zero_2nd_grad=True),
    'relu': EasyDict(func=lambda x, **_: tf.nn.relu(x), def_alpha=None, def_gain=np.sqrt(2), cuda_idx=2, ref='y', zero_2nd_grad=True),
    'lrelu': EasyDict(func=lambda x, alpha, **_: tf.nn.leaky_relu(x, alpha), def_alpha=0.2, def_gain=np.sqrt(2), cuda_idx=3, ref='y', zero_2nd_grad=True),
    'tanh': EasyDict(func=lambda x, **_: tf.nn.tanh(x), def_alpha=None, def_gain=1.0, cuda_idx=4, ref='y', zero_2nd_grad=False),
    'sigmoid': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x), def_alpha=None, def_gain=1.0, cuda_idx=5, ref='y', zero_2nd_grad=False),
    'elu': EasyDict(func=lambda x, **_: tf.nn.elu(x), def_alpha=None, def_gain=1.0, cuda_idx=6, ref='y', zero_2nd_grad=False),
    'selu': EasyDict(func=lambda x, **_: tf.nn.selu(x), def_alpha=None, def_gain=1.0, cuda_idx=7, ref='y', zero_2nd_grad=False),
    'softplus': EasyDict(func=lambda x, **_: tf.nn.softplus(x), def_alpha=None, def_gain=1.0, cuda_idx=8, ref='y', zero_2nd_grad=False),
    'swish': EasyDict(func=lambda x, **_: tf.nn.sigmoid(x) * x, def_alpha=None, def_gain=np.sqrt(2), cuda_idx=9, ref='x', zero_2nd_grad=False),
}
|
31 |
-
|
32 |
-
#----------------------------------------------------------------------------
|
33 |
-
|
34 |
-
def fused_bias_act(x, b=None, axis=1, act='linear', alpha=None, gain=None, impl='cuda'):
    r"""Add a bias, apply an activation function and scale the result in one op.

    The three steps (bias `b`, activation `act`, scaling by `gain`) are each
    optional. The fused op is usually considerably faster than the equivalent
    sequence of standard TensorFlow ops. First and second order gradients are
    supported; third order gradients are not.

    Args:
        x:      Input activation tensor of any shape. When `b` is given, the
                rank of `x` and its size along `axis` must be known.
        b:      1D bias tensor of the same type as `x`, or `None` for no bias.
                Its shape must be known and must equal the size of `x` along
                `axis`.
        axis:   Dimension of `x` that the elements of `b` correspond to.
                Ignored when `b` is not specified.
        act:    Name of the activation function, e.g. `"relu"`, `"lrelu"`,
                `"tanh"`, `"sigmoid"`, `"swish"`; `"linear"` disables it.
                See `activation_funcs` for all names. `None` is not allowed.
        alpha:  Shape parameter of the activation function, or `None` for the
                default.
        gain:   Output scaling factor, or `None` for the per-activation default
                listed in `activation_funcs`. If unsure, consider `1.0`.
        impl:   Implementation to use: `"ref"` or `"cuda"` (default).

    Returns:
        Tensor of the same shape and datatype as `x`.
    """
    implementations = dict(ref=_fused_bias_act_ref, cuda=_fused_bias_act_cuda)
    selected = implementations[impl]
    return selected(x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain)
|
69 |
-
|
70 |
-
#----------------------------------------------------------------------------
|
71 |
-
|
72 |
-
def _fused_bias_act_ref(x, b, axis, act, alpha, gain):
    """Reference (slow) version of `fused_bias_act()` built from standard TensorFlow ops."""

    # Normalize the inputs; a missing bias becomes an empty 1D tensor.
    x = tf.convert_to_tensor(x)
    if b is not None:
        b = tf.convert_to_tensor(b)
    else:
        b = tf.constant([], dtype=x.dtype)
    spec = activation_funcs[act]
    assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis])
    assert b.shape[0] == 0 or 0 <= axis < x.shape.rank
    alpha = spec.def_alpha if alpha is None else alpha
    gain = spec.def_gain if gain is None else gain

    # Bias: reshape so that b lines up with `axis` and broadcasts elsewhere.
    if b.shape[0] != 0:
        broadcast_shape = [-1 if dim == axis else 1 for dim in range(x.shape.rank)]
        x += tf.reshape(b, broadcast_shape)

    # Activation.
    x = spec.func(x, alpha=alpha)

    # Gain.
    if gain != 1:
        x *= gain
    return x
|
97 |
-
|
98 |
-
#----------------------------------------------------------------------------
|
99 |
-
|
100 |
-
def _fused_bias_act_cuda(x, b, axis, act, alpha, gain):
    """Fast CUDA implementation of `fused_bias_act()` using custom ops.

    Args:
        x:      Input activation tensor.
        b:      1D bias tensor, or `None` for no bias.
        axis:   Dimension of `x` that `b` corresponds to.
        act:    Key into `activation_funcs`.
        alpha:  Activation shape parameter, or `None` for the activation's default.
        gain:   Output scaling factor, or `None` for the activation's default.

    Returns:
        Tensor with the same shape as `x`. When there is nothing to do (linear
        activation, no bias, unit gain) the converted input is returned as-is.
    """

    # Validate arguments.
    x = tf.convert_to_tensor(x)
    empty_tensor = tf.constant([], dtype=x.dtype)
    b = tf.convert_to_tensor(b) if b is not None else empty_tensor
    act_spec = activation_funcs[act]
    assert b.shape.rank == 1 and (b.shape[0] == 0 or b.shape[0] == x.shape[axis])
    assert b.shape[0] == 0 or 0 <= axis < x.shape.rank
    if alpha is None:
        alpha = act_spec.def_alpha
    if gain is None:
        gain = act_spec.def_gain

    # Special cases.
    # `b` has already been replaced by `empty_tensor` above, so "no bias" must
    # be detected by its length (same test as in grad_db), not by `b is None`.
    if act == 'linear' and b.shape[0] == 0 and gain == 1.0:
        return x
    if act_spec.cuda_idx is None:
        return _fused_bias_act_ref(x=x, b=b, axis=axis, act=act, alpha=alpha, gain=gain)

    # CUDA kernel.
    cuda_kernel = _get_plugin().fused_bias_act
    cuda_kwargs = dict(axis=axis, act=act_spec.cuda_idx, alpha=alpha, gain=gain)

    # Forward pass: y = func(x, b).
    def func_y(x, b):
        y = cuda_kernel(x=x, b=b, ref=empty_tensor, grad=0, **cuda_kwargs)
        y.set_shape(x.shape)
        return y

    # Backward pass: dx, db = grad(dy, x, y)
    def grad_dx(dy, x, y):
        # Each activation declares whether its gradient is evaluated from x or y.
        ref = {'x': x, 'y': y}[act_spec.ref]
        dx = cuda_kernel(x=dy, b=empty_tensor, ref=ref, grad=1, **cuda_kwargs)
        dx.set_shape(x.shape)
        return dx
    def grad_db(dx):
        if b.shape[0] == 0:
            return empty_tensor
        # Sum dx over every dimension except `axis`.
        db = dx
        if axis < x.shape.rank - 1:
            db = tf.reduce_sum(db, list(range(axis + 1, x.shape.rank)))
        if axis > 0:
            db = tf.reduce_sum(db, list(range(axis)))
        db.set_shape(b.shape)
        return db

    # Second order gradients: d_dy, d_x = grad2(d_dx, d_db, x, y)
    def grad2_d_dy(d_dx, d_db, x, y):
        ref = {'x': x, 'y': y}[act_spec.ref]
        d_dy = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=1, **cuda_kwargs)
        d_dy.set_shape(x.shape)
        return d_dy
    def grad2_d_x(d_dx, d_db, x, y):
        ref = {'x': x, 'y': y}[act_spec.ref]
        d_x = cuda_kernel(x=d_dx, b=d_db, ref=ref, grad=2, **cuda_kwargs)
        d_x.set_shape(x.shape)
        return d_x

    # Fast version for piecewise-linear activation funcs.
    # The second-order gradient with respect to x is omitted here.
    @tf.custom_gradient
    def func_zero_2nd_grad(x, b):
        y = func_y(x, b)
        @tf.custom_gradient
        def grad(dy):
            dx = grad_dx(dy, x, y)
            db = grad_db(dx)
            def grad2(d_dx, d_db):
                d_dy = grad2_d_dy(d_dx, d_db, x, y)
                return d_dy
            return (dx, db), grad2
        return y, grad

    # Slow version for general activation funcs.
    # grad_impl takes x as an explicit input so that grad2 can also return d_x.
    @tf.custom_gradient
    def func_nonzero_2nd_grad(x, b):
        y = func_y(x, b)
        def grad_wrap(dy):
            @tf.custom_gradient
            def grad_impl(dy, x):
                dx = grad_dx(dy, x, y)
                db = grad_db(dx)
                def grad2(d_dx, d_db):
                    d_dy = grad2_d_dy(d_dx, d_db, x, y)
                    d_x = grad2_d_x(d_dx, d_db, x, y)
                    return d_dy, d_x
                return (dx, db), grad2
            return grad_impl(dy, x)
        return y, grad_wrap

    # Which version to use?
    if act_spec.zero_2nd_grad:
        return func_zero_2nd_grad(x, b)
    return func_nonzero_2nd_grad(x, b)
|
195 |
-
|
196 |
-
#----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tflib/ops/upfirdn_2d.cu
DELETED
@@ -1,326 +0,0 @@
|
|
1 |
-
// Copyright (c) 2019, NVIDIA Corporation. All rights reserved.
|
2 |
-
//
|
3 |
-
// This work is made available under the Nvidia Source Code License-NC.
|
4 |
-
// To view a copy of this license, visit
|
5 |
-
// https://nvlabs.github.io/stylegan2/license.html
|
6 |
-
|
7 |
-
#define EIGEN_USE_GPU
|
8 |
-
#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
|
9 |
-
#include "tensorflow/core/framework/op.h"
|
10 |
-
#include "tensorflow/core/framework/op_kernel.h"
|
11 |
-
#include "tensorflow/core/framework/shape_inference.h"
|
12 |
-
#include <stdio.h>
|
13 |
-
|
14 |
-
using namespace tensorflow;
|
15 |
-
using namespace tensorflow::shape_inference;
|
16 |
-
|
17 |
-
//------------------------------------------------------------------------
|
18 |
-
// Helpers.
|
19 |
-
|
20 |
-
#define OP_CHECK_CUDA_ERROR(CTX, CUDA_CALL) do { cudaError_t err = CUDA_CALL; OP_REQUIRES(CTX, err == cudaSuccess, errors::Internal(cudaGetErrorName(err))); } while (false)
|
21 |
-
|
22 |
-
// Integer division rounded toward negative infinity.
// Integer '/' truncates toward zero, so the quotient is decremented whenever
// truncation rounded it up past a. The correction only yields the floor for
// b > 0. NOTE(review): the callers visible here pass the upx / upy factors --
// confirm these are always positive.
static __host__ __device__ __forceinline__ int floorDiv(int a, int b)
{
    int c = a / b;
    if (c * b > a)
        c--;
    return c;
}
|
29 |
-
|
30 |
-
//------------------------------------------------------------------------
|
31 |
-
// CUDA kernel params.
|
32 |
-
|
33 |
-
// Parameter bundle passed by value to the UpFirDn2D kernels.
template <class T>
struct UpFirDn2DKernelParams
{
    const T* x; // [majorDim, inH, inW, minorDim]
    const T* k; // [kernelH, kernelW]
    T* y; // [majorDim, outH, outW, minorDim]

    // Factors and padding used by the kernels when mapping an output
    // coordinate to its input receptive field.
    int upx;
    int upy;
    int downx;
    int downy;
    int padx0;
    int padx1;
    int pady0;
    int pady1;

    // Tensor extents, matching the layouts noted on x, k and y above.
    int majorDim;
    int inH;
    int inW;
    int minorDim;
    int kernelH;
    int kernelW;
    int outH;
    int outW;
    int loopMajor; // Number of consecutive major indices handled per blockIdx.z.
    int loopX;     // Number of output-X iterations handled per thread or tile.
};
|
60 |
-
|
61 |
-
//------------------------------------------------------------------------
|
62 |
-
// General CUDA implementation for large filter kernels.
|
63 |
-
|
64 |
-
template <class T>
|
65 |
-
// General kernel: each thread computes output pixels one at a time by walking
// the input receptive field and the filter taps directly from the p.x and p.k
// pointers.
// The x thread index encodes (outY, minorIdx), the y index selects the
// starting outX, and blockIdx.z selects the starting major index.
static __global__ void UpFirDn2DKernel_large(const UpFirDn2DKernelParams<T> p)
{
    // Calculate thread index.
    int minorIdx = blockIdx.x * blockDim.x + threadIdx.x;
    int outY = minorIdx / p.minorDim;
    minorIdx -= outY * p.minorDim;
    int outXBase = blockIdx.y * p.loopX * blockDim.y + threadIdx.y;
    int majorIdxBase = blockIdx.z * p.loopMajor;
    if (outXBase >= p.outW || outY >= p.outH || majorIdxBase >= p.majorDim)
        return;

    // Setup Y receptive field.
    // inY is the first input row and h the number of input rows, both clamped
    // to [0, inH]; kernelY is the filter row paired with the first input row.
    int midY = outY * p.downy + p.upy - 1 - p.pady0;
    int inY = min(max(floorDiv(midY, p.upy), 0), p.inH);
    int h = min(max(floorDiv(midY + p.kernelH, p.upy), 0), p.inH) - inY;
    int kernelY = midY + p.kernelH - (inY + 1) * p.upy;

    // Loop over majorDim and outX.
    for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor && majorIdx < p.majorDim; loopMajor++, majorIdx++)
    for (int loopX = 0, outX = outXBase; loopX < p.loopX && outX < p.outW; loopX++, outX += blockDim.y)
    {
        // Setup X receptive field.
        int midX = outX * p.downx + p.upx - 1 - p.padx0;
        int inX = min(max(floorDiv(midX, p.upx), 0), p.inW);
        int w = min(max(floorDiv(midX + p.kernelW, p.upx), 0), p.inW) - inX;
        int kernelX = midX + p.kernelW - (inX + 1) * p.upx;

        // Initialize pointers.
        // The input pointer advances forward while the filter pointer steps
        // backward (negative strides) by the upsampling factor.
        const T* xp = &p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx];
        const T* kp = &p.k[kernelY * p.kernelW + kernelX];
        int xpx = p.minorDim;
        int kpx = -p.upx;
        int xpy = p.inW * p.minorDim;
        int kpy = -p.upy * p.kernelW;

        // Inner loop.
        // Accumulate in float regardless of T.
        float v = 0.0f;
        for (int y = 0; y < h; y++)
        {
            for (int x = 0; x < w; x++)
            {
                v += (float)(*xp) * (float)(*kp);
                xp += xpx;
                kp += kpx;
            }
            // Rewind the w steps taken along X, then move one row along Y.
            xp += xpy - w * xpx;
            kp += kpy - w * kpx;
        }

        // Store result.
        p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v;
    }
}
|
118 |
-
|
119 |
-
//------------------------------------------------------------------------
|
120 |
-
// Specialized CUDA implementation for small filter kernels.
|
121 |
-
|
122 |
-
template <class T, int upx, int upy, int downx, int downy, int kernelW, int kernelH, int tileOutW, int tileOutH>
|
123 |
-
static __global__ void UpFirDn2DKernel_small(const UpFirDn2DKernelParams<T> p)
|
124 |
-
{
|
125 |
-
//assert(kernelW % upx == 0);
|
126 |
-
//assert(kernelH % upy == 0);
|
127 |
-
const int tileInW = ((tileOutW - 1) * downx + kernelW - 1) / upx + 1;
|
128 |
-
const int tileInH = ((tileOutH - 1) * downy + kernelH - 1) / upy + 1;
|
129 |
-
__shared__ volatile float sk[kernelH][kernelW];
|
130 |
-
__shared__ volatile float sx[tileInH][tileInW];
|
131 |
-
|
132 |
-
// Calculate tile index.
|
133 |
-
int minorIdx = blockIdx.x;
|
134 |
-
int tileOutY = minorIdx / p.minorDim;
|
135 |
-
minorIdx -= tileOutY * p.minorDim;
|
136 |
-
tileOutY *= tileOutH;
|
137 |
-
int tileOutXBase = blockIdx.y * p.loopX * tileOutW;
|
138 |
-
int majorIdxBase = blockIdx.z * p.loopMajor;
|
139 |
-
if (tileOutXBase >= p.outW | tileOutY >= p.outH | majorIdxBase >= p.majorDim)
|
140 |
-
return;
|
141 |
-
|
142 |
-
// Load filter kernel (flipped).
|
143 |
-
for (int tapIdx = threadIdx.x; tapIdx < kernelH * kernelW; tapIdx += blockDim.x)
|
144 |
-
{
|
145 |
-
int ky = tapIdx / kernelW;
|
146 |
-
int kx = tapIdx - ky * kernelW;
|
147 |
-
float v = 0.0f;
|
148 |
-
if (kx < p.kernelW & ky < p.kernelH)
|
149 |
-
v = (float)p.k[(p.kernelH - 1 - ky) * p.kernelW + (p.kernelW - 1 - kx)];
|
150 |
-
sk[ky][kx] = v;
|
151 |
-
}
|
152 |
-
|
153 |
-
// Loop over majorDim and outX.
|
154 |
-
for (int loopMajor = 0, majorIdx = majorIdxBase; loopMajor < p.loopMajor & majorIdx < p.majorDim; loopMajor++, majorIdx++)
|
155 |
-
for (int loopX = 0, tileOutX = tileOutXBase; loopX < p.loopX & tileOutX < p.outW; loopX++, tileOutX += tileOutW)
|
156 |
-
{
|
157 |
-
// Load input pixels.
|
158 |
-
int tileMidX = tileOutX * downx + upx - 1 - p.padx0;
|
159 |
-
int tileMidY = tileOutY * downy + upy - 1 - p.pady0;
|
160 |
-
int tileInX = floorDiv(tileMidX, upx);
|
161 |
-
int tileInY = floorDiv(tileMidY, upy);
|
162 |
-
__syncthreads();
|
163 |
-
for (int inIdx = threadIdx.x; inIdx < tileInH * tileInW; inIdx += blockDim.x)
|
164 |
-
{
|
165 |
-
int relInY = inIdx / tileInW;
|
166 |
-
int relInX = inIdx - relInY * tileInW;
|
167 |
-
int inX = relInX + tileInX;
|
168 |
-
int inY = relInY + tileInY;
|
169 |
-
float v = 0.0f;
|
170 |
-
if (inX >= 0 & inY >= 0 & inX < p.inW & inY < p.inH)
|
171 |
-
v = (float)p.x[((majorIdx * p.inH + inY) * p.inW + inX) * p.minorDim + minorIdx];
|
172 |
-
sx[relInY][relInX] = v;
|
173 |
-
}
|
174 |
-
|
175 |
-
// Loop over output pixels.
|
176 |
-
__syncthreads();
|
177 |
-
for (int outIdx = threadIdx.x; outIdx < tileOutH * tileOutW; outIdx += blockDim.x)
|
178 |
-
{
|
179 |
-
int relOutY = outIdx / tileOutW;
|
180 |
-
int relOutX = outIdx - relOutY * tileOutW;
|
181 |
-
int outX = relOutX + tileOutX;
|
182 |
-
int outY = relOutY + tileOutY;
|
183 |
-
|
184 |
-
// Setup receptive field.
|
185 |
-
int midX = tileMidX + relOutX * downx;
|
186 |
-
int midY = tileMidY + relOutY * downy;
|
187 |
-
int inX = floorDiv(midX, upx);
|
188 |
-
int inY = floorDiv(midY, upy);
|
189 |
-
int relInX = inX - tileInX;
|
190 |
-
int relInY = inY - tileInY;
|
191 |
-
int kernelX = (inX + 1) * upx - midX - 1; // flipped
|
192 |
-
int kernelY = (inY + 1) * upy - midY - 1; // flipped
|
193 |
-
|
194 |
-
// Inner loop.
|
195 |
-
float v = 0.0f;
|
196 |
-
#pragma unroll
|
197 |
-
for (int y = 0; y < kernelH / upy; y++)
|
198 |
-
#pragma unroll
|
199 |
-
for (int x = 0; x < kernelW / upx; x++)
|
200 |
-
v += sx[relInY + y][relInX + x] * sk[kernelY + y * upy][kernelX + x * upx];
|
201 |
-
|
202 |
-
// Store result.
|
203 |
-
if (outX < p.outW & outY < p.outH)
|
204 |
-
p.y[((majorIdx * p.outH + outY) * p.outW + outX) * p.minorDim + minorIdx] = (T)v;
|
205 |
-
}
|
206 |
-
}
|
207 |
-
}
|
208 |
-
|
209 |
-
//------------------------------------------------------------------------
|
210 |
-
// TensorFlow op.
|
211 |
-
|
212 |
-
template <class T>
|
213 |
-
struct UpFirDn2DOp : public OpKernel
|
214 |
-
{
|
215 |
-
UpFirDn2DKernelParams<T> m_attribs;
|
216 |
-
|
217 |
-
UpFirDn2DOp(OpKernelConstruction* ctx) : OpKernel(ctx)
|
218 |
-
{
|
219 |
-
memset(&m_attribs, 0, sizeof(m_attribs));
|
220 |
-
OP_REQUIRES_OK(ctx, ctx->GetAttr("upx", &m_attribs.upx));
|
221 |
-
OP_REQUIRES_OK(ctx, ctx->GetAttr("upy", &m_attribs.upy));
|
222 |
-
OP_REQUIRES_OK(ctx, ctx->GetAttr("downx", &m_attribs.downx));
|
223 |
-
OP_REQUIRES_OK(ctx, ctx->GetAttr("downy", &m_attribs.downy));
|
224 |
-
OP_REQUIRES_OK(ctx, ctx->GetAttr("padx0", &m_attribs.padx0));
|
225 |
-
OP_REQUIRES_OK(ctx, ctx->GetAttr("padx1", &m_attribs.padx1));
|
226 |
-
OP_REQUIRES_OK(ctx, ctx->GetAttr("pady0", &m_attribs.pady0));
|
227 |
-
OP_REQUIRES_OK(ctx, ctx->GetAttr("pady1", &m_attribs.pady1));
|
228 |
-
OP_REQUIRES(ctx, m_attribs.upx >= 1 && m_attribs.upy >= 1, errors::InvalidArgument("upx and upy must be at least 1x1"));
|
229 |
-
OP_REQUIRES(ctx, m_attribs.downx >= 1 && m_attribs.downy >= 1, errors::InvalidArgument("downx and downy must be at least 1x1"));
|
230 |
-
}
|
231 |
-
|
232 |
-
void Compute(OpKernelContext* ctx)
|
233 |
-
{
|
234 |
-
UpFirDn2DKernelParams<T> p = m_attribs;
|
235 |
-
cudaStream_t stream = ctx->eigen_device<Eigen::GpuDevice>().stream();
|
236 |
-
|
237 |
-
const Tensor& x = ctx->input(0); // [majorDim, inH, inW, minorDim]
|
238 |
-
const Tensor& k = ctx->input(1); // [kernelH, kernelW]
|
239 |
-
p.x = x.flat<T>().data();
|
240 |
-
p.k = k.flat<T>().data();
|
241 |
-
OP_REQUIRES(ctx, x.dims() == 4, errors::InvalidArgument("input must have rank 4"));
|
242 |
-
OP_REQUIRES(ctx, k.dims() == 2, errors::InvalidArgument("kernel must have rank 2"));
|
243 |
-
OP_REQUIRES(ctx, x.NumElements() <= kint32max, errors::InvalidArgument("input too large"));
|
244 |
-
OP_REQUIRES(ctx, k.NumElements() <= kint32max, errors::InvalidArgument("kernel too large"));
|
245 |
-
|
246 |
-
p.majorDim = (int)x.dim_size(0);
|
247 |
-
p.inH = (int)x.dim_size(1);
|
248 |
-
p.inW = (int)x.dim_size(2);
|
249 |
-
p.minorDim = (int)x.dim_size(3);
|
250 |
-
p.kernelH = (int)k.dim_size(0);
|
251 |
-
p.kernelW = (int)k.dim_size(1);
|
252 |
-
OP_REQUIRES(ctx, p.kernelW >= 1 && p.kernelH >= 1, errors::InvalidArgument("kernel must be at least 1x1"));
|
253 |
-
|
254 |
-
p.outW = (p.inW * p.upx + p.padx0 + p.padx1 - p.kernelW + p.downx) / p.downx;
|
255 |
-
p.outH = (p.inH * p.upy + p.pady0 + p.pady1 - p.kernelH + p.downy) / p.downy;
|
256 |
-
OP_REQUIRES(ctx, p.outW >= 1 && p.outH >= 1, errors::InvalidArgument("output must be at least 1x1"));
|
257 |
-
|
258 |
-
Tensor* y = NULL; // [majorDim, outH, outW, minorDim]
|
259 |
-
TensorShape ys;
|
260 |
-
ys.AddDim(p.majorDim);
|
261 |
-
ys.AddDim(p.outH);
|
262 |
-
ys.AddDim(p.outW);
|
263 |
-
ys.AddDim(p.minorDim);
|
264 |
-
OP_REQUIRES_OK(ctx, ctx->allocate_output(0, ys, &y));
|
265 |
-
p.y = y->flat<T>().data();
|
266 |
-
OP_REQUIRES(ctx, y->NumElements() <= kint32max, errors::InvalidArgument("output too large"));
|
267 |
-
|
268 |
-
// Choose CUDA kernel to use.
|
269 |
-
void* cudaKernel = (void*)UpFirDn2DKernel_large<T>;
|
270 |
-
int tileOutW = -1;
|
271 |
-
int tileOutH = -1;
|
272 |
-
if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 7 && p.kernelH <= 7) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 7,7, 64,16>; tileOutW = 64; tileOutH = 16; }
|
273 |
-
if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 6,6, 64,16>; tileOutW = 64; tileOutH = 16; }
|
274 |
-
if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 5 && p.kernelH <= 5) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 5,5, 64,16>; tileOutW = 64; tileOutH = 16; }
|
275 |
-
if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 4,4, 64,16>; tileOutW = 64; tileOutH = 16; }
|
276 |
-
if (p.upx == 1 && p.upy == 1 && p.downx == 1 && p.downy == 1 && p.kernelW <= 3 && p.kernelH <= 3) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 1,1, 3,3, 64,16>; tileOutW = 64; tileOutH = 16; }
|
277 |
-
if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 8,8, 64,16>; tileOutW = 64; tileOutH = 16; }
|
278 |
-
if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 6,6, 64,16>; tileOutW = 64; tileOutH = 16; }
|
279 |
-
if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 4,4, 64,16>; tileOutW = 64; tileOutH = 16; }
|
280 |
-
if (p.upx == 2 && p.upy == 2 && p.downx == 1 && p.downy == 1 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 2,2, 1,1, 2,2, 64,16>; tileOutW = 64; tileOutH = 16; }
|
281 |
-
if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 8 && p.kernelH <= 8) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 8,8, 32,8>; tileOutW = 32; tileOutH = 8; }
|
282 |
-
if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 6 && p.kernelH <= 6) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 6,6, 32,8>; tileOutW = 32; tileOutH = 8; }
|
283 |
-
if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 4 && p.kernelH <= 4) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 4,4, 32,8>; tileOutW = 32; tileOutH = 8; }
|
284 |
-
if (p.upx == 1 && p.upy == 1 && p.downx == 2 && p.downy == 2 && p.kernelW <= 2 && p.kernelH <= 2) { cudaKernel = (void*)UpFirDn2DKernel_small<T, 1,1, 2,2, 2,2, 32,8>; tileOutW = 32; tileOutH = 8; }
|
285 |
-
|
286 |
-
// Choose launch params.
|
287 |
-
dim3 blockSize;
|
288 |
-
dim3 gridSize;
|
289 |
-
if (tileOutW > 0 && tileOutH > 0) // small
|
290 |
-
{
|
291 |
-
p.loopMajor = (p.majorDim - 1) / 16384 + 1;
|
292 |
-
p.loopX = 1;
|
293 |
-
blockSize = dim3(32 * 8, 1, 1);
|
294 |
-
gridSize = dim3(((p.outH - 1) / tileOutH + 1) * p.minorDim, (p.outW - 1) / (p.loopX * tileOutW) + 1, (p.majorDim - 1) / p.loopMajor + 1);
|
295 |
-
}
|
296 |
-
else // large
|
297 |
-
{
|
298 |
-
p.loopMajor = (p.majorDim - 1) / 16384 + 1;
|
299 |
-
p.loopX = 4;
|
300 |
-
blockSize = dim3(4, 32, 1);
|
301 |
-
gridSize = dim3((p.outH * p.minorDim - 1) / blockSize.x + 1, (p.outW - 1) / (p.loopX * blockSize.y) + 1, (p.majorDim - 1) / p.loopMajor + 1);
|
302 |
-
}
|
303 |
-
|
304 |
-
// Launch CUDA kernel.
|
305 |
-
void* args[] = {&p};
|
306 |
-
OP_CHECK_CUDA_ERROR(ctx, cudaLaunchKernel(cudaKernel, gridSize, blockSize, args, 0, stream));
|
307 |
-
}
|
308 |
-
};
|
309 |
-
|
310 |
-
// Op interface: input tensor x and filter k share element type T (float or
// half); all resampling and padding attributes are optional integers.
REGISTER_OP("UpFirDn2D")
    .Input("x: T")
    .Input("k: T")
    .Output("y: T")
    .Attr("T: {float, half}")
    // Upsampling factors.
    .Attr("upx: int = 1")
    .Attr("upy: int = 1")
    // Downsampling factors.
    .Attr("downx: int = 1")
    .Attr("downy: int = 1")
    // Padding on each side of each spatial axis.
    .Attr("padx0: int = 0")
    .Attr("padx1: int = 0")
    .Attr("pady0: int = 0")
    .Attr("pady1: int = 0");

// GPU-only kernels, one per supported element type.
REGISTER_KERNEL_BUILDER(
    Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint<float>("T"),
    UpFirDn2DOp<float>);
REGISTER_KERNEL_BUILDER(
    Name("UpFirDn2D").Device(DEVICE_GPU).TypeConstraint<Eigen::half>("T"),
    UpFirDn2DOp<Eigen::half>);
|
325 |
-
|
326 |
-
//------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|