|
|
|
|
|
|
|
import functools |
|
import logging |
|
|
|
from .. import trial |
|
|
|
|
|
_logger = logging.getLogger(__name__) |
|
_MUTABLE_LAYER_SPACE_PREFIX = "_mutable_layer" |
|
_namespace = {} |
|
_tf_variables = {} |
|
_arch_logits_list = [] |
|
_optimizer = None |
|
_train_op = None |
|
|
|
|
|
def classic_mode( |
|
mutable_id, |
|
mutable_layer_id, |
|
funcs, |
|
funcs_args, |
|
fixed_inputs, |
|
optional_inputs, |
|
optional_input_size): |
|
'''Execute the chosen function and inputs directly. |
|
In this mode, the trial code is only running the chosen subgraph (i.e., the chosen ops and inputs), |
|
without touching the full model graph.''' |
|
if trial.get_current_parameter() is None: |
|
trial.get_next_parameter() |
|
|
|
chosen_layer, chosen_inputs = _get_layer_and_inputs_from_tuner(mutable_id, mutable_layer_id, |
|
list(optional_inputs.keys())) |
|
real_chosen_inputs = [optional_inputs[input_name] for input_name in chosen_inputs] |
|
layer_out = funcs[chosen_layer]([fixed_inputs, real_chosen_inputs], **funcs_args[chosen_layer]) |
|
|
|
return layer_out |
|
|
|
|
|
def enas_mode( |
|
mutable_id, |
|
mutable_layer_id, |
|
funcs, |
|
funcs_args, |
|
fixed_inputs, |
|
optional_inputs, |
|
optional_input_size, |
|
tf): |
|
'''For enas mode, we build the full model graph in trial but only run a subgraph。 |
|
This is implemented by masking inputs and branching ops. |
|
Specifically, based on the received subgraph (through nni.get_next_parameter), |
|
it can be known which inputs should be masked and which op should be executed.''' |
|
name_prefix = "{}_{}".format(mutable_id, mutable_layer_id) |
|
|
|
_namespace[mutable_id] = True |
|
_namespace[name_prefix] = dict() |
|
_namespace[name_prefix]['funcs'] = list(funcs) |
|
_namespace[name_prefix]['optional_inputs'] = list(optional_inputs) |
|
|
|
name_for_optional_inputs = name_prefix + '_optional_inputs' |
|
name_for_funcs = name_prefix + '_funcs' |
|
_tf_variables[name_prefix] = dict() |
|
_tf_variables[name_prefix]['optional_inputs'] = tf.get_variable( |
|
name_for_optional_inputs, |
|
[len(optional_inputs)], |
|
dtype=tf.bool, |
|
trainable=False |
|
) |
|
_tf_variables[name_prefix]['funcs'] = tf.get_variable( |
|
name_for_funcs, [], dtype=tf.int64, trainable=False) |
|
|
|
|
|
real_optional_inputs_value = [optional_inputs[name] |
|
for name in _namespace[name_prefix]['optional_inputs']] |
|
real_func_value = [funcs[name] |
|
for name in _namespace[name_prefix]['funcs']] |
|
real_funcs_args = [funcs_args[name] |
|
for name in _namespace[name_prefix]['funcs']] |
|
|
|
real_chosen_inputs = tf.boolean_mask( |
|
real_optional_inputs_value, _tf_variables[name_prefix]['optional_inputs']) |
|
|
|
branches = dict() |
|
func_output = None |
|
for func_id in range(len(funcs)): |
|
func_output = real_func_value[func_id]([fixed_inputs, real_chosen_inputs], **real_funcs_args[func_id]) |
|
branches[tf.equal(_tf_variables[name_prefix]['funcs'], func_id)] = lambda: func_output |
|
layer_out = tf.case(branches, exclusive=True, default=lambda: func_output) |
|
|
|
return layer_out |
|
|
|
|
|
def oneshot_mode( |
|
mutable_id, |
|
mutable_layer_id, |
|
funcs, |
|
funcs_args, |
|
fixed_inputs, |
|
optional_inputs, |
|
optional_input_size, |
|
tf): |
|
'''Similar to enas mode, oneshot mode also builds the full model graph. |
|
The difference is that oneshot mode does not receive subgraph. |
|
Instead, it uses dropout to randomly dropout inputs and ops.''' |
|
|
|
if trial.get_current_parameter() is None: |
|
trial.get_next_parameter() |
|
optional_inputs = list(optional_inputs.values()) |
|
inputs_num = len(optional_inputs) |
|
|
|
if inputs_num > 0: |
|
rate = 0.01 ** (1 / inputs_num) |
|
noise_shape = [inputs_num] + [1] * len(optional_inputs[0].get_shape()) |
|
optional_inputs = tf.nn.dropout( |
|
optional_inputs, rate=rate, noise_shape=noise_shape) |
|
optional_inputs = [optional_inputs[idx] for idx in range(inputs_num)] |
|
layer_outs = [func([fixed_inputs, optional_inputs], **funcs_args[func_name]) |
|
for func_name, func in funcs.items()] |
|
output_num = len(layer_outs) |
|
rate = 0.01 ** (1 / output_num) |
|
noise_shape = [output_num] + [1] * len(layer_outs[0].get_shape()) |
|
layer_outs = tf.nn.dropout(layer_outs, rate=rate, noise_shape=noise_shape) |
|
layer_out = tf.reduce_sum(layer_outs, axis=0) |
|
|
|
return layer_out |
|
|
|
|
|
def darts_mode( |
|
mutable_id, |
|
mutable_layer_id, |
|
funcs, |
|
funcs_args, |
|
fixed_inputs, |
|
optional_inputs, |
|
optional_input_size, |
|
tf): |
|
optional_inputs = list(optional_inputs.values()) |
|
layer_outs = [func([fixed_inputs, optional_inputs], **funcs_args[func_name]) |
|
for func_name, func in funcs.items()] |
|
|
|
var_name = "{}_{}_arch_weights".format(mutable_id, mutable_layer_id) |
|
arch_logits = tf.get_variable(var_name, shape=[len(funcs)], trainable=False) |
|
_arch_logits_list.append(arch_logits) |
|
arch_weights = tf.nn.softmax(arch_logits) |
|
layer_out = tf.add_n([arch_weights[idx] * out for idx, out in enumerate(layer_outs)]) |
|
|
|
return layer_out |
|
|
|
|
|
def reload_tensorflow_variables(tf, session): |
|
'''In Enas mode, this function reload every signal varaible created in `enas_mode` function so |
|
the whole tensorflow graph will be changed into certain subgraph recerived from Tuner. |
|
--------------- |
|
session: the tensorflow session created by users |
|
tf: tensorflow module |
|
''' |
|
subgraph_from_tuner = trial.get_next_parameter() |
|
mutable_layers = set() |
|
for subgraph_key in subgraph_from_tuner: |
|
if "/" in subgraph_key: |
|
|
|
mutable_id, mutable_layer_id = _decompose_general_key(subgraph_key[:subgraph_key.rfind("/")]) |
|
if mutable_id is not None: |
|
mutable_layers.add((mutable_id, mutable_layer_id)) |
|
mutable_layers = sorted(list(mutable_layers)) |
|
for mutable_id, mutable_layer_id in mutable_layers: |
|
if mutable_id not in _namespace: |
|
_logger.warning("%s not found in name space", mutable_id) |
|
continue |
|
name_prefix = "{}_{}".format(mutable_id, mutable_layer_id) |
|
|
|
optional_inputs = _namespace[name_prefix]['optional_inputs'] |
|
|
|
chosen_layer, chosen_inputs = _get_layer_and_inputs_from_tuner(mutable_id, mutable_layer_id, optional_inputs) |
|
chosen_layer = _namespace[name_prefix]['funcs'].index(chosen_layer) |
|
chosen_inputs = [1 if inp in chosen_inputs else 0 for inp in optional_inputs] |
|
|
|
_tf_variables[name_prefix]['funcs'].load(chosen_layer, session) |
|
_tf_variables[name_prefix]['optional_inputs'].load( |
|
chosen_inputs, session) |
|
|
|
|
|
def _construct_general_key(mutable_id, mutable_layer_id): |
|
|
|
|
|
return _MUTABLE_LAYER_SPACE_PREFIX + "/" + mutable_id + "/" + mutable_layer_id |
|
|
|
|
|
def _decompose_general_key(key): |
|
|
|
if not key.startswith(_MUTABLE_LAYER_SPACE_PREFIX): |
|
return None, None |
|
else: |
|
_, mutable_id, mutable_layer_id = key.split("/", maxsplit=2) |
|
return mutable_id, mutable_layer_id |
|
|
|
|
|
def darts_training(tf, session, loss, feed_dict): |
|
global _optimizer, _train_op |
|
if _optimizer is None: |
|
_optimizer = tf.MomentumOptimizer(learning_rate=0.025) |
|
|
|
grads_and_vars = _optimizer.compute_gradients(loss, _arch_logits_list) |
|
_train_op = _optimizer.apply_gradients(grads_and_vars) |
|
session.run(_train_op) |
|
|
|
|
|
def training_update(nas_mode, tf=None, session=None, loss=None, feed_dict=None): |
|
if nas_mode == 'darts_mode': |
|
darts_training(tf, session, loss, feed_dict) |
|
elif nas_mode == 'enas_mode': |
|
reload_tensorflow_variables(tf, session) |
|
|
|
|
|
def _get_layer_and_inputs_from_tuner(mutable_id, mutable_layer_id, optional_inputs): |
|
|
|
try: |
|
mutable_block = trial.get_current_parameter(mutable_id) |
|
|
|
|
|
chosen_layer = mutable_block[mutable_layer_id]["chosen_layer"] |
|
chosen_inputs = mutable_block[mutable_layer_id]["chosen_inputs"] |
|
except KeyError: |
|
|
|
params = trial.get_current_parameter() |
|
expected_prefix = _construct_general_key(mutable_id, mutable_layer_id) |
|
chosen_layer = params[expected_prefix + "/layer_choice"] |
|
|
|
|
|
optional_input_size = int(params[expected_prefix + "/optional_input_size"]) |
|
|
|
|
|
optional_input_state = params[expected_prefix + "/optional_input_chosen_state"] |
|
chosen_inputs = [] |
|
|
|
optional_inputs_keys = sorted(optional_inputs) |
|
for _ in range(optional_input_size): |
|
chosen_inputs.append(optional_inputs_keys[optional_input_state % len(optional_inputs)]) |
|
optional_input_state //= len(optional_inputs) |
|
|
|
_logger.info("%s_%s: layer: %s, optional inputs: %s", mutable_id, mutable_layer_id, chosen_layer, chosen_inputs) |
|
return chosen_layer, chosen_inputs |
|
|
|
|
|
def convert_nas_search_space(search_space): |
|
""" |
|
Args: |
|
param search_space: raw search space |
|
return: the new search space, mutable_layers will be converted into choice |
|
""" |
|
if not isinstance(search_space, dict): |
|
return search_space |
|
ret = dict() |
|
for k, v in search_space.items(): |
|
if "_type" not in v: |
|
|
|
_logger.warning("There is no _type in one of your search space values with key '%s'" |
|
". Please check your search space", k) |
|
ret[k] = v |
|
elif v["_type"] != "mutable_layer": |
|
ret[k] = v |
|
else: |
|
_logger.info("Converting mutable_layer search space with key '%s'", k) |
|
|
|
values = v["_value"] |
|
for layer_name, layer_data in values.items(): |
|
|
|
|
|
|
|
layer_key = _construct_general_key(k, layer_name) |
|
|
|
if layer_data.get("layer_choice"): |
|
layer_choice = layer_data["layer_choice"] |
|
else: |
|
raise ValueError("No layer choice found in %s" % layer_key) |
|
|
|
if layer_data.get("optional_input_size"): |
|
input_size = layer_data["optional_input_size"] |
|
if isinstance(input_size, int): |
|
input_size = [input_size, input_size] |
|
if input_size[0] > input_size[1] or input_size[0] < 0: |
|
_logger.error("Might not be able to handle optional_input_size < 0, please double check") |
|
input_size[1] += 1 |
|
else: |
|
_logger.info("Optional input choices are set to empty by default in %s", layer_key) |
|
input_size = [0, 1] |
|
|
|
if layer_data.get("optional_inputs"): |
|
total_state_size = len(layer_data["optional_inputs"]) ** (input_size[1] - 1) |
|
else: |
|
_logger.info("Optional inputs not found in %s", layer_key) |
|
total_state_size = 1 |
|
|
|
converted = { |
|
layer_key + "/layer_choice": { |
|
"_type": "choice", "_value": layer_choice |
|
}, |
|
layer_key + "/optional_input_size": { |
|
"_type": "randint", "_value": input_size |
|
}, |
|
layer_key + "/optional_input_chosen_state": { |
|
"_type": "randint", "_value": [0, total_state_size] |
|
} |
|
} |
|
_logger.info(converted) |
|
ret.update(converted) |
|
|
|
return ret |
|
|
|
|
|
def rewrite_nas_space(func): |
|
@functools.wraps(func) |
|
def wrap(self, search_space): |
|
search_space = convert_nas_search_space(search_space) |
|
return func(self, search_space) |
|
return wrap |
|
|