from copy import deepcopy

import torch
import ConfigSpace as CS
from ConfigSpace import hyperparameters as CSH

from priors.utils import uniform_int_sampler_f
from priors.differentiable_prior import DifferentiableHyperparameter, replace_differentiable_distributions


def get_general_config(max_features, bptt, eval_positions=None):
    """
    Returns the general PFN training hyperparameters.
    """
    config_general = {
        "lr": CSH.UniformFloatHyperparameter('lr', lower=0.00002, upper=0.0002, log=True),
        "dropout": CSH.CategoricalHyperparameter('dropout', [0.0]),
        "emsize": CSH.CategoricalHyperparameter('emsize', [2 ** i for i in range(8, 9)]),  # range upper bound is exclusive, so this is just 2**8
        "batch_size": CSH.CategoricalHyperparameter('batch_size', [2 ** i for i in range(8, 9)]),
        "nlayers": CSH.CategoricalHyperparameter('nlayers', [12]),
        "num_features": max_features,
        "nhead": CSH.CategoricalHyperparameter('nhead', [4]),
        "nhid_factor": 2,
        "bptt": bptt,
        "eval_positions": None,
        "seq_len_used": bptt,
        "sampling": 'normal',  # hp.choice('sampling', ['mixed', 'normal']), # uniform
        "epochs": 80,
        "num_steps": 100,
        "verbose": False,
        "pre_sample_causes": True,  # This is MLP
        "mix_activations": False,  # hp.choice('mix_activations', [True, False]),
    }
    return config_general


def get_flexible_categorical_config(max_features):
    """
    Returns the configuration parameters for the tabular multiclass wrapper.
    """
    config_flexible_categorical = {
        "nan_prob_unknown_reason_reason_prior": CSH.CategoricalHyperparameter('nan_prob_unknown_reason_reason_prior', [1.0]),
        "categorical_feature_p": CSH.CategoricalHyperparameter('categorical_feature_p', [0.0]),
        "nan_prob_no_reason": CSH.CategoricalHyperparameter('nan_prob_no_reason', [0.0, 0.1, 0.2]),
        "nan_prob_unknown_reason": CSH.CategoricalHyperparameter('nan_prob_unknown_reason', [0.0]),
        "nan_prob_a_reason": CSH.CategoricalHyperparameter('nan_prob_a_reason', [0.0]),
        # "num_classes": lambda: random.randint(2, 10), "balanced": False,
        "max_num_classes": 2,
        "num_classes": 2,
        "noise_type": CSH.CategoricalHyperparameter('noise_type', ["Gaussian"]),  # NN
        "balanced": True,
        "normalize_to_ranking": CSH.CategoricalHyperparameter('normalize_to_ranking', [False]),
        "set_value_to_nan": CSH.CategoricalHyperparameter('set_value_to_nan', [0.5, 0.2, 0.0]),
        "normalize_by_used_features": True,
        "num_features_used":
            {'uniform_int_sampler_f(3,max_features)': uniform_int_sampler_f(1, max_features)},
        # hp.choice('conv_activation', [{'distribution': 'uniform', 'min': 2.0, 'max': 8.0}, None]),
    }
    return config_flexible_categorical


def get_diff_flex():
    """
    Returns the configuration parameters for a differentiable wrapper around the tabular multiclass wrapper.
    """
    diff_flex = {
        # "ordinal_pct": {'distribution': 'uniform', 'min': 0.0, 'max': 0.5},
        # "num_categorical_features_sampler_a": hp.choice('num_categorical_features_sampler_a',
        #                                                 [{'distribution': 'uniform', 'min': 0.3, 'max': 0.9}, None]),
        # "num_categorical_features_sampler_b": {'distribution': 'uniform', 'min': 0.3, 'max': 0.9},
        "output_multiclass_ordered_p": {'distribution': 'uniform', 'min': 0.0, 'max': 0.5},  # CSH.CategoricalHyperparameter('output_multiclass_ordered_p', [0.0, 0.1, 0.2]),
        "multiclass_type": {'distribution': 'meta_choice', 'choice_values': ['value', 'rank']},
    }
    return diff_flex
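
# Illustrative sketch, not part of the original module: the builders above return plain
# nested dicts in which fixed Python values (e.g. "nhid_factor") and ConfigSpace
# hyperparameters (e.g. "lr") coexist; the CSH entries are only resolved later via
# create_configspace_from_hierarchical / fill_in_configsample defined further down.
# The max_features and bptt values used here are placeholders, not defaults of the original code.
def _example_static_config(max_features=100, bptt=1024):
    config = {**get_general_config(max_features, bptt), **get_flexible_categorical_config(max_features)}
    assert isinstance(config["lr"], CSH.UniformFloatHyperparameter)  # still unsampled
    assert config["nhid_factor"] == 2  # already a concrete value
    return config
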
""" diff_gp = { 'outputscale': {'distribution': 'meta_trunc_norm_log_scaled', 'max_mean': 10., 'min_mean': 0.00001, 'round': False, 'lower_bound': 0}, 'lengthscale': {'distribution': 'meta_trunc_norm_log_scaled', 'max_mean': 10., 'min_mean': 0.00001, 'round': False, 'lower_bound': 0}, 'noise': {'distribution': 'meta_choice', 'choice_values': [0.00001, 0.0001, 0.01]} } return diff_gp def get_diff_causal(): """" Returns the configuration parameters for a differentiable wrapper around MLP / Causal mixture. """ diff_causal = { "num_layers": {'distribution': 'meta_trunc_norm_log_scaled', 'max_mean': 6, 'min_mean': 1, 'round': True, 'lower_bound': 2}, # Better beta? "prior_mlp_hidden_dim": {'distribution': 'meta_trunc_norm_log_scaled', 'max_mean': 130, 'min_mean': 5, 'round': True, 'lower_bound': 4}, "prior_mlp_dropout_prob": {'distribution': 'meta_beta', 'scale': 0.9, 'min': 0.1, 'max': 5.0}, # This mustn't be too high since activations get too large otherwise "noise_std": {'distribution': 'meta_trunc_norm_log_scaled', 'max_mean': .3, 'min_mean': 0.0001, 'round': False, 'lower_bound': 0.0}, "init_std": {'distribution': 'meta_trunc_norm_log_scaled', 'max_mean': 10.0, 'min_mean': 0.01, 'round': False, 'lower_bound': 0.0}, "num_causes": {'distribution': 'meta_trunc_norm_log_scaled', 'max_mean': 12, 'min_mean': 1, 'round': True, 'lower_bound': 1}, "is_causal": {'distribution': 'meta_choice', 'choice_values': [True, False]}, "pre_sample_weights": {'distribution': 'meta_choice', 'choice_values': [True, False]}, "y_is_effect": {'distribution': 'meta_choice', 'choice_values': [True, False]}, "prior_mlp_activations": {'distribution': 'meta_choice_mixed', 'choice_values': [ torch.nn.Tanh , torch.nn.ReLU , torch.nn.Identity , lambda : torch.nn.LeakyReLU(negative_slope=0.1) , torch.nn.ELU ]}, "block_wise_dropout": {'distribution': 'meta_choice', 'choice_values': [True, False]}, "sort_features": {'distribution': 'meta_choice', 'choice_values': [True, False]}, "in_clique": {'distribution': 'meta_choice', 'choice_values': [True, False]}, } return diff_causal def get_diff_prior_bag(): """" Returns the configuration parameters for a GP and MLP / Causal mixture. """ diff_prior_bag = { 'prior_bag_exp_weights_1': {'distribution': 'uniform', 'min': 100000., 'max': 100001.}, # MLP Weight (Biased, since MLP works better, 1.0 is weight for prior number 0) } return diff_prior_bag def get_diff_config(): """" Returns the configuration parameters for a differentiable wrapper around GP and MLP / Causal mixture priors. """ diff_prior_bag = get_diff_prior_bag() diff_causal = get_diff_causal() diff_gp = get_diff_gp() diff_flex = get_diff_flex() config_diff = {'differentiable_hyperparameters': {**diff_prior_bag, **diff_causal, **diff_gp, **diff_flex}} return config_diff def sample_differentiable(config): """" Returns sampled hyperparameters from a differentiable wrapper, that is it makes a non-differentiable out of differentiable. """ # config is a dict of dicts, dicts that have a 'distribution' key are treated as distributions to be sampled result = deepcopy(config) del result['differentiable_hyperparameters'] for k, v in config['differentiable_hyperparameters'].items(): s_indicator, s_hp = DifferentiableHyperparameter(**v, embedding_dim=None, device=None)() # both of these are actually not used to the best of my knowledge result[k] = s_hp return result def list_all_hps_in_nested(config): """" Returns a list of hyperparameters from a neszed dict of hyperparameters. 
""" if isinstance(config, CSH.Hyperparameter): return [config] elif isinstance(config, dict): result = [] for k, v in config.items(): result += list_all_hps_in_nested(v) return result else: return [] def create_configspace_from_hierarchical(config): cs = CS.ConfigurationSpace() for hp in list_all_hps_in_nested(config): cs.add_hyperparameter(hp) return cs def fill_in_configsample(config, configsample): # config is our dict that defines config distribution # configsample is a CS.Configuration hierarchical_configsample = deepcopy(config) for k, v in config.items(): if isinstance(v, CSH.Hyperparameter): hierarchical_configsample[k] = configsample[v.name] elif isinstance(v, dict): hierarchical_configsample[k] = fill_in_configsample(v, configsample) return hierarchical_configsample def evaluate_hypers(config, sample_diff_hps=False): """" Samples a hyperparameter configuration from a sampleable configuration (can be used in HP search). """ if sample_diff_hps: # I do a deepcopy here, such that the config stays the same and can still be used with diff. hps config = deepcopy(config) replace_differentiable_distributions(config) cs = create_configspace_from_hierarchical(config) cs_sample = cs.sample_configuration() return fill_in_configsample(config, cs_sample)