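# Genetic programming baseline built on gplearn's SymbolicRegressor. Exposes the
# train / test / repeat_train interface plus helpers for extracting the evolved
# formula as a gplearn program or as a nested prefix list.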
import time

import numpy as np
from gplearn.genetic import SymbolicRegressor
from sklearn.utils.validation import column_or_1d

import Settings as settings
from DataUtils import make_y_multi_safe

pop_size = 5000
generations = 20
p_crossover = 0.7
warm_start = False


class Genetic_Model:
    def __init__(self):
        self.name = "Genetic Model"
        self.short_name = "GP"
        self.function_set = settings.function_set.copy()
        # gplearn has no identity function, so drop "id" if it appears in the settings.
        if "id" in self.function_set:
            self.function_set.remove("id")

        self.est_gp = self._new_estimator()

    # All (re)initialization paths share the same hyperparameters, so the
    # estimator is built in one place instead of repeating the constructor call.
    def _new_estimator(self):
        return SymbolicRegressor(population_size=pop_size,
                                 generations=generations, stopping_criteria=0.01,
                                 p_crossover=p_crossover, p_subtree_mutation=0.1,
                                 p_hoist_mutation=0.05, p_point_mutation=0.1,
                                 warm_start=warm_start,
                                 max_samples=0.9, verbose=False,
                                 parsimony_coefficient=0.01,
                                 function_set=self.function_set)

    def reset(self):
        del self.est_gp
        self.est_gp = self._new_estimator()

    # Kept separate from reset() for interface compatibility; gplearn offers no
    # lighter-weight reinitialization, so both methods rebuild the estimator.
    def soft_reset(self):
        del self.est_gp
        self.est_gp = self._new_estimator()

    def predict(self, X):
        return self.est_gp.predict(X)

    def get_formula(self):
        return self.est_gp._program

    def get_simple_formula(self, digits=None):
        # digits is accepted for interface compatibility but not used:
        # the gplearn program is returned unrounded.
        return self.get_formula()

    def get_big_formula(self):
        # Convert gplearn's prefix string, e.g. "add(mul(X0, X1), sin(X0))",
        # into a nested Python list such as ['+', ['*', 'x1', 'x2'], ['sin', 'x1']].
        # Only the operators produced by this model's function set are handled.
        formula_string = str(self.get_formula())
        nested_list_string = formula_string.replace("sqrt(", "[\'sqrt\', ")
        nested_list_string = nested_list_string.replace("add(", "[\'+\', ")
        nested_list_string = nested_list_string.replace("mul(", "[\'*\', ")
        nested_list_string = nested_list_string.replace("sub(", "[\'-\', ")
        nested_list_string = nested_list_string.replace("sin(", "[\'sin\', ")
        nested_list_string = nested_list_string.replace(")", "]")
        # Mark variable tokens with 'Y' so the digit scanner below can find them.
        nested_list_string = nested_list_string.replace("X", "Y")

        retval = ""
        currently_digits = False
        current_number = ""
        for current_char in nested_list_string:
            if current_char == 'Y':
                # Start of a variable reference: emit the quoted name prefix.
                retval += "\'x"
                currently_digits = True
                current_number = ""
            elif currently_digits:
                if current_char.isdigit():
                    current_number += current_char
                else:
                    # End of the variable index: shift from 0-based X0 to 1-based x1.
                    currently_digits = False
                    retval += "{}".format(int(current_number) + 1)
                    retval += "\'{}".format(current_char)
            else:
                retval += current_char

        # Flush a trailing variable reference (e.g. when the program is just "X0").
        if currently_digits and current_number:
            retval += "{}\'".format(int(current_number) + 1)

        if "Y" in retval:
            print("ERROR: formula still contains a Y...")
            print("   formula string: {}\n   nested list string: {}".format(formula_string, nested_list_string))

        return eval(retval)

    def train(self, X, Y):
        X = np.reshape(X, [X.shape[0], -1])
        Y = np.reshape(Y, [-1, 1])
        Y = column_or_1d(Y)
        self.est_gp.fit(X, Y)
        return None

    # NOTE: despite its name, this trains only once on a random subsample;
    # num_repeats and num_steps_to_train are kept for interface compatibility.
    def repeat_train(self, x, y, test_x=None, test_y=None,
                     num_repeats=settings.num_train_repeat_processes,
                     num_steps_to_train=settings.num_train_steps_in_repeat_mode,
                     verbose=True):
        train_set_size = int(len(x) * settings.quick_train_fraction + 0.1)
        x = np.array(x)
        y = np.reshape(np.array(y), [-1, ])
        # Random train/validation split without replacement.
        sample = np.random.choice(range(x.shape[0]), size=train_set_size, replace=False)
        out_sample = [yyy for yyy in range(x.shape[0]) if yyy not in sample]

        train_x = x[sample][:]
        train_y = y[sample][:]
        valid_x = x[out_sample][:]
        valid_y = y[out_sample][:]

        old_time = time.time()

        if verbose:
            print("Beginning {} repeat sessions of {} iterations each.".format(num_repeats,
                                                                               num_steps_to_train))
            print()
            start_time = time.time()
            old_time = start_time

        self.soft_reset()
        self.train(train_x, train_y)

        current_time = time.time()
        valid_err = self.test(valid_x, valid_y)
        if verbose:
            print("Attained validation error: {:.5f}".format(valid_err))

        best_formula = self.get_simple_formula()
        if test_x is not None:
            safe_test_y = make_y_multi_safe(test_y)
            best_err = self.test(test_x, safe_test_y)
        else:
            best_err = valid_err

        if verbose:
            iters_per_minute = 60.0 / (current_time - old_time)
            print("Took {:.2f} minutes.".format((current_time - old_time) / 60))
            # Only one pass has run, so estimate the remainder from that single timing.
            print("Est. {:.2f} minutes remaining.".format((num_repeats - 1) / iters_per_minute))
            print()

        return best_formula, 0, best_err

    # Mean squared error between predictions and targets.
    def test(self, x, y):
        x = np.reshape(x, [x.shape[0], -1])
        y_hat = np.reshape(self.est_gp.predict(x), [1, -1])[0]
        y_gold = np.reshape(y, [1, -1])[0]
        return float(np.mean((y_hat - y_gold) ** 2))
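

# A minimal usage sketch (an assumption, not part of the original module): it
# presumes Settings.function_set contains gplearn-compatible operator names such
# as "add", "sub", "mul", "sin", "sqrt". It fits the model to a small synthetic
# target y = x1 * x2 + sin(x1), then prints the evolved program and its MSE on
# held-out points.
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    X_demo = rng.uniform(-1, 1, size=(200, 2))
    y_demo = X_demo[:, 0] * X_demo[:, 1] + np.sin(X_demo[:, 0])

    model = Genetic_Model()
    model.train(X_demo[:150], y_demo[:150])

    print("Evolved program:", model.get_simple_formula())
    print("Held-out MSE: {:.5f}".format(model.test(X_demo[150:], y_demo[150:])))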