"""Experiment driver: train the symbolic function learner on triangle data.

The script builds a labelled data set from ``LearnTriangles`` ("real" rows
are labelled 1, "fake" rows are labelled 0), then runs ``num_trials`` rounds
in which an ``SFL`` model is reset and trained on a subset of the feature
columns.  After every round the accumulated results, sorted by error, are
written to ``images/triangle_output.txt``.
"""
import os
import time

import numpy as np

import LearnTriangles
import Settings as settings
from SymbolicFunctionLearner import SFL

num_triangles = 1000
num_fake = 3000  # NOTE(review): not referenced anywhere in this script.
max_domain = 5
num_trials = 500
num_smp_features = 2

# Results file; rewritten in full after every trial.
output_path = "images/triangle_output.txt"

settings.show_output = True
settings.keep_logs = True
settings.mode = "lr"
# settings.initialize_ops = ["mul", "mul", "mul", "mul", "id", "id", "id"]
# settings.initialize_ops = ["mul", "mul", "id"]

# ---------------------------------------------------------------------------
# These are the things to change
# ---------------------------------------------------------------------------
var_names = LearnTriangles.get_xy_var_names()

# real_data = LearnTriangles.get_right_triangle_data(num_triangles, max_domain)
# real_data = LearnTriangles.get_angle_data(num_triangles)
real_data = LearnTriangles.get_xy_data(num_triangles, max_domain)
real_y = [1] * len(real_data)

fake_data = LearnTriangles.get_fake_xy_data(num_triangles, max_domain)
# fake_data = LearnTriangles.get_fake_angles(num_triangles)
# fake_data = LearnTriangles.get_triangle_data(num_triangles, max_domain)
fake_y = [0] * len(fake_data)

# real_test_data = LearnTriangles.get_right_triangle_data(num_triangles, max_domain * 2)
# real_test_data = LearnTriangles.get_angle_data(num_triangles)
real_test_data = LearnTriangles.get_xy_data(num_triangles, max_domain)
real_test_y = [1] * len(real_test_data)

# fake_test_data = LearnTriangles.get_triangle_data(num_triangles, max_domain * 2)
# fake_test_data = LearnTriangles.get_fake_angles(num_triangles)
fake_test_data = LearnTriangles.get_fake_xy_data(num_triangles, max_domain)
fake_test_y = [0] * len(fake_test_data)

# ---------------------------------------------------------------------------
# Don't change after this
# ---------------------------------------------------------------------------

# Show the first few rows of each set as a quick sanity check.
for heading, preview_rows in (
    ("real data: ", real_data),
    ("real y:", real_y),
    ("fake data:", fake_data),
    ("fake y:", fake_y),
):
    print(heading)
    for preview_row in preview_rows[:5]:
        print(preview_row)

# Real rows first, then fake rows; labels are concatenated in the same order.
full_data = [*real_data, *fake_data]
full_labels = [*real_y, *fake_y]

full_test_data = [*real_test_data, *fake_test_data]
full_test_labels = [*real_test_y, *fake_test_y]

# print("full data:\n{}".format(full_data))
# print("full y:\n{}".format(full_labels))
#
# for datum in real_data:
#     print(datum[0]*datum[4] - datum[1]*datum[3])

our_results = []

settings.true_eqn = "0*x1"
settings.num_features = num_smp_features

model = SFL()

# Fail fast: make sure the output directory exists before any training is
# done, instead of hitting a missing directory after the first trial.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# NOTE(review): the original indentation of this file was lost.  Everything
# from here to the end of the script is treated as the per-trial loop body;
# confirm that the result file and the closing summary were meant to be
# produced once per trial rather than once after the last trial.
for trial_round in range(num_trials):
    sampled_features = np.random.choice(range(len(real_data[0])),
                                        num_smp_features,
                                        replace=True)
    # NOTE(review): the random sample above is immediately overridden, so
    # every trial uses columns 0 and 1.  The call is kept so the NumPy random
    # stream is consumed exactly as before.
    sampled_features = [0, 1]

    data = [[row[smp_i] for smp_i in sampled_features] for row in full_data]
    test_data = [[row[smp_i] for smp_i in sampled_features] for row in full_test_data]
    smp_var_names = [var_names[smp_i] for smp_i in sampled_features]

    print("Trial round {} of {}.".format(trial_round + 1, num_trials))
    print("  Using variables {}.".format(smp_var_names))

    # Publish the training set through the shared settings module.
    settings.fixed_x = list(data)
    settings.fixed_y = full_labels

    # print("fixed_x: {}, {}".format(len(settings.fixed_x), len(settings.fixed_x[0])))
    # print("fixed_y: {}".format(len(settings.fixed_y)))

    model.reset(var_names=smp_var_names)

    # train_X = DataUtils.generate_data(settings.train_N, n_vars=model.n_input_variables,
    #                                   avoid_zero=settings.avoid_zero)
    # valid_X = DataUtils.generate_data(settings.train_N, n_vars=model.n_input_variables,
    #                                   avoid_zero=settings.avoid_zero)
    # test_X = DataUtils.generate_data(settings.test_N, n_vars=model.n_input_variables,
    #                                  min_x=settings.test_scope[0],
    #                                  max_x=settings.test_scope[1])

    input_shape = [-1, settings.num_dims_per_feature, settings.num_features]
    train_X = np.array(data).reshape(input_shape)
    train_Y = full_labels
    test_X = np.array(test_data).reshape(input_shape)
    test_Y = full_test_labels

    start_time = time.time()
    best_model, best_iter, best_err = model.repeat_train(
        train_X, train_Y,
        settings.num_train_repeat_processes,
        test_x=test_X, test_y=test_Y)
    running_time = time.time() - start_time

    print("best_model: {}".format(best_model))
    print("----------------------")
    print("Finished this experiment. Took {:.2f} minutes.\n".format(running_time / 60))

    # Keep the accumulated results ordered by error, lowest first.
    our_results.append([best_err, best_model, smp_var_names])
    our_results = sorted(our_results, key=lambda entry: entry[0])

    # The context manager closes the file even if a write raises.
    with open(output_path, "w") as output_file:
        for entry in our_results:
            output_file.write("{}\n{}\n{}\n\n".format(entry[0], entry[2], entry[1]))

    print("Final solution found at attempt {}:".format(best_iter))
    print("y = {}".format(best_model))
    print("Test error: {}".format(best_err))
    if best_err < 0.02:
        print("Attained error less than 0.02 - great!")
    print()