# SFL / LearnRules.py
# Repository page header: amirhosseinkarami - "Add code files" - bae498f
import time
import numpy as np
import LearnTriangles
import Settings as settings
from SymbolicFunctionLearner import SFL
# ---------------------------------------------------------------------------
# Experiment sizes
# ---------------------------------------------------------------------------
# First argument to every LearnTriangles generator call below
# (presumably the number of samples to generate -- confirm in LearnTriangles).
num_triangles = 1000
# NOTE(review): num_fake is not referenced anywhere in the visible script; the
# fake sets below are generated with num_triangles instead. Confirm whether
# that is intended.
num_fake = 3000
# Second argument to the xy generators below.
max_domain = 5
# Number of iterations of the trial loop further down.
num_trials = 500
# Number of feature columns selected per trial; also copied into
# settings.num_features before training.
num_smp_features = 2

# ---------------------------------------------------------------------------
# Global learner settings
# ---------------------------------------------------------------------------
settings.show_output = True
settings.keep_logs = True
settings.mode = "lr"
# settings.initialize_ops = ["mul", "mul", "mul", "mul", "id", "id", "id"]
# settings.initialize_ops = ["mul", "mul", "id"]

# ---------------------------------------------------------------------------
# These are the things to change
# ---------------------------------------------------------------------------
var_names = LearnTriangles.get_xy_var_names()

# Training set, "real" samples: every one is labelled 1.
# real_data = LearnTriangles.get_right_triangle_data(num_triangles, max_domain)
# real_data = LearnTriangles.get_angle_data(num_triangles)
real_data = LearnTriangles.get_xy_data(num_triangles, max_domain)
real_y = [1] * len(real_data)

# Training set, "fake" samples: every one is labelled 0.
fake_data = LearnTriangles.get_fake_xy_data(num_triangles, max_domain)
# fake_data = LearnTriangles.get_fake_angles(num_triangles)
# fake_data = LearnTriangles.get_triangle_data(num_triangles, max_domain)
fake_y = [0] * len(fake_data)

# Test set, "real" samples. Produced by a second call to the same generator
# with the same arguments as the training set.
# real_test_data = LearnTriangles.get_right_triangle_data(num_triangles, max_domain * 2)
# real_test_data = LearnTriangles.get_angle_data(num_triangles)
real_test_data = LearnTriangles.get_xy_data(num_triangles, max_domain)
real_test_y = [1] * len(real_test_data)

# Test set, "fake" samples. Same generator and arguments as the fake training set.
# fake_test_data = LearnTriangles.get_triangle_data(num_triangles, max_domain * 2)
# fake_test_data = LearnTriangles.get_fake_angles(num_triangles)
fake_test_data = LearnTriangles.get_fake_xy_data(num_triangles, max_domain)
fake_test_y = [0] * len(fake_test_data)

# ---------------------------------------------------------------------------
# Don't change after this
# ---------------------------------------------------------------------------
# Sanity check: print a heading followed by the first five entries of each
# training collection (real/fake samples and their labels).
for heading, collection in (
    ("real data: ", real_data),
    ("real y:", real_y),
    ("fake data:", fake_data),
    ("fake y:", fake_y),
):
    print(heading)
    for sample in collection[:5]:
        print(sample)
# Concatenate real samples followed by fake samples (and their labels, in the
# same order) into fresh lists, leaving the source lists unmodified.
full_data = [*real_data, *fake_data]
full_labels = real_y + fake_y
full_test_data = [*real_test_data, *fake_test_data]
full_test_labels = real_test_y + fake_test_y
# print("full data:\n{}".format(full_data))
# print("full y:\n{}".format(full_labels))
#
# for datum in real_data:
# print(datum[0]*datum[4] - datum[1]*datum[3])
# One [best_err, best_model, smp_var_names] entry per finished trial,
# kept sorted by best_err (ascending).
our_results = []
settings.true_eqn = "0*x1"
settings.num_features = num_smp_features
model = SFL()

# NOTE(review): the indentation of this section was lost in the source this
# was recovered from. Everything below is assumed to run once per trial
# (including rewriting the results file and printing the summary) -- confirm
# against the original script.
for trial_round in range(num_trials):
    # Draw num_smp_features column indices (with replacement) from the columns
    # of the real samples.
    sampled_features = np.random.choice(range(len(real_data[0])), num_smp_features, replace=True)
    # NOTE(review): the random draw above is immediately overridden, so every
    # trial uses columns 0 and 1. Both statements are kept so behaviour
    # (including consumption of NumPy's global random state) is unchanged;
    # drop one of them once the intent is confirmed.
    sampled_features = [0, 1]

    # Project the train/test rows and the variable names onto the chosen columns.
    data = [[row[col] for col in sampled_features] for row in full_data]
    test_data = [[row[col] for col in sampled_features] for row in full_test_data]
    smp_var_names = [var_names[col] for col in sampled_features]

    print("Trial round {} of {}.".format(trial_round + 1, num_trials))
    print(" Using variables {}.".format(smp_var_names))

    # Publish the fixed dataset through the settings module before resetting
    # the model. fixed_x gets a fresh list holding the projected rows.
    settings.fixed_x = list(data)
    settings.fixed_y = full_labels

    model.reset(var_names=smp_var_names)

    # Training and test inputs come from the fixed dataset assembled above
    # (not from DataUtils.generate_data), reshaped to
    # (-1, num_dims_per_feature, num_features).
    train_X = np.array(data)
    train_Y = full_labels
    test_X = np.array(test_data)
    test_Y = full_test_labels
    train_X = train_X.reshape([-1, settings.num_dims_per_feature, settings.num_features])
    test_X = test_X.reshape([-1, settings.num_dims_per_feature, settings.num_features])

    start_time = time.time()
    best_model, best_iter, best_err = model.repeat_train(
        train_X, train_Y,
        settings.num_train_repeat_processes,
        test_x=test_X, test_y=test_Y)
    running_time = time.time() - start_time

    print("best_model: {}".format(best_model))
    print("----------------------")
    print("Finished this experiment. Took {:.2f} minutes.\n".format(running_time / 60))

    our_results.append([best_err, best_model, smp_var_names])
    our_results.sort(key=lambda entry: entry[0])

    # Rewrite the results file from scratch with everything collected so far.
    # The context manager guarantees the handle is closed even if a write fails.
    with open("images/triangle_output.txt", "w") as output_file:
        for entry in our_results:
            output_file.write("{}\n{}\n{}\n\n".format(entry[0], entry[2], entry[1]))

    print("Final solution found at attempt {}:".format(best_iter))
    print("y = {}".format(best_model))
    print("Test error: {}".format(best_err))
    if best_err < 0.02:
        print("Attained error less than 0.02 - great!")
    print()