|
"""Dataset generation functions for testing BackpropNEAT.""" |
|
|
|
import numpy as np |
|
import jax.numpy as jnp |
|
|
|
def generate_xor_data(n_samples: int = 200, complexity: float = 1.0) -> tuple:
    """Build a noisy, rotated XOR dataset made of concentric rings of blobs.

    Three rings of blobs are placed at increasing distance from the origin,
    one blob per quadrant in every ring. Blobs whose centre coordinates share
    a sign are labelled -1, the other two are labelled +1. The assembled
    point cloud is rotated as a whole and finally shuffled.

    Args:
        n_samples: Approximate number of samples per quadrant; each of the
            12 blobs receives ``n_samples // 3`` points.
        complexity: Scales the per-ring blob rotation
            (``complexity * ring * pi / 6``) and the final global rotation
            (``complexity * pi / 4``).

    Returns:
        Tuple of (features, labels) as JAX arrays. Features have two
        columns; labels are float32 values in {-1, +1}.
    """
    ring_count = 3
    blob_size = n_samples // ring_count
    blob_radius = 0.2
    jitter_std = 0.05

    coords = []
    targets = []
    for ring in range(ring_count):
        # Each ring turns its blobs a little further than the previous one.
        ring_angle = complexity * ring * np.pi / 6
        cos_ring, sin_ring = np.cos(ring_angle), np.sin(ring_angle)

        # Rings sit further out along the diagonals as `ring` grows.
        offset = 0.7 + 0.3 * ring
        blobs = (
            (-offset, -offset, -1),
            (offset, offset, -1),
            (-offset, offset, 1),
            (offset, -offset, 1),
        )

        for center_x, center_y, target in blobs:
            # Polar sampling inside a disc of radius `blob_radius`.
            angles = np.random.uniform(0, 2 * np.pi, blob_size)
            radii = np.random.uniform(0, blob_radius, blob_size)
            local_x = radii * np.cos(angles)
            local_y = radii * np.sin(angles)

            # Rotate the blob about its own centre.
            turned_x = local_x * cos_ring - local_y * sin_ring
            turned_y = local_x * sin_ring + local_y * cos_ring

            # Move to the blob centre and add Gaussian jitter.
            final_x = center_x + turned_x + np.random.normal(0, jitter_std, blob_size)
            final_y = center_y + turned_y + np.random.normal(0, jitter_std, blob_size)

            coords.append(np.stack([final_x, final_y], axis=1))
            targets.append(np.full(blob_size, target, dtype=np.float32))

    X = np.concatenate(coords, axis=0)
    y = np.concatenate(targets)

    # Global rotation of the whole cloud (row vectors times the matrix).
    global_angle = complexity * np.pi / 4
    cos_g, sin_g = np.cos(global_angle), np.sin(global_angle)
    rotation_matrix = np.array([
        [cos_g, -sin_g],
        [sin_g, cos_g],
    ])
    X = np.matmul(X, rotation_matrix)

    # Shuffle features and labels with one shared permutation.
    order = np.random.permutation(X.shape[0])
    return jnp.array(X[order]), jnp.array(y[order])
|
|
|
def generate_circle_data(n_samples: int = 1000, noise: float = 0.1) -> tuple:
    """Create a two-ring (concentric circles) classification dataset.

    One set of angles is drawn and reused for both rings, so the i-th inner
    and i-th outer point lie on the same ray before shuffling.

    Args:
        n_samples: Number of samples per class (per ring).
        noise: Standard deviation of the Gaussian noise added to each radius.

    Returns:
        Tuple of (features, labels) as NumPy arrays with ``2 * n_samples``
        shuffled rows. The ring of radius 0.5 is labelled -1.0 and the ring
        of radius 1.5 is labelled 1.0.
    """
    angles = np.random.uniform(0, 2 * np.pi, n_samples)
    cos_a = np.cos(angles)
    sin_a = np.sin(angles)

    rings = []
    targets = []
    # Inner ring first, then outer ring: the radial noise is drawn in this order.
    for base_radius, target in ((0.5, -1.0), (1.5, 1.0)):
        radii = base_radius + np.random.normal(0, noise, n_samples)
        rings.append(np.column_stack([radii * cos_a, radii * sin_a]))
        targets.append(np.full(n_samples, target))

    X = np.concatenate(rings, axis=0)
    y = np.concatenate(targets)

    # One permutation keeps features and labels aligned.
    order = np.random.permutation(X.shape[0])
    return X[order], y[order]
|
|
|
def generate_spiral_dataset(n_points: int = 1000, noise: float = 0.1) -> tuple:
    """Generate a two-arm spiral dataset with engineered polar features.

    ``n_points`` base angles are drawn on ``[0, 4*pi]``; for every base angle
    one point is placed on each arm (the second arm is the first one turned
    by ``pi``). Rows are therefore ordered arm 0, arm 1, arm 0, arm 1, ...

    Each row holds ten features:
        0. ``x / 2``
        1. ``y / 2``
        2. ``radius / 2``
        3. sine of the polar angle
        4. cosine of the polar angle
        5. ``tanh(2 / radius)``
        6. ``unwrapped_angle / (8 * pi)``
        7. ``tanh(radius / unwrapped_angle)``
        8. ``tanh(10 * dr/dtheta)`` (constant for this spiral)
        9. distance to the point mirrored through the origin, divided by 4

    Args:
        n_points: Number of base angles; each yields one sample per arm.
        noise: Upper bound on the standard deviation of the Gaussian radial
            jitter. The effective scale grows with the radius as
            ``noise * (1 - exp(-2 * r))``; angular jitter uses a tenth of it.

    Returns:
        Tuple of (features, labels) as NumPy arrays of shape
        ``(2 * n_points, 10)`` and ``(2 * n_points,)``. Labels are -1 for
        the first arm and +1 for the second.
    """
    two_pi = 2 * np.pi
    eps = 1e-8

    # sqrt of a uniform variate puts more base angles on the outer turns.
    theta = np.sqrt(np.random.uniform(0, 1, n_points)) * 4 * np.pi

    # The radius grows linearly with the angle, so this feature is the same
    # for every sample; compute it once instead of once per point.
    dr_dtheta = 1 / (4 * np.pi)
    growth_feature = np.tanh(dr_dtheta * 10)

    data = []
    labels = []

    for base_angle in theta:
        r_base = base_angle / (4 * np.pi)

        # Less jitter near the origin, where the two arms are close together.
        noise_scale = noise * (1 - np.exp(-2 * r_base))

        for spiral_idx in range(2):
            angle = base_angle + np.pi * spiral_idx

            # Draw order (radius first, then angle) is kept so seeded runs
            # consume the random stream exactly as before.
            r = r_base + np.random.normal(0, noise_scale)
            angle += np.random.normal(0, noise_scale * 0.1)

            x = r * np.cos(angle)
            y = r * np.sin(angle)

            r_point = np.sqrt(x * x + y * y)
            theta_point = np.arctan2(y, x)

            # arctan2 returns values in (-pi, pi], so adding
            # floor(angle / 2pi) full turns is off by 2pi whenever
            # angle mod 2pi exceeds pi. Lift theta_point onto the branch
            # nearest to the generating angle instead.
            turns = np.round((angle - theta_point) / two_pi)
            theta_unwrapped = theta_point + two_pi * turns

            curvature = 1 / (r_point + eps)
            phase = theta_unwrapped % two_pi / two_pi
            angular_pos = theta_unwrapped / (4 * np.pi)
            tightness = r_point / (theta_unwrapped + eps)

            # Distance to the point half a turn further round at the same radius.
            other_angle = angle + np.pi
            other_x = r * np.cos(other_angle)
            other_y = r * np.sin(other_angle)
            dist_to_other = np.sqrt((x - other_x) ** 2 + (y - other_y) ** 2)

            sin_phase = np.sin(phase * 2 * np.pi)
            cos_phase = np.cos(phase * 2 * np.pi)

            features = np.array([
                x / 2.0,
                y / 2.0,
                r_point / 2.0,
                sin_phase,
                cos_phase,
                np.tanh(curvature * 2),
                angular_pos / 2.0,
                np.tanh(tightness),
                growth_feature,
                dist_to_other / 4.0,
            ])

            data.append(features)
            labels.append(spiral_idx * 2 - 1)

    # reshape keeps the (rows, 10) layout even when no points are requested.
    return np.array(data, dtype=float).reshape(-1, 10), np.array(labels, dtype=int)
|
|
|
def generate_checkerboard_data(n_samples: int = 200) -> tuple:
    """Sample a checkerboard classification dataset on the square [-2, 2)^2.

    Points are drawn uniformly, so the two classes are only approximately
    balanced.

    Args:
        n_samples: Half the number of points to draw; ``2 * n_samples``
            points are generated in total.

    Returns:
        Tuple of (features, labels) as JAX arrays. A point is labelled 1 when
        the sum of the floors of its two coordinates is even, otherwise 0.
    """
    total = n_samples * 2
    coords = np.random.uniform(-2, 2, (total, 2))

    # Parity of the unit cell each point falls into, computed for all rows at once.
    cell_parity = np.floor(coords).astype(int).sum(axis=1) % 2
    targets = np.where(cell_parity == 0, 1.0, 0.0)

    return jnp.array(coords), jnp.array(targets)
|
|
|
|
|
# Names exported by a star-import of this module.
__all__ = [
    "generate_xor_data",
    "generate_circle_data",
    "generate_spiral_dataset",
    "generate_checkerboard_data",
]
|
|