Upload neat\datasets.py with huggingface_hub
Browse files- neat//datasets.py +220 -0
neat//datasets.py
ADDED
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Dataset generation functions for testing BackpropNEAT."""
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
+
import jax.numpy as jnp
|
5 |
+
|
6 |
+
def generate_xor_data(n_samples: int = 200, complexity: float = 1.0) -> tuple:
    """Generate a clustered XOR dataset with per-cluster and global rotation.

    Each of the four quadrants receives three circular blobs whose centers
    sit on the diagonals at distance offsets 0.7, 1.0 and 1.3 per axis.
    Quadrants whose coordinates share a sign (bottom-left, top-right) are
    labelled -1; the other two quadrants are labelled +1. The whole point
    cloud is then rotated and shuffled.

    Args:
        n_samples: Number of samples per quadrant. The samples are split as
            evenly as possible over the three blobs of the quadrant, so
            exactly ``4 * n_samples`` points are returned even when
            ``n_samples`` is not a multiple of three.
        complexity: Scale factor for the rotations: each blob's local
            offsets are rotated by ``complexity * cluster_index * 30``
            degrees and the full dataset by ``complexity * 45`` degrees.
            The additive Gaussian noise (std 0.05) is not affected.

    Returns:
        Tuple of (features, labels) as JAX arrays. ``features`` has shape
        ``(4 * n_samples, 2)``; ``labels`` has shape ``(4 * n_samples,)``
        with values -1.0 or 1.0.

    Raises:
        ValueError: If ``n_samples`` is negative.
    """
    if n_samples < 0:
        raise ValueError("n_samples must be non-negative")

    # Split the per-quadrant budget over the blobs without dropping the
    # remainder (the first ``remainder`` blobs get one extra sample).
    n_clusters = 3
    base_size, remainder = divmod(n_samples, n_clusters)
    cluster_sizes = [
        base_size + (1 if idx < remainder else 0) for idx in range(n_clusters)
    ]

    point_blocks = []
    label_blocks = []

    for cluster, size in enumerate(cluster_sizes):
        # 30 degree rotation step per cluster index, scaled by complexity.
        rotation = complexity * cluster * np.pi / 6
        cos_rot, sin_rot = np.cos(rotation), np.sin(rotation)

        # Blobs move outwards along the diagonals with the cluster index,
        # leaving gaps between successive blobs.
        offset = 0.7 + 0.3 * cluster
        centers = [
            # (x, y, radius, label)
            (-offset, -offset, 0.2, -1),  # Bottom-left
            (offset, offset, 0.2, -1),    # Top-right
            (-offset, offset, 0.2, 1),    # Top-left
            (offset, -offset, 0.2, 1),    # Bottom-right
        ]

        for cx, cy, radius, label in centers:
            # Polar samples around the blob center. Radius is uniform in
            # [0, radius), so points are denser towards the center.
            theta = np.random.uniform(0, 2 * np.pi, size)
            r = np.random.uniform(0, radius, size)
            dx = r * np.cos(theta)
            dy = r * np.sin(theta)

            # Rotate the local offsets about the blob center.
            dx_rot = dx * cos_rot - dy * sin_rot
            dy_rot = dx * sin_rot + dy * cos_rot

            # Translate to the blob center and add Gaussian jitter.
            px = cx + dx_rot + np.random.normal(0, 0.05, size)
            py = cy + dy_rot + np.random.normal(0, 0.05, size)

            point_blocks.append(np.column_stack([px, py]))
            label_blocks.append(np.full(size, label, dtype=np.float32))

    X = np.vstack(point_blocks)
    y = np.concatenate(label_blocks)

    # Global rotation (45 degrees at complexity 1). Right-multiplying row
    # vectors by the matrix applies its transpose to every point.
    global_theta = complexity * np.pi / 4
    rotation_matrix = np.array([
        [np.cos(global_theta), -np.sin(global_theta)],
        [np.sin(global_theta), np.cos(global_theta)],
    ])
    X = X @ rotation_matrix

    # Shuffle features and labels with the same permutation.
    perm = np.random.permutation(len(X))
    return jnp.array(X[perm]), jnp.array(y[perm])
|
76 |
+
|
77 |
+
def generate_circle_data(n_samples: int = 1000, noise: float = 0.1) -> tuple:
    """Generate a two-class concentric-rings dataset.

    Class -1 lies on a ring of nominal radius 0.5 and class +1 on a ring of
    nominal radius 1.5. Angles are drawn independently for each class so
    that inner and outer points are not paired along the same ray.

    Args:
        n_samples: Number of samples per class.
        noise: Standard deviation of the Gaussian noise added to the radius.

    Returns:
        Tuple of (features, labels) as NumPy arrays. ``features`` has shape
        ``(2 * n_samples, 2)``; ``labels`` has shape ``(2 * n_samples,)``
        with values -1.0 or 1.0. Rows are shuffled.
    """
    # Inner circle (class -1)
    theta_inner = np.random.uniform(0, 2 * np.pi, n_samples)
    r_inner = 0.5 + np.random.normal(0, noise, n_samples)
    X_inner = np.column_stack([
        r_inner * np.cos(theta_inner),
        r_inner * np.sin(theta_inner),
    ])
    y_inner = np.full(n_samples, -1.0)

    # Outer circle (class 1) with its own, independent angles
    theta_outer = np.random.uniform(0, 2 * np.pi, n_samples)
    r_outer = 1.5 + np.random.normal(0, noise, n_samples)
    X_outer = np.column_stack([
        r_outer * np.cos(theta_outer),
        r_outer * np.sin(theta_outer),
    ])
    y_outer = np.full(n_samples, 1.0)

    # Combine both classes and shuffle them with one permutation
    X = np.vstack([X_inner, X_outer])
    y = np.hstack([y_inner, y_outer])
    perm = np.random.permutation(len(X))
    return X[perm], y[perm]
|
113 |
+
|
114 |
+
def generate_spiral_dataset(n_points=1000, noise=0.1):
    """Generate a two-spiral dataset with ten engineered features per sample.

    For every sampled angle one point is emitted on each of the two spirals
    (the second spiral is the first rotated by pi), so the output holds
    ``2 * n_points`` rows ordered as (point 0, spiral 0), (point 0,
    spiral 1), (point 1, spiral 0), ...

    Feature columns:
        0. ``x / 2``
        1. ``y / 2``
        2. ``r_point / 2`` (distance of the point from the origin)
        3. sine of the spiral phase
        4. cosine of the spiral phase
        5. ``tanh(2 / (r_point + eps))`` (curvature proxy)
        6. unwrapped angle divided by ``8 * pi``
        7. ``tanh(r_point / (unwrapped angle + eps))`` (tightness)
        8. ``tanh(10 / (4 * pi))`` (constant radial velocity term)
        9. distance to the point mirrored through the origin, divided by 4

    Args:
        n_points: Number of angles to sample; each yields two rows.
        noise: Upper bound on the standard deviation of the radial noise.
            The effective std grows with the radius as
            ``noise * (1 - exp(-2 * r))``; the angular noise uses a tenth
            of that value.

    Returns:
        Tuple ``(data, labels)`` of NumPy arrays. ``data`` has shape
        ``(2 * n_points, 10)``; ``labels`` has shape ``(2 * n_points,)``
        with -1 for the first spiral and +1 for the second.
    """
    two_pi = 2 * np.pi
    max_angle = 4 * np.pi  # two full revolutions
    eps = 1e-8

    # The square root of a uniform variate has density 2t on [0, 1], so
    # angles are biased towards the outer turns of the spiral.
    theta = np.sqrt(np.random.uniform(0, 1, n_points)) * max_angle

    # Base radius grows linearly with the angle and reaches 1 at max_angle.
    r_base = theta / max_angle

    # Noise scale grows with the radius and vanishes at the origin.
    noise_scale = noise * (1 - np.exp(-2 * r_base))

    # Arrays below have shape (n_points, 2); axis 1 indexes the spiral.
    scale = np.repeat(noise_scale[:, None], 2, axis=1)
    r = r_base[:, None] + np.random.normal(0, scale)
    # Second spiral is rotated by pi; angular noise is a tenth of the radial.
    angle = (
        theta[:, None]
        + np.pi * np.arange(2)
        + np.random.normal(0, scale * 0.1)
    )

    # Cartesian coordinates
    x = r * np.cos(angle)
    y = r * np.sin(angle)

    # Polar coordinates of the resulting point
    r_point = np.sqrt(x * x + y * y)
    theta_point = np.arctan2(y, x)

    # Unwrap: arctan2 returns values in [-pi, pi], so pick the number of
    # whole revolutions that brings the polar angle closest to the
    # generating angle. (Adding floor(angle / 2pi) revolutions instead is
    # off by 2pi whenever angle mod 2pi exceeds pi.)
    # NOTE(review): the revolution count comes from the generating angle,
    # which is not recoverable from (x, y) alone.
    theta_unwrapped = theta_point + two_pi * np.round(
        (angle - theta_point) / two_pi
    )

    # 1. Local curvature proxy (inverse distance from the origin)
    curvature = 1 / (r_point + eps)

    # 2. Spiral phase (fraction of the current revolution)
    phase = theta_unwrapped % two_pi / two_pi

    # 3. Radial velocity dr/dtheta of the noise-free spiral (a constant)
    dr_dtheta = 1 / max_angle

    # 4. Normalized angular position (accounts for multiple revolutions)
    angular_pos = theta_unwrapped / max_angle

    # 5. Spiral tightness (radius per unit of unwrapped angle)
    tightness = r_point / (theta_unwrapped + eps)

    # 6. Distance to the same-radius point rotated by pi
    other_angle = angle + np.pi
    other_x = r * np.cos(other_angle)
    other_y = r * np.sin(other_angle)
    dist_to_other = np.sqrt((x - other_x) ** 2 + (y - other_y) ** 2)

    # 7. Phase encoded on the unit circle
    sin_phase = np.sin(phase * two_pi)
    cos_phase = np.cos(phase * two_pi)

    # Combine features with the same normalization constants as before.
    features = np.stack(
        [
            x / 2.0,
            y / 2.0,
            r_point / 2.0,
            sin_phase,
            cos_phase,
            np.tanh(curvature * 2),
            angular_pos / 2.0,
            np.tanh(tightness),
            np.full_like(x, np.tanh(dr_dtheta * 10)),
            dist_to_other / 4.0,
        ],
        axis=-1,
    )

    # Flatten (point, spiral) pairs into rows; labels alternate -1, +1.
    data = features.reshape(-1, features.shape[-1])
    labels = np.tile(np.array([-1, 1]), n_points)
    return data, labels
|
197 |
+
|
198 |
+
def generate_checkerboard_data(n_samples: int = 200) -> tuple:
    """Generate a checkerboard classification dataset.

    Points are drawn uniformly from the square ``[-2, 2) x [-2, 2)`` and
    labelled by the parity of the unit cell they fall in.

    Args:
        n_samples: Half of the total number of points; ``2 * n_samples``
            points are drawn. The two classes are not guaranteed to be
            equally represented because labels depend on where the random
            points land.

    Returns:
        Tuple of (features, labels) as JAX arrays. ``features`` has shape
        ``(2 * n_samples, 2)``; ``labels`` has shape ``(2 * n_samples,)``
        with 1.0 where ``floor(x1) + floor(x2)`` is even and 0.0 otherwise.
    """
    # Generate random points
    X = np.random.uniform(-2, 2, (n_samples * 2, 2))

    # Integer cell index per axis; the parity of the index sum determines
    # the colour of the checkerboard cell.
    cell_sum = np.floor(X).astype(int).sum(axis=1)
    y = np.where(cell_sum % 2 == 0, 1.0, 0.0)

    return jnp.array(X), jnp.array(y)
|
217 |
+
|
218 |
+
# Export dataset functions
|
219 |
+
# Public API of this module: the dataset generator functions.
__all__ = [
    'generate_xor_data',
    'generate_circle_data',
    'generate_spiral_dataset',
    'generate_checkerboard_data',
]
|