eyad-silx committed on
Commit
80f8293
·
verified ·
1 Parent(s): 05e7a95

Upload neat\datasets.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. neat//datasets.py +220 -0
neat//datasets.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Dataset generation functions for testing BackpropNEAT."""
2
+
3
+ import numpy as np
4
+ import jax.numpy as jnp
5
+
6
def generate_xor_data(n_samples: int = 200, complexity: float = 1.0) -> tuple:
    """Generate a clustered, rotated XOR dataset.

    Each of the four quadrants receives three clusters placed progressively
    further from the origin. Same-sign quadrants are labelled -1 and
    opposite-sign quadrants are labelled 1 (before the global rotation).

    Args:
        n_samples: Number of samples per quadrant. The samples are split as
            evenly as possible across the three clusters of a quadrant, so
            the result always contains exactly ``4 * n_samples`` rows.
        complexity: Scales the per-cluster rotation (30 degrees per cluster
            index) and the global rotation (45 degrees) applied to the data.

    Returns:
        Tuple of (features, labels) as JAX arrays: features has shape
        ``(4 * n_samples, 2)`` and labels has shape ``(4 * n_samples,)``
        with values -1 or 1.

    Raises:
        ValueError: If ``n_samples`` is negative.
    """
    if n_samples < 0:
        raise ValueError("n_samples must be non-negative")

    n_clusters = 3
    # Spread the remainder over the first clusters so no samples are dropped
    # when n_samples is not a multiple of n_clusters.
    base, extra = divmod(n_samples, n_clusters)
    cluster_sizes = [base + (1 if idx < extra else 0) for idx in range(n_clusters)]

    points = []
    labels = []

    for cluster, size in enumerate(cluster_sizes):
        # 30 degree rotation per cluster index, scaled by complexity.
        rotation = complexity * cluster * np.pi / 6
        cos_rot = np.cos(rotation)
        sin_rot = np.sin(rotation)

        # Cluster centers move outward along the diagonals, leaving gaps.
        offset = 0.7 + 0.3 * cluster
        centers = [
            # (x, y, radius, label)
            (-offset, -offset, 0.2, -1),  # Bottom-left
            (offset, offset, 0.2, -1),    # Top-right
            (-offset, offset, 0.2, 1),    # Top-left
            (offset, -offset, 0.2, 1),    # Bottom-right
        ]

        for cx, cy, radius, label in centers:
            # Sample polar offsets inside a disc around the center.
            angle = np.random.uniform(0, 2 * np.pi, size)
            dist = np.random.uniform(0, radius, size)
            dx = dist * np.cos(angle)
            dy = dist * np.sin(angle)

            # Rotate the offsets about the cluster center.
            dx_rot = dx * cos_rot - dy * sin_rot
            dy_rot = dx * sin_rot + dy * cos_rot

            # Translate to the center and add Gaussian jitter.
            px = cx + dx_rot + np.random.normal(0, 0.05, size)
            py = cy + dy_rot + np.random.normal(0, 0.05, size)

            points.append(np.column_stack([px, py]))
            labels.extend([label] * size)

    features = np.vstack(points)
    targets = np.array(labels, dtype=np.float32)

    # Global rotation (45 degrees when complexity == 1).
    global_angle = complexity * np.pi / 4
    rotation_matrix = np.array([
        [np.cos(global_angle), -np.sin(global_angle)],
        [np.sin(global_angle), np.cos(global_angle)],
    ])
    features = features @ rotation_matrix

    # Shuffle features and labels with the same permutation.
    perm = np.random.permutation(len(features))
    return jnp.array(features[perm]), jnp.array(targets[perm])
76
+
77
def generate_circle_data(n_samples: int = 1000, noise: float = 0.1) -> tuple:
    """Generate a two-ring (concentric circles) classification dataset.

    The inner ring has nominal radius 0.5 and label -1; the outer ring has
    nominal radius 1.5 and label 1. Angles are sampled independently for
    each ring so the two classes are not paired point-for-point.

    Args:
        n_samples: Number of samples per class
        noise: Standard deviation of Gaussian noise added to the radius

    Returns:
        Tuple of (features, labels) as NumPy arrays: features has shape
        ``(2 * n_samples, 2)`` and labels has shape ``(2 * n_samples,)``
        with values -1.0 or 1.0, shuffled with a common permutation.
    """
    def _ring(radius: float, label: float) -> tuple:
        # Independent angles per ring; radius is perturbed by Gaussian noise.
        angles = np.random.uniform(0, 2 * np.pi, n_samples)
        radii = radius + np.random.normal(0, noise, n_samples)
        coords = np.column_stack([radii * np.cos(angles), radii * np.sin(angles)])
        return coords, np.full(n_samples, label)

    X_inner, y_inner = _ring(0.5, -1.0)  # Inner circle (class -1)
    X_outer, y_outer = _ring(1.5, 1.0)   # Outer circle (class 1)

    X = np.vstack([X_inner, X_outer])
    y = np.hstack([y_inner, y_outer])

    # Shuffle features and labels together.
    perm = np.random.permutation(len(X))
    return X[perm], y[perm]
113
+
114
def generate_spiral_dataset(n_points=1000, noise=0.1):
    """Generate a two-spiral dataset with ten engineered features per point.

    For every sampled base angle one point is emitted on each of two
    spirals (the second is the first rotated by pi), so the result has
    ``2 * n_points`` rows.

    Feature columns, in order:
        0, 1: cartesian x and y, divided by 2
        2:    radius divided by 2
        3, 4: sine and cosine of the position within the current revolution
        5:    tanh(2 / radius)
        6:    unwrapped angle divided by 8*pi
        7:    tanh(radius / unwrapped angle)
        8:    tanh(10 / (4*pi)), a constant column
        9:    distance to the point mirrored through the origin, divided by 4

    Args:
        n_points: Number of base angles to sample (points per spiral).
        noise: Scale of the Gaussian noise applied to radius and angle; the
            effective noise grows with the radius and vanishes at the center.

    Returns:
        Tuple of (features, labels) as NumPy arrays; labels are -1 for the
        first spiral and 1 for the second.
    """
    two_pi = 2 * np.pi
    max_theta = 4 * np.pi  # Two full revolutions
    eps = 1e-8

    # sqrt of a uniform variable has density growing linearly with theta, so
    # more samples land on the outer turns (where the arc is longer).
    theta = np.sqrt(np.random.uniform(0, 1, n_points)) * max_theta

    # Radius grows linearly with angle, so dr/dtheta is the same everywhere;
    # compute the normalized feature once.
    dr_dtheta = 1 / max_theta
    radial_velocity = np.tanh(dr_dtheta * 10)

    data = []
    labels = []

    for base_theta in theta:
        # Base radius increases with theta
        r_base = base_theta / max_theta

        # Noise that scales with radius (zero at the center)
        noise_scale = noise * (1 - np.exp(-2 * r_base))

        for spiral_idx in range(2):
            # Rotate second spiral by pi
            angle = base_theta + np.pi * spiral_idx

            # Perturb radius first, then angle (angle gets 10x less noise)
            r = r_base + np.random.normal(0, noise_scale)
            angle += np.random.normal(0, noise_scale * 0.1)

            # Cartesian coordinates
            x = r * np.cos(angle)
            y = r * np.sin(angle)

            # Polar coordinates recovered from the cartesian point
            r_point = np.sqrt(x * x + y * y)
            theta_point = np.arctan2(y, x)

            # Unwrap: arctan2 returns values in (-pi, pi], so map it into
            # [0, 2*pi) before adding the completed revolutions; otherwise
            # the result is one full turn too small on half of each turn.
            revolutions = np.floor(angle / two_pi)
            theta_unwrapped = np.mod(theta_point, two_pi) + two_pi * revolutions

            # 1. Local curvature proxy (inverse radius)
            curvature = 1 / (r_point + eps)

            # 2. Spiral phase (position within the current revolution)
            phase = theta_unwrapped % two_pi / two_pi

            # 4. Normalized angular position (accounts for revolutions)
            angular_pos = theta_unwrapped / max_theta

            # 5. Spiral tightness (radius per unit of unwrapped angle)
            tightness = r_point / (theta_unwrapped + eps)

            # 6. Distance to the same-radius point half a turn away
            other_angle = angle + np.pi
            other_x = r * np.cos(other_angle)
            other_y = r * np.sin(other_angle)
            dist_to_other = np.sqrt((x - other_x) ** 2 + (y - other_y) ** 2)

            # 7. Periodic encoding of the phase
            sin_phase = np.sin(phase * two_pi)
            cos_phase = np.cos(phase * two_pi)

            features = np.array([
                x / 2.0,                  # Normalize coordinates
                y / 2.0,
                r_point / 2.0,            # Normalize radius
                sin_phase,                # Already in [-1, 1]
                cos_phase,                # Already in [-1, 1]
                np.tanh(curvature * 2),   # Squash curvature
                angular_pos / 2.0,        # Normalize angular position
                np.tanh(tightness),       # Squash tightness
                radial_velocity,          # 3. Radial velocity (constant)
                dist_to_other / 4.0,      # Normalize distance to other spiral
            ])

            data.append(features)
            labels.append(spiral_idx * 2 - 1)  # Convert to [-1, 1]

    return np.array(data), np.array(labels)
197
+
198
def generate_checkerboard_data(n_samples: int = 200) -> tuple:
    """Generate checkerboard dataset.

    Points are drawn uniformly from the square [-2, 2) x [-2, 2) and
    labelled by the parity of the unit cell they fall in.

    Args:
        n_samples: Half the number of points to draw; ``2 * n_samples``
            points are generated in total. Classes are not balanced
            explicitly.

    Returns:
        Tuple of (features, labels) as JAX arrays: features has shape
        ``(2 * n_samples, 2)``; labels has shape ``(2 * n_samples,)`` with
        value 1 where ``floor(x1) + floor(x2)`` is even and 0 otherwise.
    """
    total = n_samples * 2
    X = np.random.uniform(-2, 2, (total, 2))

    # Vectorized cell parity: the floored coordinates are integer-valued
    # floats, and NumPy's % takes the sign of the divisor, so negative cells
    # are handled the same way as with Python integers.
    cell_sum = np.floor(X[:, 0]) + np.floor(X[:, 1])
    y = (cell_sum % 2 == 0).astype(np.float64)

    return jnp.array(X), jnp.array(y)
217
+
218
# Public API: the dataset generators exported by this module.
__all__ = [
    'generate_xor_data',
    'generate_circle_data',
    'generate_spiral_dataset',
    'generate_checkerboard_data',
]