"""Train BackpropNEAT on Spiral dataset."""
import numpy as np
import matplotlib.pyplot as plt
import jax.numpy as jnp
import os
import json
from datetime import datetime

from sklearn.model_selection import train_test_split

from neat.backprop_neat import BackpropNEAT
from neat.datasets import generate_spiral_dataset

class NetworkLogger:
    """Logger for tracking network evolution."""

    def __init__(self, output_dir: str):
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)  # Ensure the log directory exists
        self.log_file = os.path.join(output_dir, "evolution_log.json")
        self.history = []

    def log_network(self, epoch: int, network, loss: float, accuracy: float):
        """Log network state."""
        network_state = {
            'epoch': epoch,
            'loss': float(loss),
            'accuracy': float(accuracy),
            'n_nodes': network.genome.n_nodes,
            'n_connections': len(network.genome.connections),
            'complexity_score': self.calculate_complexity(network),
            'structure': self.get_network_structure(network),
            'timestamp': datetime.now().isoformat()
        }
        self.history.append(network_state)
        # Save to file (rewrites the full history so the log survives interruption)
        with open(self.log_file, 'w') as f:
            json.dump(self.history, f, indent=2)

    def calculate_complexity(self, network):
        """Calculate network complexity score."""
        n_nodes = network.genome.n_nodes
        n_connections = len(network.genome.connections)
        return n_nodes * 0.5 + n_connections
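
    # Worked example of the score above: a genome with 10 nodes and 20
    # connections scores 10 * 0.5 + 20 = 25, so connections weigh twice
    # as much as nodes in the logged complexity.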

    def get_network_structure(self, network):
        """Get detailed network structure."""
        connections = []
        for (src, dst), weight in network.genome.connections.items():
            connections.append({
                'source': int(src),
                'target': int(dst),
                'weight': float(weight)
            })
        return {
            'input_size': network.genome.input_size,
            'output_size': network.genome.output_size,
            'hidden_nodes': (network.genome.n_nodes
                             - network.genome.input_size
                             - network.genome.output_size),
            'connections': connections
        }
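
    # One entry in evolution_log.json then looks like (values illustrative):
    #   {"epoch": 0, "loss": 0.69, "accuracy": 0.5,
    #    "n_nodes": ..., "n_connections": ...,
    #    "complexity_score": ..., "structure": {...}, "timestamp": "..."}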

    def plot_evolution(self, save_path: str):
        """Plot network evolution metrics."""
        epochs = [log['epoch'] for log in self.history]
        accuracies = [log['accuracy'] for log in self.history]
        complexities = [log['complexity_score'] for log in self.history]
        losses = [log['loss'] for log in self.history]
        fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(12, 12))
        # Plot accuracy
        ax1.plot(epochs, accuracies, 'b-', label='Accuracy')
        ax1.set_ylabel('Accuracy')
        ax1.set_title('Network Evolution')
        ax1.grid(True)
        ax1.legend()
        # Plot complexity
        ax2.plot(epochs, complexities, 'r-', label='Complexity Score')
        ax2.set_ylabel('Complexity Score')
        ax2.grid(True)
        ax2.legend()
        # Plot loss
        ax3.plot(epochs, losses, 'g-', label='Loss')
        ax3.set_ylabel('Loss')
        ax3.set_xlabel('Epoch')
        ax3.grid(True)
        ax3.legend()
        plt.tight_layout()
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()
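
# NetworkLogger is not wired into main() below; a typical usage sketch:
#   logger = NetworkLogger("results")
#   logger.log_network(epoch, network, loss, accuracy)  # once per epoch
#   logger.plot_evolution("results/evolution.png")      # after training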

def visualize_dataset(X, y, network=None, title=None, save_path=None):
    """Visualize dataset with decision boundary."""
    plt.figure(figsize=(10, 8))
    if network is not None:
        # Create mesh grid
        x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
        y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
        xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                             np.linspace(y_min, y_max, 100))
        # Make predictions
        X_mesh = jnp.array(np.c_[xx.ravel(), yy.ravel()], dtype=jnp.float32)
        Z = network.predict(X_mesh)
        Z = Z.reshape(xx.shape)
        # Plot decision boundary
        plt.contourf(xx, yy, Z, alpha=0.4, cmap='RdYlBu')
    plt.scatter(X[y == 1, 0], X[y == 1, 1], c='red', label='Class 1')
    plt.scatter(X[y == -1, 0], X[y == -1, 1], c='blue', label='Class -1')
    plt.grid(True)
    plt.legend()
    plt.title(title or 'Dataset')
    plt.xlabel('X1')
    plt.ylabel('X2')
    if save_path:
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        print(f"Saved plot to {save_path}")
    else:
        plt.show()
    plt.close()
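
# Example call (sketch): visualize_dataset(X_train, y_train,
#     title='Spiral training set', save_path='spiral_train.png').
# Note: the mesh above feeds 2-D (x, y) points to network.predict, so pass
# network= only for a model trained on raw coordinates, not the 5 spiral
# features used elsewhere in this script.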

def train_network(network, X, y, n_epochs=300, batch_size=32, patience=50):
    """Train a single network."""
    print("Starting network training...")
    print(f"Input shape: {X.shape}, Output shape: {y.shape}")
    print(f"Network params: {network.params['weights'].keys()}")
    n_samples = len(X)
    n_batches = n_samples // batch_size
    best_accuracy = 0.0
    patience_counter = 0
    best_params = None
    # Convert to JAX arrays
    print("Converting to JAX arrays...")
    X = jnp.array(X, dtype=jnp.float32)
    y = jnp.array(y, dtype=jnp.float32)
    # Learning rate schedule
    base_lr = 0.01
    warmup_epochs = 5
    print(f"\nTraining for {n_epochs} epochs with {n_batches} batches per epoch")
    print(f"Batch size: {batch_size}, Patience: {patience}")
    for epoch in range(n_epochs):
        try:
            # Shuffle data
            perm = np.random.permutation(n_samples)
            X = X[perm]
            y = y[perm]
            # Adjust learning rate with warmup and cosine decay
            if epoch < warmup_epochs:
                lr = base_lr * (epoch + 1) / warmup_epochs
            else:
                # Cosine decay with restarts
                cycle_length = 50
                cycle = (epoch - warmup_epochs) // cycle_length
                t = (epoch - warmup_epochs) % cycle_length
                lr = base_lr * 0.5 * (1 + np.cos(t * np.pi / cycle_length))
                # Add small restart bump every cycle
                if t == 0:
                    lr = base_lr * (0.9 ** cycle)
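            # Schedule sketch with base_lr=0.01: warmup ramps the rate from
            # 0.002 to 0.01 over epochs 0-4; afterwards each 50-epoch cycle
            # decays it as 0.01 * 0.5 * (1 + cos(pi*t/50)) and the next cycle
            # restarts at 0.01 * 0.9**cycle. Note lr only feeds the progress
            # print below; _train_step is not passed a learning rate here.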
            epoch_loss = 0.0
            # Train on mini-batches
            for i in range(n_batches):
                start_idx = i * batch_size
                end_idx = start_idx + batch_size
                X_batch = X[start_idx:end_idx]
                y_batch = y[start_idx:end_idx]
                try:
                    # Update network parameters
                    network.params, loss = network._train_step(
                        network.params,
                        X_batch,
                        y_batch
                    )
                    epoch_loss += loss
                except Exception as e:
                    print(f"Error in batch {i}: {str(e)}")
                    print(f"X_batch shape: {X_batch.shape}, y_batch shape: {y_batch.shape}")
                    raise
            # Compute training accuracy (labels are +/-1, so sign agreement
            # is classification accuracy). Flatten predictions in case
            # predict() returns shape (n, 1), which would otherwise
            # broadcast against y of shape (n,).
            predictions = network.predict(X)
            train_accuracy = np.mean((np.ravel(predictions) > 0) == (np.ravel(y) > 0))
            # Early stopping check
            if train_accuracy > best_accuracy:
                best_accuracy = train_accuracy
                best_params = {k: v.copy() for k, v in network.params.items()}
                patience_counter = 0
            else:
                patience_counter += 1
            # Print progress every epoch
            print(f"Epoch {epoch}: Train Acc = {train_accuracy:.4f}, Loss = {epoch_loss/n_batches:.4f}, LR = {lr:.6f}")
            # Early stopping
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch}")
                break
        except Exception as e:
            print(f"Error in epoch {epoch}: {str(e)}")
            raise
    # Restore best parameters
    if best_params is not None:
        network.params = best_params
        print(f"\nRestored best parameters with accuracy: {best_accuracy:.4f}")
    return best_accuracy

def plot_decision_boundary(network, X, y, save_path):
    """Plot decision boundary with multiple views."""
    fig, axes = plt.subplots(2, 2, figsize=(15, 15))
    # Cartesian view mesh
    x_min, x_max = X[:, 0].min() - 0.1, X[:, 0].max() + 0.1
    y_min, y_max = X[:, 1].min() - 0.1, X[:, 1].max() + 0.1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100),
                         np.linspace(y_min, y_max, 100))
    # Create all features for prediction
    r = np.sqrt(xx**2 + yy**2)
    theta = np.arctan2(yy, xx)
    theta = np.unwrap(theta)
    # Guard against division by zero where theta is (near) zero
    dr_dtheta = r / np.where(np.abs(theta) < 1e-8, 1e-8, theta)
    # Normalize features
    x_norm = xx.ravel() / np.max(np.abs(X[:, 0]))
    y_norm = yy.ravel() / np.max(np.abs(X[:, 1]))
    r_norm = r.ravel() / np.max(X[:, 2] * np.max(np.abs(X[:, 0])))
    theta_norm = theta.ravel() / (6 * np.pi)
    dr_norm = dr_dtheta.ravel() / np.max(np.abs(X[:, 4]))
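
    # These normalizations assume generate_spiral_dataset emits columns
    # (x, y, r, theta, dr/dtheta) with theta spanning up to 6*pi radians
    # (three turns); adjust here if the generator changes.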
    # Make predictions
    X_mesh = jnp.array(np.column_stack([
        x_norm, y_norm, r_norm, theta_norm, dr_norm
    ]), dtype=jnp.float32)
    Z = network.predict(X_mesh)
    Z = Z.reshape(xx.shape)
    # Plot Cartesian view
    axes[0, 0].contourf(xx, yy, Z, alpha=0.4, cmap='RdYlBu')
    axes[0, 0].scatter(X[:, 0] * np.max(np.abs(X[:, 0])),
                       X[:, 1] * np.max(np.abs(X[:, 1])),
                       c=['red' if label == 1 else 'blue' for label in y],
                       alpha=0.6)
    axes[0, 0].set_title('Cartesian View')
    axes[0, 0].grid(True)
    # Plot polar view (θ vs r)
    axes[0, 1].scatter(X[:, 3] * 6 * np.pi,  # Denormalize theta
                       X[:, 2] * np.max(np.abs(X[:, 0])),  # Denormalize radius
                       c=['red' if label == 1 else 'blue' for label in y],
                       alpha=0.6)
    axes[0, 1].set_title('Polar View (θ vs r)')
    axes[0, 1].grid(True)
    # Plot dr/dθ vs θ
    axes[1, 0].scatter(X[:, 3] * 6 * np.pi,  # theta
                       X[:, 4] * np.max(np.abs(X[:, 4])),  # dr/dtheta
                       c=['red' if label == 1 else 'blue' for label in y],
                       alpha=0.6)
    axes[1, 0].set_title('Spiral Tightness (dr/dθ vs θ)')
    axes[1, 0].grid(True)
    # Plot r vs dr/dθ
    axes[1, 1].scatter(X[:, 4] * np.max(np.abs(X[:, 4])),  # dr/dtheta
                       X[:, 2] * np.max(np.abs(X[:, 0])),  # radius
                       c=['red' if label == 1 else 'blue' for label in y],
                       alpha=0.6)
    axes[1, 1].set_title('Growth Rate (r vs dr/dθ)')
    axes[1, 1].grid(True)
    plt.tight_layout()
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.close()

def main():
    """Main training loop."""
    print("\nTraining on Spiral dataset...")
    # Generate spiral dataset
    X, y = generate_spiral_dataset(n_points=1000, noise=0.1)
    # Split data
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    # Initialize BackpropNEAT with a smaller network
    n_features = X.shape[1]
    neat = BackpropNEAT(
        n_inputs=n_features,
        n_outputs=1,
        n_hidden=32,  # Reduced hidden layer size
        population_size=5,
        learning_rate=0.01,
        beta=0.9
    )
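    # Assumption: beta is a momentum-style coefficient used by the optimizer
    # inside neat.backprop_neat; see that module for its exact meaning.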
    # Training parameters
    n_epochs = 300
    batch_size = 32
    patience = 30  # Reduced patience
    # Train each network in the population
    best_network = None
    best_val_acc = 0.0
    for i, network in enumerate(neat.population):
        print(f"\nTraining network {i+1}/{len(neat.population)}...")
        # Train network
        train_accuracy = train_network(
            network,
            X_train,
            y_train,
            n_epochs=n_epochs,
            batch_size=batch_size,
            patience=patience
        )
        # Evaluate on validation set (labels are +/-1; compare signs,
        # flattening in case predict() returns shape (n, 1))
        val_preds = network.predict(X_val)
        val_accuracy = np.mean((np.ravel(val_preds) > 0) == (np.ravel(y_val) > 0))
        print(f"Network {i+1} - Train Acc: {train_accuracy:.4f}, Val Acc: {val_accuracy:.4f}")
        # Update best network
        if val_accuracy > best_val_acc:
            best_val_acc = val_accuracy
            best_network = network
    # Plot decision boundary for best network
    if best_network is not None:
        plot_path = "spiral_decision_boundary.png"
        plot_decision_boundary(best_network, X, y, plot_path)
        print(f"\nDecision boundary plot saved to {plot_path}")


if __name__ == "__main__":
    main()
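
# To run (sketch, from the repository root so the `neat` package imports):
#   python backprop_train.py
# This trains 5 candidate networks on the spiral task and saves
# spiral_decision_boundary.png for the best validation performer.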