improved_digits_recognition / torchnn.py

im2

improved from online tutorial

061a822 about 2 months ago

4.03 kB

	import torch
	import torch.nn as nn
	from torch.optim import Adam
	from torch.utils.data import DataLoader
	from torchvision import datasets, transforms
	import torch.nn.functional as F
	from PIL import Image
	import matplotlib.pyplot as plt
	import cv2

	# 1. Model Definition with Adaptive Pooling
	class ImageClassifier(nn.Module):
	    """Small convolutional classifier that maps images to class logits.

	    Three unpadded 3x3 convolutions (each followed by ReLU) feed a global
	    average pool, so the linear head always receives 64 features regardless
	    of the input's spatial size. Each unpadded 3x3 convolution trims 2 pixels
	    per dimension, so inputs must be at least 7x7.

	    Args:
	        in_channels: Number of channels in the input images. Defaults to 1
	            (grayscale), matching the original hard-coded value.
	        num_classes: Number of output logits. Defaults to 10 (digits 0-9),
	            matching the original hard-coded value.
	    """

	    def __init__(self, in_channels: int = 1, num_classes: int = 10):
	        super().__init__()
	        # The attribute name ``model`` and the layer order determine the
	        # state_dict keys ("model.0.weight", ..., "model.8.bias"); keep both
	        # stable so previously saved weights continue to load.
	        self.model = nn.Sequential(
	            nn.Conv2d(in_channels, 32, (3, 3)),
	            nn.ReLU(),
	            nn.Conv2d(32, 64, (3, 3)),
	            nn.ReLU(),
	            nn.Conv2d(64, 64, (3, 3)),
	            nn.ReLU(),
	            nn.AdaptiveAvgPool2d((1, 1)),  # Pool to 1x1 to avoid hardcoding dimensions
	            nn.Flatten(),
	            nn.Linear(64, num_classes),  # One logit per class
	        )

	    def forward(self, x):
	        """Return the raw (pre-softmax) class logits for the batch ``x``."""
	        return self.model(x)

	# 2. Data Augmentation for Training
	train_transform = transforms.Compose([
	    # Geometric jitter: rotate by an angle drawn from [-10, 10] degrees ...
	    transforms.RandomRotation(degrees=10),
	    # ... then shift by up to 10% of the width/height, with no extra rotation
	    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
	    transforms.ToTensor(),
	    # (x - 0.5) / 0.5 rescales ToTensor's [0, 1] output to [-1, 1]
	    transforms.Normalize(mean=(0.5,), std=(0.5,)),
	])

	# MNIST training split stored under "data" (downloaded when requested),
	# served in shuffled mini-batches of 32 augmented samples
	train_dataset = datasets.MNIST(
	    root="data",
	    train=True,
	    transform=train_transform,
	    download=True,
	)
	train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

	# 3. Train the Model
	def train_model(model, train_loader, num_epochs=10):
	    """Train ``model`` in place with Adam (lr=1e-3) and cross-entropy loss.

	    Args:
	        model: Module that maps a batch of inputs to class logits.
	        train_loader: Iterable yielding ``(inputs, targets)`` batches. It is
	            iterated once per epoch and does not need to support ``len()``.
	        num_epochs: Number of passes over ``train_loader``.

	    Returns:
	        A list with the mean batch loss of each epoch, in epoch order.

	    Raises:
	        ValueError: If an epoch yields no batches, so no mean loss exists.
	    """
	    opt = Adam(model.parameters(), lr=1e-3)
	    loss_fn = nn.CrossEntropyLoss()

	    # Send batches to wherever the model lives instead of assuming the CPU.
	    # (Adam above has already rejected a model with no parameters.)
	    device = next(model.parameters()).device

	    epoch_losses = []
	    model.train()
	    for epoch in range(num_epochs):
	        total_loss = 0.0
	        # Count batches ourselves: this avoids requiring ``len(train_loader)``
	        # and lets us detect an empty epoch before dividing.
	        num_batches = 0
	        for X, y in train_loader:
	            X, y = X.to(device), y.to(device)

	            # Forward pass
	            yhat = model(X)
	            loss = loss_fn(yhat, y)

	            # Backpropagation
	            opt.zero_grad()
	            loss.backward()
	            opt.step()

	            total_loss += loss.item()
	            num_batches += 1

	        if num_batches == 0:
	            raise ValueError(
	                "train_loader yielded no batches; cannot compute an epoch loss"
	            )

	        mean_loss = total_loss / num_batches
	        epoch_losses.append(mean_loss)
	        print(f"Epoch {epoch+1}, Loss: {mean_loss}")

	    return epoch_losses

	# Build a fresh classifier, keep it on the CPU, and fit it on the MNIST loader
	clf = ImageClassifier()
	clf = clf.to('cpu')
	train_model(clf, train_loader)

	# Persist only the learned parameters (the state_dict), then confirm on stdout
	torch.save(obj=clf.state_dict(), f='mnist_classifier.pth')
	print("Model saved as 'mnist_classifier.pth'")

	# 4. Noise Reduction and Preprocessing for Test Image
	def preprocess_image(image_path):
	    """Load an image file and turn it into a normalized 1x1x28x28 tensor.

	    The image is read as grayscale, resized to 28x28, smoothed with a 5x5
	    Gaussian blur, converted to a tensor, and normalized with mean 0.5 and
	    std 0.5 (the same normalization used for the training data).

	    Args:
	        image_path: Path of the image file to load.

	    Returns:
	        A float tensor of shape (1, 1, 28, 28): batch, channel, height, width.

	    Raises:
	        FileNotFoundError: If ``image_path`` does not point to a file.
	        ValueError: If the file exists but OpenCV cannot decode it.
	    """
	    import os  # local import: only needed for the failure diagnostics below

	    # Load image using OpenCV
	    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

	    # cv2.imread reports failure by returning None rather than raising, which
	    # would otherwise surface as an opaque error inside cv2.resize.
	    if img is None:
	        if not os.path.isfile(image_path):
	            raise FileNotFoundError(f"Image file not found: {image_path!r}")
	        raise ValueError(f"Could not decode image file: {image_path!r}")

	    # NOTE(review): MNIST digits are light strokes on a dark background; if the
	    # input has dark strokes on a light background it may need inverting
	    # before classification - confirm against the expected inputs.

	    # Resize to 28x28 pixels to match MNIST
	    img = cv2.resize(img, (28, 28))

	    # Apply Gaussian blur to reduce noise
	    img_blur = cv2.GaussianBlur(img, (5, 5), 0)

	    # Convert to PIL Image for compatibility with torchvision transforms
	    img_pil = Image.fromarray(img_blur)

	    # Apply transformations: same tensor conversion and normalization as training
	    transform = transforms.Compose([
	        transforms.ToTensor(),
	        transforms.Normalize((0.5,), (0.5,)),
	    ])

	    img_tensor = transform(img_pil).unsqueeze(0)  # Add batch dimension
	    return img_tensor

	# 5. Test on Noisy Image
	def test_model_on_image(model, image_path):
	    """Classify one image file, then print and plot the prediction.

	    Args:
	        model: Classifier returning logits of shape (1, num_classes) for the
	            tensor produced by ``preprocess_image``.
	        image_path: Path of the image file to classify.

	    Returns:
	        A ``(label, confidence)`` tuple: the predicted class index as an int
	        and its softmax probability as a float.

	    Side effects:
	        Switches ``model`` to eval mode, prints the prediction, and opens a
	        matplotlib window showing the preprocessed image.
	    """
	    # Preprocess the noisy image
	    img_tensor = preprocess_image(image_path)

	    # Run on whatever device the model lives on instead of assuming the CPU;
	    # a parameter-free model leaves the tensor where preprocessing put it.
	    first_param = next(model.parameters(), None)
	    if first_param is not None:
	        img_tensor = img_tensor.to(first_param.device)

	    # Model in evaluation mode
	    model.eval()
	    with torch.no_grad():
	        output = model(img_tensor)
	        # Get softmax probabilities
	        probs = F.softmax(output, dim=1)

	    # Take the arg-max over the class dimension explicitly, then read the
	    # single sample of the batch.
	    label = int(torch.argmax(output, dim=1)[0].item())
	    confidence = probs[0, label].item()

	    print(f"Predicted Label: {label}, Confidence: {confidence}")

	    # Visualize the processed image (as the model saw it, after normalization)
	    img_np = img_tensor.squeeze().cpu().numpy()
	    plt.imshow(img_np, cmap='gray')
	    plt.title(f"Predicted: {label}, Confidence: {confidence}")
	    plt.show()

	    return label, confidence

	# Later: rebuild the architecture and restore the saved weights for inference
	clf = ImageClassifier().to('cpu')
	# map_location='cpu' lets a checkpoint written from a GPU load on a CPU-only
	# machine; weights_only=True restricts unpickling to tensors and plain
	# containers, which is all a state_dict holds.
	clf.load_state_dict(
	    torch.load('mnist_classifier.pth', map_location='cpu', weights_only=True)
	)
	print("Model loaded for inference.")

	# Test the model on img_4.jpg (the noisy outlier)
	test_image_path = 'img_4.jpg'  # Path to the noisy image
	test_model_on_image(clf, test_image_path)