# MNIST image classifier, improved from an online tutorial (rev 061a822).
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn.functional as F
from PIL import Image
import matplotlib.pyplot as plt
import cv2
# 1. Model Definition with Adaptive Pooling
class ImageClassifier(nn.Module):
    """Small CNN that maps (N, 1, H, W) grayscale images to 10 class logits."""

    def __init__(self):
        super().__init__()
        # Three 3x3 conv stages followed by global average pooling, so the
        # linear head never depends on the input's spatial dimensions.
        layers = [
            nn.Conv2d(1, 32, (3, 3)), nn.ReLU(),
            nn.Conv2d(32, 64, (3, 3)), nn.ReLU(),
            nn.Conv2d(64, 64, (3, 3)), nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),  # pool to 1x1 to avoid hardcoded dims
            nn.Flatten(),
            nn.Linear(64, 10),  # logits for the 10 digit classes (0-9)
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return raw class logits for a batch of images."""
        return self.model(x)
# 2. Data Augmentation for Training
# Mild geometric jitter makes the model more robust to off-center digits.
_augmentations = [
    transforms.RandomRotation(10),                     # rotate within +/- 10 degrees
    transforms.RandomAffine(0, translate=(0.1, 0.1)),  # shift up to 10% each axis
]
train_transform = transforms.Compose(
    _augmentations
    + [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),  # map [0, 1] pixels to [-1, 1]
    ]
)

# Load MNIST dataset (downloaded into ./data on first run).
train_dataset = datasets.MNIST(root="data", download=True, train=True, transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
# 3. Train the Model
def train_model(model, train_loader, num_epochs=10, device="cpu"):
    """Train *model* in place with Adam + cross-entropy loss.

    Args:
        model: nn.Module producing class logits; optimized in place.
        train_loader: DataLoader yielding (images, labels) batches.
        num_epochs: number of full passes over the data.
        device: torch device to train on; the default "cpu" preserves the
            original hardcoded behavior while allowing e.g. "cuda".

    Returns:
        List of per-epoch average losses (callers may ignore it).
    """
    model = model.to(device)
    opt = Adam(model.parameters(), lr=1e-3)
    loss_fn = nn.CrossEntropyLoss()
    model.train()
    history = []
    for epoch in range(num_epochs):
        total_loss = 0.0
        for X, y in train_loader:
            X, y = X.to(device), y.to(device)
            # Forward pass
            yhat = model(X)
            loss = loss_fn(yhat, y)
            # Backpropagation
            opt.zero_grad()
            loss.backward()
            opt.step()
            total_loss += loss.item()
        # max(..., 1) guards against ZeroDivisionError on an empty loader.
        avg_loss = total_loss / max(len(train_loader), 1)
        history.append(avg_loss)
        print(f"Epoch {epoch+1}, Loss: {avg_loss}")
    return history
# Build a fresh classifier and fit it on MNIST (CPU is sufficient for this model).
clf = ImageClassifier().to('cpu')
train_model(clf, train_loader)

# Persist the learned weights so inference can run without retraining.
torch.save(clf.state_dict(), 'mnist_classifier.pth')
print("Model saved as 'mnist_classifier.pth'")
# 4. Noise Reduction and Preprocessing for Test Image
def preprocess_image(image_path):
    """Load an image file and convert it to an MNIST-style input tensor.

    Args:
        image_path: path to an image readable by OpenCV.

    Returns:
        A (1, 1, 28, 28) float tensor normalized to [-1, 1].

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    # Load image as grayscale using OpenCV.
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None (no exception) for a missing/unreadable file;
    # fail loudly here instead of crashing later inside cv2.resize.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    # Resize to 28x28 pixels to match MNIST.
    img = cv2.resize(img, (28, 28))
    # Apply Gaussian blur to reduce noise.
    img_blur = cv2.GaussianBlur(img, (5, 5), 0)
    # Convert to PIL Image for compatibility with torchvision transforms.
    img_pil = Image.fromarray(img_blur)
    # Apply the same normalization used for MNIST training data.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    return transform(img_pil).unsqueeze(0)  # add batch dimension
# 5. Test on Noisy Image
def test_model_on_image(model, image_path):
    """Classify a single image file, print the result, and show the image.

    Args:
        model: trained classifier producing (N, 10) logits.
        image_path: path to the image to classify.
    """
    # Preprocess the (possibly noisy) image into a (1, 1, 28, 28) tensor.
    img_tensor = preprocess_image(image_path).to('cpu')
    # Evaluation mode disables dropout / batchnorm updates during inference.
    model.eval()
    with torch.no_grad():
        output = model(img_tensor)
        # argmax over the class dimension. The original argmax over the
        # flattened tensor only worked because the batch size is 1.
        predicted = torch.argmax(output, dim=1)[0]
        # Softmax turns logits into probabilities for a confidence readout.
        probs = F.softmax(output, dim=1)
        confidence = probs[0][predicted].item()
    print(f"Predicted Label: {predicted.item()}, Confidence: {confidence}")
    # Visualize the processed image alongside the prediction.
    img_np = img_tensor.squeeze().cpu().numpy()
    plt.imshow(img_np, cmap='gray')
    plt.title(f"Predicted: {predicted.item()}, Confidence: {confidence}")
    plt.show()
# Later: reload the saved weights into a fresh model instance for inference.
clf = ImageClassifier().to('cpu')
clf.load_state_dict(torch.load('mnist_classifier.pth'))
print("Model loaded for inference.")

# Test the model on img_4.jpg (the noisy outlier).
test_image_path = 'img_4.jpg'  # path to the noisy image
test_model_on_image(clf, test_image_path)