import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn.functional as F
from PIL import Image
import matplotlib.pyplot as plt
import cv2

# 1. Model Definition with Adaptive Pooling
class ImageClassifier(nn.Module): 
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(1, 32, (3,3)), 
            nn.ReLU(),
            nn.Conv2d(32, 64, (3,3)), 
            nn.ReLU(),
            nn.Conv2d(64, 64, (3,3)), 
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),  # Pool to 1x1 to avoid hardcoding dimensions
            nn.Flatten(), 
            nn.Linear(64, 10)  # Final layer to output 10 classes (0-9)
        )

    def forward(self, x): 
        return self.model(x)
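
# Optional sanity check (illustrative, not part of the original pipeline): thanks to
# AdaptiveAvgPool2d, the flattened feature size is always 64 regardless of the input
# resolution, so no convolution output dimensions need to be hardcoded.
with torch.no_grad():
    assert ImageClassifier()(torch.randn(1, 1, 28, 28)).shape == (1, 10)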

# 2. Data Augmentation for Training
train_transform = transforms.Compose([
    transforms.RandomRotation(10),    # Random rotation between -10 and +10 degrees
    transforms.RandomAffine(0, translate=(0.1, 0.1)),  # Random translation
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # Normalize to [-1, 1]
])

# Load MNIST dataset
train_dataset = datasets.MNIST(root="data", download=True, train=True, transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
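# Each batch from this loader is a (32, 1, 28, 28) float tensor of images
# normalized to [-1, 1] plus a (32,) tensor of integer digit labels.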

# 3. Train the Model
def train_model(model, train_loader, num_epochs=10):
    opt = Adam(model.parameters(), lr=1e-3)
    loss_fn = nn.CrossEntropyLoss()
    
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for batch in train_loader:
            X, y = batch
            X, y = X.to('cpu'), y.to('cpu')

            # Forward pass
            yhat = model(X)
            loss = loss_fn(yhat, y)

            # Backpropagation
            opt.zero_grad()
            loss.backward()
            opt.step()

            total_loss += loss.item()
        
        print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}")

# Initialize model
clf = ImageClassifier().to('cpu')

# Train the model
train_model(clf, train_loader)

# Save the trained model
torch.save(clf.state_dict(), 'mnist_classifier.pth')
print("Model saved as 'mnist_classifier.pth'")

# 4. Noise Reduction and Preprocessing for Test Image
def preprocess_image(image_path):
    # Load image using OpenCV (grayscale); fail loudly if the path is wrong
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise FileNotFoundError(f"Could not read image at '{image_path}'")

    # Resize to 28x28 pixels to match MNIST
    img = cv2.resize(img, (28, 28))

    # Apply Gaussian blur to reduce noise
    img_blur = cv2.GaussianBlur(img, (5, 5), 0)
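    # Note (assumption): MNIST digits are light strokes on a dark background.
    # If the test image is dark-on-light (e.g. a photographed digit on paper),
    # inverting it here, e.g. img_blur = cv2.bitwise_not(img_blur), may be needed.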

    # Convert to PIL Image for compatibility with torchvision transforms
    img_pil = Image.fromarray(img_blur)

    # Apply transformations: normalize same as MNIST
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    img_tensor = transform(img_pil).unsqueeze(0)  # Add batch dimension
    return img_tensor

# 5. Test on Noisy Image
def test_model_on_image(model, image_path):
    # Preprocess the noisy image
    img_tensor = preprocess_image(image_path).to('cpu')

    # Model in evaluation mode
    model.eval()
    with torch.no_grad():
        output = model(img_tensor)
        probs = F.softmax(output, dim=1)           # softmax probabilities
        predicted = torch.argmax(probs, dim=1)     # predicted class (batch of 1)
        confidence = probs[0, predicted].item()

    print(f"Predicted Label: {predicted.item()}, Confidence: {confidence:.4f}")

    # Visualize the processed image
    img_np = img_tensor.squeeze().cpu().numpy()
    plt.imshow(img_np, cmap='gray')
    plt.title(f"Predicted: {predicted.item()}, Confidence: {confidence}")
    plt.show()

# Later: Load the saved model and test
clf = ImageClassifier().to('cpu')
clf.load_state_dict(torch.load('mnist_classifier.pth', map_location='cpu'))
print("Model loaded for inference.")

# Test the model on img_4.jpg (the noisy outlier)
test_image_path = 'img_4.jpg'  # Path to the noisy image
test_model_on_image(clf, test_image_path)