import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import torch.nn.functional as F
from PIL import Image
import matplotlib.pyplot as plt
import cv2

# 1. Model Definition with Adaptive Pooling
class ImageClassifier(nn.Module): 
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(1, 32, (3,3)), 
            nn.ReLU(),
            nn.Conv2d(32, 64, (3,3)), 
            nn.ReLU(),
            nn.Conv2d(64, 64, (3,3)), 
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),  # Pool to 1x1 to avoid hardcoding dimensions
            nn.Flatten(), 
            nn.Linear(64, 10)  # Final layer to output 10 classes (0-9)
        )

    def forward(self, x): 
        return self.model(x)
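
# Optional sanity check (illustrative, not part of the original pipeline): thanks to
# AdaptiveAvgPool2d, the flattened feature size is always 64 regardless of the input
# resolution, so no convolution output dimensions need to be hardcoded.
with torch.no_grad():
    assert ImageClassifier()(torch.randn(1, 1, 28, 28)).shape == (1, 10)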

# 2. Data Augmentation for Training
train_transform = transforms.Compose([
    transforms.RandomRotation(10),    # Random rotation between -10 and +10 degrees
    transforms.RandomAffine(0, translate=(0.1, 0.1)),  # Random translation
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # Normalize to [-1, 1]
])

# Load MNIST dataset
train_dataset = datasets.MNIST(root="data", download=True, train=True, transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
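# Each batch from this loader is a (32, 1, 28, 28) float tensor of images
# normalized to [-1, 1] plus a (32,) tensor of integer digit labels.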

# 3. Train the Model
def train_model(model, train_loader, num_epochs=10):
    opt = Adam(model.parameters(), lr=1e-3)
    loss_fn = nn.CrossEntropyLoss()
    
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for batch in train_loader:
            X, y = batch
            X, y = X.to('cpu'), y.to('cpu')

            # Forward pass
            yhat = model(X)
            loss = loss_fn(yhat, y)

            # Backpropagation
            opt.zero_grad()
            loss.backward()
            opt.step()

            total_loss += loss.item()
        
        print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}")

# Initialize model
clf = ImageClassifier().to('cpu')

# Train the model
train_model(clf, train_loader)

# Save the trained model
torch.save(clf.state_dict(), 'mnist_classifier.pth')
print("Model saved as 'mnist_classifier.pth'")

# 4. Noise Reduction and Preprocessing for Test Image
def preprocess_image(image_path):
    # Load image using OpenCV (grayscale); fail loudly if the path is wrong
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise FileNotFoundError(f"Could not read image at '{image_path}'")

    # Resize to 28x28 pixels to match MNIST
    img = cv2.resize(img, (28, 28))

    # Apply Gaussian blur to reduce noise
    img_blur = cv2.GaussianBlur(img, (5, 5), 0)
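    # Note (assumption): MNIST digits are light strokes on a dark background.
    # If the test image is dark-on-light (e.g. a photographed digit on paper),
    # inverting it here, e.g. img_blur = cv2.bitwise_not(img_blur), may be needed.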

    # Convert to PIL Image for compatibility with torchvision transforms
    img_pil = Image.fromarray(img_blur)

    # Apply transformations: normalize same as MNIST
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])

    img_tensor = transform(img_pil).unsqueeze(0)  # Add batch dimension
    return img_tensor

# 5. Test on Noisy Image
def test_model_on_image(model, image_path):
    # Preprocess the noisy image
    img_tensor = preprocess_image(image_path).to('cpu')

    # Model in evaluation mode
    model.eval()
    with torch.no_grad():
        output = model(img_tensor)
        probs = F.softmax(output, dim=1)           # softmax probabilities
        predicted = torch.argmax(probs, dim=1)     # predicted class (batch of 1)
        confidence = probs[0, predicted].item()

    print(f"Predicted Label: {predicted.item()}, Confidence: {confidence:.4f}")

    # Visualize the processed image
    img_np = img_tensor.squeeze().cpu().numpy()
    plt.imshow(img_np, cmap='gray')
    plt.title(f"Predicted: {predicted.item()}, Confidence: {confidence}")
    plt.show()

# Later: Load the saved model and test
clf = ImageClassifier().to('cpu')
clf.load_state_dict(torch.load('mnist_classifier.pth', map_location='cpu'))
print("Model loaded for inference.")

# Test the model on img_4.jpg (the noisy outlier)
test_image_path = 'img_4.jpg'  # Path to the noisy image
test_model_on_image(clf, test_image_path)