im2 committed on
Commit
061a822
1 Parent(s): 3f3b4ee

improved from online tutorial

Browse files
Files changed (7) hide show
  1. README.md +5 -3
  2. img_1.jpg +0 -0
  3. img_2.jpg +0 -0
  4. img_3.jpg +0 -0
  5. img_4.jpg +0 -0
  6. mnist_classifier.pth +0 -0
  7. torchnn.py +130 -0
README.md CHANGED
@@ -1,3 +1,5 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
1
+ Changes from previous author:
2
+ - Updated Architecture: Using AdaptiveAvgPool2d ensures that the fully connected layer receives a consistent input size, regardless of the input dimensions.
3
+ - Data Augmentation: Training with rotated and shifted images ensures the model becomes more robust to variations, improving generalization.
4
+ - Noise Reduction: Preprocessing the image by removing noise helps the model focus on the digit itself.
5
+
img_1.jpg ADDED
img_2.jpg ADDED
img_3.jpg ADDED
img_4.jpg ADDED
mnist_classifier.pth ADDED
Binary file (229 kB). View file
 
torchnn.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from torch.optim import Adam
4
+ from torch.utils.data import DataLoader
5
+ from torchvision import datasets, transforms
6
+ import torch.nn.functional as F
7
+ from PIL import Image
8
+ import matplotlib.pyplot as plt
9
+ import cv2
10
+
11
+ # 1. Model Definition with Adaptive Pooling
12
class ImageClassifier(nn.Module):
    """Small convolutional classifier for digit images.

    Three unpadded 3x3 convolutions are followed by global average pooling,
    so the linear head always receives 64 features regardless of the spatial
    size of the input. Each unpadded 3x3 convolution trims two pixels per
    axis, so inputs must be at least 7x7.

    Args:
        num_classes: Number of output logits. Defaults to 10 (digits 0-9).
        in_channels: Number of channels in the input images. Defaults to 1
            (grayscale).
    """

    def __init__(self, num_classes: int = 10, in_channels: int = 1):
        super().__init__()
        # The layer order is part of the checkpoint format: state_dict keys
        # are "model.<index>.*", so layers must not be reordered or removed.
        self.model = nn.Sequential(
            nn.Conv2d(in_channels, 32, (3, 3)),
            nn.ReLU(),
            nn.Conv2d(32, 64, (3, 3)),
            nn.ReLU(),
            nn.Conv2d(64, 64, (3, 3)),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)),  # Pool to 1x1 to avoid hardcoding dimensions
            nn.Flatten(),
            nn.Linear(64, num_classes),  # One logit per class
        )

    def forward(self, x):
        """Return unnormalized class scores of shape (batch, num_classes)."""
        return self.model(x)
29
+
30
+ # 2. Data Augmentation for Training
31
# Rotation and translation are applied by one RandomAffine so each image is
# resampled once; chaining RandomRotation with a translate-only RandomAffine
# interpolates the small 28x28 digit twice.
train_transform = transforms.Compose([
    transforms.RandomAffine(degrees=10, translate=(0.1, 0.1)),  # +/-10 degrees, up to 10% shift per axis
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),  # Normalize to [-1, 1]
])

# Load MNIST dataset
train_dataset = datasets.MNIST(root="data", download=True, train=True, transform=train_transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
41
+
42
+ # 3. Train the Model
43
def train_model(model, train_loader, num_epochs=10):
    """Train ``model`` in place with Adam (lr=1e-3) and cross-entropy loss.

    Batches are moved to the device that holds the model's parameters, so the
    function works for a model on any device rather than only on the CPU.

    Args:
        model: Module mapping an input batch to class logits.
        train_loader: Iterable yielding ``(inputs, targets)`` batches. It is
            iterated once per epoch and does not need to support ``len()``.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        A list with the mean batch loss of each epoch. An epoch that yields
        no batches contributes ``nan`` instead of raising.
    """
    # Follow the model's device instead of assuming 'cpu'.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    opt = Adam(model.parameters(), lr=1e-3)
    loss_fn = nn.CrossEntropyLoss()

    epoch_losses = []
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0.0
        num_batches = 0
        for X, y in train_loader:
            X, y = X.to(device), y.to(device)

            # Forward pass
            yhat = model(X)
            loss = loss_fn(yhat, y)

            # Backpropagation
            opt.zero_grad()
            loss.backward()
            opt.step()

            total_loss += loss.item()
            num_batches += 1

        # Count batches ourselves: len() is unavailable for some loaders and
        # an empty loader would otherwise cause a division by zero.
        avg_loss = total_loss / num_batches if num_batches else float('nan')
        epoch_losses.append(avg_loss)
        print(f"Epoch {epoch + 1}, Loss: {avg_loss}")

    return epoch_losses
66
+
67
+ # Initialize model
68
# Build the network on the CPU and fit it on the MNIST training loader.
clf = ImageClassifier()
clf = clf.to('cpu')
train_model(clf, train_loader)

# Persist only the learned weights (state dict), not the whole module object.
checkpoint_path = 'mnist_classifier.pth'
torch.save(clf.state_dict(), checkpoint_path)
print(f"Model saved as '{checkpoint_path}'")
76
+
77
+ # 4. Noise Reduction and Preprocessing for Test Image
78
def preprocess_image(image_path):
    """Load an image file and convert it to a normalized model input.

    The image is read as grayscale, resized to 28x28, smoothed with a 5x5
    Gaussian blur, and normalized with mean 0.5 / std 0.5.

    Args:
        image_path: Path of the image file to load.

    Returns:
        A float tensor of shape (1, 1, 28, 28) with values in [-1, 1].

    Raises:
        FileNotFoundError: If ``image_path`` does not point to a file.
        ValueError: If the file exists but OpenCV cannot decode it.
    """
    import os  # local import so the block does not depend on a new file-level import

    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image file not found: {image_path}")

    # Load image using OpenCV. imread signals failure by returning None
    # rather than raising, so check it before using the result.
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError(f"Could not decode image: {image_path}")

    # Resize to 28x28 pixels to match MNIST
    img = cv2.resize(img, (28, 28))

    # Apply Gaussian blur to reduce noise
    img_blur = cv2.GaussianBlur(img, (5, 5), 0)

    # Convert to PIL Image for compatibility with torchvision transforms
    img_pil = Image.fromarray(img_blur)

    # NOTE(review): MNIST digits are light strokes on a dark background;
    # confirm that input photos share that polarity or invert them first.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),  # [0, 1] -> [-1, 1]
    ])

    img_tensor = transform(img_pil).unsqueeze(0)  # Add batch dimension
    return img_tensor
99
+
100
+ # 5. Test on Noisy Image
101
def test_model_on_image(model, image_path):
    """Classify one image file, then print and plot the prediction.

    The image is prepared with ``preprocess_image`` and moved to the device
    that holds the model's parameters. The model is switched to evaluation
    mode and left in that mode.

    Args:
        model: Classifier returning logits of shape (1, num_classes).
        image_path: Path of the image to classify.

    Returns:
        A ``(predicted_label, confidence)`` tuple: the class index as an int
        and its softmax probability as a float.
    """
    # Follow the model's device instead of assuming 'cpu'.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    # Preprocess the noisy image
    img_tensor = preprocess_image(image_path).to(device)

    # Model in evaluation mode
    model.eval()
    with torch.no_grad():
        output = model(img_tensor)
        # Softmax over the class axis; [0] selects the single batch item.
        probs = F.softmax(output, dim=1)[0]

    # Reduce over classes explicitly rather than over the flattened output.
    best_prob, best_index = torch.max(probs, dim=0)
    predicted = best_index.item()
    confidence = best_prob.item()

    print(f"Predicted Label: {predicted}, Confidence: {confidence}")

    # Visualize the processed image
    img_np = img_tensor.squeeze().cpu().numpy()
    plt.imshow(img_np, cmap='gray')
    plt.title(f"Predicted: {predicted}, Confidence: {confidence}")
    plt.show()

    return predicted, confidence
122
+
123
+ # Later: Load the saved model and test
124
clf = ImageClassifier().to('cpu')
# map_location keeps loading working when the checkpoint was saved from
# another device; weights_only limits unpickling to tensors and plain
# containers, which is all a state dict contains.
clf.load_state_dict(
    torch.load('mnist_classifier.pth', map_location='cpu', weights_only=True)
)
print("Model loaded for inference.")

# Test the model on img_4.jpg (the noisy outlier)
test_image_path = 'img_4.jpg'  # Path to the noisy image
test_model_on_image(clf, test_image_path)