improved from online tutorial

Browse files

Files changed (7) hide show

README.md +5 -3
img_1.jpg +0 -0
img_2.jpg +0 -0
img_3.jpg +0 -0
img_4.jpg +0 -0
mnist_classifier.pth +0 -0
torchnn.py +130 -0

README.md CHANGED Viewed

@@ -1,3 +1,5 @@
----
-license: mit
----

+Changes from previous author:
+- Updated Architecture: Using AdaptiveAvgPool2d ensures that the fully connected layer receives a consistent input size, regardless of the input dimensions.
+- Data Augmentation: Training on randomly rotated (up to ±10°) and randomly shifted (up to 10% of the image size) images makes the model more robust to such variations and improves generalization.
+- Noise Reduction: Smoothing the input image with a Gaussian blur before inference reduces noise, helping the model focus on the digit itself.

img_1.jpg ADDED Viewed

img_2.jpg ADDED Viewed

img_3.jpg ADDED Viewed

img_4.jpg ADDED Viewed

mnist_classifier.pth ADDED Viewed

Binary file (229 kB). View file

torchnn.py ADDED Viewed

	@@ -0,0 +1,130 @@

+import torch
+import torch.nn as nn
+from torch.optim import Adam
+from torch.utils.data import DataLoader
+from torchvision import datasets, transforms
+import torch.nn.functional as F
+from PIL import Image
+import matplotlib.pyplot as plt
+import cv2
+# 1. Model Definition with Adaptive Pooling
+class ImageClassifier(nn.Module):
+    """Small convolutional classifier that emits 10 class scores per image.
+
+    Three 3x3 convolutions (1 -> 32 -> 64 -> 64 channels), each followed by a
+    ReLU, feed an adaptive average pool that collapses every feature map to a
+    single value, so the final linear layer always receives 64 features.
+    The module returns the raw outputs of that linear layer; no softmax is
+    applied inside the model.
+    """
+    def __init__(self):
+        super().__init__()
+        # Keep this layer layout stable: the weights are saved with
+        # state_dict() and later loaded back into a fresh instance of this class.
+        self.model = nn.Sequential(
+            nn.Conv2d(1, 32, (3,3)),
+            nn.ReLU(),
+            nn.Conv2d(32, 64, (3,3)),
+            nn.ReLU(),
+            nn.Conv2d(64, 64, (3,3)),
+            nn.ReLU(),
+            nn.AdaptiveAvgPool2d((1, 1)),  # Pool to 1x1 to avoid hardcoding dimensions
+            nn.Flatten(),
+            nn.Linear(64, 10)  # Final layer to output 10 classes (0-9)
+        )
+    def forward(self, x):
+        """Run a batch ``x`` through the sequential stack and return its output."""
+        return self.model(x)
+# 2. Data Augmentation for Training
+train_transform = transforms.Compose([
+    transforms.RandomRotation(10),    # Random rotation between -10 to 10 degrees
+    transforms.RandomAffine(0, translate=(0.1, 0.1)),  # Random translation
+    transforms.ToTensor(),
+    transforms.Normalize((0.5,), (0.5,))  # Normalize to [-1, 1]
+])
+# Load MNIST dataset
+train_dataset = datasets.MNIST(root="data", download=True, train=True, transform=train_transform)
+train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
+# 3. Train the Model
+def train_model(model, train_loader, num_epochs=10):
+    opt = Adam(model.parameters(), lr=1e-3)
+    loss_fn = nn.CrossEntropyLoss()
+    model.train()
+    for epoch in range(num_epochs):
+        total_loss = 0
+        for batch in train_loader:
+            X, y = batch
+            X, y = X.to('cpu'), y.to('cpu')
+            # Forward pass
+            yhat = model(X)
+            loss = loss_fn(yhat, y)
+            # Backpropagation
+            opt.zero_grad()
+            loss.backward()
+            opt.step()
+            total_loss += loss.item()
+        print(f"Epoch {epoch+1}, Loss: {total_loss / len(train_loader)}")
+# Initialize model
+clf = ImageClassifier().to('cpu')
+# Train the model
+train_model(clf, train_loader)
+# Save the trained model
+torch.save(clf.state_dict(), 'mnist_classifier.pth')
+print("Model saved as 'mnist_classifier.pth'")
+# 4. Noise Reduction and Preprocessing for Test Image
+def preprocess_image(image_path):
+    # Load image using OpenCV
+    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
+    # Resize to 28x28 pixels to match MNIST
+    img = cv2.resize(img, (28, 28))
+    # Apply Gaussian blur to reduce noise
+    img_blur = cv2.GaussianBlur(img, (5, 5), 0)
+    # Convert to PIL Image for compatibility with torchvision transforms
+    img_pil = Image.fromarray(img_blur)
+    # Apply transformations: normalize same as MNIST
+    transform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize((0.5,), (0.5,))
+    ])
+    img_tensor = transform(img_pil).unsqueeze(0)  # Add batch dimension
+    return img_tensor
+# 5. Test on Noisy Image
+def test_model_on_image(model, image_path):
+    # Preprocess the noisy image
+    img_tensor = preprocess_image(image_path).to('cpu')
+    # Model in evaluation mode
+    model.eval()
+    with torch.no_grad():
+        output = model(img_tensor)
+        predicted = torch.argmax(output)
+    # Get softmax probabilities
+    probs = F.softmax(output, dim=1)
+    confidence = probs[0][predicted].item()
+    print(f"Predicted Label: {predicted.item()}, Confidence: {confidence}")
+    # Visualize the processed image
+    img_np = img_tensor.squeeze().cpu().numpy()
+    plt.imshow(img_np, cmap='gray')
+    plt.title(f"Predicted: {predicted.item()}, Confidence: {confidence}")
+    plt.show()
+# Later: Load the saved model and test
+clf = ImageClassifier().to('cpu')
+clf.load_state_dict(torch.load('mnist_classifier.pth'))
+print("Model loaded for inference.")
+# Test the model on img_4.jpg (the noisy outlier)
+test_image_path = 'img_4.jpg'  # Path to the noisy image
+test_model_on_image(clf, test_image_path)