|
import torch |
|
import torch.nn as nn |
|
from torch.optim import Adam |
|
from torch.utils.data import DataLoader |
|
from torchvision import datasets, transforms |
|
import torch.nn.functional as F |
|
from PIL import Image |
|
import matplotlib.pyplot as plt |
|
import cv2 |
|
|
|
|
|
class ImageClassifier(nn.Module):
    """Small convolutional classifier for single-channel images.

    Three unpadded 3x3 convolutions (1 -> 32 -> 64 -> 64 channels), each
    followed by a ReLU, feed a global average pool and a linear layer that
    emits 10 class scores per sample.
    """

    def __init__(self):
        super().__init__()
        # The position of each layer determines its ``model.<index>`` key in
        # the state dict, so the order must stay fixed for saved checkpoints
        # to keep loading.
        layers = [
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3),
            nn.ReLU(),
            # Pooling every channel down to 1x1 makes the linear head
            # independent of the spatial size left after the convolutions.
            nn.AdaptiveAvgPool2d(output_size=(1, 1)),
            nn.Flatten(),
            nn.Linear(in_features=64, out_features=10),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the 10 class scores for each sample in batch ``x``.

        ``x`` is expected to be batched with one channel, i.e. shaped
        ``(N, 1, H, W)``.
        """
        scores = self.model(x)
        return scores
|
|
|
|
|
# Training-time augmentation: small random rotations and shifts, followed by
# conversion to a tensor and normalization with mean 0.5 / std 0.5.
train_transform = transforms.Compose(
    [
        transforms.RandomRotation(degrees=10),
        transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# MNIST training split stored under ./data (fetched if it is not there yet).
train_dataset = datasets.MNIST(
    root="data",
    train=True,
    transform=train_transform,
    download=True,
)

# Shuffled mini-batches of 32 samples.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
|
|
|
|
|
def train_model(model, train_loader, num_epochs=10):
    """Train ``model`` with Adam and cross-entropy, printing the mean loss per epoch.

    Args:
        model: Module mapping an input batch to class scores. Its parameters
            are updated in place and it is left in training mode.
        train_loader: Iterable yielding ``(inputs, targets)`` batches. It is
            iterated once per epoch and does not need to support ``len()``.
        num_epochs: Number of passes over ``train_loader``.

    Raises:
        ValueError: Raised by the optimizer if ``model`` has no parameters.
    """
    opt = Adam(model.parameters(), lr=1e-3)
    loss_fn = nn.CrossEntropyLoss()

    # Adam has already rejected a parameter-less model, so at least one
    # parameter exists. Batches follow the model's device instead of being
    # pinned to the CPU, which keeps a GPU-resident model working.
    device = next(model.parameters()).device

    model.train()
    for epoch in range(num_epochs):
        total_loss = 0.0
        # Count batches ourselves: the loader may not define len(), and an
        # empty loader must not cause a division by zero below.
        num_batches = 0
        for X, y in train_loader:
            X, y = X.to(device), y.to(device)

            yhat = model(X)
            loss = loss_fn(yhat, y)

            opt.zero_grad()
            loss.backward()
            opt.step()

            total_loss += loss.item()
            num_batches += 1

        # Report NaN rather than crashing when the epoch saw no batches.
        mean_loss = total_loss / num_batches if num_batches else float("nan")
        print(f"Epoch {epoch+1}, Loss: {mean_loss}")
|
|
|
|
|
# Build a fresh classifier on the CPU and fit it on the training loader.
clf = ImageClassifier().to(device='cpu')
train_model(model=clf, train_loader=train_loader)

# Persist only the learned weights (the state dict), not the module object.
torch.save(obj=clf.state_dict(), f='mnist_classifier.pth')
print("Model saved as 'mnist_classifier.pth'")
|
|
|
|
|
def preprocess_image(image_path):
    """Load an image file and turn it into a normalized model input tensor.

    The image is read as grayscale, resized to 28x28, smoothed with a 5x5
    Gaussian blur, converted to a tensor and normalized with mean 0.5 and
    std 0.5.

    Args:
        image_path: Path of the image file to read.

    Returns:
        Tensor of shape ``(1, 1, 28, 28)``; the leading dimension is the
        batch dimension added for the model.

    Raises:
        FileNotFoundError: If ``image_path`` does not point to a file.
        ValueError: If the file exists but cannot be decoded as an image.
    """
    import os

    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with a clear message instead of deep inside cv2.resize.
        if not os.path.isfile(image_path):
            raise FileNotFoundError(f"Image file not found: {image_path!r}")
        raise ValueError(f"Could not decode image file: {image_path!r}")

    img = cv2.resize(img, (28, 28))

    img_blur = cv2.GaussianBlur(img, (5, 5), 0)

    # NOTE(review): no intensity inversion is applied. MNIST digits are light
    # on a dark background, so dark-on-light inputs may need inverting --
    # confirm against the images this is expected to handle.
    img_pil = Image.fromarray(img_blur)

    # Same tensor conversion and normalization as the training pipeline,
    # without the random augmentations.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])

    img_tensor = transform(img_pil).unsqueeze(0)
    return img_tensor
|
|
|
|
|
def test_model_on_image(model, image_path):
    """Classify one image file with ``model``, print the result and show the image.

    Args:
        model: Classifier returning one row of class scores for the
            single-image batch produced by ``preprocess_image``. It is
            switched to eval mode and left that way.
        image_path: Path of the image file to classify.
    """
    # Run on the device that holds the model's weights rather than assuming
    # the CPU; fall back to the CPU for a model without parameters.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')
    img_tensor = preprocess_image(image_path).to(device)

    model.eval()
    with torch.no_grad():
        output = model(img_tensor)

    # Take the arg-max along the class dimension of the single batch row so
    # the label stays correct independent of how the output is laid out.
    predicted_label = int(torch.argmax(output, dim=1)[0])

    probs = F.softmax(output, dim=1)
    confidence = probs[0, predicted_label].item()

    print(f"Predicted Label: {predicted_label}, Confidence: {confidence}")

    # Display the preprocessed (resized, blurred, normalized) image that the
    # model actually saw, not the original file.
    img_np = img_tensor.squeeze().cpu().numpy()
    plt.imshow(img_np, cmap='gray')
    plt.title(f"Predicted: {predicted_label}, Confidence: {confidence}")
    plt.show()
|
|
|
|
|
# Rebuild the architecture and restore the trained weights for inference.
clf = ImageClassifier().to('cpu')
# map_location keeps loading working on a CPU-only machine even if the
# checkpoint was written from a GPU-resident model; for a CPU-saved file it
# changes nothing.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from
# a trusted source (consider weights_only=True where the installed torch
# version supports it).
clf.load_state_dict(torch.load('mnist_classifier.pth', map_location='cpu'))
print("Model loaded for inference.")

# Classify a sample image and display the prediction.
test_image_path = 'img_4.jpg'
test_model_on_image(clf, test_image_path)
|
|