atiwari751 committed on
Commit
ea78c09
·
1 Parent(s): 171643a

Final submission

Browse files
Files changed (1) hide show
  1. resnet_execute.py +0 -220
resnet_execute.py DELETED
@@ -1,220 +0,0 @@
1
- import torch
2
- import torchvision
3
- import torchvision.transforms as transforms
4
- from torch.utils.data import DataLoader
5
- import torch.nn as nn
6
- import torch.optim as optim
7
- from resnet_model import ResNet50
8
- from tqdm import tqdm
9
- from torchvision import datasets
10
- from checkpoint import save_checkpoint, load_checkpoint
11
- import matplotlib.pyplot as plt
12
- from torchvision.utils import make_grid
13
- import albumentations as A
14
- from albumentations.pytorch import ToTensorV2
15
- import numpy as np
16
- from torchsummary import summary
17
-
18
# Image pipelines (albumentations).
# Both pipelines finish with the same per-channel normalisation followed by
# ToTensorV2; the statistics are kept in one place so they cannot drift apart.
_CHANNEL_MEAN = (0.485, 0.456, 0.406)
_CHANNEL_STD = (0.229, 0.224, 0.225)

# Training pipeline: random resized crop to 224x224, random horizontal flip
# and colour jitter, then normalisation.
_train_steps = [
    A.RandomResizedCrop(
        height=224,
        width=224,
        scale=(0.08, 1.0),
        ratio=(3/4, 4/3),
        p=1.0,
    ),
    A.HorizontalFlip(p=0.5),
    A.ColorJitter(
        brightness=0.4,
        contrast=0.4,
        saturation=0.4,
        hue=0.1,
        p=0.8,
    ),
    A.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
    ToTensorV2(),
]
train_transform = A.Compose(_train_steps)

# Evaluation pipeline: deterministic resize to 256x256 and a 224x224 centre
# crop, then the same normalisation as training.
_test_steps = [
    A.Resize(height=256, width=256),
    A.CenterCrop(height=224, width=224),
    A.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
    ToTensorV2(),
]
test_transform = A.Compose(_test_steps)
33
-
34
# Datasets and loaders
class _AlbumentationsAdapter:
    """Callable that runs an albumentations pipeline on a dataset image.

    The dataset hands its loaded image to ``transform``; albumentations
    pipelines are called with keyword arguments and return a dict, so the
    image is converted with ``np.array`` and the ``'image'`` entry is
    returned.

    A module-level class is used instead of a ``lambda`` because the datasets
    are consumed by ``DataLoader`` worker processes (``num_workers=8``).
    Worker start methods that pickle the dataset (spawn / forkserver) cannot
    pickle a lambda, whereas an instance of this class can be pickled as long
    as the wrapped pipeline can.
    """

    def __init__(self, pipeline):
        self.pipeline = pipeline

    def __call__(self, img):
        return self.pipeline(image=np.array(img))['image']


# Training split: shuffled, batches of 128.
trainset = datasets.ImageFolder(
    root='/mnt/imagenet/ILSVRC/Data/CLS-LOC/train',
    transform=_AlbumentationsAdapter(train_transform),
)
trainloader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=8, pin_memory=True)

# Validation split: fixed order, batches of 500.
testset = datasets.ImageFolder(
    root='/mnt/imagenet/ILSVRC/Data/CLS-LOC/val',
    transform=_AlbumentationsAdapter(test_transform),
)
testloader = DataLoader(testset, batch_size=500, shuffle=False, num_workers=8, pin_memory=True)
40
-
41
# Model, loss function and optimizer.
# Run on the GPU when CUDA is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

# Wrap the network in DataParallel, move it to the selected device and print
# a layer summary for a 3x224x224 input.
model = torch.nn.DataParallel(ResNet50()).to(device)
summary(model, input_size=(3, 224, 224))

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
    weight_decay=5e-4,
)
51
-
52
- # Training function
53
- from torch.amp import autocast
54
-
55
def train(model, device, train_loader, optimizer, criterion, epoch, accumulation_steps=4):
    """Train ``model`` for one epoch using gradient accumulation.

    Each batch loss is divided by ``accumulation_steps`` before ``backward()``;
    the optimizer is stepped (and gradients cleared) every
    ``accumulation_steps`` batches and once more on the final batch of the
    loader. Note that the final group may contain fewer batches than
    ``accumulation_steps``; its gradient is still divided by
    ``accumulation_steps``.

    Args:
        model: Network to train; put into training mode by this function.
        device: ``torch.device`` (or device string) the batches are moved to.
        train_loader: Sized iterable yielding ``(inputs, targets)`` batches.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Loss callable taking ``(outputs, targets)``.
        epoch: Epoch number, used only for the progress-bar label.
        accumulation_steps: Number of batches whose gradients are accumulated
            before each optimizer step. Must be at least 1.

    Returns:
        Tuple ``(top1_accuracy, top5_accuracy, mean_loss)``: accuracies are
        percentages over all samples seen, ``mean_loss`` is the sum of the
        un-scaled batch losses divided by ``len(train_loader)``.

    Raises:
        ValueError: If ``accumulation_steps`` is smaller than 1 or the loader
            yields no samples.
    """
    if accumulation_steps < 1:
        raise ValueError("accumulation_steps must be >= 1")

    model.train()

    # Mixed precision is only enabled when actually running on CUDA; a
    # hard-coded 'cuda' autocast on a CPU run merely warns and disables itself.
    # NOTE(review): no GradScaler is used, so float16 gradients may underflow
    # under CUDA autocast - confirm whether that is intended.
    device_type = torch.device(device).type
    use_amp = device_type == 'cuda'

    running_loss = 0.0
    correct1 = 0
    correct5 = 0
    total = 0
    num_batches = len(train_loader)
    pbar = tqdm(train_loader)

    # Start from clean gradients so nothing left over from an interrupted
    # earlier call leaks into the first accumulated step.
    optimizer.zero_grad()

    for batch_idx, (inputs, targets) in enumerate(pbar):
        inputs, targets = inputs.to(device), targets.to(device)

        with autocast(device_type=device_type, enabled=use_amp):
            outputs = model(inputs)
            # Scale so the accumulated gradient approximates one large batch.
            loss = criterion(outputs, targets) / accumulation_steps

        loss.backward()

        is_last_batch = (batch_idx + 1) == num_batches
        if (batch_idx + 1) % accumulation_steps == 0 or is_last_batch:
            optimizer.step()
            optimizer.zero_grad()

        # Undo the accumulation scaling for reporting.
        running_loss += loss.item() * accumulation_steps

        # Top-k bookkeeping; k is clamped so models with fewer than five
        # output classes do not make topk() fail.
        k = min(5, outputs.size(1))
        _, predicted = outputs.detach().topk(k, 1, True, True)
        total += targets.size(0)
        correct1 += predicted[:, 0].eq(targets).sum().item()
        correct5 += predicted.eq(targets.view(-1, 1)).sum().item()

        pbar.set_description(desc=f'Epoch {epoch} | Loss: {running_loss / (batch_idx + 1):.4f} | Top-1 Acc: {100. * correct1 / total:.2f} | Top-5 Acc: {100. * correct5 / total:.2f}')

        # Periodically hand cached, unused GPU memory back to the driver.
        if use_amp and (batch_idx + 1) % 50 == 0:
            torch.cuda.empty_cache()

    if total == 0:
        raise ValueError("train_loader yielded no samples")

    return 100. * correct1 / total, 100. * correct5 / total, running_loss / num_batches
88
-
89
- # Testing function
90
def test(model, device, test_loader, criterion, max_misclassified=None):
    """Evaluate ``model`` and collect the samples whose top-1 prediction is wrong.

    Args:
        model: Network to evaluate; put into eval mode by this function.
        device: Device the batches are moved to.
        test_loader: Iterable yielding ``(inputs, targets)`` batches, where
            ``targets`` holds one class index per sample.
        criterion: Loss callable taking ``(outputs, targets)``.
        max_misclassified: Optional upper bound on how many misclassified
            samples are kept. ``None`` (the default) keeps all of them, which
            can hold a large part of the dataset in host memory when the
            model is inaccurate.

    Returns:
        Tuple ``(top1_accuracy, top5_accuracy, mean_loss, images, labels,
        preds)``. Accuracies are percentages, ``mean_loss`` is the average of
        the per-batch losses, and the three lists hold, for each kept
        misclassified sample, the input tensor, the target (0-d tensor) and
        the top-1 prediction (1-element tensor), all on the CPU.

    Raises:
        ValueError: If the loader yields no samples.
    """
    model.eval()
    test_loss = 0.0
    correct1 = 0
    correct5 = 0
    total = 0
    num_batches = 0
    misclassified_images = []
    misclassified_labels = []
    misclassified_preds = []

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)

            test_loss += criterion(outputs, targets).item()
            num_batches += 1

            # k is clamped so models with fewer than five classes still work.
            k = min(5, outputs.size(1))
            _, predicted = outputs.topk(k, 1, True, True)
            batch_size = targets.size(0)
            total += batch_size

            top1_wrong = predicted[:, 0].ne(targets)
            correct1 += batch_size - int(top1_wrong.sum().item())
            correct5 += predicted.eq(targets.view(-1, 1)).sum().item()

            # Collect misclassified samples with one batched gather and one
            # device-to-host copy per tensor, instead of testing and copying
            # sample by sample.
            if max_misclassified is not None:
                room = max_misclassified - len(misclassified_images)
                if room <= 0:
                    continue
            wrong_idx = top1_wrong.nonzero(as_tuple=True)[0]
            if max_misclassified is not None:
                wrong_idx = wrong_idx[:room]
            if wrong_idx.numel() == 0:
                continue
            misclassified_images.extend(inputs[wrong_idx].cpu().unbind(0))
            misclassified_labels.extend(targets[wrong_idx].cpu().unbind(0))
            misclassified_preds.extend(predicted[wrong_idx, :1].cpu().unbind(0))

    if total == 0:
        raise ValueError("test_loader yielded no samples")

    test_accuracy1 = 100. * correct1 / total
    test_accuracy5 = 100. * correct5 / total
    mean_loss = test_loss / num_batches
    print(f'Test Loss: {mean_loss:.4f}, Top-1 Accuracy: {test_accuracy1:.2f}, Top-5 Accuracy: {test_accuracy5:.2f}')
    return test_accuracy1, test_accuracy5, mean_loss, misclassified_images, misclassified_labels, misclassified_preds


# The function name matches pytest's default test-function pattern; opt out so
# importing this evaluation helper into a test module does not get it
# collected (and failed) as a test case.
test.__test__ = False
123
-
124
- # Main execution
125
if __name__ == '__main__':
    # Training schedule, early-stopping parameters and checkpoint path.
    checkpoint_path = "checkpoint.pth"
    num_epochs = 25
    best_loss = float('inf')
    patience = 5  # epochs without test-loss improvement tolerated before stopping
    patience_counter = 0

    # Load checkpoint if it exists to resume training.
    # NOTE(review): the third value returned by load_checkpoint is not used
    # below, and best_loss always restarts at +inf after a resume - confirm
    # against checkpoint.py whether it should seed best_loss.
    try:
        model, optimizer, best_test_accuracy = load_checkpoint(model, optimizer, checkpoint_path)
    except FileNotFoundError:
        print("No checkpoint found, starting from scratch.")

    # Per-epoch history. Each results row is
    # (epoch, train_top1, train_top5, test_top1, test_top5, train_loss, test_loss).
    results = []
    learning_rates = []

    for epoch in range(1, num_epochs + 1):
        train_accuracy1, train_accuracy5, train_loss = train(model, device, trainloader, optimizer, criterion, epoch)
        test_accuracy1, test_accuracy5, test_loss, misclassified_images, misclassified_labels, misclassified_preds = test(model, device, testloader, criterion)
        print(f'Epoch {epoch} | Train Top-1 Acc: {train_accuracy1:.2f} | Train Top-5 Acc: {train_accuracy5:.2f} | Test Top-1 Acc: {test_accuracy1:.2f} | Test Top-5 Acc: {test_accuracy5:.2f}')

        # Append results for this epoch
        results.append((epoch, train_accuracy1, train_accuracy5, test_accuracy1, test_accuracy5, train_loss, test_loss))
        learning_rates.append(optimizer.param_groups[0]['lr'])

        # Checkpoint on improvement of the test loss; otherwise count towards
        # early stopping.
        if test_loss < best_loss:
            best_loss = test_loss
            patience_counter = 0
            save_checkpoint(model, optimizer, epoch, test_loss, checkpoint_path)
        else:
            patience_counter += 1

        if patience_counter >= patience:
            print("Early stopping triggered. Training terminated.")
            break

        # Only show misclassified samples once the final scheduled epoch has
        # completed (skipped when early stopping ends the run sooner).
        if epoch == num_epochs and misclassified_images:
            print("\nDisplaying some misclassified samples from the last epoch:")
            misclassified_grid = make_grid(misclassified_images[:16], nrow=4, normalize=True, scale_each=True)
            plt.figure(figsize=(8, 8))
            plt.imshow(misclassified_grid.permute(1, 2, 0))
            plt.title("Misclassified Samples")
            plt.axis('off')
            plt.show()

    # Print the Top-1 accuracy results in a tab-separated format.
    # The third field of each row is the train top-5 accuracy, so it is
    # skipped explicitly to reach the test top-1 accuracy.
    print("\nEpoch\tTrain Top-1 Accuracy\tTest Top-1 Accuracy")
    for row_epoch, row_train_acc1, _, row_test_acc1, *_ in results:
        print(f"{row_epoch}\t{row_train_acc1:.2f}\t{row_test_acc1:.2f}")

    # Plotting: split the history into one series per metric.
    epochs = [r[0] for r in results]
    train_acc1 = [r[1] for r in results]
    train_acc5 = [r[2] for r in results]
    test_acc1 = [r[3] for r in results]
    test_acc5 = [r[4] for r in results]
    train_losses = [r[5] for r in results]
    test_losses = [r[6] for r in results]

    # One entry per subplot: (title, y-axis label, [(series, legend label), ...]).
    panels = [
        ('Top-1 Accuracy', 'Accuracy', [(train_acc1, 'Train Top-1 Acc'), (test_acc1, 'Test Top-1 Acc')]),
        ('Top-5 Accuracy', 'Accuracy', [(train_acc5, 'Train Top-5 Acc'), (test_acc5, 'Test Top-5 Acc')]),
        ('Loss', 'Loss', [(train_losses, 'Train Loss'), (test_losses, 'Test Loss')]),
        ('Learning Rate', 'Learning Rate', [(learning_rates, 'Learning Rate')]),
    ]

    plt.figure(figsize=(12, 8))
    for position, (title, ylabel, curves) in enumerate(panels, start=1):
        plt.subplot(2, 2, position)
        for values, label in curves:
            plt.plot(epochs, values, label=label)
        plt.xlabel('Epoch')
        plt.ylabel(ylabel)
        plt.legend()
        plt.title(title)

    plt.tight_layout()
    plt.show()