Ubuntu committed
Commit 6e2d47c · 1 parent: 2e9c13e

Changed num_workers to 8 instead of 16
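The best worker count depends on CPU core count, storage throughput, and the cost of the albumentations pipeline, so it is usually chosen empirically. Below is a minimal, hypothetical benchmarking sketch for comparing candidate values; the bench_transform, candidate list, and batch budget are illustrative assumptions, not part of this commit (only the dataset path comes from resnet_execute.py).

import time

import albumentations as A
import numpy as np
from albumentations.pytorch import ToTensorV2
from torch.utils.data import DataLoader
from torchvision import datasets

# Stand-in for the train_transform defined in resnet_execute.py (illustrative).
bench_transform = A.Compose([
    A.Resize(224, 224),
    A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ToTensorV2(),
])

def images_per_second(dataset, num_workers, n_batches=100, batch_size=128):
    """Time n_batches from a fresh DataLoader and return throughput in images/sec."""
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True,
                        num_workers=num_workers, pin_memory=True)
    it = iter(loader)
    next(it)  # warm-up: spawn the workers and fill the prefetch queue
    start = time.time()
    for _ in range(n_batches):
        next(it)
    return n_batches * batch_size / (time.time() - start)

if __name__ == "__main__":
    dataset = datasets.ImageFolder(
        root='/mnt/imagenet/ILSVRC/Data/CLS-LOC/train',
        transform=lambda img: bench_transform(image=np.array(img))['image'])
    for workers in (4, 8, 16):
        print(f"num_workers={workers}: {images_per_second(dataset, workers):.0f} img/s")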

resnet_execute.py CHANGED
@@ -13,6 +13,7 @@ from torchvision.utils import make_grid
 import albumentations as A
 from albumentations.pytorch import ToTensorV2
 import numpy as np
+from torchsummary import summary
 
 # Define transformations
 train_transform = A.Compose([
@@ -32,16 +33,18 @@ test_transform = A.Compose([
 
 # Train dataset and loader
 trainset = datasets.ImageFolder(root='/mnt/imagenet/ILSVRC/Data/CLS-LOC/train', transform=lambda img: train_transform(image=np.array(img))['image'])
-trainloader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=16, pin_memory=True)
+trainloader = DataLoader(trainset, batch_size=128, shuffle=True, num_workers=8, pin_memory=True)
 
 testset = datasets.ImageFolder(root='/mnt/imagenet/ILSVRC/Data/CLS-LOC/val', transform=lambda img: test_transform(image=np.array(img))['image'])
-testloader = DataLoader(testset, batch_size=500, shuffle=False, num_workers=16, pin_memory=True)
+testloader = DataLoader(testset, batch_size=500, shuffle=False, num_workers=8, pin_memory=True)
 
 # Initialize model, loss function, and optimizer
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print( device )
 model = ResNet50()
 model = torch.nn.DataParallel(model)
 model = model.to(device)
+summary(model, input_size=(3, 224, 224))
 
 criterion = nn.CrossEntropyLoss()
 optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
@@ -49,7 +52,7 @@ optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)
 # Training function
 from torch.amp import autocast
 
-def train(model, device, train_loader, optimizer, criterion, epoch, accumulation_steps=2):
+def train(model, device, train_loader, optimizer, criterion, epoch, accumulation_steps=4):
     model.train()
     running_loss = 0.0
     correct1 = 0
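Only the signature of train() appears in this hunk; the body is unchanged and not shown. For context, a typical gradient-accumulation loop with torch.amp.autocast looks like the sketch below. This is a generic pattern written under the assumption that the existing body follows it (fp16 training would normally also add a torch.amp.GradScaler); it is not a copy of the repository's implementation.

from torch.amp import autocast

def train(model, device, train_loader, optimizer, criterion, epoch, accumulation_steps=4):
    model.train()
    running_loss = 0.0
    optimizer.zero_grad()
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        inputs, targets = inputs.to(device), targets.to(device)
        with autocast(device_type="cuda"):
            outputs = model(inputs)
            # Divide so gradients average over the accumulation window.
            loss = criterion(outputs, targets) / accumulation_steps
        loss.backward()
        # Step only every accumulation_steps batches: the effective batch size
        # becomes batch_size * accumulation_steps without extra GPU memory.
        if (batch_idx + 1) % accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
        running_loss += loss.item() * accumulation_steps
    return running_loss / len(train_loader)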
tmppl87qjev/_remote_module_non_scriptable.py ADDED
@@ -0,0 +1,81 @@
+from typing import *
+
+import torch
+import torch.distributed.rpc as rpc
+from torch import Tensor
+from torch._jit_internal import Future
+from torch.distributed.rpc import RRef
+from typing import Tuple  # pyre-ignore: unused import
+
+
+module_interface_cls = None
+
+
+def forward_async(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    return rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+
+
+def forward(self, *args, **kwargs):
+    args = (self.module_rref, self.device, self.is_device_map_set, *args)
+    kwargs = {**kwargs}
+    ret_fut = rpc.rpc_async(
+        self.module_rref.owner(),
+        _remote_forward,
+        args,
+        kwargs,
+    )
+    return ret_fut.wait()
+
+
+_generated_methods = [
+    forward_async,
+    forward,
+]
+
+
+
+
+def _remote_forward(
+    module_rref: RRef[module_interface_cls], device: str, is_device_map_set: bool, *args, **kwargs):
+    module = module_rref.local_value()
+    device = torch.device(device)
+
+    if device.type != "cuda":
+        return module.forward(*args, **kwargs)
+
+    # If the module is on a cuda device,
+    # move any CPU tensor in args or kwargs to the same cuda device.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.to(device) if isinstance(i, Tensor) else i for i in *args)``.
+    args = (*args,)
+    out_args: Tuple[()] = ()
+    for arg in args:
+        arg = (arg.to(device),) if isinstance(arg, Tensor) else (arg,)
+        out_args = out_args + arg
+
+    kwargs = {**kwargs}
+    for k, v in kwargs.items():
+        if isinstance(v, Tensor):
+            kwargs[k] = kwargs[k].to(device)
+
+    if is_device_map_set:
+        return module.forward(*out_args, **kwargs)
+
+    # If the device map is empty, then only CPU tensors are allowed to send over wire,
+    # so have to move any GPU tensor to CPU in the output.
+    # Since torch script does not support generator expression,
+    # have to use concatenation instead of
+    # ``tuple(i.cpu() if isinstance(i, Tensor) else i for i in module.forward(*out_args, **kwargs))``.
+    ret: Tuple[()] = ()
+    for i in module.forward(*out_args, **kwargs):
+        i = (i.cpu(),) if isinstance(i, Tensor) else (i,)
+        ret = ret + i
+    return ret
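_remote_module_non_scriptable.py is not hand-written: it matches the template that torch.distributed.nn.RemoteModule generates into a temporary directory at runtime (consistent with the tmppl87qjev/ prefix), so it appears to have been committed by accident rather than as project code. A minimal, hypothetical single-process sketch of the kind of call that produces such a file is shown below; the worker name, port, and module are illustrative assumptions.

import os

import torch
import torch.distributed.rpc as rpc
import torch.nn as nn
from torch.distributed.nn import RemoteModule

# Single-process RPC setup so the example is self-contained.
os.environ.setdefault("MASTER_ADDR", "localhost")
os.environ.setdefault("MASTER_PORT", "29500")
rpc.init_rpc("worker0", rank=0, world_size=1)

# Creating a RemoteModule writes a generated _remote_module_non_scriptable.py
# into a temp directory and imports it; a file of that kind is what was
# picked up by this commit.
remote_linear = RemoteModule("worker0/cpu", nn.Linear, args=(256, 128))
out = remote_linear.forward(torch.randn(4, 256))
print(out.shape)  # torch.Size([4, 128])

rpc.shutdown()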