import copy
import os
import random
import time
import core
import core.models.decoders as decoders
import core.models.backbones as backbones
import core.models.necks as necks
import core.data.datasets as datasets
from core.models.model_entry import model_entry
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.distributed as dist
from core.distributed_utils import DistributedGivenIterationSampler
from torch.utils.data import DataLoader
from .solver import Solver


class WorkerInit(object):
    """Seeds numpy inside each DataLoader worker with ``rank * num_workers + pid``."""

    def __init__(self, rank, num_workers):
        self.rank = rank
        self.num_workers = num_workers

    def func(self, pid):
        print(f'[rank{self.rank}] setting worker seed {self.rank*self.num_workers+pid}', flush=True)
        np.random.seed(self.rank*self.num_workers+pid)


class SolverDeter(Solver):
    """Solver variant that enables deterministic training when `deterministic` is set in the config."""

    def __init__(self, C):
        super().__init__(C)

        if self.config.get('deterministic', False):
            if self.config.get('cudnn_deterministic', True):
                # Deterministic cuDNN kernels: reproducible, possibly slower.
                cudnn.deterministic = True
                cudnn.benchmark = False
            else:
                cudnn.benchmark = True

            seed = self.config.get('random_seed', 0)
            worker_rank = self.config.get('worker_rank', False)
            if worker_rank:
                # Offset worker seeds by the distributed rank so no two workers share a seed.
                worker_init = WorkerInit(self.C.rank, self.config.workers)
            else:
                worker_init = WorkerInit(0, 0)
            self.worker_init_fn = worker_init.func

            # Seed every RNG source that can influence training.
            random.seed(seed)
            np.random.seed(seed)
            torch.manual_seed(seed)
            torch.cuda.manual_seed(seed)
            torch.cuda.manual_seed_all(seed)
            os.environ['PYTHONHASHSEED'] = str(seed)

            if dist.is_initialized():
                dist.barrier()
            if self.C.rank == 0:
                self.logger.info(f'deterministic mode, seed: {seed}, worker_rank: {worker_rank},\
                    cudnn_deterministic: {self.config.get("cudnn_deterministic", True)}')
            if dist.is_initialized():
                dist.barrier()
        else:
            self.worker_init_fn = None

    def create_dataloader(self):
        config = self.config
        ginfo = self.ginfo

        # Sampler that provides indices for a fixed number of iterations
        # (config.max_iter), resuming from last_iter when restarting.
        self.sampler = DistributedGivenIterationSampler(
            self.dataset, config.max_iter, config.sampler.batch_size,
            world_size=ginfo.task_size, rank=ginfo.task_rank,
            last_iter=self.last_iter,
            shuffle_strategy=config.sampler.shuffle_strategy,
            random_seed=ginfo.task_random_seed,
            ret_save_path=config.sampler.get('ret_save_path', None))

        # shuffle=False because shuffling is delegated to the sampler;
        # worker_init_fn seeds each worker when deterministic mode is on.
        self.loader = DataLoader(self.dataset, batch_size=config.sampler.batch_size,
                                 shuffle=False, num_workers=config.workers,
                                 pin_memory=False, sampler=self.sampler,
                                 worker_init_fn=self.worker_init_fn)
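

# ---------------------------------------------------------------------------
# Minimal usage sketch (hypothetical, not part of the training pipeline):
# shows how WorkerInit.func seeds numpy inside each DataLoader worker process.
# ToyDataset is an assumed stand-in for the datasets registered under
# core.data.datasets. Because this module uses a relative import of Solver,
# run it as a module (python -m <package>.solver_deter) rather than as a script.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    from torch.utils.data import Dataset

    class ToyDataset(Dataset):
        def __len__(self):
            return 8

        def __getitem__(self, idx):
            # Each worker draws from the numpy RNG seeded by WorkerInit.func,
            # so reruns with the same seeds yield the same random values.
            return idx, float(np.random.rand())

    worker_init = WorkerInit(rank=0, num_workers=2)
    loader = DataLoader(ToyDataset(), batch_size=2, num_workers=2,
                        worker_init_fn=worker_init.func)
    for indices, values in loader:
        print(indices.tolist(), values.tolist())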