Add synchronized random seed to training

Yin Li 2019-12-08 21:27:44 -05:00
parent 11c9caa1e2
commit f64b1e42e9
2 changed files with 4 additions and 1 deletion


@@ -64,7 +64,7 @@ def add_train_args(parser):
 #            help='weight decay')
     parser.add_argument('--dist-backend', default='nccl', type=str,
             choices=['gloo', 'nccl'], help='distributed backend')
-    parser.add_argument('--seed', default=42, type=int,
+    parser.add_argument('--seed', type=int,
             help='seed for initializing training')
     parser.add_argument('--log-interval', default=20, type=int,
             help='interval between logging training loss')
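
Removing the default of 42 means args.seed comes back as None whenever --seed is omitted, which the training entry point below uses as its cue to draw a fresh seed. A minimal, standalone sketch of that argparse behavior (the throwaway parser here is only for illustration):

import argparse

# Without a default, an omitted --seed parses to None, which node_worker
# later interprets as "pick a random seed for me".
parser = argparse.ArgumentParser()
parser.add_argument('--seed', type=int,
        help='seed for initializing training')

args = parser.parse_args([])                  # no --seed on the command line
assert args.seed is None

args = parser.parse_args(['--seed', '42'])    # an explicit seed still works
assert args.seed == 42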


@@ -1,5 +1,6 @@
 import os
 import shutil
+import random
 import torch
 from torch.multiprocessing import spawn
 from torch.distributed import init_process_group, destroy_process_group, all_reduce
@@ -13,6 +14,8 @@ from .models import UNet, narrow_like
 def node_worker(args):
+    if args.seed is None:
+        args.seed = random.randint(0, 65535)
     torch.manual_seed(args.seed)  # NOTE: why here not in gpu_worker?
     #torch.backends.cudnn.deterministic = True  # NOTE: test perf
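
Together the two added lines guarantee every run has a concrete seed: either the one passed via --seed, or a random one drawn once in node_worker before torch.manual_seed and before the per-GPU workers are spawned, so every worker launched from that call sees the same args.seed. The sketch below fills in the surrounding structure under assumptions; gpu_worker's body and the gpus_per_node attribute are hypothetical, since the diff does not show them:

import random
from types import SimpleNamespace

import torch
from torch.multiprocessing import spawn


def gpu_worker(local_rank, args):
    # Hypothetical per-GPU worker: each spawned process receives the same
    # args, so args.seed is identical (synchronized) across processes.
    print(f'rank {local_rank} sees seed {args.seed}')


def node_worker(args):
    # Sketch of the change above: if no --seed was given, draw one at
    # random, then seed PyTorch before spawning the per-GPU workers.
    if args.seed is None:
        args.seed = random.randint(0, 65535)
    torch.manual_seed(args.seed)

    spawn(gpu_worker, args=(args,), nprocs=args.gpus_per_node)


if __name__ == '__main__':
    # e.g. no seed supplied on the command line
    node_worker(SimpleNamespace(seed=None, gpus_per_node=2))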