Add synchronized random seed to training
This commit is contained in:
parent
11c9caa1e2
commit
f64b1e42e9
2 changed files with 4 additions and 1 deletion
|
@ -64,7 +64,7 @@ def add_train_args(parser):
|
|||
# help='weight decay')
|
||||
parser.add_argument('--dist-backend', default='nccl', type=str,
|
||||
choices=['gloo', 'nccl'], help='distributed backend')
|
||||
parser.add_argument('--seed', default=42, type=int,
|
||||
parser.add_argument('--seed', type=int,
|
||||
help='seed for initializing training')
|
||||
parser.add_argument('--log-interval', default=20, type=int,
|
||||
help='interval between logging training loss')
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import os
|
||||
import shutil
|
||||
import random
|
||||
import torch
|
||||
from torch.multiprocessing import spawn
|
||||
from torch.distributed import init_process_group, destroy_process_group, all_reduce
|
||||
|
@ -13,6 +14,8 @@ from .models import UNet, narrow_like
|
|||
|
||||
|
||||
def node_worker(args):
|
||||
if args.seed is None:
|
||||
args.seed = random.randint(0, 65535)
|
||||
torch.manual_seed(args.seed) # NOTE: why here not in gpu_worker?
|
||||
#torch.backends.cudnn.deterministic = True # NOTE: test perf
|
||||
|
||||
|
|
Loading…
Reference in a new issue