Skip to content

Commit

Permalink
fix id/uuid consistency in distributed training
Browse files Browse the repository at this point in the history
Signed-off-by: Zhiyuan Chen <[email protected]>
  • Loading branch information
ZhiyuanChen committed Mar 21, 2023
1 parent be89036 commit 2340932
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 3 deletions.
5 changes: 4 additions & 1 deletion danling/runner/base_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,11 +124,14 @@ def init_tensorboard(self, *args, **kwargs) -> None:
"""
raise NotImplementedError

def set_seed(self, bias: Optional[int] = None) -> None:
def set_seed(self, seed: Optional[int] = None, bias: Optional[int] = None) -> None:
r"""
Set up random seed.
Args:
seed: Random seed to set.
Defaults to `self.seed` (`config.seed`).
bias: Make the seed different for each process.
This avoids applying the same data augmentation on every process.
Expand Down
13 changes: 11 additions & 2 deletions danling/runner/torch_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,14 @@ def init_tensorboard(self, *args, **kwargs) -> None:
self.writer = SummaryWriter(*args, **kwargs)
self.writer.add_scalar = catch(OSError, verbose=False)(self.writer.add_scalar) # type: ignore

def set_seed(self, bias: Optional[int] = None) -> None:
def set_seed(self, seed: Optional[int] = None, bias: Optional[int] = None) -> None:
r"""
Set up random seed.
Args:
seed: Random seed to set.
Defaults to `self.seed` (`config.seed`).
bias: Make the seed different for each process.
This avoids applying the same data augmentation on every process.
Expand All @@ -75,7 +78,8 @@ def set_seed(self, bias: Optional[int] = None) -> None:
Set to `False` to disable this feature.
"""

seed = self.seed
if seed is None:
seed = self.seed
if self.distributed:
object_list = [seed]
dist.broadcast_object_list(object_list)
Expand All @@ -84,6 +88,7 @@ def set_seed(self, bias: Optional[int] = None) -> None:
bias = self.rank
if bias:
seed += bias
self.seed = seed
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)
Expand Down Expand Up @@ -185,6 +190,10 @@ def init_distributed(self) -> None:
"""

self.accelerator = Accelerator(**self.accelerate)
if self.distributed:
object_list = [self.id, self.uuid]
dist.broadcast_object_list(object_list)
self.id, self.uuid = object_list[0], object_list[1]

def __getattr__(self, name: str) -> Any:
if self.accelerator is not None and hasattr(self.accelerator, name):
Expand Down

0 comments on commit 2340932

Please sign in to comment.