Skip to content

Commit

Permalink
bmtrain.init synchronize
Browse files: browse the repository at this point in the history
  • Loading branch information
a710128 committed Apr 24, 2022
1 parent d02e49f commit 549a5ef
Showing 1 changed file with 13 additions and 9 deletions.
22 changes: 13 additions & 9 deletions bmtrain/init.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,6 +7,7 @@
from .global_var import config
from . import nccl
import time
from .synchronize import synchronize

def init_distributed(
init_method : str = "env://",
Expand Down Expand Up @@ -96,12 +97,15 @@ def init_distributed(
unique_id = bytes.fromhex(store.get("BMTRAIN_UNIQUE_ID").decode())
config['comm'] = nccl.commInitRank(unique_id, world_size, rank)

print_dict("Initialization", {
"rank": rank,
"local_rank": local_rank,
"world_size": world_size,
"local_size": local_size,
"master" : master,
"device": torch.cuda.current_device(),
"cpus": cpus_this_worker
})
for i in range(world_size):
if i == rank:
print_dict("Initialization", {
"rank": rank,
"local_rank": local_rank,
"world_size": world_size,
"local_size": local_size,
"master" : master,
"device": torch.cuda.current_device(),
"cpus": cpus_this_worker
})
synchronize()

0 comments on commit 549a5ef

Please sign in to comment.