From a94c82e7a782879130a3e236aa19211ff04c0cf6 Mon Sep 17 00:00:00 2001 From: Anna Lappe Date: Mon, 2 Dec 2024 11:55:54 +0100 Subject: [PATCH] Removed unused export in slurm script, removed abstract train method in RayTorchTrainer --- src/itwinai/torch/distributed.py | 2 ++ src/itwinai/torch/trainer.py | 4 ---- use-cases/virgo/slurm_ray.sh | 4 ---- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/itwinai/torch/distributed.py b/src/itwinai/torch/distributed.py index bc87588b..75e2c3e8 100644 --- a/src/itwinai/torch/distributed.py +++ b/src/itwinai/torch/distributed.py @@ -72,6 +72,8 @@ def _initialize_ray() -> None: return ray.init(address="auto") + print(f"Nodes in the cluster: {ray.nodes()}") + print(f"Available cluster resources: {ray.available_resources()}") class TorchDistributedStrategy(DistributedStrategy): diff --git a/src/itwinai/torch/trainer.py b/src/itwinai/torch/trainer.py index 620a6e75..54732e16 100644 --- a/src/itwinai/torch/trainer.py +++ b/src/itwinai/torch/trainer.py @@ -1388,10 +1388,6 @@ def create_dataloaders( else: self.test_dataloader = None - @abstractmethod - def train(config, data=None): - pass - @monitor_exec def execute( self, diff --git a/use-cases/virgo/slurm_ray.sh b/use-cases/virgo/slurm_ray.sh index 84c7e297..511e2888 100644 --- a/use-cases/virgo/slurm_ray.sh +++ b/use-cases/virgo/slurm_ray.sh @@ -48,10 +48,6 @@ nodes_array=($nodes) head_node=${nodes_array[0]} port=7639 # This port will be used by Ray to communicate with worker nodes. -ip_head="$head_node"i:"$port" -export ip_head -echo "IP Head: $ip_head" - echo "Starting HEAD at $head_node" # Start Ray on the head node. # The `--head` option specifies that this node will be the head of the Ray cluster.